Export of internal Abseil changes
-- db8dbd0e8a7b0125a4819dfc81c9bd2496849c71 by Abseil Team <absl-team@google.com>: Create GetSkipCount() and GetStride() methods and add rounding bias correction. PiperOrigin-RevId: 281780897 GitOrigin-RevId: db8dbd0e8a7b0125a4819dfc81c9bd2496849c71 Change-Id: I56a97288b1cb38a9357c065747f8d9bc4b187fee
This commit is contained in:
parent
bcaae6009c
commit
16d9fd58a5
6 changed files with 162 additions and 30 deletions
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
@ -26,6 +27,42 @@
|
||||||
namespace absl {
|
namespace absl {
|
||||||
namespace base_internal {
|
namespace base_internal {
|
||||||
|
|
||||||
|
|
||||||
|
int64_t ExponentialBiased::GetSkipCount(int64_t mean) {
|
||||||
|
if (ABSL_PREDICT_FALSE(!initialized_)) {
|
||||||
|
Initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t rng = NextRandom(rng_);
|
||||||
|
rng_ = rng;
|
||||||
|
|
||||||
|
// Take the top 26 bits as the random number
|
||||||
|
// (This plus the 1<<58 sampling bound give a max possible step of
|
||||||
|
// 5194297183973780480 bytes.)
|
||||||
|
// The uint32_t cast is to prevent a (hard-to-reproduce) NAN
|
||||||
|
// under piii debug for some binaries.
|
||||||
|
double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0;
|
||||||
|
// Put the computed p-value through the CDF of a geometric.
|
||||||
|
double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean);
|
||||||
|
// Very large values of interval overflow int64_t. To avoid that, we will
|
||||||
|
// cheat and clamp any huge values to (int64_t max)/2. This is a potential
|
||||||
|
// source of bias, but the mean would need to be such a large value that it's
|
||||||
|
// not likely to come up. For example, with a mean of 1e18, the probability of
|
||||||
|
// hitting this condition is about 1/1000. For a mean of 1e17, standard
|
||||||
|
// calculators claim that this event won't happen.
|
||||||
|
if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
|
||||||
|
// Assume huge values are bias neutral, retain bias for next call.
|
||||||
|
return std::numeric_limits<int64_t>::max() / 2;
|
||||||
|
}
|
||||||
|
double value = std::round(interval);
|
||||||
|
bias_ = interval - value;
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t ExponentialBiased::GetStride(int64_t mean) {
|
||||||
|
return GetSkipCount(mean - 1) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
// The algorithm generates a random number between 0 and 1 and applies the
|
// The algorithm generates a random number between 0 and 1 and applies the
|
||||||
// inverse cumulative distribution function for an exponential. Specifically:
|
// inverse cumulative distribution function for an exponential. Specifically:
|
||||||
// Let m be the inverse of the sample period, then the probability
|
// Let m be the inverse of the sample period, then the probability
|
||||||
|
@ -51,7 +88,7 @@ int64_t ExponentialBiased::Get(int64_t mean) {
|
||||||
// under piii debug for some binaries.
|
// under piii debug for some binaries.
|
||||||
double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0;
|
double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0;
|
||||||
// Put the computed p-value through the CDF of a geometric.
|
// Put the computed p-value through the CDF of a geometric.
|
||||||
double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean);
|
double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean);
|
||||||
// Very large values of interval overflow int64_t. To avoid that, we will cheat
|
// Very large values of interval overflow int64_t. To avoid that, we will cheat
|
||||||
// and clamp any huge values to (int64_t max)/2. This is a potential source of
|
// and clamp any huge values to (int64_t max)/2. This is a potential source of
|
||||||
// bias, but the mean would need to be such a large value that it's not likely
|
// bias, but the mean would need to be such a large value that it's not likely
|
||||||
|
@ -59,10 +96,12 @@ int64_t ExponentialBiased::Get(int64_t mean) {
|
||||||
// this condition is about 1/1000. For a mean of 1e17, standard calculators
|
// this condition is about 1/1000. For a mean of 1e17, standard calculators
|
||||||
// claim that this event won't happen.
|
// claim that this event won't happen.
|
||||||
if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
|
if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
|
||||||
|
// Assume huge values are bias neutral, retain bias for next call.
|
||||||
return std::numeric_limits<int64_t>::max() / 2;
|
return std::numeric_limits<int64_t>::max() / 2;
|
||||||
}
|
}
|
||||||
|
int64_t value = std::max<int64_t>(1, std::round(interval));
|
||||||
return static_cast<int64_t>(interval);
|
bias_ = interval - value;
|
||||||
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExponentialBiased::Initialize() {
|
void ExponentialBiased::Initialize() {
|
||||||
|
|
|
@ -17,24 +17,56 @@
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "absl/base/macros.h"
|
||||||
|
|
||||||
namespace absl {
|
namespace absl {
|
||||||
namespace base_internal {
|
namespace base_internal {
|
||||||
|
|
||||||
// ExponentialBiased provides a small and fast random number generator for a
|
// ExponentialBiased provides a small and fast random number generator for a
|
||||||
// rounded exponential distribution. This generator doesn't requires very little
|
// rounded exponential distribution. This generator manages very little state,
|
||||||
// state doesn't impose synchronization overhead, which makes it useful in some
|
// and imposes no synchronization overhead. This makes it useful in specialized
|
||||||
// specialized scenarios.
|
// scenarios requiring minimum overhead, such as stride based periodic sampling.
|
||||||
//
|
//
|
||||||
// For the generated variable X, X ~ floor(Exponential(1/mean)). The floor
|
// ExponentialBiased provides two closely related functions, GetSkipCount() and
|
||||||
// operation introduces a small amount of bias, but the distribution is useful
|
// GetStride(), both returning a rounded integer defining a number of events
|
||||||
// to generate a wait time. That is, if an operation is supposed to happen on
|
// required before some event with a given mean probability occurs.
|
||||||
// average to 1/mean events, then the generated variable X will describe how
|
|
||||||
// many events to skip before performing the operation and computing a new X.
|
|
||||||
//
|
//
|
||||||
// The mathematically precise distribution to use for integer wait times is a
|
// The distribution is useful to generate a random wait time or some periodic
|
||||||
// Geometric distribution, but a Geometric distribution takes slightly more time
|
// event with a given mean probability. For example, if an action is supposed to
|
||||||
// to compute and when the mean is large (say, 100+), the Geometric distribution
|
// happen on average once every 'N' events, then we can get a random 'stride'
|
||||||
// is hard to distinguish from the result of ExponentialBiased.
|
// counting down how long before the event to happen. For example, if we'd want
|
||||||
|
// to sample one in every 1000 'Frobber' calls, our code could look like this:
|
||||||
|
//
|
||||||
|
// Frobber::Frobber() {
|
||||||
|
// stride_ = exponential_biased_.GetStride(1000);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// void Frobber::Frob(int arg) {
|
||||||
|
// if (--stride == 0) {
|
||||||
|
// SampleFrob(arg);
|
||||||
|
// stride_ = exponential_biased_.GetStride(1000);
|
||||||
|
// }
|
||||||
|
// ...
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The rounding of the return value creates a bias, especially for smaller means
|
||||||
|
// where the distribution of the fraction is not evenly distributed. We correct
|
||||||
|
// this bias by tracking the fraction we rounded up or down on each iteration,
|
||||||
|
// effectively tracking the distance between the cumulative value, and the
|
||||||
|
// rounded cumulative value. For example, given a mean of 2:
|
||||||
|
//
|
||||||
|
// raw = 1.63076, cumulative = 1.63076, rounded = 2, bias = -0.36923
|
||||||
|
// raw = 0.14624, cumulative = 1.77701, rounded = 2, bias = 0.14624
|
||||||
|
// raw = 4.93194, cumulative = 6.70895, rounded = 7, bias = -0.06805
|
||||||
|
// raw = 0.24206, cumulative = 6.95101, rounded = 7, bias = 0.24206
|
||||||
|
// etc...
|
||||||
|
//
|
||||||
|
// Adjusting with rounding bias is relatively trivial:
|
||||||
|
//
|
||||||
|
// double value = bias_ + exponential_distribution(mean)();
|
||||||
|
// double rounded_value = std::round(value);
|
||||||
|
// bias_ = value - rounded_value;
|
||||||
|
// return rounded_value;
|
||||||
//
|
//
|
||||||
// This class is thread-compatible.
|
// This class is thread-compatible.
|
||||||
class ExponentialBiased {
|
class ExponentialBiased {
|
||||||
|
@ -42,9 +74,32 @@ class ExponentialBiased {
|
||||||
// The number of bits set by NextRandom.
|
// The number of bits set by NextRandom.
|
||||||
static constexpr int kPrngNumBits = 48;
|
static constexpr int kPrngNumBits = 48;
|
||||||
|
|
||||||
// Generates the floor of an exponentially distributed random variable by
|
// `GetSkipCount()` returns the number of events to skip before some chosen
|
||||||
// rounding the value down to the nearest integer. The result will be in the
|
// event happens. For example, randomly tossing a coin, we will on average
|
||||||
// range [0, int64_t max / 2].
|
// throw heads once before we get tails. We can simulate random coin tosses
|
||||||
|
// using GetSkipCount() as:
|
||||||
|
//
|
||||||
|
// ExponentialBiased eb;
|
||||||
|
// for (...) {
|
||||||
|
// int number_of_heads_before_tail = eb.GetSkipCount(1);
|
||||||
|
// for (int flips = 0; flips < number_of_heads_before_tail; ++flips) {
|
||||||
|
// printf("head...");
|
||||||
|
// }
|
||||||
|
// printf("tail\n");
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
int64_t GetSkipCount(int64_t mean);
|
||||||
|
|
||||||
|
// GetStride() returns the number of events required for a specific event to
|
||||||
|
// happen. See the class comments for a usage example. `GetStride()` is
|
||||||
|
// equivalent to `GetSkipCount(mean - 1) + 1`. When to use `GetStride()` or
|
||||||
|
// `GetSkipCount()` depends mostly on what best fits the use case.
|
||||||
|
int64_t GetStride(int64_t mean);
|
||||||
|
|
||||||
|
// Generates a rounded exponentially distributed random variable
|
||||||
|
// by rounding the value to the nearest integer.
|
||||||
|
// The result will be in the range [0, int64_t max / 2].
|
||||||
|
ABSL_DEPRECATED("Use GetSkipCount() or GetStride() instead")
|
||||||
int64_t Get(int64_t mean);
|
int64_t Get(int64_t mean);
|
||||||
|
|
||||||
// Computes a random number in the range [0, 1<<(kPrngNumBits+1) - 1]
|
// Computes a random number in the range [0, 1<<(kPrngNumBits+1) - 1]
|
||||||
|
@ -56,6 +111,7 @@ class ExponentialBiased {
|
||||||
void Initialize();
|
void Initialize();
|
||||||
|
|
||||||
uint64_t rng_{0};
|
uint64_t rng_{0};
|
||||||
|
double bias_{0};
|
||||||
bool initialized_{false};
|
bool initialized_{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -113,6 +113,35 @@ double AndersonDarlingTest(const std::vector<double>& random_sample) {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ExponentialBiasedTest, CoinTossDemoWithGetSkipCount) {
|
||||||
|
ExponentialBiased eb;
|
||||||
|
for (int runs = 0; runs < 10; ++runs) {
|
||||||
|
for (int flips = eb.GetSkipCount(1); flips > 0; --flips) {
|
||||||
|
printf("head...");
|
||||||
|
}
|
||||||
|
printf("tail\n");
|
||||||
|
}
|
||||||
|
int heads = 0;
|
||||||
|
for (int i = 0; i < 10000000; i += 1 + eb.GetSkipCount(1)) {
|
||||||
|
++heads;
|
||||||
|
}
|
||||||
|
printf("Heads = %d (%f%%)\n", heads, 100.0 * heads / 10000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ExponentialBiasedTest, SampleDemoWithStride) {
|
||||||
|
ExponentialBiased eb;
|
||||||
|
int stride = eb.GetStride(10);
|
||||||
|
int samples = 0;
|
||||||
|
for (int i = 0; i < 10000000; ++i) {
|
||||||
|
if (--stride == 0) {
|
||||||
|
++samples;
|
||||||
|
stride = eb.GetStride(10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("Samples = %d (%f%%)\n", samples, 100.0 * samples / 10000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Testing that NextRandom generates uniform random numbers. Applies the
|
// Testing that NextRandom generates uniform random numbers. Applies the
|
||||||
// Anderson-Darling test for uniformity
|
// Anderson-Darling test for uniformity
|
||||||
TEST(ExponentialBiasedTest, TestNextRandom) {
|
TEST(ExponentialBiasedTest, TestNextRandom) {
|
||||||
|
@ -153,15 +182,15 @@ TEST(ExponentialBiasedTest, TestNextRandom) {
|
||||||
// variable.
|
// variable.
|
||||||
TEST(ExponentialBiasedTest, InitializationModes) {
|
TEST(ExponentialBiasedTest, InitializationModes) {
|
||||||
ABSL_CONST_INIT static ExponentialBiased eb_static;
|
ABSL_CONST_INIT static ExponentialBiased eb_static;
|
||||||
EXPECT_THAT(eb_static.Get(2), Ge(0));
|
EXPECT_THAT(eb_static.GetSkipCount(2), Ge(0));
|
||||||
|
|
||||||
#if ABSL_HAVE_THREAD_LOCAL
|
#if ABSL_HAVE_THREAD_LOCAL
|
||||||
thread_local ExponentialBiased eb_thread;
|
thread_local ExponentialBiased eb_thread;
|
||||||
EXPECT_THAT(eb_thread.Get(2), Ge(0));
|
EXPECT_THAT(eb_thread.GetSkipCount(2), Ge(0));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ExponentialBiased eb_stack;
|
ExponentialBiased eb_stack;
|
||||||
EXPECT_THAT(eb_stack.Get(2), Ge(0));
|
EXPECT_THAT(eb_stack.GetSkipCount(2), Ge(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace base_internal
|
} // namespace base_internal
|
||||||
|
|
|
@ -22,7 +22,7 @@ namespace absl {
|
||||||
namespace base_internal {
|
namespace base_internal {
|
||||||
|
|
||||||
int64_t PeriodicSamplerBase::GetExponentialBiased(int period) noexcept {
|
int64_t PeriodicSamplerBase::GetExponentialBiased(int period) noexcept {
|
||||||
return rng_.Get(period);
|
return rng_.GetStride(period);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PeriodicSamplerBase::SubtleConfirmSample() noexcept {
|
bool PeriodicSamplerBase::SubtleConfirmSample() noexcept {
|
||||||
|
@ -36,13 +36,14 @@ bool PeriodicSamplerBase::SubtleConfirmSample() noexcept {
|
||||||
|
|
||||||
// Check if this is the first call to Sample()
|
// Check if this is the first call to Sample()
|
||||||
if (ABSL_PREDICT_FALSE(stride_ == 1)) {
|
if (ABSL_PREDICT_FALSE(stride_ == 1)) {
|
||||||
stride_ = static_cast<uint64_t>(-1 - GetExponentialBiased(current_period));
|
stride_ = static_cast<uint64_t>(-GetExponentialBiased(current_period));
|
||||||
if (static_cast<int64_t>(stride_) < -1) {
|
if (static_cast<int64_t>(stride_) < -1) {
|
||||||
++stride_;
|
++stride_;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stride_ = static_cast<uint64_t>(-1 - GetExponentialBiased(current_period));
|
|
||||||
|
stride_ = static_cast<uint64_t>(-GetExponentialBiased(current_period));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,13 @@ void BM_SampleMinunumInlined(Sampler* sampler, benchmark::State& state) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BM_PeriodicSampler_TinySample(benchmark::State& state) {
|
||||||
|
struct Tag {};
|
||||||
|
PeriodicSampler<Tag, 10> sampler;
|
||||||
|
BM_Sample(&sampler, state);
|
||||||
|
}
|
||||||
|
BENCHMARK(BM_PeriodicSampler_TinySample);
|
||||||
|
|
||||||
void BM_PeriodicSampler_ShortSample(benchmark::State& state) {
|
void BM_PeriodicSampler_ShortSample(benchmark::State& state) {
|
||||||
struct Tag {};
|
struct Tag {};
|
||||||
PeriodicSampler<Tag, 1024> sampler;
|
PeriodicSampler<Tag, 1024> sampler;
|
||||||
|
|
|
@ -42,9 +42,9 @@ TEST(PeriodicSamplerBaseTest, Sample) {
|
||||||
|
|
||||||
EXPECT_CALL(sampler, period()).Times(3).WillRepeatedly(Return(16));
|
EXPECT_CALL(sampler, period()).Times(3).WillRepeatedly(Return(16));
|
||||||
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
||||||
.WillOnce(Return(1))
|
|
||||||
.WillOnce(Return(2))
|
.WillOnce(Return(2))
|
||||||
.WillOnce(Return(3));
|
.WillOnce(Return(3))
|
||||||
|
.WillOnce(Return(4));
|
||||||
|
|
||||||
EXPECT_FALSE(sampler.Sample());
|
EXPECT_FALSE(sampler.Sample());
|
||||||
EXPECT_TRUE(sampler.Sample());
|
EXPECT_TRUE(sampler.Sample());
|
||||||
|
@ -63,9 +63,9 @@ TEST(PeriodicSamplerBaseTest, ImmediatelySample) {
|
||||||
|
|
||||||
EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16));
|
EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16));
|
||||||
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
||||||
.WillOnce(Return(0))
|
|
||||||
.WillOnce(Return(1))
|
.WillOnce(Return(1))
|
||||||
.WillOnce(Return(2));
|
.WillOnce(Return(2))
|
||||||
|
.WillOnce(Return(3));
|
||||||
|
|
||||||
EXPECT_TRUE(sampler.Sample());
|
EXPECT_TRUE(sampler.Sample());
|
||||||
|
|
||||||
|
@ -100,7 +100,7 @@ TEST(PeriodicSamplerBaseTest, Disable) {
|
||||||
StrictMock<MockPeriodicSampler> sampler;
|
StrictMock<MockPeriodicSampler> sampler;
|
||||||
|
|
||||||
EXPECT_CALL(sampler, period()).WillOnce(Return(16));
|
EXPECT_CALL(sampler, period()).WillOnce(Return(16));
|
||||||
EXPECT_CALL(sampler, GetExponentialBiased(16)).WillOnce(Return(2));
|
EXPECT_CALL(sampler, GetExponentialBiased(16)).WillOnce(Return(3));
|
||||||
EXPECT_FALSE(sampler.Sample());
|
EXPECT_FALSE(sampler.Sample());
|
||||||
EXPECT_FALSE(sampler.Sample());
|
EXPECT_FALSE(sampler.Sample());
|
||||||
|
|
||||||
|
@ -119,7 +119,7 @@ TEST(PeriodicSamplerBaseTest, Enable) {
|
||||||
EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16));
|
EXPECT_CALL(sampler, period()).Times(2).WillRepeatedly(Return(16));
|
||||||
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
EXPECT_CALL(sampler, GetExponentialBiased(16))
|
||||||
.Times(2)
|
.Times(2)
|
||||||
.WillRepeatedly(Return(2));
|
.WillRepeatedly(Return(3));
|
||||||
|
|
||||||
EXPECT_FALSE(sampler.Sample());
|
EXPECT_FALSE(sampler.Sample());
|
||||||
EXPECT_FALSE(sampler.Sample());
|
EXPECT_FALSE(sampler.Sample());
|
||||||
|
|
Loading…
Reference in a new issue