Export of internal Abseil changes

-- e54b9c7bbb0c58475676c268e2e19c69f4bce48a by Jorg Brown <jorg@google.com>: Tweak ABSL_PREDICT_TRUE slightly, for better code on some platforms and/or optimization levels. "false || (x)" is more verbose than "!!(x)", but ultimately more efficient. For example, given this code: void InitIfNecessary() { if (ABSL_PREDICT_TRUE(NeedsInit())) { SlowInitIfNecessary(); } } Clang with default optimization level will produce: Before this CL After this CL InitIfNecessary: InitIfNecessary: push rbp push rbp mov rbp, rsp mov rbp, rsp call NeedsInit call NeedsInit xor al, -1 xor al, -1 test al, 1 test al, 1 jne .LBB2_1 jne .LBB3_1 jmp .LBB2_2 jmp .LBB3_2 .LBB2_1: .LBB3_1: call SlowInitIfNecessary call SlowInitIfNecessary .LBB2_2: .LBB3_2: pop rbp pop rbp ret ret PiperOrigin-RevId: 276401386 -- 0a3c4dfd8342bf2b1b11a87f1c662c883f73cab7 by Abseil Team <absl-team@google.com>: Fix comment nit: sem_open => sem_init. The code calls sem_init, not sem_open, to initialize an unnamed semaphore. (sem_open creates or opens a named semaphore.) PiperOrigin-RevId: 276344072 -- b36a664e9459057509a90e83d3482e1d3a4c44c7 by Abseil Team <absl-team@google.com>: Fix typo in flat_hash_map.h: exchaged -> exchanged PiperOrigin-RevId: 276295792 -- 7bbd8d18276eb110c8335743e35fceb662ddf3d6 by Samuel Benzaquen <sbenza@google.com>: Add assertions to verify use of iterators. PiperOrigin-RevId: 276283300 -- 677398a8ffcb1f59182cffe57a4fe7ff147a0404 by Laramie Leavitt <lar@google.com>: Migrate distribution_impl.h/cc to generate_real.h/cc. Combine the methods RandU64To<Float,Double> into a single method: GenerateRealFromBits(). Remove rejection sampling from absl::uniform_real_distribution. PiperOrigin-RevId: 276158675 -- c60c9d11d24b0c546329d998e78e15a84b3153f5 by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 276126962 -- 4c840cab6a8d86efa29b397cafaf7520eece68cc by Andy Soffer <asoffer@google.com>: Update CMakeLists.txt to address https://github.com/abseil/abseil-cpp/issues/365. This does not cover every platform, but it does at least address the first-order issue of assuming gcc implies x86. PiperOrigin-RevId: 276116253 -- 98da366e6b5d51afe5d7ac6722126aca23d85ee6 by Abseil Team <absl-team@google.com>: Internal change PiperOrigin-RevId: 276097452 GitOrigin-RevId: e54b9c7bbb0c58475676c268e2e19c69f4bce48a Change-Id: I02d84454bb71ab21ad3d39650acf6cc6e36f58d7
2019-10-23 19:35:39 -07:00 · 2019-10-23 19:35:39 -07:00 · 078b89b3c0
commit 078b89b3c0
parent 19b021cb3f
28 changed files with 739 additions and 370 deletions
--- a/absl/base/BUILD.bazel
+++ b/absl/base/BUILD.bazel
@ -557,6 +557,31 @@ cc_test(
    ],
 )

+cc_library(
+    name = "exponential_biased",
+    srcs = ["internal/exponential_biased.cc"],
+    hdrs = ["internal/exponential_biased.h"],
+    linkopts = ABSL_DEFAULT_LINKOPTS,
+    visibility = [
+        "//absl:__subpackages__",
+    ],
+    deps = [":core_headers"],
+)
+
+cc_test(
+    name = "exponential_biased_test",
+    size = "small",
+    srcs = ["internal/exponential_biased_test.cc"],
+    copts = ABSL_TEST_COPTS,
+    linkopts = ABSL_DEFAULT_LINKOPTS,
+    visibility = ["//visibility:private"],
+    deps = [
+        ":exponential_biased",
+        "//absl/strings",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
    name = "scoped_set_env",
    testonly = 1,
--- a/absl/base/CMakeLists.txt
+++ b/absl/base/CMakeLists.txt
@ -503,6 +503,32 @@ absl_cc_test(
    gtest_main
 )

+absl_cc_library(
+  NAME
+    exponential_biased
+  SRCS
+    "internal/exponential_biased.cc"
+  HDRS
+    "internal/exponential_biased.h"
+  COPTS
+    ${ABSL_DEFAULT_COPTS}
+  DEPS
+    absl::core_headers
+)
+
+absl_cc_test(
+  NAME
+    exponential_biased_test
+  SRCS
+    "internal/exponential_biased_test.cc"
+  COPTS
+    ${ABSL_TEST_COPTS}
+  DEPS
+    absl::exponential_biased
+    absl::strings
+    gmock_main
+)
+
 absl_cc_library(
  NAME
    scoped_set_env
--- a/absl/base/config.h
+++ b/absl/base/config.h
@ -307,7 +307,7 @@

 // ABSL_HAVE_SEMAPHORE_H
 //
-// Checks whether the platform supports the <semaphore.h> header and sem_open(3)
+// Checks whether the platform supports the <semaphore.h> header and sem_init(3)
 // family of functions as standardized in POSIX.1-2001.
 //
 // Note: While Apple provides <semaphore.h> for both iOS and macOS, it is
--- a/absl/base/internal/exponential_biased.cc
+++ b/absl/base/internal/exponential_biased.cc
@ -0,0 +1,84 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/internal/exponential_biased.h"
+
+#include <stdint.h>
+
+#include <atomic>
+#include <cmath>
+#include <limits>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+
+namespace absl {
+namespace base_internal {
+
+// The algorithm generates a random number between 0 and 1 and applies the
+// inverse cumulative distribution function for an exponential. Specifically:
+// Let m be the inverse of the sample period, then the probability
+// distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below
+int64_t ExponentialBiased::Get(int64_t mean) {
+  if (ABSL_PREDICT_FALSE(!initialized_)) {
+    Initialize();
+  }
+
+  uint64_t rng = NextRandom(rng_);
+  rng_ = rng;
+
+  // Take the top 26 bits as the random number
+  // (This plus the 1<<58 sampling bound give a max possible step of
+  // 5194297183973780480 bytes.)
+  // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+  // under piii debug for some binaries.
+  double q = static_cast<uint32_t>(rng >> (kPrngNumBits - 26)) + 1.0;
+  // Put the computed p-value through the CDF of a geometric.
+  double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean);
+  // Very large values of interval overflow int64_t. To avoid that, we will cheat
+  // and clamp any huge values to (int64_t max)/2. This is a potential source of
+  // bias, but the mean would need to be such a large value that it's not likely
+  // to come up. For example, with a mean of 1e18, the probability of hitting
+  // this condition is about 1/1000. For a mean of 1e17, standard calculators
+  // claim that this event won't happen.
+  if (interval > static_cast<double>(std::numeric_limits<int64_t>::max() / 2)) {
+    return std::numeric_limits<int64_t>::max() / 2;
+  }
+
+  return static_cast<int64_t>(interval);
+}
+
+void ExponentialBiased::Initialize() {
+  // We don't get well distributed numbers from `this` so we call NextRandom() a
+  // bunch to mush the bits around. We use a global_rand to handle the case
+  // where the same thread (by memory address) gets created and destroyed
+  // repeatedly.
+  ABSL_CONST_INIT static std::atomic<uint32_t> global_rand(0);
+  uint64_t r = reinterpret_cast<uint64_t>(this) +
+               global_rand.fetch_add(1, std::memory_order_relaxed);
+  for (int i = 0; i < 20; ++i) {
+    r = NextRandom(r);
+  }
+  rng_ = r;
+  initialized_ = true;
+}
+
+}  // namespace base_internal
+}  // namespace absl
--- a/absl/base/internal/exponential_biased.h
+++ b/absl/base/internal/exponential_biased.h
@ -0,0 +1,77 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
+#define ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
+
+#include <stdint.h>
+
+namespace absl {
+namespace base_internal {
+
+// ExponentialBiased provides a small and fast random number generator for a
+// rounded exponential distribution. This generator doesn't requires very little
+// state doesn't impose synchronization overhead, which makes it useful in some
+// specialized scenarios.
+//
+// For the generated variable X, X ~ floor(Exponential(1/mean)). The floor
+// operation introduces a small amount of bias, but the distribution is useful
+// to generate a wait time. That is, if an operation is supposed to happen on
+// average to 1/mean events, then the generated variable X will describe how
+// many events to skip before performing the operation and computing a new X.
+//
+// The mathematically precise distribution to use for integer wait times is a
+// Geometric distribution, but a Geometric distribution takes slightly more time
+// to compute and when the mean is large (say, 100+), the Geometric distribution
+// is hard to distinguish from the result of ExponentialBiased.
+//
+// This class is thread-compatible.
+class ExponentialBiased {
+ public:
+  // The number of bits set by NextRandom.
+  static constexpr int kPrngNumBits = 48;
+
+  // Generates the floor of an exponentially distributed random variable by
+  // rounding the value down to the nearest integer. The result will be in the
+  // range [0, int64_t max / 2].
+  int64_t Get(int64_t mean);
+
+  // Computes a random number in the range [0, 1<<(kPrngNumBits+1) - 1]
+  //
+  // This is public to enable testing.
+  static uint64_t NextRandom(uint64_t rnd);
+
+ private:
+  void Initialize();
+
+  uint64_t rng_{0};
+  bool initialized_{false};
+};
+
+// Returns the next prng value.
+// pRNG is: aX+b mod c with a = 0x5DEECE66D, b =  0xB, c = 1<<48
+// This is the lrand64 generator.
+inline uint64_t ExponentialBiased::NextRandom(uint64_t rnd) {
+  const uint64_t prng_mult = uint64_t{0x5DEECE66D};
+  const uint64_t prng_add = 0xB;
+  const uint64_t prng_mod_power = 48;
+  const uint64_t prng_mod_mask =
+      ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+  return (prng_mult * rnd + prng_add) & prng_mod_mask;
+}
+
+}  // namespace base_internal
+}  // namespace absl
+
+#endif  // ABSL_BASE_INTERNAL_EXPONENTIAL_BIASED_H_
--- a/absl/base/internal/exponential_biased_test.cc
+++ b/absl/base/internal/exponential_biased_test.cc
@ -0,0 +1,168 @@
+// Copyright 2019 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/internal/exponential_biased.h"
+
+#include <stddef.h>
+
+#include <cmath>
+#include <cstdint>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+
+using ::testing::Ge;
+
+namespace absl {
+namespace base_internal {
+
+MATCHER_P2(IsBetween, a, b,
+           absl::StrCat(std::string(negation ? "isn't" : "is"), " between ", a,
+                        " and ", b)) {
+  return a <= arg && arg <= b;
+}
+
+// Tests of the quality of the random numbers generated
+// This uses the Anderson Darling test for uniformity.
+// See "Evaluating the Anderson-Darling Distribution" by Marsaglia
+// for details.
+
+// Short cut version of ADinf(z), z>0 (from Marsaglia)
+// This returns the p-value for Anderson Darling statistic in
+// the limit as n-> infinity. For finite n, apply the error fix below.
+double AndersonDarlingInf(double z) {
+  if (z < 2) {
+    return exp(-1.2337141 / z) / sqrt(z) *
+           (2.00012 +
+            (0.247105 -
+             (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * z) * z) * z) *
+                 z) *
+                z);
+  }
+  return exp(
+      -exp(1.0776 -
+           (2.30695 -
+            (0.43424 - (0.082433 - (0.008056 - 0.0003146 * z) * z) * z) * z) *
+               z));
+}
+
+// Corrects the approximation error in AndersonDarlingInf for small values of n
+// Add this to AndersonDarlingInf to get a better approximation
+// (from Marsaglia)
+double AndersonDarlingErrFix(int n, double x) {
+  if (x > 0.8) {
+    return (-130.2137 +
+            (745.2337 -
+             (1705.091 - (1950.646 - (1116.360 - 255.7844 * x) * x) * x) * x) *
+                x) /
+           n;
+  }
+  double cutoff = 0.01265 + 0.1757 / n;
+  if (x < cutoff) {
+    double t = x / cutoff;
+    t = sqrt(t) * (1 - t) * (49 * t - 102);
+    return t * (0.0037 / (n * n) + 0.00078 / n + 0.00006) / n;
+  } else {
+    double t = (x - cutoff) / (0.8 - cutoff);
+    t = -0.00022633 +
+        (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864 * t) * t) * t) * t) *
+            t;
+    return t * (0.04213 + 0.01365 / n) / n;
+  }
+}
+
+// Returns the AndersonDarling p-value given n and the value of the statistic
+double AndersonDarlingPValue(int n, double z) {
+  double ad = AndersonDarlingInf(z);
+  double errfix = AndersonDarlingErrFix(n, ad);
+  return ad + errfix;
+}
+
+double AndersonDarlingStatistic(const std::vector<double>& random_sample) {
+  int n = random_sample.size();
+  double ad_sum = 0;
+  for (int i = 0; i < n; i++) {
+    ad_sum += (2 * i + 1) *
+              std::log(random_sample[i] * (1 - random_sample[n - 1 - i]));
+  }
+  double ad_statistic = -n - 1 / static_cast<double>(n) * ad_sum;
+  return ad_statistic;
+}
+
+// Tests if the array of doubles is uniformly distributed.
+// Returns the p-value of the Anderson Darling Statistic
+// for the given set of sorted random doubles
+// See "Evaluating the Anderson-Darling Distribution" by
+// Marsaglia and Marsaglia for details.
+double AndersonDarlingTest(const std::vector<double>& random_sample) {
+  double ad_statistic = AndersonDarlingStatistic(random_sample);
+  double p = AndersonDarlingPValue(random_sample.size(), ad_statistic);
+  return p;
+}
+
+// Testing that NextRandom generates uniform random numbers. Applies the
+// Anderson-Darling test for uniformity
+TEST(ExponentialBiasedTest, TestNextRandom) {
+  for (auto n : std::vector<int>({
+           10,  // Check short-range correlation
+           100, 1000,
+           10000  // Make sure there's no systemic error
+       })) {
+    uint64_t x = 1;
+    // This assumes that the prng returns 48 bit numbers
+    uint64_t max_prng_value = static_cast<uint64_t>(1) << 48;
+    // Initialize.
+    for (int i = 1; i <= 20; i++) {
+      x = ExponentialBiased::NextRandom(x);
+    }
+    std::vector<uint64_t> int_random_sample(n);
+    // Collect samples
+    for (int i = 0; i < n; i++) {
+      int_random_sample[i] = x;
+      x = ExponentialBiased::NextRandom(x);
+    }
+    // First sort them...
+    std::sort(int_random_sample.begin(), int_random_sample.end());
+    std::vector<double> random_sample(n);
+    // Convert them to uniform randoms (in the range [0,1])
+    for (int i = 0; i < n; i++) {
+      random_sample[i] =
+          static_cast<double>(int_random_sample[i]) / max_prng_value;
+    }
+    // Now compute the Anderson-Darling statistic
+    double ad_pvalue = AndersonDarlingTest(random_sample);
+    EXPECT_GT(std::min(ad_pvalue, 1 - ad_pvalue), 0.0001)
+        << "prng is not uniform: n = " << n << " p = " << ad_pvalue;
+  }
+}
+
+// The generator needs to be available as a thread_local and as a static
+// variable.
+TEST(ExponentialBiasedTest, InitializationModes) {
+  ABSL_CONST_INIT static ExponentialBiased eb_static;
+  EXPECT_THAT(eb_static.Get(2), Ge(0));
+
+#if ABSL_HAVE_THREAD_LOCAL
+  thread_local ExponentialBiased eb_thread;
+  EXPECT_THAT(eb_thread.Get(2), Ge(0));
+#endif
+
+  ExponentialBiased eb_stack;
+  EXPECT_THAT(eb_stack.Get(2), Ge(0));
+}
+
+}  // namespace base_internal
+}  // namespace absl
--- a/absl/base/optimization.h
+++ b/absl/base/optimization.h
@ -172,7 +172,7 @@
 #if ABSL_HAVE_BUILTIN(__builtin_expect) || \
    (defined(__GNUC__) && !defined(__clang__))
 #define ABSL_PREDICT_FALSE(x) (__builtin_expect(x, 0))
-#define ABSL_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))
 #else
 #define ABSL_PREDICT_FALSE(x) (x)
 #define ABSL_PREDICT_TRUE(x) (x)