Add 'third_party/abseil_cpp/' from commit '768eb2ca28'

git-subtree-dir: third_party/abseil_cpp
git-subtree-mainline: ffb2ae54be
git-subtree-split: 768eb2ca28
This commit is contained in:
Vincent Ambo 2020-05-20 02:32:24 +01:00
commit fc8dc48020
1276 changed files with 208196 additions and 0 deletions

View file

@ -0,0 +1,156 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Character Map Class
//
// A fast, bit-vector map for 8-bit unsigned characters.
// This class is useful for non-character purposes as well.
#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/macros.h"
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// A 256-bit set of unsigned 8-bit character codes, stored as four 64-bit
// words. Bit (c % 64) of word (c / 64) is set when character c is a member.
class Charmap {
 public:
  // Creates an empty map.
  constexpr Charmap() : m_() {}

  // Creates a map from exactly `len` bytes starting at `str`. NUL is not a
  // terminator here: it is an ordinary byte and gets added to the map.
  Charmap(const char* str, int len) : m_() {
    for (; len--; ++str) SetChar(*str);
  }

  // Creates a map from a NUL-terminated string. The terminating NUL is not
  // added to the map.
  explicit Charmap(const char* str) : m_() {
    for (const char* p = str; *p; ++p) SetChar(*p);
  }

  // True if `c` is a member of this map.
  constexpr bool contains(unsigned char c) const {
    return ((m_[c / 64] >> (c % 64)) & 0x1) != 0;
  }

  // True if and only if at least one character is a member of both maps.
  bool IntersectsWith(const Charmap& c) const {
    size_t word = 0;
    // Skip words that share no bits; stop at the first that does.
    while (word < ABSL_ARRAYSIZE(m_) && (m_[word] & c.m_[word]) == 0) {
      ++word;
    }
    return word < ABSL_ARRAYSIZE(m_);
  }

  // True if the map has no members.
  bool IsZero() const { return (m_[0] | m_[1] | m_[2] | m_[3]) == 0; }

  // A map containing only the single character `x`.
  static constexpr Charmap Char(char x) {
    return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // A map containing every character of the C-string `s`.
  // The C++11 constexpr formulation makes this recurse once per character, so
  // it is expensive; use it only in constexpr initializers.
  static constexpr Charmap FromString(const char* s) {
    return (*s != 0) ? (FromString(s + 1) | Char(*s)) : Charmap();
  }

  // A map containing every character in the closed interval [lo, hi].
  static constexpr Charmap Range(char lo, char hi) {
    return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection.
  friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
                   a.m_[3] & b.m_[3]);
  }

  // Set union.
  friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
    return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
                   a.m_[3] | b.m_[3]);
  }

  // Set complement.
  friend constexpr Charmap operator~(const Charmap& a) {
    return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

 private:
  // Builds a map directly from its four backing words.
  constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Bits of word `word` covering the closed range [lo, hi]: everything below
  // hi + 1 with everything below lo masked out.
  static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
                                         uint64_t word) {
    return ~OpenRangeFromZeroForWord(lo, word) &
           OpenRangeFromZeroForWord(hi + 1, word);
  }

  // Bits of word `word` covering the half-open range [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    return (upper <= 64 * word)
               ? 0  // range ends at or before this word starts
               : ((upper >= 64 * (word + 1))
                      ? ~static_cast<uint64_t>(0)  // range covers whole word
                      : (~static_cast<uint64_t>(0) >> (64 - upper % 64)));
  }

  // The single bit for `x` if it lives in word `word`, otherwise 0.
  static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
    return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
  }

  // Adds `c` to the map.
  void SetChar(unsigned char c) {
    const uint64_t bit = static_cast<uint64_t>(1) << (c % 64);
    m_[c / 64] |= bit;
  }

  uint64_t m_[4];
};
// Charmap counterparts of the character-classification predicates in
// <cctype>. Each factory returns the set of characters in that class.

// 'A'..'Z'
constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
// 'a'..'z'
constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
// '0'..'9'
constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
// Letters: upper- or lower-case.
constexpr Charmap AlphaCharmap() { return UpperCharmap() | LowerCharmap(); }
// Letters or decimal digits.
constexpr Charmap AlnumCharmap() { return AlphaCharmap() | DigitCharmap(); }
// Hexadecimal digits in either case.
constexpr Charmap XDigitCharmap() {
  return Charmap::Range('a', 'f') | Charmap::Range('A', 'F') | DigitCharmap();
}
// Printable characters: 0x20 (space) through 0x7e.
constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
// Whitespace characters.
constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
// Everything in 0x00..0x7f that is not printable.
constexpr Charmap CntrlCharmap() {
  return ~PrintCharmap() & Charmap::Range(0, 0x7f);
}
// Tab and space only.
constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
// Printable characters that are not whitespace.
constexpr Charmap GraphCharmap() { return ~SpaceCharmap() & PrintCharmap(); }
// Graphic characters that are neither letters nor digits.
constexpr Charmap PunctCharmap() { return ~AlnumCharmap() & GraphCharmap(); }
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_

View file

@ -0,0 +1,61 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cstdint>
#include "benchmark/benchmark.h"
namespace {
// Builds a fixed Charmap with a mixed membership pattern for benchmarking.
absl::strings_internal::Charmap MakeBenchmarkMap() {
  using absl::strings_internal::Charmap;
  // One 32-bit word per 32 character codes. Multiplying a nibble by
  // 0x11111111 repeats that nibble across all eight nibbles of the word.
  uint32_t pattern[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
  for (uint32_t& word : pattern) word *= static_cast<uint32_t>(0x11111111UL);

  Charmap result;
  for (uint32_t code = 0; code < 256; ++code) {
    const bool selected = ((pattern[code / 32] >> (code % 32)) & 1) != 0;
    if (selected) {
      result = result | Charmap::Char(code);
    }
  }
  return result;
}
// Micro-benchmark for Charmap::contains.
void BM_Contains(benchmark::State& state) {
// Measures Charmap::contains against the fixed map from MakeBenchmarkMap().
//
// Loop-body replicated 10 times to increase time per iteration.
// Argument continuously changed to avoid generating common subexpressions.
const absl::strings_internal::Charmap benchmark_map = MakeBenchmarkMap();
// `c` is an unsigned char, so c++ wraps from 255 back to 0 and the lookups
// keep cycling through all 256 character codes.
unsigned char c = 0;
// Accumulates the results so the lookups have an observable effect.
int ops = 0;
for (auto _ : state) {
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
}
// Keep the accumulated result alive so the loop is not optimized away.
benchmark::DoNotOptimize(ops);
}
// Registers BM_Contains with the benchmark framework.
BENCHMARK(BM_Contains);
// We don't bother benchmarking Charmap::IsZero or Charmap::IntersectsWith;
// their running time is data-dependent and it is not worth characterizing
// "typical" data.
} // namespace

View file

@ -0,0 +1,172 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/char_map.h"
#include <cctype>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
// Compile-time map containing all 256 character codes (complement of the
// empty map).
constexpr absl::strings_internal::Charmap everything_map =
~absl::strings_internal::Charmap();
// Compile-time empty map.
constexpr absl::strings_internal::Charmap nothing_map{};
// Exercises the runtime constructors, contains(), IntersectsWith() and
// IsZero().
TEST(Charmap, AllTests) {
  using absl::strings_internal::Charmap;

  // A zero-length buffer yields an empty map.
  const Charmap also_nothing_map("", 0);
  ASSERT_TRUE(everything_map.contains('\0'));
  ASSERT_FALSE(nothing_map.contains('\0'));
  ASSERT_FALSE(also_nothing_map.contains('\0'));
  for (int code = 1; code < 256; ++code) {
    const unsigned char ch = static_cast<unsigned char>(code);
    ASSERT_TRUE(everything_map.contains(ch));
    ASSERT_FALSE(nothing_map.contains(ch));
    ASSERT_FALSE(also_nothing_map.contains(ch));
  }

  // Only the first 5 bytes ("&@#@^") are read, giving 4 distinct members.
  const Charmap symbols("&@#@^!@?", 5);
  ASSERT_TRUE(symbols.contains('&'));
  ASSERT_TRUE(symbols.contains('@'));
  ASSERT_TRUE(symbols.contains('#'));
  ASSERT_TRUE(symbols.contains('^'));
  ASSERT_FALSE(symbols.contains('!'));
  ASSERT_FALSE(symbols.contains('?'));
  int cnt = 0;
  for (int code = 1; code < 256; ++code) {
    cnt += symbols.contains(static_cast<unsigned char>(code));
  }
  ASSERT_EQ(cnt, 4);

  // The (ptr, len) constructor reads past an embedded NUL; the C-string
  // constructor stops at it.
  const Charmap lets("^abcde", 3);
  const Charmap lets2("fghij\0klmnop", 10);
  const Charmap lets3("fghij\0klmnop");
  ASSERT_TRUE(lets2.contains('k'));
  ASSERT_FALSE(lets3.contains('k'));

  ASSERT_TRUE(symbols.IntersectsWith(lets));
  ASSERT_FALSE(lets2.IntersectsWith(lets));
  ASSERT_TRUE(lets.IntersectsWith(symbols));
  ASSERT_FALSE(lets.IntersectsWith(lets2));

  ASSERT_TRUE(nothing_map.IsZero());
  ASSERT_FALSE(lets.IsZero());
}
namespace {
// Returns the members of `m` as a string, in increasing character-code order.
std::string Members(const absl::strings_internal::Charmap& m) {
  std::string found;
  for (size_t code = 0; code < 256; ++code) {
    if (!m.contains(static_cast<unsigned char>(code))) continue;
    found.push_back(static_cast<char>(code));
  }
  return found;
}
// Returns the characters lo, lo+1, ..., hi as a string. It does not assume
// lo <= hi: it keeps incrementing (wrapping past 255 to 0) until it reaches
// hi.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string out(1, static_cast<char>(lo));
  for (unsigned char cur = lo; cur != hi;) {
    ++cur;
    out.push_back(static_cast<char>(cur));
  }
  return out;
}
} // namespace
// Checks that the constexpr factories and operators produce the expected
// member sets when evaluated in constant expressions.
TEST(Charmap, Constexpr) {
  using absl::strings_internal::Charmap;

  constexpr Charmap kEmpty = nothing_map;
  EXPECT_THAT(Members(kEmpty), "");

  constexpr Charmap kA = Charmap::Char('A');
  EXPECT_THAT(Members(kA), "A");

  constexpr Charmap kAZ = Charmap::Range('A', 'Z');
  EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");

  constexpr Charmap kIdentifier = Charmap::Range('0', '9') |
                                  Charmap::Range('A', 'Z') |
                                  Charmap::Range('a', 'z') |
                                  Charmap::Char('_');
  EXPECT_THAT(Members(kIdentifier),
              "0123456789"
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
              "_"
              "abcdefghijklmnopqrstuvwxyz");

  constexpr Charmap kAll = everything_map;
  for (size_t i = 0; i < 256; ++i) {
    EXPECT_TRUE(kAll.contains(i)) << i;
  }

  constexpr Charmap kHello = Charmap::FromString("Hello, world!");
  EXPECT_THAT(Members(kHello), " !,Hdelorw");

  // Negation combined with intersection: A..Z minus D..Z leaves A..C.
  constexpr Charmap kABC =
      Charmap::Range('A', 'Z') & ~Charmap::Range('D', 'Z');
  EXPECT_THAT(Members(kABC), "ABC");
}
// Checks Charmap::Range against ClosedRangeString for every ordered pair of
// boundary values.
TEST(Charmap, Range) {
  using absl::strings_internal::Charmap;
  // Exhaustive testing takes too long, so only probe the values around word
  // and bit boundaries that are most likely to cause trouble.
  std::vector<size_t> poi = {0,   1,   2,   3,   4,   7,   8,   9,  15,
                             16,  17,  30,  31,  32,  33,  63,  64, 65,
                             127, 128, 129, 223, 224, 225, 254, 255};
  for (size_t i = 0; i < poi.size(); ++i) {
    const size_t lo = poi[i];
    SCOPED_TRACE(lo);
    for (size_t j = i; j < poi.size(); ++j) {
      const size_t hi = poi[j];
      SCOPED_TRACE(hi);
      EXPECT_THAT(Members(Charmap::Range(lo, hi)), ClosedRangeString(lo, hi));
    }
  }
}
// Collapses an int (such as the result of a <cctype> predicate) to a bool:
// any nonzero value becomes true.
bool AsBool(int x) { return x != 0; }
// For every character code 0..255, each *Charmap() factory must agree with
// the matching <cctype> predicate.
TEST(CharmapCtype, Match) {
  namespace si = absl::strings_internal;
  for (int c = 0; c < 256; ++c) {
    SCOPED_TRACE(c);
    SCOPED_TRACE(static_cast<char>(c));
    EXPECT_EQ(AsBool(std::isupper(c)), si::UpperCharmap().contains(c));
    EXPECT_EQ(AsBool(std::islower(c)), si::LowerCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isdigit(c)), si::DigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalpha(c)), si::AlphaCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isalnum(c)), si::AlnumCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isxdigit(c)), si::XDigitCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isprint(c)), si::PrintCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isspace(c)), si::SpaceCharmap().contains(c));
    EXPECT_EQ(AsBool(std::iscntrl(c)), si::CntrlCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isblank(c)), si::BlankCharmap().contains(c));
    EXPECT_EQ(AsBool(std::isgraph(c)), si::GraphCharmap().contains(c));
    EXPECT_EQ(AsBool(std::ispunct(c)), si::PunctCharmap().contains(c));
  }
}
} // namespace

View file

@ -0,0 +1,359 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <algorithm>
#include <cassert>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// Parameters of the kLargePowersOfFive table, which holds some large powers
// of 5 for fast computation.
//
// Constant step size for entries in the kLargePowersOfFive table. Each entry
// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep
// (or 5**27).
//
// In other words, the Nth entry in the table is 5**(27*N).
//
// 5**27 is the largest power of 5 that fits in 64 bits.
constexpr int kLargePowerOfFiveStep = 27;
// The largest legal index into the kLargePowersOfFive table.
//
// In other words, the largest precomputed power of 5 is 5**(27*20).
constexpr int kLargestPowerOfFiveIndex = 20;
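// Table of powers of (5**27), up to (5**27)**20 == 5**540.
//
// Used to generate large powers of 5 while limiting the number of repeated
// multiplications required.
//
// Layout: the entries are packed back to back. Entry i (1-based) occupies
// 2*i 32-bit words, stored least-significant word first, and begins at word
// offset i*(i-1) (the "start"/"end" annotations below).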
//
// clang-format off
const uint32_t kLargePowersOfFive[] = {
// 5**27 (i=1), start=0, end=2
0xfa10079dU, 0x6765c793U,
// 5**54 (i=2), start=2, end=6
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
// 5**81 (i=3), start=6, end=12
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
// 5**108 (i=4), start=12, end=20
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
0x20d3846fU, 0x06d00f73U,
// 5**135 (i=5), start=20, end=30
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
// 5**162 (i=6), start=30, end=42
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
// 5**189 (i=7), start=42, end=56
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
0x94151217U, 0x0072e9f7U,
// 5**216 (i=8), start=56, end=72
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
// 5**243 (i=9), start=72, end=90
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
// 5**270 (i=10), start=90, end=110
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
0x0d39e796U, 0x00079250U,
// 5**297 (i=11), start=110, end=132
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
// 5**324 (i=12), start=132, end=156
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
// 5**351 (i=13), start=156, end=182
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
0x859a4940U, 0x00007fb6U,
// 5**378 (i=14), start=182, end=210
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
// 5**405 (i=15), start=210, end=240
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
// 5**432 (i=16), start=240, end=272
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
0x3364ea62U, 0x0000086aU,
// 5**459 (i=17), start=272, end=306
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
// 5**486 (i=18), start=306, end=342
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
// 5**513 (i=19), start=342, end=380
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
0xf0046d27U, 0x0000008dU,
// 5**540 (i=20), start=380, end=420
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
};
// clang-format on
// Returns a pointer to the big integer data for (5**27)**i. i must be
// between 1 and 20, inclusive.
const uint32_t* LargePowerOfFiveData(int i) {
  // Entry k occupies 2*k words, so entries 1..i-1 together take up
  // 2*(1 + 2 + ... + (i-1)) == i*(i-1) words ahead of entry i.
  const int first_word = i * (i - 1);
  return &kLargePowersOfFive[first_word];
}
// Returns the size of the big integer data for (5**27)**i, in words. i must be
// between 1 and 20, inclusive.
int LargePowerOfFiveSize(int i) {
  // Each step of the table adds two 32-bit words to the entry length.
  const int words_per_step = 2;
  return words_per_step * i;
}
} // namespace
// kFiveToNth[n] == 5**n for n in [0, 13].
ABSL_DLL const uint32_t kFiveToNth[14] = {
1, 5, 25, 125, 625, 3125, 15625,
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
};
// kTenToNth[n] == 10**n for n in [0, 9].
ABSL_DLL const uint32_t kTenToNth[10] = {
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
};
// Resets *this and loads the mantissa described by `fp`. Returns the decimal
// exponent that goes with the loaded mantissa.
template <int max_words>
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
                                              int significant_digits) {
  SetToZero();
  assert(fp.type == FloatType::kNumber);

  if (fp.subrange_begin != nullptr) {
    // The parser recorded a digit range instead of an exact mantissa: read
    // the digits now and fold the resulting adjustment into the exponent.
    const int exponent_adjust =
        ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
    return fp.literal_exponent + exponent_adjust;
  }

  // The mantissa was already parsed exactly; split it into 32-bit words.
  words_[0] = fp.mantissa & 0xffffffffu;
  words_[1] = fp.mantissa >> 32;
  if (words_[1] != 0) {
    size_ = 2;
  } else if (words_[0] != 0) {
    size_ = 1;
  }
  return fp.exponent;
}
// Reads the decimal number in [begin, end), which may contain a decimal
// point, into *this as an integer, consuming at most `significant_digits`
// digits.
//
// Returns the power-of-ten adjustment that accounts for digits dropped before
// the decimal point (positive) and fractional digits folded into the integer
// (negative).
template <int max_words>
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
int significant_digits) {
assert(significant_digits <= Digits10() + 1);
SetToZero();
bool after_decimal_point = false;
// Discard any leading zeroes before the decimal point
while (begin < end && *begin == '0') {
++begin;
}
int dropped_digits = 0;
// Discard any trailing zeroes. These may or may not be after the decimal
// point.
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
if (begin < end && *std::prev(end) == '.') {
// If the string ends in '.', either before or after dropping zeroes, then
// drop the decimal point and look for more digits to drop.
dropped_digits = 0;
--end;
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
} else if (dropped_digits) {
// We dropped digits, and aren't sure if they're before or after the decimal
// point. Figure that out now.
const char* dp = std::find(begin, end, '.');
if (dp != end) {
// The dropped trailing digits were after the decimal point, so don't
// count them.
dropped_digits = 0;
}
}
// Any non-fraction digits we dropped need to be accounted for in our exponent
// adjustment.
int exponent_adjust = dropped_digits;
// Digits are batched into `queued` and flushed into the big integer with one
// multiply-and-add per batch of kMaxSmallPowerOfTen digits.
uint32_t queued = 0;
int digits_queued = 0;
for (; begin != end && significant_digits > 0; ++begin) {
if (*begin == '.') {
after_decimal_point = true;
continue;
}
if (after_decimal_point) {
// For each fractional digit we emit in our parsed integer, adjust our
// decimal exponent to compensate.
--exponent_adjust;
}
int digit = (*begin - '0');
--significant_digits;
if (significant_digits == 0 && std::next(begin) != end &&
(digit == 0 || digit == 5)) {
// If this is the very last significant digit, but insignificant digits
// remain, we know that the last of those remaining insignificant digits
// is nonzero. (If it wasn't, we would have stripped it before we got
// here.) So if this final digit is a 0 or 5, adjust it upward by 1.
//
// This adjustment is what allows incredibly large mantissas ending in
// 500000...000000000001 to correctly round up, rather than to nearest.
++digit;
}
queued = 10 * queued + digit;
++digits_queued;
if (digits_queued == kMaxSmallPowerOfTen) {
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
AddWithCarry(0, queued);
queued = digits_queued = 0;
}
}
// Encode any remaining digits.
if (digits_queued) {
MultiplyBy(kTenToNth[digits_queued]);
AddWithCarry(0, queued);
}
// If any insignificant digits remain, we will drop them. But if we have not
// yet read the decimal point, then we have to adjust the exponent to account
// for the dropped digits.
if (begin < end && !after_decimal_point) {
// This call to std::find will result in a pointer either to the decimal
// point, or to the end of our buffer if there was none.
//
// Either way, [begin, decimal_point) will contain the set of dropped digits
// that require an exponent adjustment.
const char* decimal_point = std::find(begin, end, '.');
exponent_adjust += (decimal_point - begin);
}
return exponent_adjust;
}
// Returns 5**n, seeded from the kLargePowersOfFive table where possible.
//
// While n is at least kLargePowerOfFiveStep, the largest usable table entry
// (5**(27*big_power)) is folded into the answer. The first such entry is
// copied in directly and later ones are multiplied in. Whatever exponent
// remains (< 27) is handled by MultiplyByFiveToTheNth().
//
// If the result does not fit in max_words words, only the low max_words
// words are kept, consistent with how MultiplyBy(uint32_t) drops a carry it
// has no room for.
template <int max_words>
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
    int n) {
  BigUnsigned answer(1u);

  // Seed from the table of large powers, if possible.
  bool first_pass = true;
  while (n >= kLargePowerOfFiveStep) {
    const int big_power =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    const uint32_t* const big_words = LargePowerOfFiveData(big_power);
    const int big_size = LargePowerOfFiveSize(big_power);
    if (first_pass) {
      // Just copy, rather than multiplying by 1. The table entry can be
      // wider than this integer (e.g. max_words == 4 and big_power >= 3), so
      // never write more than max_words words into words_.
      const int copied = std::min(big_size, max_words);
      std::copy(big_words, big_words + copied, answer.words_);
      answer.size_ = copied;
      // If truncation left zero words at the top, do not count them.
      while (answer.size_ > 0 && answer.words_[answer.size_ - 1] == 0) {
        --answer.size_;
      }
      first_pass = false;
    } else {
      answer.MultiplyBy(big_size, big_words);
    }
    n -= kLargePowerOfFiveStep * big_power;
  }
  answer.MultiplyByFiveToTheNth(n);
  return answer;
}
// Computes word `step` of the in-place product of *this (whose pre-multiply
// length is `original_size`) and the `other_size`-word integer `other_words`.
//
// It sums every partial product words_[a] * other_words[b] with
// a + b == step, stores the low 32 bits in words_[step], and hands the
// overflow to AddWithCarry(step + 1, ...).
//
// The loop reads only words_[0..step] and writes words_[step] and (through
// AddWithCarry, defined elsewhere) higher words. NOTE(review): this presumably
// relies on the caller running steps from highest to lowest so the lower
// words are still unmodified when read -- confirm against the caller.
template <int max_words>
void BigUnsigned<max_words>::MultiplyStep(int original_size,
const uint32_t* other_words,
int other_size, int step) {
// Start with the highest word of *this that can contribute to `step`, paired
// with the matching (lowest) word of the other operand.
int this_i = std::min(original_size - 1, step);
int other_i = step - this_i;
// Low 32 bits of the running sum, and the accumulated overflow above them.
uint64_t this_word = 0;
uint64_t carry = 0;
for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) {
uint64_t product = words_[this_i];
product *= other_words[other_i];
this_word += product;
// Move anything above 32 bits into `carry` so this_word cannot overflow.
carry += (this_word >> 32);
this_word &= 0xffffffff;
}
// Propagate the overflow before overwriting words_[step].
AddWithCarry(step + 1, carry);
words_[step] = this_word & 0xffffffff;
// Grow size_ if this step produced a nonzero word beyond the current length.
if (this_word > 0 && size_ <= step) {
size_ = step + 1;
}
}
// Renders this integer in decimal. Zero is rendered as "0".
template <int max_words>
std::string BigUnsigned<max_words>::ToString() const {
  // Peel off the lowest decimal digit of a scratch copy until nothing is
  // left. This produces the digits least-significant first.
  BigUnsigned<max_words> remaining = *this;
  std::string digits;
  for (; remaining.size() > 0;) {
    const int low_digit = remaining.DivMod<10>();
    digits.push_back('0' + low_digit);
  }
  if (digits.empty()) {
    digits.push_back('0');
  }
  // Flip into most-significant-first order.
  std::reverse(digits.begin(), digits.end());
  return digits;
}
// Explicit instantiations: the member functions defined in this file are
// compiled for these two widths only.
template class BigUnsigned<4>;
template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,423 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/config.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The largest power that 5 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfFive = 13;
// The largest power that 10 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfTen = 9;
// Lookup tables of the small powers: kFiveToNth[n] is indexed by n in
// [0, kMaxSmallPowerOfFive] and kTenToNth[n] by n in [0, kMaxSmallPowerOfTen].
ABSL_DLL extern const uint32_t
kFiveToNth[kMaxSmallPowerOfFive + 1];
ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
// Large, fixed-width unsigned integer.
//
// Exact rounding for decimal-to-binary floating point conversion requires very
// large integer math, but a design goal of absl::from_chars is to avoid
// allocating memory. The integer precision needed for decimal-to-binary
// conversions is large but bounded, so a huge fixed-width integer class
// suffices.
//
// This is an intentionally limited big integer class. Only needed operations
// are implemented. All storage lives in an array data member, and all
// arithmetic is done in-place, to avoid requiring separate storage for operand
// and result.
//
// This is an internal class. Some methods live in the .cc file, and are
// instantiated only for the values of max_words we need.
template <int max_words>
class BigUnsigned {
public:
static_assert(max_words == 4 || max_words == 84,
"unsupported max_words value");
// Constructs the value zero: no words in use, all storage zero-initialized.
BigUnsigned() : size_(0), words_{} {}
// Constructs from a 64-bit value, split into low and high 32-bit words.
// size_ is the number of words needed: 0 for zero, 1 if the value fits in 32
// bits, otherwise 2.
explicit constexpr BigUnsigned(uint64_t v)
    : size_(v == 0 ? 0 : ((v >> 32) != 0 ? 2 : 1)),
      words_{static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32)} {}
// Constructs a BigUnsigned from the given string_view containing a decimal
// value. If the input string is not a decimal integer, constructs a 0
// instead.
explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
  // Anything that is empty or contains a non-digit is treated as 0. This is
  // reasonable behavior only because this constructor is for unit tests.
  const bool digits_only = std::all_of(sv.begin(), sv.end(), ascii_isdigit);
  if (sv.empty() || !digits_only) {
    return;
  }
  const char* const first = sv.data();
  const char* const last = first + sv.size();
  const int exponent_adjust = ReadDigits(first, last, Digits10() + 1);
  // ReadDigits reports dropped digits as a power of ten; multiply them back
  // in.
  if (exponent_adjust > 0) {
    MultiplyByTenToTheNth(exponent_adjust);
  }
}
// Loads the mantissa value of a previously-parsed float.
//
// Returns the associated decimal exponent. The value of the parsed float is
// exactly *this * 10**exponent.
int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
// Returns the number of decimal digits of precision this type provides. All
// numbers with this many decimal digits or fewer are representable by this
// type.
//
// Analogous to std::numeric_limits<BigUnsigned>::digits10.
static constexpr int Digits10() {
// Each 32-bit word holds log10(2**32) decimal digits (about 9.63). The ratio
// used here is slightly below that, and the integer division rounds down,
// so the result never overstates the precision.
//
// 9975007/1035508 is very slightly less than log10(2**32).
return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
}
// Shifts left by the given number of bits.
void ShiftLeft(int count) {
// Non-positive counts are a no-op.
if (count > 0) {
// Split the shift into whole 32-bit words plus a remaining bit count.
const int word_shift = count / 32;
if (word_shift >= max_words) {
// Every word would be shifted out of the integer.
SetToZero();
return;
}
// The value grows by word_shift words, capped at the fixed capacity; words
// pushed past the top are dropped.
size_ = (std::min)(size_ + word_shift, max_words);
count %= 32;
if (count == 0) {
// Pure word shift: move the existing words up by word_shift positions.
std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
} else {
// Walk from the top down so each source word is read before it is
// overwritten. Each destination word takes the shifted source word plus
// the bits spilling up from the next-lower source word.
for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
words_[i] = (words_[i - word_shift] << count) |
(words_[i - word_shift - 1] >> (32 - count));
}
words_[word_shift] = words_[0] << count;
// Grow size_ if necessary.
if (size_ < max_words && words_[size_]) {
++size_;
}
}
// Zero the low words vacated by the word shift.
std::fill(words_, words_ + word_shift, 0u);
}
}
// Multiplies by v in-place.
void MultiplyBy(uint32_t v) {
  // Zero times anything, or anything times one, leaves the value unchanged.
  if (size_ == 0 || v == 1) {
    return;
  }
  if (v == 0) {
    SetToZero();
    return;
  }
  // Schoolbook multiply by a single word: `acc` holds the current partial
  // product plus the carry from the previous word.
  const uint64_t multiplier = v;
  uint64_t acc = 0;
  for (int idx = 0; idx < size_; ++idx) {
    acc += multiplier * words_[idx];
    words_[idx] = acc & 0xffffffff;
    acc >>= 32;
  }
  // A leftover carry becomes a new top word if there is room for it;
  // otherwise it is dropped.
  if (acc && size_ < max_words) {
    words_[size_] = acc & 0xffffffff;
    ++size_;
  }
}
// Multiplies by a 64-bit value in-place. Values that fit in 32 bits use the
// single-word overload; larger ones are passed as a two-word integer.
void MultiplyBy(uint64_t v) {
  const uint32_t low = static_cast<uint32_t>(v);
  const uint32_t high = static_cast<uint32_t>(v >> 32);
  if (high == 0) {
    MultiplyBy(low);
    return;
  }
  uint32_t words[2] = {low, high};
  MultiplyBy(2, words);
}
// Multiplies in place by 5**n. n must be non-negative.
//
// The power is applied in chunks: the largest table entry
// (5**kMaxSmallPowerOfFive) as often as it fits, then one table entry for the
// remainder.
void MultiplyByFiveToTheNth(int n) {
  for (; n >= kMaxSmallPowerOfFive; n -= kMaxSmallPowerOfFive) {
    MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
  }
  if (n > 0) MultiplyBy(kFiveToNth[n]);
}
// Multiplies in place by 10**n. n must be non-negative.
void MultiplyByTenToTheNth(int n) {
  if (n > kMaxSmallPowerOfTen) {
    // Large n: since 10**n == 5**n * 2**n, multiply by the power of five and
    // then shift left by n bits. This needs fewer multiplications overall.
    MultiplyByFiveToTheNth(n);
    ShiftLeft(n);
    return;
  }
  if (n > 0) {
    // Very small n: a single table-driven multiplication is quicker.
    MultiplyBy(kTenToNth[n]);
  }
}
// Returns the value of 5**n, for non-negative n. This implementation uses
// a lookup table, and is faster than seeding a BigUnsigned with 1 and calling
// MultiplyByFiveToTheNth().
static BigUnsigned FiveToTheNth(int n);
// Multiplies by another BigUnsigned, in-place.
template <int M>
void MultiplyBy(const BigUnsigned<M>& other) {
MultiplyBy(other.size(), other.words());
}
// Resets this value to zero: clears every word currently in use and sets
// size_ to 0.
void SetToZero() {
  for (int i = 0; i < size_; ++i) {
    words_[i] = 0u;
  }
  size_ = 0;
}
// Returns the word at position `index` of this BigUnsigned. The access is
// range-checked: any index outside [0, size_) yields 0.
uint32_t GetWord(int index) const {
  const bool in_range = index >= 0 && index < size_;
  return in_range ? words_[index] : 0u;
}
// Returns this integer as a decimal string. This is not used in the
// decimal-to-binary conversion; it is intended to aid in testing.
std::string ToString() const;
// Returns the number of words that may carry significant values (size_).
int size() const { return size_; }
// Returns a pointer to the word array; words()[0] is the least significant
// word.
const uint32_t* words() const { return words_; }
private:
// Reads the number between [begin, end), possibly containing a decimal point,
// into this BigUnsigned.
//
// Callers are required to ensure [begin, end) contains a valid number, with
// one or more decimal digits and at most one decimal point. This routine
// will behave unpredictably if these preconditions are not met.
//
// Only the first `significant_digits` digits are read. Digits beyond this
// limit are "sticky": If the final significant digit is 0 or 5, and if any
// dropped digit is nonzero, then that final significant digit is adjusted up
// to 1 or 6. This adjustment allows for precise rounding.
//
// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
// account for the decimal point and for dropped significant digits. After
// this function returns,
// actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
// (The relation is approximate because dropped digits are reflected only
// through the sticky adjustment described above.)
int ReadDigits(const char* begin, const char* end, int significant_digits);
// Performs a step of big integer multiplication. This computes the full
// (64-bit-wide) values that should be added at the given index (step), and
// adds to that location in-place.
//
// Because our math all occurs in place, we must multiply starting from the
// highest word working downward. (This is a bit more expensive due to the
// extra carries involved.)
//
// This must be called in steps, for each word to be calculated, starting from
// the high end and working down to 0. The first value of `step` should be
// `std::min(original_size + other.size_ - 2, max_words - 1)`.
// The reason for this expression is that multiplying the i'th word from one
// multiplicand and the j'th word of another multiplicand creates a
// two-word-wide value to be stored at the (i+j)'th element. The highest
// word indices we will access are `original_size - 1` from this object, and
// `other.size_ - 1` from our operand. Therefore,
// `original_size + other.size_ - 2` is the first step we should calculate,
// but limited on an upper bound by `max_words - 1`, the highest valid word
// index.
// Working from high-to-low ensures that we do not overwrite the portions of
// the initial value of *this which are still needed for later steps.
//
// Once called with step == 0, *this contains the result of the
// multiplication.
//
// `original_size` is the size_ of *this before the first call to
// MultiplyStep(). `other_words` and `other_size` are the contents of our
// operand. `step` is the step to perform, as described above.
void MultiplyStep(int original_size, const uint32_t* other_words,
int other_size, int step);
// Multiplies in place by the operand held in other_words[0 .. other_size),
// least significant word first.
//
// Runs MultiplyStep() for every result word, from the highest index that can
// receive a partial product down to 0.
void MultiplyBy(int other_size, const uint32_t* other_words) {
  // size_ may change while the steps run, so capture it up front.
  const int size_before = size_;
  int step = (std::min)(size_before + other_size - 2, max_words - 1);
  while (step >= 0) {
    MultiplyStep(size_before, other_words, other_size, step);
    --step;
  }
}
// Adds a 32-bit value to the index'th word, with carry.
//
// The carry ripples into successively higher words; a carry out of the last
// word (index max_words - 1) is dropped. size_ is raised, if needed, to cover
// the highest word index reached, capped at max_words. A zero `value` is a
// no-op.
void AddWithCarry(int index, uint32_t value) {
if (value) {
while (index < max_words && value > 0) {
words_[index] += value;
// carry if we overflowed in this word:
if (value > words_[index]) {
// Unsigned wraparound left the sum smaller than the addend, so carry a 1
// into the next word.
value = 1;
++index;
} else {
value = 0;
}
}
size_ = (std::min)(max_words, (std::max)(index + 1, size_));
}
}
// Adds a 64-bit value at the index'th word, with carry: the low 32 bits are
// added to words_[index] and the high 32 bits (plus any carry) to the words
// above it. Does nothing when `value` is 0 or `index` is not below max_words.
void AddWithCarry(int index, uint64_t value) {
if (value && index < max_words) {
uint32_t high = value >> 32;
uint32_t low = value & 0xffffffff;
words_[index] += low;
if (words_[index] < low) {
// The low-word addition wrapped around; fold its carry into the high half.
++high;
if (high == 0) {
// Carry from the low word caused our high word to overflow.
// Short circuit here to do the right thing.
AddWithCarry(index + 2, static_cast<uint32_t>(1));
return;
}
}
if (high > 0) {
AddWithCarry(index + 1, high);
} else {
// Normally 32-bit AddWithCarry() sets size_, but since we don't call
// it when `high` is 0, do it ourselves here.
size_ = (std::min)(max_words, (std::max)(index + 1, size_));
}
}
}
// Divides this in place by a constant divisor. Returns the remainder of the
// division.
//
// This is schoolbook long division, one 32-bit word at a time from the most
// significant word down. Afterwards size_ is reduced past any leading zero
// words.
template <uint32_t divisor>
uint32_t DivMod() {
uint64_t accumulator = 0;
for (int i = size_ - 1; i >= 0; --i) {
// Bring down the next word below the running remainder.
accumulator <<= 32;
accumulator += words_[i];
// accumulator / divisor will never overflow a uint32_t in this loop: the
// remainder carried in from the previous iteration is less than divisor, so
// accumulator < divisor * 2**32.
words_[i] = static_cast<uint32_t>(accumulator / divisor);
accumulator = accumulator % divisor;
}
while (size_ > 0 && words_[size_ - 1] == 0) {
--size_;
}
return static_cast<uint32_t>(accumulator);
}
// The number of elements in words_ that may carry significant values.
// All elements beyond this point are 0.
//
// When size_ is 0, this BigUnsigned stores the value 0.
// When size_ is nonzero, it is *not* guaranteed that words_[size_ - 1] is
// nonzero. This can occur due to overflow truncation.
// In particular, x.size_ != y.size_ does *not* imply x != y.
int size_;
// The magnitude, stored as 32-bit words with the least significant word
// first (words_[0] is the low word).
uint32_t words_[max_words];
};
// Three-way comparison of two big integers, which may have different word
// capacities.
//
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
template <int N, int M>
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  // Scan from the most significant word down; GetWord() yields 0 past the end
  // of the shorter operand, so the first differing word decides the result.
  int i = (std::max)(lhs.size(), rhs.size());
  while (i-- > 0) {
    const uint32_t left = lhs.GetWord(i);
    const uint32_t right = rhs.GetWord(i);
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  return 0;
}
// Two big integers are equal when every word matches. Words beyond either
// operand's size() read as 0, so differing sizes do not by themselves make
// the values unequal.
template <int N, int M>
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int word_count = (std::max)(lhs.size(), rhs.size());
  int i = 0;
  while (i < word_count && lhs.GetWord(i) == rhs.GetWord(i)) {
    ++i;
  }
  return i == word_count;
}
// Inequality, defined as the negation of operator==.
template <int N, int M>
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool same_value = (lhs == rhs);
  return !same_value;
}
// Less-than, defined through Compare().
template <int N, int M>
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int ordering = Compare(lhs, rhs);
  return ordering == -1;
}
// Greater-than: lhs > rhs exactly when rhs < lhs.
template <int N, int M>
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool rhs_is_smaller = (rhs < lhs);
  return rhs_is_smaller;
}
// Less-than-or-equal: true unless rhs < lhs.
template <int N, int M>
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool rhs_is_smaller = (rhs < lhs);
  return !rhs_is_smaller;
}
// Greater-than-or-equal: true unless lhs < rhs.
template <int N, int M>
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const bool lhs_is_smaller = (lhs < rhs);
  return !lhs_is_smaller;
}
// Stream insertion for BigUnsigned, for testing purposes only. Writes the
// decimal text produced by ToString().
template <int N>
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
  os << num.ToString();
  return os;
}
// Explicit instantiation declarations for the sizes of BigUnsigned that we
// are using (4 words = 128 bits, and 84 words = 2688 bits).
//
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
// still bigger than an int128, and 84 is a large value we will want to use
// in the from_chars implementation.
//
// Comments justifying the use of 84 belong in the from_chars implementation,
// and will be added in a follow-up CL.
extern template class BigUnsigned<4>;
extern template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_

View file

@ -0,0 +1,205 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <string>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Exercises ShiftLeft(): a known product, truncation on overflow for both
// instantiated sizes, and agreement between incremental and one-shot shifts.
TEST(BigUnsigned, ShiftLeft) {
{
// Check that 3 * 2**100 is calculated correctly
BigUnsigned<4> num(3u);
num.ShiftLeft(100);
EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128"));
}
{
// Test that overflow is truncated properly.
// 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint.
// Shifting left by 125 bits should truncate off the high bit, so that
// 15 << 125 == 7 << 125
// after truncation.
BigUnsigned<4> a(15u);
BigUnsigned<4> b(7u);
BigUnsigned<4> c(3u);
a.ShiftLeft(125);
b.ShiftLeft(125);
c.ShiftLeft(125);
EXPECT_EQ(a, b);
EXPECT_NE(a, c);
}
{
// Same test, larger bigint:
BigUnsigned<84> a(15u);
BigUnsigned<84> b(7u);
BigUnsigned<84> c(3u);
a.ShiftLeft(84 * 32 - 3);
b.ShiftLeft(84 * 32 - 3);
c.ShiftLeft(84 * 32 - 3);
EXPECT_EQ(a, b);
EXPECT_NE(a, c);
}
{
// Check that incrementally shifting has the same result as doing it all at
// once (attempting to capture corner cases.)
const std::string seed = "1234567890123456789012345678901234567890";
BigUnsigned<84> a(seed);
for (int i = 1; i <= 84 * 32; ++i) {
a.ShiftLeft(1);
BigUnsigned<84> b(seed);
b.ShiftLeft(i);
EXPECT_EQ(a, b);
}
// After 84 * 32 single-bit shifts, every bit should have been shifted off
// the top:
EXPECT_EQ(a, BigUnsigned<84>(0u));
}
}
// Builds 100! one 32-bit factor at a time and compares the result against the
// value parsed from its decimal representation.
TEST(BigUnsigned, MultiplyByUint32) {
const BigUnsigned<84> factorial_100(
"933262154439441526816992388562667004907159682643816214685929638952175999"
"932299156089414639761565182862536979208272237582511852109168640000000000"
"00000000000000");
BigUnsigned<84> a(1u);
for (uint32_t i = 1; i <= 100; ++i) {
a.MultiplyBy(i);
}
EXPECT_EQ(a, BigUnsigned<84>(factorial_100));
}
// Exercises the BigUnsigned-by-BigUnsigned overload of MultiplyBy().
TEST(BigUnsigned, MultiplyByBigUnsigned) {
{
// Put the terms of factorial_200 into two bigints, and multiply them
// together.
const BigUnsigned<84> factorial_200(
"7886578673647905035523632139321850622951359776871732632947425332443594"
"4996340334292030428401198462390417721213891963883025764279024263710506"
"1926624952829931113462857270763317237396988943922445621451664240254033"
"2918641312274282948532775242424075739032403212574055795686602260319041"
"7032406235170085879617892222278962370389737472000000000000000000000000"
"0000000000000000000000000");
BigUnsigned<84> evens(1u);
BigUnsigned<84> odds(1u);
// Each iteration contributes one odd factor (i) and one even factor (i + 1).
for (uint32_t i = 1; i < 200; i += 2) {
odds.MultiplyBy(i);
evens.MultiplyBy(i + 1);
}
evens.MultiplyBy(odds);
EXPECT_EQ(evens, factorial_200);
}
{
// Multiply various powers of 10 together.
for (int a = 0 ; a < 700; a += 25) {
SCOPED_TRACE(a);
BigUnsigned<84> a_value("3" + std::string(a, '0'));
for (int b = 0; b < (700 - a); b += 25) {
SCOPED_TRACE(b);
BigUnsigned<84> b_value("2" + std::string(b, '0'));
BigUnsigned<84> expected_product("6" + std::string(a + b, '0'));
b_value.MultiplyBy(a_value);
EXPECT_EQ(b_value, expected_product);
}
}
}
}
// Checks that multiplication results wider than the storage are truncated
// consistently.
TEST(BigUnsigned, MultiplyByOverflow) {
{
// Check that multiplication overflow predictably truncates.
// A big int with all bits on.
BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455");
// Modulo 2**128, this is equal to -1. Therefore the square of this,
// modulo 2**128, should be 1.
all_bits_on.MultiplyBy(all_bits_on);
EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u));
}
{
// Try multiplying a large bigint by 2**50, and compare the result to
// shifting.
BigUnsigned<4> value_1("12345678901234567890123456789012345678");
BigUnsigned<4> value_2("12345678901234567890123456789012345678");
BigUnsigned<4> two_to_fiftieth(1u);
two_to_fiftieth.ShiftLeft(50);
value_1.ShiftLeft(50);
value_2.MultiplyBy(two_to_fiftieth);
EXPECT_EQ(value_1, value_2);
}
}
// Cross-checks MultiplyByFiveToTheNth() against repeated multiplication by 5,
// and against the static FiveToTheNth() factory.
TEST(BigUnsigned, FiveToTheNth) {
{
// Sanity check that MultiplyByFiveToTheNth gives consistent answers, up to
// and including overflow.
for (int i = 0; i < 1160; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(123u);
BigUnsigned<84> value_2(123u);
value_1.MultiplyByFiveToTheNth(i);
for (int j = 0; j < i; j++) {
value_2.MultiplyBy(5u);
}
EXPECT_EQ(value_1, value_2);
}
}
{
// Check that the faster, table-lookup-based static method returns the same
// result that multiplying in-place would return, up to and including
// overflow.
for (int i = 0; i < 1160; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(1u);
value_1.MultiplyByFiveToTheNth(i);
BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i);
EXPECT_EQ(value_1, value_2);
}
}
}
// Cross-checks MultiplyByTenToTheNth() against repeated multiplication by 10,
// and against values produced by the decimal string constructor.
TEST(BigUnsigned, TenToTheNth) {
{
// Sanity check MultiplyByTenToTheNth.
for (int i = 0; i < 800; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(123u);
BigUnsigned<84> value_2(123u);
value_1.MultiplyByTenToTheNth(i);
for (int j = 0; j < i; j++) {
value_2.MultiplyBy(10u);
}
EXPECT_EQ(value_1, value_2);
}
}
{
// Alternate testing approach, taking advantage of the decimal parser.
for (int i = 0; i < 200; ++i) {
SCOPED_TRACE(i);
BigUnsigned<84> value_1(135u);
value_1.MultiplyByTenToTheNth(i);
BigUnsigned<84> value_2("135" + std::string(i, '0'));
EXPECT_EQ(value_1, value_2);
}
}
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,504 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type. We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float. Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
constexpr int kDecimalMantissaDigitsMax = 19;
static_assert(std::numeric_limits<uint64_t>::digits10 ==
kDecimalMantissaDigitsMax,
"(a) above");
// IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact");
static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact");
// The lowest valued 19-digit decimal mantissa we can read still contains
// sufficient information to reconstruct a binary mantissa: 10**18 exceeds
// 2**(53 + 3).
static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above");
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion. What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
constexpr int kHexadecimalMantissaDigitsMax = 15;
// The minimum number of significant bits that will be read from
// kHexadecimalMantissaDigitsMax hex digits. We must subtract three, since
// the most significant digit can be a "1", which only contributes a single
// significant bit. (With 15 digits this is 4 * 15 - 3 = 57 bits.)
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
4 * kHexadecimalMantissaDigitsMax - 3;
static_assert(kGuaranteedHexadecimalMantissaBitPrecision >
std::numeric_limits<double>::digits + 2,
"kHexadecimalMantissaDigitsMax too small");
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow. We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
constexpr int kDecimalExponentDigitsMax = 9;
// The exponent is accumulated in an int, so int must be able to hold any
// 9-digit decimal value.
static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax,
"int type too small");
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept. The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
constexpr int kDecimalDigitLimit = 50000000;
// Corresponding limit for hexadecimal digit inputs. This is one fourth the
// amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires
// a binary exponent adjustment of 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point. (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
static_assert(999999999 + 2 * kDecimalDigitLimit <
std::numeric_limits<int>::max(),
"int type too small");
// Same bound for hexadecimal input, where each digit moves the binary
// exponent by 4.
static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) <
std::numeric_limits<int>::max(),
"int type too small");
// Reports whether the given format flags permit an exponent part (e.g.,
// "1.5e100"). An exponent is allowed whenever the scientific bit is set, or
// the fixed bit is not set.
bool AllowExponent(chars_format flags) {
  const bool has_fixed = (flags & chars_format::fixed) == chars_format::fixed;
  const bool has_scientific =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (has_scientific) {
    return true;
  }
  return !has_fixed;
}
// Reports whether the given format flags make an exponent part mandatory.
// That is the case only when the scientific bit is set and the fixed bit is
// not.
bool RequireExponent(chars_format flags) {
  const bool has_fixed = (flags & chars_format::fixed) == chars_format::fixed;
  const bool has_scientific =
      (flags & chars_format::scientific) == chars_format::scientific;
  if (!has_scientific) {
    return false;
  }
  return !has_fixed;
}
// Lookup table, indexed by unsigned char value, giving the hexadecimal digit
// value of a character: '0'-'9' map to 0-9, and 'A'-'F' and 'a'-'f' both map
// to 10-15. Every other entry is -1.
const int8_t kAsciiToInt[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1};
// Per-base helper templates. Only declarations appear here; this file
// provides specializations for base 10 and base 16.

// Returns true if `ch` is a digit in the given base
template <int base>
bool IsDigit(char ch);
// Converts a valid `ch` to its digit value in the given base.
template <int base>
unsigned ToDigit(char ch);
// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);
// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();
// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();
// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
// Base 10: accepts exactly the characters '0' through '9'.
template <>
bool IsDigit<10>(char ch) {
  return '0' <= ch && ch <= '9';
}
// Base 16: a character is a digit when its kAsciiToInt entry is
// non-negative.
template <>
bool IsDigit<16>(char ch) {
  const unsigned char table_index = static_cast<unsigned char>(ch);
  return kAsciiToInt[table_index] >= 0;
}
// Base 10: the digit value is the character's offset from '0'. `ch` must
// already be a valid decimal digit.
template <>
unsigned ToDigit<10>(char ch) {
  return static_cast<unsigned>(ch - '0');
}
// Base 16: the digit value comes straight from the kAsciiToInt table. `ch`
// must already be a valid hexadecimal digit.
template <>
unsigned ToDigit<16>(char ch) {
  const unsigned char table_index = static_cast<unsigned char>(ch);
  return kAsciiToInt[table_index];
}
// Base 10: the exponent is introduced by 'e' or 'E'.
template <>
bool IsExponentCharacter<10>(char ch) {
  switch (ch) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
// Base 16: the exponent is introduced by 'p' or 'P'.
template <>
bool IsExponentCharacter<16>(char ch) {
  switch (ch) {
    case 'p':
    case 'P':
      return true;
    default:
      return false;
  }
}
// Decimal input: read at most kDecimalMantissaDigitsMax significant digits.
template <>
constexpr int MantissaDigitsMax<10>() {
return kDecimalMantissaDigitsMax;
}
// Hexadecimal input: read at most kHexadecimalMantissaDigitsMax significant
// digits.
template <>
constexpr int MantissaDigitsMax<16>() {
return kHexadecimalMantissaDigitsMax;
}
// Decimal input: longest digit run accepted is kDecimalDigitLimit.
template <>
constexpr int DigitLimit<10>() {
return kDecimalDigitLimit;
}
// Hexadecimal input: longest digit run accepted is kHexadecimalDigitLimit.
template <>
constexpr int DigitLimit<16>() {
return kHexadecimalDigitLimit;
}
// Decimal input: each dropped digit changes the (base-10) exponent by 1.
template <>
constexpr int DigitMagnitude<10>() {
return 1;
}
// Hexadecimal input: each dropped digit changes the (base-2) exponent by 4.
template <>
constexpr int DigitMagnitude<16>() {
return 4;
}
// Reads digits of the given base from [begin, end), accumulating them into
// *out. Returns the number of characters consumed.
//
// Once max_digits digits have been accumulated, further digits are still
// consumed but no longer change *out. If any digit dropped this way is
// nonzero, *dropped_nonzero_digit is set to true (when the pointer is
// non-null); otherwise it is left unmodified.
//
// If no digits are matched, returns 0 and leaves *out unchanged.
//
// ConsumeDigits does not protect against overflow on *out; max_digits must
// be chosen with respect to type T to avoid the possibility of overflow.
template <int base, typename T>
std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits,
                          T* out, bool* dropped_nonzero_digit) {
  if (base == 10) {
    assert(max_digits <= std::numeric_limits<T>::digits10);
  } else if (base == 16) {
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
  }
  const char* cursor = begin;

  // Leading zeros are skipped only while *out is still zero. They cannot
  // cause overflow, so they do not count against `max_digits`.
  if (!*out) {
    while (cursor != end && *cursor == '0') {
      ++cursor;
    }
  }

  T value = *out;
  const char* const significant_end =
      (end - cursor > max_digits) ? cursor + max_digits : end;
  for (; cursor < significant_end && IsDigit<base>(*cursor); ++cursor) {
    // No overflow guard here: max_digits was chosen to rule it out. The
    // asserts catch violations in debug builds.
    const T digit = static_cast<T>(ToDigit<base>(*cursor));
    assert(value * base >= value);
    value *= base;
    assert(value + digit >= value);
    value += digit;
  }

  // Swallow the digits beyond the significant range, noting whether any of
  // them is nonzero.
  bool saw_nonzero_tail = false;
  for (; cursor < end && IsDigit<base>(*cursor); ++cursor) {
    if (*cursor != '0') {
      saw_nonzero_tail = true;
    }
  }
  if (saw_nonzero_tail && dropped_nonzero_digit != nullptr) {
    *dropped_nonzero_digit = true;
  }

  *out = value;
  return cursor - begin;
}
// Reports whether `v` may appear inside the parentheses that follow a NaN:
// an underscore, a decimal digit, or an ASCII letter of either case.
bool IsNanChar(char v) {
  if (v == '_') return true;
  if (v >= '0' && v <= '9') return true;
  if (v >= 'a' && v <= 'z') return true;
  return v >= 'A' && v <= 'Z';
}
// Checks whether [begin, end) starts with a strtod()-formatted infinity or
// NaN. If it does, fills in `out` (type, end, and for a NaN with a
// parenthesized payload, the payload subrange) and returns true. Otherwise
// returns false.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) {
  // Both "inf" and "nan" need at least three characters.
  if (end - begin < 3) {
    return false;
  }
  const char first = *begin;

  if (first == 'i' || first == 'I') {
    // An infinity string consists of the characters "inf" or "infinity",
    // case insensitive.
    if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kInfinity;
    const bool is_long_form =
        end - begin >= 8 &&
        strings_internal::memcasecmp(begin + 3, "inity", 5) == 0;
    out->end = begin + (is_long_form ? 8 : 3);
    return true;
  }

  if (first == 'n' || first == 'N') {
    // A NaN consists of the characters "nan", case insensitive, optionally
    // followed by a parenthesized sequence of zero or more alphanumeric
    // characters and/or underscores.
    if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kNan;
    const char* const after_nan = begin + 3;
    out->end = after_nan;
    // Match the optional "(...)" payload, made up only of [a-zA-Z0-9_]. If
    // the closing parenthesis is missing, only the bare "nan" is matched.
    if (after_nan < end && *after_nan == '(') {
      const char* const payload_begin = after_nan + 1;
      const char* payload_end = payload_begin;
      while (payload_end < end && IsNanChar(*payload_end)) {
        ++payload_end;
      }
      if (payload_end < end && *payload_end == ')') {
        // We found an extra NaN specifier range
        out->subrange_begin = payload_begin;
        out->subrange_end = payload_end;
        out->end = payload_end + 1;
      }
    }
    return true;
  }

  return false;
}
} // namespace
namespace strings_internal {
// Scans [begin, end) for a floating point number whose mantissa is written in
// `base` (this file instantiates base 10 and base 16) and returns its
// decomposed form.
//
// Infinity and NaN spellings are recognized first. Otherwise the mantissa is
// read (leading zeros skipped, at most MantissaDigitsMax<base>() significant
// digits kept), followed by an optional exponent when `format_flags` allows
// one.
//
// On success, result.type is set to kNumber and result.mantissa,
// result.exponent, result.literal_exponent and result.end are filled in. On
// failure (empty range, no mantissa digits, a bare ".", a digit run at or
// above DigitLimit<base>(), or a missing exponent when one is required) the
// function returns early, before result.end is assigned.
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
                                         chars_format format_flags) {
  strings_internal::ParsedFloat result;

  // Exit early if we're given an empty range.
  if (begin == end) return result;

  // Handle the infinity and NaN cases.
  if (ParseInfinityOrNan(begin, end, &result)) {
    return result;
  }

  // ConsumeDigits() reports counts as std::size_t while the per-base limits
  // are (positive) ints. Convert the limits once so every comparison below is
  // between values of the same unsigned type.
  const std::size_t digit_limit = static_cast<std::size_t>(DigitLimit<base>());
  const std::size_t mantissa_digits_max =
      static_cast<std::size_t>(MantissaDigitsMax<base>());

  const char* const mantissa_begin = begin;
  while (begin < end && *begin == '0') {
    ++begin;  // skip leading zeros
  }
  uint64_t mantissa = 0;

  int exponent_adjustment = 0;
  bool mantissa_is_inexact = false;
  std::size_t pre_decimal_digits = ConsumeDigits<base>(
      begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
  begin += pre_decimal_digits;
  int digits_left;
  if (pre_decimal_digits >= digit_limit) {
    // refuse to parse pathological inputs
    return result;
  } else if (pre_decimal_digits > mantissa_digits_max) {
    // We dropped some non-fraction digits on the floor.  Adjust our exponent
    // to compensate.
    exponent_adjustment =
        static_cast<int>(pre_decimal_digits - mantissa_digits_max);
    digits_left = 0;
  } else {
    digits_left = static_cast<int>(mantissa_digits_max - pre_decimal_digits);
  }
  if (begin < end && *begin == '.') {
    ++begin;
    if (mantissa == 0) {
      // If we haven't seen any nonzero digits yet, keep skipping zeros.  We
      // have to adjust the exponent to reflect the changed place value.
      const char* begin_zeros = begin;
      while (begin < end && *begin == '0') {
        ++begin;
      }
      std::size_t zeros_skipped =
          static_cast<std::size_t>(begin - begin_zeros);
      if (zeros_skipped >= digit_limit) {
        // refuse to parse pathological inputs
        return result;
      }
      exponent_adjustment -= static_cast<int>(zeros_skipped);
    }
    std::size_t post_decimal_digits = ConsumeDigits<base>(
        begin, end, digits_left, &mantissa, &mantissa_is_inexact);
    begin += post_decimal_digits;

    // Since `mantissa` is an integer, each significant digit we read after
    // the decimal point requires an adjustment to the exponent. "1.23e0" will
    // be stored as `mantissa` == 123 and `exponent` == -2 (that is,
    // "123e-2").
    if (post_decimal_digits >= digit_limit) {
      // refuse to parse pathological inputs
      return result;
    } else if (post_decimal_digits > static_cast<std::size_t>(digits_left)) {
      // Only digits_left of the fractional digits made it into `mantissa`.
      exponent_adjustment -= digits_left;
    } else {
      // Every fractional digit was kept; the count is at most digits_left, so
      // the narrowing cast is lossless.
      exponent_adjustment -= static_cast<int>(post_decimal_digits);
    }
  }
  // If we've found no mantissa whatsoever, this isn't a number.
  if (mantissa_begin == begin) {
    return result;
  }
  // A bare "." doesn't count as a mantissa either.
  if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
    return result;
  }

  if (mantissa_is_inexact) {
    // We dropped significant digits on the floor.  Handle this appropriately.
    if (base == 10) {
      // If we truncated significant decimal digits, store the full range of the
      // mantissa for future big integer math for exact rounding.
      result.subrange_begin = mantissa_begin;
      result.subrange_end = begin;
    } else if (base == 16) {
      // If we truncated hex digits, reflect this fact by setting the low
      // ("sticky") bit.  This allows for correct rounding in all cases.
      mantissa |= 1;
    }
  }
  result.mantissa = mantissa;

  const char* const exponent_begin = begin;
  result.literal_exponent = 0;
  bool found_exponent = false;
  if (AllowExponent(format_flags) && begin < end &&
      IsExponentCharacter<base>(*begin)) {
    bool negative_exponent = false;
    ++begin;
    if (begin < end && *begin == '-') {
      negative_exponent = true;
      ++begin;
    } else if (begin < end && *begin == '+') {
      ++begin;
    }
    const char* const exponent_digits_begin = begin;
    // Exponent is always expressed in decimal, even for hexadecimal floats.
    begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
                               &result.literal_exponent, nullptr);
    if (begin == exponent_digits_begin) {
      // there were no digits where we expected an exponent.  We failed to read
      // an exponent and should not consume the 'e' after all.  Rewind 'begin'.
      found_exponent = false;
      begin = exponent_begin;
    } else {
      found_exponent = true;
      if (negative_exponent) {
        result.literal_exponent = -result.literal_exponent;
      }
    }
  }

  if (!found_exponent && RequireExponent(format_flags)) {
    // Provided flags required an exponent, but none was found.  This results
    // in a failure to scan.
    return result;
  }

  // Success!
  result.type = strings_internal::FloatType::kNumber;
  if (result.mantissa > 0) {
    result.exponent = result.literal_exponent +
                      (DigitMagnitude<base>() * exponent_adjustment);
  } else {
    // A zero mantissa is zero whatever the exponent says.
    result.exponent = 0;
  }
  result.end = begin;
  return result;
}
// Explicit instantiations for the two bases this file provides helper
// specializations for: 10 (decimal) and 16 (hexadecimal).
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,99 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/charconv.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Enum indicating whether a parsed float is a number (kNumber) or a special
// value (kInfinity or kNan).
enum class FloatType { kNumber, kInfinity, kNan };
// The decomposed parts of a parsed `float` or `double`.
//
// Every member has a default initializer, so a default-constructed ParsedFloat
// describes "nothing parsed": `end` is nullptr and both subrange pointers are
// nullptr.
struct ParsedFloat {
// Representation of the parsed mantissa, with the decimal point adjusted to
// make it an integer.
//
// During decimal scanning, this contains 19 significant digits worth of
// mantissa value. If digits beyond this point are found, they
// are truncated, and if any of these dropped digits are nonzero, then
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
// subrange_end).
//
// During hexadecimal scanning, this contains 15 significant hex digits worth
// of mantissa value. Digits beyond this point are sticky -- they are
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
// will be set. (This allows for precise rounding, and avoids the need
// to store the full mantissa in [subrange_begin, subrange_end).)
uint64_t mantissa = 0;
// Floating point exponent. This reflects any decimal point adjustments and
// any truncated digits from the mantissa. The absolute value of the parsed
// number is represented by mantissa * (base ** exponent), where base==10 for
// decimal floats, and base==2 for hexadecimal floats.
int exponent = 0;
// The literal exponent value scanned from the input, or 0 if none was
// present. This does not reflect any adjustments applied to mantissa.
int literal_exponent = 0;
// The type of number scanned.
FloatType type = FloatType::kNumber;
// When non-null, [subrange_begin, subrange_end) marks a range of characters
// that require further processing. The meaning is dependent on float type.
// If type == kNumber and this is set, this is a "wide input": the input
// mantissa contained more than 19 digits. The range contains the full
// mantissa. It plus `literal_exponent` need to be examined to find the best
// floating point match.
// If type == kNan and this is set, the range marks the contents of a
// matched parenthesized character region after the NaN.
const char* subrange_begin = nullptr;
const char* subrange_end = nullptr;
// One-past-the-end of the successfully parsed region, or nullptr if no
// matching pattern was found.
const char* end = nullptr;
};
// Read the floating point number in the provided range [begin, end), and
// populate ParsedFloat accordingly. If no matching pattern is found, the
// returned ParsedFloat has `end == nullptr`.
//
// format_flags is a bitmask value specifying what patterns this API will match.
// `scientific` and `fixed` are honored per std::from_chars rules
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
// exponent is required, or disallowed, respectively.
//
// Template parameter `base` must be either 10 or 16. For base 16, a "0x" is
// *not* consumed. The `hex` bit from format_flags is ignored by ParseFloat.
template <int base>
ParsedFloat ParseFloat(const char* begin, const char* end,
absl::chars_format format_flags);
// The two supported instantiations are declared `extern` here so that
// including this header does not instantiate them implicitly.
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
absl::chars_format format_flags);
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
absl::chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
using absl::strings_internal::FloatType;
using absl::strings_internal::ParsedFloat;
using absl::strings_internal::ParseFloat;
namespace {
// Check that a given string input is parsed to the expected type, mantissa,
// and exponent.
//
// Input string `s` must contain a '$' character. It marks the end of the
// characters that should be consumed by the match. It is stripped from the
// input to ParseFloat.
//
// If input string `s` contains '[' and ']' characters, these mark the region
// of characters that should be marked as the "subrange". For NaNs, this is
// the location of the extended NaN string. For numbers, this is the location
// of the full, over-large mantissa. Both brackets are stripped from the input
// before it is handed to ParseFloat.
//
// `expected_mantissa` and `expected_exponent` are only compared when the
// parsed type is kNumber. `expected_literal_exponent` is only compared when it
// differs from the sentinel default of -999.
template <int base>
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
                       FloatType expected_type, uint64_t expected_mantissa,
                       int expected_exponent,
                       int expected_literal_exponent = -999) {
  SCOPED_TRACE(s);

  int begin_subrange = -1;
  int end_subrange = -1;
  // If s contains '[' and ']', then strip these characters and set the subrange
  // indices appropriately. The ']' is searched for only after the '[' has been
  // removed, so both indices refer to positions in the stripped string.
  std::string::size_type open_bracket_pos = s.find('[');
  if (open_bracket_pos != std::string::npos) {
    begin_subrange = static_cast<int>(open_bracket_pos);
    s.replace(open_bracket_pos, 1, "");
    std::string::size_type close_bracket_pos = s.find(']');
    // Compare against std::string::npos: that is the type find() was called
    // on, and it matches the other npos checks in this function.
    ABSL_RAW_CHECK(close_bracket_pos != std::string::npos,
                   "Test input contains [ without matching ]");
    end_subrange = static_cast<int>(close_bracket_pos);
    s.replace(close_bracket_pos, 1, "");
  }

  // The position of '$' (after bracket removal) is the number of characters
  // ParseFloat is expected to consume.
  const std::string::size_type expected_characters_matched = s.find('$');
  ABSL_RAW_CHECK(expected_characters_matched != std::string::npos,
                 "Input string must contain $");
  s.replace(expected_characters_matched, 1, "");

  ParsedFloat parsed =
      ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);

  EXPECT_NE(parsed.end, nullptr);
  if (parsed.end == nullptr) {
    return;  // The following tests are not useful if we fully failed to parse
  }
  EXPECT_EQ(parsed.type, expected_type);
  if (begin_subrange == -1) {
    EXPECT_EQ(parsed.subrange_begin, nullptr);
    EXPECT_EQ(parsed.subrange_end, nullptr);
  } else {
    EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
    EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
  }
  if (parsed.type == FloatType::kNumber) {
    EXPECT_EQ(parsed.mantissa, expected_mantissa);
    EXPECT_EQ(parsed.exponent, expected_exponent);
    if (expected_literal_exponent != -999) {
      EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
    }
  }
  // Compare in the unsigned type of `expected_characters_matched` so that
  // EXPECT_EQ does not mix signed and unsigned operands.
  const auto characters_matched =
      static_cast<std::string::size_type>(parsed.end - s.data());
  EXPECT_EQ(characters_matched, expected_characters_matched);
}
// Check that a given string input is parsed as a number (FloatType::kNumber)
// with the expected mantissa and exponent.
//
// Input string `s` must contain a '$' character. It marks the end of the
// characters that were consumed by the match.
//
// This forwards to ExpectParsedFloat<base>; `expected_literal_exponent` keeps
// that function's sentinel default of -999, which skips the literal exponent
// comparison.
template <int base>
void ExpectNumber(std::string s, absl::chars_format format_flags,
uint64_t expected_mantissa, int expected_exponent,
int expected_literal_exponent = -999) {
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
expected_mantissa, expected_exponent,
expected_literal_exponent);
}
// Check that a given string input is parsed to the given special value.
//
// This tests against both number bases, since infinities and NaNs have
// identical representations in both modes.
//
// The mantissa and exponent arguments are passed as 0; ExpectParsedFloat only
// compares them when the parsed type is kNumber.
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
FloatType type) {
ExpectParsedFloat<10>(s, format_flags, type, 0, 0);
ExpectParsedFloat<16>(s, format_flags, type, 0, 0);
}
// Check that a given input string is not matched by ParseFloat<base>, i.e.
// that the returned ParsedFloat has `end == nullptr`.
//
// Unlike ExpectParsedFloat, `s` is handed to ParseFloat unmodified: no '$',
// '[' or ']' marker characters are stripped.
template <int base>
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
ParsedFloat parsed =
ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
EXPECT_EQ(parsed.end, nullptr);
}
// Exercises ParseFloat on ordinary decimal and hexadecimal inputs. In each
// ExpectNumber call the two trailing numbers are the expected mantissa and
// exponent; the '$' in the input marks where the match is expected to end.
TEST(ParseFloat, SimpleValue) {
// Test that various forms of floating point numbers all parse correctly.
// All decimal spellings below denote 123456789 * 10**-3.
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
// All hexadecimal spellings below denote 0x1234abcdef * 2**-8.
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
// ExpectNumber does not attempt to drop trailing zeroes.
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
-5);
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
0x1234abcdef000, -20);
// Ensure non-matching characters after a number are ignored, even when they
// look like potentially matching characters.
ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
-3);
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
// Ensure we can read a full resolution mantissa without overflow.
// (19 decimal digits and 15 hex digits respectively.)
ExpectNumber<10>("9999999999999999999$", chars_format::general,
9999999999999999999u, 0);
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
0xfffffffffffffffu, 0);
// Check that zero is consistently read.
// A zero mantissa is expected to yield exponent 0, even when the input
// carries an explicit exponent.
ExpectNumber<10>("0$", chars_format::general, 0, 0);
ExpectNumber<16>("0$", chars_format::general, 0, 0);
ExpectNumber<10>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
chars_format::general, 0, 0);
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
chars_format::general, 0, 0);
}
TEST(ParseFloat, LargeDecimalMantissa) {
// After 19 significant decimal digits in the mantissa, ParsedFloat will
// truncate additional digits. We need to test that:
// 1) the truncation to 19 digits happens
// 2) the returned exponent reflects the dropped significant digits
// 3) a correct literal_exponent is set
//
// If and only if a significant (nonzero) digit is found after 19 digits, then
// the entirety of the mantissa is marked as a subrange, in case the exact
// value is needed to make a rounding decision. The [ and ] characters below
// denote where such a subregion is expected to be marked by ParseFloat. They
// are not part of the input.
// Mark a capture group only if a dropped digit is significant (nonzero).
// Each of the 27-digit inputs below drops 8 digits, hence exponent 8.
ExpectNumber<10>("100000000000000000000000000$", chars_format::general,
1000000000000000000,
/* adjusted exponent */ 8);
ExpectNumber<10>("123456789123456789100000000$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8);
ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
// Leading zeroes should not count towards the 19 significant digit limit
ExpectNumber<10>("[00000000123456789123456789123456789]$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("00000000123456789123456789100000000$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8);
// Truncated digits after the decimal point should not cause a further
// exponent adjustment.
// (18 kept fractional digits: 123 - 18 == 105.)
ExpectNumber<10>("1.234567891234567891e123$", chars_format::general,
1234567891234567891, 105);
ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 105,
/* literal exponent */ 123);
// Ensure we truncate, and not round. (The from_chars algorithm we use
// depends on our guess missing low, if it misses, so we need the rounding
// error to be downward.)
ExpectNumber<10>("[1999999999999999999999]$", chars_format::general,
1999999999999999999,
/* adjusted exponent */ 3,
/* literal exponent */ 0);
}
TEST(ParseFloat, LargeHexadecimalMantissa) {
// After 15 significant hex digits in the mantissa, ParsedFloat will treat
// additional digits as sticky. We need to test that:
// 1) The truncation to 15 digits happens
// 2) The returned exponent reflects the dropped significant digits
// 3) If a nonzero digit is dropped, the low bit of mantissa is set.
// The 30-digit inputs below drop 15 hex digits; the exponent is expressed in
// powers of 2, so the expected adjustment is 15 * 4 == 60.
ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general,
0x123456789abcdef, 60);
// Leading zeroes should not count towards the 15 significant digit limit
ExpectNumber<16>("000000123456789abcdef123456789abcdef$",
chars_format::general, 0x123456789abcdef, 60);
// Truncated digits after the radix point should not cause a further
// exponent adjustment.
// (14 kept fractional hex digits: 100 - 14 * 4 == 44.)
ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general,
0x123456789abcdef, 44);
ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$",
chars_format::general, 0x123456789abcdef, 44);
// test sticky digit behavior. The low bit should be set iff any dropped
// digit is nonzero.
// (The kept digits end in 0xe; a nonzero dropped digit turns that into 0xf.)
ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general,
0x123456789abcdee, 60);
}
// Checks how the `fixed` and `scientific` format flags change what ParseFloat
// matches.
TEST(ParseFloat, ScientificVsFixed) {
  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  // NOTE(review): this input starts with '-', which presumably already makes
  // the parse fail regardless of the exponent rule -- confirm. It is kept
  // as-is; the next line covers the exponent rule on its own.
  ExpectFailedParse<10>("-123456.789$", chars_format::scientific);
  // Same digits without the sign, mirroring the base-16 case below: the only
  // reason left for this to fail is the missing exponent.
  ExpectFailedParse<10>("123456.789$", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
}
// Checks matching of "inf" / "infinity" spellings in both bases.
TEST(ParseFloat, Infinity) {
// Incomplete or misspelled prefixes must not match at all.
ExpectFailedParse<10>("in", chars_format::general);
ExpectFailedParse<16>("in", chars_format::general);
ExpectFailedParse<10>("inx", chars_format::general);
ExpectFailedParse<16>("inx", chars_format::general);
// Matching is expected to be case-insensitive.
ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity);
// "infinite" is not "infinity": only the leading "inf" is expected to match.
ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity);
ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity);
}
// Checks matching of "nan" spellings, with and without a parenthesized
// payload, in both bases.
TEST(ParseFloat, NaN) {
// Incomplete or misspelled prefixes must not match at all.
ExpectFailedParse<10>("na", chars_format::general);
ExpectFailedParse<16>("na", chars_format::general);
ExpectFailedParse<10>("nah", chars_format::general);
ExpectFailedParse<16>("nah", chars_format::general);
// Matching is expected to be case-insensitive, and to stop after "nan".
ExpectSpecial("nan$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$", chars_format::general, FloatType::kNan);
ExpectSpecial("nAn$", chars_format::general, FloatType::kNan);
ExpectSpecial("NAN$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan);
// A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
// appear after an NaN. Check that this is allowed, and that the correct
// characters are grouped.
//
// (The characters [ and ] in the pattern below delimit the expected matched
// subgroup; they are not part of the input passed to ParseFloat.)
ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan);
// If the subgroup contains illegal characters, don't match it at all.
ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan);
// Also cope with a missing close paren.
ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan);
}
} // namespace

View file

@ -0,0 +1,150 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Wraps std::atomic<int32_t> for reference counting. A newly constructed
// Refcount holds the value 1.
class Refcount {
 public:
  Refcount() : count_{1} {}
  ~Refcount() {}

  // Increments the reference count by 1. Imposes no memory ordering.
  inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count by 1. If the refcount is
  // exactly 1, the stored value is left untouched.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0);
    // Short-circuit: the fetch_sub only runs when the value loaded above was
    // not 1.
    return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1;
  }

  // Same as Decrement but expect that refcount is greater than 1. Unlike
  // Decrement, this always performs the subtraction, so the stored value
  // becomes 0 when this returns false.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel);
    assert(refcount > 0);
    return refcount != 1;
  }

  // Returns the current reference count using acquire semantics.
  inline int32_t Get() const { return count_.load(std::memory_order_acquire); }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object.
  //
  // Const-qualified, like Get(): it only performs a load.
  inline bool IsOne() const {
    return count_.load(std::memory_order_acquire) == 1;
  }

 private:
  std::atomic<int32_t> count_;
};
// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRepConcat;
struct CordRepSubstring;
struct CordRepExternal;
// Common header shared by all cord node types. The node's kind is identified
// by `tag` rather than by a vtable.
struct CordRep {
// The following three fields have to be less than 32 bytes since
// that is the smallest supported flat node size.
size_t length;
Refcount refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep
// Accessors returning this node viewed as the named subclass, in mutable and
// const flavors. They are only declared here.
// NOTE(review): presumably the definitions check `tag` before casting --
// confirm against the definitions.
inline CordRepConcat* concat();
inline const CordRepConcat* concat() const;
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
};
// Node with two child nodes, `left` and `right`.
struct CordRepConcat : public CordRep {
CordRep* left;
CordRep* right;
// The depth is kept in the inherited `data[0]` byte, so it needs no field of
// its own; these accessors convert between that `char` and uint8_t.
uint8_t depth() const { return static_cast<uint8_t>(data[0]); }
void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); }
};
// Node describing a substring of another node (`child`).
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child; // The node this substring refers into
};
// TODO(strel): replace the following logic (and related functions in cord.cc)
// with container_internal::Layout.
// Alignment requirement for CordRepExternal so that the type erased releaser
// will be stored at a suitably aligned address.
//
// Returns __STDCPP_DEFAULT_NEW_ALIGNMENT__ when the compiler defines it, and
// otherwise falls back to alignof(std::max_align_t).
constexpr size_t ExternalRepAlignment() {
#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__)
  return __STDCPP_DEFAULT_NEW_ALIGNMENT__;
#else
  // <cstddef> only guarantees the std::-qualified name; the unqualified
  // `max_align_t` is not required to be declared by that header.
  return alignof(std::max_align_t);
#endif
}
// Type for function pointer that will invoke and destroy the type-erased
// releaser function object. Accepts a pointer to the releaser and the
// `string_view` that were passed in to `NewExternalRep` below. The return value
// is the size of the `Releaser` type.
using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
// The struct is over-aligned to ExternalRepAlignment() for that reason.
struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep {
// NOTE(review): presumably the start of the externally owned character data
// -- confirm against the code that fills this in.
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
};
// TODO(strel): look into removing, it doesn't seem like anything relies on this
static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), "");
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

View file

@ -0,0 +1,180 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/escaping.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// A 64-character base64 alphabet: 'A'-'Z', 'a'-'z', '0'-'9', then '+' and '/'.
// The array also holds the string literal's terminating NUL.
const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Returns the number of characters produced by base64-encoding `input_len`
// bytes, with ('=' padded) or without trailing padding.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 turns every complete group of three input bytes into four output
  // characters. What remains after the complete groups is 0, 1 or 2 bytes,
  // which correspond to the three cases listed in
  // https://tools.ietf.org/html/rfc3548:
  //   (1) no leftover: the output is a whole number of 4-character units and
  //       needs no "=" padding;
  //   (2) 8 leftover bits: two characters, followed by two "=" when padding;
  //   (3) 16 leftover bits: three characters, followed by one "=" when
  //       padding.
  const size_t whole_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  size_t len = whole_groups * 4;
  switch (leftover_bytes) {
    case 1:
      len += do_padding ? 4 : 2;
      break;
    case 2:
      len += do_padding ? 4 : 3;
      break;
    default:  // No partial group, nothing to add.
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
// Base64-encodes the `szsrc` bytes at `src` into `dest`, which has room for
// `szdest` characters, looking up output characters in the 64-entry table
// `base64`. When `do_padding` is true, a trailing partial group is completed
// with '=' characters.
//
// Returns the number of characters written, or 0 if `dest` is too small. Note
// that some characters may already have been written when 0 is returned.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
                            size_t szdest, const char* base64,
                            bool do_padding) {
  static const char kPad64 = '=';

  // Reject up front any destination that cannot hold the unpadded encoding
  // (four characters for every three input bytes).
  if (szsrc * 4 > szdest * 3) return 0;

  char* out = dest;
  const unsigned char* in = src;
  char* const out_end = dest + szdest;
  const unsigned char* const in_end = src + szsrc;

  // Writes the four table characters for the 24-bit value `group` at `out`,
  // most significant six bits first, and advances `out` past them.
  const auto emit_group = [&out, base64](uint32_t group) {
    out[0] = base64[group >> 18];
    out[1] = base64[(group & 0x3FFFF) >> 12];
    out[2] = base64[(group & 0xFFF) >> 6];
    out[3] = base64[group & 0x3F];
    out += 4;
  };

  // Bulk phase: three input bytes become four output characters. Each step
  // loads four bytes and discards the lowest one, so it only runs while more
  // than three bytes remain.
  if (szsrc >= 3) {  // "in_end - 3" is UB if szsrc < 3.
    for (; in < in_end - 3; in += 3) {
      emit_group(absl::big_endian::Load32(in) >> 8);
    }
  }

  // The bulk phase did not track the remaining sizes, so derive them now.
  const size_t room = out_end - out;
  const size_t tail = in_end - in;

  // Tail phase: at most three bytes are left.
  if (tail == 1) {
    // One byte left: this encodes to two characters, and (optionally)
    // two pad characters to round out the four-character group.
    if (room < 2) return 0;
    const uint32_t bits = in[0];
    out[0] = base64[bits >> 2];
    out[1] = base64[(bits & 0x3) << 4];
    out += 2;
    if (do_padding) {
      if (room - 2 < 2) return 0;
      out[0] = kPad64;
      out[1] = kPad64;
      out += 2;
    }
  } else if (tail == 2) {
    // Two bytes left: this encodes to three characters, and (optionally)
    // one pad character to round out the four-character group.
    if (room < 3) return 0;
    const uint32_t bits = absl::big_endian::Load16(in);
    out[0] = base64[bits >> 10];
    out[1] = base64[(bits & 0x3FF) >> 4];
    out[2] = base64[(bits & 0x00F) << 2];
    out += 3;
    if (do_padding) {
      if (room - 3 < 1) return 0;
      out[0] = kPad64;
      out += 1;
    }
  } else if (tail == 3) {
    // Three bytes left: same as in the bulk phase, except that the group is
    // assembled from a byte plus a 16-bit load, because a 32-bit load here
    // would read one byte past the end of the input.
    if (room < 4) return 0;
    emit_group((in[0] << 16) + absl::big_endian::Load16(in + 1));
  } else if (tail != 0) {
    // Should not be reached: the bulk phase leaves at most three bytes.
    ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", tail);
  }
  // tail == 0: nothing left; nothing more to do.

  return (out - dest);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,58 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
#include <cassert>
#include "absl/strings/internal/resize_uninitialized.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
ABSL_CONST_INIT extern const char kBase64Chars[];
// Calculates how long a string will be when it is base64 encoded given its
// length and whether or not the result should be padded.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64` and writes the result to `dest`, which has room for `szdest`
// characters. If `do_padding` is true, `dest` is padded with '=' chars
// until its length is a multiple of 4. Returns the length of `dest`, or 0 if
// `szdest` is too small to hold the result.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64_chars` and stores the result in `*dest`, replacing its previous
// contents. If `do_padding` is true, the result is padded with '=' chars until
// its length is a multiple of 4.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
                          bool do_padding, const char* base64_chars) {
  // Size the destination for the full encoding before writing into it.
  const size_t expected_len =
      CalculateBase64EscapedLenInternal(szsrc, do_padding);
  STLStringResizeUninitialized(dest, expected_len);

  // Encode straight into the string's own buffer.
  char* const out = &(*dest)[0];
  const size_t written = Base64EscapeInternal(src, szsrc, out, dest->size(),
                                              base64_chars, do_padding);
  assert(expected_len == written);

  // Drop anything beyond what the encoder reported writing.
  dest->erase(written);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_

View file

@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by both escaping_test.cc and
// escaping_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#include <array>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// One Base64 test vector: an input string paired with its expected encoding.
struct base64_testcase {
// The unencoded input bytes.
absl::string_view plaintext;
// The expected encoded form of `plaintext`.
absl::string_view cyphertext;
};
// Returns the shared table of Base64 test vectors used by the escaping tests
// and benchmarks. The table is built once (function-local static) and handed
// out by const reference.
//
// Every cyphertext is the websafe ('-' and '_' instead of '+' and '/'),
// unpadded encoding of the plaintext next to it, so each pair must stay
// byte-for-byte consistent.
inline const std::array<base64_testcase, 5>& base64_strings() {
  static const std::array<base64_testcase, 5> testcase{{
      // Some google quotes
      // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
      // (Note that we're testing the websafe encoding, though, so if
      // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
      { "I was always good at math and science, and I never realized "
        "that was unusual or somehow undesirable. So one of the things "
        "I care a lot about is helping to remove that stigma, "
        "to show girls that you can be feminine, you can like the things "
        "that girls like, but you can also be really good at technology. "
        "You can be really good at building things."
        " - Marissa Meyer, Newsweek, 2010-12-22" "\n",

        "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
        "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
        "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
        "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
        "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
        "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
        "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
        "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
        "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },

      { "Typical first year for a new cluster: "
        "~0.5 overheating "
        "~1 PDU failure "
        "~1 rack-move "
        "~1 network rewiring "
        "~20 rack failures "
        "~5 racks go wonky "
        "~8 network maintenances "
        "~12 router reloads "
        "~3 router failures "
        "~dozens of minor 30-second blips for dns "
        "~1000 individual machine failures "
        "~thousands of hard drive failures "
        "slow disks, bad memory, misconfigured machines, flaky machines, etc."
        " - Jeff Dean, The Joys of Real Hardware" "\n",

        "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
        "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
        "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
        "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
        "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
        "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
        "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
        "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
        "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
        "ZWFsIEhhcmR3YXJlCg" },

      // The two spaces after "Google." are intentional: the cyphertext below
      // encodes them ("...R29vZ2xlLiAg"), so a single space here would make
      // the pair inconsistent.
      { "I'm the head of the webspam team at Google.  "
        "That means that if you type your name into Google and get porn back, "
        "it's my fault. Unless you're a porn star, in which case porn is a "
        "completely reasonable response."
        " - Matt Cutts, Google Plus" "\n",

        "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
        "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
        "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
        "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
        "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
        "IEdvb2dsZSBQbHVzCg" },

      { "It will still be a long time before machines approach human "
        "intelligence. "
        "But luckily, machines don't actually have to be intelligent; "
        "they just have to fake it. Access to a wealth of information, "
        "combined with a rudimentary decision-making capacity, "
        "can often be almost as useful. Of course, the results are better yet "
        "when coupled with intelligence. A reference librarian with access to "
        "a good search engine is a formidable tool."
        " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004"
        "\n",

        "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
        "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
        "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
        "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
        "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
        "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
        "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
        "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
        "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
        "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
        "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
        "NAo" },

      // Degenerate edge case
      { "",
        "" },
  }};

  return testcase;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_

View file

@ -0,0 +1,112 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Compares the first `len` bytes of `s1` and `s2`, ignoring ASCII case.
// Returns 0 when they are equal; otherwise returns the difference between the
// first pair of differing bytes after lowercasing (both taken as unsigned
// char), so the sign tells which buffer orders first.
int memcasecmp(const char* s1, const char* s2, size_t len) {
  const unsigned char* lhs = reinterpret_cast<const unsigned char*>(s1);
  const unsigned char* rhs = reinterpret_cast<const unsigned char*>(s2);
  const unsigned char* const lhs_end = lhs + len;

  for (; lhs != lhs_end; ++lhs, ++rhs) {
    const int lowered_lhs =
        int{static_cast<unsigned char>(absl::ascii_tolower(*lhs))};
    const int lowered_rhs =
        int{static_cast<unsigned char>(absl::ascii_tolower(*rhs))};
    if (lowered_lhs != lowered_rhs) return lowered_lhs - lowered_rhs;
  }
  return 0;
}
// Returns a malloc()-allocated copy of the `slen` bytes starting at `s`, or
// nullptr if the allocation fails. No terminator is appended.
char* memdup(const char* s, size_t slen) {
  char* const duplicate = static_cast<char*>(malloc(slen));
  if (duplicate == nullptr) return nullptr;
  memcpy(duplicate, s, slen);
  return duplicate;
}
// Returns a pointer to the last occurrence of the byte `c` within the first
// `slen` bytes of `s`, or nullptr if it does not occur (including when
// `slen` is 0).
//
// As with the C library's memchr()/memrchr(), `c` is converted to
// unsigned char and compared against each byte taken as unsigned char, so a
// byte >= 0x80 is found whether the caller passes it as a (possibly negative)
// char or as a value in [0x80, 0xFF].
char* memrchr(const char* s, int c, size_t slen) {
  const unsigned char target = static_cast<unsigned char>(c);
  // Walk backwards by index rather than by pointer: this never forms a
  // pointer before `s`, which pointer arithmetic does not permit (the old
  // loop did so for slen == 0 and when stepping past the first byte).
  for (size_t remaining = slen; remaining > 0; --remaining) {
    const char* const candidate = s + (remaining - 1);
    if (static_cast<unsigned char>(*candidate) == target) {
      return const_cast<char*>(candidate);
    }
  }
  return nullptr;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// consists only of characters found in the NUL-terminated set `accept`.
// Returns 0 when `slen` is 0 or `accept` is empty.
//
// The remaining length is checked before each byte is read, so at most `slen`
// bytes of `s` are touched. The previous implementation loaded the next byte
// first and read one byte past the buffer whenever the whole input matched
// (and read s[0] even for slen == 0).
size_t memspn(const char* s, size_t slen, const char* accept) {
  for (size_t i = 0; i < slen; ++i) {
    const char current = s[i];
    bool accepted = false;
    for (const char* a = accept; *a != '\0'; ++a) {
      if (*a == current) {
        accepted = true;
        break;
      }
    }
    // First byte outside the accept set ends the span.
    if (!accepted) return i;
  }
  return slen;
}
// Returns the length of the longest prefix of the `slen` bytes at `s` that
// contains no character from the NUL-terminated set `reject`. If nothing in
// `s` is rejected the result is `slen`. A NUL byte inside `s` is never
// treated as rejected, because the terminator of `reject` is not part of the
// set.
size_t memcspn(const char* s, size_t slen, const char* reject) {
  for (size_t i = 0; i < slen; ++i) {
    const char current = s[i];
    for (const char* r = reject; *r != '\0'; ++r) {
      if (*r == current) return i;
    }
  }
  return slen;
}
// Returns a pointer to the first of the `slen` bytes at `s` that matches any
// character in the NUL-terminated set `accept`, or nullptr if none does.
char* mempbrk(const char* s, size_t slen, const char* accept) {
  const char* const end = s + slen;
  for (const char* cur = s; cur != end; ++cur) {
    for (const char* a = accept; *a != '\0'; ++a) {
      if (*a == *cur) return const_cast<char*>(cur);
    }
  }
  return nullptr;
}
// Finds the first occurrence of the `neelen`-byte needle `pneedle` inside the
// `haylen`-byte haystack `phaystack`; returns a pointer to it, or nullptr if
// there is none. An empty needle matches at the start of the haystack.
//
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
                     size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;

  // One past the last position at which a complete needle could still start.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  const char first = pneedle[0];

  for (const char* cursor = phaystack;;) {
    // memchr's result is held as const void* so this compiles whether the
    // platform declares it as returning void* or const void*.
    const void* const hit = memchr(cursor, first, scan_end - cursor);
    if (hit == nullptr) return nullptr;

    const char* const candidate = static_cast<const char*>(hit);
    if (memcmp(candidate, pneedle, neelen) == 0) return candidate;
    cursor = candidate + 1;
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,148 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// These routines provide mem versions of standard C string routines,
// such as strpbrk. They function exactly the same as the str versions,
// so if you wonder what they are, replace the word "mem" by
// "str" and check out the man page. I could return void*, as the
// strutil.h mem*() routines tend to do, but I return char* instead
// since this is by far the most common way these functions are called.
//
// The difference between the mem and str versions is the mem version
// takes a pointer and a length, rather than a '\0'-terminated string.
// The memcase* routines defined here assume the locale is "C"
// (they use absl::ascii_tolower instead of tolower).
//
// These routines are based on the BSD library.
//
// Here's a list of routines from string.h, and their mem analogues.
// Functions in lowercase are defined in string.h; those in UPPERCASE
// are defined here:
//
// strlen --
// strcat strncat MEMCAT
// strcpy strncpy memcpy
// -- memccpy (very cool function, btw)
// -- memmove
// -- memset
// strcmp strncmp memcmp
// strcasecmp strncasecmp MEMCASECMP
// strchr memchr
// strcoll --
// strxfrm --
// strdup strndup MEMDUP
// strrchr MEMRCHR
// strspn MEMSPN
// strcspn MEMCSPN
// strpbrk MEMPBRK
// strstr MEMSTR MEMMEM
// (g)strcasestr MEMCASESTR MEMCASEMEM
// strtok --
// strprefix MEMPREFIX (strprefix is from strutil.h)
// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
// -- MEMIS
// -- MEMCASEIS
// strcount MEMCOUNT (strcount is from strutil.h)
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#include <cstddef>
#include <cstring>
#include "absl/base/port.h" // disable some warnings on Windows
#include "absl/strings/ascii.h" // for absl::ascii_tolower
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Copies `srclen` bytes from `src` to the position `destlen` bytes into
// `dest` (i.e. appends to the `destlen` bytes already there) and returns a
// pointer to where the copied bytes begin. No terminator is written and the
// capacity of `dest` is not checked.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;
  memcpy(tail, src, srclen);
  return tail;
}
// Compares the first `len` bytes of `s1` and `s2` ignoring ASCII case.
// Returns 0 if equal, otherwise the difference of the first differing pair
// of lowercased bytes.
int memcasecmp(const char* s1, const char* s2, size_t len);
// Returns a malloc()-allocated copy of the `slen` bytes at `s`, or nullptr
// if the allocation fails.
char* memdup(const char* s, size_t slen);
// Returns a pointer to the last occurrence of `c` in the first `slen` bytes
// of `s`, or nullptr if there is none.
char* memrchr(const char* s, int c, size_t slen);
// Returns the length of the initial run of bytes in `s` (at most `slen`)
// that all appear in the NUL-terminated set `accept`.
size_t memspn(const char* s, size_t slen, const char* accept);
// Returns the length of the initial run of bytes in `s` (at most `slen`)
// none of which appear in the NUL-terminated set `reject`.
size_t memcspn(const char* s, size_t slen, const char* reject);
// Returns a pointer to the first of the `slen` bytes at `s` that appears in
// the NUL-terminated set `accept`, or nullptr if none does.
char* mempbrk(const char* s, size_t slen, const char* accept);
// This is for internal use only. Don't call this directly.
//
// Scans the `haylen` bytes at `haystack` for the first occurrence of the
// `neelen` bytes at `needle` and returns a pointer to it, or nullptr if it
// does not occur. An empty needle matches at the start of the haystack.
// When `case_sensitive` is false both sides are lowercased with
// absl::ascii_tolower before being compared.
template <bool case_sensitive>
const char* int_memmatch(const char* haystack, size_t haylen,
                         const char* needle, size_t neelen) {
  if (neelen == 0) {
    return haystack;  // even if haylen is 0
  }

  const char* const hay_limit = haystack + haylen;
  const char* const needle_begin = needle;
  const char* const needle_limit = needle_begin + neelen;

  // `pattern_pos` is the next needle byte to match; it is ahead of
  // `needle_begin` exactly while a partial match is in progress.
  const char* pattern_pos = needle_begin;
  while (haystack < hay_limit) {
    char hay_ch = *haystack;
    char needle_ch = *pattern_pos;
    if (!case_sensitive) {
      hay_ch = absl::ascii_tolower(static_cast<unsigned char>(hay_ch));
      needle_ch = absl::ascii_tolower(static_cast<unsigned char>(needle_ch));
    }

    if (hay_ch == needle_ch) {
      ++pattern_pos;
      if (pattern_pos == needle_limit) {
        // `haystack` is on the last matched byte; step back to the first.
        return haystack + 1 - neelen;
      }
    } else if (pattern_pos != needle_begin) {
      // A partial match failed: rewind the haystack to where that attempt
      // started (find "aab" in "aaab"); the increment below then moves on to
      // the following start position.
      haystack -= pattern_pos - needle_begin;
      pattern_pos = needle_begin;
    }
    ++haystack;
  }
  return nullptr;
}
// These are the functions you can call directly.
//
// Case-sensitive search of the `haylen` bytes at `phaystack` for the
// NUL-terminated string `pneedle`. Returns the first match or nullptr.
inline const char* memstr(const char* phaystack, size_t haylen,
                          const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<true>(phaystack, haylen, pneedle, needle_len);
}
// ASCII-case-insensitive search of the `haylen` bytes at `phaystack` for the
// NUL-terminated string `pneedle`. Returns the first match or nullptr.
inline const char* memcasestr(const char* phaystack, size_t haylen,
                              const char* pneedle) {
  const size_t needle_len = strlen(pneedle);
  return int_memmatch<false>(phaystack, haylen, pneedle, needle_len);
}
// Case-sensitive search of the `haylen` bytes at `phaystack` for the
// `needlelen` bytes at `pneedle`. Returns the first match or nullptr.
inline const char* memmem(const char* phaystack, size_t haylen,
                          const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = true;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// ASCII-case-insensitive search of the `haylen` bytes at `phaystack` for the
// `needlelen` bytes at `pneedle`. Returns the first match or nullptr.
inline const char* memcasemem(const char* phaystack, size_t haylen,
                              const char* pneedle, size_t needlelen) {
  constexpr bool kCaseSensitive = false;
  return int_memmatch<kCaseSensitive>(phaystack, haylen, pneedle, needlelen);
}
// Case-sensitive search of the `haylen` bytes at `phaystack` for the `neelen`
// bytes at `pneedle`. Returns a pointer to the first match, or nullptr if
// there is none; an empty needle matches at `phaystack`.
//
// This is significantly faster for case-sensitive matches with very
// few possible matches. See unit test for benchmarks.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
size_t neelen);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_

View file

@ -0,0 +1,323 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <algorithm>
#include <cstdlib>
#include "benchmark/benchmark.h"
#include "absl/strings/ascii.h"
// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
// That gives us:
// - an easy search: 'b'
// - a medium search: 'ab'. That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haystack)
// We benchmark case-sensitive and case-insensitive versions of
// three memmem implementations:
// - memmem() from memutil.h
// - search() from STL
// - memmatch(), a custom implementation using memchr and memcmp.
// Here are sample results:
//
// Run on (12 X 3800 MHz CPU s)
// CPU Caches:
// L1 Data 32K (x6)
// L1 Instruction 32K (x6)
// L2 Unified 256K (x6)
// L3 Unified 15360K (x1)
// ----------------------------------------------------------------
// Benchmark Time CPU Iterations
// ----------------------------------------------------------------
// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
// BM_MemmemStartup 5 ns 5 ns 150821500
// BM_SearchStartup 5 ns 5 ns 150644203
// BM_MemmatchStartup 7 ns 7 ns 97068802
//
// Conclusions:
//
// The following recommendations are based on the sample results above. However,
// we have found that the performance of STL search can vary significantly
// depending on compiler and standard library implementation. We recommend you
// run the benchmarks for yourself on relevant platforms.
//
// If you need case-insensitive, STL search is slightly better than memmem for
// all cases.
//
// Case-sensitive is more subtle:
// Custom memmatch is _very_ fast at scanning, so if you have very few possible
// matches in your haystack, that's the way to go. Performance drops
// significantly with more matches.
//
// STL search is slightly faster than memmem in the medium and pathological
// benchmarks. However, the performance of memmem is currently more dependable
// across platforms and build configurations.
namespace {
// Number of bytes in the benchmark haystack.
constexpr int kHaystackSize = 10000;
// The same size as a 64-bit value, used when computing bytes processed
// (kHaystackSize64 * state.iterations()).
constexpr int64_t kHaystackSize64 = kHaystackSize;
// Allocates the benchmark haystack: kHaystackSize bytes, every one 'a'
// except the final byte, which is 'b'. The buffer is not NUL-terminated and
// this function never frees it; the caller owns the returned pointer.
const char* MakeHaystack() {
  char* const buffer = new char[kHaystackSize];
  std::fill_n(buffer, kHaystackSize - 1, 'a');
  buffer[kHaystackSize - 1] = 'b';
  return buffer;
}
// The haystack shared by every benchmark below, built once during static
// initialization ("aaaa...ab", kHaystackSize bytes, not NUL-terminated).
const char* const kHaystack = MakeHaystack();
// Easy case for memmem(): search the whole haystack for the one-byte needle
// "b", which occurs only in the last position. Reports kHaystackSize bytes
// processed per iteration.
void BM_Memmem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmem);
// Medium case for memmem(): the needle "ab" starts with 'a', so every
// haystack position is a possible match start; the only real match is at the
// end.
void BM_MemmemMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemMedium);
// Pathological case for memmem(): the needle is the second half of the
// haystack itself ("aaa...ab"), so long partial matches occur at every
// position before the one real match.
void BM_MemmemPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmemPathological);
// Easy case for the case-insensitive memcasemem(): one-byte needle "b" that
// occurs only in the last haystack position.
void BM_Memcasemem(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasemem);
// Medium case for memcasemem(): needle "ab", where every haystack position
// matches the first needle byte.
void BM_MemcasememMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememMedium);
// Pathological case for memcasemem(): the needle is the second half of the
// haystack, producing long partial matches at every position.
void BM_MemcasememPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasememPathological);
// Equality predicate that ignores ASCII case; passed to std::search by the
// case-insensitive search benchmarks.
bool case_eq(const char a, const char b) {
  const auto lowered_a = absl::ascii_tolower(a);
  const auto lowered_b = absl::ascii_tolower(b);
  return lowered_a == lowered_b;
}
// Easy case for std::search: the needle is the final haystack byte ('b'),
// which occurs nowhere else.
void BM_Search(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Search);
// Medium case for std::search: the needle is the last two haystack bytes
// ("ab"), so every position matches the first needle byte.
void BM_SearchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchMedium);
// Pathological case for std::search: the needle is the second half of the
// haystack.
void BM_SearchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchPathological);
// Easy case for case-insensitive std::search (using the case_eq predicate):
// the needle is the final haystack byte.
void BM_Searchcase(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Searchcase);
// Medium case for case-insensitive std::search: the needle is the last two
// haystack bytes ("ab").
void BM_SearchcaseMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcaseMedium);
// Pathological case for case-insensitive std::search: the needle is the
// second half of the haystack.
void BM_SearchcasePathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize, case_eq));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcasePathological);
// Case-insensitive counterpart of memchr(): returns a pointer to the first of
// the `slen` bytes at `s` whose ASCII-lowercased value equals the lowercased
// `c`, or nullptr if there is none.
char* memcasechr(const char* s, int c, size_t slen) {
  const int lowered_target = absl::ascii_tolower(c);
  const char* const end = s + slen;
  for (const char* cur = s; cur != end; ++cur) {
    if (absl::ascii_tolower(*cur) == lowered_target) {
      return const_cast<char*>(cur);
    }
  }
  return nullptr;
}
// Case-insensitive counterpart of memmatch(), built from memcasechr() and
// memcasecmp(): returns the first occurrence of the `neelen`-byte needle in
// the `haylen`-byte haystack, or nullptr. An empty needle matches at the
// start of the haystack.
const char* memcasematch(const char* phaystack, size_t haylen,
                         const char* pneedle, size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;

  // One past the last position at which a complete needle could still start.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  for (const char* cursor = phaystack;;) {
    const char* const candidate = static_cast<char*>(
        memcasechr(cursor, pneedle[0], scan_end - cursor));
    if (!candidate) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    cursor = candidate + 1;
  }
}
// Easy case for memmatch(): one-byte needle "b" that occurs only in the last
// haystack position.
void BM_Memmatch(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memmatch);
// Medium case for memmatch(): needle "ab", where every haystack position
// matches the first needle byte.
void BM_MemmatchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchMedium);
// Pathological case for memmatch(): the needle is the second half of the
// haystack.
void BM_MemmatchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmatch(
kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemmatchPathological);
// Easy case for the local memcasematch() helper: one-byte needle "b".
void BM_Memcasematch(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasematch);
// Medium case for the local memcasematch() helper: needle "ab".
void BM_MemcasematchMedium(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchMedium);
// Pathological case for the local memcasematch() helper: the needle is the
// second half of the haystack.
void BM_MemcasematchPathological(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchPathological);
// memmem() on a tiny input: only the last 10 haystack bytes are searched for
// the final byte. No bytes-processed figure is reported.
// NOTE(review): judging by the name, this is meant to expose fixed per-call
// overhead rather than scanning throughput -- confirm.
void BM_MemmemStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmem(
kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
}
}
BENCHMARK(BM_MemmemStartup);
// std::search on a tiny input: only the last 10 haystack bytes are searched
// for the final byte. No bytes-processed figure is reported.
void BM_SearchStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(
std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
}
}
BENCHMARK(BM_SearchStartup);
// memmatch() on a tiny input: only the last 10 haystack bytes are searched
// for the final byte. No bytes-processed figure is reported.
void BM_MemmatchStartup(benchmark::State& state) {
for (auto _ : state) {
benchmark::DoNotOptimize(absl::strings_internal::memmatch(
kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
}
}
BENCHMARK(BM_MemmatchStartup);
} // namespace

View file

@ -0,0 +1,179 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit test for memutil.cc
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
namespace {
// Case-insensitive counterpart of memchr(): returns a pointer to the first of
// the `slen` bytes at `s` whose ASCII-lowercased value equals the lowercased
// `c`, or nullptr if there is none.
static char* memcasechr(const char* s, int c, size_t slen) {
  const int lowered_target = absl::ascii_tolower(c);
  const char* const end = s + slen;
  for (const char* cur = s; cur != end; ++cur) {
    if (absl::ascii_tolower(*cur) == lowered_target) {
      return const_cast<char*>(cur);
    }
  }
  return nullptr;
}
// Case-insensitive counterpart of memmatch(), built from memcasechr() and
// memcasecmp(): returns the first occurrence of the `neelen`-byte needle in
// the `haylen`-byte haystack, or nullptr. An empty needle matches at the
// start of the haystack.
static const char* memcasematch(const char* phaystack, size_t haylen,
                                const char* pneedle, size_t neelen) {
  if (neelen == 0) {
    return phaystack;  // even if haylen is 0
  }
  if (neelen > haylen) return nullptr;

  // One past the last position at which a complete needle could still start.
  const char* const scan_end = phaystack + haylen - neelen + 1;
  for (const char* cursor = phaystack;;) {
    const char* const candidate = static_cast<char*>(
        memcasechr(cursor, pneedle[0], scan_end - cursor));
    if (!candidate) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    cursor = candidate + 1;
  }
}
// Exercises every memutil routine in turn: memcat, memcasecmp, memdup,
// memrchr, memspn, memcspn, mempbrk, memmem, memcasemem and memmatch.
TEST(MemUtilTest, AllTests) {
// check memutil functions
char a[1000];
// memcat: build "hello there" in `a` from two pieces (no terminator is
// written; all later comparisons use explicit lengths).
absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
// memcasecmp: equal when only case differs; -1 because 'e' is one below 'f';
// equal again when the differing last byte is excluded; zero length is equal.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 1),
-1);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 2),
0);
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
// memdup: only checks that the result can be freed; contents are not
// inspected.
char* p = absl::strings_internal::memdup("hello", 5);
free(p);
// memrchr: the last 'e' is the final byte (preceded by 'r'); with the final
// byte excluded it is the 'e' preceded by 'h'; a missing byte yields nullptr.
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 1);
EXPECT_TRUE(p && p[-1] == 'r');
p = absl::strings_internal::memrchr("hello there", 'e',
sizeof("hello there") - 2);
EXPECT_TRUE(p && p[-1] == 'h');
p = absl::strings_internal::memrchr("hello there", 'u',
sizeof("hello there") - 1);
EXPECT_TRUE(p == nullptr);
// memspn: length of the leading run drawn from the accept set, including
// the cases where the set is empty, covers the whole input, or the input is
// cut short by the length argument.
int len = absl::strings_internal::memspn("hello there",
sizeof("hello there") - 1, "hole");
EXPECT_EQ(len, sizeof("hello") - 1);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"u");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
"trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 1, "trole h");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memspn("hello there!",
sizeof("hello there!") - 2, "trole h!");
EXPECT_EQ(len, sizeof("hello there!") - 2);
// memcspn: length of the leading run containing nothing from the reject set.
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "leho");
EXPECT_EQ(len, 0);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "u");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, "");
EXPECT_EQ(len, sizeof("hello there") - 1);
len = absl::strings_internal::memcspn("hello there",
sizeof("hello there") - 1, " ");
EXPECT_EQ(len, 5);
// mempbrk: first byte that is in the accept set; the '!' case checks that
// bytes beyond the given length are not considered.
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"leho");
EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
"nu");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there!",
sizeof("hello there!") - 2, "!");
EXPECT_TRUE(p == nullptr);
p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
" t ");
EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
// memmem: matches at the start and end, needles truncated by the length
// argument, and misses.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memcasemem: the same scenarios with needles whose case differs from the
// haystack's.
{
const char kHaystack[] = "aBcDeFgHiJ";
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
nullptr);
}
// memmatch: the same scenarios as memmem above.
{
const char kHaystack[] = "0123456789";
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
kHaystack);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
kHaystack + 7);
EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
kHaystack + 9);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
nullptr);
EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
nullptr);
}
}
} // namespace

View file

@ -0,0 +1,184 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by numbers_test.cc,
// numbers_legacy_test.cc and numbers_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Test helper that formats `value` in the given `base` (2 through 36) into
// `*destination`, replacing its previous contents. Digits above 9 are written
// as uppercase letters and negative values get a leading '-'.
// Returns false, leaving `*destination` empty, if `base` is out of range.
template <typename IntType>
inline bool Itoa(IntType value, int base, std::string* destination) {
  destination->clear();
  const bool base_supported = base >= 2 && base <= 36;
  if (!base_supported) {
    return false;
  }
  if (value == 0) {
    destination->assign(1, '0');
    return true;
  }

  const bool negative = value < 0;
  // Digits come out least-significant first; gather them in that order and
  // flip the whole string once at the end.
  std::string reversed;
  for (IntType rest = value; rest != 0;) {
    const IntType quotient = rest / base;
    // Can't use std::abs here because of problems when IntType is unsigned;
    // subtract in whichever order gives a non-negative digit instead.
    const int digit =
        static_cast<int>(rest > quotient * base ? rest - quotient * base
                                                : quotient * base - rest);
    const char symbol = digit < 10 ? '0' + digit : 'A' + digit - 10;
    reversed.push_back(symbol);
    rest = quotient;
  }
  if (negative) {
    reversed.push_back('-');
  }
  destination->assign(reversed.rbegin(), reversed.rend());
  return true;
}
// One string-to-uint32 conversion test vector.
struct uint32_test_case {
// Input text to parse (the table's final entry uses nullptr here).
const char* str;
// Whether the conversion is expected to succeed.
bool expect_ok;
int base; // base to pass to the conversion function
// Value expected from the conversion.
// NOTE(review): for failing cases this appears to hold the value the parser
// leaves behind (parsed prefix or saturated max) -- confirm against the tests
// that consume this struct.
uint32_t expected;
};
// Returns the shared table of 27 string-to-uint32 test vectors, built once as
// a function-local static. Entries cover explicit bases (16, 10, 8, 3),
// base 0, surrounding whitespace, malformed input, negative numbers and
// values past the uint32_t range; the final entry has a null `str`.
inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() {
static const std::array<uint32_test_case, 27> test_cases{{
{"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{"0x34234324", true, 16, 0x34234324},
{"34234324", true, 16, 0x34234324},
{"0", true, 16, 0},
{" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
{" \t\n 72717222", true, 8, 072717222},
{" \t\n 072717222", true, 8, 072717222},
{" \t\n 072717228", false, 8, 07271722},
{"0", true, 0, 0},
// Base-10 version.
{"34234324", true, 0, 34234324},
{"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()},
{"34234324 \n\t", true, 10, 34234324},
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
{"34234324a", false, 0, 34234324},
{"34234.3", false, 0, 34234},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()},
{"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()},
{nullptr, false, 0, 0},
}};
return test_cases;
}
// One entry of the table returned by strtouint64_test_cases(): an input text
// together with the expectations for converting it to a uint64_t.
struct uint64_test_case {
const char* str; // text to convert; null in the last entry of the table
bool expect_ok; // whether the conversion is expected to succeed
int base; // base to pass to the conversion function
uint64_t expected; // expected result value
};
// Returns the table of inputs for the uint64 conversion tests. The table is a
// function-local static, so it is built on first call and shared afterwards.
// Every entry lists the text, whether conversion should succeed, the base
// argument and the expected value; the final entry has a null `str`.
// NOTE(review): failing entries use either 0 or the uint64 maximum as
// `expected`; presumably that mirrors what the conversion function leaves in
// its output on failure -- confirm against the consumers of this table.
inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() {
static const std::array<uint64_test_case, 34> test_cases{{
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"0", true, 16, 0},
{"000", true, 0, 0},
{"0", true, 0, 0},
{" \t\n 0xffffffffffffffff", true, 16,
(std::numeric_limits<uint64_t>::max)()},
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12845670123456701234", false, 8, 0},
// Base-10 version.
{"34234324487834466", true, 0, int64_t{34234324487834466}},
{" \t\n 18446744073709551615", true, 0,
(std::numeric_limits<uint64_t>::max)()},
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
{"0", true, 0, 0},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0},
{"34234324487834466a", false, 0, 0},
{"34234487834466.3", false, 0, 0},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"18446744073709551616", false, 10, 0},
{"18446744073709551616", false, 0, 0},
{"0x10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 0,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x1234", true, 16, 0x1234},
// Base-10 string version.
{"1234", true, 0, 1234},
// Entry with a null input string.
{nullptr, false, 0, 0},
}};
return test_cases;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_

View file

@ -0,0 +1,36 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Appends the character `c` to the target string, unless `c` is the EOF
// marker, in which case nothing is written. Always returns 1. A target string
// must be set (checked with assert).
OStringStream::Buf::int_type OStringStream::overflow(int c) {
  assert(s_);
  const bool is_eof =
      Buf::traits_type::eq_int_type(c, Buf::traits_type::eof());
  if (!is_eof) {
    s_->push_back(static_cast<char>(c));
  }
  return 1;
}
// Appends the `n` characters starting at `s` to the target string and reports
// all `n` of them as written. A target string must be set (checked with
// assert).
std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) {
  assert(s_);
  std::string& target = *s_;
  target.append(s, n);
  return n;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,89 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#include <cassert>
#include <ostream>
#include <streambuf>
#include <string>
#include "absl/base/port.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The same as std::ostringstream but appends to a user-specified std::string,
// and is faster. It is ~70% faster to create, ~50% faster to write to, and
// completely free to extract the result std::string.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42 << ' ' << 3.14; // appends to `s`
//
// The stream object doesn't have to be named. Starting from C++11 operator<<
// works with rvalues of std::ostream.
//
// std::string s;
// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
//
// OStringStream is faster to create than std::ostringstream but it's still
// relatively slow. Avoid creating multiple streams where a single stream will
// do.
//
// Creates unnecessary instances of OStringStream: slow.
//
// std::string s;
// OStringStream(&s) << 42;
// OStringStream(&s) << ' ';
// OStringStream(&s) << 3.14;
//
// Creates a single instance of OStringStream and reuses it: fast.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42;
// strm << ' ';
// strm << 3.14;
//
// Note: flush() has no effect. No reason to call it.
// The class is its own stream buffer: it derives privately from
// std::basic_streambuf<char> and hands `this` to the std::ostream base.
class OStringStream : private std::basic_streambuf<char>, public std::ostream {
public:
// The argument can be null, in which case you'll need to call str(p) with a
// non-null argument before you can write to the stream.
//
// The destructor of OStringStream doesn't use the std::string. It's OK to
// destroy the std::string before the stream.
explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}
// Returns the string that writes currently go to (null if none is set).
std::string* str() { return s_; }
const std::string* str() const { return s_; }
// Makes `s` the target of subsequent writes. The previous target is simply
// no longer referenced.
void str(std::string* s) { s_ = s; }
private:
using Buf = std::basic_streambuf<char>;
// Stream buffer overrides (defined out of line) through which written
// characters reach the target string.
Buf::int_type overflow(int c) override;
std::streamsize xsputn(const char* s, std::streamsize n) override;
// Target of all writes; may be null.
std::string* s_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_

View file

@ -0,0 +1,106 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <sstream>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Template argument of the benchmarks below: selects what is done with the
// written text at the end of each iteration.
enum StringType {
kNone, // nothing further is done with the text
kStdString, // the text is additionally copied into a std::string
};
// Benchmarks for std::ostringstream.
//
// Each iteration creates a stream, writes a payload of range(1) 'x' bytes to
// it range(0) times and, when kOutput is kStdString, extracts the result as a
// std::string.
template <StringType kOutput>
void BM_StdStream(benchmark::State& state) {
  const int write_count = state.range(0);
  const int write_size = state.range(1);
  const std::string chunk(write_size, 'x');
  for (auto _ : state) {
    std::ostringstream stream;
    benchmark::DoNotOptimize(stream);
    for (int written = 0; written != write_count; ++written) {
      stream << chunk;
    }
    if (kOutput == kStdString) {
      std::string extracted = stream.str();
      benchmark::DoNotOptimize(extracted);
    }
  }
}
// Argument pairs are (number of writes, bytes per write); BM_StdStream reads
// them back as state.range(0) and state.range(1).
//
// Create the stream, optionally write to it, then destroy it.
BENCHMARK_TEMPLATE(BM_StdStream, kNone)
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
BENCHMARK_TEMPLATE(BM_StdStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Benchmarks for OStringStream.
//
// Each iteration creates an empty string and an OStringStream appending to
// it, writes a payload of range(1) 'x' bytes range(0) times and, when kOutput
// is kStdString, copies the accumulated string.
template <StringType kOutput>
void BM_CustomStream(benchmark::State& state) {
  const int write_count = state.range(0);
  const int write_size = state.range(1);
  const std::string chunk(write_size, 'x');
  for (auto _ : state) {
    std::string target;
    absl::strings_internal::OStringStream stream(&target);
    benchmark::DoNotOptimize(stream);
    for (int written = 0; written != write_count; ++written) {
      stream << chunk;
    }
    if (kOutput == kStdString) {
      std::string extracted = target;
      benchmark::DoNotOptimize(extracted);
    }
  }
}
// Argument pairs are (number of writes, bytes per write); BM_CustomStream
// reads them back as state.range(0) and state.range(1).
//
// Create the stream, optionally write to it, then destroy it.
BENCHMARK_TEMPLATE(BM_CustomStream, kNone)
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// It's not useful in practice to extract std::string from OStringStream; we
// measure it for completeness.
BENCHMARK_TEMPLATE(BM_CustomStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
} // namespace

View file

@ -0,0 +1,102 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
#include "gtest/gtest.h"
namespace {
// Compile-time check that OStringStream derives from std::ostream.
TEST(OStringStream, IsOStream) {
  using absl::strings_internal::OStringStream;
  static_assert(std::is_base_of<std::ostream, OStringStream>::value, "");
}
// Exercises construction and destruction with a null target, a live target,
// and a target that is destroyed before the stream.
TEST(OStringStream, ConstructDestroy) {
  using absl::strings_internal::OStringStream;
  // Null target.
  {
    OStringStream detached(nullptr);
    EXPECT_EQ(nullptr, detached.str());
  }
  // Creating and destroying a stream leaves the target's contents alone.
  {
    std::string text = "abc";
    {
      OStringStream attached(&text);
      EXPECT_EQ(&text, attached.str());
    }
    EXPECT_EQ("abc", text);
  }
  // The target is destroyed first; the stream is destroyed afterwards.
  {
    std::unique_ptr<std::string> owned(new std::string);
    OStringStream attached(owned.get());
    owned.reset();
  }
}
// Checks both str() getters and the str(p) setter, including retargeting to
// the same string, to another string, and to null.
TEST(OStringStream, Str) {
  using absl::strings_internal::OStringStream;
  std::string first;
  OStringStream stream(&first);
  const OStringStream& const_stream(stream);

  // The getter's pointee constness follows the constness of the stream.
  static_assert(std::is_same<decltype(stream.str()), std::string*>(), "");
  static_assert(
      std::is_same<decltype(const_stream.str()), const std::string*>(), "");

  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  // Setting the same target again.
  stream.str(&first);
  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  // Switching to a different target.
  std::string second;
  stream.str(&second);
  EXPECT_EQ(&second, stream.str());
  EXPECT_EQ(&second, const_stream.str());

  // Clearing the target.
  stream.str(nullptr);
  EXPECT_EQ(nullptr, stream.str());
  EXPECT_EQ(nullptr, const_stream.str());
}
// Writes through a named stream append to the existing contents of the target,
// and the result survives the stream's destruction.
// NOTE(review): the suite name "OStreamStream" looks like a typo for
// "OStringStream" (used by the other tests in this file); renaming would
// change the test's full name, so it is left as is.
TEST(OStreamStream, WriteToLValue) {
  using absl::strings_internal::OStringStream;
  std::string text = "abc";
  {
    OStringStream stream(&text);
    EXPECT_EQ("abc", text);
    stream << "";
    EXPECT_EQ("abc", text);
    stream << 42;
    EXPECT_EQ("abc42", text);
    stream << 'x' << 'y';
    EXPECT_EQ("abc42xy", text);
  }
  EXPECT_EQ("abc42xy", text);
}
// Writes through temporary streams append to the target just like writes
// through a named stream.
// NOTE(review): the suite name "OStreamStream" looks like a typo for
// "OStringStream" (used by the other tests in this file); renaming would
// change the test's full name, so it is left as is.
TEST(OStreamStream, WriteToRValue) {
  namespace si = absl::strings_internal;
  std::string text = "abc";
  si::OStringStream(&text) << "";
  EXPECT_EQ("abc", text);
  si::OStringStream(&text) << 42;
  EXPECT_EQ("abc42", text);
  si::OStringStream(&text) << 'x' << 'y';
  EXPECT_EQ("abc42xy", text);
}
} // namespace

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// The exact value of 1e23 falls precisely halfway between two representable
// doubles. Furthermore, the rounding rules we prefer (break ties by rounding
// to the nearest even) dictate in this case that the number should be rounded
// down, but this is not completely specified for floating-point literals in
// C++. (It just says to use the default rounding mode of the standard
// library.) We ensure the result we want by using a number that has an
// unambiguous correctly rounded answer.
constexpr double k1e23 = 9999999999999999e7;
// Table of powers of ten written as floating-point literals.
// kPowersOfTen[i] corresponds to 10^(i - 324): entry 0 is 0.0, entry 1 is
// 1e-323 and the last entry is 1e+308. The slot for 10^23 uses k1e23 rather
// than a 1e+23 literal. Pow10() indexes this table with `exp + 324`.
constexpr double kPowersOfTen[] = {
0.0, 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307,
1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298,
1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289,
1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262,
1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253,
1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244,
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235,
1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217,
1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208,
1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199,
1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172,
1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163,
1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154,
1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145,
1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127,
1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118,
1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109,
1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91,
1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82,
1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73,
1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64,
1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55,
1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37,
1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19,
1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8,
1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17,
1e+18, 1e+19, 1e+20, 1e+21, 1e+22, k1e23, 1e+24, 1e+25, 1e+26,
1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35,
1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44,
1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53,
1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62,
1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71,
1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89,
1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98,
1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107,
1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116,
1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125,
1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134,
1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143,
1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152,
1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161,
1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170,
1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188,
1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206,
1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215,
1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224,
1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233,
1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242,
1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251,
1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278,
1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287,
1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305,
1e+306, 1e+307, 1e+308,
};
} // namespace
// Returns the kPowersOfTen entry for 10^exp. Exponents below -324 yield 0.0
// and exponents above 308 yield INFINITY.
double Pow10(int exp) {
  if (exp > 308) return INFINITY;
  if (exp < -324) return 0.0;
  // The table's first entry belongs to exponent -324, hence the offset.
  return kPowersOfTen[exp + 324];
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,40 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test helper library contains a table of powers of 10, to guarantee
// precise values are computed across the full range of doubles. We can't rely
// on the pow() function, because not all standard libraries ship a version
// that is precise.
#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#include <vector>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Computes the precise value of 10^exp. (I.e. the nearest representable
// double to the exact value, rounding to nearest-even in the (single) case of
// being exactly halfway between.)
//
// Returns 0.0 when exp is -324 or smaller and INFINITY when exp is greater
// than 308.
double Pow10(int exp);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// One expectation for Pow10(): the expected result is rebuilt by the test as
// std::ldexp(significand, radix), i.e. significand * 2^radix.
struct TestCase {
int power; // Testing Pow10(power)
uint64_t significand; // Raw bits of the expected value
int radix; // significand is adjusted by 2^radix
};
// Compares Pow10() against hand-written expectations for subnormal results,
// results that are close to a rounding boundary, and out-of-range exponents.
TEST(Pow10HelperTest, Works) {
// The logic in pow10_helper.cc is so simple that theoretically we don't even
// need a test. However, we're paranoid and believe that there may be
// compilers that don't round floating-point literals correctly, even though
// it is specified by the standard. We check various edge cases, just to be
// sure.
constexpr TestCase kTestCases[] = {
// Subnormals
{-323, 0x2, -1074},
{-322, 0x14, -1074},
{-321, 0xca, -1074},
{-320, 0x7e8, -1074},
{-319, 0x4f10, -1074},
{-318, 0x316a2, -1074},
{-317, 0x1ee257, -1074},
{-316, 0x134d761, -1074},
{-315, 0xc1069cd, -1074},
{-314, 0x78a42205, -1074},
{-313, 0x4b6695433, -1074},
{-312, 0x2f201d49fb, -1074},
{-311, 0x1d74124e3d1, -1074},
{-310, 0x12688b70e62b, -1074},
{-309, 0xb8157268fdaf, -1074},
{-308, 0x730d67819e8d2, -1074},
// Values that are very close to rounding the other way.
// Comment shows difference of significand from the true value.
{-307, 0x11fa182c40c60d, -1072}, // -.4588
{-290, 0x18f2b061aea072, -1016}, // .4854
{-276, 0x11BA03F5B21000, -969}, // .4709
{-259, 0x1899C2F6732210, -913}, // .4830
{-252, 0x1D53844EE47DD1, -890}, // -.4743
{-227, 0x1E5297287C2F45, -807}, // -.4708
{-198, 0x1322E220A5B17E, -710}, // -.4714
{-195, 0x12B010D3E1CF56, -700}, // .4928
{-192, 0x123FF06EEA847A, -690}, // .4968
{-163, 0x1708D0F84D3DE7, -594}, // -.4977
{-145, 0x13FAAC3E3FA1F3, -534}, // -.4785
{-111, 0x133D4032C2C7F5, -421}, // .4774
{-106, 0x1D5B561574765B, -405}, // -.4869
{-104, 0x16EF5B40C2FC77, -398}, // -.4741
{-88, 0x197683DF2F268D, -345}, // -.4738
{-86, 0x13E497065CD61F, -338}, // .4736
{-76, 0x17288E1271F513, -305}, // -.4761
{-63, 0x1A53FC9631D10D, -262}, // .4929
{-30, 0x14484BFEEBC2A0, -152}, // .4758
{-21, 0x12E3B40A0E9B4F, -122}, // -.4916
{-5, 0x14F8B588E368F1, -69}, // .4829
{23, 0x152D02C7E14AF6, 24}, // -.5000 (exactly, round-to-even)
{29, 0x1431E0FAE6D721, 44}, // -.4870
{34, 0x1ED09BEAD87C03, 60}, // -.4721
{70, 0x172EBAD6DDC73D, 180}, // .4733
{105, 0x1BE7ABD3781ECA, 296}, // -.4850
{126, 0x17A2ECC414A03F, 366}, // -.4999
{130, 0x1CDA62055B2D9E, 379}, // .4855
{165, 0x115D847AD00087, 496}, // -.4913
{172, 0x14B378469B6732, 519}, // .4818
{187, 0x1262DFEEBBB0F9, 569}, // -.4805
{210, 0x18557F31326BBB, 645}, // -.4992
{212, 0x1302CB5E6F642A, 652}, // -.4838
{215, 0x1290BA9A38C7D1, 662}, // -.4881
{236, 0x1F736F9B3494E9, 731}, // .4707
{244, 0x176EC98994F489, 758}, // .4924
{250, 0x1658E3AB795204, 778}, // -.4963
{252, 0x117571DDF6C814, 785}, // .4873
{254, 0x1B4781EAD1989E, 791}, // -.4887
{260, 0x1A03FDE214CAF1, 811}, // .4784
{284, 0x1585041B2C477F, 891}, // .4798
{304, 0x1D2A1BE4048F90, 957}, // -.4987
// Out-of-range values
// Exponents of -324 and below expect zero. Exponents above 308 expect
// ldexp(1, 2000), which lies beyond the double range and is meant to match
// the INFINITY returned by Pow10.
{-324, 0x0, 0},
{-325, 0x0, 0},
{-326, 0x0, 0},
{309, 1, 2000},
{310, 1, 2000},
{311, 1, 2000},
};
// Every expectation is rebuilt as significand * 2^radix and must match the
// Pow10 result exactly; on mismatch both values are printed in %a format.
for (const TestCase& test_case : kTestCases) {
EXPECT_EQ(Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix))
<< absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power,
Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix));
}
}
} // namespace
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,73 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#include <string>
#include <type_traits>
#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h" // for void_t
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Primary template, used when string_type has no usable
// __resize_default_init member: HasMember is std::false_type and Resize()
// falls back to the ordinary resize().
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
using HasMember = std::false_type;
static void Resize(string_type* s, size_t new_size) { s->resize(new_size); }
};
// __resize_default_init is provided by libc++ >= 8.0
//
// Partial specialization that is viable only when calling
// __resize_default_init(237) on a string_type& is a well-formed expression
// (237 is just a sample argument for the decltype check). HasMember is
// std::true_type and Resize() forwards to __resize_default_init().
template <typename string_type>
struct ResizeUninitializedTraits<
string_type, absl::void_t<decltype(std::declval<string_type&>()
.__resize_default_init(237))> > {
using HasMember = std::true_type;
static void Resize(string_type* s, size_t new_size) {
s->__resize_default_init(new_size);
}
};
// Returns true if the std::string implementation supports a resize where
// the new characters added to the std::string are left untouched.
//
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
// the previous function.)
template <typename string_type>
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
return ResizeUninitializedTraits<string_type>::HasMember::value;
}
// Like str->resize(new_size), except any new characters added to "*str" as a
// result of resizing may be left uninitialized, rather than being filled with
// '0' bytes. Typically used when code is then going to overwrite the backing
// store of the std::string with known data.
template <typename string_type, typename = void>
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
ResizeUninitializedTraits<string_type>::Resize(s, new_size);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_

View file

@ -0,0 +1,82 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/resize_uninitialized.h"
#include "gtest/gtest.h"
namespace {
// Counts calls to resize() on the mock string types in this file.
int resize_call_count = 0;

// Mock string without __resize_default_init(): it holds no data, always
// reports size 0, and only records in resize_call_count how many times
// resize() has been called.
struct resizable_string {
  size_t size() const { return 0; }
  char& operator[](size_t) {
    static char placeholder = '\0';
    return placeholder;
  }
  void resize(size_t) { ++resize_call_count; }
};
// Counts calls to __resize_default_init() on the mock string type below.
int resize_default_init_call_count = 0;

// Mock string that offers both resize() and __resize_default_init(): it holds
// no data, always reports size 0, and only records how many times each of the
// two methods has been called.
struct resize_default_init_string {
  size_t size() const { return 0; }
  char& operator[](size_t) {
    static char placeholder = '\0';
    return placeholder;
  }
  void resize(size_t) { ++resize_call_count; }
  void __resize_default_init(size_t) { ++resize_default_init_call_count; }
};
// Verifies that the trait query reports the right answer for each mock string
// type without calling any resize method, and that
// STLStringResizeUninitialized dispatches to resize() or
// __resize_default_init() accordingly.
TEST(ResizeUninit, WithAndWithout) {
  using absl::strings_internal::STLStringResizeUninitialized;
  using absl::strings_internal::STLStringSupportsNontrashingResize;

  // Type that only has resize().
  resize_call_count = 0;
  resize_default_init_call_count = 0;
  {
    resizable_string plain;

    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    EXPECT_FALSE(STLStringSupportsNontrashingResize(&plain));

    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    STLStringResizeUninitialized(&plain, 237);

    EXPECT_EQ(resize_call_count, 1);
    EXPECT_EQ(resize_default_init_call_count, 0);
  }

  // Type that also has __resize_default_init().
  resize_call_count = 0;
  resize_default_init_call_count = 0;
  {
    resize_default_init_string with_default_init;

    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    EXPECT_TRUE(STLStringSupportsNontrashingResize(&with_default_init));

    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 0);

    STLStringResizeUninitialized(&with_default_init, 237);

    EXPECT_EQ(resize_call_count, 0);
    EXPECT_EQ(resize_default_init_call_count, 1);
  }
}
} // namespace

View file

@ -0,0 +1,248 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#include <array>
#include <bitset>
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "absl/meta/type_traits.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// IsSpecialization<C, T> derives from std::true_type when C, after
// absl::decay_t, is an instantiation T<Args...> of the class template T, and
// from std::false_type otherwise. Only templates whose parameters are all
// types can be passed as T.
template <typename C, template <typename...> class T>
struct IsSpecializationImpl : std::false_type {};
template <template <typename...> class T, typename... Args>
struct IsSpecializationImpl<T<Args...>, T> : std::true_type {};
template <typename C, template <typename...> class T>
using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>;
// IsArray<C> is true when C, after absl::decay_t, is a std::array<T, N>.
// The partial specialization matches any template of the shape A<T, N> (a
// type followed by a size_t) and then requires it to be exactly std::array.
template <typename C>
struct IsArrayImpl : std::false_type {};
template <template <typename, size_t> class A, typename T, size_t N>
struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {};
template <typename C>
using IsArray = IsArrayImpl<absl::decay_t<C>>;
// IsBitset<C> is true when C, after absl::decay_t, is a std::bitset<N>.
// The partial specialization matches any template of the shape B<N> (a single
// size_t) and then requires it to be exactly std::bitset.
template <typename C>
struct IsBitsetImpl : std::false_type {};
template <template <size_t> class B, size_t N>
struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {};
template <typename C>
using IsBitset = IsBitsetImpl<absl::decay_t<C>>;
// True when C (after decay) is one of the standard containers listed below:
// std::array, std::bitset, or an instantiation of deque, forward_list, list,
// map, multimap, set, multiset, unordered_map, unordered_multimap,
// unordered_set, unordered_multiset or vector.
template <typename C>
struct IsSTLContainer
: absl::disjunction<
IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>,
IsSpecialization<C, std::forward_list>,
IsSpecialization<C, std::list>, IsSpecialization<C, std::map>,
IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>,
IsSpecialization<C, std::multiset>,
IsSpecialization<C, std::unordered_map>,
IsSpecialization<C, std::unordered_multimap>,
IsSpecialization<C, std::unordered_set>,
IsSpecialization<C, std::unordered_multiset>,
IsSpecialization<C, std::vector>> {};
// Tests whether `C` is a base class of the specialization of the container
// template `T` that is built from C's own member types. The primary template
// answers "no"; it is used whenever `C` lacks the member types a partial
// specialization requires, or `T` has a different number of parameters.
template <typename C, template <typename...> class T, typename = void>
struct IsBaseOfSpecializationImpl : std::false_type {};
// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE
// on the existence of container dependent types and plug them into the STL
// template.
// Two-parameter templates: T<value_type, allocator_type>.
template <typename C, template <typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::value_type, typename C::allocator_type>> {};
// Three-parameter templates: T<key_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// Four-parameter templates with a mapped type and an ordering:
// T<key_type, mapped_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {
};
// Four-parameter hashed templates:
// T<key_type, hasher, key_equal, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>> {
};
// Five-parameter hashed templates with a mapped type:
// T<key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// IsBaseOfSpecializationImpl evaluated on the decayed form of `C`.
template <typename C, template <typename...> class T>
using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>;
// True when the decayed type is an instantiation of a (type, size_t) template
// and std::is_base_of reports it as a base of the std::array with the same
// arguments. Any other type yields false.
template <typename Candidate>
struct IsBaseOfArrayImpl : std::false_type {};

template <template <typename, size_t> class Tmpl, typename Elem, size_t Len>
struct IsBaseOfArrayImpl<Tmpl<Elem, Len>>
    : std::is_base_of<Tmpl<Elem, Len>, std::array<Elem, Len>> {};

template <typename Candidate>
using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<Candidate>>;
// True when the decayed type is an instantiation of a single-size_t template
// and std::is_base_of reports it as a base of the std::bitset of the same
// width. Any other type yields false.
template <typename Candidate>
struct IsBaseOfBitsetImpl : std::false_type {};

template <template <size_t> class Tmpl, size_t Width>
struct IsBaseOfBitsetImpl<Tmpl<Width>>
    : std::is_base_of<Tmpl<Width>, std::bitset<Width>> {};

template <typename Candidate>
using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<Candidate>>;
// True if `C` passes the "is a base of" test against any one of the standard
// container templates listed below.
template <typename C>
struct IsBaseOfSTLContainer
: absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>,
IsBaseOfSpecialization<C, std::deque>,
IsBaseOfSpecialization<C, std::forward_list>,
IsBaseOfSpecialization<C, std::list>,
IsBaseOfSpecialization<C, std::map>,
IsBaseOfSpecialization<C, std::multimap>,
IsBaseOfSpecialization<C, std::set>,
IsBaseOfSpecialization<C, std::multiset>,
IsBaseOfSpecialization<C, std::unordered_map>,
IsBaseOfSpecialization<C, std::unordered_multimap>,
IsBaseOfSpecialization<C, std::unordered_set>,
IsBaseOfSpecialization<C, std::unordered_multiset>,
IsBaseOfSpecialization<C, std::vector>> {};
// Tests whether `C` is convertible to the specialization of the container
// template `T` that is built from C's own member types. The primary template
// answers "no"; it is used whenever `C` lacks the member types a partial
// specialization requires, or `T` has a different number of parameters.
template <typename C, template <typename...> class T, typename = void>
struct IsConvertibleToSpecializationImpl : std::false_type {};
// IsConvertibleToSpecializationImpl needs multiple partial specializations to
// SFINAE on the existence of container dependent types and plug them into the
// STL template.
// Two-parameter templates: T<value_type, allocator_type>.
template <typename C, template <typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::value_type, typename C::allocator_type>> {};
// Three-parameter templates: T<key_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// Four-parameter templates with a mapped type and an ordering:
// T<key_type, mapped_type, key_compare, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {};
// Four-parameter hashed templates:
// T<key_type, hasher, key_equal, allocator_type>.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// Five-parameter hashed templates with a mapped type:
// T<key_type, mapped_type, hasher, key_equal, allocator_type>.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// IsConvertibleToSpecializationImpl evaluated on the decayed form of `C`.
template <typename C, template <typename...> class T>
using IsConvertibleToSpecialization =
IsConvertibleToSpecializationImpl<absl::decay_t<C>, T>;
// True when the decayed type is an instantiation of a (type, size_t) template
// that is convertible to the std::array with the same arguments. Any other
// type yields false.
template <typename Candidate>
struct IsConvertibleToArrayImpl : std::false_type {};

template <template <typename, size_t> class Tmpl, typename Elem, size_t Len>
struct IsConvertibleToArrayImpl<Tmpl<Elem, Len>>
    : std::is_convertible<Tmpl<Elem, Len>, std::array<Elem, Len>> {};

template <typename Candidate>
using IsConvertibleToArray = IsConvertibleToArrayImpl<absl::decay_t<Candidate>>;
// True when the decayed type is an instantiation of a single-size_t template
// that is convertible to the std::bitset of the same width. Any other type
// yields false.
template <typename Candidate>
struct IsConvertibleToBitsetImpl : std::false_type {};

template <template <size_t> class Tmpl, size_t Width>
struct IsConvertibleToBitsetImpl<Tmpl<Width>>
    : std::is_convertible<Tmpl<Width>, std::bitset<Width>> {};

template <typename Candidate>
using IsConvertibleToBitset =
    IsConvertibleToBitsetImpl<absl::decay_t<Candidate>>;
// True if `C` passes the "is convertible to" test against any one of the
// standard container templates listed below.
template <typename C>
struct IsConvertibleToSTLContainer
: absl::disjunction<
IsConvertibleToArray<C>, IsConvertibleToBitset<C>,
IsConvertibleToSpecialization<C, std::deque>,
IsConvertibleToSpecialization<C, std::forward_list>,
IsConvertibleToSpecialization<C, std::list>,
IsConvertibleToSpecialization<C, std::map>,
IsConvertibleToSpecialization<C, std::multimap>,
IsConvertibleToSpecialization<C, std::set>,
IsConvertibleToSpecialization<C, std::multiset>,
IsConvertibleToSpecialization<C, std::unordered_map>,
IsConvertibleToSpecialization<C, std::unordered_multimap>,
IsConvertibleToSpecialization<C, std::unordered_set>,
IsConvertibleToSpecialization<C, std::unordered_multiset>,
IsConvertibleToSpecialization<C, std::vector>> {};
// True only when all three conditions hold for `C`: it is not itself one of
// the listed STL containers, it satisfies IsBaseOfSTLContainer, and it
// satisfies IsConvertibleToSTLContainer.
template <typename C>
struct IsStrictlyBaseOfAndConvertibleToSTLContainer
: absl::conjunction<absl::negation<IsSTLContainer<C>>,
IsBaseOfSTLContainer<C>,
IsConvertibleToSTLContainer<C>> {};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_

View file

@ -0,0 +1,474 @@
//
// POSIX spec:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html
//
#include "absl/strings/internal/str_format/arg.h"
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/float_conversion.h"
#include "absl/strings/numbers.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Shrinks *capacity by `n`; the result is clipped so it never drops below 0.
void ReducePadding(size_t n, size_t *capacity) {
  const size_t remaining = Excess(n, *capacity);
  *capacity = remaining;
}

// Shrinks *capacity by the length of `s`, with the same clipping at 0.
void ReducePadding(string_view s, size_t *capacity) {
  ReducePadding(s.size(), capacity);
}
// std::make_unsigned, extended so that both absl::int128 and absl::uint128
// map to absl::uint128.
template <typename T>
struct MakeUnsigned : std::make_unsigned<T> {};
template <>
struct MakeUnsigned<absl::int128> {
using type = absl::uint128;
};
template <>
struct MakeUnsigned<absl::uint128> {
using type = absl::uint128;
};
// std::is_signed, extended with explicit answers for the 128-bit types:
// absl::int128 is signed, absl::uint128 is not.
template <typename T>
struct IsSigned : std::is_signed<T> {};
template <>
struct IsSigned<absl::int128> : std::true_type {};
template <>
struct IsSigned<absl::uint128> : std::false_type {};
// Integral digit printer.
// Call one of the PrintAs* routines after construction once.
// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results.
//
// The digits are written into the fixed `storage_` buffer; `start_` and
// `size_` describe the printed range and are not set until a PrintAs* routine
// has run.
class IntDigits {
public:
// Print the unsigned integer as octal.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsOct(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer from the end backwards, three bits per digit.
char *p = storage_ + sizeof(storage_);
do {
*--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7));
v >>= 3;
} while (v);
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// Print the signed or unsigned integer as decimal.
// Supports all integral types.
template <typename T>
void PrintAsDec(T v) {
static_assert(std::is_integral<T>::value, "");
// Printed forwards from the beginning of the buffer.
start_ = storage_;
size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_;
}
// Print a signed 128-bit integer as decimal: the magnitude is computed in
// unsigned arithmetic and a '-' is requested for negative inputs.
void PrintAsDec(int128 v) {
auto u = static_cast<uint128>(v);
bool add_neg = false;
if (v < 0) {
add_neg = true;
u = uint128{} - u;
}
PrintAsDec(u, add_neg);
}
// Print an unsigned 128-bit integer as decimal, prefixed with '-' when
// `add_neg` is true.
void PrintAsDec(uint128 v, bool add_neg = false) {
// This function can be sped up if needed. We can call FastIntToBuffer
// twice, or fix FastIntToBuffer to support uint128.
// Fill the buffer from the end backwards, two decimal digits per step.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p);
v /= 100;
} while (v);
if (p[0] == '0') {
// We printed one too many digits.
++p;
}
if (add_neg) {
*--p = '-';
}
size_ = storage_ + sizeof(storage_) - p;
start_ = p;
}
// Print the unsigned integer as hex using lowercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexLower(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer from the end backwards, one byte (two hex characters)
// per step, copying the pair of characters for that byte from kHexTable.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
constexpr const char* table = numbers_internal::kHexTable;
std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2);
// A one-byte value is finished after a single step; stop before the
// 8-bit shift below.
if (sizeof(T) == 1) break;
v >>= 8;
} while (v);
if (p[0] == '0') {
// We printed one too many digits.
++p;
}
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// Print the unsigned integer as hex using uppercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexUpper(T v) {
static_assert(!IsSigned<T>::value, "");
char *p = storage_ + sizeof(storage_);
// kHexTable is only lowercase, so do it manually for uppercase.
do {
*--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15];
v >>= 4;
} while (v);
start_ = p;
size_ = storage_ + sizeof(storage_) - p;
}
// The printed value including the '-' sign if available.
// For inputs of value `0`, this will return "0"
string_view with_neg_and_zero() const { return {start_, size_}; }
// The printed value not including the '-' sign.
// For inputs of value `0`, this will return "".
string_view without_neg_or_zero() const {
static_assert('-' < '0', "The check below verifies both.");
// Skip the first character when it is either '-' or '0'.
size_t advance = start_[0] <= '0' ? 1 : 0;
return {start_ + advance, size_ - advance};
}
// True if the printed text begins with a '-' sign.
bool is_negative() const { return start_[0] == '-'; }
private:
// First printed character (points into storage_) and the printed length.
const char *start_;
size_t size_;
// Max size: 128 bit value as octal -> 43 digits, plus sign char
char storage_[128 / 3 + 1 + 1];
};
// Returns the prefix that marks a hexadecimal result ("0x" or "0X"), or an
// empty view when no prefix applies.
//
// Note: 'o' conversions do not have a base indicator, it's just that
// the '#' flag is specified to modify the precision for 'o' conversions.
string_view BaseIndicator(const IntDigits &as_digits,
                          const FormatConversionSpecImpl conv) {
  const auto ch = conv.conversion_char();
  const bool is_pointer = ch == FormatConversionCharInternal::p;
  const bool is_upper_hex = ch == FormatConversionCharInternal::X;
  const bool is_hex =
      is_pointer || is_upper_hex || ch == FormatConversionCharInternal::x;
  if (!is_hex) return {};

  // %p always shows the prefix; x and X only do so under the '#' (alt) flag.
  if (!is_pointer && !conv.has_alt_flag()) return {};

  // From the POSIX description of '#' flag:
  //   "For x or X conversion specifiers, a non-zero result shall have
  //   0x (or 0X) prefixed to it."
  if (as_digits.without_neg_or_zero().empty()) return {};

  return is_upper_hex ? "0X" : "0x";
}
// Returns the text for the sign position of a 'd' or 'i' conversion: "-" for
// negative values, otherwise "+" or " " if the corresponding flag is set.
// Every other conversion, and an unflagged non-negative value, gets an empty
// view.
string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) {
  const auto ch = conv.conversion_char();
  const bool is_signed_decimal = ch == FormatConversionCharInternal::d ||
                                 ch == FormatConversionCharInternal::i;
  if (!is_signed_decimal) return {};
  if (neg) return "-";
  if (conv.has_show_pos_flag()) return "+";
  if (conv.has_sign_col_flag()) return " ";
  return {};
}
// Writes the single character `v` to `sink`, padded with spaces up to the
// requested width. The padding goes after the character when the left flag is
// set and before it otherwise. Always returns true.
bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  size_t padding = 0;
  if (conv.width() >= 0) padding = conv.width();
  // The character itself occupies one column of the field.
  ReducePadding(1, &padding);

  if (conv.has_left_flag()) {
    sink->Append(1, v);
    sink->Append(padding, ' ');
  } else {
    sink->Append(padding, ' ');
    sink->Append(1, v);
  }
  return true;
}
bool ConvertIntImplInnerSlow(const IntDigits &as_digits,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
// Print as a sequence of Substrings:
// [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces]
size_t fill = 0;
if (conv.width() >= 0) fill = conv.width();
string_view formatted = as_digits.without_neg_or_zero();
ReducePadding(formatted, &fill);
string_view sign = SignColumn(as_digits.is_negative(), conv);
ReducePadding(sign, &fill);
string_view base_indicator = BaseIndicator(as_digits, conv);
ReducePadding(base_indicator, &fill);
int precision = conv.precision();
bool precision_specified = precision >= 0;
if (!precision_specified)
precision = 1;
if (conv.has_alt_flag() &&
conv.conversion_char() == FormatConversionCharInternal::o) {
// From POSIX description of the '#' (alt) flag:
// "For o conversion, it increases the precision (if necessary) to
// force the first digit of the result to be zero."
if (formatted.empty() || *formatted.begin() != '0') {
int needed = static_cast<int>(formatted.size()) + 1;
precision = std::max(precision, needed);
}
}
size_t num_zeroes = Excess(formatted.size(), precision);
ReducePadding(num_zeroes, &fill);
size_t num_left_spaces = !conv.has_left_flag() ? fill : 0;
size_t num_right_spaces = conv.has_left_flag() ? fill : 0;
// From POSIX description of the '0' (zero) flag:
// "For d, i, o, u, x, and X conversion specifiers, if a precision
// is specified, the '0' flag is ignored."
if (!precision_specified && conv.has_zero_flag()) {
num_zeroes += num_left_spaces;
num_left_spaces = 0;
}
sink->Append(num_left_spaces, ' ');
sink->Append(sign);
sink->Append(base_indicator);
sink->Append(num_zeroes, '0');
sink->Append(formatted);
sink->Append(num_right_spaces, ' ');
return true;
}
// Formats the integer `v` according to `conv` and appends the result to
// `sink`. The 'c' conversion is handed to ConvertCharImpl and the floating
// point conversions to ConvertFloatImpl (after casting `v` to double); the
// integer conversions print the digits into an IntDigits first.
template <typename T>
bool ConvertIntArg(T v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
// FormatConversionChar. Likely this is because at this point
// FormatConversionChar is declared, but not defined.
switch (static_cast<uint8_t>(conv.conversion_char())) {
case static_cast<uint8_t>(FormatConversionCharInternal::c):
return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink);
// o, x, X and u print the value reinterpreted as the unsigned type U.
case static_cast<uint8_t>(FormatConversionCharInternal::o):
as_digits.PrintAsOct(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::x):
as_digits.PrintAsHexLower(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::X):
as_digits.PrintAsHexUpper(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::u):
as_digits.PrintAsDec(static_cast<U>(v));
break;
// d and i print the value with its own type, keeping any sign.
case static_cast<uint8_t>(FormatConversionCharInternal::d):
case static_cast<uint8_t>(FormatConversionCharInternal::i):
as_digits.PrintAsDec(v);
break;
case static_cast<uint8_t>(FormatConversionCharInternal::a):
case static_cast<uint8_t>(FormatConversionCharInternal::e):
case static_cast<uint8_t>(FormatConversionCharInternal::f):
case static_cast<uint8_t>(FormatConversionCharInternal::g):
case static_cast<uint8_t>(FormatConversionCharInternal::A):
case static_cast<uint8_t>(FormatConversionCharInternal::E):
case static_cast<uint8_t>(FormatConversionCharInternal::F):
case static_cast<uint8_t>(FormatConversionCharInternal::G):
return ConvertFloatImpl(static_cast<double>(v), conv, sink);
default:
ABSL_INTERNAL_ASSUME(false);
}
// Fast path: a basic spec just emits the printed text as is.
if (conv.is_basic()) {
sink->Append(as_digits.with_neg_and_zero());
return true;
}
return ConvertIntImplInnerSlow(as_digits, conv, sink);
}
// Formats the floating point value `v` into `sink`. Returns false without
// writing anything if `conv` does not name a floating point conversion;
// otherwise returns the result of ConvertFloatImpl.
template <typename T>
bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv,
                     FormatSinkImpl *sink) {
  if (!FormatConversionCharIsFloat(conv.conversion_char())) {
    return false;
  }
  return ConvertFloatImpl(v, conv, sink);
}
// Appends the string `v` to `sink`. A basic spec copies the text verbatim and
// returns true; otherwise the sink's PutPaddedString applies width, precision
// and the left flag, and its result is returned.
inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
                             FormatSinkImpl *sink) {
  if (!conv.is_basic()) {
    return sink->PutPaddedString(v, conv.width(), conv.precision(),
                                 conv.has_left_flag());
  }
  sink->Append(v);
  return true;
}
} // namespace
// ==================== Strings ====================
// Both overloads forward to ConvertStringArg and wrap its bool result.
StringConvertResult FormatConvertImpl(const std::string &v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v, conv, sink)};
}
StringConvertResult FormatConvertImpl(string_view v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v, conv, sink)};
}
// Formats a C string. With the 'p' conversion the pointer value itself is
// printed; otherwise the pointed-to text is formatted as a string.
ArgConvertResult<FormatConversionCharSetUnion(
    FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv,
                  FormatSinkImpl *sink) {
  if (conv.conversion_char() == FormatConversionCharInternal::p) {
    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
  }

  // Number of characters handed to the string formatter; a null pointer is
  // treated as an empty string.
  size_t len = 0;
  if (v != nullptr) {
    const int precision = conv.precision();
    if (precision < 0) {
      len = std::strlen(v);
    } else {
      // If precision is set, we look for the NUL-terminator on the valid
      // range.
      const char *const limit = v + precision;
      len = std::find(v, limit, '\0') - v;
    }
  }
  return {ConvertStringArg(string_view(v, len), conv, sink)};
}
// ==================== Raw pointers ====================
// Prints a pointer value as lowercase hexadecimal. A null pointer is written
// as the literal text "(nil)" with no width or flag handling applied.
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
    VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
  if (v.value == 0) {
    sink->Append("(nil)");
    return {true};
  }
  IntDigits hex_digits;
  hex_digits.PrintAsHexLower(v.value);
  const bool ok = ConvertIntImplInnerSlow(hex_digits, conv, sink);
  return {ok};
}
// ==================== Floats ====================
// Each overload forwards to ConvertFloatArg and wraps its bool result.
FloatingConvertResult FormatConvertImpl(float v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
FloatingConvertResult FormatConvertImpl(double v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
FloatingConvertResult FormatConvertImpl(long double v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
// ==================== Chars ====================
// Character types go through the integer path: each overload forwards to
// ConvertIntArg and wraps its bool result.
IntegralConvertResult FormatConvertImpl(char v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(signed char v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned char v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
// ==================== Ints ====================
// One overload per built-in integer type plus the two 128-bit types; each
// forwards to ConvertIntArg and wraps its bool result.
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(int v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(absl::int128 v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(absl::uint128 v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_();
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,474 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#include <string.h>
#include <wchar.h>
#include <cstdio>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class FormatCountCapture;
class FormatSink;
namespace str_format_internal {
class FormatConversionSpec;
// Detects whether a call AbslFormatConvert(const T&, const
// FormatConversionSpec&, FormatSink*) is well-formed. The primary template
// answers "no"; the partial specialization is selected only when the call
// expression compiles.
template <typename T, typename = void>
struct HasUserDefinedConvert : std::false_type {};
template <typename T>
struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert(
std::declval<const T&>(),
std::declval<const FormatConversionSpec&>(),
std::declval<FormatSink*>()))>>
: std::true_type {};
template <typename T>
class StreamedWrapper;
// If 'v' can be converted (in the printf sense) according to 'conv',
// then convert it, appending to `sink` and return `true`.
// Otherwise fail and return `false`.
// Raw pointers.
// Carries a pointer argument as its integer address.
struct VoidPtr {
  VoidPtr() = default;

  // Implicit conversion from any `T*` for which a reinterpret_cast to
  // uintptr_t is well-formed. A null pointer is always stored as 0.
  template <typename T,
            decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0>
  VoidPtr(T* ptr)  // NOLINT
      : value(ptr != nullptr ? reinterpret_cast<uintptr_t>(ptr)
                             : static_cast<uintptr_t>(0)) {}

  uintptr_t value;
};
// Result of a FormatConvertImpl call. `value` is the success flag; the
// template argument `C` carries a conversion character set in the type so it
// can be recovered at compile time.
template <FormatConversionCharSet C>
struct ArgConvertResult {
bool value;
};
// Returns the character set `C` encoded in an ArgConvertResult type.
template <FormatConversionCharSet C>
constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) {
return C;
}
// Result type for the string conversions: tagged with the 's' set.
using StringConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::s>;
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Strings.
StringConvertResult FormatConvertImpl(const std::string& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
StringConvertResult FormatConvertImpl(string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)>
FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Formats an absl::Cord as a string, honoring precision (maximum number of
// bytes written), width and the left flag. The cord is written chunk by chunk.
// Always reports success.
//
// The template parameter is constrained to be exactly absl::Cord.
template <class AbslCord, typename std::enable_if<std::is_same<
                              AbslCord, absl::Cord>::value>::type* = nullptr>
StringConvertResult FormatConvertImpl(const AbslCord& value,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* sink) {
  // Bytes still to emit: the whole cord, capped by the precision if given.
  size_t remaining = value.size();
  const int precision = conv.precision();
  if (precision >= 0) {
    remaining = (std::min)(remaining, static_cast<size_t>(precision));
  }

  // Space padding needed to reach the requested width, if any.
  const int width = conv.width();
  const size_t field = width >= 0 ? static_cast<size_t>(width) : 0;
  const size_t padding = Excess(remaining, field);
  const bool left_justify = conv.has_left_flag();

  if (!left_justify && padding > 0) sink->Append(padding, ' ');

  for (string_view chunk : value.Chunks()) {
    // Emit at most `remaining` bytes of this chunk.
    const size_t take = (std::min)(chunk.size(), remaining);
    sink->Append(chunk.substr(0, take));
    remaining -= take;
    if (remaining == 0) break;
  }

  if (left_justify && padding > 0) sink->Append(padding, ' ');
  return {true};
}
// Result type for the integer and character conversions: tagged with the
// union of the 'c', kNumeric and kStar sets.
using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::c,
FormatConversionCharSetInternal::kNumeric,
FormatConversionCharSetInternal::kStar)>;
// Result type for the floating point conversions: tagged with kFloating.
using FloatingConvertResult =
ArgConvertResult<FormatConversionCharSetInternal::kFloating>;
// Floats.
FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(long double v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Chars.
IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Ints.
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(uint128 v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// bool is formatted as an int. The overload is a template restricted to
// exactly `bool`, so other argument types cannot reach it through an implicit
// conversion to bool.
template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0>
IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatConvertImpl(static_cast<int>(v), conv, sink);
}
// We provide this function to help the checker, but it is never defined.
// FormatArgImpl will use the underlying Convert functions instead.
template <typename T>
typename std::enable_if<std::is_enum<T>::value &&
!HasUserDefinedConvert<T>::value,
IntegralConvertResult>::type
FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Formats a StreamedWrapper by streaming the wrapped value into a temporary
// std::ostringstream and then formatting the resulting text as a string.
// Reports failure if the stream is in a failed state after the insertion.
template <typename T>
StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* out) {
  std::ostringstream stream;
  stream << v.v_;
  if (stream.fail()) {
    return {false};
  }
  const std::string text = stream.str();
  return str_format_internal::FormatConvertImpl(text, conv, out);
}
// Use templates and dependent types to delay evaluation of the function
// until after FormatCountCapture is fully defined.
struct FormatCountCaptureHelper {
// Handles the 'n' conversion: stores the sink's current size() (cast to int)
// through the capture's `p_` pointer. Any other conversion character is
// rejected with {false}.
template <class T = int>
static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper(
const FormatCountCapture& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
// `v2` is the same object as `v`; its type is spelled through `T` so that
// it is a dependent type and the member access below is only checked when
// the template is instantiated.
const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v;
if (conv.conversion_char() !=
str_format_internal::FormatConversionCharInternal::n) {
return {false};
}
*v2.p_ = static_cast<int>(sink->size());
return {true};
}
};
// FormatCountCapture overload; forwards to
// FormatCountCaptureHelper::ConvertHelper.
template <class T = int>
ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl(
const FormatCountCapture& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatCountCaptureHelper::ConvertHelper(v, conv, sink);
}
// Helper friend struct to hide implementation details from the public API of
// FormatArgImpl.
struct FormatArgImplFriend {
// Asks the argument's dispatcher for its value as an int, written to `*out`.
// Returns the dispatcher's result.
template <typename Arg>
static bool ToInt(Arg arg, int* out) {
// A value initialized FormatConversionSpecImpl has a `none` conv, which
// tells the dispatcher to run the `int` conversion.
return arg.dispatcher_(arg.data_, {}, out);
}
// Runs the argument's dispatcher with `conv`, passing `out` as the output.
template <typename Arg>
static bool Convert(Arg arg, FormatConversionSpecImpl conv,
FormatSinkImpl* out) {
return arg.dispatcher_(arg.data_, conv, out);
}
// Exposes the argument's dispatcher function pointer.
template <typename Arg>
static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) {
return arg.dispatcher_;
}
};
// Returns the conversion character set encoded in the return type of the
// FormatConvertImpl overload selected for an argument of type `const Arg&`.
// The overload is only named inside decltype; it is not called.
template <typename Arg>
constexpr FormatConversionCharSet ArgumentToConv() {
return absl::str_format_internal::ExtractCharSet(
decltype(str_format_internal::FormatConvertImpl(
std::declval<const Arg&>(),
std::declval<const FormatConversionSpecImpl&>(),
std::declval<FormatSinkImpl*>())){});
}
// A type-erased handle to a format argument.
class FormatArgImpl {
private:
enum { kInlinedSpace = 8 };
using VoidPtr = str_format_internal::VoidPtr;
union Data {
const void* ptr;
const volatile void* volatile_ptr;
char buf[kInlinedSpace];
};
using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out);
// True for types that are copied into the inline buffer: integral, floating
// point, pointer and VoidPtr types that fit in kInlinedSpace bytes.
template <typename T>
struct store_by_value
: std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) &&
(std::is_integral<T>::value ||
std::is_floating_point<T>::value ||
std::is_pointer<T>::value ||
std::is_same<VoidPtr, T>::value)> {};
// How an argument is held in `Data`.
enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue };
// Chooses the policy for `T`: volatile types use ByVolatilePointer, types
// accepted by store_by_value use ByValue, and everything else uses ByPointer.
template <typename T>
struct storage_policy
: std::integral_constant<StoragePolicy,
(std::is_volatile<T>::value
? ByVolatilePointer
: (store_by_value<T>::value ? ByValue
: ByPointer))> {
};
// To reduce the number of vtables we will decay values before hand.
// Anything with a user-defined Convert will get its own vtable.
// For everything else:
// - Decay char* and char arrays into `const char*`
// - Decay any other pointer to `const void*`
// - Decay all enums to their underlying type.
// - Decay function pointers to void*.
//
// Primary template: without a user-defined convert, a type convertible to
// `const char*` becomes `const char*`, otherwise a type convertible to VoidPtr
// becomes VoidPtr; every remaining type is kept as `const T&`.
template <typename T, typename = void>
struct DecayType {
static constexpr bool kHasUserDefined =
str_format_internal::HasUserDefinedConvert<T>::value;
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
typename std::conditional<!kHasUserDefined &&
std::is_convertible<T, VoidPtr>::value,
VoidPtr, const T&>::type>::type;
};
// Enums without a user-defined convert decay to their underlying type.
template <typename T>
struct DecayType<T,
typename std::enable_if<
!str_format_internal::HasUserDefinedConvert<T>::value &&
std::is_enum<T>::value>::type> {
using type = typename std::underlying_type<T>::type;
};
public:
// Builds the type-erased handle for `value`. The value is first converted to
// its decayed type `D` (see DecayType). The static_assert checks that `D` is
// either left as `const T&` or is a type stored by value.
template <typename T>
explicit FormatArgImpl(const T& value) {
using D = typename DecayType<T>::type;
static_assert(
std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue,
"Decayed types must be stored by value");
Init(static_cast<D>(value));
}
private:
friend struct str_format_internal::FormatArgImplFriend;
template <typename T, StoragePolicy = storage_policy<T>::value>
struct Manager;
// ByPointer: Data only records the address of the caller's object, so the
// object must outlive every use of the stored argument.
template <typename T>
struct Manager<T, ByPointer> {
  // Records the address of `value` (std::addressof ignores overloaded `&`).
  static Data SetValue(const T& value) {
    Data stored;
    stored.ptr = std::addressof(value);
    return stored;
  }
  // Returns a reference to the object whose address was recorded.
  static const T& Value(Data arg) {
    const T* const object = static_cast<const T*>(arg.ptr);
    return *object;
  }
};
// ByVolatilePointer: same as ByPointer, but the address is kept in the
// volatile-qualified pointer member of Data.
template <typename T>
struct Manager<T, ByVolatilePointer> {
  // Records the address of the volatile `value`.
  static Data SetValue(const T& value) {
    Data stored;
    stored.volatile_ptr = &value;
    return stored;
  }
  // Returns a reference to the volatile object whose address was recorded.
  static const T& Value(Data arg) {
    const T* const object = static_cast<const T*>(arg.volatile_ptr);
    return *object;
  }
};
// ByValue: the object representation of T is copied into the inline buffer,
// so the stored argument does not depend on the lifetime of the original.
template <typename T>
struct Manager<T, ByValue> {
  // Copies the bytes of `value` into Data::buf.
  static Data SetValue(const T& value) {
    Data stored;
    memcpy(stored.buf, &value, sizeof(T));
    return stored;
  }
  // Rebuilds a T from the bytes previously copied into Data::buf.
  static T Value(Data arg) {
    T result;
    memcpy(&result, arg.buf, sizeof(result));
    return result;
  }
};
// Binds this argument to `value`: selects the dispatcher instantiated for T
// and stores the value using the Manager chosen for T.
template <typename T>
void Init(const T& value) {
  dispatcher_ = &Dispatch<T>;
  data_ = Manager<T>::SetValue(value);
}
// Converts an integral value to `int`, saturating at the limits of `int`
// instead of wrapping. Comparisons are done in a 64-bit type with the same
// signedness as T so that both sides of each comparison are representable.
template <typename T>
static int ToIntVal(const T& val) {
  using Wide = typename std::conditional<std::is_signed<T>::value, int64_t,
                                         uint64_t>::type;
  const Wide widened = static_cast<Wide>(val);
  const Wide upper = static_cast<Wide>((std::numeric_limits<int>::max)());
  if (widened > upper) {
    return (std::numeric_limits<int>::max)();
  }
  // Only signed types can fall below the minimum; for unsigned T the lower
  // bound would wrap to a huge value and must not be compared against.
  if (std::is_signed<T>::value) {
    const Wide lower = static_cast<Wide>((std::numeric_limits<int>::min)());
    if (widened < lower) {
      return (std::numeric_limits<int>::min)();
    }
  }
  return static_cast<int>(val);
}
// ToInt<T> extracts the stored argument as an `int` (used for `*` width and
// precision). The two tag parameters are std::is_integral<T> and
// std::is_enum<T>; the matching overload is chosen by tag dispatch.

// Integral T: read the value back and saturate it into `int`.
template <typename T>
static bool ToInt(Data arg, int* out, std::true_type /* is_integral */,
                  std::false_type) {
  auto&& stored = Manager<T>::Value(arg);
  *out = ToIntVal(stored);
  return true;
}

// Enum T: convert to the underlying integer type first, then saturate.
template <typename T>
static bool ToInt(Data arg, int* out, std::false_type,
                  std::true_type /* is_enum */) {
  using Underlying = typename std::underlying_type<T>::type;
  *out = ToIntVal(static_cast<Underlying>(Manager<T>::Value(arg)));
  return true;
}

// Any other T cannot provide an int; `out` is left untouched.
template <typename T>
static bool ToInt(Data, int*, std::false_type, std::false_type) {
  return false;
}
// Type-specific entry point stored in dispatcher_. Depending on the
// conversion character in `spec` it either extracts the argument as an int
// (`out` is then an int*) or formats it into a sink (`out` is then a
// FormatSinkImpl*). Returns false if the operation is not possible for T.
template <typename T>
static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) {
  const auto conv = spec.conversion_char();
  // A `none` conv indicates that we want the `int` conversion.
  if (ABSL_PREDICT_FALSE(conv == FormatConversionCharInternal::kNone)) {
    return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(),
                    std::is_enum<T>());
  }
  // Reject conversion characters that T does not support.
  if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), conv))) {
    return false;
  }
  FormatSinkImpl* const sink = static_cast<FormatSinkImpl*>(out);
  return str_format_internal::FormatConvertImpl(Manager<T>::Value(arg), spec,
                                                sink)
      .value;
}
Data data_;
Dispatcher dispatcher_;
};
// Emits one explicit instantiation of FormatArgImpl::Dispatch<T>. `E` is
// placed in front of `template`, so passing `extern` turns the line into an
// explicit instantiation *declaration*.
#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \
E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \
void*)
// Applies ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_ to every built-in
// argument type listed below, forwarding the macro arguments as `E`.
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__)
// In this header the instantiations are only declared (`extern`).
// NOTE(review): the matching definitions are presumably emitted from a .cc
// file by expanding the same macro without `extern` -- confirm.
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_

View file

@ -0,0 +1,114 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
#include "absl/strings/internal/str_format/arg.h"
#include <ostream>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture for the FormatArgImpl tests. It only provides helper values: an
// unscoped enum and a function returning a `const char*`.
class FormatArgImplTest : public ::testing::Test {
public:
// Enum argument used to check enum-to-int extraction (kBlue == 2).
enum Color { kRed, kGreen, kBlue };
// Returns a string literal as a `const char*` (a non-array char pointer).
static const char *hi() { return "hi"; }
};
// Checks FormatArgImplFriend::ToInt: integral, bool and enum arguments yield
// an int (saturated to the int range), while floating-point, pointer and
// string arguments are rejected.
TEST_F(FormatArgImplTest, ToInt) {
int out = 0;
// Plain ints and a char round-trip unchanged.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out));
EXPECT_EQ(1, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out));
EXPECT_EQ(-1, out);
EXPECT_TRUE(
FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out));
EXPECT_EQ(64, out);
// A wider type whose value fits in int is returned as is.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT
EXPECT_EQ(123456, out);
// Values above INT_MAX saturate to INT_MAX.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>( // NOLINT
std::numeric_limits<int>::max()) +
1),
&out));
EXPECT_EQ(std::numeric_limits<int>::max(), out);
// Values below INT_MIN saturate to INT_MIN.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<long long>( // NOLINT
std::numeric_limits<int>::min()) -
10),
&out));
EXPECT_EQ(std::numeric_limits<int>::min(), out);
// bool converts to 0 / 1.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out));
EXPECT_EQ(0, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out));
EXPECT_EQ(1, out);
// Non-integral arguments cannot be used as an int.
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<int *>(nullptr)), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out));
// Enums convert through their underlying value.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out));
EXPECT_EQ(2, out);
}
extern const char kMyArray[];
// Char arrays of any size (including one of unknown bound) must share the
// dispatcher used for a plain `const char*` argument.
TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) {
  const char* a = "";
  const auto char_ptr_vtable =
      FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a));
  EXPECT_EQ(char_ptr_vtable,
            FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("")));
  EXPECT_EQ(char_ptr_vtable,
            FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A")));
  EXPECT_EQ(char_ptr_vtable,
            FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC")));
  EXPECT_EQ(char_ptr_vtable,
            FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray)));
}
// Object pointers, pointers to volatile and function pointers must all share
// the dispatcher used for a `void*` argument.
TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) {
// Reference dispatcher: the one selected for a plain void*.
auto expected = FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<void *>(nullptr)));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<int *>(nullptr))),
expected);
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<volatile int *>(nullptr))),
expected);
// A captureless lambda converted to a function pointer.
auto p = static_cast<void (*)()>([] {});
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)),
expected);
}
// kMyArray is declared above as an array of unknown bound and only defined
// after this test; formatting it with a plain `%s`-style spec must still
// produce its full contents.
TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) {
std::string s;
FormatSinkImpl sink(&s);
// Build an `s` conversion with default flags and no width/precision (-1).
FormatConversionSpecImpl conv;
FormatConversionSpecImplFriend::SetConversionChar(
FormatConversionCharInternal::s, &conv);
FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
FormatConversionSpecImplFriend::SetWidth(-1, &conv);
FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
EXPECT_TRUE(
FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink));
// Flush so buffered output reaches `s` before it is inspected.
sink.Flush();
EXPECT_EQ("ABCDE", s);
}
const char kMyArray[] = "ABCDE";
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,245 @@
#include "absl/strings/internal/str_format/bind.h"
#include <cerrno>
#include <limits>
#include <sstream>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Reads the argument at 1-based `position` in `pack` as an int (used for `*`
// width and precision) and stores it in `*value`.
// Returns false if `position` is outside the pack or if the argument cannot
// be converted to an int.
inline bool BindFromPosition(int position, int* value,
                             absl::Span<const FormatArgImpl> pack) {
  assert(position > 0);
  // Positions are 1-based. The subtraction is done in size_t so that a zero
  // or negative position wraps to a huge index and is rejected by the bounds
  // check even when assertions are compiled out, instead of indexing before
  // the start of the pack.
  const size_t index = static_cast<size_t>(position) - 1;
  if (index >= pack.size()) {
    return false;
  }
  return FormatArgImplFriend::ToInt(pack[index], value);
}
// Holds the pack of user arguments that unbound conversions are resolved
// against. The context only stores a span; it does not own the arguments.
class ArgContext {
public:
explicit ArgContext(absl::Span<const FormatArgImpl> pack) : pack_(pack) {}
// Fill 'bound' with the results of applying the context's argument pack
// to the specified 'unbound'. We synthesize a BoundConversion by
// lining up a UnboundConversion with a user argument. We also
// resolve any '*' specifiers for width and precision, so after
// this call, 'bound' has all the information it needs to be formatted.
// Returns false on failure.
bool Bind(const UnboundConversion* unbound, BoundConversion* bound);
private:
// Non-owning view of the arguments; positions used by Bind are 1-based.
absl::Span<const FormatArgImpl> pack_;
};
// Resolves `unbound` against the argument pack and writes the result to
// `bound`: the target argument, the conversion character, the flags, and the
// width/precision (reading them from the pack for `*` specifiers).
// Returns false, leaving `bound` untouched, if any referenced position is out
// of range or a `*` argument is not convertible to int.
inline bool ArgContext::Bind(const UnboundConversion* unbound,
                             BoundConversion* bound) {
  // Positions are 1-based; a position of 0 wraps around and fails the check.
  const int arg_position = unbound->arg_position;
  if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false;
  const FormatArgImpl* const arg = &pack_[arg_position - 1];

  if (unbound->flags.basic) {
    // Basic conversions carry no width or precision.
    FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound);
    FormatConversionSpecImplFriend::SetWidth(-1, bound);
    FormatConversionSpecImplFriend::SetPrecision(-1, bound);
  } else {
    int width = unbound->width.value();
    bool force_left = false;
    if (unbound->width.is_from_arg()) {
      if (!BindFromPosition(unbound->width.get_from_arg(), &width, pack_)) {
        return false;
      }
      if (width < 0) {
        // "A negative field width is taken as a '-' flag followed by a
        // positive field width."
        force_left = true;
        // Clamp before negating so that INT_MIN does not overflow.
        width = -std::max(width, -std::numeric_limits<int>::max());
      }
    }

    int precision = unbound->precision.value();
    if (unbound->precision.is_from_arg() &&
        !BindFromPosition(unbound->precision.get_from_arg(), &precision,
                          pack_)) {
      return false;
    }

    FormatConversionSpecImplFriend::SetWidth(width, bound);
    FormatConversionSpecImplFriend::SetPrecision(precision, bound);
    Flags flags = unbound->flags;
    if (force_left) flags.left = true;
    FormatConversionSpecImplFriend::SetFlags(flags, bound);
  }

  FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound);
  bound->set_arg(arg);
  return true;
}
// Adapts a Converter to the consumer interface used while walking a format:
// literal text is forwarded unchanged, and each conversion is first bound to
// its argument and then handed to the converter.
template <typename Converter>
class ConverterConsumer {
 public:
  ConverterConsumer(Converter converter, absl::Span<const FormatArgImpl> pack)
      : converter_(converter), arg_context_(pack) {}

  // Forwards literal text to the converter. Always succeeds.
  bool Append(string_view s) {
    converter_.Append(s);
    return true;
  }

  // Binds `conv` to the argument pack and converts it. Returns false if the
  // binding fails or the converter reports failure.
  bool ConvertOne(const UnboundConversion& conv, string_view conv_string) {
    BoundConversion bound;
    return arg_context_.Bind(&conv, &bound) &&
           converter_.ConvertOne(bound, conv_string);
  }

 private:
  Converter converter_;
  ArgContext arg_context_;
};
template <typename Converter>
bool ConvertAll(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args, Converter converter) {
if (format.has_parsed_conversion()) {
return format.parsed_conversion()->ProcessFormat(
ConverterConsumer<Converter>(converter, args));
} else {
return ParseFormatString(format.str(),
ConverterConsumer<Converter>(converter, args));
}
}
// Converter that performs the real formatting: literal text and converted
// arguments are both written to the sink given at construction.
class DefaultConverter {
public:
// `sink` is stored as a raw pointer and is not owned.
explicit DefaultConverter(FormatSinkImpl* sink) : sink_(sink) {}
// Writes literal text to the sink.
void Append(string_view s) const { sink_->Append(s); }
// Formats the bound argument into the sink; the textual form of the
// conversion is not needed here and is ignored.
bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
return FormatArgImplFriend::Convert(*bound.arg(), bound, sink_);
}
private:
FormatSinkImpl* sink_;
};
// Converter used for testing: instead of formatting an argument it writes a
// summary of the bound conversion of the form
//   {<arg printed with %d>:<flags><width>.<precision><conversion char>}
// where width and precision are omitted when negative.
class SummarizingConverter {
 public:
  // `sink` is stored as a raw pointer and is not owned.
  explicit SummarizingConverter(FormatSinkImpl* sink) : sink_(sink) {}

  // Writes literal text to the sink.
  void Append(string_view s) const { sink_->Append(s); }

  // Writes the summary of `bound` to the sink. Always returns true.
  bool ConvertOne(const BoundConversion& bound, string_view /*conv*/) const {
    UntypedFormatSpecImpl spec("%d");
    std::ostringstream summary;
    summary << "{" << Streamable(spec, {*bound.arg()}) << ":";
    summary << FormatConversionSpecImplFriend::FlagsToString(bound);
    if (bound.width() >= 0) {
      summary << bound.width();
    }
    if (bound.precision() >= 0) {
      summary << "." << bound.precision();
    }
    summary << bound.conversion_char() << "}";
    sink_->Append(summary.str());
    return true;
  }

 private:
  FormatSinkImpl* sink_;
};
} // namespace
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack,
BoundConversion* bound) {
return ArgContext(pack).Bind(props, bound);
}
std::string Summarize(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
typedef SummarizingConverter Converter;
std::string out;
{
// inner block to destroy sink before returning out. It ensures a last
// flush.
FormatSinkImpl sink(&out);
if (!ConvertAll(format, args, Converter(&sink))) {
return "";
}
}
return out;
}
// Formats `args` according to `format` into `raw_sink`. Returns false if the
// format could not be processed against the arguments.
bool FormatUntyped(FormatRawSinkImpl raw_sink,
                   const UntypedFormatSpecImpl format,
                   absl::Span<const FormatArgImpl> args) {
  FormatSinkImpl sink(raw_sink);
  return ConvertAll(format, args, DefaultConverter(&sink));
}
std::ostream& Streamable::Print(std::ostream& os) const {
if (!FormatUntyped(&os, format_, args_)) os.setstate(std::ios::failbit);
return os;
}
std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
size_t orig = out->size();
if (ABSL_PREDICT_FALSE(!FormatUntyped(out, format, args))) {
out->erase(orig);
}
return *out;
}
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args) {
std::string out;
if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) {
out.clear();
}
return out;
}
// Formats into the FILE `output`. Returns the number of bytes written, or -1
// with errno set: EINVAL if formatting fails, the sink's error code if the
// write failed, or EFBIG if the byte count does not fit in an int.
int FprintF(std::FILE* output, const UntypedFormatSpecImpl format,
            absl::Span<const FormatArgImpl> args) {
  FILERawSink sink(output);
  int failure = 0;
  if (!FormatUntyped(&sink, format, args)) {
    failure = EINVAL;
  } else if (sink.error()) {
    failure = sink.error();
  } else if (sink.count() > std::numeric_limits<int>::max()) {
    failure = EFBIG;
  }
  if (failure != 0) {
    errno = failure;
    return -1;
  }
  return static_cast<int>(sink.count());
}
// Formats into the buffer `output` of `size` bytes, writing at most
// `size - 1` bytes followed by a terminating NUL (nothing is written when
// `size` is 0).
// Returns the number of bytes the complete output requires, which may exceed
// what was actually stored. Returns -1 with errno set to EINVAL if formatting
// fails, or to EOVERFLOW if the required length does not fit in an int.
int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format,
             absl::Span<const FormatArgImpl> args) {
  // Reserve one byte of the buffer for the terminating NUL.
  BufferRawSink sink(output, size ? size - 1 : 0);
  if (!FormatUntyped(&sink, format, args)) {
    errno = EINVAL;
    return -1;
  }
  const size_t total = sink.total_written();
  if (size) output[std::min(total, size - 1)] = 0;
  // Converting a length above INT_MAX to int would silently produce a wrong
  // (possibly negative) count; report it as an error instead, as FprintF does.
  if (total > static_cast<size_t>((std::numeric_limits<int>::max)())) {
    errno = EOVERFLOW;
    return -1;
  }
  return static_cast<int>(total);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,202 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_
#include <array>
#include <cstdio>
#include <sstream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/parser.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class UntypedFormatSpec;
namespace str_format_internal {
// A conversion spec together with the argument it applies to. The argument is
// referenced through a non-owning pointer.
class BoundConversion : public FormatConversionSpecImpl {
 public:
  // Returns the bound argument, or nullptr if set_arg() has not been called.
  const FormatArgImpl* arg() const { return arg_; }
  void set_arg(const FormatArgImpl* a) { arg_ = a; }

 private:
  // Initialized so that reading arg() before binding yields nullptr rather
  // than an indeterminate pointer.
  const FormatArgImpl* arg_ = nullptr;
};
// This is the type-erased class that the implementation uses.
//
// It refers either to a format string (pointer + length) or to an already
// parsed format. Both cases share `data_`; `size_` holds the string length,
// or the all-ones marker value when `data_` points to a parsed format. The
// referenced data is not owned.
class UntypedFormatSpecImpl {
 public:
  UntypedFormatSpecImpl() = delete;

  // Refers to the characters of `s`.
  explicit UntypedFormatSpecImpl(string_view s)
      : data_(s.data()), size_(s.size()) {}

  // Refers to the parsed format `pc`.
  explicit UntypedFormatSpecImpl(
      const str_format_internal::ParsedFormatBase* pc)
      : data_(pc), size_(ParsedMarker()) {}

  // True if this object was built from a parsed format.
  bool has_parsed_conversion() const { return ParsedMarker() == size_; }

  // Returns the format string. Must not be called for a parsed format.
  string_view str() const {
    assert(!has_parsed_conversion());
    const char* const text = static_cast<const char*>(data_);
    return string_view(text, size_);
  }

  // Returns the parsed format. Must only be called for a parsed format.
  const str_format_internal::ParsedFormatBase* parsed_conversion() const {
    assert(has_parsed_conversion());
    return static_cast<const str_format_internal::ParsedFormatBase*>(data_);
  }

  // Gives access to the `spec_` member of a wrapper object `s`.
  template <typename T>
  static const UntypedFormatSpecImpl& Extract(const T& s) {
    return s.spec_;
  }

 private:
  // Value of `size_` that marks `data_` as pointing to a parsed format.
  static constexpr size_t ParsedMarker() { return ~size_t{}; }

  const void* data_;
  size_t size_;
};
// Yields T unchanged, but makes the resulting type formally depend on the
// extra template arguments. FormatSpecTemplate uses it to name
// UntypedFormatSpec, which is only forward-declared at this point, as a
// dependent base class.
template <typename T, FormatConversionCharSet...>
struct MakeDependent {
using type = T;
};
// Implicitly convertible from `const char*`, `string_view`, and the
// `ExtendedParsedFormat` type. This abstraction allows all format functions to
// operate on any of them without providing too many overloads.
//
// `Args` holds one conversion set per format argument. When the format
// checker is enabled, the constructors below use `enable_if` attributes so
// that only formats that are constant expressions and match `Args` are
// accepted.
template <FormatConversionCharSet... Args>
class FormatSpecTemplate
: public MakeDependent<UntypedFormatSpec, Args...>::type {
using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type;
public:
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Honeypot overload for when the string is not constexpr.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
FormatSpecTemplate(...) // NOLINT
__attribute__((unavailable("Format string is not constexpr.")));
// Honeypot overload for when the format is constexpr and invalid.
// We use the 'unavailable' attribute to give a better compiler error than
// just 'method is deleted'.
// To avoid checking the format twice, we just check that the format is
// constexpr. If it is valid, then the overload below will kick in.
// We add the template here to make this overload have lower priority.
template <typename = void>
FormatSpecTemplate(const char* s) // NOLINT
__attribute__((
enable_if(str_format_internal::EnsureConstexpr(s), "constexpr trap"),
unavailable(
"Format specified does not match the arguments passed.")));
// Same honeypot for string_view formats; here the error is produced by a
// static_assert that can never hold.
template <typename T = void>
FormatSpecTemplate(string_view s) // NOLINT
__attribute__((enable_if(str_format_internal::EnsureConstexpr(s),
"constexpr trap"))) {
static_assert(sizeof(T*) == 0,
"Format specified does not match the arguments passed.");
}
// Good format overload.
FormatSpecTemplate(const char* s) // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
FormatSpecTemplate(string_view s) // NOLINT
__attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap")))
: Base(s) {}
#else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Without the checker every format string is accepted unchecked.
FormatSpecTemplate(const char* s) : Base(s) {} // NOLINT
FormatSpecTemplate(string_view s) : Base(s) {} // NOLINT
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Accepts a parsed format whose conversion sets `C...` have the same count
// as `Args...` and satisfy `Contains(Args, C)` pairwise.
template <FormatConversionCharSet... C,
typename = typename std::enable_if<
AllOf(sizeof...(C) == sizeof...(Args), Contains(Args,
C)...)>::type>
FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT
: Base(&pc) {}
};
class Streamable {
public:
Streamable(const UntypedFormatSpecImpl& format,
absl::Span<const FormatArgImpl> args)
: format_(format) {
if (args.size() <= ABSL_ARRAYSIZE(few_args_)) {
for (size_t i = 0; i < args.size(); ++i) {
few_args_[i] = args[i];
}
args_ = absl::MakeSpan(few_args_, args.size());
} else {
many_args_.assign(args.begin(), args.end());
args_ = many_args_;
}
}
std::ostream& Print(std::ostream& os) const;
friend std::ostream& operator<<(std::ostream& os, const Streamable& l) {
return l.Print(os);
}
private:
const UntypedFormatSpecImpl& format_;
absl::Span<const FormatArgImpl> args_;
// if args_.size() is 4 or less:
FormatArgImpl few_args_[4] = {FormatArgImpl(0), FormatArgImpl(0),
FormatArgImpl(0), FormatArgImpl(0)};
// if args_.size() is more than 4:
std::vector<FormatArgImpl> many_args_;
};
// for testing
// Returns a textual summary of how each conversion in `format` binds to
// `args`, or an empty string on failure.
std::string Summarize(UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Binds a single unbound conversion against `pack`; returns false on failure.
bool BindWithPack(const UnboundConversion* props,
absl::Span<const FormatArgImpl> pack, BoundConversion* bound);
// Formats `args` according to `format` into `raw_sink`; returns false if the
// format could not be processed.
bool FormatUntyped(FormatRawSinkImpl raw_sink,
UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Appends the formatted output to `*out` and returns `*out`; on failure the
// appended part is erased again.
std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Returns the formatted output, or an empty string on failure.
std::string FormatPack(const UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Formats into a FILE. Returns the number of bytes written, or -1 with errno
// set on failure.
int FprintF(std::FILE* output, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Formats into a buffer of `size` bytes, NUL-terminating it when `size` is
// non-zero. Returns the length of the complete output, or -1 with errno set
// if formatting fails.
int SnprintF(char* output, size_t size, UntypedFormatSpecImpl format,
absl::Span<const FormatArgImpl> args);
// Returned by Streamed(v). Converts via '%s' to the std::string created
// by std::ostream << v.
//
// The wrapper stores a reference to `v`, not a copy, so `v` must stay alive
// for as long as the wrapper is used.
template <typename T>
class StreamedWrapper {
public:
explicit StreamedWrapper(const T& v) : v_(v) { }
private:
// The conversion function is a friend so that it can read `v_`.
template <typename S>
friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl(
const StreamedWrapper<S>& v, FormatConversionSpecImpl conv,
FormatSinkImpl* out);
const T& v_;
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_

View file

@ -0,0 +1,143 @@
#include "absl/strings/internal/str_format/bind.h"
#include <string.h>
#include <limits>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Fixture for the binding tests.
class FormatBindTest : public ::testing::Test {
 public:
  // Parses `s` as a single unbound conversion (without the leading '%').
  // Returns true only if the parser consumed the whole string.
  bool Extract(const char *s, UnboundConversion *props, int *next) const {
    const char *const end = s + strlen(s);
    return ConsumeUnboundConversion(s, end, props, next) == end;
  }
};
// Table-driven test: each conversion is parsed with Extract() and then bound
// against a pack of four int arguments (10, 20, 30, 40).
TEST_F(FormatBindTest, BindSingle) {
struct Expectation {
int line;
const char *fmt;
// Number of phases expected to succeed: 0 = parsing fails, 1 = parsing
// succeeds but binding fails, 2 = both succeed.
int ok_phases;
// Expected bound argument, width and precision (checked only when
// ok_phases == 2).
const FormatArgImpl *arg;
int width;
int precision;
// Not checked by the loop below.
int next_arg;
};
// Width/precision value meaning "not specified".
const int no = -1;
const int ia[] = { 10, 20, 30, 40};
const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
FormatArgImpl(ia[2]), FormatArgImpl(ia[3])};
// The failing rows deliberately leave the trailing fields unspecified.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
const Expectation kExpect[] = {
{__LINE__, "d", 2, &args[0], no, no, 2},
{__LINE__, "4d", 2, &args[0], 4, no, 2},
{__LINE__, ".5d", 2, &args[0], no, 5, 2},
{__LINE__, "4.5d", 2, &args[0], 4, 5, 2},
{__LINE__, "*d", 2, &args[1], 10, no, 3},
{__LINE__, ".*d", 2, &args[1], no, 10, 3},
{__LINE__, "*.*d", 2, &args[2], 10, 20, 4},
{__LINE__, "1$d", 2, &args[0], no, no, 0},
{__LINE__, "2$d", 2, &args[1], no, no, 0},
{__LINE__, "3$d", 2, &args[2], no, no, 0},
{__LINE__, "4$d", 2, &args[3], no, no, 0},
{__LINE__, "2$*1$d", 2, &args[1], 10, no, 0},
{__LINE__, "2$*2$d", 2, &args[1], 20, no, 0},
{__LINE__, "2$*3$d", 2, &args[1], 30, no, 0},
{__LINE__, "2$.*1$d", 2, &args[1], no, 10, 0},
{__LINE__, "2$.*2$d", 2, &args[1], no, 20, 0},
{__LINE__, "2$.*3$d", 2, &args[1], no, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "2$*2$.*2$d", 2, &args[1], 20, 20, 0},
{__LINE__, "2$*1$.*3$d", 2, &args[1], 10, 30, 0},
{__LINE__, "2$*3$.*1$d", 2, &args[1], 30, 10, 0},
{__LINE__, "1$*d", 0}, // indexed, then positional
{__LINE__, "*2$d", 0}, // positional, then indexed
{__LINE__, "6$d", 1}, // arg position out of bounds
{__LINE__, "1$6$d", 0}, // width position incorrectly specified
{__LINE__, "1$.6$d", 0}, // precision position incorrectly specified
{__LINE__, "1$*6$d", 1}, // width position out of bounds
{__LINE__, "1$.*6$d", 1}, // precision position out of bounds
};
#pragma GCC diagnostic pop
for (const Expectation &e : kExpect) {
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UnboundConversion props;
BoundConversion bound;
// Count how many of the two phases (parse, bind) succeed.
int ok_phases = 0;
int next = 0;
if (Extract(e.fmt, &props, &next)) {
++ok_phases;
if (BindWithPack(&props, args, &bound)) {
++ok_phases;
}
}
EXPECT_EQ(e.ok_phases, ok_phases);
// The bound result is only meaningful when both phases were expected to
// succeed.
if (e.ok_phases < 2) continue;
if (e.arg != nullptr) {
EXPECT_EQ(e.arg, bound.arg());
}
EXPECT_EQ(e.width, bound.width());
EXPECT_EQ(e.precision, bound.precision());
}
}
// Regression test: a `*` width of INT_MIN cannot be negated directly, so the
// bound width is expected to be clamped to INT_MAX.
TEST_F(FormatBindTest, WidthUnderflowRegression) {
UnboundConversion props;
BoundConversion bound;
int next = 0;
// The first argument supplies the width, the second is the converted value.
const int args_i[] = {std::numeric_limits<int>::min(), 17};
const FormatArgImpl args[] = {FormatArgImpl(args_i[0]),
FormatArgImpl(args_i[1])};
ASSERT_TRUE(Extract("*d", &props, &next));
ASSERT_TRUE(BindWithPack(&props, args, &bound));
EXPECT_EQ(bound.width(), std::numeric_limits<int>::max());
EXPECT_EQ(bound.arg(), args + 1);
}
// Table-driven test of whole format strings: each format is summarized with
// Summarize() against the arguments (10, 20, 30, 40, -10) and compared with
// the expected `{arg:flags width.precision conv}` rendering.
TEST_F(FormatBindTest, FormatPack) {
struct Expectation {
int line;
const char *fmt;
const char *summary;
};
const int ia[] = { 10, 20, 30, 40, -10 };
const FormatArgImpl args[] = {FormatArgImpl(ia[0]), FormatArgImpl(ia[1]),
FormatArgImpl(ia[2]), FormatArgImpl(ia[3]),
FormatArgImpl(ia[4])};
// The last two rows use the negative fifth argument: as a width it turns
// into the '-' flag plus width 10, as a precision it is omitted from the
// summary.
const Expectation kExpect[] = {
{__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"},
{__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"},
{__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"},
{__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"},
{__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"},
{__LINE__, "a%.*fb", "a{20:.10f}b"},
{__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"},
{__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"},
{__LINE__, "a%04ldb", "a{10:04d}b"},
{__LINE__, "a%-#04lldb", "a{10:-#04d}b"},
{__LINE__, "a%1$*5$db", "a{10:-10d}b"},
{__LINE__, "a%1$.*5$db", "a{10:d}b"},
};
for (const Expectation &e : kExpect) {
absl::string_view fmt = e.fmt;
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UntypedFormatSpecImpl format(fmt);
EXPECT_EQ(e.summary,
str_format_internal::Summarize(format, absl::MakeSpan(args)))
<< "line:" << e.line;
}
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,319 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
#include "absl/base/attributes.h"
#include "absl/strings/internal/str_format/arg.h"
#include "absl/strings/internal/str_format/extension.h"
// Compile time check support for entry points.
#ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1
#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Returns true if every argument is true. With no arguments the result is
// true (base case of the recursion below).
constexpr bool AllOf() { return true; }

// Recursive case: the result is false as soon as `b` is false, otherwise it
// is the result for the remaining arguments.
template <typename... T>
constexpr bool AllOf(bool b, T... t) {
  return b ? AllOf(t...) : false;
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// Returns true if `c` occurs in the NUL-terminated string `chars`. The
// terminator itself counts as a match, so searching for '\0' returns true.
constexpr bool ContainsChar(const char* chars, char c) {
  return (*chars != c)
             ? (*chars != '\0' && ContainsChar(chars + 1, c))
             : true;
}
// A constexpr compatible list of Convs.
// It is a non-owning view: `array` points to `count` conversion sets.
struct ConvList {
  const FormatConversionCharSet* array;
  int count;

  // We do the bound check here to avoid having to do it on the callers.
  // Returning an empty FormatConversionCharSet has the same effect as
  // short circuiting because it will never match any conversion.
  // Negative indices are out of range as well: an invalid positional
  // argument is looked up as `value - 1` with `value == 0`, i.e. index -1,
  // which must not read in front of the array.
  constexpr FormatConversionCharSet operator[](int i) const {
    return (i >= 0 && i < count) ? array[i] : FormatConversionCharSet{};
  }

  // Returns the list without its first element; an empty list is returned
  // unchanged.
  constexpr ConvList without_front() const {
    return count != 0 ? ConvList{array + 1, count - 1} : *this;
  }
};
// Storage for `count` conversion sets. At least one element is always
// allocated so that the array type stays valid when `count` is 0.
template <size_t count>
struct ConvListT {
// Make sure the array has size > 0.
FormatConversionCharSet list[count ? count : 1];
};
// Returns the character at `index` in `str`, or '\0' if `index` is past the
// end, so callers can probe ahead without checking the length first.
constexpr char GetChar(string_view str, size_t index) {
  return index >= str.size() ? char{} : str[index];
}
// Returns `str` without its first `len` characters. If `str` is shorter than
// `len`, an empty (default-constructed) view is returned.
constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
  return str.size() < len
             ? string_view()
             : string_view(str.data() + len, str.size() - len);
}
// Strips from the front of `format` every leading character that occurs in
// `chars` and returns the remainder.
constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
  return !ContainsChar(chars, GetChar(format, 0))
             ? format
             : ConsumeAnyOf(ConsumeFront(format), chars);
}
// Returns true if `c` is one of the ASCII decimal digits '0'..'9'.
constexpr bool IsDigit(char c) { return '0' <= c && c <= '9'; }
// Helper class for the ParseDigits function.
// It encapsulates the two return values we need there: the parsed number and
// the part of the format that follows it.
struct Integer {
  string_view format;
  int value;

  // If the next character is a '$', consume it.
  // Otherwise, make `this` an invalid positional argument (value 0) and
  // leave the format untouched.
  constexpr Integer ConsumePositionalDollar() const {
    return GetChar(format, 0) != '$' ? Integer{format, 0}
                                     : Integer{ConsumeFront(format), value};
  }
};
// Parses the run of decimal digits at the front of `format`, accumulating
// onto `value`. Returns the resulting number together with the rest of the
// format; if there is no leading digit, `value` is returned as is.
constexpr Integer ParseDigits(string_view format, int value = 0) {
  return !IsDigit(GetChar(format, 0))
             ? Integer{format, value}
             : ParseDigits(ConsumeFront(format),
                           10 * value + GetChar(format, 0) - '0');
}
// Parse digits for a positional argument.
// The parsing also consumes the '$'.
// If the digits are not followed by '$', the returned value is 0 (an invalid
// position) and the returned format starts right after the digits.
constexpr Integer ParsePositional(string_view format) {
return ParseDigits(format).ConsumePositionalDollar();
}
// Parses a single conversion specifier.
// See ConvParser::Run() for post conditions.
//
// The parser is an immutable value: every step returns a new ConvParser with
// the updated state, and each function body is a single return statement.
// `error_` is sticky: once set, later steps cannot clear it.
class ConvParser {
// The Set* helpers return a copy of this parser with one field replaced.
constexpr ConvParser SetFormat(string_view format) const {
return ConvParser(format, args_, error_, arg_position_, is_positional_);
}
constexpr ConvParser SetArgs(ConvList args) const {
return ConvParser(format_, args, error_, arg_position_, is_positional_);
}
// ORs `error` into the current error state.
constexpr ConvParser SetError(bool error) const {
return ConvParser(format_, args_, error_ || error, arg_position_,
is_positional_);
}
constexpr ConvParser SetArgPosition(int arg_position) const {
return ConvParser(format_, args_, error_, arg_position, is_positional_);
}
// Consumes the next arg and verifies that it matches `conv`.
// `error_` is set if there is no next arg or if it doesn't match `conv`.
constexpr ConvParser ConsumeNextArg(char conv) const {
return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
}
// Verify that positional argument `i.value` matches `conv`.
// `error_` is set if `i.value` is not a valid argument or if it doesn't
// match.
// Positions are 1-based, hence the `- 1` when indexing `args_`.
constexpr ConvParser VerifyPositional(Integer i, char conv) const {
return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
}
// Parse the position of the arg and store it in `arg_position_`.
constexpr ConvParser ParseArgPosition(Integer arg) const {
return SetFormat(arg.format).SetArgPosition(arg.value);
}
// Consume the flags.
constexpr ConvParser ParseFlags() const {
return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
}
// Consume the width.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
// If there is neither a digit nor a '*', nothing is consumed.
constexpr ConvParser ParseWidth() const {
return IsDigit(GetChar(format_, 0))
? SetFormat(ParseDigits(format_).format)
: GetChar(format_, 0) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_)), '*')
: SetFormat(ConsumeFront(format_))
.ConsumeNextArg('*')
: *this;
}
// Consume the precision.
// If it is '*', we verify that it matches `args_`. `error_` is set if it
// doesn't match.
// If the format does not continue with '.', nothing is consumed.
constexpr ConvParser ParsePrecision() const {
return GetChar(format_, 0) != '.'
? *this
: GetChar(format_, 1) == '*'
? is_positional_
? VerifyPositional(
ParsePositional(ConsumeFront(format_, 2)), '*')
: SetFormat(ConsumeFront(format_, 2))
.ConsumeNextArg('*')
: SetFormat(ParseDigits(ConsumeFront(format_)).format);
}
// Consume the length characters.
constexpr ConvParser ParseLength() const {
return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
}
// Consume the conversion character and verify that it matches `args_`.
// `error_` is set if it doesn't match.
constexpr ConvParser ParseConversion() const {
return is_positional_
? VerifyPositional({ConsumeFront(format_), arg_position_},
GetChar(format_, 0))
: ConsumeNextArg(GetChar(format_, 0))
.SetFormat(ConsumeFront(format_));
}
// Full-state constructor used by the Set* helpers above.
constexpr ConvParser(string_view format, ConvList args, bool error,
int arg_position, bool is_positional)
: format_(format),
args_(args),
error_(error),
arg_position_(arg_position),
is_positional_(is_positional) {}
public:
// Starts parsing at `format` with no error and argument position 0.
constexpr ConvParser(string_view format, ConvList args, bool is_positional)
: format_(format),
args_(args),
error_(false),
arg_position_(0),
is_positional_(is_positional) {}
// Consume the whole conversion specifier.
// `format()` will be set to the character after the conversion character.
// `error()` will be set if any of the arguments do not match.
constexpr ConvParser Run() const {
return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this)
.ParseFlags()
.ParseWidth()
.ParsePrecision()
.ParseLength()
.ParseConversion();
}
constexpr string_view format() const { return format_; }
constexpr ConvList args() const { return args_; }
constexpr bool error() const { return error_; }
constexpr bool is_positional() const { return is_positional_; }
private:
// Remaining, not yet consumed part of the format.
string_view format_;
// Current list of arguments. If we are not in positional mode we will consume
// from the front.
ConvList args_;
bool error_;
// Holds the argument position of the conversion character, if we are in
// positional mode. Otherwise, it is unspecified.
int arg_position_;
// Whether we are in positional mode.
// It changes the behavior of '*' and where to find the converted argument.
bool is_positional_;
};
// Parses a whole format expression.
// See FormatParser::Run().
//
// The parser is a value type: every member function is const and progress is
// made by constructing a new FormatParser for the remaining text (see
// ValidateArg()). The constructor skips literal text, so `format_` is always
// either empty or positioned at the '%' that starts a conversion specifier.
class FormatParser {
// Returns true when scanning of literal text must stop: `format` is empty, or
// it starts with a '%' that is not part of the "%%" escape.
static constexpr bool FoundPercent(string_view format) {
return format.empty() ||
(GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
}
// We use an inner function to increase the recursion limit.
// The inner function consumes up to `limit` characters on every run.
// This increases the limit from 512 to ~512*limit.
// Each step drops one character, or two when the front of `format` is "%%".
static constexpr string_view ConsumeNonPercentInner(string_view format,
int limit = 20) {
return FoundPercent(format) || !limit
? format
: ConsumeNonPercentInner(
ConsumeFront(format, GetChar(format, 0) == '%' &&
GetChar(format, 1) == '%'
? 2
: 1),
limit - 1);
}
// Consume characters until the next conversion spec %.
// It skips %%.
static constexpr string_view ConsumeNonPercent(string_view format) {
return FoundPercent(format)
? format
: ConsumeNonPercent(ConsumeNonPercentInner(format));
}
// Returns true if `format` consists of zero or more digits followed by '$'.
// Run() calls it on the text right after the first '%' to decide whether the
// whole format string is validated in positional mode.
static constexpr bool IsPositional(string_view format) {
return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format))
: GetChar(format, 0) == '$';
}
// Validates the remaining format.
// When `format_` is not empty, its first character (the '%') is dropped and a
// ConvParser checks the conversion specifier; ValidateArg() then continues
// with whatever follows it.
constexpr bool RunImpl(bool is_positional) const {
// In non-positional mode we require all arguments to be consumed.
// In positional mode just reaching the end of the format without errors is
// enough.
return (format_.empty() && (is_positional || args_.count == 0)) ||
(!format_.empty() &&
ValidateArg(
ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
}
// Fails if `conv` recorded an error; otherwise recurses into a new
// FormatParser built from the format text and argument list that `conv` left
// behind, keeping the same positional mode.
constexpr bool ValidateArg(ConvParser conv) const {
return !conv.error() && FormatParser(conv.format(), conv.args())
.RunImpl(conv.is_positional());
}
public:
// Skips the leading literal text of `format` (including "%%" escapes) and
// stores the remainder together with `args`.
constexpr FormatParser(string_view format, ConvList args)
: format_(ConsumeNonPercent(format)), args_(args) {}
// Runs the parser for `format` and `args`.
// It verifies that the format is valid and that all conversion specifiers
// match the arguments passed.
// In non-positional mode it also verifies that all arguments are consumed.
// Positional mode is selected from the first conversion specifier only.
constexpr bool Run() const {
return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
}
private:
// Remaining format text: empty, or starting at a conversion's '%'.
string_view format_;
// Current list of arguments.
// If we are not in positional mode we will consume from the front and will
// have to be empty in the end.
ConvList args_;
};
// Compile-time entry point of the checker.
// `C...` holds one conversion set per format argument, in argument order. The
// sets are packed into a ConvListT, whose `list` member and the pack size
// form the argument list handed to FormatParser.
// Returns the result of FormatParser::Run() for `format`.
template <FormatConversionCharSet... C>
constexpr bool ValidFormatImpl(string_view format) {
return FormatParser(format,
{ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
.Run();
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_

View file

@ -0,0 +1,156 @@
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Renders a conversion set as text for use in test expectations: the name of
// every conversion character contained in `conv`, in the order in which
// ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_ enumerates them, followed by "*" when
// the set also contains kStar.
std::string ConvToString(FormatConversionCharSet conv) {
  std::string text;
  // Expanded once per known conversion character `c`.
#define APPEND_CONV_IF_SET(c)                                 \
  if (Contains(conv, FormatConversionCharSetInternal::c)) {  \
    text += #c;                                               \
  }
  ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(APPEND_CONV_IF_SET, )
#undef APPEND_CONV_IF_SET
  const bool accepts_star =
      Contains(conv, FormatConversionCharSetInternal::kStar);
  if (accepts_star) {
    text += "*";
  }
  return text;
}
// Checks the conversion sets that ArgumentToConv<T>() reports for a few
// representative argument types.
TEST(StrFormatChecker, ArgumentToConv) {
  // Strings accept only %s.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string>()), "s");

  // A C string can be printed as text or as an address.
  EXPECT_EQ(ConvToString(ArgumentToConv<const char*>()), "sp");

  // Floating point conversions only.
  EXPECT_EQ(ConvToString(ArgumentToConv<double>()), "fFeEgGaA");

  // Integers additionally qualify as a '*' width/precision argument.
  EXPECT_EQ(ConvToString(ArgumentToConv<int>()), "cdiouxXfFeEgGaA*");

  // Any other object pointer is printable with %p only.
  EXPECT_EQ(ConvToString(ArgumentToConv<std::string*>()), "p");
}
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
// One checker test case: the verdict of the compile-time checker paired with
// the format string it was computed for (used in failure messages).
// Kept as a plain aggregate so it can be brace-initialized in constexpr code.
struct Case {
// Value returned by ValidFormatImpl for `format`.
bool result;
// The format string that was checked.
const char* format;
};
// Runs the checker for `format` against the argument types `Args...` and
// returns the verdict together with the format string.
// Each argument type is mapped to its conversion set with ArgumentToConv.
template <typename... Args>
constexpr Case ValidFormat(const char* format) {
return {ValidFormatImpl<ArgumentToConv<Args>()...>(format), format};
}
// Table-driven test of the compile-time checker. Both tables are constexpr, so
// every entry is evaluated by the compiler; the loops only report the results.
TEST(StrFormatChecker, ValidFormat) {
// We want to make sure these expressions are constexpr and they have the
// expected value.
// If they are not constexpr the attribute will just ignore them and not give
// a compile time error.
enum e {};
enum class e2 {};
// Formats that must be accepted. The first group uses sequential arguments,
// the group starting at "%% %1$d" repeats the same ideas with positional
// ("N$") arguments.
constexpr Case trues[] = {
ValidFormat<>("abc"), //
ValidFormat<e>("%d"), //
ValidFormat<e2>("%d"), //
ValidFormat<int>("%% %d"), //
ValidFormat<int>("%ld"), //
ValidFormat<int>("%lld"), //
ValidFormat<std::string>("%s"), //
ValidFormat<std::string>("%10s"), //
ValidFormat<int>("%.10x"), //
ValidFormat<int, int>("%*.3x"), //
ValidFormat<int>("%1.d"), //
ValidFormat<int>("%.d"), //
ValidFormat<int, double>("%d %g"), //
ValidFormat<int, std::string>("%*s"), //
ValidFormat<int, double>("%.*f"), //
ValidFormat<void (*)(), volatile int*>("%p %p"), //
ValidFormat<string_view, const char*, double, void*>(
"string_view=%s const char*=%s double=%f void*=%p)"),
ValidFormat<int>("%% %1$d"), //
ValidFormat<int>("%1$ld"), //
ValidFormat<int>("%1$lld"), //
ValidFormat<std::string>("%1$s"), //
ValidFormat<std::string>("%1$10s"), //
ValidFormat<int>("%1$.10x"), //
ValidFormat<int>("%1$*1$.*1$d"), //
ValidFormat<int, int>("%1$*2$.3x"), //
ValidFormat<int>("%1$1.d"), //
ValidFormat<int>("%1$.d"), //
ValidFormat<double, int>("%2$d %1$g"), //
ValidFormat<int, std::string>("%2$*1$s"), //
ValidFormat<int, double>("%2$.*1$f"), //
ValidFormat<void*, string_view, const char*, double>(
"string_view=%2$s const char*=%3$s double=%4$f void*=%1$p "
"repeat=%3$s)")};
for (Case c : trues) {
EXPECT_TRUE(c.result) << c.format;
}
// Formats that must be rejected: unused or missing arguments, conversions
// that do not fit the argument type, malformed specifiers, and a mix of
// sequential and positional arguments (last entry).
constexpr Case falses[] = {
ValidFormat<int>(""), //
ValidFormat<e>("%s"), //
ValidFormat<e2>("%s"), //
ValidFormat<>("%s"), //
ValidFormat<>("%r"), //
ValidFormat<int>("%s"), //
ValidFormat<int>("%.1.d"), //
ValidFormat<int>("%*1d"), //
ValidFormat<int>("%1-d"), //
ValidFormat<std::string, int>("%*s"), //
ValidFormat<int>("%*d"), //
ValidFormat<std::string>("%p"), //
ValidFormat<int (*)(int)>("%d"), //
ValidFormat<>("%3$d"), //
ValidFormat<>("%1$r"), //
ValidFormat<int>("%1$s"), //
ValidFormat<int>("%1$.1.d"), //
ValidFormat<int>("%1$*2$1d"), //
ValidFormat<int>("%1$1-d"), //
ValidFormat<std::string, int>("%2$*1$s"), //
ValidFormat<std::string>("%1$p"),
ValidFormat<int, int>("%d %2$d"), //
};
for (Case c : falses) {
EXPECT_FALSE(c.result) << c.format;
}
}
// Checks that a very long format string can still be validated at compile
// time. The literal is built from three 4000-character runs of plain text
// separated by "%d" and "%s".
// NOTE(review): presumably this guards the checker against hitting the
// compiler's constexpr recursion limit on long literal runs - confirm.
TEST(StrFormatChecker, LongFormat) {
// The helper macros multiply a 40-character literal up to 4000 characters
// through string-literal concatenation.
#define CHARS_X_40 "1234567890123456789012345678901234567890"
#define CHARS_X_400 \
CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 CHARS_X_40 \
CHARS_X_40 CHARS_X_40 CHARS_X_40
#define CHARS_X_4000 \
CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400 \
CHARS_X_400 CHARS_X_400 CHARS_X_400 CHARS_X_400
constexpr char long_format[] =
CHARS_X_4000 "%d" CHARS_X_4000 "%s" CHARS_X_4000;
// `constexpr` forces the validation to happen during compilation.
constexpr bool is_valid = ValidFormat<int, std::string>(long_format).result;
EXPECT_TRUE(is_valid);
}
#endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,865 @@
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <cctype>
#include <cmath>
#include <limits>
#include <string>
#include <thread> // NOLINT
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/str_format/bind.h"
#include "absl/types/optional.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Returns the number of elements of a built-in array. The count is deduced
// from the array type, so the argument itself is never inspected.
template <typename Element, size_t Length>
size_t ArraySize(Element (&)[Length]) {
  return Length;
}
// Overload set that maps an argument's static type to the printf length
// modifier needed to pass a value of that type to the C library's printf.
// Only the parameter type matters; the value is ignored.

// Floating point types.
std::string LengthModFor(float) {
  return std::string();
}
std::string LengthModFor(double) {
  return std::string();
}
std::string LengthModFor(long double) {
  return std::string("L");
}

// Character types.
std::string LengthModFor(char) {
  return std::string("hh");
}
std::string LengthModFor(signed char) {
  return std::string("hh");
}
std::string LengthModFor(unsigned char) {
  return std::string("hh");
}

// Integer types, narrowest to widest.
std::string LengthModFor(short) {  // NOLINT
  return std::string("h");
}
std::string LengthModFor(unsigned short) {  // NOLINT
  return std::string("h");
}
std::string LengthModFor(int) {
  return std::string();
}
std::string LengthModFor(unsigned) {
  return std::string();
}
std::string LengthModFor(long) {  // NOLINT
  return std::string("l");
}
std::string LengthModFor(unsigned long) {  // NOLINT
  return std::string("l");
}
std::string LengthModFor(long long) {  // NOLINT
  return std::string("ll");
}
std::string LengthModFor(unsigned long long) {  // NOLINT
  return std::string("ll");
}
// Returns a human-readable rendering of the character code `v` for use in
// test traces.
// If the low byte of `v` is printable, the result is that single character.
// Otherwise the result is a backslash followed by the low byte formatted with
// "%#.2x" (for example "\0x0a" for a newline).
std::string EscCharImpl(int v) {
  if (std::isprint(static_cast<unsigned char>(v))) {
    return std::string(1, static_cast<char>(v));
  }
  char buf[64];
  const int n = snprintf(buf, sizeof(buf), "\\%#.2x",
                         static_cast<unsigned>(v & 0xff));
  // `n` is checked to be positive first, so converting it to size_t is safe and
  // avoids comparing a signed value against the unsigned sizeof result.
  assert(n > 0 && static_cast<size_t>(n) < sizeof(buf));
  return std::string(buf, static_cast<size_t>(n));
}
// Character overloads of Esc(): all three character types are widened to int
// and rendered by EscCharImpl().
std::string Esc(char v) {
  return EscCharImpl(static_cast<int>(v));
}
std::string Esc(signed char v) {
  return EscCharImpl(static_cast<int>(v));
}
std::string Esc(unsigned char v) {
  return EscCharImpl(static_cast<int>(v));
}
// Generic Esc(): renders `v` by streaming it into a string stream with
// operator<< and returning the accumulated text.
template <typename T>
std::string Esc(const T &v) {
  std::ostringstream stream;
  stream << v;
  return stream.str();
}
// Appends the result of formatting `format` with the arguments in `ap` (as
// vsnprintf would) to `*dst`.
//
// `ap` itself is never consumed: every vsnprintf call works on a va_copy, so
// the caller remains responsible for the matching va_end. If vsnprintf reports
// an error (negative result) nothing is appended.
void StrAppendV(std::string *dst, const char *format, va_list ap) {
  // First try with a small fixed size buffer
  static const int kSpaceLength = 1024;
  char space[kSpaceLength];

  // It's possible for methods that use a va_list to invalidate
  // the data in it upon use. The fix is to make a copy
  // of the structure before using it and use that copy instead.
  va_list backup_ap;
  va_copy(backup_ap, ap);
  int result = vsnprintf(space, kSpaceLength, format, backup_ap);
  va_end(backup_ap);

  if (result < 0) {
    // Just an error.
    return;
  }
  if (result < kSpaceLength) {
    // Normal case -- everything fit.
    dst->append(space, static_cast<size_t>(result));
    return;
  }

  // Increase the buffer size to the size requested by vsnprintf,
  // plus one for the closing \0. The buffer is owned by a std::string so it is
  // released even if the append below throws.
  const int length = result + 1;
  std::string buf(static_cast<size_t>(length), '\0');

  // Restore the va_list before we use it again
  va_copy(backup_ap, ap);
  result = vsnprintf(&buf[0], static_cast<size_t>(length), format, backup_ap);
  va_end(backup_ap);

  if (result >= 0 && result < length) {
    // It fit
    dst->append(buf.data(), static_cast<size_t>(result));
  }
}
// printf-style convenience wrapper: formats the variadic arguments according
// to `format` and appends the text to `*out` by forwarding to StrAppendV().
void StrAppend(std::string *out, const char *format, ...) {
  va_list var_args;
  va_start(var_args, format);
  StrAppendV(out, format, var_args);
  va_end(var_args);
}
// printf-style convenience wrapper: formats the variadic arguments according
// to `format` and returns the text as a new string. The formatting itself is
// done by StrAppendV().
std::string StrPrint(const char *format, ...) {
  std::string formatted;
  va_list var_args;
  va_start(var_args, format);
  StrAppendV(&formatted, format, var_args);
  va_end(var_args);
  return formatted;
}
// Fixture shared by the TEST_F cases in this file. It adds no state or setup
// of its own.
class FormatConvertTest : public ::testing::Test { };
// Formats `str` (expected to hold the text "hello") with a set of %s
// specifiers that exercise precision, width and left-justification, and
// compares the output against the expected text.
// T is whatever string-like type the caller passes (see BasicString).
template <typename T>
void TestStringConvert(const T& str) {
  const FormatArgImpl args[] = {FormatArgImpl(str)};
  struct Expectation {
    const char *out;
    const char *fmt;
  };
  // Padded expectations must contain exactly (width - printed length) spaces:
  // "%1$5.2s" prints "he" in a field of 5, i.e. three pad characters.
  const Expectation kExpect[] = {
    {"hello",  "%1$s"      },
    {"",       "%1$.s"     },
    {"",       "%1$.0s"    },
    {"h",      "%1$.1s"    },
    {"he",     "%1$.2s"    },
    {"hello",  "%1$.10s"   },
    {" hello", "%1$6s"     },
    {"   he",  "%1$5.2s"   },
    {"he   ",  "%1$-5.2s"  },
    {"hello ", "%1$-6.10s" },
  };
  for (const Expectation &e : kExpect) {
    UntypedFormatSpecImpl format(e.fmt);
    EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Runs the shared %s expectations against every supported way of passing the
// text "hello".
TEST_F(FormatConvertTest, BasicString) {
  const char* const as_pointer = "hello";
  const std::string as_string("hello");
  const string_view as_view("hello");

  TestStringConvert("hello");  // As char array.
  TestStringConvert(as_pointer);
  TestStringConvert(as_string);
  TestStringConvert(as_view);
}
// A null `const char*` formatted with %s must produce an empty string.
TEST_F(FormatConvertTest, NullString) {
  const char* p = nullptr;
  const FormatArgImpl args[] = {FormatArgImpl(p)};
  UntypedFormatSpecImpl format("%s");
  EXPECT_EQ("", FormatPack(format, absl::MakeSpan(args)));
}
// %s on a `const char*` stops at whichever comes first: the precision or the
// NUL terminator.
TEST_F(FormatConvertTest, StringPrecision) {
  {
    // We cap at the precision.
    // The pointee is a lone char that is not followed by a terminator.
    char c = 'a';
    const char* p = &c;
    UntypedFormatSpecImpl format("%.1s");
    EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)}));
  }
  {
    // We cap at the NUL-terminator.
    const char* p = "ABC";
    UntypedFormatSpecImpl format2("%.10s");
    EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)}));
  }
}
// Pointer formatting is implementation defined, so the produced text cannot be
// compared literally. This matcher accepts the string `arg` if it denotes the
// pointer `ptr`: either the text "(nil)" for a null pointer, or any text that
// the C library's "%p" scanner parses back to the same address.
// Text that cannot be parsed at all is reported through a FATAL raw log.
MATCHER_P(MatchesPointerString, ptr, "") {
  const bool null_spelled_as_nil = (ptr == nullptr && arg == "(nil)");
  if (null_spelled_as_nil) {
    return true;
  }
  void* round_tripped = nullptr;
  const int fields_read = sscanf(arg.c_str(), "%p", &round_tripped);
  if (fields_read != 1) {
    ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str());
  }
  return ptr == round_tripped;
}
// Formats pointers of several kinds (object, char, function, volatile, null)
// with %p and checks through MatchesPointerString that the text denotes the
// original address. Flags, width and precision must not change the address
// that is printed.
TEST_F(FormatConvertTest, Pointer) {
  static int x = 0;
  const int *xp = &x;
  char c = 'h';
  char *mcp = &c;
  const char *cp = "hi";
  const char *cnil = nullptr;
  const int *inil = nullptr;
  using VoidF = void (*)();
  VoidF fp = [] {}, fnil = nullptr;
  volatile char vc;
  volatile char *vcp = &vc;
  volatile char *vcnil = nullptr;
  // Positional index (1-based) of each argument:
  //   1 xp, 2 cp, 3 inil, 4 cnil, 5 mcp, 6 fp, 7 fnil, 8 vcp, 9 vcnil.
  const FormatArgImpl args_array[] = {
      FormatArgImpl(xp),   FormatArgImpl(cp),  FormatArgImpl(inil),
      FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp),
      FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil),
  };
  auto args = absl::MakeConstSpan(args_array);

  // Right-justified variants (first argument).
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.1p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%30.20p"), args),
              MatchesPointerString(&x));

  // Left-justified variants: each spec above with the '-' flag added. The
  // fourth entry used to repeat "%.20p" without the flag.
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.1p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.20p"), args),
              MatchesPointerString(&x));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args),
              MatchesPointerString(&x));

  // const char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args),
              MatchesPointerString(cp));
  // null const int*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args),
              MatchesPointerString(nullptr));
  // null const char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args),
              MatchesPointerString(nullptr));
  // nonconst char*
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args),
              MatchesPointerString(mcp));

  // function pointers
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args),
              MatchesPointerString(reinterpret_cast<const void*>(fp)));
  EXPECT_THAT(
      FormatPack(UntypedFormatSpecImpl("%8$p"), args),
      MatchesPointerString(reinterpret_cast<volatile const void *>(vcp)));

  // null function pointers
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args),
              MatchesPointerString(nullptr));
  EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args),
              MatchesPointerString(nullptr));
}
// Holder for two plain (unscoped) enums used by the Enum test: one with only
// positive enumerators and one with only negative enumerators.
struct Cardinal {
enum Pos { k1 = 1, k2 = 2, k3 = 3 };
enum Neg { kM1 = -1, kM2 = -2, kM3 = -3 };
};
// Enum values format like their integer value with %d, including negative
// enumerators.
TEST_F(FormatConvertTest, Enum) {
  const Cardinal::Pos k3 = Cardinal::k3;
  const Cardinal::Neg km3 = Cardinal::kM3;
  const FormatArgImpl args[] = {FormatArgImpl(k3), FormatArgImpl(km3)};
  const auto arg_span = absl::MakeSpan(args);

  // First argument: positive enumerator.
  EXPECT_EQ("3", FormatPack(UntypedFormatSpecImpl("%1$d"), arg_span));
  // Second argument: negative enumerator.
  EXPECT_EQ("-3", FormatPack(UntypedFormatSpecImpl("%2$d"), arg_span));
}
// Type-parameterized variant of the fixture: the TYPED_TEST_P cases below are
// instantiated once per type in the list passed at instantiation time.
template <typename T>
class TypedFormatConvertTest : public FormatConvertTest { };
TYPED_TEST_SUITE_P(TypedFormatConvertTest);
// Returns every subset of the printf flag characters "-#0+ " as a string, 32
// entries in total.
// Entry `i` contains flag number `b` (in the order "-", "#", "0", "+", " ")
// exactly when bit `b` of `i` is set, so entry 0 is empty and the last entry
// holds all five flags.
std::vector<std::string> AllFlagCombinations() {
  const char kFlags[] = {'-', '#', '0', '+', ' '};
  const size_t kNumFlags = sizeof(kFlags) / sizeof(kFlags[0]);
  const size_t kNumSets = size_t{1} << kNumFlags;

  std::vector<std::string> result;
  result.reserve(kNumSets);  // The final size is known up front.
  for (size_t mask = 0; mask < kNumSets; ++mask) {
    // Build the flag string in place inside the vector.
    result.emplace_back();
    std::string &flag_set = result.back();
    for (size_t bit = 0; bit < kNumFlags; ++bit) {
      if (mask & (size_t{1} << bit)) {
        flag_set += kFlags[bit];
      }
    }
  }
  return result;
}
// For the integer type under test, formats a set of interesting values with
// every combination of flags, width, precision and integer conversion, and
// expects the same text that the C library's printf produces.
TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) {
  typedef TypeParam T;
  typedef typename std::make_unsigned<T>::type UnsignedT;
  using remove_volatile_t = typename std::remove_volatile<T>::type;
  const T kMin = std::numeric_limits<remove_volatile_t>::min();
  const T kMax = std::numeric_limits<remove_volatile_t>::max();
  const T kVals[] = {
      remove_volatile_t(1),
      remove_volatile_t(2),
      remove_volatile_t(3),
      remove_volatile_t(123),
      remove_volatile_t(-1),
      remove_volatile_t(-2),
      remove_volatile_t(-3),
      remove_volatile_t(-123),
      remove_volatile_t(0),
      kMax - remove_volatile_t(1),
      kMax,
      kMin + remove_volatile_t(1),
      kMin,
  };
  const char kConvChars[] = {'d', 'i', 'u', 'o', 'x', 'X'};
  const std::string kWid[] = {"", "4", "10"};
  const std::string kPrec[] = {"", ".", ".0", ".4", ".10"};

  const std::vector<std::string> flag_sets = AllFlagCombinations();

  for (const T val : kVals) {
    SCOPED_TRACE(Esc(val));
    const FormatArgImpl args[] = {FormatArgImpl(val)};
    for (const char conv_char : kConvChars) {
      // These depend only on the type and the conversion character, so they
      // are computed once per conversion instead of once per format string.
      const bool is_signed_conv = (conv_char == 'd' || conv_char == 'i');
      const bool is_unsigned_to_signed =
          !std::is_signed<T>::value && is_signed_conv;
      for (const std::string &flag_set : flag_sets) {
        // Don't consider sign-related flags '+' and ' ' when doing
        // unsigned to signed conversions.
        if (is_unsigned_to_signed &&
            flag_set.find_first_of("+ ") != std::string::npos) {
          continue;
        }
        for (const std::string &wid : kWid) {
          for (const std::string &prec : kPrec) {
            std::string new_fmt("%");
            new_fmt += flag_set;
            new_fmt += wid;
            new_fmt += prec;
            // old and new always agree up to here.
            std::string old_fmt = new_fmt;
            new_fmt += conv_char;
            std::string old_result;
            if (is_unsigned_to_signed) {
              // don't expect agreement on unsigned formatted as signed,
              // as printf can't do that conversion properly.  For those
              // cases, we do expect agreement with printf with a "%u"
              // and the unsigned equivalent of 'val'.
              UnsignedT uval = val;
              old_fmt += LengthModFor(uval);
              old_fmt += "u";
              old_result = StrPrint(old_fmt.c_str(), uval);
            } else {
              old_fmt += LengthModFor(val);
              old_fmt += conv_char;
              old_result = StrPrint(old_fmt.c_str(), val);
            }

            SCOPED_TRACE(std::string() + " old_fmt: \"" + old_fmt +
                         "\""
                         " new_fmt: \"" +
                         new_fmt + "\"");
            UntypedFormatSpecImpl format(new_fmt);
            EXPECT_EQ(old_result, FormatPack(format, absl::MakeSpan(args)));
          }
        }
      }
    }
  }
}
// Formats a set of boundary values of the integer type under test with %c and
// expects the same text that the C library's printf produces.
TYPED_TEST_P(TypedFormatConvertTest, Char) {
  using T = TypeParam;
  using Plain = typename std::remove_volatile<T>::type;
  static const T kMin = std::numeric_limits<Plain>::min();
  static const T kMax = std::numeric_limits<Plain>::max();
  // Small values of both signs, zero, and both ends of the value range.
  T kVals[] = {
    Plain(1), Plain(2), Plain(10),
    Plain(-1), Plain(-2), Plain(-10),
    Plain(0),
    kMin + Plain(1), kMin,
    kMax - Plain(1), kMax
  };
  for (const T &c : kVals) {
    const FormatArgImpl args[] = {FormatArgImpl(c)};
    UntypedFormatSpecImpl format("%c");
    const std::string expected = StrPrint("%c", c);
    EXPECT_EQ(expected, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Registers the type-parameterized tests and instantiates them for every
// built-in integer type (plus one volatile type).
// The *_SUITE_P spellings are used to match TYPED_TEST_SUITE_P above; the
// *_CASE_P spellings are the deprecated names of the same macros.
REGISTER_TYPED_TEST_SUITE_P(TypedFormatConvertTest, AllIntsWithFlags, Char);

using AllIntTypes = ::testing::Types<
    int, unsigned, volatile int,
    short, unsigned short,
    long, unsigned long,
    long long, unsigned long long,
    signed char, unsigned char, char>;
INSTANTIATE_TYPED_TEST_SUITE_P(TypedFormatConvertTestWithAllIntTypes,
                               TypedFormatConvertTest, AllIntTypes);
// Elements of std::vector<bool> are accessed through proxy objects rather than
// plain bool references; they must still format like bools (1 / 0 with %d).
TEST_F(FormatConvertTest, VectorBool) {
// Make sure vector<bool>'s values behave as bools.
std::vector<bool> v = {true, false};
const std::vector<bool> cv = {true, false};
// NOTE(review): the FormatArgImpl objects are created inside the same full
// expression as the FormatPack call. `v[0]` yields a temporary proxy, so the
// arguments presumably must not outlive this expression - confirm before
// hoisting them into a named array.
EXPECT_EQ("1,0,1,0",
FormatPack(UntypedFormatSpecImpl("%d,%d,%d,%d"),
absl::Span<const FormatArgImpl>(
{FormatArgImpl(v[0]), FormatArgImpl(v[1]),
FormatArgImpl(cv[0]), FormatArgImpl(cv[1])})));
}
// Formats absl::int128 values (a positive value, its negation, and the
// minimum and maximum) with the integer conversions and compares against
// precomputed text.
TEST_F(FormatConvertTest, Int128) {
  absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979;
  absl::int128 negative = -positive;
  absl::int128 max = absl::Int128Max(), min = absl::Int128Min();
  // Positional index: 1 positive, 2 negative, 3 max, 4 min.
  const FormatArgImpl args[] = {FormatArgImpl(positive),
                                FormatArgImpl(negative), FormatArgImpl(max),
                                FormatArgImpl(min)};

  struct Case {
    const char* format;
    const char* expected;
  } cases[] = {
      // The positive value has 22 digits, so a field width of 30 needs 8 pad
      // characters; the negative value has 23 characters and needs 7.
      {"%1$d", "2595989796776606496405"},
      {"%1$30d", "        2595989796776606496405"},
      {"%1$-30d", "2595989796776606496405        "},
      {"%1$u", "2595989796776606496405"},
      {"%1$x", "8cba9876066020f695"},
      {"%2$d", "-2595989796776606496405"},
      {"%2$30d", "       -2595989796776606496405"},
      {"%2$-30d", "-2595989796776606496405       "},
      {"%2$u", "340282366920938460867384810655161715051"},
      {"%2$x", "ffffffffffffff73456789f99fdf096b"},
      {"%3$d", "170141183460469231731687303715884105727"},
      {"%3$u", "170141183460469231731687303715884105727"},
      {"%3$x", "7fffffffffffffffffffffffffffffff"},
      {"%4$d", "-170141183460469231731687303715884105728"},
      {"%4$x", "80000000000000000000000000000000"},
  };

  for (const auto& c : cases) {
    UntypedFormatSpecImpl format(c.format);
    EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Formats absl::uint128 values (a mid-range value and the maximum) with the
// integer conversions and compares against precomputed text.
TEST_F(FormatConvertTest, Uint128) {
  absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979;
  absl::uint128 max = absl::Uint128Max();
  // Positional index: 1 v, 2 max.
  const FormatArgImpl args[] = {FormatArgImpl(v), FormatArgImpl(max)};

  struct Case {
    const char* format;
    const char* expected;
  } cases[] = {
      // `v` has 22 digits, so a field width of 30 needs 8 pad characters.
      {"%1$d", "2595989796776606496405"},
      {"%1$30d", "        2595989796776606496405"},
      {"%1$-30d", "2595989796776606496405        "},
      {"%1$u", "2595989796776606496405"},
      {"%1$x", "8cba9876066020f695"},
      {"%2$d", "340282366920938463463374607431768211455"},
      {"%2$u", "340282366920938463463374607431768211455"},
      {"%2$x", "ffffffffffffffffffffffffffffffff"},
  };

  for (const auto& c : cases) {
    UntypedFormatSpecImpl format(c.format);
    EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args)));
  }
}
// Cross-checks the formatting of doubles against the C library's printf for
// the product of: format prefixes (kFormats) x floating point conversion
// characters x a large set of values.
TEST_F(FormatConvertTest, Float) {
#ifdef _MSC_VER
// MSVC has a different rounding policy than us so we can't test our
// implementation against the native one there.
return;
#endif // _MSC_VER
// Format prefixes; the conversion character is appended in the loop below.
// The last two take their width / precision from the second argument.
const char *const kFormats[] = {
"%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03",
"%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"};
std::vector<double> doubles = {0.0,
-0.0,
.99999999999999,
99999999999999.,
std::numeric_limits<double>::max(),
-std::numeric_limits<double>::max(),
std::numeric_limits<double>::min(),
-std::numeric_limits<double>::min(),
std::numeric_limits<double>::lowest(),
-std::numeric_limits<double>::lowest(),
std::numeric_limits<double>::epsilon(),
std::numeric_limits<double>::epsilon() + 1,
std::numeric_limits<double>::infinity(),
-std::numeric_limits<double>::infinity()};
// Some regression tests.
doubles.push_back(0.99999999999999989);
if (std::numeric_limits<double>::has_denorm != std::denorm_absent) {
doubles.push_back(std::numeric_limits<double>::denorm_min());
doubles.push_back(-std::numeric_limits<double>::denorm_min());
}
// Decimal digit patterns scaled by a wide range of binary exponents, with
// both signs.
for (double base :
{1., 12., 123., 1234., 12345., 123456., 1234567., 12345678., 123456789.,
1234567890., 12345678901., 123456789012., 1234567890123.}) {
for (int exp = -123; exp <= 123; ++exp) {
for (int sign : {1, -1}) {
doubles.push_back(sign * std::ldexp(base, exp));
}
}
}
// Workaround libc bug.
// https://sourceware.org/bugzilla/show_bug.cgi?id=22142
// The bug is detected by comparing the library's "%f" output for the largest
// double against the known exact decimal expansion.
const bool gcc_bug_22142 =
StrPrint("%f", std::numeric_limits<double>::max()) !=
"1797693134862315708145274237317043567980705675258449965989174768031"
"5726078002853876058955863276687817154045895351438246423432132688946"
"4182768467546703537516986049910576551282076245490090389328944075868"
"5084551339423045832369032229481658085593321233482747978262041447231"
"68738177180919299881250404026184124858368.000000";
if (!gcc_bug_22142) {
for (int exp = -300; exp <= 300; ++exp) {
const double all_ones_mantissa = 0x1fffffffffffff;
doubles.push_back(std::ldexp(all_ones_mantissa, exp));
}
}
if (gcc_bug_22142) {
// Replace the values the reference implementation gets wrong with 0 so that
// they do not produce false failures.
for (auto &d : doubles) {
using L = std::numeric_limits<double>;
double d2 = std::abs(d);
if (d2 == L::max() || d2 == L::min() || d2 == L::denorm_min()) {
d = 0;
}
}
}
// Remove duplicates to speed up the logic below.
std::sort(doubles.begin(), doubles.end());
doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end());
#ifndef __APPLE__
// Apple formats NaN differently (+nan) vs. (nan)
// NaN is appended only after the sort/unique step above.
doubles.push_back(std::nan(""));
#endif
// Reserve the space to ensure we don't allocate memory in the output itself.
std::string str_format_result;
str_format_result.reserve(1 << 20);
std::string string_printf_result;
string_printf_result.reserve(1 << 20);
for (const char *fmt : kFormats) {
for (char f : {'f', 'F', //
'g', 'G', //
'a', 'A', //
'e', 'E'}) {
std::string fmt_str = std::string(fmt) + f;
if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F') {
// This particular test takes way too long with snprintf.
// Disable for the case we are not implementing natively.
continue;
}
for (double d : doubles) {
// Second argument: consumed only by the "*2$" width / precision formats.
int i = -10;
FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)};
UntypedFormatSpecImpl format(fmt_str);
string_printf_result.clear();
StrAppend(&string_printf_result, fmt_str.c_str(), d, i);
str_format_result.clear();
{
AppendPack(&str_format_result, format, absl::MakeSpan(args));
}
if (string_printf_result != str_format_result) {
// We use ASSERT_EQ here because failures are usually correlated and a
// bug would print way too many failed expectations causing the test
// to time out.
ASSERT_EQ(string_printf_result, str_format_result)
<< fmt_str << " " << StrPrint("%.18g", d) << " "
<< StrPrint("%a", d) << " " << StrPrint("%.1080f", d);
}
}
}
}
}
// Checks the rounding of "%.Nf" output at hand-picked precisions for values
// whose exact decimal expansion is known (powers of two and sums of two powers
// of two). Every expectation is compared against a literal, and - except on
// MSVC - also against the C library's printf inside the `format` helper.
TEST_F(FormatConvertTest, FloatRound) {
std::string s;
// Formats `d` with `fmt` into `s` and returns a reference to `s`; the result
// is therefore only valid until the next call.
const auto format = [&](const char *fmt, double d) -> std::string & {
s.clear();
FormatArgImpl args[1] = {FormatArgImpl(d)};
AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args));
#if !defined(_MSC_VER)
// MSVC has a different rounding policy than us so we can't test our
// implementation against the native one there.
EXPECT_EQ(StrPrint(fmt, d), s);
#endif // _MSC_VER
return s;
};
// All of these values have to be exactly represented.
// Otherwise we might not be testing what we think we are testing.
// These values can fit in a 64bit "fast" representation.
const double exact_value = 0.00000000000005684341886080801486968994140625;
assert(exact_value == std::pow(2, -44));
// Round up at a 5xx.
EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001");
// Round up at a >5
EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006");
// Round down at a <5
EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568");
// Nine handling
EXPECT_EQ(format("%.35f", exact_value),
"0.00000000000005684341886080801486969");
EXPECT_EQ(format("%.36f", exact_value),
"0.000000000000056843418860808014869690");
// Round down the last nine.
EXPECT_EQ(format("%.37f", exact_value),
"0.0000000000000568434188608080148696899");
EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147");
// Round up the last nine
EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470");
EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697");
// Round to even (down)
EXPECT_EQ(format("%.43f", exact_value),
"0.0000000000000568434188608080148696899414062");
// Exact
EXPECT_EQ(format("%.44f", exact_value),
"0.00000000000005684341886080801486968994140625");
// Round to even (up), let make the last digits 75 instead of 25
EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)),
"0.0000000000001705302565824240446090698242188");
// Exact, just to check.
EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)),
"0.00000000000017053025658242404460906982421875");
// This value has to be small enough that it won't fit in the uint128
// representation for printing.
const double small_exact_value =
0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT
assert(small_exact_value == std::pow(2, -120));
// Round up at a 5xx.
EXPECT_EQ(format("%.37f", small_exact_value),
"0.0000000000000000000000000000000000008");
// Round down at a <5
EXPECT_EQ(format("%.38f", small_exact_value),
"0.00000000000000000000000000000000000075");
// Round up at a >5
EXPECT_EQ(format("%.41f", small_exact_value),
"0.00000000000000000000000000000000000075232");
// Nine handling
EXPECT_EQ(format("%.55f", small_exact_value),
"0.0000000000000000000000000000000000007523163845262640051");
EXPECT_EQ(format("%.56f", small_exact_value),
"0.00000000000000000000000000000000000075231638452626400510");
EXPECT_EQ(format("%.57f", small_exact_value),
"0.000000000000000000000000000000000000752316384526264005100");
EXPECT_EQ(format("%.58f", small_exact_value),
"0.0000000000000000000000000000000000007523163845262640051000");
// Round down the last nine
EXPECT_EQ(format("%.59f", small_exact_value),
"0.00000000000000000000000000000000000075231638452626400509999");
// Round up the last nine
EXPECT_EQ(format("%.79f", small_exact_value),
"0.000000000000000000000000000000000000"
"7523163845262640050999913838222372338039460");
// Round to even (down)
EXPECT_EQ(format("%.119f", small_exact_value),
"0.000000000000000000000000000000000000"
"75231638452626400509999138382223723380"
"394595633413601376560109201818704605102539062");
// Exact
EXPECT_EQ(format("%.120f", small_exact_value),
"0.000000000000000000000000000000000000"
"75231638452626400509999138382223723380"
"3945956334136013765601092018187046051025390625");
// Round to even (up), let make the last digits 75 instead of 25
EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)),
"0.000000000000000000000000000000000002"
"25694915357879201529997415146671170141"
"183786900240804129680327605456113815307617188");
// Exact, just to check.
EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)),
"0.000000000000000000000000000000000002"
"25694915357879201529997415146671170141"
"1837869002408041296803276054561138153076171875");
}
// Output sink that discards everything written to it. The formatted text is
// never stored; the sink only exists so that the rest of the formatting
// machinery can be exercised.
struct NullSink {
  // Flush hook: intentionally a no-op.
  friend void AbslFormatFlush(NullSink * /*sink*/, string_view /*str*/) {}
};
// Formats `a...` according to `fmt` into a NullSink, i.e. runs the formatter
// while throwing the output away. Returns the result of FormatUntyped().
template <typename... T>
bool FormatWithNullSink(absl::string_view fmt, const T &... a) {
  FormatArgImpl args[] = {FormatArgImpl(a)...};
  const UntypedFormatSpecImpl spec(fmt);
  NullSink sink;
  return FormatUntyped(&sink, spec, absl::MakeSpan(args));
}
// Formatting with the largest possible int as '*' width and/or precision must
// still succeed. The output goes to a NullSink, so nothing is stored.
TEST_F(FormatConvertTest, ExtremeWidthPrecision) {
  constexpr int max = std::numeric_limits<int>::max();
  for (const char *fmt : {"f"}) {
    for (double d : {1e-100, 1.0, 1e100}) {
      // Extreme precision, without and with an explicit width.
      EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d));
      EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d));
      // Extreme width alone, then extreme width and precision together.
      EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d));
      EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d));
    }
  }
}
// Cross-checks the formatting of long double values against the C library's
// printf for the product of: format prefixes x floating point conversion
// characters x a set of values spanning a wide exponent range.
TEST_F(FormatConvertTest, LongDouble) {
#ifdef _MSC_VER
  // MSVC has a different rounding policy than us so we can't test our
  // implementation against the native one there.
  return;
#endif  // _MSC_VER
  // Format prefixes; "L" and the conversion character are appended below.
  const char *const kFormats[] = {"%",    "%.3", "%8.5", "%9",  "%.5000",
                                  "%.60", "%+",  "% ",   "%-10"};

  std::vector<long double> values = {
      0.0,
      -0.0,
      std::numeric_limits<long double>::max(),
      -std::numeric_limits<long double>::max(),
      std::numeric_limits<long double>::min(),
      -std::numeric_limits<long double>::min(),
      std::numeric_limits<long double>::infinity(),
      -std::numeric_limits<long double>::infinity()};

  for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L,
                           1234567.L, 12345678.L, 123456789.L, 1234567890.L,
                           12345678901.L, 123456789012.L, 1234567890123.L,
                           // This value is not representable in double, but it
                           // is in long double that uses the extended format.
                           // This is to verify that we are not truncating the
                           // value mistakenly through a double.
                           10000000000000000.25L}) {
    for (int exp : {-1000, -500, 0, 500, 1000}) {
      // The scaled base and its reciprocal, each with both signs.
      const long double scaled = std::ldexp(base, exp);
      for (int sign : {1, -1}) {
        values.push_back(sign * scaled);
        values.push_back(sign / scaled);
      }
    }
  }

  for (const char *fmt : kFormats) {
    const bool is_huge_precision = (fmt == absl::string_view("%.5000"));
    for (char conv_char : {'f', 'F',  //
                           'g', 'G',  //
                           'a', 'A',  //
                           'e', 'E'}) {
      if (is_huge_precision && conv_char != 'f' && conv_char != 'F') {
        // This particular test takes way too long with snprintf.
        // Disable for the case we are not implementing natively.
        continue;
      }
      const std::string fmt_str = std::string(fmt) + 'L' + conv_char;
      const UntypedFormatSpecImpl format(fmt_str);
      for (auto d : values) {
        FormatArgImpl arg(d);
        // We use ASSERT_EQ here because failures are usually correlated and a
        // bug would print way too many failed expectations causing the test to
        // time out.
        ASSERT_EQ(StrPrint(fmt_str.c_str(), d), FormatPack(format, {&arg, 1}))
            << fmt_str << " " << StrPrint("%.18Lg", d) << " "
            << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d);
      }
    }
  }
}
// Checks that an int argument formatted with a floating-point conversion
// produces the same text as StrPrint applied to that value cast to double.
TEST_F(FormatConvertTest, IntAsFloat) {
const int kMin = std::numeric_limits<int>::min();
const int kMax = std::numeric_limits<int>::max();
// Small magnitudes in both signs, zero, and values at or next to the int
// limits.
const int ia[] = {
1, 2, 3, 123,
-1, -2, -3, -123,
0, kMax - 1, kMax, kMin + 1, kMin };
for (const int fx : ia) {
SCOPED_TRACE(fx);
const FormatArgImpl args[] = {FormatArgImpl(fx)};
// One expected output per format; `line` records where the entry was
// written so that a failure can be traced back to it.
struct Expectation {
int line;
std::string out;
const char *fmt;
};
const double dx = static_cast<double>(fx);
const Expectation kExpect[] = {
{ __LINE__, StrPrint("%f", dx), "%f" },
{ __LINE__, StrPrint("%12f", dx), "%12f" },
{ __LINE__, StrPrint("%.12f", dx), "%.12f" },
{ __LINE__, StrPrint("%12a", dx), "%12a" },
{ __LINE__, StrPrint("%.12a", dx), "%.12a" },
};
for (const Expectation &e : kExpect) {
SCOPED_TRACE(e.line);
SCOPED_TRACE(e.fmt);
UntypedFormatSpecImpl format(e.fmt);
// The int argument `fx` is formatted here, not the double `dx`.
EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args)));
}
}
}
template <typename T>
bool FormatFails(const char* test_format, T value) {
std::string format_string = std::string("<<") + test_format + ">>";
UntypedFormatSpecImpl format(format_string);
int one = 1;
const FormatArgImpl args[] = {FormatArgImpl(value), FormatArgImpl(one)};
EXPECT_EQ(FormatPack(format, absl::MakeSpan(args)), "")
<< "format=" << test_format << " value=" << value;
return FormatPack(format, absl::MakeSpan(args)).empty();
}
// Lists argument-type / conversion pairs for which FormatFails must report an
// empty formatting result.
TEST_F(FormatConvertTest, ExpectedFailures) {
// Int input
EXPECT_TRUE(FormatFails("%p", 1));
EXPECT_TRUE(FormatFails("%s", 1));
EXPECT_TRUE(FormatFails("%n", 1));
// Double input
EXPECT_TRUE(FormatFails("%p", 1.));
EXPECT_TRUE(FormatFails("%s", 1.));
EXPECT_TRUE(FormatFails("%n", 1.));
EXPECT_TRUE(FormatFails("%c", 1.));
EXPECT_TRUE(FormatFails("%d", 1.));
EXPECT_TRUE(FormatFails("%x", 1.));
// `*` takes the width from the first argument, which is the double here.
EXPECT_TRUE(FormatFails("%*d", 1.));
// String input
EXPECT_TRUE(FormatFails("%n", ""));
EXPECT_TRUE(FormatFails("%c", ""));
EXPECT_TRUE(FormatFails("%d", ""));
EXPECT_TRUE(FormatFails("%x", ""));
EXPECT_TRUE(FormatFails("%f", ""));
// `*` takes the width from the first argument, which is the string here.
EXPECT_TRUE(FormatFails("%*d", ""));
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,52 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/extension.h"
#include <errno.h>
#include <algorithm>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Renders the flags that are set as their printf flag characters, always in
// the order "-", "+", " ", "#", "0". The `basic` bit has no textual form.
std::string Flags::ToString() const {
  std::string text;
  if (left) text.append("-");
  if (show_pos) text.append("+");
  if (sign_col) text.append(" ");
  if (alt) text.append("#");
  if (zero) text.append("0");
  return text;
}
// Appends `value` to the sink, truncated to `precision` characters when
// `precision` is non-negative and padded with spaces up to `width` when
// `width` is non-negative. Padding goes after the text when `left` is set and
// before it otherwise. Always returns true.
bool FormatSinkImpl::PutPaddedString(string_view value, int width,
                                     int precision, bool left) {
  // A negative width means "no minimum width".
  const size_t min_width = width >= 0 ? static_cast<size_t>(width) : 0;

  // A negative precision means "no truncation".
  size_t shown_len = value.size();
  if (precision >= 0 && static_cast<size_t>(precision) < shown_len) {
    shown_len = static_cast<size_t>(precision);
  }
  const string_view shown(value.data(), shown_len);

  const size_t padding = Excess(shown.size(), min_width);
  if (left) {
    Append(shown);
    Append(padding, ' ');
  } else {
    Append(padding, ' ');
    Append(shown);
  }
  return true;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,429 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_
#include <limits.h>
#include <cstddef>
#include <cstring>
#include <ostream>
#include "absl/base/config.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/internal/str_format/output.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
enum class FormatConversionChar : uint8_t;
enum class FormatConversionCharSet : uint64_t;
// Type-erased handle to an output sink: stores the sink as a `void*` together
// with a function pointer that forwards writes to InvokeFlush for the sink's
// real type.
class FormatRawSinkImpl {
public:
// Implicitly convert from a pointer to any type T for which
// str_format_internal::InvokeFlush(T*, string_view) is well-formed. The
// defaulted template parameter removes this constructor from overload
// resolution for every other type.
template <typename T, decltype(str_format_internal::InvokeFlush(
std::declval<T*>(), string_view()))* = nullptr>
FormatRawSinkImpl(T* raw) // NOLINT
: sink_(raw), write_(&FormatRawSinkImpl::Flush<T>) {}
// Forwards `s` to the wrapped sink.
void Write(string_view s) { write_(sink_, s); }
// Returns the `sink_` member of `s`. T must expose a `sink_` member
// convertible to FormatRawSinkImpl.
template <typename T>
static FormatRawSinkImpl Extract(T s) {
return s.sink_;
}
private:
// Restores the static type of the erased sink pointer and flushes `s` to it.
template <typename T>
static void Flush(void* r, string_view s) {
str_format_internal::InvokeFlush(static_cast<T*>(r), s);
}
void* sink_;
void (*write_)(void*, string_view);
};
// An abstraction to which conversions write their string data.
// Output is staged in a fixed 1024-byte internal buffer and handed to the
// underlying FormatRawSinkImpl on Flush() and on destruction.
// NOTE(review): no copy operations are declared, so the implicit copy would
// copy `pos_`, which points into the source object's `buf_`. Instances should
// not be copied.
class FormatSinkImpl {
public:
explicit FormatSinkImpl(FormatRawSinkImpl raw) : raw_(raw) {}
// Flushes any buffered data.
~FormatSinkImpl() { Flush(); }
// Writes the buffered bytes to the raw sink and resets the buffer to empty.
void Flush() {
raw_.Write(string_view(buf_, pos_ - buf_));
pos_ = buf_;
}
// Appends `n` copies of `c`.
void Append(size_t n, char c) {
if (n == 0) return;
size_ += n;
// Writes `count` copies of `c` at the current position; the caller must
// ensure that `count` does not exceed Avail().
auto raw_append = [&](size_t count) {
memset(pos_, c, count);
pos_ += count;
};
// While the remainder does not fit, fill what is left of the buffer and
// flush it.
while (n > Avail()) {
n -= Avail();
if (Avail() > 0) {
raw_append(Avail());
}
Flush();
}
raw_append(n);
}
// Appends the contents of `v`.
void Append(string_view v) {
size_t n = v.size();
if (n == 0) return;
size_ += n;
if (n >= Avail()) {
// Not enough room: flush what is buffered to preserve ordering, then write
// `v` straight to the raw sink without copying it into the buffer.
Flush();
raw_.Write(v);
return;
}
memcpy(pos_, v.data(), n);
pos_ += n;
}
// Total number of characters appended so far, whether or not they have been
// flushed.
size_t size() const { return size_; }
// Put 'v' to 'sink' with specified width, precision, and left flag.
bool PutPaddedString(string_view v, int width, int precision, bool left);
// Constructs a T from a pointer to this sink.
template <typename T>
T Wrap() {
return T(this);
}
// Returns the `sink_` member of `*s`.
template <typename T>
static FormatSinkImpl* Extract(T* s) {
return s->sink_;
}
private:
// Number of unused bytes left in `buf_`.
size_t Avail() const { return buf_ + sizeof(buf_) - pos_; }
FormatRawSinkImpl raw_;
size_t size_ = 0;
// Next write position inside `buf_`.
char* pos_ = buf_;
char buf_[1024];
};
// The set of printf-style flags of a conversion, stored as one-bit fields.
struct Flags {
bool basic : 1; // fastest conversion: no flags, width, or precision
bool left : 1; // "-"
bool show_pos : 1; // "+"
bool sign_col : 1; // " "
bool alt : 1; // "#"
bool zero : 1; // "0"
// Returns the flag characters of the bits that are set.
std::string ToString() const;
// Streams the ToString() form of `v`.
friend std::ostream& operator<<(std::ostream& os, const Flags& v) {
return os << v.ToString();
}
};
// X-macro listing every supported conversion character. It expands to
// X_VAL(<char>) for each character, with X_SEP placed between consecutive
// entries (there is no trailing X_SEP).
// clang-format off
#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \
/* text */ \
X_VAL(c) X_SEP X_VAL(s) X_SEP \
/* ints */ \
X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \
X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \
/* floats */ \
X_VAL(f) X_SEP X_VAL(F) X_SEP X_VAL(e) X_SEP X_VAL(E) X_SEP \
X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \
/* misc */ \
X_VAL(n) X_SEP X_VAL(p)
// clang-format on
// This type should not be referenced, it exists only to provide labels
// internally that match the values declared in FormatConversionChar in
// str_format.h. This is meant to allow internal libraries to use the same
// declared interface type as the public interface
// (absl::StrFormatConversionChar) while keeping the definition in a public
// header.
// Internal libraries should use the form
// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for
// comparisons. Use in switch statements is not recommended due to a bug in how
// gcc 4.9 -Wswitch handles declared but undefined enums.
// Holder for one FormatConversionChar constant per conversion character, plus
// kNone. It cannot be instantiated.
struct FormatConversionCharInternal {
FormatConversionCharInternal() = delete;
private:
// Private enum that fixes the numeric value behind each label. The enumerators
// follow the same order as ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_, with kNone
// last.
// clang-format off
enum class Enum : uint8_t {
c, s, // text
d, i, o, u, x, X, // int
f, F, e, E, g, G, a, A, // float
n, p, // misc
kNone
};
// clang-format on
public:
// Defines `static constexpr FormatConversionChar <id>` for every conversion
// character by casting the matching private enumerator.
#define ABSL_INTERNAL_X_VAL(id) \
static constexpr FormatConversionChar id = \
static_cast<FormatConversionChar>(Enum::id);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
// Label that does not correspond to any conversion character.
static constexpr FormatConversionChar kNone =
static_cast<FormatConversionChar>(Enum::kNone);
};
// clang-format on
// Maps a conversion character (for example 'd') to its FormatConversionChar
// label. Characters that are not listed in
// ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_ map to kNone.
inline FormatConversionChar FormatConversionCharFromChar(char c) {
switch (c) {
// Generates one `case '<id>': return FormatConversionCharInternal::<id>;` per
// conversion character; `#id[0]` is the first character of the stringified
// id.
#define ABSL_INTERNAL_X_VAL(id) \
case #id[0]: \
return FormatConversionCharInternal::id;
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, )
#undef ABSL_INTERNAL_X_VAL
}
return FormatConversionCharInternal::kNone;
}
// Returns true for the conversions spelled with an upper-case letter:
// X, F, E, G and A.
inline bool FormatConversionCharIsUpper(FormatConversionChar c) {
  return c == FormatConversionCharInternal::X ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::G ||
         c == FormatConversionCharInternal::A;
}
// Returns true for the floating-point conversions: a, e, f, g and their
// upper-case forms.
inline bool FormatConversionCharIsFloat(FormatConversionChar c) {
  return c == FormatConversionCharInternal::a ||
         c == FormatConversionCharInternal::e ||
         c == FormatConversionCharInternal::f ||
         c == FormatConversionCharInternal::g ||
         c == FormatConversionCharInternal::A ||
         c == FormatConversionCharInternal::E ||
         c == FormatConversionCharInternal::F ||
         c == FormatConversionCharInternal::G;
}
// Inverse of FormatConversionCharFromChar: returns the character for a
// conversion label, or '\0' for kNone and for any value that matches no label.
inline char FormatConversionCharToChar(FormatConversionChar c) {
if (c == FormatConversionCharInternal::kNone) {
return '\0';
// Each expansion closes the previous branch and opens
// `else if (c == FormatConversionCharInternal::<e>) { return '<e>';`, so the
// macro list becomes one if / else-if chain. The separator is empty.
#define ABSL_INTERNAL_X_VAL(e) \
} else if (c == FormatConversionCharInternal::e) { \
return #e[0];
#define ABSL_INTERNAL_X_SEP
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL,
ABSL_INTERNAL_X_SEP)
} else {
return '\0';
}
#undef ABSL_INTERNAL_X_VAL
#undef ABSL_INTERNAL_X_SEP
}
// Streams the character associated with `v`, or '?' when `v` has no
// associated character.
inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) {
  const char associated = FormatConversionCharToChar(v);
  if (associated == '\0') {
    return os << '?';
  }
  return os << associated;
}
struct FormatConversionSpecImplFriend;
// The parsed description of one conversion: conversion character, flags,
// width and precision. The fields are written through
// FormatConversionSpecImplFriend.
// NOTE(review): `flags_`, `width_` and `precision_` have no default
// initializers, so a default-constructed object holds indeterminate values in
// them until the setters run.
class FormatConversionSpecImpl {
public:
// Width and precision are not specified, no flags are set.
bool is_basic() const { return flags_.basic; }
bool has_left_flag() const { return flags_.left; }
bool has_show_pos_flag() const { return flags_.show_pos; }
bool has_sign_col_flag() const { return flags_.sign_col; }
bool has_alt_flag() const { return flags_.alt; }
bool has_zero_flag() const { return flags_.zero; }
FormatConversionChar conversion_char() const {
// Keep this field first in the struct. It generates better code when
// accessing it when ConversionSpec is passed by value in registers.
static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, "");
return conv_;
}
// Returns the specified width. If width is unspecified, it returns a negative
// value.
int width() const { return width_; }
// Returns the specified precision. If precision is unspecified, it returns a
// negative value.
int precision() const { return precision_; }
// Constructs a T from this spec.
template <typename T>
T Wrap() {
return T(*this);
}
private:
friend struct str_format_internal::FormatConversionSpecImplFriend;
FormatConversionChar conv_ = FormatConversionCharInternal::kNone;
Flags flags_;
int width_;
int precision_;
};
// Friend of FormatConversionSpecImpl that provides write access to its private
// fields, so the spec class itself exposes only getters.
struct FormatConversionSpecImplFriend final {
// Replaces all flags of `conv` with `f`.
static void SetFlags(Flags f, FormatConversionSpecImpl* conv) {
conv->flags_ = f;
}
// Sets the conversion character of `conv`.
static void SetConversionChar(FormatConversionChar c,
FormatConversionSpecImpl* conv) {
conv->conv_ = c;
}
// Sets the width of `conv`.
static void SetWidth(int w, FormatConversionSpecImpl* conv) {
conv->width_ = w;
}
// Sets the precision of `conv`.
static void SetPrecision(int p, FormatConversionSpecImpl* conv) {
conv->precision_ = p;
}
// Returns the flags of `spec` rendered by Flags::ToString().
static std::string FlagsToString(const FormatConversionSpecImpl& spec) {
return spec.flags_.ToString();
}
};
// Type safe OR of any number of FormatConversionCharSet values.
// We need this for two reasons:
// 1. operator| on enums makes them decay to integers and the result is an
// integer. We need the result to stay as an enum.
// 2. We use "enum class" which would not work even if we accepted the decay.
// Base case of the recursion: the union of a single set is the set itself.
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a) {
return a;
}
// Recursive case: ORs `a` with the union of the remaining sets. Written as a
// single return statement.
template <typename... CharSet>
constexpr FormatConversionCharSet FormatConversionCharSetUnion(
FormatConversionCharSet a, CharSet... rest) {
return static_cast<FormatConversionCharSet>(
static_cast<uint64_t>(a) |
static_cast<uint64_t>(FormatConversionCharSetUnion(rest...)));
}
// Returns the single-bit mask for conversion `c`: bit (1 + numeric value of
// `c`). Bit 0 is left free; the char overload returns it for '*'.
constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) {
return uint64_t{1} << (1 + static_cast<uint8_t>(c));
}
// Returns the bit mask for the conversion character `conv`: the mask of the
// matching FormatConversionChar for a known conversion character, 1 (bit 0)
// for '*', and 0 for anything else.
constexpr uint64_t FormatConversionCharToConvInt(char conv) {
return
// Each expansion contributes `conv == '<c>' ? <mask of c> :`, building one
// chained conditional expression that ends with the '*' test below.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
conv == #c[0] \
? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \
:
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
conv == '*'
? 1
: 0;
}
// Same mapping as FormatConversionCharToConvInt(char), with the mask returned
// as a FormatConversionCharSet.
constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) {
return static_cast<FormatConversionCharSet>(
FormatConversionCharToConvInt(conv));
}
// Holder for FormatConversionCharSet constants: one single-character set per
// conversion character, plus kStar and a few predefined unions.
struct FormatConversionCharSetInternal {
// Defines `static constexpr FormatConversionCharSet <c>` for every conversion
// character.
#define ABSL_INTERNAL_CHAR_SET_CASE(c) \
static constexpr FormatConversionCharSet c = \
FormatConversionCharToConvValue(#c[0]);
ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, )
#undef ABSL_INTERNAL_CHAR_SET_CASE
// Used for width/precision '*' specification.
static constexpr FormatConversionCharSet kStar =
FormatConversionCharToConvValue('*');
// Some predefined values (TODO(matthewbr), delete any that are unused).
static constexpr FormatConversionCharSet kIntegral =
FormatConversionCharSetUnion(d, i, u, o, x, X);
static constexpr FormatConversionCharSet kFloating =
FormatConversionCharSetUnion(a, e, f, g, A, E, F, G);
static constexpr FormatConversionCharSet kNumeric =
FormatConversionCharSetUnion(kIntegral, kFloating);
static constexpr FormatConversionCharSet kString = s;
static constexpr FormatConversionCharSet kPointer = p;
};
// Type safe OR operator for two FormatConversionCharSet values; it forwards to
// FormatConversionCharSetUnion.
// We need this for two reasons:
// 1. operator| on enums makes them decay to integers and the result is an
// integer. We need the result to stay as an enum.
// 2. We use "enum class" which would not work even if we accepted the decay.
constexpr FormatConversionCharSet operator|(FormatConversionCharSet a,
FormatConversionCharSet b) {
return FormatConversionCharSetUnion(a, b);
}
// Overloaded conversion functions to support absl::ParsedFormat.
// Get a conversion with a single character in it.
constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) {
return static_cast<FormatConversionCharSet>(
FormatConversionCharToConvValue(c));
}
// A value that already is a FormatConversionCharSet is returned unchanged.
constexpr FormatConversionCharSet ToFormatConversionCharSet(
FormatConversionCharSet c) {
return c;
}
// Any other argument type is rejected at compile time instead of being
// implicitly converted to one of the overloads above.
template <typename T>
void ToFormatConversionCharSet(T) = delete;
// Checks whether `c` exists in `set`.
// A character that is neither a conversion character nor '*' has an empty
// mask, so the result is false for it.
constexpr bool Contains(FormatConversionCharSet set, char c) {
return (static_cast<uint64_t>(set) &
static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0;
}
// Checks whether all the characters in `c` are contained in `set`
constexpr bool Contains(FormatConversionCharSet set,
FormatConversionCharSet c) {
return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) ==
static_cast<uint64_t>(c);
}
// Checks whether the single conversion `c` exists in `set`.
constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) {
return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0;
}
// Returns how much of `capacity` is left after `used`, or 0 when `used` is
// equal to or larger than `capacity`.
inline size_t Excess(size_t used, size_t capacity) {
  if (used >= capacity) {
    return 0;
  }
  return capacity - used;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_

View file

@ -0,0 +1,83 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "absl/strings/internal/str_format/extension.h"
#include <random>
#include <string>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
namespace my_namespace {
// Example of a user-defined output target: it declares an AbslFormatFlush
// friend overload, which appends flushed text to an internal string.
class UserDefinedType {
public:
UserDefinedType() = default;
// Appends `str` to the accumulated value.
void Append(absl::string_view str) { value_.append(str.data(), str.size()); }
// Returns everything appended so far.
const std::string& Value() const { return value_; }
// Flush hook: forwards the text to Append().
friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) {
x->Append(str);
}
private:
std::string value_;
};
} // namespace my_namespace
namespace {
// Returns a string of `len` characters, each drawn uniformly from 'a'..'z'.
// A new generator is seeded from std::random_device on every call.
std::string MakeRandomString(size_t len) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<> letter('a', 'z');

  std::string result(len, '0');
  for (size_t i = 0; i < len; ++i) {
    result[i] = letter(engine);
  }
  return result;
}
// Appends ten random strings of each chunk size to a FormatSinkImpl backed by
// a std::string and checks that the flushed output equals their concatenation.
TEST(FormatExtensionTest, SinkAppendSubstring) {
for (size_t chunk_size : {1, 10, 100, 1000, 10000}) {
std::string expected, actual;
absl::str_format_internal::FormatSinkImpl sink(&actual);
for (size_t chunks = 0; chunks < 10; ++chunks) {
std::string rand = MakeRandomString(chunk_size);
expected += rand;
sink.Append(rand);
}
// Push out whatever is still buffered before comparing.
sink.Flush();
EXPECT_EQ(actual, expected);
}
}
// Same as SinkAppendSubstring, but exercises the Append(count, char) overload
// with runs of one random character.
TEST(FormatExtensionTest, SinkAppendChars) {
for (size_t chunk_size : {1, 10, 100, 1000, 10000}) {
std::string expected, actual;
absl::str_format_internal::FormatSinkImpl sink(&actual);
for (size_t chunks = 0; chunks < 10; ++chunks) {
// Only the first character of the one-character random string is used.
std::string rand = MakeRandomString(1);
expected.append(chunk_size, rand[0]);
sink.Append(chunk_size, rand[0]);
}
// Push out whatever is still buffered before comparing.
sink.Flush();
EXPECT_EQ(actual, expected);
}
}
} // namespace

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Overloads for float, double and long double that take the value, its
// conversion spec and the destination sink. The definitions are not in this
// header.
// NOTE(review): the bool result presumably signals whether the conversion was
// handled -- confirm against the definitions.
bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv,
FormatSinkImpl *sink);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_

View file

@ -0,0 +1,72 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <errno.h>
#include <cstring>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Scope guard that zeroes errno on construction. On destruction it puts the
// previous value back, but only if errno is still zero, so an error set inside
// the scope is kept.
struct ClearErrnoGuard {
  ClearErrnoGuard() : old_value(errno) {
    errno = 0;
  }
  ~ClearErrnoGuard() {
    const bool still_clear = (errno == 0);
    if (still_clear) {
      errno = old_value;
    }
  }
  // Value errno had when the guard was created.
  int old_value;
};
} // namespace
// Copies as much of `v` as still fits into the remaining buffer space and
// advances the write position. `total_written_` always grows by the full size
// of `v`, so it reports how much output was requested even when the buffer was
// too small to hold it.
void BufferRawSink::Write(string_view v) {
  // Plain comparison instead of std::min: this file does not include
  // <algorithm>.
  const size_t to_write = v.size() < size_ ? v.size() : size_;
  // Passing a null pointer to memcpy is undefined even for a zero count. Both
  // an empty `v` and a sink created without a buffer can produce one, so skip
  // the copy when there is nothing to copy.
  if (to_write > 0) {
    std::memcpy(buffer_, v.data(), to_write);
    buffer_ += to_write;
    size_ -= to_write;
  }
  total_written_ += v.size();
}
// Writes all of `v` to the FILE with std::fwrite, retrying after partial
// writes. `count_` accumulates the bytes written. The loop stops once `v` is
// consumed or an error has been recorded in `error_`.
void FILERawSink::Write(string_view v) {
  while (!v.empty() && !error_) {
    // Reset errno to zero in case the libc implementation doesn't set errno
    // when a failure occurs.
    ClearErrnoGuard guard;

    const size_t written = std::fwrite(v.data(), 1, v.size(), output_);
    if (written != 0) {
      // Some progress was made.
      count_ += written;
      v.remove_prefix(written);
      continue;
    }

    // Nothing was written: work out why.
    if (errno == EINTR) {
      continue;
    }
    if (errno) {
      error_ = errno;
    } else if (std::ferror(output_)) {
      // Non-POSIX compliant libc implementations may not set errno, so we
      // have to check the stream's error indicator.
      error_ = EBADF;
    }
    // Otherwise we're likely on a non-POSIX system that encountered EINTR but
    // had no way of reporting it; loop around and try again.
  }
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,96 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Output extension hooks for the Format library.
// `str_format_internal::InvokeFlush` calls the appropriate flush function for
// the specified output argument.
// `BufferRawSink` is a simple output sink for a char buffer. Used by SnprintF.
// `FILERawSink` is a std::FILE* based sink. Used by PrintF and FprintF.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_
#include <cstdio>
#include <ostream>
#include <string>
#include "absl/base/port.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// RawSink implementation that writes into a char* buffer.
// It will not overflow the buffer, but will keep the total count of chars
// that would have been written.
class BufferRawSink {
public:
// `buffer` is the destination and `size` the number of bytes that may be
// written to it.
BufferRawSink(char* buffer, size_t size) : buffer_(buffer), size_(size) {}
// Number of chars passed to Write() so far, including those that did not fit.
size_t total_written() const { return total_written_; }
void Write(string_view v);
private:
char* buffer_;
size_t size_;
size_t total_written_ = 0;
};
// RawSink implementation that writes into a FILE*.
// It keeps track of the total number of bytes written and any error encountered
// during the writes.
class FILERawSink {
public:
explicit FILERawSink(std::FILE* output) : output_(output) {}
void Write(string_view v);
// Total number of bytes written so far.
size_t count() const { return count_; }
// The recorded error code, or 0 if no error has been recorded.
int error() const { return error_; }
private:
std::FILE* output_;
int error_ = 0;
size_t count_ = 0;
};
// Provide RawSink integration with common types from the STL.
// Appends `s` to the string.
inline void AbslFormatFlush(std::string* out, string_view s) {
out->append(s.data(), s.size());
}
// Writes `s` to the stream as unformatted output.
inline void AbslFormatFlush(std::ostream* out, string_view s) {
out->write(s.data(), s.size());
}
// Forwards `v` to FILERawSink::Write.
inline void AbslFormatFlush(FILERawSink* sink, string_view v) {
sink->Write(v);
}
// Forwards `v` to BufferRawSink::Write.
inline void AbslFormatFlush(BufferRawSink* sink, string_view v) {
sink->Write(v);
}
// This is a SFINAE to get a better compiler error message when the type
// is not supported.
// The trailing return type makes this template viable only when
// `AbslFormatFlush(out, s)` is a valid expression for T; the body then makes
// that call.
template <typename T>
auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) {
AbslFormatFlush(out, s);
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_

View file

@ -0,0 +1,79 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/output.h"
#include <sstream>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/cord.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// Flushing into a std::string appends to its existing contents.
TEST(InvokeFlush, String) {
std::string str = "ABC";
str_format_internal::InvokeFlush(&str, "DEF");
EXPECT_EQ(str, "ABCDEF");
}
// Flushing into a stream writes after the text already streamed into it.
TEST(InvokeFlush, Stream) {
std::stringstream str;
str << "ABC";
str_format_internal::InvokeFlush(&str, "DEF");
EXPECT_EQ(str.str(), "ABCDEF");
}
// Flushing into an absl::Cord appends to its existing contents.
TEST(InvokeFlush, Cord) {
absl::Cord str("ABC");
str_format_internal::InvokeFlush(&str, "DEF");
EXPECT_EQ(str, "ABCDEF");
}
// Checks that BufferRawSink never writes past the size it was given. The
// 16-byte buffer is pre-filled with 'x' and the sink is limited to 15 bytes,
// so the last byte must stay 'x' in every case.
TEST(BufferRawSink, Limits) {
char buf[16];
{
// A single write that fits (14 chars).
std::fill(std::begin(buf), std::end(buf), 'x');
str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1);
str_format_internal::InvokeFlush(&bufsink, "Hello World237");
EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx");
}
{
// A single write that is too long (17 chars) is cut at 15.
std::fill(std::begin(buf), std::end(buf), 'x');
str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1);
str_format_internal::InvokeFlush(&bufsink, "Hello World237237");
EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x");
}
{
// Two writes that fit together.
std::fill(std::begin(buf), std::end(buf), 'x');
str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1);
str_format_internal::InvokeFlush(&bufsink, "Hello World");
str_format_internal::InvokeFlush(&bufsink, "237");
EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World237xx");
}
{
// Two writes where the second one overflows and is cut.
std::fill(std::begin(buf), std::end(buf), 'x');
str_format_internal::BufferRawSink bufsink(buf, sizeof(buf) - 1);
str_format_internal::InvokeFlush(&bufsink, "Hello World");
str_format_internal::InvokeFlush(&bufsink, "237237");
EXPECT_EQ(std::string(buf, sizeof(buf)), "Hello World2372x");
}
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,336 @@
#include "absl/strings/internal/str_format/parser.h"
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <cctype>
#include <cstdint>
#include <algorithm>
#include <initializer_list>
#include <limits>
#include <ostream>
#include <string>
#include <unordered_set>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
using CC = FormatConversionCharInternal;
using LM = LengthMod;
// Lookup table with one ConvTag per byte value. Conversion characters carry
// their CC:: label, length-modifier characters carry their LM:: value, and
// every other byte is a default-constructed ConvTag. The trailing comment on
// each row gives the byte range or the ASCII characters that the row covers.
ABSL_CONST_INIT const ConvTag kTags[256] = {
{}, {}, {}, {}, {}, {}, {}, {}, // 00-07
{}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
{}, {}, {}, {}, {}, {}, {}, {}, // 10-17
{}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
{}, {}, {}, {}, {}, {}, {}, {}, // 20-27
{}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
{}, {}, {}, {}, {}, {}, {}, {}, // 30-37
{}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
{}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG
{}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
{}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW
CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
{}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}~ and DEL (78-7f)
{}, {}, {}, {}, {}, {}, {}, {}, // 80-87
{}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
{}, {}, {}, {}, {}, {}, {}, {}, // 90-97
{}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
{}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
{}, {}, {}, {}, {}, {}, {}, {}, // a8-af
{}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
{}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
{}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
{}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
{}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
{}, {}, {}, {}, {}, {}, {}, {}, // d8-df
{}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
{}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
{}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
{}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
};
namespace {
// Consistency check for the `basic` flag: it must be set exactly when no other
// flag is set and both width and precision are -1. Returns true when the flag
// is consistent; otherwise dumps the conversion state to stderr and returns
// false.
bool CheckFastPathSetting(const UnboundConversion& conv) {
  const bool has_modifier = conv.flags.left ||      //
                            conv.flags.show_pos ||  //
                            conv.flags.sign_col ||  //
                            conv.flags.alt ||       //
                            conv.flags.zero ||      //
                            (conv.width.value() != -1) ||
                            (conv.precision.value() != -1);
  const bool should_be_basic = !has_modifier;
  if (should_be_basic == conv.flags.basic) {
    return true;
  }
  fprintf(stderr,
          "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
          "width=%d precision=%d\n",
          conv.flags.basic, conv.flags.left, conv.flags.show_pos,
          conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
          conv.width.value(), conv.precision.value());
  return false;
}
// Parses one conversion specification from [pos, end) into `*conv`: an
// optional positional argument index (only when `is_positional`), flags,
// width, precision, an optional length modifier and the conversion character.
//
// Returns the position just past the conversion character, or nullptr when the
// input ends early or is not a valid specification.
//
// `*next_arg` counts the arguments consumed so far in non-positional mode. The
// first "N$" seen while it is still 0 sets it to -1 and restarts the parse in
// positional mode.
template <bool is_positional>
const char *ConsumeConversion(const char *pos, const char *const end,
                              UnboundConversion *conv, int *next_arg) {
  const char* const original_pos = pos;
  char c;
  // Read the next char into `c` and update `pos`. Makes the enclosing function
  // return nullptr if there are no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR()          \
  do {                                                  \
    if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
    c = *pos++;                                         \
  } while (0)

  // Parses a decimal number whose first digit is already in `c`. On return `c`
  // holds the first character that was not consumed as part of the number.
  const auto parse_digits = [&] {
    int digits = c - '0';
    // We do not want to overflow `digits` so we consume at most digits10
    // digits. If there are more digits the parsing will fail later on when the
    // digit doesn't match the expected characters.
    int num_digits = std::numeric_limits<int>::digits10;
    for (;;) {
      if (ABSL_PREDICT_FALSE(pos == end)) break;
      c = *pos++;
      // std::isdigit requires a value representable as unsigned char (or EOF);
      // passing a negative plain char is undefined behavior.
      if (!std::isdigit(static_cast<unsigned char>(c))) break;
      --num_digits;
      if (ABSL_PREDICT_FALSE(!num_digits)) break;
      digits = 10 * digits + c - '0';
    }
    return digits;
  };

  if (is_positional) {
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
    conv->arg_position = parse_digits();
    assert(conv->arg_position > 0);
    if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  }

  ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();

  // We should start with the basic flag on.
  assert(conv->flags.basic);

  // Any non alpha character makes this conversion not basic.
  // This includes flags (-+ #0), width (1-9, *) or precision (.).
  // All conversion characters and length modifiers are alpha characters.
  if (c < 'A') {
    conv->flags.basic = false;

    for (; c <= '0';) {
      // FIXME: We might be able to speed this up reusing the lookup table from
      // above. It might require changing Flags to be a plain integer where we
      // can |= a value.
      switch (c) {
        case '-':
          conv->flags.left = true;
          break;
        case '+':
          conv->flags.show_pos = true;
          break;
        case ' ':
          conv->flags.sign_col = true;
          break;
        case '#':
          conv->flags.alt = true;
          break;
        case '0':
          conv->flags.zero = true;
          break;
        default:
          goto flags_done;
      }
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    }
  flags_done:

    if (c <= '9') {
      if (c >= '0') {
        int maybe_width = parse_digits();
        if (!is_positional && c == '$') {
          // The digits were an argument position rather than a width. This is
          // only allowed before any argument has been consumed.
          if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
          // Positional conversion.
          *next_arg = -1;
          conv->flags = Flags();
          conv->flags.basic = true;
          return ConsumeConversion<true>(original_pos, end, conv, next_arg);
        }
        conv->width.set_value(maybe_width);
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->width.set_from_arg(parse_digits());
          if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->width.set_from_arg(++*next_arg);
        }
      }
    }

    if (c == '.') {
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
      // Cast for the same reason as in parse_digits.
      if (std::isdigit(static_cast<unsigned char>(c))) {
        conv->precision.set_value(parse_digits());
      } else if (c == '*') {
        ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        if (is_positional) {
          if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
          conv->precision.set_from_arg(parse_digits());
          if (c != '$') return nullptr;
          ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
        } else {
          conv->precision.set_from_arg(++*next_arg);
        }
      } else {
        // A '.' that is not followed by digits or '*' means precision 0.
        conv->precision.set_value(0);
      }
    }
  }

  auto tag = GetTagForChar(c);

  if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
    if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;

    // It is a length modifier.
    using str_format_internal::LengthMod;
    LengthMod length_mod = tag.as_length();
    ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    // "hh" and "ll" are the only two-character length modifiers.
    if (c == 'h' && length_mod == LengthMod::h) {
      conv->length_mod = LengthMod::hh;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else if (c == 'l' && length_mod == LengthMod::l) {
      conv->length_mod = LengthMod::ll;
      ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
    } else {
      conv->length_mod = length_mod;
    }
    tag = GetTagForChar(c);
    if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
  }

  assert(CheckFastPathSetting(*conv));
  // Reference the function so it still counts as used when assert() compiles
  // to nothing.
  (void)(&CheckFastPathSetting);

  conv->conv = tag.as_conv();
  if (!is_positional) conv->arg_position = ++*next_arg;
  return pos;
}
} // namespace
// Returns the spelling of a length modifier as it appears in a format string
// (for example "hh" or "ll"). LengthMod::none, and any value that is not a
// named enumerator, produces the empty string.
std::string LengthModToString(LengthMod v) {
  const char *spelling = "";
  switch (v) {
    case LengthMod::h:
      spelling = "h";
      break;
    case LengthMod::hh:
      spelling = "hh";
      break;
    case LengthMod::l:
      spelling = "l";
      break;
    case LengthMod::ll:
      spelling = "ll";
      break;
    case LengthMod::L:
      spelling = "L";
      break;
    case LengthMod::j:
      spelling = "j";
      break;
    case LengthMod::z:
      spelling = "z";
      break;
    case LengthMod::t:
      spelling = "t";
      break;
    case LengthMod::q:
      spelling = "q";
      break;
    case LengthMod::none:
      break;
  }
  return spelling;
}
// Parses a single conversion spec from [p, end) into *conv.
// A negative *next_arg selects the positional ("N$") instantiation of the
// parser; otherwise the non-positional instantiation is used. The return
// value is whatever the selected ConsumeConversion instantiation returns.
const char *ConsumeUnboundConversion(const char *p, const char *end,
                                     UnboundConversion *conv, int *next_arg) {
  const bool positional = *next_arg < 0;
  return positional ? ConsumeConversion<true>(p, end, conv, next_arg)
                    : ConsumeConversion<false>(p, end, conv, next_arg);
}
// Consumer passed to ParseFormatString() by the ParsedFormatBase constructor.
// Every piece of text it receives is copied into the owner's data_ buffer, and
// one ConversionItem per text run / conversion is recorded in items_.
struct ParsedFormatBase::ParsedFormatConsumer {
  explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
      : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}

  // Records literal text. Empty text is ignored. Consecutive text runs are
  // merged into a single item by moving the previous item's end offset.
  bool Append(string_view s) {
    if (!s.empty()) {
      const size_t text_end = AppendText(s);
      auto &items = parsed->items_;
      const bool extends_text_run =
          !items.empty() && !items.back().is_conversion;
      if (extends_text_run) {
        items.back().text_end = text_end;
      } else {
        items.push_back({false, text_end, {}});
      }
    }
    return true;
  }

  // Records a conversion together with the text of its spec.
  bool ConvertOne(const UnboundConversion &conv, string_view s) {
    const size_t text_end = AppendText(s);
    parsed->items_.push_back({true, text_end, conv});
    return true;
  }

  // Copies `s` to the current write position and returns the offset one past
  // the copied bytes, measured from the start of the owner's buffer.
  size_t AppendText(string_view s) {
    const size_t length = s.size();
    memcpy(data_pos, s.data(), length);
    data_pos += length;
    return static_cast<size_t>(data_pos - parsed->data_.get());
  }

  ParsedFormatBase *parsed;
  char *data_pos;
};
// Parses `format` into this object. The text buffer is sized to the format
// string (no buffer at all for an empty format). has_error_ is set when
// parsing fails or when the parsed conversions do not match `convs`; the
// match is only attempted if parsing succeeded.
ParsedFormatBase::ParsedFormatBase(
    string_view format, bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs)
    : data_(format.empty() ? nullptr : new char[format.size()]) {
  const bool parsed_ok = ParseFormatString(format, ParsedFormatConsumer(this));
  has_error_ = !(parsed_ok && MatchesConversions(allow_ignored, convs));
}
// Checks every parsed conversion against the caller-declared conversion sets.
// Each referenced argument position (including '*' width / precision
// arguments) must not exceed convs.size() and its set must contain the
// character used. Unless `allow_ignored` is true, the number of distinct
// positions referenced must also equal convs.size().
bool ParsedFormatBase::MatchesConversions(
    bool allow_ignored,
    std::initializer_list<FormatConversionCharSet> convs) const {
  std::unordered_set<int> seen_positions;

  // Validates one reference to argument `pos` (indexed as pos - 1) with
  // conversion character `c`, and remembers the position on success.
  auto accept = [&](int pos, char c) {
    const bool in_range = static_cast<size_t>(pos) <= convs.size();
    if (!in_range) return false;
    if (!Contains(convs.begin()[pos - 1], c)) return false;
    seen_positions.insert(pos);
    return true;
  };

  for (const ConversionItem &item : items_) {
    if (item.is_conversion) {
      const auto &spec = item.conv;
      // Short-circuit evaluation keeps the precision, width, value order.
      const bool ok =
          (!spec.precision.is_from_arg() ||
           accept(spec.precision.get_from_arg(), '*')) &&
          (!spec.width.is_from_arg() ||
           accept(spec.width.get_from_arg(), '*')) &&
          accept(spec.arg_position, FormatConversionCharToChar(spec.conv));
      if (!ok) return false;
    }
  }
  return allow_ignored || seen_positions.size() == convs.size();
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,335 @@
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
#include "absl/strings/internal/str_format/checker.h"
#include "absl/strings/internal/str_format/extension.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
// Length modifiers recognized by the format parser. `none` means that no
// length modifier was given. The underlying type is a single byte.
enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
// Returns the textual spelling of `v`.
std::string LengthModToString(LengthMod v);
// The analyzed properties of a single specified conversion.
struct UnboundConversion {
// Value-initializes `flags` and then turns the `basic` flag on.
UnboundConversion()
: flags() /* This is required to zero all the fields of flags. */ {
flags.basic = true;
}
// Holds a width or precision. A single int encodes three states:
//   -1      nothing was specified (the default),
//   >= 0    a literal value taken from the format string,
//   < -1    the value comes from argument number `-value_ - 1` ('*').
class InputValue {
public:
// Stores a literal value. Requires `value >= 0`.
void set_value(int value) {
assert(value >= 0);
value_ = value;
}
// Returns the stored int as is; only meaningful for a literal value.
int value() const { return value_; }
// Marks the value as "from arg". aka the '*' format.
// Requires `value >= 1`.
// When set, is_from_arg() return true and get_from_arg() returns the
// original value.
// `value()`'s return value is unspecified in this state.
void set_from_arg(int value) {
assert(value > 0);
// Maps argument index N (>= 1) to -N - 1 (<= -2), which can never collide
// with the "unset" marker -1.
value_ = -value - 1;
}
bool is_from_arg() const { return value_ < -1; }
// Returns the argument index passed to set_from_arg().
// Requires `is_from_arg()`.
int get_from_arg() const {
assert(is_from_arg());
return -value_ - 1;
}
private:
int value_ = -1;
};
// No need to initialize. It will always be set in the parser.
int arg_position;
InputValue width;
InputValue precision;
Flags flags;
LengthMod length_mod = LengthMod::none;
FormatConversionChar conv = FormatConversionCharInternal::kNone;
};
// Consume conversion spec prefix (not including '%') of [p, end) if valid.
// Examples of valid specs: "s", "d", "-12.6f".
// If valid, it returns the first character following the conversion spec,
// and the spec part is broken down and returned in 'conv'.
// If invalid, returns nullptr.
const char* ConsumeUnboundConversion(const char* p, const char* end,
UnboundConversion* conv, int* next_arg);
// Helper tag class for the table below.
// It allows fast `char -> ConversionChar/LengthMod` checking and
// conversions.
//
// The tag is a single signed byte:
//   tag_ >= 0          a conversion character (the enum value itself),
//   -128 < tag_ < 0    a length modifier, stored bitwise-inverted,
//   tag_ == -128       neither of the above.
class ConvTag {
 public:
  // Uses the std::-qualified fixed-width type, like the rest of this class:
  // <cstdint> is only guaranteed to declare the names in namespace std.
  constexpr ConvTag(FormatConversionChar conversion_char)  // NOLINT
      : tag_(static_cast<std::int8_t>(conversion_char)) {}
  // We invert the length modifiers to make them negative so that we can easily
  // test for them.
  constexpr ConvTag(LengthMod length_mod)  // NOLINT
      : tag_(~static_cast<std::int8_t>(length_mod)) {}
  // Everything else is -128, which is negative to make is_conv() simpler.
  constexpr ConvTag() : tag_(-128) {}

  // True if the tag holds a conversion character.
  bool is_conv() const { return tag_ >= 0; }
  // True if the tag holds a length modifier.
  bool is_length() const { return tag_ < 0 && tag_ != -128; }

  // Returns the stored conversion character. Requires `is_conv()`.
  FormatConversionChar as_conv() const {
    assert(is_conv());
    return static_cast<FormatConversionChar>(tag_);
  }
  // Returns the stored length modifier (undoing the bitwise inversion).
  // Requires `is_length()`.
  LengthMod as_length() const {
    assert(is_length());
    return static_cast<LengthMod>(~tag_);
  }

 private:
  std::int8_t tag_;
};
extern const ConvTag kTags[256];
// Keep a single table for all the conversion chars and length modifiers.
// Returns the table entry for `c`. The char is converted to unsigned char
// first, so the index is never negative even where plain char is signed.
inline ConvTag GetTagForChar(char c) {
  const unsigned char index = static_cast<unsigned char>(c);
  return kTags[index];
}
// Parse the format string provided in 'src' and pass the identified items into
// 'consumer'.
// Text runs will be passed by calling
// Consumer::Append(string_view);
// ConversionItems will be passed by calling
// Consumer::ConvertOne(UnboundConversion, string_view);
// In the case of ConvertOne, the string_view that is passed is the
// portion of the format string corresponding to the conversion, not including
// the leading %. On success, it returns true. On failure, it stops and returns
// false.
// A literal "%%" is reported to the consumer as the one-character text "%".
// A format string that ends in a lone '%' is an error.
template <typename Consumer>
bool ParseFormatString(string_view src, Consumer consumer) {
// Index of the last implicitly numbered argument handed out so far. The
// positional ("N$") parser sets it to -1; see the check below.
int next_arg = 0;
const char* p = src.data();
const char* const end = p + src.size();
while (p != end) {
const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
if (!percent) {
// We found the last substring.
return consumer.Append(string_view(p, end - p));
}
// We found a percent, so push the text run then process the percent.
if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
return false;
}
// A '%' must be followed by at least one more character.
if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
auto tag = GetTagForChar(percent[1]);
if (tag.is_conv()) {
// Fast path: '%' immediately followed by a conversion character, with no
// flags, width, precision or length modifier.
if (ABSL_PREDICT_FALSE(next_arg < 0)) {
// This indicates an error in the format string.
// The only way to get `next_arg < 0` here is to have a positional
// argument first which sets next_arg to -1 and then a non-positional
// argument.
return false;
}
p = percent + 2;
// Keep this case separate from the one below.
// ConvertOne is more efficient when the compiler can see that the `basic`
// flag is set.
UnboundConversion conv;
conv.conv = tag.as_conv();
conv.arg_position = ++next_arg;
if (ABSL_PREDICT_FALSE(
!consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
return false;
}
} else if (percent[1] != '%') {
// General path: let the full parser consume the spec after the '%'.
UnboundConversion conv;
p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
conv, string_view(percent + 1, p - (percent + 1))))) {
return false;
}
} else {
// "%%": emit a single literal '%' and skip both characters.
if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
p = percent + 2;
continue;
}
}
return true;
}
// Always returns true, or fails to compile in a constexpr context if s does not
// point to a constexpr char array.
// The comparison `s[0] == s[0]` is trivially true; it is there only so that
// the first character is read during constant evaluation (the empty check
// guards that read). This function is used as an `enable_if` condition on the
// ExtendedParsedFormat constructor.
constexpr bool EnsureConstexpr(string_view s) {
return s.empty() || s[0] == s[0];
}
// A parsed format string with value semantics.
//
// The text of all items (literal runs and conversion specs) is stored back to
// back in `data_`; `items_` describes each item and where its text ends.
// `has_error_` records whether parsing or conversion matching failed.
class ParsedFormatBase {
 public:
  // Parses `format`. `convs` lists the conversion sets allowed for each
  // argument; `allow_ignored` is forwarded to MatchesConversions().
  explicit ParsedFormatBase(
      string_view format, bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs);

  // Copy and move construction are implemented through the assignment
  // operators below.
  ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
  ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }

  ParsedFormatBase& operator=(const ParsedFormatBase& other) {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    items_ = other.items_;
    // The text in use ends where the last item ends.
    const size_t text_size = items_.empty() ? 0 : items_.back().text_end;
    if (text_size == 0) {
      // Nothing to copy. `other.data_` may be null here (the constructor
      // allocates no buffer for an empty format), and passing a null pointer
      // to memcpy is undefined behavior even when the length is zero.
      data_.reset();
    } else {
      data_.reset(new char[text_size]);
      memcpy(data_.get(), other.data_.get(), text_size);
    }
    return *this;
  }

  ParsedFormatBase& operator=(ParsedFormatBase&& other) {
    if (this == &other) return *this;
    has_error_ = other.has_error_;
    data_ = std::move(other.data_);
    items_ = std::move(other.items_);
    // Reset the vector to make sure the invariants hold.
    other.items_.clear();
    return *this;
  }

  // Replays the parsed items, in order, into `consumer`: conversions through
  // ConvertOne(conv, text) and literal runs through Append(text). Stops and
  // returns false as soon as the consumer returns false; otherwise returns
  // true only if this object has no error.
  template <typename Consumer>
  bool ProcessFormat(Consumer consumer) const {
    const char* const base = data_.get();
    string_view text(base, 0);
    for (const auto& item : items_) {
      // Each item's text starts where the previous item's text ended.
      const char* const end = text.data() + text.size();
      text = string_view(end, (base + item.text_end) - end);
      if (item.is_conversion) {
        if (!consumer.ConvertOne(item.conv, text)) return false;
      } else {
        if (!consumer.Append(text)) return false;
      }
    }
    return !has_error_;
  }

  bool has_error() const { return has_error_; }

 private:
  // Returns whether the conversions match and if !allow_ignored it verifies
  // that all conversions are used by the format.
  bool MatchesConversions(
      bool allow_ignored,
      std::initializer_list<FormatConversionCharSet> convs) const;

  struct ParsedFormatConsumer;

  struct ConversionItem {
    bool is_conversion;
    // Points to the past-the-end location of this element in the data_ array.
    size_t text_end;
    UnboundConversion conv;
  };

  bool has_error_;
  std::unique_ptr<char[]> data_;
  std::vector<ConversionItem> items_;
};
// A value type representing a preparsed format. These can be created, copied
// around, and reused to speed up formatting loops.
// The user must specify through the template arguments the conversion
// characters used in the format. This will be checked at compile time.
//
// This class uses Conv enum values to specify each argument.
// This allows for more flexibility as you can specify multiple possible
// conversion characters for each argument.
// ParsedFormat<char...> is a simplified alias for when the user only
// needs to specify a single conversion character for each argument.
//
// Example:
// // Extended format supports multiple characters per argument:
// using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
// MyFormat GetFormat(bool use_hex) {
// if (use_hex) return MyFormat("foo %x bar");
// return MyFormat("foo %d bar");
// }
// // 'format' can be used with any value that supports 'd' and 'x',
// // like `int`.
// auto format = GetFormat(use_hex);
// value = StringF(format, i);
//
// This class also supports runtime format checking with the ::New() and
// ::NewAllowIgnored() factory functions.
// This is the only API that allows the user to pass a runtime specified format
// string. These factory functions will return NULL if the format does not match
// the conversions requested by the user.
template <FormatConversionCharSet... C>
class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
 public:
  // Constructs from a format string. When ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
  // is defined, the enable_if attributes below require the string to be
  // usable in a constant expression and to match the template arguments.
  explicit ExtendedParsedFormat(string_view format)
#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
      __attribute__((
          enable_if(str_format_internal::EnsureConstexpr(format),
                    "Format string is not constexpr."),
          enable_if(str_format_internal::ValidFormatImpl<C...>(format),
                    "Format specified does not match the template arguments.")))
#endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
      : ExtendedParsedFormat(format, /*allow_ignored=*/false) {
  }

  // Runtime-checked factories.
  // The conversion characters still come from the template arguments, but the
  // format is validated when the object is built instead of at compile time,
  // which allows dynamically sourced format strings.
  // Both return a null pointer if the format does not match the conversion
  // characters; callers must check the result before using it.
  //
  // New() also rejects formats that leave any declared argument unused.
  // NewAllowIgnored() accepts formats that ignore arguments.
  static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
    return New(format, /*allow_ignored=*/false);
  }
  static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
      string_view format) {
    return New(format, /*allow_ignored=*/true);
  }

 private:
  // Shared implementation of the two public factories.
  static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
                                                   bool allow_ignored) {
    std::unique_ptr<ExtendedParsedFormat> parsed(
        new ExtendedParsedFormat(format, allow_ignored));
    // Drop the object (yielding a null pointer) if parsing reported an error.
    if (parsed->has_error()) parsed.reset();
    return parsed;
  }

  ExtendedParsedFormat(string_view s, bool allow_ignored)
      : ParsedFormatBase(s, allow_ignored, {C...}) {}
};
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_

View file

@ -0,0 +1,413 @@
#include "absl/strings/internal/str_format/parser.h"
#include <string.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/macros.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
using testing::Pair;
// Verifies that LengthModToString() returns the expected spelling for every
// LengthMod enumerator listed in the table.
TEST(LengthModTest, Names) {
struct Expectation {
int line;
LengthMod mod;
const char *name;
};
const Expectation kExpect[] = {
{__LINE__, LengthMod::none, "" },
{__LINE__, LengthMod::h, "h" },
{__LINE__, LengthMod::hh, "hh"},
{__LINE__, LengthMod::l, "l" },
{__LINE__, LengthMod::ll, "ll"},
{__LINE__, LengthMod::L, "L" },
{__LINE__, LengthMod::j, "j" },
{__LINE__, LengthMod::z, "z" },
{__LINE__, LengthMod::t, "t" },
{__LINE__, LengthMod::q, "q" },
};
// Pins the number of table rows so a row cannot be dropped unnoticed.
EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), 10);
for (auto e : kExpect) {
// Report the table row's source line on failure.
SCOPED_TRACE(e.line);
EXPECT_EQ(e.name, LengthModToString(e.mod));
}
}
// Verifies that FormatConversionCharToChar() maps each listed conversion
// enumerator to its single-letter name, and kNone to '\0'.
TEST(ConversionCharTest, Names) {
struct Expectation {
FormatConversionChar id;
char name;
};
// clang-format off
const Expectation kExpect[] = {
// X(c) pairs the enumerator `c` with the first character of its own name.
#define X(c) {FormatConversionCharInternal::c, #c[0]}
X(c), X(s), // text
X(d), X(i), X(o), X(u), X(x), X(X), // int
X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float
X(n), X(p), // misc
#undef X
{FormatConversionCharInternal::kNone, '\0'},
};
// clang-format on
for (auto e : kExpect) {
SCOPED_TRACE(e.name);
FormatConversionChar v = e.id;
EXPECT_EQ(e.name, FormatConversionCharToChar(v));
}
}
// Fixture for ConsumeUnboundConversion() tests. The most recently parsed
// conversion is left in `o` for the test body to inspect.
class ConsumeUnboundConversionTest : public ::testing::Test {
 public:
  // Parses one conversion from the start of `src`.
  // Returns {consumed text, remaining text}, or {empty, src} if parsing fails.
  std::pair<string_view, string_view> Consume(string_view src) {
    int next = 0;
    o = UnboundConversion();  // refresh
    const char* const begin = src.data();
    const char* const end = begin + src.size();
    const char* const stop = ConsumeUnboundConversion(begin, end, &o, &next);
    if (stop == nullptr) return {{}, src};
    return {string_view(begin, stop - begin), string_view(stop, end - stop)};
  }

  // Returns true if parsing succeeds and consumes all of `fmt`.
  // `force_positional` starts the argument counter at -1 instead of 0.
  bool Run(const char *fmt, bool force_positional = false) {
    int next = force_positional ? -1 : 0;
    o = UnboundConversion();  // refresh
    const char* const end = fmt + strlen(fmt);
    return ConsumeUnboundConversion(fmt, end, &o, &next) == end;
  }

  UnboundConversion o;
};
// Table-driven check of how much of the input a single conversion consumes.
// For each row, `out` is the expected consumed prefix and `src_post` the
// expected remainder; a failed parse yields an empty `out` and the whole
// input as the remainder.
TEST_F(ConsumeUnboundConversionTest, ConsumeSpecification) {
struct Expectation {
int line;
string_view src;
string_view out;
string_view src_post;
};
const Expectation kExpect[] = {
{__LINE__, "", "", "" },
{__LINE__, "b", "", "b" }, // 'b' is invalid
{__LINE__, "ba", "", "ba"}, // 'b' is invalid
{__LINE__, "l", "", "l" }, // just length mod isn't okay
{__LINE__, "d", "d", "" }, // basic
{__LINE__, "d ", "d", " " }, // leave suffix
{__LINE__, "dd", "d", "d" }, // don't be greedy
{__LINE__, "d9", "d", "9" }, // leave non-space suffix
{__LINE__, "dzz", "d", "zz"}, // length mod as suffix
{__LINE__, "1$*2$d", "1$*2$d", "" }, // arg indexing and * allowed.
{__LINE__, "0-14.3hhd", "0-14.3hhd", ""}, // precision, width
{__LINE__, " 0-+#14.3hhd", " 0-+#14.3hhd", ""}, // flags
};
for (const auto& e : kExpect) {
SCOPED_TRACE(e.line);
EXPECT_THAT(Consume(e.src), Pair(e.out, e.src_post));
}
}
// Checks the simplest spec ("d") and the state it leaves behind: no width, no
// precision, and the first argument position.
TEST_F(ConsumeUnboundConversionTest, BasicConversion) {
EXPECT_FALSE(Run(""));
EXPECT_FALSE(Run("z"));
EXPECT_FALSE(Run("dd")); // no excess allowed
EXPECT_TRUE(Run("d"));
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
// An unspecified width / precision reads back as a negative value.
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(1, o.arg_position);
}
// Checks parsing of explicit "N$" argument positions, both when the parser
// starts in non-positional mode and when it is forced into positional mode.
TEST_F(ConsumeUnboundConversionTest, ArgPosition) {
EXPECT_TRUE(Run("d"));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("3$d"));
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("1$d"));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("1$d", true));
EXPECT_EQ(1, o.arg_position);
EXPECT_TRUE(Run("123$d"));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("123$d", true));
EXPECT_EQ(123, o.arg_position);
EXPECT_TRUE(Run("10$d"));
EXPECT_EQ(10, o.arg_position);
EXPECT_TRUE(Run("10$d", true));
EXPECT_EQ(10, o.arg_position);
// Position can't be zero.
EXPECT_FALSE(Run("0$d"));
EXPECT_FALSE(Run("0$d", true));
EXPECT_FALSE(Run("1$*0$d"));
EXPECT_FALSE(Run("1$.*0$d"));
// Position can't start with a zero digit at all. That is not a 'decimal'.
EXPECT_FALSE(Run("01$p"));
EXPECT_FALSE(Run("01$p", true));
EXPECT_FALSE(Run("1$*01$p"));
EXPECT_FALSE(Run("1$.*01$p"));
}
// Checks literal and '*' (from-argument) width and precision, in implicit and
// positional form, plus the limits on how large a literal value may be.
// Each Run() call resets `o`, so every group of expectations refers to the
// Run() immediately above it.
TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) {
// Literal width only.
EXPECT_TRUE(Run("14d"));
EXPECT_EQ('d', FormatConversionCharToChar(o.conv));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
// A '.' with no digits means precision 0.
EXPECT_TRUE(Run("14.d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(0, o.precision.value());
EXPECT_TRUE(Run(".d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal precision only.
EXPECT_TRUE(Run(".5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(5, o.precision.value());
EXPECT_TRUE(Run(".0d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(0, o.precision.value());
// Literal width and precision together.
EXPECT_TRUE(Run("14.5d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(14, o.width.value());
EXPECT_EQ(5, o.precision.value());
// Implicit '*': width, precision and value take consecutive arguments.
EXPECT_TRUE(Run("*.*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(2, o.precision.get_from_arg());
EXPECT_EQ(3, o.arg_position);
EXPECT_TRUE(Run("*d"));
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(1, o.width.get_from_arg());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_LT(o.precision.value(), 0);
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(Run(".*d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_LT(o.width.value(), 0);
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(1, o.precision.get_from_arg());
EXPECT_EQ(2, o.arg_position);
// mixed implicit and explicit: didn't specify arg position.
EXPECT_FALSE(Run("*23$.*34$d"));
// Fully positional: value, width and precision each name their argument.
EXPECT_TRUE(Run("12$*23$.*34$d"));
EXPECT_EQ(12, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(23, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(34, o.precision.get_from_arg());
EXPECT_TRUE(Run("2$*5$.*9$d"));
EXPECT_EQ(2, o.arg_position);
EXPECT_TRUE(o.width.is_from_arg());
EXPECT_EQ(5, o.width.get_from_arg());
EXPECT_TRUE(o.precision.is_from_arg());
EXPECT_EQ(9, o.precision.get_from_arg());
EXPECT_FALSE(Run(".*0$d")) << "no arg 0";
// Large values
EXPECT_TRUE(Run("999999999.999999999d"));
EXPECT_FALSE(o.width.is_from_arg());
EXPECT_EQ(999999999, o.width.value());
EXPECT_FALSE(o.precision.is_from_arg());
EXPECT_EQ(999999999, o.precision.value());
// Values with ten or more digits are rejected.
EXPECT_FALSE(Run("1000000000.999999999d"));
EXPECT_FALSE(Run("999999999.1000000000d"));
EXPECT_FALSE(Run("9999999999d"));
EXPECT_FALSE(Run(".9999999999d"));
}
// Exhaustively checks every subset of the flag characters "-+ #0", in both
// forward and reversed order, and verifies that exactly the flags present in
// the format string are set on the parsed conversion.
TEST_F(ConsumeUnboundConversionTest, Flags) {
static const char kAllFlags[] = "-+ #0";
// Exclude the terminating NUL from the flag count.
static const int kNumFlags = ABSL_ARRAYSIZE(kAllFlags) - 1;
for (int rev = 0; rev < 2; ++rev) {
// `i` is a bitmask selecting which flags go into this format string.
for (int i = 0; i < 1 << kNumFlags; ++i) {
std::string fmt;
for (int k = 0; k < kNumFlags; ++k)
if ((i >> k) & 1) fmt += kAllFlags[k];
// flag order shouldn't matter
if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); }
fmt += 'd';
SCOPED_TRACE(fmt);
EXPECT_TRUE(Run(fmt.c_str()));
EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left);
EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos);
EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col);
EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt);
EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero);
}
}
}
// Checks the `basic` flag: it stays on for specs made only of an optional
// position, length modifier and conversion character, and is turned off by a
// width, precision or flag character.
TEST_F(ConsumeUnboundConversionTest, BasicFlag) {
// Flag is on
for (const char* fmt : {"d", "llx", "G", "1$X"}) {
SCOPED_TRACE(fmt);
EXPECT_TRUE(Run(fmt));
EXPECT_TRUE(o.flags.basic);
}
// Flag is off
for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) {
SCOPED_TRACE(fmt);
EXPECT_TRUE(Run(fmt));
EXPECT_FALSE(o.flags.basic);
}
}
// Checks that each length modifier spelling is parsed into the matching
// LengthMod value, including the two-character "hh" and "ll" forms.
TEST_F(ConsumeUnboundConversionTest, LengthMod) {
EXPECT_TRUE(Run("d"));
EXPECT_EQ(LengthMod::none, o.length_mod);
EXPECT_TRUE(Run("hd"));
EXPECT_EQ(LengthMod::h, o.length_mod);
EXPECT_TRUE(Run("hhd"));
EXPECT_EQ(LengthMod::hh, o.length_mod);
EXPECT_TRUE(Run("ld"));
EXPECT_EQ(LengthMod::l, o.length_mod);
EXPECT_TRUE(Run("lld"));
EXPECT_EQ(LengthMod::ll, o.length_mod);
EXPECT_TRUE(Run("Lf"));
EXPECT_EQ(LengthMod::L, o.length_mod);
EXPECT_TRUE(Run("qf"));
EXPECT_EQ(LengthMod::q, o.length_mod);
EXPECT_TRUE(Run("jd"));
EXPECT_EQ(LengthMod::j, o.length_mod);
EXPECT_TRUE(Run("zd"));
EXPECT_EQ(LengthMod::z, o.length_mod);
EXPECT_TRUE(Run("td"));
EXPECT_EQ(LengthMod::t, o.length_mod);
}
// Format consumer that renders what it receives as a compact summary string:
// text runs as "[text]" and conversions as
// "{spec:POS$<WIDTH$*><.PREC$*>CHAR}", where the width and precision parts
// appear only when they are taken from an argument.
struct SummarizeConsumer {
  std::string* out;
  explicit SummarizeConsumer(std::string* out) : out(out) {}

  bool Append(string_view s) {
    const std::string bracketed = "[" + std::string(s) + "]";
    out->append(bracketed);
    return true;
  }

  bool ConvertOne(const UnboundConversion& conv, string_view s) {
    std::string& dest = *out;
    dest.append("{").append(std::string(s)).append(":");
    dest.append(std::to_string(conv.arg_position)).append("$");
    if (conv.width.is_from_arg()) {
      dest.append(std::to_string(conv.width.get_from_arg())).append("$*");
    }
    if (conv.precision.is_from_arg()) {
      dest.append(".")
          .append(std::to_string(conv.precision.get_from_arg()))
          .append("$*");
    }
    dest.push_back(FormatConversionCharToChar(conv.conv));
    dest.append("}");
    return true;
  }
};
// Returns the SummarizeConsumer rendering of `pc`, with a trailing "!" if
// ProcessFormat() reports failure.
std::string SummarizeParsedFormat(const ParsedFormatBase& pc) {
  std::string summary;
  const bool ok = pc.ProcessFormat(SummarizeConsumer(&summary));
  if (!ok) summary += "!";
  return summary;
}
// Empty fixture used to group the ParsedFormatBase tests.
class ParsedFormatTest : public testing::Test {};
// Exercises copy construction, copy assignment, move assignment and swap of
// ParsedFormatBase, for both an empty and a non-empty format, comparing the
// objects through their summaries.
TEST_F(ParsedFormatTest, ValueSemantics) {
ParsedFormatBase p1({}, true, {}); // empty format
EXPECT_EQ("", SummarizeParsedFormat(p1));
ParsedFormatBase p2 = p1; // copy construct (empty)
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));
p1 = ParsedFormatBase("hello%s", true,
{FormatConversionCharSetInternal::s}); // move assign
EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1));
ParsedFormatBase p3 = p1; // copy construct (nonempty)
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p3));
using std::swap;
swap(p1, p2);
EXPECT_EQ("", SummarizeParsedFormat(p1));
EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p2));
swap(p1, p2); // undo
p2 = p1; // copy assign
EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2));
}
// One row of a table-driven ParsedFormatBase test.
struct ExpectParse {
// Format string to parse.
const char* in;
// Conversion sets passed to the ParsedFormatBase constructor.
std::initializer_list<FormatConversionCharSet> conv_set;
// Expected SummarizeParsedFormat() result.
const char* out;
};
// Table-driven check that ParsedFormatBase splits a format into the expected
// text runs and conversions, and that an invalid conversion stops processing
// (summary ends in "!").
TEST_F(ParsedFormatTest, Parsing) {
// Parse should be equivalent to that obtained by ConversionParseIterator.
// No need to retest the parsing edge cases here.
const ExpectParse kExpect[] = {
{"", {}, ""},
{"ab", {}, "[ab]"},
{"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"},
{"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"},
{"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"},
{"a%b %d", {}, "[a]!"}, // stop after error
};
for (const auto& e : kExpect) {
SCOPED_TRACE(e.in);
EXPECT_EQ(e.out,
SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set)));
}
}
// Checks that flag characters are accepted in any order (and with a repeated
// flag in the last row) and that the spec text is preserved verbatim.
TEST_F(ParsedFormatTest, ParsingFlagOrder) {
const ExpectParse kExpect[] = {
{"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"},
{"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"},
{"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"},
{"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"},
{"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"},
{"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"},
{"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"},
};
for (const auto& e : kExpect) {
SCOPED_TRACE(e.in);
EXPECT_EQ(e.out,
SummarizeParsedFormat(ParsedFormatBase(e.in, false, e.conv_set)));
}
}
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,314 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Join API that are inlined/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in this file are:
//
// - A handful of default Formatters
// - JoinAlgorithm() overloads
// - JoinRange() overloads
// - JoinTuple()
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_join.h
//
// IWYU pragma: private, include "absl/strings/str_join.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/strings/internal/ostringstream.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
//
// Formatter objects
//
// The following are implementation classes for standard Formatter objects. The
// factory functions that users will call to create and use these formatters are
// defined and documented in absl/strings/str_join.h.
//
// The default formatter. Converts alpha-numeric types to strings.
// Both overloads append the converted value to `*out` via StrAppend().
struct AlphaNumFormatterImpl {
// This template is needed in order to support passing in a dereferenced
// vector<bool>::iterator
// The argument is wrapped in an AlphaNum explicitly before appending.
template <typename T>
void operator()(std::string* out, const T& t) const {
StrAppend(out, AlphaNum(t));
}
// Overload for arguments that already are an AlphaNum.
void operator()(std::string* out, const AlphaNum& t) const {
StrAppend(out, t);
}
};
// A type that's used to overload the JoinAlgorithm() function (defined below)
// for ranges that do not require additional formatting (e.g., a range of
// strings).
// It adds no members of its own; it only inherits from AlphaNumFormatterImpl.
struct NoFormatter : public AlphaNumFormatterImpl {};
// Formatter that appends a value to the output string by streaming it with
// operator<< into an OStringStream.
class StreamFormatterImpl {
 public:
  // Deliberately non-const: the call mutates the cached stream. Declaring it
  // const would render StreamFormatterImpl thread-hostile.
  template <typename T>
  void operator()(std::string* out, const T& t) {
    if (!strm_) {
      // First use. The stream is created lazily to avoid paying the
      // relatively high cost of its construction when joining an empty range.
      strm_.reset(new strings_internal::OStringStream(out));
    } else {
      // Reuse the stream: clear the bad, fail and eof bits in case they were
      // set, then point it at `out`.
      strm_->clear();
      strm_->str(out);
    }
    *strm_ << t;
  }

 private:
  std::unique_ptr<strings_internal::OStringStream> strm_;
};
// Formatter for pair-like values: appends `first` using f1_, then the
// separator sep_, then `second` using f2_.
template <typename F1, typename F2>
class PairFormatterImpl {
 public:
  PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
      : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}

  // Non-const overload, for wrapped formatters whose call operator is
  // non-const.
  template <typename PairLike>
  void operator()(std::string* out, const PairLike& entry) {
    f1_(out, entry.first);
    *out += sep_;
    f2_(out, entry.second);
  }

  // Const overload, for wrapped formatters callable through a const object.
  template <typename PairLike>
  void operator()(std::string* out, const PairLike& entry) const {
    f1_(out, entry.first);
    *out += sep_;
    f2_(out, entry.second);
  }

 private:
  F1 f1_;
  std::string sep_;
  F2 f2_;
};
// Wraps another formatter and dereferences the argument to operator() then
// passes the dereferenced argument to the wrapped formatter. This can be
// useful, for example, to join a std::vector<int*>.
template <typename Formatter>
class DereferenceFormatterImpl {
public:
DereferenceFormatterImpl() : f_() {}
explicit DereferenceFormatterImpl(Formatter&& f)
: f_(std::forward<Formatter>(f)) {}
template <typename T>
void operator()(std::string* out, const T& t) {
f_(out, *t);
}
template <typename T>
void operator()(std::string* out, const T& t) const {
f_(out, *t);
}
private:
Formatter f_;
};
// DefaultFormatter<T> is a traits class whose nested ::Type names the
// Formatter to use for values of type T. It is consulted by the join entry
// points that do NOT take an explicit Formatter argument.
//
// The primary template selects AlphaNumFormatterImpl; the specializations
// below override that for particular types.
template <typename ValueType>
struct DefaultFormatter {
  using Type = AlphaNumFormatterImpl;
};

// C strings are formatted as text rather than being dereferenced by the
// generic pointer specialization further down.
template <>
struct DefaultFormatter<const char*> {
  using Type = AlphaNumFormatterImpl;
};
template <>
struct DefaultFormatter<char*> {
  using Type = AlphaNumFormatterImpl;
};

// String-like types select NoFormatter.
template <>
struct DefaultFormatter<std::string> {
  using Type = NoFormatter;
};
template <>
struct DefaultFormatter<absl::string_view> {
  using Type = NoFormatter;
};

// Pointers are dereferenced and then formatted with the pointee's default
// formatter.
template <typename ValueType>
struct DefaultFormatter<ValueType*> {
  using Type =
      DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>;
};

// std::unique_ptr<T> is treated exactly like T*.
template <typename ValueType>
struct DefaultFormatter<std::unique_ptr<ValueType>>
    : public DefaultFormatter<ValueType*> {};
//
// JoinAlgorithm() functions
//
// The general joining algorithm. Walks [start, end), formats each element
// into the output string with `f`, and inserts the separator `s` between
// consecutive elements (never before the first or after the last).
template <typename Iterator, typename Formatter>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          Formatter&& f) {
  std::string joined;
  bool first = true;
  for (Iterator cur = start; cur != end; ++cur) {
    if (!first) joined.append(s.data(), s.size());
    first = false;
    f(&joined, *cur);
  }
  return joined;
}
// Joining algorithm specialized for a forward-iterator range of string-like
// elements that need no formatting, e.g. a std::vector<std::string> or a
// std::vector<absl::string_view>.
//
// This overload is selected through its NoFormatter parameter. NoFormatter is
// an internal type, so the overload is only reached when a range of
// string-like objects is joined without an explicit Formatter.
//
// The range is traversed twice: a first pass adds up the exact size of the
// result so the output string can be sized once, and a second pass copies the
// elements and separators into place.
template <typename Iterator,
          typename = typename std::enable_if<std::is_convertible<
              typename std::iterator_traits<Iterator>::iterator_category,
              std::forward_iterator_tag>::value>::type>
std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
                          NoFormatter) {
  std::string result;
  if (start == end) return result;

  // Pass 1: total size = all elements plus one separator per gap.
  size_t total_size = start->size();
  {
    Iterator it = start;
    for (++it; it != end; ++it) {
      total_size += s.size();
      total_size += it->size();
    }
  }
  if (total_size == 0) return result;

  STLStringResizeUninitialized(&result, total_size);

  // Pass 2: copy each piece to the write cursor and advance the cursor.
  char* cursor = &*result.begin();
  auto emit = [&cursor](const char* data, size_t size) {
    memcpy(cursor, data, size);
    cursor += size;
  };
  emit(start->data(), start->size());
  Iterator it = start;
  for (++it; it != end; ++it) {
    emit(s.data(), s.size());
    emit(it->data(), it->size());
  }
  return result;
}
// JoinTupleLoop<I, N> is a compile-time recursion over the heterogeneous
// elements of a std::tuple. The primary template handles element I: it emits
// the separator (for every index except 0), formats the element, and then
// hands off to JoinTupleLoop<I + 1, N>. The <N, N> specialization matches the
// end of the tuple and stops the recursion by doing nothing.
template <size_t I, size_t N>
struct JoinTupleLoop {
  template <typename Tup, typename Formatter>
  void operator()(std::string* out, const Tup& tup, absl::string_view sep,
                  Formatter&& fmt) {
    // Only elements after the first are preceded by a separator.
    if (I != 0) {
      out->append(sep.data(), sep.size());
    }
    fmt(out, std::get<I>(tup));
    // Continue with the next tuple index.
    JoinTupleLoop<I + 1, N> next;
    next(out, tup, sep, fmt);
  }
};
// End-of-tuple case: nothing left to format.
template <size_t N>
struct JoinTupleLoop<N, N> {
  template <typename Tup, typename Formatter>
  void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
};
template <typename... T, typename Formatter>
std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
Formatter&& fmt) {
std::string result;
JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
return result;
}
template <typename Iterator>
std::string JoinRange(Iterator first, Iterator last,
absl::string_view separator) {
// No formatter was explicitly given, so a default must be chosen.
typedef typename std::iterator_traits<Iterator>::value_type ValueType;
typedef typename DefaultFormatter<ValueType>::Type Formatter;
return JoinAlgorithm(first, last, separator, Formatter());
}
template <typename Range, typename Formatter>
std::string JoinRange(const Range& range, absl::string_view separator,
Formatter&& fmt) {
using std::begin;
using std::end;
return JoinAlgorithm(begin(range), end(range), separator, fmt);
}
template <typename Range>
std::string JoinRange(const Range& range, absl::string_view separator) {
using std::begin;
using std::end;
return JoinRange(begin(range), end(range), separator);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_

View file

@ -0,0 +1,455 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
// - ConvertibleToStringView
// - SplitIterator<>
// - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#include <array>
#include <initializer_list>
#include <iterator>
#include <map>
#include <type_traits>
#include <utility>
#include <vector>
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"
#ifdef _GLIBCXX_DEBUG
#include "absl/strings/internal/stl_type_traits.h"
#endif // _GLIBCXX_DEBUG
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from. If it's constructed from a temporary
// string, the data is moved into a data member so its lifetime matches that of
// the ConvertibleToStringView instance.
class ConvertibleToStringView {
public:
// The constructors below only record a view of the caller's data; copy_ stays
// empty.
ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
: value_(s) {}
ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
: value_(s) {}
// Matches rvalue strings and moves their data to a member.
ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
: copy_(std::move(s)), value_(copy_) {}
// Copy: duplicates other's copy_. If `other` was viewing its own copy_, the
// new object views its own duplicate instead of other's storage; otherwise the
// external view is copied unchanged.
ConvertibleToStringView(const ConvertibleToStringView& other)
: copy_(other.copy_),
value_(other.IsSelfReferential() ? copy_ : other.value_) {}
// Move: members are default-constructed first, then taken over from `other`.
ConvertibleToStringView(ConvertibleToStringView&& other) {
StealMembers(std::move(other));
}
// Takes its argument by value, so this single operator handles assignment
// from both lvalues and rvalues.
ConvertibleToStringView& operator=(ConvertibleToStringView other) {
StealMembers(std::move(other));
return *this;
}
// Returns the viewed text.
absl::string_view value() const { return value_; }
private:
// Returns true if value_ refers to this object's internal copy_ member,
// determined by comparing their data pointers.
bool IsSelfReferential() const { return value_.data() == copy_.data(); }
// Takes over other's contents. When `other` views its own copy_, the string is
// moved here and both objects' views are re-pointed at their own copy_ members;
// otherwise only the (external) view is copied.
void StealMembers(ConvertibleToStringView&& other) {
if (other.IsSelfReferential()) {
copy_ = std::move(other.copy_);
value_ = copy_;
// Keep `other` viewing its own (moved-from) string.
other.value_ = other.copy_;
} else {
value_ = other.value_;
}
}
// Holds the data moved from temporary std::string arguments. Declared first
// so that 'value_' can refer to 'copy_'.
std::string copy_;
absl::string_view value_;
};
// An iterator that enumerates the parts of a string from a Splitter. The text
// to be split, the Delimiter, and the Predicate are all taken from the given
// Splitter object. Iterators may only be compared if they refer to the same
// Splitter instance.
//
// This class is NOT part of the public splitting API.
template <typename Splitter>
class SplitIterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = absl::string_view;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
// Iteration state:
//   kInitState - more delimiters may still follow the current position.
//   kLastState - the piece just produced ran to the end of the text; the next
//                increment moves to kEndState.
//   kEndState  - past-the-end; there is no current piece.
enum State { kInitState, kLastState, kEndState };
// Builds an iterator over `splitter`. The delimiter and predicate are copied
// from the splitter; the text is re-read from the splitter on each increment.
// Unless `state` is kEndState, the first piece is loaded immediately.
SplitIterator(State state, const Splitter* splitter)
: pos_(0),
state_(state),
splitter_(splitter),
delimiter_(splitter->delimiter()),
predicate_(splitter->predicate()) {
// Hack to maintain backward compatibility. This one block makes it so an
// empty absl::string_view whose .data() happens to be nullptr behaves
// *differently* from an otherwise empty absl::string_view whose .data() is
// not nullptr. This is an undesirable difference in general, but this
// behavior is maintained to avoid breaking existing code that happens to
// depend on this old behavior/bug. Perhaps it will be fixed one day. The
// difference in behavior is as follows:
// Split(absl::string_view(""), '-'); // {""}
// Split(absl::string_view(), '-'); // {}
if (splitter_->text().data() == nullptr) {
state_ = kEndState;
pos_ = splitter_->text().size();
return;
}
if (state_ == kEndState) {
// An end iterator sits at the end of the text, which is also where a begin
// iterator's pos_ ends up once it is exhausted, so the two compare equal.
pos_ = splitter_->text().size();
} else {
// Advance once so that operator* yields the first accepted piece.
++(*this);
}
}
// Returns true once the iterator has moved past the last piece.
bool at_end() const { return state_ == kEndState; }
// Both accessors expose the current piece, curr_.
reference operator*() const { return curr_; }
pointer operator->() const { return &curr_; }
// Advances to the next piece accepted by the predicate, or to kEndState if
// the previously produced piece was the last one.
SplitIterator& operator++() {
do {
if (state_ == kLastState) {
state_ = kEndState;
return *this;
}
const absl::string_view text = splitter_->text();
// Find() is expected to return a view into `text` marking the next
// delimiter at or after pos_; judging by the check below, a result that
// starts at the end of `text` means no further delimiter was found.
const absl::string_view d = delimiter_.Find(text, pos_);
if (d.data() == text.data() + text.size()) state_ = kLastState;
// The piece runs from pos_ up to the start of the delimiter.
curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
// Skip over both the piece and the delimiter that terminated it.
pos_ += curr_.size() + d.size();
} while (!predicate_(curr_));
return *this;
}
// Post-increment: returns a copy of the iterator as it was before advancing.
SplitIterator operator++(int) {
SplitIterator old(*this);
++(*this);
return old;
}
// Equality compares only the state and the position; the Splitter pointer is
// not compared.
friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
return a.state_ == b.state_ && a.pos_ == b.pos_;
}
friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
return !(a == b);
}
private:
// Offset into the text where the next search starts.
size_t pos_;
State state_;
// The piece most recently produced by operator++.
absl::string_view curr_;
const Splitter* splitter_;
typename Splitter::DelimiterType delimiter_;
typename Splitter::PredicateType predicate_;
};
// HasMappedType<T>::value is true exactly when T declares a nested type
// T::mapped_type; otherwise the primary template (false) is chosen.
template <typename Candidate, typename = void>
struct HasMappedType : std::false_type {};
template <typename Candidate>
struct HasMappedType<Candidate, absl::void_t<typename Candidate::mapped_type>>
    : std::true_type {};

// HasValueType<T>::value is true exactly when T declares a nested type
// T::value_type.
template <typename Candidate, typename = void>
struct HasValueType : std::false_type {};
template <typename Candidate>
struct HasValueType<Candidate, absl::void_t<typename Candidate::value_type>>
    : std::true_type {};

// HasConstIterator<T>::value is true exactly when T declares a nested type
// T::const_iterator.
template <typename Candidate, typename = void>
struct HasConstIterator : std::false_type {};
template <typename Candidate>
struct HasConstIterator<Candidate,
                        absl::void_t<typename Candidate::const_iterator>>
    : std::true_type {};
// IsInitializerList<T>::value is true iff T is a std::initializer_list. More
// details below in Splitter<> where this is used.
//
// Implemented by overload resolution on a T*: the template overload is chosen
// when the pointer matches std::initializer_list<Elem>*, and the variadic
// overload catches everything else. Neither function is ever defined; only
// their return types are inspected.
std::false_type IsInitializerListDispatch(...);  // fallback: not a list
template <typename Elem>
std::true_type IsInitializerListDispatch(std::initializer_list<Elem>*);
template <typename Candidate>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<Candidate*>())) {};
// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
// is true for type 'C'.
//
// Restricts conversion to container-like types (by testing for the presence of
// a const_iterator member type) and also disables conversion to a
// std::initializer_list (which also has a const_iterator). Otherwise, code
// compiled in C++11 will get an error due to ambiguous conversion paths (in
// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
// or an std::initializer_list<T>).
// Implementation detail of SplitterIsConvertibleTo. The two bool parameters
// select one of the specializations below; any combination not specialized
// (i.e. has_value_type == false) yields false.
template <typename C, bool has_value_type, bool has_mapped_type>
struct SplitterIsConvertibleToImpl : std::false_type {};
// Container with a value_type but no mapped_type: convertible when the
// value_type can be constructed from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, false>
: std::is_constructible<typename C::value_type, absl::string_view> {};
// Container that also has a mapped_type: both key_type and mapped_type must be
// constructible from an absl::string_view.
template <typename C>
struct SplitterIsConvertibleToImpl<C, true, true>
: absl::conjunction<
std::is_constructible<typename C::key_type, absl::string_view>,
std::is_constructible<typename C::mapped_type, absl::string_view>> {};
// Computes the two flags for C and forwards to the Impl above. The first flag
// requires that C is not a std::initializer_list and has both value_type and
// const_iterator members; when _GLIBCXX_DEBUG is defined it additionally
// requires IsStrictlyBaseOfAndConvertibleToSTLContainer<C> to be false. The
// second flag is whether C has a mapped_type member.
template <typename C>
struct SplitterIsConvertibleTo
: SplitterIsConvertibleToImpl<
C,
#ifdef _GLIBCXX_DEBUG
!IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
#endif // _GLIBCXX_DEBUG
!IsInitializerList<
typename std::remove_reference<C>::type>::value &&
HasValueType<C>::value && HasConstIterator<C>::value,
HasMappedType<C>::value> {
};
// This class implements the range that is returned by absl::StrSplit(). This
// class has templated conversion operators that allow it to be implicitly
// converted to a variety of types that the caller may have specified on the
// left-hand side of an assignment.
//
// The main interface for interacting with this class is through its implicit
// conversion operators. However, this class may also be used like a container
// in that it has .begin() and .end() member functions. It may also be used
// within a range-for loop.
//
// Output containers can be collections of any type that is constructible from
// an absl::string_view.
//
// A Predicate functor may be supplied. This predicate will be used to filter
// the split strings: only strings for which the predicate returns true will be
// kept. A Predicate object is any unary functor that takes an absl::string_view
// and returns bool.
// Splitter stores the text to split together with the Delimiter and Predicate
// objects, and exposes the resulting pieces through begin()/end() and through
// conversion operators to containers and std::pair.
template <typename Delimiter, typename Predicate>
class Splitter {
public:
using DelimiterType = Delimiter;
using PredicateType = Predicate;
using const_iterator = strings_internal::SplitIterator<Splitter>;
using value_type = typename std::iterator_traits<const_iterator>::value_type;
// Takes all three arguments by value and moves them into the members.
Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
: text_(std::move(input_text)),
delimiter_(std::move(d)),
predicate_(std::move(p)) {}
// Accessors used by SplitIterator to read the text and to copy the delimiter
// and predicate.
absl::string_view text() const { return text_.value(); }
const Delimiter& delimiter() const { return delimiter_; }
const Predicate& predicate() const { return predicate_; }
// Range functions that iterate the split substrings as absl::string_view
// objects. These methods enable a Splitter to be used in a range-based for
// loop.
const_iterator begin() const { return {const_iterator::kInitState, this}; }
const_iterator end() const { return {const_iterator::kEndState, this}; }
// An implicit conversion operator that is restricted to only those containers
// that the splitter is convertible to.
//
// The work is delegated to the ConvertToContainer specialization selected by
// the Container type, its value_type, and whether it has a mapped_type.
template <typename Container,
typename = typename std::enable_if<
SplitterIsConvertibleTo<Container>::value>::type>
operator Container() const { // NOLINT(runtime/explicit)
return ConvertToContainer<Container, typename Container::value_type,
HasMappedType<Container>::value>()(*this);
}
// Returns a pair with its .first and .second members set to the first two
// strings returned by the begin() iterator. Either/both of .first and .second
// will be constructed with empty strings if the iterator doesn't have a
// corresponding value.
template <typename First, typename Second>
operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
// Default-constructed (empty) views stand in for missing pieces.
absl::string_view first, second;
auto it = begin();
if (it != end()) {
first = *it;
if (++it != end()) {
second = *it;
}
}
return {First(first), Second(second)};
}
private:
// ConvertToContainer is a functor converting a Splitter to the requested
// Container of ValueType. It is specialized below to optimize splitting to
// certain combinations of Container and ValueType.
//
// This base template handles the generic case of storing the split results in
// the requested non-map-like container and converting the split substrings to
// the requested type.
template <typename Container, typename ValueType, bool is_map = false>
struct ConvertToContainer {
Container operator()(const Splitter& splitter) const {
Container c;
// std::inserter positioned at c.end(); each piece is converted to ValueType
// and inserted through it.
auto it = std::inserter(c, c.end());
for (const auto sp : splitter) {
*it++ = ValueType(sp);
}
return c;
}
};
// Partial specialization for a std::vector<absl::string_view>.
//
// Optimized for the common case of splitting to a
// std::vector<absl::string_view>. In this case we first split the results to
// a small array of absl::string_view on the stack, to reduce reallocations.
template <typename A>
struct ConvertToContainer<std::vector<absl::string_view, A>,
absl::string_view, false> {
std::vector<absl::string_view, A> operator()(
const Splitter& splitter) const {
// Plain pointer/length pair convertible to absl::string_view; presumably
// used so that `ar` below can be declared without initializing 16
// string_views -- TODO confirm.
struct raw_view {
const char* data;
size_t size;
operator absl::string_view() const { // NOLINT(runtime/explicit)
return {data, size};
}
};
std::vector<absl::string_view, A> v;
std::array<raw_view, 16> ar;
// Outer loop: one iteration per batch. Inner loop: fill `ar` with up to
// ar.size() pieces, stopping early when the iterator is exhausted. Each
// batch is then appended to `v` with a single insert.
for (auto it = splitter.begin(); !it.at_end();) {
size_t index = 0;
do {
ar[index].data = it->data();
ar[index].size = it->size();
++it;
} while (++index != ar.size() && !it.at_end());
v.insert(v.end(), ar.begin(), ar.begin() + index);
}
return v;
}
};
// Partial specialization for a std::vector<std::string>.
//
// Optimized for the common case of splitting to a std::vector<std::string>.
// In this case we first split the results to a std::vector<absl::string_view>
// so the returned std::vector<std::string> can have space reserved to avoid
// std::string moves.
template <typename A>
struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
std::vector<std::string, A> operator()(const Splitter& splitter) const {
// Goes through the Splitter's container conversion operator to collect the
// pieces as views, then builds the strings from that range in one step.
const std::vector<absl::string_view> v = splitter;
return std::vector<std::string, A>(v.begin(), v.end());
}
};
// Partial specialization for containers of pairs (e.g., maps).
//
// The algorithm is to insert a new pair into the map for each even-numbered
// item, with the even-numbered item as the key with a default-constructed
// value. Each odd-numbered item will then be assigned to the last pair's
// value.
template <typename Container, typename First, typename Second>
struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
Container operator()(const Splitter& splitter) const {
Container m;
typename Container::iterator it;
// Alternates between "insert a key" (true) and "assign the value of the
// entry `it` refers to" (false).
bool insert = true;
for (const auto sp : splitter) {
if (insert) {
it = Inserter<Container>::Insert(&m, First(sp), Second());
} else {
it->second = Second(sp);
}
insert = !insert;
}
return m;
}
// Inserts the key and value into the given map, returning an iterator to
// the inserted item. Specialized for std::map and std::multimap to use
// emplace() and adapt emplace()'s return value.
template <typename Map>
struct Inserter {
using M = Map;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// Generic case: insert() is expected to return a pair whose .first is the
// iterator.
return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
}
};
template <typename... Ts>
struct Inserter<std::map<Ts...>> {
using M = std::map<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// std::map::emplace returns pair<iterator, bool>; keep only the iterator.
return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
}
};
template <typename... Ts>
struct Inserter<std::multimap<Ts...>> {
using M = std::multimap<Ts...>;
template <typename... Args>
static typename M::iterator Insert(M* m, Args&&... args) {
// std::multimap::emplace returns the iterator directly.
return m->emplace(std::make_pair(std::forward<Args>(args)...));
}
};
};
// The text being split (owning it when it was passed as a temporary string),
// followed by the delimiter and predicate that iterators copy.
ConvertibleToStringView text_;
Delimiter delimiter_;
Predicate predicate_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

View file

@ -0,0 +1,53 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// UTF8 utilities, implemented to reduce dependencies.
#include "absl/strings/internal/utf8.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Writes the UTF-8 encoding of `utf8_char` into `buffer` and returns the
// number of bytes written (1 to 4). The length is chosen purely from the
// magnitude of the value; any value above 0xFFFF, including ones beyond
// 0x10FFFF, takes the four-byte path. `buffer` must have room for the bytes
// written; no terminator is appended.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
  // Builds a continuation byte (10xxxxxx) from the low six bits of `bits`.
  const auto trail = [](char32_t bits) {
    return static_cast<char>(0x80 | (bits & 0x3F));
  };

  if (utf8_char <= 0x7F) {
    // 0xxxxxxx
    buffer[0] = static_cast<char>(utf8_char);
    return 1;
  }
  if (utf8_char <= 0x7FF) {
    // 110xxxxx 10xxxxxx
    buffer[0] = static_cast<char>(0xC0 | (utf8_char >> 6));
    buffer[1] = trail(utf8_char);
    return 2;
  }
  if (utf8_char <= 0xFFFF) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    buffer[0] = static_cast<char>(0xE0 | (utf8_char >> 12));
    buffer[1] = trail(utf8_char >> 6);
    buffer[2] = trail(utf8_char);
    return 3;
  }
  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  buffer[0] = static_cast<char>(0xF0 | (utf8_char >> 18));
  buffer[1] = trail(utf8_char >> 12);
  buffer[2] = trail(utf8_char >> 6);
  buffer[3] = trail(utf8_char);
  return 4;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,50 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// UTF8 utilities, implemented to reduce dependencies.
#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
#define ABSL_STRINGS_INTERNAL_UTF8_H_
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
// out the UTF-8 encoding into buffer, and returns the number of chars
// it wrote.
//
// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
// are:
// 00 - 7F : 0xxxxxxx
// 80 - 7FF : 110xxxxx 10xxxxxx
// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// Values greater than 0x10FFFF are not supported and may or may not write
// characters into buffer, however never will more than kMaxEncodedUTF8Size
// bytes be written, regardless of the value of utf8_char.
// Upper bound on the number of bytes EncodeUTF8Char() writes into `buffer`.
enum { kMaxEncodedUTF8Size = 4 };
// Encodes `utf8_char` as UTF-8 into `buffer` and returns the number of chars
// written; see the description above for the supported range.
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_UTF8_H_

View file

@ -0,0 +1,66 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/utf8.h"
#include <cstdint>
#include <utility>
#include "gtest/gtest.h"
#include "absl/base/port.h"
namespace {
#if !defined(__cpp_char8_t)
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wc++2a-compat"
#endif
// Checks EncodeUTF8Char() against compiler-generated UTF-8 literals and
// verifies that it writes exactly as many bytes as it reports.
TEST(EncodeUTF8Char, BasicFunction) {
// Each entry pairs a code point with its expected UTF-8 byte sequence.
std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
{0x00A3, u8"\u00A3"},
{0x00010000, u8"\U00010000"},
{0x0000FFFF, u8"\U0000FFFF"},
{0x0010FFFD, u8"\U0010FFFD"}};
for (auto &test : tests) {
// Encode into two buffers pre-filled with different bytes (0x00 and 0xFF).
// A position that still holds its fill value in BOTH buffers was not
// written, whatever byte values the encoder produced.
char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
// One-past-the-end pointers according to the reported lengths.
char *buf0_written =
&buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
char *buf1_written =
&buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
// Scan backwards from the end to find how many leading bytes were actually
// modified.
int apparent_length = 7;
while (buf0[apparent_length - 1] == '\x00' &&
buf1[apparent_length - 1] == '\xFF') {
if (--apparent_length == 0) break;
}
// The observed length must match the reported length in both buffers and
// the expected encoding, and the bytes themselves must match.
EXPECT_EQ(apparent_length, buf0_written - buf0);
EXPECT_EQ(apparent_length, buf1_written - buf1);
EXPECT_EQ(apparent_length, test.second.length());
EXPECT_EQ(std::string(buf0, apparent_length), test.second);
EXPECT_EQ(std::string(buf1, apparent_length), test.second);
}
// Out-of-range inputs: only the reported length is checked, which must not
// exceed kMaxEncodedUTF8Size.
char buf[32] = "Don't Tread On Me";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
absl::strings_internal::kMaxEncodedUTF8Size);
char buf2[32] = "Negative is invalid but sane";
EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
absl::strings_internal::kMaxEncodedUTF8Size);
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif
#endif // !defined(__cpp_char8_t)
} // namespace