tvl-depot/absl/strings/numbers.h
Abseil Team 1e39f8626a Export of internal Abseil changes
--
b842b7fd9b1289be31f0b7ee8e62e48e550747cf by Greg Falcon <gfalcon@google.com>:

Change the Cord str_format formatter to use iteration instead of CordReader.

When Cord is publicly released, CordReader is not going with it.

PiperOrigin-RevId: 284780736

--
28e76c08ea7185a7ff9f4e0e02ae565fbbf7980f by Greg Falcon <gfalcon@google.com>:

Implementation detail change.

Introduce ABSL_NAMESPACE_BEGIN and _END annotation macros which indicate the beginning and end of a `namespace absl` scope.  Currently these do nothing, but they will be used to inject an inline namespace for LTS builds (to avoid symbol collisions against other Abseil versions).

These macros should not be used by end users, because end users should never write `namespace absl {` in their own code.

This CL applies these annotations to all code under //absl/base/.  The rest of Abseil will be annotated in this way in follow-up CLs.

PiperOrigin-RevId: 284776410

--
e1711dc6d696dcca50d4e7d4b4d8f3076575b7ec by Abseil Team <absl-team@google.com>:

--help changed to report long flags.

PiperOrigin-RevId: 284757720

--
78f66a68f428bbbd19d8d60e1125f43ba765fd35 by Tom Manshreck <shreck@google.com>:

Update comment on + or - in SimpleAtoi()

PiperOrigin-RevId: 284231843
GitOrigin-RevId: b842b7fd9b1289be31f0b7ee8e62e48e550747cf
Change-Id: I3046b31391bd11c8bc4abab7785a863c377cd757
2019-12-10 15:07:05 -05:00

259 lines
11 KiB
C++

//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: numbers.h
// -----------------------------------------------------------------------------
//
// This package contains functions for converting strings to numbers. For
// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
// which automatically detect and convert most number values appropriately.
#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_
#ifdef __SSE4_2__
#include <x86intrin.h>
#endif
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <string>
#include <type_traits>
#include "absl/base/internal/bits.h"
#ifdef __SSE4_2__
// TODO(jorg): Remove this when we figure out the right way
// to swap bytes on SSE 4.2 that works with the compilers
// we claim to support. Also, add tests for the compiler
// that doesn't support the Intel _bswap64 intrinsic but
// does support all the SSE 4.2 intrinsics
#include "absl/base/internal/endian.h"
#endif
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/numeric/int128.h"
#include "absl/strings/string_view.h"
namespace absl {
// SimpleAtoi()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into an integer value, returning `true` if successful. The string
// must reflect a base-10 integer, optionally preceded by a `+` or `-`, whose
// value falls within the range of the integer type. If any errors are
// encountered, this function returns `false`, leaving `out` in an unspecified
// state.
//
// The template is defined further down in this header. Its implementation
// statically requires `int_type` to be 4 or 8 bytes wide and rejects
// floating-point types; a separate non-template overload handles
// `absl::uint128`.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);
// SimpleAtof()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
// allowed formats for `str`, except SimpleAtof() is locale-independent and will
// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
//
// The parsed value is delivered through `out`; the return value reports only
// whether the conversion succeeded.
ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
// SimpleAtod()
//
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow,
// returning `true` if successful.
// See https://en.cppreference.com/w/c/string/byte/strtof (the same page also
// documents `strtod`) for details about the allowed formats for `str`, except
// SimpleAtod() is locale-independent and will always use the "C" locale. If any
// errors are encountered, this function returns `false`, leaving `out` in an
// unspecified state.
//
// The parsed value is delivered through `out`; the return value reports only
// whether the conversion succeeded.
ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);
// SimpleAtob()
//
// Converts the given string into a boolean, returning `true` if successful.
// The following case-insensitive strings are interpreted as boolean `true`:
// "true", "t", "yes", "y", "1". The following case-insensitive strings
// are interpreted as boolean `false`: "false", "f", "no", "n", "0". If any
// errors are encountered, this function returns `false`, leaving `out` in an
// unspecified state.
//
// Note that the return value signals whether parsing succeeded; it is not the
// parsed boolean. The parsed boolean is delivered through `out`.
ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);
} // namespace absl
// End of public API. Implementation details follow.
namespace absl {
namespace numbers_internal {
// Digit conversion.
//
// Lookup tables used when turning numbers into text. They are only declared
// here; the definitions live outside this header.

// The sixteen lowercase hexadecimal digits (the 17th element is presumably a
// terminating '\0' -- confirm against the definition).
extern const char kHexChar[17]; // 0123456789abcdef
// Two lowercase hex characters per byte value; FastHexToBufferZeroPad16()
// below reads the pair for byte value `b` starting at index `b * 2`.
extern const char kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011...
// One two-character decimal entry per value in [0, 100). Each entry is exactly
// two chars wide, so entries carry no terminator.
extern const char two_ASCII_digits[100][2]; // 00, 01, 02, 03...
// Stores the decimal form of `i` into `buf` as exactly two ASCII characters
// taken from the `two_ASCII_digits` table. No terminating '\0' is written.
//
// Preconditions: `i` lies in [0, 100) (checked by `assert`), and `buf` has
// room for two characters. Example:
//
//   char buf[2];
//   PutTwoDigits(42, buf);
//   // buf[0] == '4'
//   // buf[1] == '2'
inline void PutTwoDigits(size_t i, char* buf) {
  assert(i < 100);
  const char* const digit_pair = two_ASCII_digits[i];
  std::memcpy(buf, digit_pair, 2);
}
// safe_strto?() functions for implementing SimpleAtoi()
//
// Each function parses `text` as an integer written in the given `base` and
// stores the result through `value`; the boolean return value is what
// safe_strtoi_base() and SimpleAtoi() below report as parsing success. There is
// one function per supported result type (signed/unsigned 32- and 64-bit, plus
// unsigned 128-bit). They are only declared here.
bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);
bool safe_strtou128_base(absl::string_view text, absl::uint128* value,
int base);
// Upper bound, in bytes, on what the FastIntToBuffer() overloads below write
// into their output buffer.
static const int kFastToBufferSize = 32;
// Buffer size, in bytes, required by SixDigitsToBuffer() below.
static const int kSixDigitsToBufferSize = 16;
// Helper function for fast formatting of floating-point values.
// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
// significant digits are returned, trailing zeros are removed, and numbers
// outside the range 0.0001-999999 are output using scientific notation
// (1.23456e+06). This routine is heavily optimized.
// Required buffer size is `kSixDigitsToBufferSize`.
// NOTE(review): the `size_t` result is presumably the number of characters
// written to `buffer` -- confirm against the implementation.
size_t SixDigitsToBuffer(double d, char* buffer);
// These functions are intended for speed. All functions take an output buffer
// as an argument and return a pointer to the last byte they wrote, which is the
// terminating '\0'. At most `kFastToBufferSize` bytes are written, so a buffer
// of `kFastToBufferSize` bytes is always large enough.
//
// These four overloads cover the exact fixed-width 32- and 64-bit types; other
// integer types and enums go through the template overload that follows.
char* FastIntToBuffer(int32_t, char*);
char* FastIntToBuffer(uint32_t, char*);
char* FastIntToBuffer(int64_t, char*);
char* FastIntToBuffer(uint64_t, char*);
// Generic overload for enums and for integer types that are not an exact match
// for one of the four concrete overloads above. The value is converted to the
// fixed-width type with the same signedness -- 32-bit when `int_type` is at
// most 32 bits wide, 64-bit otherwise -- and forwarded to the matching
// concrete overload. Types wider than 64 bits are rejected at compile time.
template <typename int_type>
char* FastIntToBuffer(int_type i, char* buffer) {
  static_assert(sizeof(i) <= 64 / 8,
                "FastIntToBuffer works only with 64-bit-or-less integers.");
  // TODO(jorg): Signedness is detected arithmetically because that works for
  // enums and, as a side effect, fails to compile when int_type is a pointer.
  // Switch to something like std::is_signed<enum E> if that ever works.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  const bool needs_64_bits = sizeof(i) > 32 / 8;  // 33-bit to 64-bit
  if (is_signed) {
    return needs_64_bits ? FastIntToBuffer(static_cast<int64_t>(i), buffer)
                         : FastIntToBuffer(static_cast<int32_t>(i), buffer);
  }
  return needs_64_bits ? FastIntToBuffer(static_cast<uint64_t>(i), buffer)
                       : FastIntToBuffer(static_cast<uint32_t>(i), buffer);
}
// Backs SimpleAtoi(), generalized to an arbitrary numeric base (other parts of
// the Abseil implementation call it with bases other than 10).
//
// Parses `s` in base `base` using the safe_strto?_base() helper whose width
// and signedness match `int_type`, converts the helper's result to `int_type`,
// and stores it through `out`. Returns the helper's success flag. `*out` is
// assigned even when parsing fails, so its value is only meaningful when the
// function returns `true`.
//
// `int_type` must be 4 or 8 bytes wide and must not be a floating-point type;
// both conditions are enforced at compile time.
template <typename int_type>
ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,
                                           int base) {
  static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,
                "SimpleAtoi works only with 32-bit or 64-bit integers.");
  static_assert(!std::is_floating_point<int_type>::value,
                "Use SimpleAtof or SimpleAtod instead.");
  // TODO(jorg): Signedness is detected arithmetically because that works for
  // enums and, as a side effect, fails to compile when int_type is a pointer.
  // Switch to something like std::is_signed<enum E> if that ever works.
  const bool is_signed = static_cast<int_type>(1) - 2 < 0;
  const bool is_64_bit = sizeof(*out) == 64 / 8;

  if (is_signed && is_64_bit) {
    int64_t result;
    const bool ok = numbers_internal::safe_strto64_base(s, &result, base);
    *out = static_cast<int_type>(result);
    return ok;
  }
  if (is_signed) {  // 32-bit
    int32_t result;
    const bool ok = numbers_internal::safe_strto32_base(s, &result, base);
    *out = static_cast<int_type>(result);
    return ok;
  }
  if (is_64_bit) {  // Unsigned, 64-bit
    uint64_t result;
    const bool ok = numbers_internal::safe_strtou64_base(s, &result, base);
    *out = static_cast<int_type>(result);
    return ok;
  }
  // Unsigned, 32-bit
  uint32_t result;
  const bool ok = numbers_internal::safe_strtou32_base(s, &result, base);
  *out = static_cast<int_type>(result);
  return ok;
}
// FastHexToBufferZeroPad16()
//
// Outputs `val` into `out` as if by `snprintf(out, 17, "%016" PRIx64, val)` but
// without the terminating null character. Thus `out` must be of length >= 16.
// Returns the number of non-pad digits of the output (it can never be zero
// since 0 has one digit).
//
// Two implementations are selected at compile time: a SIMD version when
// `__SSE4_2__` is defined, and a portable table-driven loop otherwise. Both
// write exactly 16 lowercase hex characters, most significant digit first.
inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) {
#ifdef __SSE4_2__
// Put the most significant byte at the lowest address so it is emitted first
// (FromHost64() is expected to return the big-endian representation).
uint64_t be = absl::big_endian::FromHost64(val);
const auto kNibbleMask = _mm_set1_epi8(0xf);
// Shuffle table: byte k holds the hex character for nibble value k.
const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be)); // load low 64 bits
// After the shift, the low 4 bits of each byte of `v4` hold that byte's
// original high nibble; the stray upper bits are removed by the mask below.
auto v4 = _mm_srli_epi64(v, 4); // shift 4 right
// Pair each byte of `v4` with the same byte of `v`, giving high nibble then low
// nibble for every input byte.
auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes
auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles
// Use each nibble value as an index into kHexDigits.
auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars
// Unaligned store of all 16 characters; `out` need not be 16-byte aligned.
_mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);
#else
// Walk the eight bytes of `val` from most to least significant, copying the
// two-character kHexTable entry for each one.
for (int i = 0; i < 8; ++i) {
auto byte = (val >> (56 - 8 * i)) & 0xFF;
auto* hex = &absl::numbers_internal::kHexTable[byte * 2];
std::memcpy(out + 2 * i, hex, 2);
}
#endif
// Leading zero bits divided by 4 gives the count of leading zero hex digits;
// subtracting that from 16 leaves the count of significant digits.
// | 0x1 so that even 0 has 1 digit.
return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4;
}
} // namespace numbers_internal
// SimpleAtoi()
//
// Parses `str` as a base-10 integer and stores it through `out`, returning
// `true` on success. The work is delegated to safe_strtoi_base(), which picks
// the `safe_strto?()` function matching `int_type`. Those functions apply
// strict checking: the string must be a base-10 integer, optionally followed
// or preceded by ASCII whitespace, with a value in the range of the
// corresponding integer type.
template <typename int_type>
ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) {
  const int kDecimalBase = 10;
  return numbers_internal::safe_strtoi_base(str, out, kDecimalBase);
}
// Overload of SimpleAtoi() for `absl::uint128`. The generic template only
// accepts 4- and 8-byte integer types, so 128-bit values are parsed by calling
// safe_strtou128_base() directly, again in base 10.
ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str,
                                            absl::uint128* out) {
  const int kDecimalBase = 10;
  return numbers_internal::safe_strtou128_base(str, out, kDecimalBase);
}
} // namespace absl
#endif // ABSL_STRINGS_NUMBERS_H_