tvl-depot/absl/strings/internal/str_format/parser.cc
Abseil Team d95d156716 Export of internal Abseil changes
--
832be2d52d7695cf72fd70248909791fb8ad1003 by Gennadiy Rozental <rogeeff@google.com>:

Migrate some FlagImpl fields to bit fields.

To save padding space we are migrating some fields representing bool and enums into bit fields. Eventually we'll use remaining padding space for call_once control flag.

No other semantic changes made in this CL.

PiperOrigin-RevId: 293878165

--
09162bba5fd8eddacfd732d46fcfeb33074a259f by Samuel Benzaquen <sbenza@google.com>:

Correctly initialize the `length_mod` member.
Now that it is a raw enum, the default initialization is not enough.

PiperOrigin-RevId: 293827817

--
842b7b805d75c5ab670c52ccd7368cdeba11853d by Matthew Brown <matthewbr@google.com>:

Move str_format_internal::LengthMod from extension.h to parser.h; change to enum

PiperOrigin-RevId: 293697274
GitOrigin-RevId: 832be2d52d7695cf72fd70248909791fb8ad1003
Change-Id: I90899519e9480543e22638616fdf31a41e7f75c0
2020-02-07 16:58:49 -05:00

332 lines
11 KiB
C++

#include "absl/strings/internal/str_format/parser.h"
#include <assert.h>
#include <string.h>
#include <wchar.h>
#include <cctype>
#include <cstdint>
#include <algorithm>
#include <initializer_list>
#include <limits>
#include <ostream>
#include <string>
#include <unordered_set>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
using CC = ConversionChar::Id;
using LM = LengthMod;
ABSL_CONST_INIT const ConvTag kTags[256] = {
{}, {}, {}, {}, {}, {}, {}, {}, // 00-07
{}, {}, {}, {}, {}, {}, {}, {}, // 08-0f
{}, {}, {}, {}, {}, {}, {}, {}, // 10-17
{}, {}, {}, {}, {}, {}, {}, {}, // 18-1f
{}, {}, {}, {}, {}, {}, {}, {}, // 20-27
{}, {}, {}, {}, {}, {}, {}, {}, // 28-2f
{}, {}, {}, {}, {}, {}, {}, {}, // 30-37
{}, {}, {}, {}, {}, {}, {}, {}, // 38-3f
{}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG
{}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO
{}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW
CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_
{}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno
CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw
CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}!
{}, {}, {}, {}, {}, {}, {}, {}, // 80-87
{}, {}, {}, {}, {}, {}, {}, {}, // 88-8f
{}, {}, {}, {}, {}, {}, {}, {}, // 90-97
{}, {}, {}, {}, {}, {}, {}, {}, // 98-9f
{}, {}, {}, {}, {}, {}, {}, {}, // a0-a7
{}, {}, {}, {}, {}, {}, {}, {}, // a8-af
{}, {}, {}, {}, {}, {}, {}, {}, // b0-b7
{}, {}, {}, {}, {}, {}, {}, {}, // b8-bf
{}, {}, {}, {}, {}, {}, {}, {}, // c0-c7
{}, {}, {}, {}, {}, {}, {}, {}, // c8-cf
{}, {}, {}, {}, {}, {}, {}, {}, // d0-d7
{}, {}, {}, {}, {}, {}, {}, {}, // d8-df
{}, {}, {}, {}, {}, {}, {}, {}, // e0-e7
{}, {}, {}, {}, {}, {}, {}, {}, // e8-ef
{}, {}, {}, {}, {}, {}, {}, {}, // f0-f7
{}, {}, {}, {}, {}, {}, {}, {}, // f8-ff
};
namespace {
bool CheckFastPathSetting(const UnboundConversion& conv) {
bool should_be_basic = !conv.flags.left && //
!conv.flags.show_pos && //
!conv.flags.sign_col && //
!conv.flags.alt && //
!conv.flags.zero && //
(conv.width.value() == -1) &&
(conv.precision.value() == -1);
if (should_be_basic != conv.flags.basic) {
fprintf(stderr,
"basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
"width=%d precision=%d\n",
conv.flags.basic, conv.flags.left, conv.flags.show_pos,
conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
conv.width.value(), conv.precision.value());
}
return should_be_basic == conv.flags.basic;
}
template <bool is_positional>
const char *ConsumeConversion(const char *pos, const char *const end,
UnboundConversion *conv, int *next_arg) {
const char* const original_pos = pos;
char c;
// Read the next char into `c` and update `pos`. Returns false if there are
// no more chars to read.
#define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
do { \
if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
c = *pos++; \
} while (0)
const auto parse_digits = [&] {
int digits = c - '0';
// We do not want to overflow `digits` so we consume at most digits10
// digits. If there are more digits the parsing will fail later on when the
// digit doesn't match the expected characters.
int num_digits = std::numeric_limits<int>::digits10;
for (;;) {
if (ABSL_PREDICT_FALSE(pos == end)) break;
c = *pos++;
if (!std::isdigit(c)) break;
--num_digits;
if (ABSL_PREDICT_FALSE(!num_digits)) break;
digits = 10 * digits + c - '0';
}
return digits;
};
if (is_positional) {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->arg_position = parse_digits();
assert(conv->arg_position > 0);
if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
}
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
// We should start with the basic flag on.
assert(conv->flags.basic);
// Any non alpha character makes this conversion not basic.
// This includes flags (-+ #0), width (1-9, *) or precision (.).
// All conversion characters and length modifiers are alpha characters.
if (c < 'A') {
conv->flags.basic = false;
for (; c <= '0';) {
// FIXME: We might be able to speed this up reusing the lookup table from
// above. It might require changing Flags to be a plain integer where we
// can |= a value.
switch (c) {
case '-':
conv->flags.left = true;
break;
case '+':
conv->flags.show_pos = true;
break;
case ' ':
conv->flags.sign_col = true;
break;
case '#':
conv->flags.alt = true;
break;
case '0':
conv->flags.zero = true;
break;
default:
goto flags_done;
}
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
}
flags_done:
if (c <= '9') {
if (c >= '0') {
int maybe_width = parse_digits();
if (!is_positional && c == '$') {
if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
// Positional conversion.
*next_arg = -1;
conv->flags = Flags();
conv->flags.basic = true;
return ConsumeConversion<true>(original_pos, end, conv, next_arg);
}
conv->width.set_value(maybe_width);
} else if (c == '*') {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (is_positional) {
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->width.set_from_arg(parse_digits());
if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else {
conv->width.set_from_arg(++*next_arg);
}
}
}
if (c == '.') {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (std::isdigit(c)) {
conv->precision.set_value(parse_digits());
} else if (c == '*') {
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (is_positional) {
if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
conv->precision.set_from_arg(parse_digits());
if (c != '$') return nullptr;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else {
conv->precision.set_from_arg(++*next_arg);
}
} else {
conv->precision.set_value(0);
}
}
}
auto tag = GetTagForChar(c);
if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
// It is a length modifier.
using str_format_internal::LengthMod;
LengthMod length_mod = tag.as_length();
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
if (c == 'h' && length_mod == LengthMod::h) {
conv->length_mod = LengthMod::hh;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else if (c == 'l' && length_mod == LengthMod::l) {
conv->length_mod = LengthMod::ll;
ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
} else {
conv->length_mod = length_mod;
}
tag = GetTagForChar(c);
if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
}
assert(CheckFastPathSetting(*conv));
(void)(&CheckFastPathSetting);
conv->conv = tag.as_conv();
if (!is_positional) conv->arg_position = ++*next_arg;
return pos;
}
} // namespace
std::string LengthModToString(LengthMod v) {
switch (v) {
case LengthMod::h:
return "h";
case LengthMod::hh:
return "hh";
case LengthMod::l:
return "l";
case LengthMod::ll:
return "ll";
case LengthMod::L:
return "L";
case LengthMod::j:
return "j";
case LengthMod::z:
return "z";
case LengthMod::t:
return "t";
case LengthMod::q:
return "q";
case LengthMod::none:
return "";
}
return "";
}
const char *ConsumeUnboundConversion(const char *p, const char *end,
UnboundConversion *conv, int *next_arg) {
if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
return ConsumeConversion<false>(p, end, conv, next_arg);
}
struct ParsedFormatBase::ParsedFormatConsumer {
explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
: parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
bool Append(string_view s) {
if (s.empty()) return true;
size_t text_end = AppendText(s);
if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
// Let's extend the existing text run.
parsed->items_.back().text_end = text_end;
} else {
// Let's make a new text run.
parsed->items_.push_back({false, text_end, {}});
}
return true;
}
bool ConvertOne(const UnboundConversion &conv, string_view s) {
size_t text_end = AppendText(s);
parsed->items_.push_back({true, text_end, conv});
return true;
}
size_t AppendText(string_view s) {
memcpy(data_pos, s.data(), s.size());
data_pos += s.size();
return static_cast<size_t>(data_pos - parsed->data_.get());
}
ParsedFormatBase *parsed;
char* data_pos;
};
ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored,
std::initializer_list<Conv> convs)
: data_(format.empty() ? nullptr : new char[format.size()]) {
has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
!MatchesConversions(allow_ignored, convs);
}
bool ParsedFormatBase::MatchesConversions(
bool allow_ignored, std::initializer_list<Conv> convs) const {
std::unordered_set<int> used;
auto add_if_valid_conv = [&](int pos, char c) {
if (static_cast<size_t>(pos) > convs.size() ||
!Contains(convs.begin()[pos - 1], c))
return false;
used.insert(pos);
return true;
};
for (const ConversionItem &item : items_) {
if (!item.is_conversion) continue;
auto &conv = item.conv;
if (conv.precision.is_from_arg() &&
!add_if_valid_conv(conv.precision.get_from_arg(), '*'))
return false;
if (conv.width.is_from_arg() &&
!add_if_valid_conv(conv.width.get_from_arg(), '*'))
return false;
if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
}
return used.size() == convs.size() || allow_ignored;
}
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl