Changes imported from Abseil "staging" branch:

- a42e9b454ca8be7d021789cdb9bcada07d3e2d3e Merge pull request #57. by Derek Mauro <dmauro@google.com>
  - b1e03838f059c034a6489501804d516326246042 Move the long ostream tests into a separate source file u... by Alex Strelnikov <strel@google.com>
  - 7c56b7dbb05faa7e8653632e00be470331d79cb9 Return reference from absl::InlinedVector::emplace_back(). by Abseil Team <absl-team@google.com>
  - 85b070822b62688ff348d9ad9cc9e230a851f617 Treat \u or \U followed by Unicode surrogate character as... by Abseil Team <absl-team@google.com>

GitOrigin-RevId: a42e9b454ca8be7d021789cdb9bcada07d3e2d3e
Change-Id: I7d8fb68ffd7eb4e9e737f21fbed6d56b71985f94
This commit is contained in:
Abseil Team 2017-12-13 12:02:15 -08:00 committed by Titus Winters
parent 5fe41affba
commit 720c017e30
11 changed files with 720 additions and 919 deletions

View file

@ -72,6 +72,17 @@ inline int hex_digit_to_int(char c) {
return x & 0xf;
}
inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
if (c >= 0xD800 && c <= 0xDFFF) {
if (error) {
*error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
src);
}
return true;
}
return false;
}
// ----------------------------------------------------------------------
// CUnescapeInternal()
// Implements both CUnescape() and CUnescapeForNullTerminatedString().
@ -214,6 +225,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
d += 5;
break;
}
if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
return false;
}
d += strings_internal::EncodeUTF8Char(d, rune);
break;
}
@ -259,6 +273,9 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
d += 9;
break;
}
if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
return false;
}
d += strings_internal::EncodeUTF8Char(d, rune);
break;
}

View file

@ -160,11 +160,14 @@ TEST(Unescape, BasicFunction) {
EXPECT_EQ(out, val.unescaped);
}
std::string bad[] =
{"\\u1", // too short
"\\U1", // too short
"\\Uffffff",
"\\777", // exceeds 0xff
"\\xABCD"}; // exceeds 0xff
{"\\u1", // too short
"\\U1", // too short
"\\Uffffff", // exceeds 0x10ffff (largest Unicode)
"\\U00110000", // exceeds 0x10ffff (largest Unicode)
"\\uD835", // surrogate character (D800-DFFF)
"\\U0000DD04", // surrogate character (D800-DFFF)
"\\777", // exceeds 0xff
"\\xABCD"}; // exceeds 0xff
for (const std::string& e : bad) {
std::string error;
std::string out;