511 lines
19 KiB
C++
511 lines
19 KiB
C++
//
|
|
// Copyright 2017 The Abseil Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// -----------------------------------------------------------------------------
|
|
// File: str_split.h
|
|
// -----------------------------------------------------------------------------
|
|
//
|
|
// This file contains functions for splitting strings. It defines the main
|
|
// `StrSplit()` function, several delimiters for determining the boundaries on
|
|
// which to split the std::string, and predicates for filtering delimited results.
|
|
// `StrSplit()` adapts the returned collection to the type specified by the
|
|
// caller.
|
|
//
|
|
// Example:
|
|
//
|
|
// // Splits the given std::string on commas. Returns the results in a
|
|
// // vector of strings.
|
|
// std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
|
|
// // Can also use ","
|
|
// // v[0] == "a", v[1] == "b", v[2] == "c"
|
|
//
|
|
// See StrSplit() below for more information.
|
|
#ifndef ABSL_STRINGS_STR_SPLIT_H_
|
|
#define ABSL_STRINGS_STR_SPLIT_H_
|
|
|
|
#include <algorithm>
|
|
#include <cstddef>
|
|
#include <map>
|
|
#include <set>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "absl/base/internal/raw_logging.h"
|
|
#include "absl/strings/internal/str_split_internal.h"
|
|
#include "absl/strings/string_view.h"
|
|
#include "absl/strings/strip.h"
|
|
|
|
namespace absl {
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Delimiters
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// `StrSplit()` uses delimiters to define the boundaries between elements in the
|
|
// provided input. Several `Delimiter` types are defined below. If a std::string
|
|
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
|
|
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
|
|
// were passed a `ByString` delimiter.
|
|
//
|
|
// A `Delimiter` is an object with a `Find()` function that knows how to find
|
|
// the first occurrence of itself in a given `absl::string_view`.
|
|
//
|
|
// The following `Delimiter` types are available for use within `StrSplit()`:
|
|
//
|
|
// - `ByString` (default for std::string arguments)
|
|
// - `ByChar` (default for a char argument)
|
|
// - `ByAnyChar`
|
|
// - `ByLength`
|
|
// - `MaxSplits`
|
|
//
|
|
//
|
|
// A Delimiter's Find() member function will be passed the input text that is to
|
|
// be split and the position to begin searching for the next delimiter in the
|
|
// input text. The returned absl::string_view should refer to the next
|
|
// occurrence (after pos) of the represented delimiter; this returned
|
|
// absl::string_view represents the next location where the input std::string should
|
|
// be broken. The returned absl::string_view may be zero-length if the Delimiter
|
|
// does not represent a part of the std::string (e.g., a fixed-length delimiter). If
|
|
// no delimiter is found in the given text, a zero-length absl::string_view
|
|
// referring to the end of the text should be returned (e.g.,
// absl::string_view(text.data() + text.size(), 0)). It is important that the
// returned absl::string_view always be within the bounds of the input text
// given as an argument--it must not refer to a string that is physically
// located outside of the given string.
|
|
//
|
|
// The following example is a simple Delimiter object that is created with a
|
|
// single char and will look for that char in the text passed to the Find()
|
|
// function:
|
|
//
|
|
// struct SimpleDelimiter {
|
|
// const char c_;
|
|
// explicit SimpleDelimiter(char c) : c_(c) {}
|
|
// absl::string_view Find(absl::string_view text, size_t pos) {
|
|
// auto found = text.find(c_, pos);
|
|
// if (found == absl::string_view::npos)
|
|
// return absl::string_view(text.data() + text.size(), 0);
|
|
//
|
|
// return text.substr(found, 1);
|
|
// }
|
|
// };
|
|
|
|
// ByString
|
|
//
|
|
// A sub-std::string delimiter. If `StrSplit()` is passed a std::string in place of a
|
|
// `Delimiter` object, the std::string will be implicitly converted into a
|
|
// `ByString` delimiter.
|
|
//
|
|
// Example:
|
|
//
|
|
// // Because a std::string literal is converted to an `absl::ByString`,
|
|
// // the following two splits are equivalent.
|
|
//
|
|
// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
|
|
//
|
|
// using absl::ByString;
|
|
// std::vector<std::string> v2 = absl::StrSplit("a, b, c",
|
|
// ByString(", "));
|
|
// // v1[0] == "a", v1[1] == "b", v1[2] == "c" (v2 is identical)
|
|
class ByString {
|
|
public:
|
|
explicit ByString(absl::string_view sp);
|
|
absl::string_view Find(absl::string_view text, size_t pos) const;
|
|
|
|
private:
|
|
const std::string delimiter_;
|
|
};
|
|
|
|
// ByChar
|
|
//
|
|
// A single character delimiter. `ByChar` is functionally equivalent to a
|
|
// 1-char std::string within a `ByString` delimiter, but slightly more
|
|
// efficient.
|
|
//
|
|
// Example:
|
|
//
|
|
// // Because a char literal is converted to an absl::ByChar,
|
|
// // the following two splits are equivalent.
|
|
// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
|
|
// using absl::ByChar;
|
|
// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
|
|
// // v1[0] == "a", v1[1] == "b", v1[2] == "c" (v2 is identical)
|
|
//
|
|
// `ByChar` is also the default delimiter if a single character is given
|
|
// as the delimiter to `StrSplit()`. For example, the following calls are
|
|
// equivalent:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit("a-b", '-');
|
|
//
|
|
// using absl::ByChar;
|
|
// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
|
|
//
|
|
class ByChar {
|
|
public:
|
|
explicit ByChar(char c) : c_(c) {}
|
|
absl::string_view Find(absl::string_view text, size_t pos) const;
|
|
|
|
private:
|
|
char c_;
|
|
};
|
|
|
|
// ByAnyChar
|
|
//
|
|
// A delimiter that will match any of the given byte-sized characters within
|
|
// its provided std::string.
|
|
//
|
|
// Note: this delimiter works with single-byte std::string data, but does not work
|
|
// with variable-width encodings, such as UTF-8.
|
|
//
|
|
// Example:
|
|
//
|
|
// using absl::ByAnyChar;
|
|
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
|
|
// // v[0] == "a", v[1] == "b", v[2] == "c"
|
|
//
|
|
// If `ByAnyChar` is given the empty std::string, it behaves exactly like
|
|
// `ByString` and matches each individual character in the input std::string.
|
|
//
|
|
class ByAnyChar {
|
|
public:
|
|
explicit ByAnyChar(absl::string_view sp);
|
|
absl::string_view Find(absl::string_view text, size_t pos) const;
|
|
|
|
private:
|
|
const std::string delimiters_;
|
|
};
|
|
|
|
// ByLength
|
|
//
|
|
// A delimiter for splitting into equal-length strings. The length argument to
|
|
// the constructor must be greater than 0.
|
|
//
|
|
// Note: this delimiter works with single-byte std::string data, but does not work
|
|
// with variable-width encodings, such as UTF-8.
|
|
//
|
|
// Example:
|
|
//
|
|
// using absl::ByLength;
|
|
// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
|
|
|
|
// // v[0] == "123", v[1] == "456", v[2] == "789"
|
|
//
|
|
// Note that the std::string does not have to be a multiple of the fixed split
|
|
// length. In such a case, the last substring will be shorter.
|
|
//
|
|
// using absl::ByLength;
|
|
// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
|
|
//
|
|
// // v[0] == "12", v[1] == "34", v[2] == "5"
|
|
class ByLength {
|
|
public:
|
|
explicit ByLength(ptrdiff_t length);
|
|
absl::string_view Find(absl::string_view text, size_t pos) const;
|
|
|
|
private:
|
|
const ptrdiff_t length_;
|
|
};
|
|
|
|
namespace strings_internal {
|
|
|
|
// A traits-like metafunction for selecting the default Delimiter object type
|
|
// for a particular Delimiter type. The base case simply exposes type Delimiter
|
|
// itself as the delimiter's Type. However, there are specializations for
|
|
// std::string-like objects that map them to the ByString delimiter object.
|
|
// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
|
|
// std::string-like objects (e.g., ",") as delimiter arguments but they will be
|
|
// treated as if a ByString delimiter was given.
|
|
// Primary template: a type that has no dedicated specialization is assumed to
// already be a Delimiter, so it is exposed unchanged as `type`.
template <typename Delimiter>
struct SelectDelimiter {
  typedef Delimiter type;
};
|
|
|
|
template <>
|
|
struct SelectDelimiter<char> {
|
|
using type = ByChar;
|
|
};
|
|
template <>
|
|
struct SelectDelimiter<char*> {
|
|
using type = ByString;
|
|
};
|
|
template <>
|
|
struct SelectDelimiter<const char*> {
|
|
using type = ByString;
|
|
};
|
|
template <>
|
|
struct SelectDelimiter<absl::string_view> {
|
|
using type = ByString;
|
|
};
|
|
template <>
|
|
struct SelectDelimiter<std::string> {
|
|
using type = ByString;
|
|
};
|
|
|
|
// Wraps another delimiter and sets a max number of matches for that delimiter.
|
|
template <typename Delimiter>
|
|
class MaxSplitsImpl {
|
|
public:
|
|
MaxSplitsImpl(Delimiter delimiter, int limit)
|
|
: delimiter_(delimiter), limit_(limit), count_(0) {}
|
|
absl::string_view Find(absl::string_view text, size_t pos) {
|
|
if (count_++ == limit_) {
|
|
return absl::string_view(text.end(), 0); // No more matches.
|
|
}
|
|
return delimiter_.Find(text, pos);
|
|
}
|
|
|
|
private:
|
|
Delimiter delimiter_;
|
|
const int limit_;
|
|
int count_;
|
|
};
|
|
|
|
} // namespace strings_internal
|
|
|
|
// MaxSplits()
|
|
//
|
|
// A delimiter that limits the number of matches which can occur to the passed
|
|
// `limit`. The last element in the returned collection will contain all
|
|
// remaining unsplit pieces, which may contain instances of the delimiter.
|
|
// The collection will contain at most `limit` + 1 elements.
|
|
// Example:
|
|
//
|
|
// using absl::MaxSplits;
|
|
// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
|
|
//
|
|
// // v[0] == "a", v[1] == "b,c"
|
|
template <typename Delimiter>
|
|
inline strings_internal::MaxSplitsImpl<
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type>
|
|
MaxSplits(Delimiter delimiter, int limit) {
|
|
typedef
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
|
|
return strings_internal::MaxSplitsImpl<DelimiterType>(
|
|
DelimiterType(delimiter), limit);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Predicates
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// Predicates filter the results of a `StrSplit()` by determining whether or not
|
|
// a resultant element is included in the result set. A predicate may be passed
|
|
// as an optional third argument to the `StrSplit()` function.
|
|
//
|
|
// Predicates are unary functions (or functors) that take a single
|
|
// `absl::string_view` argument and return a bool indicating whether the
|
|
// argument should be included (`true`) or excluded (`false`).
|
|
//
|
|
// Predicates are useful when filtering out empty substrings. By default, empty
|
|
// substrings may be returned by `StrSplit()`, which is similar to the way split
|
|
// functions work in other programming languages.
|
|
|
|
// AllowEmpty()
|
|
//
|
|
// Always returns `true`, indicating that all strings--including empty
|
|
// strings--should be included in the split output. This predicate is not
|
|
// strictly needed because this is the default behavior of `StrSplit()`;
|
|
// however, it might be useful at some call sites to make the intent explicit.
|
|
//
|
|
// Example:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
|
|
//
|
|
// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
|
|
struct AllowEmpty {
|
|
bool operator()(absl::string_view) const { return true; }
|
|
};
|
|
|
|
// SkipEmpty()
|
|
//
|
|
// Returns `false` if the given `absl::string_view` is empty, indicating that
|
|
// `StrSplit()` should omit the empty std::string.
|
|
//
|
|
// Example:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
|
|
//
|
|
// // v[0] == "a", v[1] == "b"
|
|
//
|
|
// Note: `SkipEmpty()` does not consider a std::string containing only whitespace
|
|
// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
|
|
// predicate.
|
|
struct SkipEmpty {
|
|
bool operator()(absl::string_view sp) const { return !sp.empty(); }
|
|
};
|
|
|
|
// SkipWhitespace()
|
|
//
|
|
// Returns `false` if the given `absl::string_view` is empty *or* contains only
|
|
// whitespace, indicating that `StrSplit()` should omit the std::string.
|
|
//
|
|
// Example:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
|
|
// ',', SkipWhitespace());
|
|
// // v[0] == " a ", v[1] == "b"
|
|
//
|
|
// // SkipEmpty() would return whitespace elements
|
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
|
|
// // v[0] == " a ", v[1] == " ", v[2] == "b"
|
|
struct SkipWhitespace {
|
|
bool operator()(absl::string_view sp) const {
|
|
sp = absl::StripAsciiWhitespace(sp);
|
|
return !sp.empty();
|
|
}
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
// StrSplit()
|
|
//------------------------------------------------------------------------------
|
|
|
|
// StrSplit()
|
|
//
|
|
// Splits a given `std::string` based on the provided `Delimiter` object,
|
|
// returning the elements within the type specified by the caller. Optionally,
|
|
// you may also pass a `Predicate` to `StrSplit()` indicating whether to include
|
|
// or exclude the resulting element within the final result set. (See the
|
|
// overviews for Delimiters and Predicates above.)
|
|
//
|
|
// Example:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
|
|
// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
|
|
//
|
|
// You can also provide an explicit `Delimiter` object:
|
|
//
|
|
// Example:
|
|
//
|
|
// using absl::ByAnyChar;
|
|
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
|
|
// // v[0] == "a", v[1] == "b", v[2] == "c"
|
|
//
|
|
// See above for more information on delimiters.
|
|
//
|
|
// By default, empty strings are included in the result set. You can optionally
|
|
// include a third `Predicate` argument to apply a test for whether the
|
|
// resultant element should be included in the result set:
|
|
//
|
|
// Example:
|
|
//
|
|
// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
|
|
// ',', SkipWhitespace());
|
|
// // v[0] == " a ", v[1] == "b"
|
|
//
|
|
// See above for more information on predicates.
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// StrSplit() Return Types
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// The `StrSplit()` function adapts the returned collection to the collection
|
|
// specified by the caller (e.g. `std::vector` above). The returned collections
|
|
// may contain `string`, `absl::string_view` (in which case the original std::string
|
|
// being split must ensure that it outlives the collection), or any object that
|
|
// can be explicitly created from an `absl::string_view`. This behavior works
|
|
// for:
|
|
//
|
|
// 1) All standard STL containers including `std::vector`, `std::list`,
|
|
// `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
|
|
// 2) `std::pair` (which is not actually a container). See below.
|
|
//
|
|
// Example:
|
|
//
|
|
// // The results are returned as `absl::string_view` objects. Note that we
|
|
// // have to ensure that the input std::string outlives any results.
|
|
// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
|
|
//
|
|
// // Stores results in a std::set<std::string>, which also performs
|
|
// // de-duplication and orders the elements in ascending order.
|
|
// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
|
|
// // a contains "a", "b", "c" (duplicates removed, ascending order)
|
|
//
|
|
// // `StrSplit()` can be used within a range-based for loop, in which case
|
|
// // each element will be of type `absl::string_view`.
|
|
// std::vector<std::string> v;
|
|
// for (const auto sv : absl::StrSplit("a,b,c", ',')) {
|
|
// if (sv != "b") v.emplace_back(sv);
|
|
// }
|
|
// // v[0] == "a", v[1] == "c"
|
|
//
|
|
// // Stores results in a map. The map implementation assumes that the input
|
|
// // is provided as a series of key/value pairs. For example, the 0th element
|
|
// // resulting from the split will be stored as a key to the 1st element. If
|
|
// // an odd number of elements are resolved, the last element is paired with
|
|
// // a default-constructed value (e.g., empty std::string).
|
|
// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
|
|
// // m["a"] == "b", m["c"] == "" // last component value equals ""
|
|
//
|
|
// Splitting to `std::pair` is an interesting case because it can hold only two
|
|
// elements and is not a collection type. When splitting to a `std::pair` the
|
|
// first two split strings become the `std::pair` `.first` and `.second`
|
|
// members, respectively. The remaining split substrings are discarded. If there
|
|
// are fewer than two split substrings, the empty std::string is used for the
|
|
// corresponding
|
|
// `std::pair` member.
|
|
//
|
|
// Example:
|
|
//
|
|
// // Stores first two split strings as the members in a std::pair.
|
|
// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
|
|
// // p.first == "a", p.second == "b" // "c" is omitted.
|
|
//
|
|
// The `StrSplit()` function can be used multiple times to perform more
|
|
// complicated splitting logic, such as intelligently parsing key-value pairs.
|
|
//
|
|
// Example:
|
|
//
|
|
// // The input std::string "a=b=c,d=e,f=,g" becomes
|
|
// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
|
|
// std::map<std::string, std::string> m;
|
|
// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
|
|
// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
|
|
// }
|
|
// EXPECT_EQ("b=c", m.find("a")->second);
|
|
// EXPECT_EQ("e", m.find("d")->second);
|
|
// EXPECT_EQ("", m.find("f")->second);
|
|
// EXPECT_EQ("", m.find("g")->second);
|
|
//
|
|
// WARNING: Due to a legacy bug that is maintained for backward compatibility,
|
|
// splitting the following empty string_views produces different results:
|
|
//
|
|
// absl::StrSplit(absl::string_view(""), '-'); // {""}
|
|
// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""}
|
|
//
|
|
// Try not to depend on this distinction because the bug may one day be fixed.
|
|
template <typename Delimiter>
|
|
strings_internal::Splitter<
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
|
|
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
|
|
using DelimiterType =
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type;
|
|
return strings_internal::Splitter<DelimiterType, AllowEmpty>(
|
|
std::move(text), DelimiterType(d), AllowEmpty());
|
|
}
|
|
|
|
template <typename Delimiter, typename Predicate>
|
|
strings_internal::Splitter<
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
|
|
StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
|
|
Predicate p) {
|
|
using DelimiterType =
|
|
typename strings_internal::SelectDelimiter<Delimiter>::type;
|
|
return strings_internal::Splitter<DelimiterType, Predicate>(
|
|
std::move(text), DelimiterType(d), std::move(p));
|
|
}
|
|
|
|
} // namespace absl
|
|
|
|
#endif // ABSL_STRINGS_STR_SPLIT_H_
|