Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,2 @@
per-file safe_sprintf*=jln@chromium.org
per-file safe_sprintf*=mdempsky@chromium.org

View file

@ -0,0 +1,92 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_CHAR_TRAITS_H_
#define BASE_STRINGS_CHAR_TRAITS_H_
#include <stddef.h>
#include "base/compiler_specific.h"
namespace base {
// constexpr version of http://en.cppreference.com/w/cpp/string/char_traits.
// This currently just implements the bits needed to support a (mostly)
// constexpr StringPiece.
//
// TODO(dcheng): Once we switch to C++17, most methods will become constexpr and
// we can switch over to using the one in the standard library.
template <typename T>
struct CharTraits {
  // Lexicographically compares the first |n| characters of |s1| and |s2|.
  // The result is 0 when both ranges are equal, -1 when |s1| orders before
  // |s2|, and 1 when |s1| orders after |s2|.
  static constexpr int compare(const T* s1, const T* s2, size_t n) noexcept;

  // Counts the characters of the null-terminated string |s|. The terminating
  // null itself is not counted.
  static constexpr size_t length(const T* s) noexcept;
};

template <typename T>
constexpr int CharTraits<T>::compare(const T* s1,
                                     const T* s2,
                                     size_t n) noexcept {
  // Walk both ranges by index and stop at the first position that differs.
  for (size_t pos = 0; pos != n; ++pos) {
    if (s1[pos] < s2[pos])
      return -1;
    if (s1[pos] > s2[pos])
      return 1;
  }
  return 0;
}

template <typename T>
constexpr size_t CharTraits<T>::length(const T* s) noexcept {
  // Advance until the terminating null is found; the index is the length.
  size_t count = 0;
  while (s[count])
    ++count;
  return count;
}
// char specialization of CharTraits that can use clang's constexpr intrinsics,
// where available.
template <>
struct CharTraits<char> {
// Compares the first |n| chars of |s1| and |s2|. The result is 0 if they are
// equal, negative if |s1| orders before |s2|, and positive otherwise. Uses
// __builtin_memcmp() when the compiler advertises constexpr string builtins.
static constexpr int compare(const char* s1,
const char* s2,
size_t n) noexcept;
// Returns the number of chars in |s| before the terminating NUL. Uses
// __builtin_strlen() when compiling with clang.
static constexpr size_t length(const char* s) noexcept;
};
// Compares the first |n| chars of |s1| and |s2| byte-wise.
//
// Returns 0 if the ranges are equal, a negative value if |s1| orders before
// |s2|, and a positive value otherwise. Callers should rely only on the sign:
// the builtin path returns whatever memcmp() returns, while the fallback
// returns exactly -1 or 1.
constexpr int CharTraits<char>::compare(const char* s1,
                                        const char* s2,
                                        size_t n) noexcept {
#if HAS_FEATURE(cxx_constexpr_string_builtins)
  return __builtin_memcmp(s1, s2, n);
#else
  // memcmp() (and std::char_traits<char>) order bytes as unsigned char. Plain
  // char may be signed, so compare through unsigned char here as well;
  // otherwise bytes >= 0x80 would sort before ASCII on this path but after
  // ASCII on the builtin path.
  for (; n; --n, ++s1, ++s2) {
    const unsigned char lhs = static_cast<unsigned char>(*s1);
    const unsigned char rhs = static_cast<unsigned char>(*s2);
    if (lhs < rhs)
      return -1;
    if (lhs > rhs)
      return 1;
  }
  return 0;
#endif
}
// Returns the number of chars in |s| that precede the terminating NUL.
constexpr size_t CharTraits<char>::length(const char* s) noexcept {
#if defined(__clang__)
  return __builtin_strlen(s);
#else
  // Portable fallback: scan forward by index until the NUL is reached.
  size_t count = 0;
  while (s[count])
    ++count;
  return count;
#endif
}
} // namespace base
#endif // BASE_STRINGS_CHAR_TRAITS_H_

View file

@ -0,0 +1,19 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/latin1_string_conversions.h"
namespace base {
// Builds a string16 of |length| characters from |latin1| when it is non-null,
// and from |utf16| otherwise. A |length| of zero always yields an empty
// string, without looking at either pointer.
string16 Latin1OrUTF16ToUTF16(size_t length,
                              const Latin1Char* latin1,
                              const char16* utf16) {
  if (length == 0)
    return string16();
  // A non-null |latin1| takes precedence over |utf16|.
  return latin1 ? string16(latin1, latin1 + length)
                : string16(utf16, utf16 + length);
}
} // namespace base

View file

@ -0,0 +1,34 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_
#define BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_
#include <stddef.h>
#include <string>
#include "base/base_export.h"
#include "base/strings/string16.h"
namespace base {
// This definition of Latin1Char matches the definition of LChar in Blink. We
// use unsigned char rather than char to make it less tempting to mix and match
// Latin-1 and UTF-8 characters.
typedef unsigned char Latin1Char;
// This somewhat odd function is designed to help us convert from Blink Strings
// to string16. A Blink string is either backed by an array of Latin-1
// characters or an array of UTF-16 characters. This function is called by
// WebString::operator string16() to convert one or the other character array
// to string16. This function is defined here rather than in WebString.h to
// avoid binary bloat in all the callers of the conversion operator.
//
// |length| is the number of characters in whichever array is used. If
// |length| is 0 an empty string16 is returned. Otherwise |latin1| is converted
// when it is non-null, and |utf16| is copied when |latin1| is null.
BASE_EXPORT string16 Latin1OrUTF16ToUTF16(size_t length,
const Latin1Char* latin1,
const char16* utf16);
} // namespace base
#endif // BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_

View file

@ -0,0 +1,33 @@
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/nullable_string16.h"
#include <ostream>
#include <utility>
namespace base {
// The default, copy and move constructors are compiler-generated. They are
// defaulted here, out of line, rather than inside the class definition.
NullableString16::NullableString16() = default;
NullableString16::NullableString16(const NullableString16& other) = default;
NullableString16::NullableString16(NullableString16&& other) = default;
// Stores a copy of |string| unless |is_null| is set. When |is_null| is true,
// |string| is ignored and |string_| is left untouched.
NullableString16::NullableString16(const string16& string, bool is_null) {
  if (is_null)
    return;
  string_.emplace(string);
}
// Takes |optional_string16| by value and moves it into |string_|.
NullableString16::NullableString16(Optional<string16> optional_string16)
: string_(std::move(optional_string16)) {}
// The destructor and both assignment operators are compiler-generated and
// defaulted out of line.
NullableString16::~NullableString16() = default;
NullableString16& NullableString16::operator=(const NullableString16& other) =
default;
NullableString16& NullableString16::operator=(NullableString16&& other) =
default;
// Streams |value| to |out|: the literal "(null)" for a null value, otherwise
// the wrapped string.
std::ostream& operator<<(std::ostream& out, const NullableString16& value) {
  if (value.is_null())
    return out << "(null)";
  return out << value.string();
}
} // namespace base

View file

@ -0,0 +1,55 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_NULLABLE_STRING16_H_
#define BASE_STRINGS_NULLABLE_STRING16_H_
#include <iosfwd>
#include "base/base_export.h"
#include "base/optional.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
namespace base {
// This class is a simple wrapper for string16 which also contains a null
// state. This should be used only where the difference between null and
// empty is meaningful.
class BASE_EXPORT NullableString16 {
 public:
  // Constructors, destructor and assignment operators are only declared here;
  // their definitions live outside the class.
  NullableString16();
  NullableString16(const NullableString16& other);
  NullableString16(NullableString16&& other);

  // Wraps |string|; |is_null| selects the null state instead.
  NullableString16(const string16& string, bool is_null);

  // Wraps an already-optional string.
  explicit NullableString16(Optional<string16> optional_string16);

  ~NullableString16();

  NullableString16& operator=(const NullableString16& other);
  NullableString16& operator=(NullableString16&& other);

  // Returns the wrapped string, or EmptyString16() when no string is held.
  const string16& string() const {
    if (string_)
      return *string_;
    return EmptyString16();
  }

  // True when no string is held.
  bool is_null() const { return !string_; }

  // Exposes the underlying optional without copying it.
  const Optional<string16>& as_optional_string16() const { return string_; }

 private:
  // Holds the string when one is present.
  Optional<string16> string_;
};
// Two NullableString16 values are equal when their underlying optionals
// compare equal.
inline bool operator==(const NullableString16& a, const NullableString16& b) {
  const Optional<string16>& lhs = a.as_optional_string16();
  const Optional<string16>& rhs = b.as_optional_string16();
  return lhs == rhs;
}
// Negation of operator== for NullableString16.
inline bool operator!=(const NullableString16& a, const NullableString16& b) {
  const bool equal = (a == b);
  return !equal;
}
// Writes |value| to |out|: "(null)" when |value| is null, otherwise the
// wrapped string. Returns |out| to allow chaining.
BASE_EXPORT std::ostream& operator<<(std::ostream& out,
const NullableString16& value);
} // namespace base
#endif // BASE_STRINGS_NULLABLE_STRING16_H_

View file

@ -0,0 +1,155 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/pattern.h"
#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace {
// Returns true if |character| is one of the two pattern wildcards, '*' or '?'.
constexpr bool IsWildcard(base_icu::UChar32 character) {
  switch (character) {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
// Searches for the next subpattern of |pattern| in |string|, up to the given
// |maximum_distance|. The subpattern extends from the start of |pattern| up to
// the first unescaped wildcard character (or the end of the pattern). If the
// value of |maximum_distance| is negative, the maximum distance is considered
// infinite.
//
// |pattern| and |string| are in/out cursors: they are advanced past every
// character that matched. |next| reads one character from a cursor, advances
// the cursor and returns the character.
//
// Returns true when an unescaped wildcard is reached in the pattern, or when
// the pattern and the string end at the same time. Returns false when the
// string ends before the subpattern is matched, or when a mismatch occurs and
// |maximum_distance| is exhausted.
template <typename CHAR, typename NEXT>
constexpr bool SearchForChars(const CHAR** pattern,
const CHAR* pattern_end,
const CHAR** string,
const CHAR* string_end,
int maximum_distance,
NEXT next) {
// Remember where this attempt started, so that a mismatch can rewind the
// pattern and retry one character further into the string.
const CHAR* pattern_start = *pattern;
const CHAR* string_start = *string;
// True while the previous pattern character was an unescaped backslash.
bool escape = false;
while (true) {
if (*pattern == pattern_end) {
// If this is the end of the pattern, only accept the end of the string;
// anything else falls through to the mismatch case.
if (*string == string_end)
return true;
} else {
// If we have found a wildcard, we're done.
if (!escape && IsWildcard(**pattern))
return true;
// Check if the escape character is found. If so, skip it and move to the
// next character.
if (!escape && **pattern == '\\') {
escape = true;
next(pattern, pattern_end);
continue;
}
escape = false;
if (*string == string_end)
return false;
// Check if the chars match, if so, increment the ptrs.
// Decoding happens on copies of the cursors, so that a mismatch leaves
// |*pattern| and |*string| where they were.
const CHAR* pattern_next = *pattern;
const CHAR* string_next = *string;
base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
if (pattern_char == next(&string_next, string_end) &&
pattern_char != CBU_SENTINEL) {
*pattern = pattern_next;
*string = string_next;
continue;
}
}
// Mismatch. If we have reached the maximum distance, return false,
// otherwise restart at the beginning of the pattern with the next character
// in the string.
// TODO(bauerb): This is a naive implementation of substring search, which
// could be implemented with a more efficient algorithm, e.g.
// Knuth-Morris-Pratt (at the expense of requiring preprocessing).
if (maximum_distance == 0)
return false;
// Because unlimited distance is represented as -1, this will never reach 0
// and therefore fail the match above.
maximum_distance--;
*pattern = pattern_start;
next(&string_start, string_end);
*string = string_start;
}
}
// Skips over a run of consecutive wildcard characters (? or *) starting at
// |*pattern|, leaving |*pattern| on the first non-wildcard character (or on
// |end|). Returns the maximum number of characters the run can match: the
// number of '?' seen, or -1 if the run contains at least one '*' and can
// therefore match an arbitrary number of characters.
template <typename CHAR, typename NEXT>
constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
  int question_marks = 0;
  bool saw_asterisk = false;
  // |next| advances |*pattern| by one character; it is only invoked after a
  // wildcard has been consumed, never after the terminating break.
  for (; *pattern != end; next(pattern, end)) {
    if (**pattern == '*') {
      saw_asterisk = true;
      continue;
    }
    if (**pattern == '?') {
      ++question_marks;
      continue;
    }
    break;
  }
  if (saw_asterisk)
    return -1;
  return question_marks;
}
// Matches [eval, eval_end) against [pattern, pattern_end). The pattern is
// processed piecewise: a run of wildcards is consumed first, then the literal
// subpattern that follows it is searched for within the distance the
// wildcards allow. |next| decodes one character and advances a cursor.
template <typename CHAR, typename NEXT>
constexpr bool MatchPatternT(const CHAR* eval,
                             const CHAR* eval_end,
                             const CHAR* pattern,
                             const CHAR* pattern_end,
                             NEXT next) {
  for (;;) {
    const int max_skip = EatWildcards(&pattern, pattern_end, next);
    const bool found = SearchForChars(&pattern, pattern_end, &eval, eval_end,
                                      max_skip, next);
    if (!found)
      return false;
    // Done once the whole pattern has been consumed.
    if (pattern == pattern_end)
      return true;
  }
}
// Functor that reads one character from the UTF-8 range [*p, end) via
// CBU8_NEXT, advances |*p| past the bytes consumed and returns the character.
struct NextCharUTF8 {
  base_icu::UChar32 operator()(const char** p, const char* end) {
    int consumed = 0;
    base_icu::UChar32 code_point;
    CBU8_NEXT(*p, consumed, end - *p, code_point);
    *p += consumed;
    return code_point;
  }
};
// Functor that reads one character from the UTF-16 range [*p, end) via
// CBU16_NEXT, advances |*p| past the code units consumed and returns the
// character.
struct NextCharUTF16 {
  base_icu::UChar32 operator()(const char16** p, const char16* end) {
    int consumed = 0;
    base_icu::UChar32 code_point;
    CBU16_NEXT(*p, consumed, end - *p, code_point);
    *p += consumed;
    return code_point;
  }
};
} // namespace
// UTF-8 overload: matches |eval| against the wildcard |pattern|.
bool MatchPattern(StringPiece eval, StringPiece pattern) {
  const char* const eval_begin = eval.data();
  const char* const pattern_begin = pattern.data();
  return MatchPatternT(eval_begin, eval_begin + eval.size(), pattern_begin,
                       pattern_begin + pattern.size(), NextCharUTF8());
}
// UTF-16 overload: matches |eval| against the wildcard |pattern|.
bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) {
  const char16* const eval_begin = eval.data();
  const char16* const pattern_begin = pattern.data();
  return MatchPatternT(eval_begin, eval_begin + eval.size(), pattern_begin,
                       pattern_begin + pattern.size(), NextCharUTF16());
}
} // namespace base

View file

@ -0,0 +1,23 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_PATTERN_H_
#define BASE_STRINGS_PATTERN_H_
#include "base/base_export.h"
#include "base/strings/string_piece.h"
namespace base {
// Returns true if the |string| passed in matches the |pattern|. The pattern
// string can contain wildcards like * and ?.
//
// The backslash character (\) is an escape character for * and ?.
// ? matches 0 or 1 character, while * matches 0 or more characters.
//
// The StringPiece overload decodes both arguments as UTF-8 and the
// StringPiece16 overload decodes them as UTF-16.
BASE_EXPORT bool MatchPattern(StringPiece string, StringPiece pattern);
BASE_EXPORT bool MatchPattern(StringPiece16 string, StringPiece16 pattern);
} // namespace base
#endif // BASE_STRINGS_PATTERN_H_

View file

@ -0,0 +1,682 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/safe_sprintf.h"
#include <errno.h>
#include <string.h>
#include <algorithm>
#include <limits>
#include "base/macros.h"
#include "build/build_config.h"
#if !defined(NDEBUG)
// In debug builds, we use RAW_CHECK() to print useful error messages, if
// SafeSPrintf() is called with broken arguments.
// As our contract promises that SafeSPrintf() can be called from any
// restricted run-time context, it is not actually safe to call logging
// functions from it; and we only ever do so for debug builds and hope for the
// best. We should _never_ call any logging function other than RAW_CHECK(),
// and we should _never_ include any logging code that is active in production
// builds. Most notably, we should not include these logging functions in
// unofficial release builds, even though those builds would otherwise have
// DCHECKS() enabled.
// In other words; please do not remove the #ifdef around this #include.
// Instead, in production builds we opt for returning a degraded result,
// whenever an error is encountered.
// E.g. The broken function call
// SafeSPrintf("errno = %d (%x)", errno, strerror(errno))
// will print something like
// errno = 13 (%x)
// instead of
// errno = 13 (Access denied)
// In most of the anticipated use cases, that's probably the preferred
// behavior.
#include "base/logging.h"
#define DEBUG_CHECK RAW_CHECK
#else
// Release builds: the condition is still evaluated (so its operands count as
// used), but a false result is ignored and nothing is logged.
#define DEBUG_CHECK(x) do { if (x) { } } while (0)
#endif
namespace base {
namespace strings {
// The code in this file is extremely careful to be async-signal-safe.
//
// Most obviously, we avoid calling any code that could dynamically allocate
// memory. Doing so would almost certainly result in bugs and dead-locks.
// We also avoid calling any other STL functions that could have unintended
// side-effects involving memory allocation or access to other shared
// resources.
//
// But on top of that, we also avoid calling other library functions, as many
// of them have the side-effect of calling getenv() (in order to deal with
// localization) or accessing errno. The latter sounds benign, but there are
// several execution contexts where it isn't even possible to safely read let
// alone write errno.
//
// The stated design goal of the SafeSPrintf() function is that it can be
// called from any context that can safely call C or C++ code (i.e. anything
// that doesn't require assembly code).
//
// For a brief overview of some but not all of the issues with async-signal-
// safety, refer to:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
namespace {
// Largest value an ssize_t can hold, computed without <limits>: converting -1
// to size_t sets every bit, and shifting right by one clears the top bit.
const size_t kSSizeMaxConst =
    static_cast<size_t>(static_cast<ssize_t>(-1)) >> 1;
// Digit lookup tables for bases up to 16, in upper and lower case.
const char kUpCaseHexDigits[] = "0123456789ABCDEF";
const char kDownCaseHexDigits[] = "0123456789abcdef";
}  // namespace
#if defined(NDEBUG)
// We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(),
// but C++ doesn't allow us to do that for constants. Instead, we have to
// use careful casting and shifting. We later use a static_assert to
// verify that this worked correctly.
namespace {
// Release builds: the limit is a true constant.
const size_t kSSizeMax = kSSizeMaxConst;
}
#else // defined(NDEBUG)
// For efficiency, we really need kSSizeMax to be a constant. But for unit
// tests, it should be adjustable. This allows us to verify edge cases without
// having to fill the entire available address space. As a compromise, we make
// kSSizeMax adjustable in debug builds, and then only compile that particular
// part of the unit test in debug builds.
namespace {
// Debug builds: mutable so that tests can lower the limit.
static size_t kSSizeMax = kSSizeMaxConst;
}
namespace internal {
// Test-only hook: overrides the limit used by this file.
void SetSafeSPrintfSSizeMaxForTest(size_t max) {
kSSizeMax = max;
}
// Test-only hook: returns the limit currently in effect.
size_t GetSafeSPrintfSSizeMaxForTest() {
return kSSizeMax;
}
}
#endif // defined(NDEBUG)
namespace {
// Output sink used while formatting. It writes characters into a
// caller-provided buffer, silently drops whatever does not fit, and keeps
// counting the characters that would have been written had the buffer been
// large enough. The destructor NUL-terminates the output.
class Buffer {
public:
// |buffer| is caller-allocated storage that SafeSPrintf() writes to. It
// has |size| bytes of writable storage. It is the caller's responsibility
// to ensure that the buffer is at least one byte in size, so that it fits
// the trailing NUL that will be added by the destructor. The buffer also
// must be smaller or equal to kSSizeMax in size.
Buffer(char* buffer, size_t size)
: buffer_(buffer),
size_(size - 1), // Account for trailing NUL byte
count_(0) {
// MSVS2013's standard library doesn't mark max() as constexpr yet. cl.exe
// supports static_cast but doesn't really implement constexpr yet so it doesn't
// complain, but clang does.
#if __cplusplus >= 201103 && !(defined(__clang__) && defined(OS_WIN))
static_assert(kSSizeMaxConst ==
static_cast<size_t>(std::numeric_limits<ssize_t>::max()),
"kSSizeMaxConst should be the max value of an ssize_t");
#endif
DEBUG_CHECK(size > 0);
DEBUG_CHECK(size <= kSSizeMax);
}
~Buffer() {
// The code calling the constructor guaranteed that there was enough space
// to store a trailing NUL -- and in debug builds, we are actually
// verifying this with DEBUG_CHECK()s in the constructor. So, we can
// always unconditionally write the NUL byte in the destructor. We do not
// need to adjust the count_, as SafeSPrintf() copies snprintf() in not
// including the NUL byte in its return code.
*GetInsertionPoint() = '\000';
}
// Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The
// caller can now stop adding more data, as GetCount() has reached its
// maximum possible value.
inline bool OutOfAddressableSpace() const {
return count_ == static_cast<size_t>(kSSizeMax - 1);
}
// Returns the number of bytes that would have been emitted to |buffer_|
// if it was sized sufficiently large. This number can be larger than
// |size_|, if the caller provided an insufficiently large output buffer.
// But it will never be bigger than |kSSizeMax-1|.
inline ssize_t GetCount() const {
DEBUG_CHECK(count_ < kSSizeMax);
return static_cast<ssize_t>(count_);
}
// Emits one |ch| character into the |buffer_| and updates the |count_| of
// characters that are currently supposed to be in the buffer.
// Returns "false", iff the buffer was already full.
// N.B. |count_| increases even if no characters have been written. This is
// needed so that GetCount() can return the number of bytes that should
// have been allocated for the |buffer_|.
inline bool Out(char ch) {
if (size_ >= 1 && count_ < size_) {
buffer_[count_] = ch;
return IncrementCountByOne();
}
// |count_| still needs to be updated, even if the buffer has been
// filled completely. This allows SafeSPrintf() to return the number of
// bytes that should have been emitted.
IncrementCountByOne();
return false;
}
// Inserts |padding|-|len| bytes worth of padding into the |buffer_|.
// |count_| will also be incremented by the number of bytes that were meant
// to be emitted. The |pad| character is typically either a ' ' space
// or a '0' zero, but other non-NUL values are legal.
// Returns "false", iff the |buffer_| filled up (i.e. |count_|
// overflowed |size_|) at any time during padding.
inline bool Pad(char pad, size_t padding, size_t len) {
DEBUG_CHECK(pad);
DEBUG_CHECK(padding <= kSSizeMax);
for (; padding > len; --padding) {
if (!Out(pad)) {
// The failed Out() call already counted one pad character. Account for
// the remaining ones arithmetically instead of looping over them.
if (--padding) {
IncrementCount(padding-len);
}
return false;
}
}
return true;
}
// POSIX doesn't define any async-signal-safe function for converting
// an integer to ASCII. Define our own version.
//
// This also gives us the ability to make the function a little more
// powerful and have it deal with |padding|, with truncation, and with
// predicting the length of the untruncated output.
//
// IToASCII() converts an integer |i| to ASCII.
//
// Unlike similar functions in the standard C library, it never appends a
// NUL character. This is left for the caller to do.
//
// While the function signature takes a signed int64_t, the code decides at
// run-time whether to treat the argument as signed (int64_t) or as unsigned
// (uint64_t) based on the value of |sign|.
//
// It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have
// a |sign|. Otherwise, |i| is treated as unsigned.
//
// For bases larger than 10, |upcase| decides whether lower-case or upper-
// case letters should be used to designate digits greater than 10.
//
// Padding can be done with either '0' zeros or ' ' spaces. Padding has to
// be positive and will always be applied to the left of the output.
//
// Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to
// the left of |padding|, if |pad| is '0'; and to the right of |padding|
// if |pad| is ' '.
//
// Returns "false", if the |buffer_| overflowed at any time.
bool IToASCII(bool sign, bool upcase, int64_t i, int base,
char pad, size_t padding, const char* prefix);
private:
// Increments |count_| by |inc| unless this would cause |count_| to
// overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected;
// it then clamps |count_| to |kSSizeMax-1|.
inline bool IncrementCount(size_t inc) {
// "inc" is either 1 or a "padding" value. Padding is clamped at
// run-time to at most kSSizeMax-1. So, we know that "inc" is always in
// the range 1..kSSizeMax-1.
// This allows us to compute "kSSizeMax - 1 - inc" without incurring any
// integer overflows.
DEBUG_CHECK(inc <= kSSizeMax - 1);
if (count_ > kSSizeMax - 1 - inc) {
count_ = kSSizeMax - 1;
return false;
}
count_ += inc;
return true;
}
// Convenience method for the common case of incrementing |count_| by one.
inline bool IncrementCountByOne() {
return IncrementCount(1);
}
// Return the current insertion point into the buffer. This is typically
// at |buffer_| + |count_|, but could be before that if truncation
// happened. It always points to one byte past the last byte that was
// successfully placed into the |buffer_|.
inline char* GetInsertionPoint() const {
size_t idx = count_;
if (idx > size_) {
idx = size_;
}
return buffer_ + idx;
}
// User-provided buffer that will receive the fully formatted output string.
char* buffer_;
// Number of bytes that are available in the buffer excluding the trailing
// NUL byte that will be added by the destructor.
const size_t size_;
// Number of bytes that would have been emitted to the buffer, if the buffer
// was sufficiently big. This number always excludes the trailing NUL byte
// and it is guaranteed to never grow bigger than kSSizeMax-1.
size_t count_;
DISALLOW_COPY_AND_ASSIGN(Buffer);
};
// Converts |i| to ASCII in the given |base| and emits it through Out(),
// together with any |prefix| and left |padding|. Digits are produced least
// significant first and reversed in place at the end. When the output does
// not fit, the least significant digits are the ones dropped, and the number
// of characters that were dropped is added back to |count_| before
// returning. Returns true only if nothing had to be discarded.
bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base,
char pad, size_t padding, const char* prefix) {
// Sanity check for parameters. None of these should ever fail, but see
// above for the rationale why we can't call CHECK().
DEBUG_CHECK(base >= 2);
DEBUG_CHECK(base <= 16);
DEBUG_CHECK(!sign || base == 10);
DEBUG_CHECK(pad == '0' || pad == ' ');
DEBUG_CHECK(padding <= kSSizeMax);
DEBUG_CHECK(!(sign && prefix && *prefix));
// Handle negative numbers, if the caller indicated that |i| should be
// treated as a signed number; otherwise treat |i| as unsigned (even if the
// MSB is set!)
// Details are tricky, because of limited data-types, but equivalent pseudo-
// code would look like:
// if (sign && i < 0)
// prefix = "-";
// num = abs(i);
int minint = 0;
uint64_t num;
if (sign && i < 0) {
prefix = "-";
// Turn our number positive.
if (i == std::numeric_limits<int64_t>::min()) {
// The most negative integer needs special treatment.
// Negating it directly would overflow, so negate (i + 1) instead and add
// the missing 1 back to the first digit emitted (see |minint| below).
minint = 1;
num = static_cast<uint64_t>(-(i + 1));
} else {
// "Normal" negative numbers are easy.
num = static_cast<uint64_t>(-i);
}
} else {
num = static_cast<uint64_t>(i);
}
// If padding with '0' zero, emit the prefix or '-' character now. Otherwise,
// make the prefix accessible in reverse order, so that we can later output
// it right between padding and the number.
// We cannot choose the easier approach of just reversing the number, as that
// fails in situations where we need to truncate numbers that have padding
// and/or prefixes.
const char* reverse_prefix = nullptr;
if (prefix && *prefix) {
if (pad == '0') {
while (*prefix) {
if (padding) {
--padding;
}
Out(*prefix++);
}
prefix = nullptr;
} else {
for (reverse_prefix = prefix; *reverse_prefix; ++reverse_prefix) {
}
}
} else
prefix = nullptr;
// Non-zero only when a prefix is still pending, i.e. with ' ' padding; in
// every other case both pointers are null at this point.
const size_t prefix_length = reverse_prefix - prefix;
// Loop until we have converted the entire number. Output at least one
// character (i.e. '0').
size_t start = count_;
size_t discarded = 0;
bool started = false;
do {
// Make sure there is still enough space left in our output buffer.
if (count_ >= size_) {
if (start < size_) {
// It is rare that we need to output a partial number. But if asked
// to do so, we will still make sure we output the correct number of
// leading digits.
// Since we are generating the digits in reverse order, we actually
// have to discard digits in the order that we have already emitted
// them. This is essentially equivalent to:
// memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1)
for (char* move = buffer_ + start, *end = buffer_ + size_ - 1;
move < end;
++move) {
*move = move[1];
}
++discarded;
--count_;
} else if (count_ - size_ > 1) {
// Need to increment either |count_| or |discarded| to make progress.
// The latter is more efficient, as it eventually triggers fast
// handling of padding. But we have to ensure we don't accidentally
// change the overall state (i.e. switch the state-machine from
// discarding to non-discarding). |count_| needs to always stay
// bigger than |size_|.
--count_;
++discarded;
}
}
// Output the next digit and (if necessary) compensate for the most
// negative integer needing special treatment. This works because,
// no matter the bit width of the integer, the lowest-most decimal
// integer always ends in 2, 4, 6, or 8.
if (!num && started) {
// All digits are out: emit the pending prefix (last character first),
// and after that the padding.
if (reverse_prefix > prefix) {
Out(*--reverse_prefix);
} else {
Out(pad);
}
} else {
started = true;
Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]);
}
// The compensation applies to the least significant digit only.
minint = 0;
num /= base;
// Add padding, if requested.
if (padding > 0) {
--padding;
// Performance optimization for when we are asked to output excessive
// padding, but our output buffer is limited in size. Even if we output
// a 64bit number in binary, we would never write more than 64 plus
// prefix non-padding characters. So, once this limit has been passed,
// any further state change can be computed arithmetically; we know that
// by this time, our entire final output consists of padding characters
// that have all already been output.
if (discarded > 8*sizeof(num) + prefix_length) {
IncrementCount(padding);
padding = 0;
}
}
} while (num || padding || (reverse_prefix > prefix));
// Conversion to ASCII actually resulted in the digits being in reverse
// order. We can't easily generate them in forward order, as we can't tell
// the number of characters needed until we are done converting.
// So, now, we reverse the string (except for the possible '-' sign).
char* front = buffer_ + start;
char* back = GetInsertionPoint();
while (--back > front) {
char ch = *back;
*back = *front;
*front++ = ch;
}
// Characters dropped during truncation still count towards the length the
// untruncated output would have had.
IncrementCount(discarded);
return !discarded;
}
} // anonymous namespace
namespace internal {
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, const Arg* args,
const size_t max_args) {
// Make sure that at least one NUL byte can be written, and that the buffer
// never overflows kSSizeMax. Not only does that use up most or all of the
// address space, it also would result in a return code that cannot be
// represented.
if (static_cast<ssize_t>(sz) < 1)
return -1;
sz = std::min(sz, kSSizeMax);
// Iterate over format string and interpret '%' arguments as they are
// encountered.
Buffer buffer(buf, sz);
size_t padding;
char pad;
for (unsigned int cur_arg = 0; *fmt && !buffer.OutOfAddressableSpace(); ) {
if (*fmt++ == '%') {
padding = 0;
pad = ' ';
char ch = *fmt++;
format_character_found:
switch (ch) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// Found a width parameter. Convert to an integer value and store in
// "padding". If the leading digit is a zero, change the padding
// character from a space ' ' to a zero '0'.
pad = ch == '0' ? '0' : ' ';
for (;;) {
// The maximum allowed padding fills all the available address
// space and leaves just enough space to insert the trailing NUL.
const size_t max_padding = kSSizeMax - 1;
if (padding > max_padding/10 ||
10*padding > max_padding - (ch - '0')) {
DEBUG_CHECK(padding <= max_padding/10 &&
10*padding <= max_padding - (ch - '0'));
// Integer overflow detected. Skip the rest of the width until
// we find the format character, then do the normal error handling.
padding_overflow:
padding = max_padding;
while ((ch = *fmt++) >= '0' && ch <= '9') {
}
if (cur_arg < max_args) {
++cur_arg;
}
goto fail_to_expand;
}
padding = 10*padding + ch - '0';
if (padding > max_padding) {
// This doesn't happen for "sane" values of kSSizeMax. But once
// kSSizeMax gets smaller than about 10, our earlier range checks
// are incomplete. Unittests do trigger this artificial corner
// case.
DEBUG_CHECK(padding <= max_padding);
goto padding_overflow;
}
ch = *fmt++;
if (ch < '0' || ch > '9') {
// Reached the end of the width parameter. This is where the format
// character is found.
goto format_character_found;
}
}
break;
case 'c': { // Output an ASCII character.
// Check that there are arguments left to be inserted.
if (cur_arg >= max_args) {
DEBUG_CHECK(cur_arg < max_args);
goto fail_to_expand;
}
// Check that the argument has the expected type.
const Arg& arg = args[cur_arg++];
if (arg.type != Arg::INT && arg.type != Arg::UINT) {
DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT);
goto fail_to_expand;
}
// Apply padding, if needed.
buffer.Pad(' ', padding, 1);
// Convert the argument to an ASCII character and output it.
char as_char = static_cast<char>(arg.integer.i);
if (!as_char) {
goto end_of_output_buffer;
}
buffer.Out(as_char);
break; }
case 'd': // Output a possibly signed decimal value.
case 'o': // Output an unsigned octal value.
case 'x': // Output an unsigned hexadecimal value.
case 'X':
case 'p': { // Output a pointer value.
// Check that there are arguments left to be inserted.
if (cur_arg >= max_args) {
DEBUG_CHECK(cur_arg < max_args);
goto fail_to_expand;
}
const Arg& arg = args[cur_arg++];
int64_t i;
const char* prefix = nullptr;
if (ch != 'p') {
// Check that the argument has the expected type.
if (arg.type != Arg::INT && arg.type != Arg::UINT) {
DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT);
goto fail_to_expand;
}
i = arg.integer.i;
if (ch != 'd') {
// The Arg() constructor automatically performed sign expansion on
// signed parameters. This is great when outputting a %d decimal
// number, but can result in unexpected leading 0xFF bytes when
// outputting a %x hexadecimal number. Mask bits, if necessary.
// We have to do this here, instead of in the Arg() constructor, as
// the Arg() constructor cannot tell whether we will output a %d
// or a %x. Only the latter should experience masking.
if (arg.integer.width < sizeof(int64_t)) {
i &= (1LL << (8*arg.integer.width)) - 1;
}
}
} else {
// Pointer values require an actual pointer or a string.
if (arg.type == Arg::POINTER) {
i = reinterpret_cast<uintptr_t>(arg.ptr);
} else if (arg.type == Arg::STRING) {
i = reinterpret_cast<uintptr_t>(arg.str);
} else if (arg.type == Arg::INT &&
arg.integer.width == sizeof(NULL) &&
arg.integer.i == 0) { // Allow C++'s version of NULL
i = 0;
} else {
DEBUG_CHECK(arg.type == Arg::POINTER || arg.type == Arg::STRING);
goto fail_to_expand;
}
// Pointers always include the "0x" prefix.
prefix = "0x";
}
// Use IToASCII() to convert to ASCII representation. For decimal
// numbers, optionally print a sign. For hexadecimal numbers,
// distinguish between upper and lower case. %p addresses are always
// printed as upcase. Supports base 8, 10, and 16. Prints padding
// and/or prefixes, if so requested.
buffer.IToASCII(ch == 'd' && arg.type == Arg::INT,
ch != 'x', i,
ch == 'o' ? 8 : ch == 'd' ? 10 : 16,
pad, padding, prefix);
break; }
case 's': {
// Check that there are arguments left to be inserted.
if (cur_arg >= max_args) {
DEBUG_CHECK(cur_arg < max_args);
goto fail_to_expand;
}
// Check that the argument has the expected type.
const Arg& arg = args[cur_arg++];
const char *s;
if (arg.type == Arg::STRING) {
s = arg.str ? arg.str : "<NULL>";
} else if (arg.type == Arg::INT && arg.integer.width == sizeof(NULL) &&
arg.integer.i == 0) { // Allow C++'s version of NULL
s = "<NULL>";
} else {
DEBUG_CHECK(arg.type == Arg::STRING);
goto fail_to_expand;
}
// Apply padding, if needed. This requires us to first check the
// length of the string that we are outputting.
if (padding) {
size_t len = 0;
for (const char* src = s; *src++; ) {
++len;
}
buffer.Pad(' ', padding, len);
}
// Printing a string involves nothing more than copying it into the
// output buffer and making sure we don't output more bytes than
// available space; Out() takes care of doing that.
for (const char* src = s; *src; ) {
buffer.Out(*src++);
}
break; }
case '%':
// Quoted percent '%' character.
goto copy_verbatim;
fail_to_expand:
// C++ gives us tools to do type checking -- something that snprintf()
// could never really do. So, whenever we see arguments that don't
// match up with the format string, we refuse to output them. But
// since we have to be extremely conservative about being async-
// signal-safe, we are limited in the type of error handling that we
// can do in production builds (in debug builds we can use
// DEBUG_CHECK() and hope for the best). So, all we do is pass the
// format string unchanged. That should eventually get the user's
// attention; and in the meantime, it hopefully doesn't lose too much
// data.
default:
// Unknown or unsupported format character. Just copy verbatim to
// output.
buffer.Out('%');
DEBUG_CHECK(ch);
if (!ch) {
goto end_of_format_string;
}
buffer.Out(ch);
break;
}
} else {
copy_verbatim:
buffer.Out(fmt[-1]);
}
}
end_of_format_string:
end_of_output_buffer:
return buffer.GetCount();
}
} // namespace internal
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt) {
  // Reject sizes that leave no room for the terminating NUL, as well as sizes
  // that turn negative when viewed as ssize_t. A buffer that large would span
  // most or all of the address space, and its length could not be reported
  // through the signed return value.
  if (static_cast<ssize_t>(sz) < 1)
    return -1;
  sz = std::min(sz, kSSizeMax);
  Buffer buffer(buf, sz);

  // With no arguments to substitute, formatting boils down to a bounded copy
  // of |fmt| in which every "%%" pair is emitted as a single '%'. This mirrors
  // how the slow path treats errors: the format text is passed through
  // unexpanded.
  const char* cursor = fmt;
  while (*cursor) {
    buffer.Out(*cursor);
    // Debug builds flag a '%' that is not immediately followed by another '%'.
    DEBUG_CHECK(cursor[0] != '%' || cursor[1] == '%');
    // Step past the character just written; a "%%" pair consumes two.
    cursor += (cursor[0] == '%' && cursor[1] == '%') ? 2 : 1;
  }
  return buffer.GetCount();
}
} // namespace strings
} // namespace base

View file

@ -0,0 +1,246 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_SAFE_SPRINTF_H_
#define BASE_STRINGS_SAFE_SPRINTF_H_
#include "build/build_config.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#if defined(OS_POSIX) || defined(OS_FUCHSIA)
// For ssize_t
#include <unistd.h>
#endif
#include "base/base_export.h"
namespace base {
namespace strings {
#if defined(COMPILER_MSVC)
// Define ssize_t inside of our namespace.
#if defined(_WIN64)
typedef __int64 ssize_t;
#else
typedef long ssize_t;
#endif
#endif
// SafeSPrintf() is a type-safe and completely self-contained version of
// snprintf().
//
// SafeSNPrintf() is an alternative function signature that can be used when
// not dealing with fixed-sized buffers. When possible, SafeSPrintf() should
// always be used instead of SafeSNPrintf()
//
// These functions allow for formatting complicated messages from contexts that
// require strict async-signal-safety. In fact, it is safe to call them from
// any low-level execution context, as they are guaranteed to make no library
// or system calls. It deliberately never touches "errno", either.
//
// The only exception to this rule is that in debug builds the code calls
// RAW_CHECK() to help diagnose problems when the format string does not
// match the rest of the arguments. In release builds, no CHECK()s are used,
// and SafeSPrintf() instead returns an output string that expands only
// those arguments that match their format characters. Mismatched arguments
// are ignored.
//
// The code currently only supports a subset of format characters:
// %c, %o, %d, %x, %X, %p, and %s.
//
// SafeSPrintf() aims to be as liberal as reasonably possible. Integer-like
// values of arbitrary width can be passed to all of the format characters
// that expect integers. Thus, it is explicitly legal to pass an "int" to
// "%c", and output will automatically look at the LSB only. It is also
// explicitly legal to pass either signed or unsigned values, and the format
// characters will automatically interpret the arguments accordingly.
//
// It is still not legal to mix-and-match integer-like values with pointer
// values. For instance, you cannot pass a pointer to %x, nor can you pass an
// integer to %p.
//
// The one exception is "0" zero being accepted by "%p". This works-around
// the problem of C++ defining NULL as an integer-like value.
//
// All format characters take an optional width parameter. This must be a
// positive integer. For %d, %o, %x, %X and %p, if the width starts with
// a leading '0', padding is done with '0' instead of ' ' characters.
//
// There are a few features of snprintf()-style format strings, that
// SafeSPrintf() does not support at this time.
//
// If an actual user showed up, there is no particularly strong reason they
// couldn't be added. But that assumes that the trade-offs between complexity
// and utility are favorable.
//
// For example, adding support for negative padding widths, and for %n are all
// likely to be viewed positively. They are all clearly useful, low-risk, easy
// to test, don't jeopardize the async-signal-safety of the code, and overall
// have little impact on other parts of SafeSPrintf() function.
//
// On the other hand, support for alternate forms, positional
// arguments, grouping, wide characters, localization or floating point numbers
// is unlikely to ever be added.
//
// SafeSPrintf() and SafeSNPrintf() mimic the behavior of snprintf() and they
// return the number of bytes needed to store the untruncated output. This
// does *not* include the terminating NUL byte.
//
// They return -1, iff a fatal error happened. This typically can only happen,
// if the buffer size is a) negative, or b) zero (i.e. not even the NUL byte
// can be written). The return value can never be larger than SSIZE_MAX-1.
// This ensures that the caller can always add one to the signed return code
// in order to determine the amount of storage that needs to be allocated.
//
// While the code supports type checking and while it is generally very careful
// to avoid printing incorrect values, it tends to be conservative in printing
// as much as possible, even when given incorrect parameters. Typically, in
// case of an error, the format string will not be expanded. (i.e. something
// like SafeSPrintf(buf, "%p %d", 1, 2) results in "%p 2"). See above for
// the use of RAW_CHECK() in debug builds, though.
//
// Basic example:
// char buf[20];
// base::strings::SafeSPrintf(buf, "The answer: %2d", 42);
//
// Example with dynamically sized buffer (async-signal-safe). This code won't
// work on Visual studio, as it requires dynamically allocating arrays on the
// stack. Consider picking a smaller value for |kMaxSize| if stack size is
// limited and known. On the other hand, if the parameters to SafeSNPrintf()
// are trusted and not controllable by the user, you can consider eliminating
// the check for |kMaxSize| altogether. The current value of SSIZE_MAX is
// essentially a no-op that just illustrates how to implement an upper bound:
// const size_t kInitialSize = 128;
// const size_t kMaxSize = std::numeric_limits<ssize_t>::max();
// size_t size = kInitialSize;
// for (;;) {
// char buf[size];
// size = SafeSNPrintf(buf, size, "Error message \"%s\"\n", err) + 1;
// if (sizeof(buf) < kMaxSize && size > kMaxSize) {
// size = kMaxSize;
// continue;
// } else if (size > sizeof(buf))
// continue;
// write(2, buf, size-1);
// break;
// }
namespace internal {
// Type-tagged argument record. Overloaded constructors, plus one constructor
// template for arbitrary pointers, capture both the value and the kind of
// every argument handed to the formatting functions. Recording the kind is
// what later allows a type-safe version of snprintf() to be written.
struct Arg {
  enum Type { INT, UINT, STRING, POINTER };

  // Integer-like values. The value is widened into a 64-bit slot and the byte
  // width of the source type is stored next to it.
  Arg(signed char c) : type(INT) {
    integer.width = sizeof(c);
    integer.i = c;
  }
  Arg(unsigned char c) : type(UINT) {
    integer.width = sizeof(c);
    integer.i = c;
  }
  Arg(signed short j) : type(INT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(unsigned short j) : type(UINT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(signed int j) : type(INT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(unsigned int j) : type(UINT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(signed long j) : type(INT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(unsigned long j) : type(UINT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(signed long long j) : type(INT) {
    integer.width = sizeof(j);
    integer.i = j;
  }
  Arg(unsigned long long j) : type(UINT) {
    integer.width = sizeof(j);
    integer.i = j;
  }

  // C-style text strings (const and non-const).
  Arg(const char* s) : type(STRING) { str = s; }
  Arg(char* s) : type(STRING) { str = s; }

  // Any other pointer value that can be cast to a "void*". The C-style cast is
  // kept on purpose so that pointers to qualified types are accepted as well.
  template <class T>
  Arg(T* p) : type(POINTER) {
    ptr = (void*)p;
  }

  // Payload; |type| says which member is meaningful.
  union {
    // An integer-like value and the size in bytes of its original type.
    struct {
      int64_t i;
      unsigned char width;
    } integer;

    // A C-style text string.
    const char* str;

    // A pointer to an arbitrary object.
    const void* ptr;
  };
  const Type type;
};
// This is the internal function that performs the actual formatting of
// an snprintf()-style format string.
//   |buf|, |sz|  - destination character buffer and its size.
//   |fmt|        - the format string to expand.
//   |args|       - type-tagged arguments, consumed in order as format
//                  characters are encountered.
//   |max_args|   - number of entries in |args|.
// As described in the comment at the top of this file, the result is the
// number of bytes needed for the untruncated output (excluding the
// terminating NUL), or -1 on a fatal error.
BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt,
const Arg* args, size_t max_args);
#if !defined(NDEBUG)
// In debug builds, allow unit tests to artificially lower the kSSizeMax
// constant that is used as a hard upper-bound for all buffers. In normal
// use, this constant should always be std::numeric_limits<ssize_t>::max().
BASE_EXPORT void SetSafeSPrintfSSizeMaxForTest(size_t max);
BASE_EXPORT size_t GetSafeSPrintfSSizeMaxForTest();
#endif
} // namespace internal
template <typename... Args>
ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args... args) {
  // Wrap each argument in an Arg record, which captures its type, and line the
  // records up in an array so the formatter can step through them.
  const internal::Arg wrapped_args[] = {args...};
  const size_t arg_count = sizeof...(Args);
  return internal::SafeSNPrintf(buf, N, fmt, wrapped_args, arg_count);
}
template <size_t N, typename... Args>
ssize_t SafeSPrintf(char (&buf)[N], const char* fmt, Args... args) {
  // The buffer size |N| is deduced from the array type. Each argument is
  // wrapped in an Arg record, which captures its type, and the records are
  // lined up in an array so the formatter can step through them.
  const internal::Arg wrapped_args[] = {args...};
  const size_t arg_count = sizeof...(Args);
  return internal::SafeSNPrintf(buf, N, fmt, wrapped_args, arg_count);
}
// Fast-path when we don't actually need to substitute any arguments.
BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt);
// Array-reference form of the argument-less fast path: the buffer size |N| is
// deduced from the type of |buf| and the call is forwarded unchanged to
// SafeSNPrintf(buf, N, fmt).
template<size_t N>
inline ssize_t SafeSPrintf(char (&buf)[N], const char* fmt) {
return SafeSNPrintf(buf, N, fmt);
}
} // namespace strings
} // namespace base
#endif // BASE_STRINGS_SAFE_SPRINTF_H_

View file

@ -0,0 +1,85 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/strcat.h"
namespace base {
namespace {
// Makes sure |str| has room for |additional| more characters, growing its
// capacity by at least 2x whenever it has to grow at all. Used by
// StrAppendT().
//
// Doubling imitates the exponential growth std::string applies when it is
// appended to normally. Most implementations of reserve() grow to exactly the
// requested amount, so without the doubling an append following a call to
// StrAppend() would definitely reallocate, and loops calling StrAppend()
// would take O(n^2) time. The intent is for StrAppend() to have the same
// semantics as std::string::append().
template <typename String>
void ReserveAdditionalIfNeeded(String* str,
                               typename String::size_type additional) {
  const size_t current_capacity = str->capacity();
  const size_t needed = str->size() + additional;

  // Only touch the string when the existing capacity falls short.
  if (needed > current_capacity) {
    str->reserve(std::max(needed, current_capacity * 2));
  }
}
template <typename DestString, typename InputString>
void StrAppendT(DestString* dest, span<const InputString> pieces) {
size_t additional_size = 0;
for (const auto& cur : pieces)
additional_size += cur.size();
ReserveAdditionalIfNeeded(dest, additional_size);
for (const auto& cur : pieces)
dest->append(cur.data(), cur.size());
}
} // namespace
// Each StrCat() overload starts from an empty string of the matching
// character type, appends all of |pieces| through StrAppendT(), and returns
// the result.

// 8-bit pieces given as StringPiece.
std::string StrCat(span<const StringPiece> pieces) {
  std::string joined;
  StrAppendT(&joined, pieces);
  return joined;
}

// 16-bit pieces given as StringPiece16.
string16 StrCat(span<const StringPiece16> pieces) {
  string16 joined;
  StrAppendT(&joined, pieces);
  return joined;
}

// 8-bit pieces given as std::string.
std::string StrCat(span<const std::string> pieces) {
  std::string joined;
  StrAppendT(&joined, pieces);
  return joined;
}

// 16-bit pieces given as string16.
string16 StrCat(span<const string16> pieces) {
  string16 joined;
  StrAppendT(&joined, pieces);
  return joined;
}
// The StrAppend() overloads below all forward to StrAppendT(), which appends
// every element of |pieces| to |*dest|. They differ only in the character
// width of the destination and in the element type of the span.

// std::string destination, StringPiece pieces.
void StrAppend(std::string* dest, span<const StringPiece> pieces) {
StrAppendT(dest, pieces);
}
// string16 destination, StringPiece16 pieces.
void StrAppend(string16* dest, span<const StringPiece16> pieces) {
StrAppendT(dest, pieces);
}
// std::string destination, std::string pieces.
void StrAppend(std::string* dest, span<const std::string> pieces) {
StrAppendT(dest, pieces);
}
// string16 destination, string16 pieces.
void StrAppend(string16* dest, span<const string16> pieces) {
StrAppendT(dest, pieces);
}
} // namespace base

View file

@ -0,0 +1,103 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRCAT_H_
#define BASE_STRINGS_STRCAT_H_
#include <initializer_list>
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
#if defined(OS_WIN)
// Guard against conflict with Win32 API StrCat macro:
// check StrCat wasn't and will not be redefined.
#define StrCat StrCat
#endif
namespace base {
// StrCat ----------------------------------------------------------------------
//
// StrCat is a function to perform concatenation on a sequence of strings.
// It is preferable to a sequence of "a + b + c" because it is both faster and
// generates less code.
//
// std::string result = base::StrCat({"foo ", result, "\nfoo ", bar});
//
// To join an array of strings with a separator, see base::JoinString in
// base/strings/string_util.h.
//
// MORE INFO
//
// StrCat can see all arguments at once, so it can allocate one return buffer
// of exactly the right size and copy once, as opposed to a sequence of
// operator+ which generates a series of temporary strings, copying as it goes.
// And by using StringPiece arguments, StrCat can avoid creating temporary
// string objects for char* constants.
//
// ALTERNATIVES
//
// Internal Google / Abseil has a similar StrCat function. That version takes
// an overloaded number of arguments instead of initializer list (overflowing
// to initializer list for many arguments). We don't have any legacy
// requirements and using only initializer_list is simpler and generates
// roughly the same amount of code at the call sites.
//
// Abseil's StrCat also allows numbers by using an intermediate class that can
// be implicitly constructed from either a string or various number types. This
// class formats the numbers into a static buffer for increased performance,
// and the call sites look nice.
//
// As-written Abseil's helper class for numbers generates slightly more code
// than the raw StringPiece version. We can de-inline the helper class'
// constructors which will cause the StringPiece constructors to be de-inlined
// for this call and generate slightly less code. This is something we can
// explore more in the future.
BASE_EXPORT std::string StrCat(span<const StringPiece> pieces)
WARN_UNUSED_RESULT;
BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces)
WARN_UNUSED_RESULT;
BASE_EXPORT std::string StrCat(span<const std::string> pieces)
WARN_UNUSED_RESULT;
BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT;
// Initializer-list overloads: view the list's elements as a span and hand
// that span to the corresponding span-based StrCat().
inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
  const auto piece_span = make_span(pieces.begin(), pieces.size());
  return StrCat(piece_span);
}
inline string16 StrCat(std::initializer_list<StringPiece16> pieces) {
  const auto piece_span = make_span(pieces.begin(), pieces.size());
  return StrCat(piece_span);
}
// StrAppend -------------------------------------------------------------------
//
// Appends a sequence of strings to a destination. Prefer:
// StrAppend(&foo, ...);
// over:
// foo += StrCat(...);
// because it avoids a temporary string allocation and copy.
BASE_EXPORT void StrAppend(std::string* dest, span<const StringPiece> pieces);
BASE_EXPORT void StrAppend(string16* dest, span<const StringPiece16> pieces);
BASE_EXPORT void StrAppend(std::string* dest, span<const std::string> pieces);
BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces);
// Initializer-list overloads: view the list's elements as a span and hand
// that span to the corresponding span-based StrAppend().
inline void StrAppend(std::string* dest,
                      std::initializer_list<StringPiece> pieces) {
  StrAppend(dest, make_span(pieces.begin(), pieces.size()));
}
inline void StrAppend(string16* dest,
                      std::initializer_list<StringPiece16> pieces) {
  StrAppend(dest, make_span(pieces.begin(), pieces.size()));
}
} // namespace base
#endif // BASE_STRINGS_STRCAT_H_

View file

@ -0,0 +1,87 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string16.h"
#if defined(WCHAR_T_IS_UTF16) && !defined(_AIX)
#error This file should not be used on 2-byte wchar_t systems
// If this winds up being needed on 2-byte wchar_t systems, either the
// definitions below can be used, or the host system's wide character
// functions like wmemcmp can be wrapped.
#elif defined(WCHAR_T_IS_UTF32)
#include <ostream>
#include "base/strings/string_piece.h"
namespace base {
int c16memcmp(const char16* s1, const char16* s2, size_t n) {
// We cannot call memcmp because that changes the semantics.
while (n-- > 0) {
if (*s1 != *s2) {
// We cannot use (*s1 - *s2) because char16 is unsigned.
return ((*s1 < *s2) ? -1 : 1);
}
++s1;
++s2;
}
return 0;
}
// Returns the number of char16 units in |s| before the first zero unit.
size_t c16len(const char16* s) {
  size_t count = 0;
  while (s[count]) {
    ++count;
  }
  return count;
}
// Scans the first |n| char16 units of |s| for |c|. Returns a pointer to the
// first match, or nullptr when none of them equals |c|.
const char16* c16memchr(const char16* s, char16 c, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    if (s[i] == c)
      return s + i;
  }
  return nullptr;
}
// Moves |n| char16 units from |s2| to |s1| with memmove() and returns its
// result cast back to char16*.
char16* c16memmove(char16* s1, const char16* s2, size_t n) {
  // memmove() counts bytes, so scale the unit count.
  const size_t byte_count = n * sizeof(char16);
  void* const moved = memmove(s1, s2, byte_count);
  return static_cast<char16*>(moved);
}
// Copies |n| char16 units from |s2| to |s1| with memcpy() and returns its
// result cast back to char16*.
char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
  // memcpy() counts bytes, so scale the unit count.
  const size_t byte_count = n * sizeof(char16);
  void* const copied = memcpy(s1, s2, byte_count);
  return static_cast<char16*>(copied);
}
// Stores |c| into each of the first |n| char16 units of |s| and returns |s|.
char16* c16memset(char16* s, char16 c, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    s[i] = c;
  }
  return s;
}
namespace string16_internals {
// Streams |str| to |out| by wrapping it in a base::StringPiece16 and
// delegating to that type's stream insertion operator.
std::ostream& operator<<(std::ostream& out, const string16& str) {
return out << base::StringPiece16(str);
}
// Writes |str| to |*out| via stream insertion.
void PrintTo(const string16& str, std::ostream* out) {
*out << str;
}
} // namespace string16_internals
} // namespace base
template class std::
basic_string<base::char16, base::string16_internals::string16_char_traits>;
#endif // WCHAR_T_IS_UTF32

View file

@ -0,0 +1,229 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING16_H_
#define BASE_STRINGS_STRING16_H_
// WHAT:
// A version of std::basic_string that provides 2-byte characters even when
// wchar_t is not implemented as a 2-byte type. You can access this class as
// string16. We also define char16, which string16 is based upon.
//
// WHY:
// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
// data. Plenty of existing code operates on strings encoded as UTF-16.
//
// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
// at run time, because it calls some functions (like wcslen) that come from
// the system's native C library -- which was built with a 4-byte wchar_t!
// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
// entirely improper on those systems where the encoding of wchar_t is defined
// as UTF-32.
//
// Here, we define string16, which is similar to std::wstring but replaces all
// libc functions with custom, 2-byte-char compatible routines. It is capable
// of carrying UTF-16-encoded data.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <functional>
#include <string>
#include "base/base_export.h"
#include "build/build_config.h"
#if defined(WCHAR_T_IS_UTF16)
// Define a macro for wrapping construction of char16 arrays and string16s from
// a literal string. This indirection allows for an easier migration of
// base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one
// character change to the macro will be necessary.
// This macro does not exist when WCHAR_T_IS_UTF32, as it is currently not
// possible to create a char array from a literal in this case.
// TODO(https://crbug.com/911896): Remove this macro once base::char16 is
// char16_t on all platforms.
#define STRING16_LITERAL(x) L##x
namespace base {
typedef wchar_t char16;
typedef std::wstring string16;
} // namespace base
#elif defined(WCHAR_T_IS_UTF32)
#include <wchar.h> // for mbstate_t
namespace base {
typedef uint16_t char16;
// char16 versions of the functions required by string16_char_traits; these
// are based on the wide character functions of similar names ("w" or "wcs"
// instead of "c16").
BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
BASE_EXPORT size_t c16len(const char16* s);
BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);
BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);
BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
// This namespace contains the implementation of base::string16 along with
// things that need to be found via argument-dependent lookup from a
// base::string16.
namespace string16_internals {
// Character traits for char16. This is the traits class that string16
// (std::basic_string<char16, string16_char_traits>) is built on. The bulk
// operations delegate to the c16* functions declared in this header.
struct string16_char_traits {
  using char_type = char16;
  using int_type = int;

  // int_type needs to be able to hold each possible value of char_type, and in
  // addition, the distinct value of eof().
  static_assert(sizeof(int_type) > sizeof(char_type),
                "int must be larger than 16 bits wide");

  using off_type = std::streamoff;
  using state_type = mbstate_t;
  using pos_type = std::fpos<state_type>;

  // Single-character operations.
  static void assign(char_type& c1, const char_type& c2) { c1 = c2; }
  static bool eq(const char_type& c1, const char_type& c2) { return c1 == c2; }
  static bool lt(const char_type& c1, const char_type& c2) { return c1 < c2; }

  // Sequence operations, implemented by the c16* helpers.
  static int compare(const char_type* s1, const char_type* s2, size_t n) {
    return c16memcmp(s1, s2, n);
  }
  static size_t length(const char_type* s) { return c16len(s); }
  static const char_type* find(const char_type* s,
                               size_t n,
                               const char_type& a) {
    return c16memchr(s, a, n);
  }
  static char_type* move(char_type* s1, const char_type* s2, size_t n) {
    return c16memmove(s1, s2, n);
  }
  static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
    return c16memcpy(s1, s2, n);
  }
  static char_type* assign(char_type* s, size_t n, char_type a) {
    return c16memset(s, a, n);
  }

  // Conversions between char_type and int_type, and end-of-file handling.
  static int_type not_eof(const int_type& c) {
    // Map eof() to 0; every other value passes through untouched.
    if (eq_int_type(c, eof()))
      return 0;
    return c;
  }
  static char_type to_char_type(const int_type& c) {
    return static_cast<char_type>(c);
  }
  static int_type to_int_type(const char_type& c) {
    return static_cast<int_type>(c);
  }
  static bool eq_int_type(const int_type& c1, const int_type& c2) {
    return c1 == c2;
  }
  static int_type eof() { return static_cast<int_type>(EOF); }
};
} // namespace string16_internals
typedef std::basic_string<char16,
base::string16_internals::string16_char_traits>
string16;
namespace string16_internals {
BASE_EXPORT extern std::ostream& operator<<(std::ostream& out,
const string16& str);
// This is required by googletest to print a readable output on test failures.
BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out);
} // namespace string16_internals
} // namespace base
// The string class will be explicitly instantiated only once, in string16.cc.
//
// std::basic_string<> in GNU libstdc++ contains a static data member,
// _S_empty_rep_storage, to represent empty strings. When an operation such
// as assignment or destruction is performed on a string, causing its existing
// data member to be invalidated, it must not be freed if this static data
// member is being used. Otherwise, it counts as an attempt to free static
// (and not allocated) data, which is a memory error.
//
// Generally, due to C++ template magic, _S_empty_rep_storage will be marked
// as a coalesced symbol, meaning that the linker will combine multiple
// instances into a single one when generating output.
//
// If a string class is used by multiple shared libraries, a problem occurs.
// Each library will get its own copy of _S_empty_rep_storage. When strings
// are passed across a library boundary for alteration or destruction, memory
// errors will result. GNU libstdc++ contains a configuration option,
// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
// disables the static data member optimization, but it's a good optimization
// and non-STL code is generally at the mercy of the system's STL
// configuration. Fully-dynamic strings are not the default for GNU libstdc++
// itself or for the libstdc++ installations on the systems we care
// about, such as Mac OS X and relevant flavors of Linux.
//
// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
//
// To avoid problems, string classes need to be explicitly instantiated only
// once, in exactly one library. All other string users see it via an "extern"
// declaration. This is precisely how GNU libstdc++ handles
// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
//
// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
// in which the linker does not fully coalesce symbols when dead code
// stripping is enabled. This bug causes the memory errors described above
// to occur even when a std::basic_string<> does not cross shared library
// boundaries, such as in statically-linked executables.
//
// TODO(mark): File this bug with Apple and update this note with a bug number.
extern template class BASE_EXPORT
std::basic_string<base::char16,
base::string16_internals::string16_char_traits>;
// Specialize std::hash for base::string16. Although the style guide forbids
// this in general, it is necessary for consistency with WCHAR_T_IS_UTF16
// platforms, where base::string16 is a type alias for std::wstring.
namespace std {
template <>
struct hash<base::string16> {
  // Hashes |s| by folding its code units, in order, into an accumulator that
  // is multiplied by 131 before each unit is added. An empty string hashes
  // to 0.
  std::size_t operator()(const base::string16& s) const {
    std::size_t digest = 0;
    for (const base::char16 unit : s) {
      digest *= 131;
      digest += unit;
    }
    return digest;
  }
};
}  // namespace std
#endif // WCHAR_T_IS_UTF32
#endif // BASE_STRINGS_STRING16_H_

View file

@ -0,0 +1,545 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_number_conversions.h"
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <wctype.h>
#include <limits>
#include <type_traits>
#include "base/logging.h"
#include "base/no_destructor.h"
#include "base/numerics/safe_math.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
namespace base {
namespace {
// Converts an integer of type INT into its base-10 text form, returned as a
// string of type STR.
template <typename STR, typename INT>
struct IntToStringT {
  static STR IntToString(INT value) {
    using CHR = typename STR::value_type;
    using UnsignedInt = typename std::make_unsigned<INT>::type;

    // One byte holds log10(2**8) ~= 2.4 decimal digits, so budget three
    // output characters per byte, plus one more for '-' on signed types.
    const size_t kOutputBufSize =
        3 * sizeof(INT) + std::numeric_limits<INT>::is_signed;

    // The text is assembled in a temporary buffer, written back to front;
    // only the part actually used is returned.
    CHR outbuf[kOutputBufSize];
    CHR* const end = outbuf + kOutputBufSize;

    // ValueOrDie cannot fail here, because UnsignedAbs is valid for all valid
    // inputs.
    UnsignedInt magnitude =
        CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie();

    // Emit at least one digit (so zero prints as "0"), least significant
    // digit first.
    CHR* cursor = end;
    for (;;) {
      --cursor;
      DCHECK(cursor != outbuf);
      *cursor = static_cast<CHR>((magnitude % 10) + '0');
      magnitude /= 10;
      if (magnitude == 0)
        break;
    }

    if (IsValueNegative(value)) {
      --cursor;
      DCHECK(cursor != outbuf);
      *cursor = static_cast<CHR>('-');
    }
    return STR(cursor, end);
  }
};
// Utility to convert a character to a digit in a given base. The primary
// template is intentionally empty; the two partial specializations below,
// selected by BASE_LTE_10, provide Convert().
template <typename CHAR, int BASE, bool BASE_LTE_10>
class BaseCharToDigit {};

// Faster specialization for bases <= 10: only '0' .. '0' + BASE - 1 are
// digits.
template <typename CHAR, int BASE>
class BaseCharToDigit<CHAR, BASE, true> {
 public:
  // Stores the value of |c| in |*digit| and returns true when |c| is a valid
  // digit; otherwise returns false and leaves |*digit| untouched.
  static bool Convert(CHAR c, uint8_t* digit) {
    const bool is_digit = c >= '0' && c < '0' + BASE;
    if (!is_digit)
      return false;
    *digit = static_cast<uint8_t>(c - '0');
    return true;
  }
};

// Specialization for bases where 10 < base <= 36: decimal digits, followed by
// letters of either case for the values from 10 upwards.
template <typename CHAR, int BASE>
class BaseCharToDigit<CHAR, BASE, false> {
 public:
  // Stores the value of |c| in |*digit| and returns true when |c| is a valid
  // digit; otherwise returns false and leaves |*digit| untouched.
  static bool Convert(CHAR c, uint8_t* digit) {
    if (c >= '0' && c <= '9') {
      *digit = static_cast<uint8_t>(c - '0');
      return true;
    }
    if (c >= 'a' && c < 'a' + BASE - 10) {
      *digit = static_cast<uint8_t>(c - 'a' + 10);
      return true;
    }
    if (c >= 'A' && c < 'A' + BASE - 10) {
      *digit = static_cast<uint8_t>(c - 'A' + 10);
      return true;
    }
    return false;
  }
};
template <int BASE, typename CHAR>
bool CharToDigit(CHAR c, uint8_t* digit) {
return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
}
// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
// is locale independent, whereas the functions we are replacing were
// locale-dependent. TBD what is desired, but for the moment let's not
// introduce a change in behaviour.
//
// The primary template is intentionally empty; only the char and char16
// specializations provide Invoke().
template <typename CHAR>
class WhitespaceHelper {};

// Narrow characters: classified with isspace().
template <>
class WhitespaceHelper<char> {
 public:
  static bool Invoke(char c) {
    // Convert to unsigned char before handing the value to isspace().
    const int is_space = isspace(static_cast<unsigned char>(c));
    return is_space != 0;
  }
};

// 16-bit characters: classified with iswspace().
template <>
class WhitespaceHelper<char16> {
 public:
  static bool Invoke(char16 c) {
    const int is_space = iswspace(c);
    return is_space != 0;
  }
};

// Returns whether |c| is whitespace according to the WhitespaceHelper
// specialization for its character type.
template <typename CHAR>
bool LocalIsWhitespace(CHAR c) {
  return WhitespaceHelper<CHAR>::Invoke(c);
}
// IteratorRangeToNumberTraits should provide:
// - a typedef for iterator_type, the iterator type used as input.
// - a typedef for value_type, the target numeric type.
// - static functions min, max (returning the minimum and maximum permitted
// values)
// - constant kBase, the base in which to interpret the input
//
// IteratorRangeToNumber parses the characters of an iterator range into a
// traits::value_type, handling leading whitespace, an optional sign and
// overflow/underflow saturation.
template<typename IteratorRangeToNumberTraits>
class IteratorRangeToNumber {
public:
typedef IteratorRangeToNumberTraits traits;
typedef typename traits::iterator_type const_iterator;
typedef typename traits::value_type value_type;
// Generalized iterator-range-to-number conversion.
//
// Parses [begin, end) into |*output|. Returns true only when the whole range
// was consumed as a number without leading whitespace and without
// overflow/underflow. |*output| is written on every path, including failures.
static bool Invoke(const_iterator begin,
const_iterator end,
value_type* output) {
bool valid = true;
// Leading whitespace is skipped so the number after it is still parsed, but
// its presence makes the conversion report failure.
while (begin != end && LocalIsWhitespace(*begin)) {
valid = false;
++begin;
}
if (begin != end && *begin == '-') {
if (!std::numeric_limits<value_type>::is_signed) {
// A minus sign is rejected outright for unsigned targets.
*output = 0;
valid = false;
} else if (!Negative::Invoke(begin + 1, end, output)) {
valid = false;
}
} else {
// An explicit '+' is accepted and skipped.
if (begin != end && *begin == '+') {
++begin;
}
if (!Positive::Invoke(begin, end, output)) {
valid = false;
}
}
return valid;
}
private:
// Sign provides:
// - a static function, CheckBounds, that determines whether the next digit
// causes an overflow/underflow
// - a static function, Increment, that appends the next digit appropriately
// according to the sign of the number being parsed.
template<typename Sign>
class Base {
public:
// Parses the unsigned digit sequence [begin, end), accumulating into
// |*output| with the sign handling supplied by Sign. Returns false for an
// empty range, on the first non-digit character (|*output| then holds the
// value accumulated so far), or when Sign::CheckBounds reports that the next
// digit would not fit.
static bool Invoke(const_iterator begin, const_iterator end,
typename traits::value_type* output) {
*output = 0;
if (begin == end) {
return false;
}
// Note: no performance difference was found when using template
// specialization to remove this check in bases other than 16
// A "0x"/"0X" prefix is skipped only when at least one more character
// follows it (end - begin > 2).
if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
(*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
begin += 2;
}
for (const_iterator current = begin; current != end; ++current) {
uint8_t new_digit = 0;
if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
return false;
}
// For every digit after the first, verify that shifting the accumulated
// value by one digit and adding |new_digit| stays in range before doing so.
if (current != begin) {
if (!Sign::CheckBounds(output, new_digit)) {
return false;
}
*output *= traits::kBase;
}
Sign::Increment(new_digit, output);
}
return true;
}
};
// Accumulates towards traits::max(); saturates |*output| to max on overflow.
class Positive : public Base<Positive> {
public:
// Returns false, after setting |*output| to traits::max(), when
// |*output| * kBase + new_digit would exceed traits::max().
static bool CheckBounds(value_type* output, uint8_t new_digit) {
if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
(*output == static_cast<value_type>(traits::max() / traits::kBase) &&
new_digit > traits::max() % traits::kBase)) {
*output = traits::max();
return false;
}
return true;
}
static void Increment(uint8_t increment, value_type* output) {
*output += increment;
}
};
// Accumulates towards traits::min() by subtracting digits; saturates
// |*output| to min on underflow.
class Negative : public Base<Negative> {
public:
// Returns false, after setting |*output| to traits::min(), when
// |*output| * kBase - new_digit would fall below traits::min().
static bool CheckBounds(value_type* output, uint8_t new_digit) {
if (*output < traits::min() / traits::kBase ||
(*output == traits::min() / traits::kBase &&
new_digit > 0 - traits::min() % traits::kBase)) {
*output = traits::min();
return false;
}
return true;
}
static void Increment(uint8_t increment, value_type* output) {
*output -= increment;
}
};
};
// Traits bundle describing one conversion: the input iterator type, the
// numeric result type with its permitted range, and the numeric base.
template <typename ITERATOR, typename VALUE, int BASE>
class BaseIteratorRangeToNumberTraits {
 public:
  using iterator_type = ITERATOR;
  using value_type = VALUE;

  // Base in which the input is interpreted.
  static const int kBase = BASE;

  // Smallest and largest values representable by |value_type|.
  static value_type min() { return std::numeric_limits<VALUE>::min(); }
  static value_type max() { return std::numeric_limits<VALUE>::max(); }
};
// Traits for parsing base-16 input into an int.
template <typename ITERATOR>
class BaseHexIteratorRangeToIntTraits
    : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {};
// Traits for parsing base-16 input into a uint32_t.
template <typename ITERATOR>
class BaseHexIteratorRangeToUIntTraits
    : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {
};
// Traits for parsing base-16 input into an int64_t.
template <typename ITERATOR>
class BaseHexIteratorRangeToInt64Traits
    : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {
};
// Traits for parsing base-16 input into a uint64_t.
template <typename ITERATOR>
class BaseHexIteratorRangeToUInt64Traits
    : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {
};
// Hex traits bound to the iterator type of StringPiece, one per result type.
using HexIteratorRangeToIntTraits =
    BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>;
using HexIteratorRangeToUIntTraits =
    BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator>;
using HexIteratorRangeToInt64Traits =
    BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>;
using HexIteratorRangeToUInt64Traits =
    BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>;
// Traits for converting the contents of a StringPiece into VALUE, reading the
// digits in base BASE.
template <typename VALUE, int BASE>
class StringPieceToNumberTraits
    : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
                                             VALUE,
                                             BASE> {};
// Parses |input| as a base-10 number of type VALUE into |*output|, returning
// whatever IteratorRangeToNumber::Invoke reports for the conversion.
template <typename VALUE>
bool StringToIntImpl(StringPiece input, VALUE* output) {
  using Parser = IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10>>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
// Traits for converting the contents of a StringPiece16 into VALUE, reading
// the digits in base BASE.
template <typename VALUE, int BASE>
class StringPiece16ToNumberTraits
    : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator,
                                             VALUE,
                                             BASE> {};
// Parses the 16-bit |input| as a base-10 number of type VALUE into |*output|,
// returning whatever IteratorRangeToNumber::Invoke reports for the conversion.
template <typename VALUE>
bool String16ToIntImpl(StringPiece16 input, VALUE* output) {
  using Parser = IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10>>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
} // namespace
std::string NumberToString(int value) {
return IntToStringT<std::string, int>::IntToString(value);
}
// Formats an int as a string16 via IntToStringT.
string16 NumberToString16(int value) {
  using Formatter = IntToStringT<string16, int>;
  return Formatter::IntToString(value);
}
std::string NumberToString(unsigned value) {
return IntToStringT<std::string, unsigned>::IntToString(value);
}
// Formats an unsigned int as a string16 via IntToStringT.
string16 NumberToString16(unsigned value) {
  using Formatter = IntToStringT<string16, unsigned>;
  return Formatter::IntToString(value);
}
std::string NumberToString(long value) {
return IntToStringT<std::string, long>::IntToString(value);
}
// Formats a long as a string16 via IntToStringT.
string16 NumberToString16(long value) {
  using Formatter = IntToStringT<string16, long>;
  return Formatter::IntToString(value);
}
std::string NumberToString(unsigned long value) {
return IntToStringT<std::string, unsigned long>::IntToString(value);
}
// Formats an unsigned long as a string16 via IntToStringT.
string16 NumberToString16(unsigned long value) {
  using Formatter = IntToStringT<string16, unsigned long>;
  return Formatter::IntToString(value);
}
std::string NumberToString(long long value) {
return IntToStringT<std::string, long long>::IntToString(value);
}
// Formats a long long as a string16 via IntToStringT.
string16 NumberToString16(long long value) {
  using Formatter = IntToStringT<string16, long long>;
  return Formatter::IntToString(value);
}
std::string NumberToString(unsigned long long value) {
return IntToStringT<std::string, unsigned long long>::IntToString(value);
}
// Formats an unsigned long long as a string16 via IntToStringT.
string16 NumberToString16(unsigned long long value) {
  using Formatter = IntToStringT<string16, unsigned long long>;
  return Formatter::IntToString(value);
}
// Returns the converter shared by the double overloads of NumberToString and
// NumberToString16. It is constructed once, on first use, and is never
// destroyed (NoDestructor).
// NOTE(review): the positional arguments are presumably (flags, infinity
// symbol, NaN symbol, exponent character, shortest-mode low/high decimal
// exponents, precision-mode leading/trailing zero padding) -- confirm against
// double-conversion's DoubleToStringConverter constructor.
static const double_conversion::DoubleToStringConverter*
GetDoubleToStringConverter() {
  using double_conversion::DoubleToStringConverter;
  static NoDestructor<DoubleToStringConverter> converter(
      DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, nullptr, nullptr,
      'e', -6, 12, 0, 0);
  return converter.get();
}
// Formats |value| with the shared converter's ToShortest() and returns the
// characters it produced as a std::string.
std::string NumberToString(double value) {
  char digits[32];
  double_conversion::StringBuilder builder(digits, sizeof(digits));
  GetDoubleToStringConverter()->ToShortest(value, &builder);
  const auto length = builder.position();
  return std::string(digits, length);
}
// Formats |value| with the shared converter's ToShortest() and returns the
// characters it produced as a string16.
base::string16 NumberToString16(double value) {
  char digits[32];
  double_conversion::StringBuilder builder(digits, sizeof(digits));
  GetDoubleToStringConverter()->ToShortest(value, &builder);
  // The number will be ASCII. The iterator-range constructor widens each 8-bit
  // character to 16 bits by plain assignment.
  const char* const first = digits;
  const char* const last = digits + builder.position();
  return base::string16(first, last);
}
// Parses the 8-bit |input| as a decimal int; see StringToIntImpl.
bool StringToInt(StringPiece input, int* output) {
  return StringToIntImpl<int>(input, output);
}
// Parses the 16-bit |input| as a decimal int; see String16ToIntImpl.
bool StringToInt(StringPiece16 input, int* output) {
  return String16ToIntImpl<int>(input, output);
}
// Parses the 8-bit |input| as a decimal unsigned int; see StringToIntImpl.
bool StringToUint(StringPiece input, unsigned* output) {
  return StringToIntImpl<unsigned>(input, output);
}
// Parses the 16-bit |input| as a decimal unsigned int; see String16ToIntImpl.
bool StringToUint(StringPiece16 input, unsigned* output) {
  return String16ToIntImpl<unsigned>(input, output);
}
// Parses the 8-bit |input| as a decimal int64_t; see StringToIntImpl.
bool StringToInt64(StringPiece input, int64_t* output) {
  return StringToIntImpl<int64_t>(input, output);
}
// Parses the 16-bit |input| as a decimal int64_t; see String16ToIntImpl.
bool StringToInt64(StringPiece16 input, int64_t* output) {
  return String16ToIntImpl<int64_t>(input, output);
}
// Parses the 8-bit |input| as a decimal uint64_t; see StringToIntImpl.
bool StringToUint64(StringPiece input, uint64_t* output) {
  return StringToIntImpl<uint64_t>(input, output);
}
// Parses the 16-bit |input| as a decimal uint64_t; see String16ToIntImpl.
bool StringToUint64(StringPiece16 input, uint64_t* output) {
  return String16ToIntImpl<uint64_t>(input, output);
}
// Parses the 8-bit |input| as a decimal size_t; see StringToIntImpl.
bool StringToSizeT(StringPiece input, size_t* output) {
  return StringToIntImpl<size_t>(input, output);
}
// Parses the 16-bit |input| as a decimal size_t; see String16ToIntImpl.
bool StringToSizeT(StringPiece16 input, size_t* output) {
  return String16ToIntImpl<size_t>(input, output);
}
template <typename STRING, typename CHAR>
bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) {
static NoDestructor<double_conversion::StringToDoubleConverter> converter(
double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
0.0, 0, nullptr, nullptr);
int processed_characters_count;
*output = converter->StringToDouble(data, input.size(),
&processed_characters_count);
// Cases to return false:
// - If the input string is empty, there was nothing to parse.
// - If the value saturated to HUGE_VAL.
// - If the entire string was not processed, there are either characters
// remaining in the string after a parsed number, or the string does not
// begin with a parseable number.
// - If the first character is a space, there was leading whitespace
return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL &&
static_cast<size_t>(processed_characters_count) == input.size() &&
!IsUnicodeWhitespace(input[0]);
}
// Parses the 8-bit |input| as a double; see StringToDoubleImpl.
bool StringToDouble(StringPiece input, double* output) {
  const char* const chars = input.data();
  return StringToDoubleImpl(input, chars, output);
}
// Parses the 16-bit |input| as a double; see StringToDoubleImpl. The
// characters are handed to the converter as uint16_t code units.
bool StringToDouble(StringPiece16 input, double* output) {
  const uint16_t* const code_units =
      reinterpret_cast<const uint16_t*>(input.data());
  return StringToDoubleImpl(input, code_units, output);
}
// Returns the upper-case hexadecimal representation of the |size| bytes
// starting at |bytes|. The result is exactly |size| * 2 characters long.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";
  const char* const input = static_cast<const char*>(bytes);

  // Each input byte creates two output hex characters.
  std::string encoded;
  encoded.reserve(size * 2);
  for (size_t index = 0; index != size; ++index) {
    const char byte = input[index];
    // Masking after the shift keeps the table index in 0..15 even when char
    // is signed and the shift sign-extends.
    encoded.push_back(kHexChars[(byte >> 4) & 0xf]);
    encoded.push_back(kHexChars[byte & 0xf]);
  }
  return encoded;
}
// Span overload: hex-encodes every byte viewed by |bytes|.
std::string HexEncode(base::span<const uint8_t> bytes) {
  const void* const first = bytes.data();
  return HexEncode(first, bytes.size());
}
// Parses |input| as a hexadecimal int using IteratorRangeToNumber.
bool HexStringToInt(StringPiece input, int* output) {
  using Parser = IteratorRangeToNumber<HexIteratorRangeToIntTraits>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
// Parses |input| as a hexadecimal uint32_t using IteratorRangeToNumber.
bool HexStringToUInt(StringPiece input, uint32_t* output) {
  using Parser = IteratorRangeToNumber<HexIteratorRangeToUIntTraits>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
// Parses |input| as a hexadecimal int64_t using IteratorRangeToNumber.
bool HexStringToInt64(StringPiece input, int64_t* output) {
  using Parser = IteratorRangeToNumber<HexIteratorRangeToInt64Traits>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
// Parses |input| as a hexadecimal uint64_t using IteratorRangeToNumber.
bool HexStringToUInt64(StringPiece input, uint64_t* output) {
  using Parser = IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>;
  return Parser::Invoke(input.begin(), input.end(), output);
}
// Decodes the hex digits in |input| two at a time and appends one byte per
// pair to |*output|, which is expected to start out empty. Returns false for
// an empty or odd-length input, or at the first character that is not a hex
// digit; bytes decoded before that point remain in |*output|.
template <typename Container>
static bool HexStringToByteContainer(StringPiece input, Container* output) {
  DCHECK_EQ(output->size(), 0u);
  const size_t count = input.size();
  const bool has_whole_bytes = count != 0 && (count % 2) == 0;
  if (!has_whole_bytes)
    return false;
  for (size_t offset = 0; offset < count; offset += 2) {
    uint8_t msb = 0;  // most significant 4 bits
    uint8_t lsb = 0;  // least significant 4 bits
    const bool decoded = CharToDigit<16>(input[offset], &msb) &&
                         CharToDigit<16>(input[offset + 1], &lsb);
    if (!decoded)
      return false;
    output->push_back((msb << 4) | lsb);
  }
  return true;
}
// Decodes the hex string |input| into the byte vector |*output|.
bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) {
  const bool decoded = HexStringToByteContainer(input, output);
  return decoded;
}
// Decodes the hex string |input| into the characters of |*output|.
bool HexStringToString(StringPiece input, std::string* output) {
  const bool decoded = HexStringToByteContainer(input, output);
  return decoded;
}
// Decodes the hex string |input| into the pre-sized |output|. Returns false,
// without writing anything, when |input| is empty, has odd length, or does not
// decode to exactly |output.size()| bytes. Returns false at the first
// character that is not a hex digit; bytes decoded before it have already been
// written.
bool HexStringToSpan(StringPiece input, base::span<uint8_t> output) {
  const size_t count = input.size();
  if (count == 0 || (count % 2) != 0 || count / 2 != output.size())
    return false;
  for (size_t offset = 0; offset < count; offset += 2) {
    uint8_t msb = 0;  // most significant 4 bits
    uint8_t lsb = 0;  // least significant 4 bits
    const bool decoded = CharToDigit<16>(input[offset], &msb) &&
                         CharToDigit<16>(input[offset + 1], &lsb);
    if (!decoded)
      return false;
    output[offset / 2] = (msb << 4) | lsb;
  }
  return true;
}
} // namespace base

View file

@ -0,0 +1,157 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
#include <vector>
#include "base/base_export.h"
#include "base/containers/span.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
// ----------------------------------------------------------------------------
// IMPORTANT MESSAGE FROM YOUR SPONSOR
//
// This file contains no "wstring" variants. New code should use string16. If
// you need to make old code work, use the UTF8 version and convert. Please do
// not add wstring variants.
//
// Please do not add "convenience" functions for converting strings to integers
// that return the value and ignore success/failure. That encourages people to
// write code that doesn't properly handle the error conditions.
//
// DO NOT use these functions in any UI unless it's NOT localized on purpose.
// Instead, use base::MessageFormatter for a complex message with numbers
// (integer, float, double) embedded or base::Format{Number,Double,Percent} to
// just format a single number/percent. Note that some languages use native
// digits instead of ASCII digits while others use a group separator or decimal
// point different from ',' and '.'. Using these functions in the UI would lead
// numbers to be formatted in a non-native way.
// ----------------------------------------------------------------------------
namespace base {
// Number -> string conversions ------------------------------------------------
// Ignores locale! see warning above.
BASE_EXPORT std::string NumberToString(int value);
BASE_EXPORT string16 NumberToString16(int value);
BASE_EXPORT std::string NumberToString(unsigned int value);
BASE_EXPORT string16 NumberToString16(unsigned int value);
BASE_EXPORT std::string NumberToString(long value);
BASE_EXPORT string16 NumberToString16(long value);
BASE_EXPORT std::string NumberToString(unsigned long value);
BASE_EXPORT string16 NumberToString16(unsigned long value);
BASE_EXPORT std::string NumberToString(long long value);
BASE_EXPORT string16 NumberToString16(long long value);
BASE_EXPORT std::string NumberToString(unsigned long long value);
BASE_EXPORT string16 NumberToString16(unsigned long long value);
BASE_EXPORT std::string NumberToString(double value);
BASE_EXPORT string16 NumberToString16(double value);
// String -> number conversions ------------------------------------------------
// Perform a best-effort conversion of the input string to a numeric type,
// setting |*output| to the result of the conversion. Returns true for
// "perfect" conversions; returns false in the following cases:
// - Overflow. |*output| will be set to the maximum value supported
// by the data type.
// - Underflow. |*output| will be set to the minimum value supported
// by the data type.
// - Trailing characters in the string after parsing the number. |*output|
// will be set to the value of the number that was parsed.
// - Leading whitespace in the string before parsing the number. |*output| will
// be set to the value of the number that was parsed.
// - No characters parseable as a number at the beginning of the string.
// |*output| will be set to 0.
// - Empty string. |*output| will be set to 0.
// WARNING: Will write to |output| even when returning false.
// Read the comments above carefully.
BASE_EXPORT bool StringToInt(StringPiece input, int* output);
BASE_EXPORT bool StringToInt(StringPiece16 input, int* output);
BASE_EXPORT bool StringToUint(StringPiece input, unsigned* output);
BASE_EXPORT bool StringToUint(StringPiece16 input, unsigned* output);
BASE_EXPORT bool StringToInt64(StringPiece input, int64_t* output);
BASE_EXPORT bool StringToInt64(StringPiece16 input, int64_t* output);
BASE_EXPORT bool StringToUint64(StringPiece input, uint64_t* output);
BASE_EXPORT bool StringToUint64(StringPiece16 input, uint64_t* output);
BASE_EXPORT bool StringToSizeT(StringPiece input, size_t* output);
BASE_EXPORT bool StringToSizeT(StringPiece16 input, size_t* output);
// For floating-point conversions, only conversions of input strings in decimal
// form are defined to work. Behavior with strings representing floating-point
// numbers in hexadecimal, and strings representing non-finite values (such as
// NaN and inf) is undefined. Otherwise, these behave the same as the integral
// variants. This expects the input string to NOT be specific to the locale.
// If your input is locale specific, use ICU to read the number.
// WARNING: Will write to |output| even when returning false.
// Read the comments here and above StringToInt() carefully.
BASE_EXPORT bool StringToDouble(StringPiece input, double* output);
BASE_EXPORT bool StringToDouble(StringPiece16 input, double* output);
// Hex encoding ----------------------------------------------------------------
// Returns a hex string representation of a binary buffer. The returned hex
// string will be in upper case. This function does not check if |size| is
// within reasonable limits since it's written with trusted data in mind. If
// you suspect that the data you want to format might be large, the absolute
// max size for |size| is
// std::numeric_limits<size_t>::max() / 2
BASE_EXPORT std::string HexEncode(const void* bytes, size_t size);
BASE_EXPORT std::string HexEncode(base::span<const uint8_t> bytes);
// Best effort conversion, see StringToInt above for restrictions.
// Will only successfully parse hex values that fit into |output|, i.e.
// -0x80000000 <= |input| <= 0x7FFFFFFF.
BASE_EXPORT bool HexStringToInt(StringPiece input, int* output);
// Best effort conversion, see StringToInt above for restrictions.
// Will only successfully parse hex values that fit into |output|, i.e.
// 0x00000000 <= |input| <= 0xFFFFFFFF.
// The string is not required to start with 0x.
BASE_EXPORT bool HexStringToUInt(StringPiece input, uint32_t* output);
// Best effort conversion, see StringToInt above for restrictions.
// Will only successfully parse hex values that fit into |output|, i.e.
// -0x8000000000000000 <= |input| <= 0x7FFFFFFFFFFFFFFF.
BASE_EXPORT bool HexStringToInt64(StringPiece input, int64_t* output);
// Best effort conversion, see StringToInt above for restrictions.
// Will only successfully parse hex values that fit into |output|, i.e.
// 0x0000000000000000 <= |input| <= 0xFFFFFFFFFFFFFFFF.
// The string is not required to start with 0x.
BASE_EXPORT bool HexStringToUInt64(StringPiece input, uint64_t* output);
// Similar to the previous functions, except that output is a vector of bytes.
// |*output| will contain as many bytes as were successfully parsed prior to the
// error. There is no overflow, but input.size() must be evenly divisible by 2.
// Leading 0x or +/- are not allowed.
BASE_EXPORT bool HexStringToBytes(StringPiece input,
std::vector<uint8_t>* output);
// Same as HexStringToBytes, but for an std::string.
BASE_EXPORT bool HexStringToString(StringPiece input, std::string* output);
// Decodes the hex string |input| into a presized |output|. The output buffer
// must be sized exactly to |input.size() / 2| or decoding will fail and no
// bytes will be written to |output|. Decoding an empty input is also
// considered a failure. When decoding fails due to encountering invalid input
// characters, |output| will have been filled with the decoded bytes up until
// the failure.
BASE_EXPORT bool HexStringToSpan(StringPiece input, base::span<uint8_t> output);
} // namespace base
#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_

View file

@ -0,0 +1,118 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <string>
#include <vector>
#include "base/strings/string_number_conversions.h"
// Reads a NumberType from the start of |data|, converts it to a string with
// |num_to_string|, parses that string back with |string_to_num|, and CHECKs
// that the parse succeeded and reproduced the original value. Does nothing
// when |data| holds fewer than sizeof(NumberType) bytes.
template <class NumberType, class StringPieceType, class StringType>
void CheckRoundtripsT(const uint8_t* data,
                      const size_t size,
                      StringType (*num_to_string)(NumberType),
                      bool (*string_to_num)(StringPieceType, NumberType*)) {
  // Ensure we can read a NumberType from |data|
  if (size < sizeof(NumberType))
    return;
  // Copy the bytes out rather than dereferencing a casted pointer: |data| is a
  // byte buffer with no alignment guarantee for NumberType, so a direct
  // typed read through it would be undefined behaviour.
  NumberType v1;
  memcpy(&v1, data, sizeof(NumberType));
  // Because we started with an arbitrary NumberType value, not an arbitrary
  // string, we expect that the function |string_to_num| (e.g. StringToInt) will
  // return true, indicating a perfect conversion.
  NumberType v2;
  CHECK(string_to_num(num_to_string(v1), &v2));
  // Given that this was a perfect conversion, we expect the original NumberType
  // value to equal the newly parsed one.
  CHECK_EQ(v1, v2);
}
// Round-trip check for the 8-bit string functions: pairs the
// base::NumberToString overload for NumberType with |string_to_num|.
template <class NumberType>
void CheckRoundtrips(const uint8_t* data,
                     const size_t size,
                     bool (*string_to_num)(base::StringPiece, NumberType*)) {
  CheckRoundtripsT<NumberType, base::StringPiece, std::string>(
      data, size, &base::NumberToString, string_to_num);
}
// Round-trip check for the 16-bit string functions: pairs the
// base::NumberToString16 overload for NumberType with |string_to_num|.
template <class NumberType>
void CheckRoundtrips16(const uint8_t* data,
                       const size_t size,
                       bool (*string_to_num)(base::StringPiece16,
                                             NumberType*)) {
  CheckRoundtripsT<NumberType, base::StringPiece16, base::string16>(
      data, size, &base::NumberToString16, string_to_num);
}
// Entry point for LibFuzzer.
// Exercises the string<->number conversion functions with the fuzzer-provided
// bytes, both as a raw numeric value (round-trip checks) and as string input.
// Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
// For each instantiation of NumberToString f and its corresponding StringTo*
// function g, check that g(f(x)) = x holds for fuzzer-determined values of x.
CheckRoundtrips<int>(data, size, &base::StringToInt);
CheckRoundtrips16<int>(data, size, &base::StringToInt);
CheckRoundtrips<unsigned int>(data, size, &base::StringToUint);
CheckRoundtrips16<unsigned int>(data, size, &base::StringToUint);
CheckRoundtrips<int64_t>(data, size, &base::StringToInt64);
CheckRoundtrips16<int64_t>(data, size, &base::StringToInt64);
CheckRoundtrips<uint64_t>(data, size, &base::StringToUint64);
CheckRoundtrips16<uint64_t>(data, size, &base::StringToUint64);
CheckRoundtrips<size_t>(data, size, &base::StringToSizeT);
CheckRoundtrips16<size_t>(data, size, &base::StringToSizeT);
// Treat the raw fuzzer bytes as 8-bit string input.
base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
size);
std::string string_input(reinterpret_cast<const char*>(data), size);
// Return values are deliberately ignored below: arbitrary input is expected
// to fail to parse most of the time.
int out_int;
base::StringToInt(string_piece_input, &out_int);
unsigned out_uint;
base::StringToUint(string_piece_input, &out_uint);
int64_t out_int64;
base::StringToInt64(string_piece_input, &out_int64);
uint64_t out_uint64;
base::StringToUint64(string_piece_input, &out_uint64);
size_t out_size;
base::StringToSizeT(string_piece_input, &out_size);
// Test for StringPiece16 if size is even.
if (size % 2 == 0) {
base::StringPiece16 string_piece_input16(
reinterpret_cast<const base::char16*>(data), size / 2);
base::StringToInt(string_piece_input16, &out_int);
base::StringToUint(string_piece_input16, &out_uint);
base::StringToInt64(string_piece_input16, &out_int64);
base::StringToUint64(string_piece_input16, &out_uint64);
base::StringToSizeT(string_piece_input16, &out_size);
}
double out_double;
base::StringToDouble(string_input, &out_double);
// Hex parsers overwrite the integer outputs produced above.
base::HexStringToInt(string_piece_input, &out_int);
base::HexStringToUInt(string_piece_input, &out_uint);
base::HexStringToInt64(string_piece_input, &out_int64);
base::HexStringToUInt64(string_piece_input, &out_uint64);
std::vector<uint8_t> out_bytes;
base::HexStringToBytes(string_piece_input, &out_bytes);
base::HexEncode(data, size);
// Convert the numbers back to strings.
base::NumberToString(out_int);
base::NumberToString16(out_int);
base::NumberToString(out_uint);
base::NumberToString16(out_uint);
base::NumberToString(out_int64);
base::NumberToString16(out_int64);
base::NumberToString(out_uint64);
base::NumberToString16(out_uint64);
base::NumberToString(out_double);
base::NumberToString16(out_double);
return 0;
}

View file

@ -0,0 +1,412 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Copied from strings/stringpiece.cc with modifications
#include "base/strings/string_piece.h"
#include <limits.h>
#include <algorithm>
#include <ostream>
#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
namespace base {
namespace {
// For each character in characters_wanted, sets the index corresponding
// to the ASCII code of that character to 1 in table. This is used by
// the find_.*_of methods below to tell whether or not a character is in
// the lookup table in constant time.
// The argument `table' must be an array that is large enough to hold all
// the possible values of an unsigned char. Thus it should be be declared
// as follows:
// bool table[UCHAR_MAX + 1]
inline void BuildLookupTable(const StringPiece& characters_wanted,
bool* table) {
const size_t length = characters_wanted.length();
const char* const data = characters_wanted.data();
for (size_t i = 0; i < length; ++i) {
table[static_cast<unsigned char>(data[i])] = true;
}
}
} // namespace
// Explicitly instantiate BasicStringPiece for the 8-bit and 16-bit string
// types. Skipped under MSVC, which doesn't like complex extern templates and
// DLLs.
#ifndef COMPILER_MSVC
template class BasicStringPiece<std::string>;
template class BasicStringPiece<string16>;
#endif  // COMPILER_MSVC
// Writes the characters of |piece| to |o| unformatted and returns |o|.
std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
  const std::streamsize length = static_cast<std::streamsize>(piece.size());
  return o.write(piece.data(), length);
}
// Streams |piece| to |o| after converting it with UTF16ToUTF8().
std::ostream& operator<<(std::ostream& o, const StringPiece16& piece) {
  const auto utf8 = UTF16ToUTF8(piece);
  return o << utf8;
}
namespace internal {
// Copies up to |n| characters of |self|, starting at |pos|, into |buf| and
// returns the number of characters copied. No terminator is written.
// |pos| is not range-checked here; it must not exceed self.size().
template <typename STR>
size_t copyT(const BasicStringPiece<STR>& self,
             typename STR::value_type* buf,
             size_t n,
             size_t pos) {
  using CharT = typename STR::value_type;
  const size_t available = self.size() - pos;
  const size_t count = std::min(available, n);
  memcpy(buf, self.data() + pos, count * sizeof(CharT));
  return count;
}
// 8-bit entry point for copyT().
size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) {
  const size_t copied = copyT(self, buf, n, pos);
  return copied;
}
// 16-bit entry point for copyT().
size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) {
  const size_t copied = copyT(self, buf, n, pos);
  return copied;
}
template<typename STR>
size_t findT(const BasicStringPiece<STR>& self,
const BasicStringPiece<STR>& s,
size_t pos) {
if (pos > self.size())
return BasicStringPiece<STR>::npos;
typename BasicStringPiece<STR>::const_iterator result =
std::search(self.begin() + pos, self.end(), s.begin(), s.end());
const size_t xpos =
static_cast<size_t>(result - self.begin());
return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos;
}
// 8-bit substring search; forwards to findT().
size_t find(const StringPiece& self, const StringPiece& s, size_t pos) {
  const size_t index = findT(self, s, pos);
  return index;
}
// 16-bit substring search; forwards to findT().
size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) {
  const size_t index = findT(self, s, pos);
  return index;
}
template<typename STR>
size_t findT(const BasicStringPiece<STR>& self,
typename STR::value_type c,
size_t pos) {
if (pos >= self.size())
return BasicStringPiece<STR>::npos;
typename BasicStringPiece<STR>::const_iterator result =
std::find(self.begin() + pos, self.end(), c);
return result != self.end() ?
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
}
// 8-bit character search; forwards to findT().
size_t find(const StringPiece& self, char c, size_t pos) {
  const size_t index = findT(self, c, pos);
  return index;
}
// 16-bit character search; forwards to findT().
size_t find(const StringPiece16& self, char16 c, size_t pos) {
  const size_t index = findT(self, c, pos);
  return index;
}
template<typename STR>
size_t rfindT(const BasicStringPiece<STR>& self,
const BasicStringPiece<STR>& s,
size_t pos) {
if (self.size() < s.size())
return BasicStringPiece<STR>::npos;
if (s.empty())
return std::min(self.size(), pos);
typename BasicStringPiece<STR>::const_iterator last =
self.begin() + std::min(self.size() - s.size(), pos) + s.size();
typename BasicStringPiece<STR>::const_iterator result =
std::find_end(self.begin(), last, s.begin(), s.end());
return result != last ?
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
}
// 8-bit reverse substring search; forwards to rfindT().
size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) {
  const size_t index = rfindT(self, s, pos);
  return index;
}
// 16-bit reverse substring search; forwards to rfindT().
size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) {
  const size_t index = rfindT(self, s, pos);
  return index;
}
// Returns the index of the last occurrence of the character |c| in |self| at
// or before |pos|, or npos when there is none.
template <typename STR>
size_t rfindT(const BasicStringPiece<STR>& self,
              typename STR::value_type c,
              size_t pos) {
  if (self.size() == 0)
    return BasicStringPiece<STR>::npos;

  // |next| is one past the index being examined, so the unsigned countdown
  // terminates cleanly after index 0 has been checked.
  for (size_t next = std::min(pos, self.size() - 1) + 1; next-- > 0;) {
    if (self.data()[next] == c)
      return next;
  }
  return BasicStringPiece<STR>::npos;
}
// 8-bit reverse character search; forwards to rfindT().
size_t rfind(const StringPiece& self, char c, size_t pos) {
  const size_t index = rfindT(self, c, pos);
  return index;
}
// 16-bit reverse character search; forwards to rfindT().
size_t rfind(const StringPiece16& self, char16 c, size_t pos) {
  const size_t index = rfindT(self, c, pos);
  return index;
}
// 8-bit version using lookup table.
// Returns the index of the first character of |self| at or after |pos| that
// also occurs in |s|, or npos when there is none.
size_t find_first_of(const StringPiece& self,
                     const StringPiece& s,
                     size_t pos) {
  if (self.size() == 0 || s.size() == 0)
    return StringPiece::npos;

  // Avoid the cost of BuildLookupTable() for a single-character search.
  if (s.size() == 1)
    return find(self, s.data()[0], pos);

  bool wanted[UCHAR_MAX + 1] = { false };
  BuildLookupTable(s, wanted);
  const char* const chars = self.data();
  const size_t length = self.size();
  for (size_t index = pos; index < length; ++index) {
    if (wanted[static_cast<unsigned char>(chars[index])])
      return index;
  }
  return StringPiece::npos;
}
// 16-bit brute force version.
size_t find_first_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos) {
// Use the faster std::find() if searching for a single character.
StringPiece16::const_iterator found =
s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
: std::find_first_of(self.begin() + pos, self.end(),
s.begin(), s.end());
if (found == self.end())
return StringPiece16::npos;
return found - self.begin();
}
// 8-bit version using lookup table.
// Returns the index of the first character of |self| at or after |pos| that
// does not occur in |s|, or npos when there is none.
size_t find_first_not_of(const StringPiece& self,
                         const StringPiece& s,
                         size_t pos) {
  // Nothing to examine when |pos| is at or past the end (this also covers an
  // empty |self|).
  if (pos >= self.size())
    return StringPiece::npos;
  // With an empty set every character qualifies, so the first candidate is the
  // character at |pos| itself -- not index 0, which would ignore |pos|.
  if (s.size() == 0)
    return pos;
  // Avoid the cost of BuildLookupTable() for a single-character search.
  if (s.size() == 1)
    return find_first_not_of(self, s.data()[0], pos);
  bool lookup[UCHAR_MAX + 1] = { false };
  BuildLookupTable(s, lookup);
  for (size_t i = pos; i < self.size(); ++i) {
    if (!lookup[static_cast<unsigned char>(self.data()[i])]) {
      return i;
    }
  }
  return StringPiece::npos;
}
// 16-bit brute-force version.
// Returns the index of the first character of |self| at or after |pos| that
// does not occur in |s|, or npos when there is none.
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
                                     const StringPiece16& s,
                                     size_t pos) {
  if (self.size() == 0)
    return StringPiece16::npos;

  for (size_t index = pos; index < self.size(); ++index) {
    const bool in_set = std::find(s.begin(), s.end(), self[index]) != s.end();
    if (!in_set)
      return index;
  }
  return StringPiece16::npos;
}
// Returns the index of the first character of |self| at or after |pos| that
// differs from |c|, or npos when there is none.
template <typename STR>
size_t find_first_not_ofT(const BasicStringPiece<STR>& self,
                          typename STR::value_type c,
                          size_t pos) {
  if (self.size() == 0)
    return BasicStringPiece<STR>::npos;

  const size_t length = self.size();
  for (size_t index = pos; index < length; ++index) {
    if (self.data()[index] != c)
      return index;
  }
  return BasicStringPiece<STR>::npos;
}
// 8-bit single-character variant; forwards to find_first_not_ofT().
size_t find_first_not_of(const StringPiece& self, char c, size_t pos) {
  const size_t index = find_first_not_ofT(self, c, pos);
  return index;
}
// 16-bit single-character variant; forwards to find_first_not_ofT().
size_t find_first_not_of(const StringPiece16& self, char16 c, size_t pos) {
  const size_t index = find_first_not_ofT(self, c, pos);
  return index;
}
// 8-bit version using lookup table.
// Returns the index of the last character of |self| at or before |pos| that
// also occurs in |s|, or npos when there is none.
size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) {
  if (self.size() == 0 || s.size() == 0)
    return StringPiece::npos;

  // Avoid the cost of BuildLookupTable() for a single-character search.
  if (s.size() == 1)
    return rfind(self, s.data()[0], pos);

  bool wanted[UCHAR_MAX + 1] = { false };
  BuildLookupTable(s, wanted);
  // |next| is one past the index being examined, so the unsigned countdown
  // stops after index 0.
  for (size_t next = std::min(pos, self.size() - 1) + 1; next-- > 0;) {
    if (wanted[static_cast<unsigned char>(self.data()[next])])
      return next;
  }
  return StringPiece::npos;
}
// 16-bit brute-force version.
size_t find_last_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos) {
if (self.size() == 0)
return StringPiece16::npos;
for (size_t self_i = std::min(pos, self.size() - 1); ;
--self_i) {
for (auto c : s) {
if (self.data()[self_i] == c)
return self_i;
}
if (self_i == 0)
break;
}
return StringPiece16::npos;
}
// 8-bit version using lookup table.
// Returns the index of the last character of |self| at or before |pos| that
// does not occur in |s|, or npos when there is none.
size_t find_last_not_of(const StringPiece& self,
                        const StringPiece& s,
                        size_t pos) {
  if (self.size() == 0)
    return StringPiece::npos;

  const size_t start = std::min(pos, self.size() - 1);
  // With an empty set the starting character itself qualifies.
  if (s.size() == 0)
    return start;

  // Avoid the cost of BuildLookupTable() for a single-character search.
  if (s.size() == 1)
    return find_last_not_of(self, s.data()[0], pos);

  bool excluded[UCHAR_MAX + 1] = { false };
  BuildLookupTable(s, excluded);
  // |next| is one past the index being examined, so the unsigned countdown
  // stops after index 0.
  for (size_t next = start + 1; next-- > 0;) {
    if (!excluded[static_cast<unsigned char>(self.data()[next])])
      return next;
  }
  return StringPiece::npos;
}
// 16-bit brute-force version.
//
// Walks |self| backwards from min(pos, size - 1) and returns the index of the
// first character encountered that does not occur anywhere in |s|. Returns
// StringPiece16::npos when |self| is empty or every examined character occurs
// in |s|.
size_t find_last_not_of(const StringPiece16& self,
                        const StringPiece16& s,
                        size_t pos) {
  // Use the 16-bit npos here, matching the other return paths of this overload.
  if (self.size() == 0)
    return StringPiece16::npos;

  for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) {
    bool found = false;
    for (auto c : s) {
      if (self.data()[self_i] == c) {
        found = true;
        break;
      }
    }
    if (!found)
      return self_i;
    // |self_i| is unsigned, so stop explicitly instead of testing for < 0.
    if (self_i == 0)
      break;
  }
  return StringPiece16::npos;
}
// Shared implementation of the single-character find_last_not_of() overloads.
// Scans |self| backwards from min(pos, size - 1) and returns the index of the
// first character that differs from |c|, or npos if |self| is empty or every
// examined character equals |c|.
template<typename STR>
size_t find_last_not_ofT(const BasicStringPiece<STR>& self,
                         typename STR::value_type c,
                         size_t pos) {
  if (self.size() == 0)
    return BasicStringPiece<STR>::npos;

  size_t index = std::min(pos, self.size() - 1);
  while (self.data()[index] == c) {
    // Reached the front without finding a different character.
    if (index == 0)
      return BasicStringPiece<STR>::npos;
    --index;
  }
  return index;
}
// Single-character overload (8-bit): returns the index of the last character
// of |self| at or before |pos| that is not |c|, or npos if there is none.
// Forwards to the shared find_last_not_ofT() template.
size_t find_last_not_of(const StringPiece& self,
char c,
size_t pos) {
return find_last_not_ofT(self, c, pos);
}
// Single-character overload (16-bit); same contract as the 8-bit version
// above, forwarding to the same template.
size_t find_last_not_of(const StringPiece16& self,
char16 c,
size_t pos) {
return find_last_not_ofT(self, c, pos);
}
// Shared implementation of substr(). Returns the piece of |self| that starts
// at |pos| and spans at most |n| characters. Both values are clamped to the
// bounds of |self|, so an out-of-range |pos| yields an empty piece positioned
// at the end rather than an error.
template<typename STR>
BasicStringPiece<STR> substrT(const BasicStringPiece<STR>& self,
                              size_t pos,
                              size_t n) {
  const size_t begin = std::min(pos, self.size());
  const size_t count = std::min(n, self.size() - begin);
  return BasicStringPiece<STR>(self.data() + begin, count);
}
// 8-bit substr(): returns the sub-piece of |self| starting at |pos| with at
// most |n| characters. Forwards to substrT(), which clamps both arguments.
StringPiece substr(const StringPiece& self,
size_t pos,
size_t n) {
return substrT(self, pos, n);
}
// 16-bit substr(); same contract as the 8-bit version above.
StringPiece16 substr(const StringPiece16& self,
size_t pos,
size_t n) {
return substrT(self, pos, n);
}
} // namespace internal
} // namespace base

View file

@ -0,0 +1,519 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Copied from strings/stringpiece.h with modifications
//
// A string-like object that points to a sized piece of memory.
//
// You can use StringPiece as a function or method parameter. A StringPiece
// parameter can receive a double-quoted string literal argument, a "const
// char*" argument, a string argument, or a StringPiece argument with no data
// copying. Systematic use of StringPiece for arguments reduces data
// copies and strlen() calls.
//
// Prefer passing StringPieces by value:
// void MyFunction(StringPiece arg);
// If circumstances require, you may also pass by const reference:
// void MyFunction(const StringPiece& arg); // not preferred
// Both of these have the same lifetime semantics. Passing by value
// generates slightly smaller code. For more discussion, Googlers can see
// the thread go/stringpiecebyvalue on c-users.
#ifndef BASE_STRINGS_STRING_PIECE_H_
#define BASE_STRINGS_STRING_PIECE_H_
#include <stddef.h>
#include <iosfwd>
#include <string>
#include <type_traits>
#include "base/base_export.h"
#include "base/logging.h"
#include "base/strings/char_traits.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece_forward.h"
namespace base {
// internal --------------------------------------------------------------------
// Many of the StringPiece functions use different implementations for the
// 8-bit and 16-bit versions, and we don't want lots of template expansions in
// this (very common) header that will slow down compilation.
//
// So here we define overloaded functions called by the StringPiece template.
// For those that share an implementation, the two versions will expand to a
// template internal to the .cc file.
namespace internal {
BASE_EXPORT size_t copy(const StringPiece& self,
char* buf,
size_t n,
size_t pos);
BASE_EXPORT size_t copy(const StringPiece16& self,
char16* buf,
size_t n,
size_t pos);
BASE_EXPORT size_t find(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t find(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t find(const StringPiece& self,
char c,
size_t pos);
BASE_EXPORT size_t find(const StringPiece16& self,
char16 c,
size_t pos);
BASE_EXPORT size_t rfind(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t rfind(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t rfind(const StringPiece& self,
char c,
size_t pos);
BASE_EXPORT size_t rfind(const StringPiece16& self,
char16 c,
size_t pos);
BASE_EXPORT size_t find_first_of(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t find_first_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
char c,
size_t pos);
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
char16 c,
size_t pos);
BASE_EXPORT size_t find_last_of(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t find_last_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t find_last_of(const StringPiece& self,
char c,
size_t pos);
BASE_EXPORT size_t find_last_of(const StringPiece16& self,
char16 c,
size_t pos);
BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
const StringPiece& s,
size_t pos);
BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
const StringPiece16& s,
size_t pos);
BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
char16 c,
size_t pos);
BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
char c,
size_t pos);
BASE_EXPORT StringPiece substr(const StringPiece& self,
size_t pos,
size_t n);
BASE_EXPORT StringPiece16 substr(const StringPiece16& self,
size_t pos,
size_t n);
} // namespace internal
// BasicStringPiece ------------------------------------------------------------
// Defines the types, methods, operators, and data members common to both
// StringPiece and StringPiece16.
//
// This is templatized by string class type rather than character type, so
// BasicStringPiece<std::string> or BasicStringPiece<base::string16>.
template <typename STRING_TYPE> class BasicStringPiece {
public:
// Standard STL container boilerplate.
typedef size_t size_type;
typedef typename STRING_TYPE::value_type value_type;
typedef const value_type* pointer;
typedef const value_type& reference;
typedef const value_type& const_reference;
typedef ptrdiff_t difference_type;
typedef const value_type* const_iterator;
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
static const size_type npos;
public:
// We provide non-explicit singleton constructors so users can pass
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected (likewise for char16, string16, StringPiece16).
constexpr BasicStringPiece() : ptr_(NULL), length_(0) {}
// TODO(crbug.com/1049498): Construction from nullptr is not allowed for
// std::basic_string_view, so remove the special handling for it.
// Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
// isn't constexpr until C++17.
constexpr BasicStringPiece(const value_type* str)
: ptr_(str), length_(!str ? 0 : CharTraits<value_type>::length(str)) {}
// Explicitly disallow construction from nullptr. Note that this does not
// catch construction from runtime strings that might be null.
// Note: The following is just a more elaborate way of spelling
// `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is
// not supported by the PNaCl toolchain.
// TODO(crbug.com/1049498): Remove once we CHECK(str) in the constructor
// above.
template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>>
BasicStringPiece(T) {
static_assert(sizeof(T) == 0, // Always false.
"StringPiece does not support construction from nullptr, use "
"the default constructor instead.");
}
BasicStringPiece(const STRING_TYPE& str)
: ptr_(str.data()), length_(str.size()) {}
constexpr BasicStringPiece(const value_type* offset, size_type len)
: ptr_(offset), length_(len) {}
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
const typename STRING_TYPE::const_iterator& end) {
DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
length_ = static_cast<size_t>(std::distance(begin, end));
// The length test before assignment is to avoid dereferencing an iterator
// that may point to the end() of a string.
ptr_ = length_ > 0 ? &*begin : nullptr;
}
// data() may return a pointer to a buffer with embedded NULs, and the
// returned buffer may or may not be null terminated. Therefore it is
// typically a mistake to pass data() to a routine that expects a NUL
// terminated string.
constexpr const value_type* data() const { return ptr_; }
constexpr size_type size() const noexcept { return length_; }
constexpr size_type length() const noexcept { return length_; }
bool empty() const { return length_ == 0; }
constexpr value_type operator[](size_type i) const {
CHECK(i < length_);
return ptr_[i];
}
value_type front() const {
CHECK_NE(0UL, length_);
return ptr_[0];
}
value_type back() const {
CHECK_NE(0UL, length_);
return ptr_[length_ - 1];
}
constexpr void remove_prefix(size_type n) {
CHECK(n <= length_);
ptr_ += n;
length_ -= n;
}
constexpr void remove_suffix(size_type n) {
CHECK(n <= length_);
length_ -= n;
}
constexpr int compare(BasicStringPiece x) const noexcept {
int r = CharTraits<value_type>::compare(
ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_));
if (r == 0) {
if (length_ < x.length_) r = -1;
else if (length_ > x.length_) r = +1;
}
return r;
}
// This is the style of conversion preferred by std::string_view in C++17.
explicit operator STRING_TYPE() const { return as_string(); }
STRING_TYPE as_string() const {
// std::string doesn't like to take a NULL pointer even with a 0 size.
return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
}
const_iterator begin() const { return ptr_; }
const_iterator end() const { return ptr_ + length_; }
const_reverse_iterator rbegin() const {
return const_reverse_iterator(ptr_ + length_);
}
const_reverse_iterator rend() const {
return const_reverse_iterator(ptr_);
}
size_type max_size() const { return length_; }
size_type capacity() const { return length_; }
size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
return internal::copy(*this, buf, n, pos);
}
// Does "this" start with "x"
constexpr bool starts_with(BasicStringPiece x) const noexcept {
return (
(this->length_ >= x.length_) &&
(CharTraits<value_type>::compare(this->ptr_, x.ptr_, x.length_) == 0));
}
// Does "this" end with "x"
constexpr bool ends_with(BasicStringPiece x) const noexcept {
return ((this->length_ >= x.length_) &&
(CharTraits<value_type>::compare(
this->ptr_ + (this->length_ - x.length_), x.ptr_, x.length_) ==
0));
}
// find: Search for a character or substring at a given offset.
size_type find(const BasicStringPiece<STRING_TYPE>& s,
size_type pos = 0) const {
return internal::find(*this, s, pos);
}
size_type find(value_type c, size_type pos = 0) const {
return internal::find(*this, c, pos);
}
// rfind: Reverse find.
size_type rfind(const BasicStringPiece& s,
size_type pos = BasicStringPiece::npos) const {
return internal::rfind(*this, s, pos);
}
size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const {
return internal::rfind(*this, c, pos);
}
// find_first_of: Find the first occurence of one of a set of characters.
size_type find_first_of(const BasicStringPiece& s,
size_type pos = 0) const {
return internal::find_first_of(*this, s, pos);
}
size_type find_first_of(value_type c, size_type pos = 0) const {
return find(c, pos);
}
// find_first_not_of: Find the first occurence not of a set of characters.
size_type find_first_not_of(const BasicStringPiece& s,
size_type pos = 0) const {
return internal::find_first_not_of(*this, s, pos);
}
size_type find_first_not_of(value_type c, size_type pos = 0) const {
return internal::find_first_not_of(*this, c, pos);
}
// find_last_of: Find the last occurence of one of a set of characters.
size_type find_last_of(const BasicStringPiece& s,
size_type pos = BasicStringPiece::npos) const {
return internal::find_last_of(*this, s, pos);
}
size_type find_last_of(value_type c,
size_type pos = BasicStringPiece::npos) const {
return rfind(c, pos);
}
// find_last_not_of: Find the last occurence not of a set of characters.
size_type find_last_not_of(const BasicStringPiece& s,
size_type pos = BasicStringPiece::npos) const {
return internal::find_last_not_of(*this, s, pos);
}
size_type find_last_not_of(value_type c,
size_type pos = BasicStringPiece::npos) const {
return internal::find_last_not_of(*this, c, pos);
}
// substr.
BasicStringPiece substr(size_type pos,
size_type n = BasicStringPiece::npos) const {
return internal::substr(*this, pos, n);
}
protected:
const value_type* ptr_;
size_type length_;
};
// Out-of-class definition of the npos sentinel: size_type(-1), i.e. the
// largest value representable by size_type.
template <typename STRING_TYPE>
const typename BasicStringPiece<STRING_TYPE>::size_type
BasicStringPiece<STRING_TYPE>::npos =
typename BasicStringPiece<STRING_TYPE>::size_type(-1);
// MSVC doesn't like complex extern templates and DLLs.
#if !defined(COMPILER_MSVC)
extern template class BASE_EXPORT BasicStringPiece<std::string>;
extern template class BASE_EXPORT BasicStringPiece<string16>;
#endif
// Comparison operators --------------------------------------------------------
// operator ==
// Two pieces are equal when they have the same size and compare() finds no
// difference. The size check comes first so pieces of different length are
// rejected without comparing any characters.
template <typename StringT>
constexpr bool operator==(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
}
// Here and below we make use of std::common_type_t to emulate an identity type
// transformation. This creates a non-deduced context, so that we can compare
// StringPieces with types that implicitly convert to StringPieces. See
// https://wg21.link/n3766 for details.
// Furthermore, we require dummy template parameters for these overloads to work
// around a name mangling issue on Windows.
// Overload for a right-hand operand that converts to a StringPiece.
template <typename StringT, int = 1>
constexpr bool operator==(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
}
// Overload for a left-hand operand that converts to a StringPiece.
template <typename StringT, int = 2>
constexpr bool operator==(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
}
// operator !=
// Defined as the negation of operator==. The second and third overloads put
// one operand in a non-deduced context so that it may be any type implicitly
// convertible to a StringPiece.
template <typename StringT>
constexpr bool operator!=(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(lhs == rhs);
}
template <typename StringT, int = 1>
constexpr bool operator!=(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return !(lhs == rhs);
}
template <typename StringT, int = 2>
constexpr bool operator!=(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(lhs == rhs);
}
// operator <
// True when compare() orders |lhs| before |rhs|. The remaining relational
// operators below are all expressed in terms of this one. The second and
// third overloads accept one operand that implicitly converts to a
// StringPiece.
template <typename StringT>
constexpr bool operator<(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
template <typename StringT, int = 1>
constexpr bool operator<(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
template <typename StringT, int = 2>
constexpr bool operator<(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return lhs.compare(rhs) < 0;
}
// operator >
// Implemented by swapping the operands of operator<. The second and third
// overloads accept one operand that implicitly converts to a StringPiece.
template <typename StringT>
constexpr bool operator>(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return rhs < lhs;
}
template <typename StringT, int = 1>
constexpr bool operator>(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return rhs < lhs;
}
template <typename StringT, int = 2>
constexpr bool operator>(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return rhs < lhs;
}
// operator <=
// Implemented as "not (rhs < lhs)". The second and third overloads accept one
// operand that implicitly converts to a StringPiece.
template <typename StringT>
constexpr bool operator<=(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(rhs < lhs);
}
template <typename StringT, int = 1>
constexpr bool operator<=(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return !(rhs < lhs);
}
template <typename StringT, int = 2>
constexpr bool operator<=(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(rhs < lhs);
}
// operator >=
// Implemented as "not (lhs < rhs)". The second and third overloads accept one
// operand that implicitly converts to a StringPiece.
template <typename StringT>
constexpr bool operator>=(BasicStringPiece<StringT> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(lhs < rhs);
}
template <typename StringT, int = 1>
constexpr bool operator>=(
BasicStringPiece<StringT> lhs,
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
return !(lhs < rhs);
}
template <typename StringT, int = 2>
constexpr bool operator>=(std::common_type_t<BasicStringPiece<StringT>> lhs,
BasicStringPiece<StringT> rhs) noexcept {
return !(lhs < rhs);
}
BASE_EXPORT std::ostream& operator<<(std::ostream& o,
const StringPiece& piece);
BASE_EXPORT std::ostream& operator<<(std::ostream& o,
const StringPiece16& piece);
// Hashing ---------------------------------------------------------------------
// We provide appropriate hash functions so StringPiece and StringPiece16 can
// be used as keys in hash sets and maps.
// This hash function is copied from base/strings/string16.h. We don't use the
// ones already defined for string and string16 directly because it would
// require the string constructors to be called, which we don't want.
// Hash functor usable with any string-piece-like type that exposes begin()
// and end(). Folds the characters, in order, into a multiplicative hash with
// factor 131; an empty input hashes to 0.
template <typename StringPieceType>
struct StringPieceHashImpl {
  std::size_t operator()(StringPieceType sp) const {
    std::size_t hash = 0;
    for (auto it = sp.begin(); it != sp.end(); ++it) {
      hash *= 131;
      hash += *it;
    }
    return hash;
  }
};
using StringPieceHash = StringPieceHashImpl<StringPiece>;
using StringPiece16Hash = StringPieceHashImpl<StringPiece16>;
using WStringPieceHash = StringPieceHashImpl<WStringPiece>;
} // namespace base
#endif // BASE_STRINGS_STRING_PIECE_H_

View file

@ -0,0 +1,24 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Forward declaration of StringPiece types from base/strings/string_piece.h
#ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_
#define BASE_STRINGS_STRING_PIECE_FORWARD_H_
#include <string>
#include "base/strings/string16.h"
namespace base {
template <typename STRING_TYPE>
class BasicStringPiece;
typedef BasicStringPiece<std::string> StringPiece;
typedef BasicStringPiece<string16> StringPiece16;
typedef BasicStringPiece<std::wstring> WStringPiece;
} // namespace base
#endif // BASE_STRINGS_STRING_PIECE_FORWARD_H_

View file

@ -0,0 +1,259 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_split.h"
#include <stddef.h>
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/third_party/icu/icu_utf.h"
namespace base {
namespace {
// Returns the whitespace character set that matches the string type |Str|.
// Only the explicit specializations below are defined: kWhitespaceASCII for
// std::string, kWhitespaceUTF16 for string16 and, when both OS_WIN and
// BASE_STRING16_IS_STD_U16STRING are defined, kWhitespaceWide for
// std::wstring.
template<typename Str> BasicStringPiece<Str> WhitespaceForType();
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
template <>
WStringPiece WhitespaceForType<std::wstring>() {
return kWhitespaceWide;
}
#endif
template<> StringPiece16 WhitespaceForType<string16>() {
return kWhitespaceUTF16;
}
template<> StringPiece WhitespaceForType<std::string>() {
return kWhitespaceASCII;
}
// General string splitter template. Can take 8- or 16-bit input, can produce
// the corresponding string or StringPiece output.
//
// |str| is cut at every character that appears in |delimiter|. Each token is
// optionally trimmed (TRIM_WHITESPACE) and then appended to the result unless
// it is empty and |result_type| is not SPLIT_WANT_ALL. An empty |str| always
// yields an empty vector.
template <typename OutputStringType, typename Str>
static std::vector<OutputStringType> SplitStringT(
    BasicStringPiece<Str> str,
    BasicStringPiece<Str> delimiter,
    WhitespaceHandling whitespace,
    SplitResult result_type) {
  std::vector<OutputStringType> tokens;
  if (str.empty())
    return tokens;

  // |cursor| is the start of the next token; npos means the input is consumed.
  for (size_t cursor = 0; cursor != Str::npos;) {
    const size_t hit = str.find_first_of(delimiter, cursor);
    const bool is_last = (hit == Str::npos);

    BasicStringPiece<Str> token =
        is_last ? str.substr(cursor) : str.substr(cursor, hit - cursor);
    cursor = is_last ? Str::npos : hit + 1;

    if (whitespace == TRIM_WHITESPACE)
      token = TrimString(token, WhitespaceForType<Str>(), TRIM_ALL);

    if (result_type == SPLIT_WANT_ALL || !token.empty())
      tokens.emplace_back(token);
  }
  return tokens;
}
// Parses |input| as "key<delimiter>value" and appends the resulting pair to
// |result|. A new entry is appended even on failure. Returns false when
// |input| contains no |delimiter|, or when nothing other than |delimiter|
// characters follows the key. Any run of consecutive |delimiter| characters
// after the key is skipped before the value starts.
bool AppendStringKeyValue(StringPiece input,
                          char delimiter,
                          StringPairs* result) {
  // Always append a new item regardless of success (it might be empty). The
  // below code will copy the strings directly into the result pair.
  result->resize(result->size() + 1);
  auto& entry = result->back();

  // Find the delimiter.
  const size_t key_end = input.find_first_of(delimiter);
  if (key_end == std::string::npos) {
    DVLOG(1) << "cannot find delimiter in: " << input;
    return false;  // No delimiter.
  }
  entry.first = std::string(input.substr(0, key_end));

  // Find the value string. |tail| still begins with the delimiter(s).
  const StringPiece tail = input.substr(key_end);
  const size_t value_begin = tail.find_first_not_of(delimiter);
  if (value_begin == StringPiece::npos) {
    DVLOG(1) << "cannot parse value from input: " << input;
    return false;  // No value.
  }
  entry.second = std::string(tail.substr(value_begin));
  return true;
}
// Splits |input| at every occurrence of the whole substring |delimiter|.
// Each term is optionally trimmed (TRIM_WHITESPACE) and then appended to the
// result unless it is empty and |result_type| is not SPLIT_WANT_ALL. An empty
// |delimiter| returns |input| unchanged as the single result.
template <typename OutputStringType, typename Str>
std::vector<OutputStringType> SplitStringUsingSubstrT(
    BasicStringPiece<Str> input,
    BasicStringPiece<Str> delimiter,
    WhitespaceHandling whitespace,
    SplitResult result_type) {
  using Piece = BasicStringPiece<Str>;
  using size_type = typename Piece::size_type;

  std::vector<OutputStringType> terms;
  if (delimiter.size() == 0) {
    terms.emplace_back(input);
    return terms;
  }

  size_type cursor = 0;
  while (true) {
    const size_type hit = input.find(delimiter, cursor);
    const bool is_last = (hit == Piece::npos);

    Piece term =
        is_last ? input.substr(cursor) : input.substr(cursor, hit - cursor);

    if (whitespace == TRIM_WHITESPACE)
      term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);

    if (result_type == SPLIT_WANT_ALL || !term.empty())
      terms.emplace_back(term);

    if (is_last)
      break;
    // Resume the search just past the delimiter that was found.
    cursor = hit + delimiter.size();
  }
  return terms;
}
} // namespace
// Splits |input| on any character of |separators| and returns copies of the
// pieces as std::string. Thin wrapper over SplitStringT.
std::vector<std::string> SplitString(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<std::string>(input, separators, whitespace, result_type);
}
// 16-bit counterpart of the overload above, returning string16 copies.
std::vector<string16> SplitString(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<string16>(input, separators, whitespace, result_type);
}
// Splits |input| on any character of |separators| and returns StringPieces
// rather than string copies. Thin wrapper over SplitStringT.
std::vector<StringPiece> SplitStringPiece(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
}
// 16-bit counterpart of the overload above.
std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<StringPiece16>(input, separators, whitespace,
result_type);
}
// Single-character-delimiter convenience form. Wraps
// |key_value_pair_delimiter| in a one-character StringPiece and forwards to
// SplitStringIntoKeyValuePairsUsingSubstr(). The StringPiece points at the
// by-value parameter, which stays alive for the duration of the call.
bool SplitStringIntoKeyValuePairs(StringPiece input,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs) {
return SplitStringIntoKeyValuePairsUsingSubstr(
input, key_value_delimiter, StringPiece(&key_value_pair_delimiter, 1),
key_value_pairs);
}
// Clears |key_value_pairs|, splits |input| on the substring
// |key_value_pair_delimiter| (trimming whitespace and dropping empty entries)
// and parses every entry with AppendStringKeyValue(). All entries are
// processed even after a failure; the return value is true only if every
// entry parsed successfully.
bool SplitStringIntoKeyValuePairsUsingSubstr(
    StringPiece input,
    char key_value_delimiter,
    StringPiece key_value_pair_delimiter,
    StringPairs* key_value_pairs) {
  key_value_pairs->clear();

  const std::vector<StringPiece> entries = SplitStringPieceUsingSubstr(
      input, key_value_pair_delimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
  key_value_pairs->reserve(entries.size());

  bool all_parsed = true;
  for (size_t i = 0; i < entries.size(); ++i) {
    // Don't stop at the first failure, to allow for pairs without associated
    // value or key; just record that the split failed.
    const bool parsed =
        AppendStringKeyValue(entries[i], key_value_delimiter, key_value_pairs);
    all_parsed = all_parsed && parsed;
  }
  return all_parsed;
}
// Splits |input| on the whole substring |delimiter| and returns string16
// copies. Thin wrapper over SplitStringUsingSubstrT.
std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
result_type);
}
// 8-bit counterpart of the overload above, returning std::string copies.
std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
result_type);
}
// Splits |input| on the whole substring |delimiter| and returns StringPiece16s
// rather than string copies. Thin wrapper over SplitStringUsingSubstrT.
std::vector<StringPiece16> SplitStringPieceUsingSubstr(
    StringPiece16 input,
    StringPiece16 delimiter,
    WhitespaceHandling whitespace,
    SplitResult result_type) {
  return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
                                                result_type);
}
// Splits |input| on the whole substring |delimiter| and returns StringPieces
// rather than string copies. Thin wrapper over SplitStringUsingSubstrT.
std::vector<StringPiece> SplitStringPieceUsingSubstr(
StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
result_type);
}
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
// std::wstring / WStringPiece overloads of the four split entry points. Each
// one forwards to the same template as its 8- and 16-bit counterparts.

// Splits on any character of |separators|; returns std::wstring copies.
std::vector<std::wstring> SplitString(WStringPiece input,
WStringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
}
// Splits on any character of |separators|; returns WStringPieces.
std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
WStringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
}
// Splits on the whole substring |delimiter|; returns std::wstring copies.
std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
WStringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
result_type);
}
// Splits on the whole substring |delimiter|; returns WStringPieces.
std::vector<WStringPiece> SplitStringPieceUsingSubstr(
WStringPiece input,
WStringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) {
return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
result_type);
}
#endif
} // namespace base

View file

@ -0,0 +1,169 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING_SPLIT_H_
#define BASE_STRINGS_STRING_SPLIT_H_
#include <string>
#include <utility>
#include <vector>
#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
namespace base {
// Selects whether the split functions trim whitespace from each result.
enum WhitespaceHandling {
// Results are returned exactly as they appear between separators.
KEEP_WHITESPACE,
// Each result is passed through TrimString() with TRIM_ALL before the
// empty-result check for SplitResult is applied.
TRIM_WHITESPACE,
};
// Selects whether the split functions keep or drop empty results.
enum SplitResult {
// Strictly return all results.
//
// If the input is ",," and the separator is ',' this will return a
// vector of three empty strings.
SPLIT_WANT_ALL,
// Only nonempty results will be added to the results. Multiple separators
// will be coalesced. Separators at the beginning and end of the input will
// be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
//
// If the input is ",," and the separator is ',', this will return an empty
// vector.
SPLIT_WANT_NONEMPTY,
};
// Split the given string on ANY of the given separators, returning copies of
// the result.
//
// Note this is inverse of JoinString() defined in string_util.h.
//
// To split on either commas or semicolons, keeping all whitespace:
//
//   std::vector<std::string> tokens = base::SplitString(
//       input, ",;", base::KEEP_WHITESPACE,
//       base::SPLIT_WANT_ALL);
BASE_EXPORT std::vector<std::string> SplitString(StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type)
WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type)
WARN_UNUSED_RESULT;
// Like SplitString above except it returns a vector of StringPieces which
// reference the original buffer without copying. Although you have to be
// careful to keep the original string unmodified, this provides an efficient
// way to iterate through tokens in a string.
//
// Note this is inverse of JoinString() defined in string_util.h.
//
// To iterate through all whitespace-separated tokens in an input string:
//
// for (const auto& cur :
// base::SplitStringPiece(input, base::kWhitespaceASCII,
// base::KEEP_WHITESPACE,
// base::SPLIT_WANT_NONEMPTY)) {
// ...
BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
StringPiece input,
StringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
StringPiece16 input,
StringPiece16 separators,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
using StringPairs = std::vector<std::pair<std::string, std::string>>;
// Splits |input| into key value pairs according to the given delimiters and
// removes whitespace leading each key and trailing each value. Returns true
// only if every pair contains |key_value_delimiter| followed by a non-empty
// value. |key_value_pairs| will include ("","") pairs for entries without
// |key_value_delimiter|.
BASE_EXPORT bool SplitStringIntoKeyValuePairs(StringPiece input,
char key_value_delimiter,
char key_value_pair_delimiter,
StringPairs* key_value_pairs);
// Similar to SplitStringIntoKeyValuePairs, but use a substring
// |key_value_pair_delimiter| instead of a single char.
BASE_EXPORT bool SplitStringIntoKeyValuePairsUsingSubstr(
StringPiece input,
char key_value_delimiter,
StringPiece key_value_pair_delimiter,
StringPairs* key_value_pairs);
// Similar to SplitString, but use a substring delimiter instead of a list of
// characters that are all possible delimiters.
BASE_EXPORT std::vector<string16> SplitStringUsingSubstr(
StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr(
StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
// Like SplitStringUsingSubstr above except it returns a vector of StringPieces
// which reference the original buffer without copying. Although you have to be
// careful to keep the original string unmodified, this provides an efficient
// way to iterate through tokens in a string.
//
// To iterate through all newline-separated tokens in an input string:
//
// for (const auto& cur :
// base::SplitStringPieceUsingSubstr(input, "\r\n",
// base::KEEP_WHITESPACE,
// base::SPLIT_WANT_NONEMPTY)) {
// ...
BASE_EXPORT std::vector<StringPiece16> SplitStringPieceUsingSubstr(
StringPiece16 input,
StringPiece16 delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr(
StringPiece input,
StringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
WStringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type)
WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<WStringPiece> SplitStringPiece(
WStringPiece input,
WStringPiece separators,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr(
WStringPiece input,
WStringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr(
WStringPiece input,
WStringPiece delimiter,
WhitespaceHandling whitespace,
SplitResult result_type) WARN_UNUSED_RESULT;
#endif
} // namespace base
#endif // BASE_STRINGS_STRING_SPLIT_H_

View file

@ -0,0 +1,303 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING_TOKENIZER_H_
#define BASE_STRINGS_STRING_TOKENIZER_H_
#include <algorithm>
#include <string>
#include "base/strings/string_piece.h"
namespace base {
// StringTokenizerT is a simple string tokenizer class. It works like an
// iterator that with each step (see the GetNext method) updates members that
// refer to the next token in the input string. The user may optionally
// configure the tokenizer to return delimiters.
//
// EXAMPLE 1:
//
// char input[] = "this is a test";
// CStringTokenizer t(input, input + strlen(input), " ");
// while (t.GetNext()) {
// printf("%s\n", t.token().c_str());
// }
//
// Output:
//
// this
// is
// a
// test
//
//
// EXAMPLE 2:
//
// std::string input = "no-cache=\"foo, bar\", private";
// StringTokenizer t(input, ", ");
// t.set_quote_chars("\"");
// while (t.GetNext()) {
// printf("%s\n", t.token().c_str());
// }
//
// Output:
//
// no-cache="foo, bar"
// private
//
//
// EXAMPLE 3:
//
// bool next_is_option = false, next_is_value = false;
// std::string input = "text/html; charset=UTF-8; foo=bar";
// StringTokenizer t(input, "; =");
// t.set_options(StringTokenizer::RETURN_DELIMS);
// while (t.GetNext()) {
// if (t.token_is_delim()) {
// switch (*t.token_begin()) {
// case ';':
// next_is_option = true;
// break;
// case '=':
// next_is_value = true;
// break;
// }
// } else {
// const char* label;
// if (next_is_option) {
// label = "option-name";
// next_is_option = false;
// } else if (next_is_value) {
// label = "option-value";
// next_is_value = false;
// } else {
// label = "mime-type";
// }
// printf("%s: %s\n", label, t.token().c_str());
// }
// }
//
//
// Splits a character range into tokens separated by any character found in a
// delimiter set. |str| is the owning string type used for the delimiter and
// quote sets (and for token()); |const_iterator| is the iterator type used to
// walk the input range.
template <class str, class const_iterator>
class StringTokenizerT {
 public:
  typedef typename str::value_type char_type;

  // Options that may be passed to set_options().
  enum {
    // Specifies the delimiters should be returned as tokens.
    RETURN_DELIMS = 1 << 0,

    // Specifies that empty tokens should be returned. Treats the beginning and
    // ending of the string as implicit delimiters, though doesn't return them
    // as tokens if RETURN_DELIMS is also used.
    RETURN_EMPTY_TOKENS = 1 << 1,
  };

  // Tokenizes |string|, treating every character of |delims| as a delimiter.
  //
  // The string object must live longer than the tokenizer. In particular, this
  // should not be constructed with a temporary. The deleted rvalue constructor
  // blocks the most obvious instances of this (e.g. passing a string literal to
  // the constructor), but caution must still be exercised.
  StringTokenizerT(const str& string,
                   const str& delims) {
    Init(string.begin(), string.end(), delims);
  }

  // Don't allow temporary strings to be used with string tokenizer, since
  // Init() would otherwise save iterators to a temporary string.
  StringTokenizerT(str&&, const str& delims) = delete;

  // Tokenizes the range [string_begin, string_end). Only the iterators are
  // stored, so the underlying buffer must outlive the tokenizer.
  StringTokenizerT(const_iterator string_begin,
                   const_iterator string_end,
                   const str& delims) {
    Init(string_begin, string_end, delims);
  }

  // Set the options for this tokenizer. By default, this is 0.
  void set_options(int options) { options_ = options; }

  // Set the characters to regard as quotes. By default, this is empty. When
  // a quote char is encountered, the tokenizer will switch into a mode where
  // it ignores delimiters that it finds. It switches out of this mode once it
  // finds another instance of the quote char. If a backslash is encountered
  // within a quoted string, then the next character is skipped.
  void set_quote_chars(const str& quotes) { quotes_ = quotes; }

  // Call this method to advance the tokenizer to the next delimiter. This
  // returns false if the tokenizer is complete. This method must be called
  // before calling any of the token* methods.
  bool GetNext() {
    // The quick path is only valid when neither quoting nor any option is in
    // effect; everything else goes through the full state machine.
    if (quotes_.empty() && options_ == 0)
      return QuickGetNext();
    else
      return FullGetNext();
  }

  // Start iterating through tokens from the beginning of the string.
  //
  // This restores the complete "before the first GetNext()" state, not just
  // the scan position. FullGetNext() decides whether the next thing to read is
  // a token or a delimiter from |token_is_delim_|; if that flag were left over
  // from a previously returned token, the first character of the input would
  // be consumed as if it were a delimiter. Resetting |token_begin_| as well
  // keeps [token_begin_, token_end_) a valid (empty) range.
  void Reset() {
    token_begin_ = start_pos_;
    token_end_ = start_pos_;
    token_is_delim_ = true;
  }

  // Returns true if token is a delimiter. When the tokenizer is constructed
  // with the RETURN_DELIMS option, this method can be used to check if the
  // returned token is actually a delimiter. Returns true before the first
  // time GetNext() has been called (including right after Reset()), and after
  // GetNext() returns false.
  bool token_is_delim() const { return token_is_delim_; }

  // If GetNext() returned true, then these methods may be used to read the
  // value of the token.
  const_iterator token_begin() const { return token_begin_; }
  const_iterator token_end() const { return token_end_; }

  // Returns a copy of the current token.
  str token() const { return str(token_begin_, token_end_); }

  // Returns a piece over the current token without copying it.
  // NOTE(review): this dereferences |token_begin_|, which can equal the end of
  // the input for an empty trailing token when RETURN_EMPTY_TOKENS is set;
  // confirm that is acceptable for the iterator types in use.
  BasicStringPiece<str> token_piece() const {
    return BasicStringPiece<str>(&*token_begin_,
                                 std::distance(token_begin_, token_end_));
  }

 private:
  // Shared constructor body: records the input range and delimiter set and
  // puts the tokenizer into its initial state (no options, positioned at the
  // start, "last token was a delimiter").
  void Init(const_iterator string_begin,
            const_iterator string_end,
            const str& delims) {
    start_pos_ = string_begin;
    token_begin_ = string_begin;
    token_end_ = string_begin;
    end_ = string_end;
    delims_ = delims;
    options_ = 0;
    token_is_delim_ = true;
  }

  // Implementation of GetNext() for when we have no quote characters. We have
  // two separate implementations because AdvanceOne() is a hot spot in large
  // text files with large tokens.
  bool QuickGetNext() {
    token_is_delim_ = false;
    // Skip any run of delimiters; stop on the first non-delimiter character.
    for (;;) {
      token_begin_ = token_end_;
      if (token_end_ == end_) {
        token_is_delim_ = true;
        return false;
      }
      ++token_end_;
      if (delims_.find(*token_begin_) == str::npos)
        break;
      // else skip over delimiter.
    }
    // Extend the token up to (not including) the next delimiter or the end.
    while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
      ++token_end_;
    return true;
  }

  // Implementation of GetNext() for when we have to take quotes into account.
  bool FullGetNext() {
    AdvanceState state;

    for (;;) {
      if (token_is_delim_) {
        // Last token was a delimiter. Note: This is also the case at the start.
        //
        //    ... D T T T T D ...
        //        ^ ^
        //        | |
        //        | |token_end_| : The next character to look at or |end_|.
        //        |
        //        |token_begin_| : Points to delimiter or |token_end_|.
        //
        // The next token is always a non-delimiting token. It could be empty,
        // however.
        token_is_delim_ = false;
        token_begin_ = token_end_;

        // Slurp all non-delimiter characters into the token.
        while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) {
          ++token_end_;
        }

        // If it's non-empty, or empty tokens were requested, return the token.
        if (token_begin_ != token_end_ || (options_ & RETURN_EMPTY_TOKENS))
          return true;
      }

      DCHECK(!token_is_delim_);
      // Last token was a regular token.
      //
      //    ... T T T D T T ...
      //        ^     ^
      //        |     |
      //        |     token_end_ : The next character to look at. Always one
      //        |                  char beyond the token boundary.
      //        |
      //        token_begin_ : Points to beginning of token. Note: token could
      //                       be empty, in which case
      //                       token_begin_ == token_end_.
      //
      // The next token is always a delimiter. It could be |end_| however, but
      // |end_| is also an implicit delimiter.
      token_is_delim_ = true;
      token_begin_ = token_end_;

      if (token_end_ == end_)
        return false;

      // Look at the delimiter.
      ++token_end_;
      if (options_ & RETURN_DELIMS)
        return true;
    }

    return false;
  }

  // Returns true if |c| is one of the configured delimiter characters.
  bool IsDelim(char_type c) const {
    return delims_.find(c) != str::npos;
  }

  // Returns true if |c| is one of the configured quote characters.
  bool IsQuote(char_type c) const {
    return quotes_.find(c) != str::npos;
  }

  // Quote/escape tracking for a single FullGetNext() call.
  struct AdvanceState {
    bool in_quote;         // Currently inside a quoted section.
    bool in_escape;        // Previous character was a backslash inside quotes.
    char_type quote_char;  // The character that opened the current quote.
    AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
  };

  // Returns true if a delimiter was not hit.
  bool AdvanceOne(AdvanceState* state, char_type c) {
    if (state->in_quote) {
      if (state->in_escape) {
        // The escaped character is consumed verbatim.
        state->in_escape = false;
      } else if (c == '\\') {
        state->in_escape = true;
      } else if (c == state->quote_char) {
        // Only the same character that opened the quote can close it.
        state->in_quote = false;
      }
    } else {
      if (IsDelim(c))
        return false;
      // Remember |c| as the candidate closing quote, then check whether it
      // actually opens a quoted section.
      state->in_quote = IsQuote(state->quote_char = c);
    }
    return true;
  }

  const_iterator start_pos_;    // Beginning of the input; target of Reset().
  const_iterator token_begin_;  // Start of the current token.
  const_iterator token_end_;    // One past the current token; next scan point.
  const_iterator end_;          // End of the input.
  str delims_;                  // Characters treated as delimiters.
  str quotes_;                  // Characters treated as quotes (may be empty).
  int options_;                 // Bitmask of RETURN_* options.
  bool token_is_delim_;         // Whether the last step produced a delimiter.
};
// Tokenizer over a std::string, walked with std::string iterators.
using StringTokenizer =
    StringTokenizerT<std::string, std::string::const_iterator>;

// Tokenizer over a string16, walked with string16 iterators.
using String16Tokenizer =
    StringTokenizerT<string16, string16::const_iterator>;

// Tokenizer over a raw [begin, end) range of chars; the delimiter and quote
// sets are still std::string.
using CStringTokenizer = StringTokenizerT<std::string, const char*>;
} // namespace base
#endif // BASE_STRINGS_STRING_TOKENIZER_H_

View file

@ -0,0 +1,59 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <string>

#include "base/strings/string_tokenizer.h"
// Drains |t|: advances until GetNext() reports completion and materializes
// each token along the way so that token() is exercised as well.
void GetAllTokens(base::StringTokenizer& t) {
  for (bool has_token = t.GetNext(); has_token; has_token = t.GetNext()) {
    (void)t.token();
  }
}
// Entry point for LibFuzzer.
//
// Input layout: the first sizeof(size_t) bytes seed the delimiter-pattern
// length, the following |pattern_size| bytes are the delimiter pattern, and
// whatever remains is the string to tokenize. Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  constexpr size_t kHeaderSize = sizeof(size_t);
  // Need the header plus at least one payload byte (also keeps the modulo
  // below from dividing by zero).
  if (size < kHeaderSize + 1) {
    return 0;
  }

  // |data| carries no alignment guarantee for size_t, so copy the header out
  // with memcpy instead of dereferencing a reinterpret_cast'ed pointer.
  size_t raw_pattern_size;
  memcpy(&raw_pattern_size, data, kHeaderSize);

  // Calculate pattern size based on remaining bytes, otherwise fuzzing is
  // inefficient with bailouts in most cases.
  const size_t payload_size = size - kHeaderSize;
  const size_t pattern_size = raw_pattern_size % payload_size;

  const std::string pattern(reinterpret_cast<const char*>(data + kHeaderSize),
                            pattern_size);
  const std::string input(
      reinterpret_cast<const char*>(data + kHeaderSize + pattern_size),
      payload_size - pattern_size);

  // Allow quote_chars and options to be set. Otherwise full coverage
  // won't be possible since IsQuote, FullGetNext and other functions
  // won't be called.
  for (bool return_delims : {false, true}) {
    for (bool return_empty_tokens : {false, true}) {
      int options = 0;
      if (return_delims)
        options |= base::StringTokenizer::RETURN_DELIMS;
      if (return_empty_tokens)
        options |= base::StringTokenizer::RETURN_EMPTY_TOKENS;

      // Same input and options, once without and once with quote handling.
      base::StringTokenizer t(input, pattern);
      t.set_options(options);
      GetAllTokens(t);

      base::StringTokenizer t_quote(input, pattern);
      t_quote.set_quote_chars("\"");
      t_quote.set_options(options);
      GetAllTokens(t_quote);
    }
  }

  return 0;
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,568 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines utility functions for working with strings.
#ifndef BASE_STRINGS_STRING_UTIL_H_
#define BASE_STRINGS_STRING_UTIL_H_
#include <ctype.h>
#include <stdarg.h> // va_list
#include <stddef.h>
#include <stdint.h>
#include <initializer_list>
#include <string>
#include <vector>
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h" // For implicit conversions.
#include "build/build_config.h"
namespace base {
// C standard-library functions that aren't cross-platform are provided as
// "base::...", and their prototypes are listed below. These functions are
// then implemented as inline calls to the platform-specific equivalents in the
// platform-specific headers.
// Wrapper for vsnprintf that always null-terminates and always returns the
// number of characters that would be in an untruncated formatted
// string, even when truncation occurs.
int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
PRINTF_FORMAT(3, 0);
// This wrapper has to be inline. The declaration is kept apart from the
// definition purely so that the PRINTF_FORMAT annotation can be attached to
// it.
inline int snprintf(char* buffer, size_t size, const char* format, ...)
    PRINTF_FORMAT(3, 4);

// Variadic front end for vsnprintf: packs the trailing arguments into a
// va_list, forwards them, and hands back vsnprintf's return value unchanged.
inline int snprintf(char* buffer, size_t size, const char* format, ...) {
  va_list args;
  va_start(args, format);
  const int written = vsnprintf(buffer, size, format, args);
  va_end(args);
  return written;
}
// BSD-style safe and consistent string copy functions.
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
// long as |dst_size| is not 0. Returns the length of |src| in characters.
// If the return value is >= dst_size, then the output was truncated.
// NOTE: All sizes are in number of characters, NOT in bytes.
BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
// Scan a wprintf format string to determine whether it's portable across a
// variety of systems. This function only checks that the conversion
// specifiers used by the format string are supported and have the same meaning
// on a variety of systems. It doesn't check for other errors that might occur
// within a format string.
//
// Nonportable conversion specifiers for wprintf are:
// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
// data on all systems except Windows, which treat them as wchar_t data.
// Use %ls and %lc for wchar_t data instead.
// - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
// which treat them as char data. Use %ls and %lc for wchar_t data
// instead.
// - 'F', which is not identified by Windows wprintf documentation.
// - 'D', 'O', and 'U', which are deprecated and not available on all systems.
// Use %ld, %lo, and %lu instead.
//
// Note that there is no portable conversion specifier for char data when
// working with wprintf.
//
// This function is intended to be called from base::vswprintf.
BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
// Locale-independent tolower: maps 'A'..'Z' onto 'a'..'z' and returns every
// other value untouched. The standard library's tolower consults the current
// locale, which is not wanted here.
inline char ToLowerASCII(char c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  if (!is_upper)
    return c;
  return c + ('a' - 'A');
}
// char16 overload: maps 'A'..'Z' onto 'a'..'z' and returns every other code
// unit untouched.
inline char16 ToLowerASCII(char16 c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  if (!is_upper)
    return c;
  return c + ('a' - 'A');
}
// Locale-independent toupper: maps 'a'..'z' onto 'A'..'Z' and returns every
// other value untouched. The standard library's toupper consults the current
// locale, which is not wanted here.
inline char ToUpperASCII(char c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  if (!is_lower)
    return c;
  return c + ('A' - 'a');
}
// char16 overload: maps 'a'..'z' onto 'A'..'Z' and returns every other code
// unit untouched.
inline char16 ToUpperASCII(char16 c) {
  const bool is_lower = c >= 'a' && c <= 'z';
  if (!is_lower)
    return c;
  return c + ('A' - 'a');
}
// Converts the given string to its ASCII-lowercase equivalent.
BASE_EXPORT std::string ToLowerASCII(StringPiece str);
BASE_EXPORT string16 ToLowerASCII(StringPiece16 str);
// Converts the given string to its ASCII-uppercase equivalent.
BASE_EXPORT std::string ToUpperASCII(StringPiece str);
BASE_EXPORT string16 ToUpperASCII(StringPiece16 str);
// Functor for case-insensitive ASCII comparisons for STL algorithms like
// std::search.
//
// Note that a full Unicode version of this functor is not possible to write
// because case mappings might change the number of characters, depend on
// context (combining accents), and require handling UTF-16. If you need
// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
// use a normal operator== on the result.
// Binary predicate: two characters are equal when their ToLowerASCII() forms
// are equal.
template <typename Char>
struct CaseInsensitiveCompareASCII {
  bool operator()(Char x, Char y) const {
    const auto folded_x = ToLowerASCII(x);
    const auto folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
// Like strcasecmp for case-insensitive ASCII characters only. Returns:
// -1 (a < b)
// 0 (a == b)
// 1 (a > b)
// (unlike strcasecmp which can return values greater or less than 1/-1). For
// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
// and then just call the normal string operators on the result.
BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
// Equality for ASCII case-insensitive comparisons. For full Unicode support,
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
// == or !=.
BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
// These threadsafe functions return references to globally unique empty
// strings.
//
// It is likely faster to construct a new empty string object (just a few
// instructions to set the length to 0) than to get the empty string instance
// returned by these functions (which requires threadsafe static access).
//
// Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT
// CONSTRUCTORS. There is only one case where you should use these: functions
// which need to return a string by reference (e.g. as a class member
// accessor), and don't have an empty string to use (e.g. in an error case).
// These should not be used as initializers, function arguments, or return
// values for functions which return by value or outparam.
BASE_EXPORT const std::string& EmptyString();
BASE_EXPORT const string16& EmptyString16();
// Contains the set of characters representing whitespace in the corresponding
// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
// by HTML5, and don't include control characters.
BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF.
BASE_EXPORT extern const char kWhitespaceASCII[];
BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
// Null-terminated string representing the UTF-8 byte order mark.
BASE_EXPORT extern const char kUtf8ByteOrderMark[];
// Removes characters in |remove_chars| from anywhere in |input|. Returns true
// if any characters were removed. |remove_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool RemoveChars(const string16& input,
StringPiece16 remove_chars,
string16* output);
BASE_EXPORT bool RemoveChars(const std::string& input,
StringPiece remove_chars,
std::string* output);
// Replaces characters in |replace_chars| from anywhere in |input| with
// |replace_with|. Each character in |replace_chars| will be replaced with
// the |replace_with| string. Returns true if any characters were replaced.
// |replace_chars| must be null-terminated.
// NOTE: Safe to use the same variable for both |input| and |output|.
BASE_EXPORT bool ReplaceChars(const string16& input,
StringPiece16 replace_chars,
StringPiece16 replace_with,
string16* output);
BASE_EXPORT bool ReplaceChars(const std::string& input,
StringPiece replace_chars,
StringPiece replace_with,
std::string* output);
// Bit flags naming the ends of a string. Used in this header both as an input
// (which ends to trim) and as a return value (where whitespace was found).
enum TrimPositions {
// Neither end.
TRIM_NONE = 0,
// The beginning of the string.
TRIM_LEADING = 1 << 0,
// The end of the string.
TRIM_TRAILING = 1 << 1,
// Both ends; the bitwise OR of the two flags above.
TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
};
// Removes characters in |trim_chars| from the beginning and end of |input|.
// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if
// any characters were removed.
//
// It is safe to use the same variable for both |input| and |output| (this is
// the normal usage to trim in-place).
BASE_EXPORT bool TrimString(StringPiece16 input,
StringPiece16 trim_chars,
string16* output);
BASE_EXPORT bool TrimString(StringPiece input,
StringPiece trim_chars,
std::string* output);
// StringPiece versions of the above. The returned pieces refer to the original
// buffer.
BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,
StringPiece16 trim_chars,
TrimPositions positions);
BASE_EXPORT StringPiece TrimString(StringPiece input,
StringPiece trim_chars,
TrimPositions positions);
// Truncates a string to the nearest UTF-8 character that will leave
// the string less than or equal to the specified byte size.
BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
const size_t byte_size,
std::string* output);
#if defined(WCHAR_T_IS_UTF16)
// Utility functions to access the underlying string buffer as a wide char
// pointer.
//
// Note: These functions violate strict aliasing when char16 and wchar_t are
// unrelated types. We thus pass -fno-strict-aliasing to the compiler on
// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2].
//
// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244
// [2]
// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949
// Reinterprets a mutable char16 buffer as a wchar_t buffer; no data is copied.
inline wchar_t* as_writable_wcstr(char16* str) {
return reinterpret_cast<wchar_t*>(str);
}
// Same, for the buffer returned by data(str).
// NOTE(review): data() is presumably the helper from base/stl_util.h - confirm.
inline wchar_t* as_writable_wcstr(string16& str) {
return reinterpret_cast<wchar_t*>(data(str));
}
// Read-only view of a char16 buffer as wchar_t; no data is copied.
inline const wchar_t* as_wcstr(const char16* str) {
return reinterpret_cast<const wchar_t*>(str);
}
// Read-only view of the piece's data() as wchar_t. Only the pointer is
// returned; the piece's length is not carried along.
inline const wchar_t* as_wcstr(StringPiece16 str) {
return reinterpret_cast<const wchar_t*>(str.data());
}
// Utility functions to access the underlying string buffer as a char16 pointer.
// Reinterprets a mutable wchar_t buffer as a char16 buffer; no data is copied.
inline char16* as_writable_u16cstr(wchar_t* str) {
return reinterpret_cast<char16*>(str);
}
// Same, for the buffer returned by data(str).
// NOTE(review): data() is presumably the helper from base/stl_util.h - confirm.
inline char16* as_writable_u16cstr(std::wstring& str) {
return reinterpret_cast<char16*>(data(str));
}
// Read-only view of a wchar_t buffer as char16; no data is copied.
inline const char16* as_u16cstr(const wchar_t* str) {
return reinterpret_cast<const char16*>(str);
}
// Read-only view of the piece's data() as char16. Only the pointer is
// returned; the piece's length is not carried along.
inline const char16* as_u16cstr(WStringPiece str) {
return reinterpret_cast<const char16*>(str.data());
}
// Utility functions to convert between base::WStringPiece and
// base::StringPiece16.
// Builds a WStringPiece from the same pointer (reinterpreted) and size as
// |str|.
inline WStringPiece AsWStringPiece(StringPiece16 str) {
return WStringPiece(as_wcstr(str.data()), str.size());
}
// Builds a StringPiece16 from the same pointer (reinterpreted) and size as
// |str|.
inline StringPiece16 AsStringPiece16(WStringPiece str) {
return StringPiece16(as_u16cstr(str.data()), str.size());
}
// Constructs a new std::wstring from the str.size() characters of |str|.
inline std::wstring AsWString(StringPiece16 str) {
return std::wstring(as_wcstr(str.data()), str.size());
}
// Constructs a new string16 from the str.size() characters of |str|.
inline string16 AsString16(WStringPiece str) {
return string16(as_u16cstr(str.data()), str.size());
}
#endif // defined(WCHAR_T_IS_UTF16)
// Trims any whitespace from either end of the input string.
//
// The StringPiece versions return a substring referencing the input buffer.
// The ASCII versions look only for ASCII whitespace.
//
// The std::string versions return where whitespace was found.
// NOTE: Safe to use the same variable for both input and output.
BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input,
TrimPositions positions,
string16* output);
BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
TrimPositions positions);
BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input,
TrimPositions positions,
std::string* output);
BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
TrimPositions positions);
// Searches for CR or LF characters. Removes all contiguous whitespace
// strings that contain them. This is useful when trying to deal with text
// copied from terminals.
// Returns |text|, with the following three transformations:
// (1) Leading and trailing whitespace is trimmed.
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
// sequences containing a CR or LF are trimmed.
// (3) All other whitespace sequences are converted to single spaces.
BASE_EXPORT string16 CollapseWhitespace(
const string16& text,
bool trim_sequences_with_line_breaks);
BASE_EXPORT std::string CollapseWhitespaceASCII(
const std::string& text,
bool trim_sequences_with_line_breaks);
// Returns true if |input| is empty or contains only characters found in
// |characters|.
BASE_EXPORT bool ContainsOnlyChars(StringPiece input, StringPiece characters);
BASE_EXPORT bool ContainsOnlyChars(StringPiece16 input,
StringPiece16 characters);
// Returns true if |str| is structurally valid UTF-8 and also doesn't
// contain any non-character code point (e.g. U+10FFFE). Prohibiting
// non-characters increases the likelihood of detecting non-UTF-8 in
// real-world text, for callers which do not need to accept
// non-characters in strings.
BASE_EXPORT bool IsStringUTF8(StringPiece str);
// Returns true if |str| contains valid UTF-8, allowing non-character
// code points.
BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str);
// Returns true if |str| contains only valid ASCII character values.
// Note 1: IsStringASCII executes in time determined solely by the
// length of the string, not by its contents, so it is robust against
// timing attacks for all strings of equal length.
// Note 2: IsStringASCII assumes the input is likely all ASCII, and
// does not leave early if it is not the case.
BASE_EXPORT bool IsStringASCII(StringPiece str);
BASE_EXPORT bool IsStringASCII(StringPiece16 str);
#if defined(WCHAR_T_IS_UTF32)
BASE_EXPORT bool IsStringASCII(WStringPiece str);
#endif
// Compare the lower-case form of the given string against the given
// previously-lower-cased ASCII string (typically a constant).
//
// |str| is the string to test; |lowercase_ascii| is the reference string,
// which the caller must already have lower-cased.
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
                                      StringPiece lowercase_ascii);
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
                                      StringPiece lowercase_ascii);
// Performs a case-sensitive string compare of the given 16-bit string against
// the given 8-bit ASCII string (typically a constant). The behavior is
// undefined if the |ascii| string is not ASCII.
BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
// Indicates case sensitivity of comparisons. Only ASCII case insensitivity
// is supported. Full Unicode case-insensitive conversions would need to go in
// base/i18n so it can use ICU.
//
// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
// the results to a case-sensitive comparison.
// Selects how StartsWith()/EndsWith() compare characters.
enum class CompareCase {
// Case-sensitive comparison.
SENSITIVE,
// Case-insensitive comparison, for ASCII letters only.
INSENSITIVE_ASCII,
};
BASE_EXPORT bool StartsWith(StringPiece str,
StringPiece search_for,
CompareCase case_sensitivity);
BASE_EXPORT bool StartsWith(StringPiece16 str,
StringPiece16 search_for,
CompareCase case_sensitivity);
BASE_EXPORT bool EndsWith(StringPiece str,
StringPiece search_for,
CompareCase case_sensitivity);
BASE_EXPORT bool EndsWith(StringPiece16 str,
StringPiece16 search_for,
CompareCase case_sensitivity);
// Locale-independent ASCII character classification (the C library
// equivalents change their answers with the current locale).

// True for space, tab, line feed, carriage return and form feed.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\n' || c == '\r' || c == '\f';
}
// True for the ASCII letters 'A'..'Z' and 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_upper || is_lower;
}
// True for the ASCII upper-case letters 'A'..'Z'.
template <typename Char>
inline bool IsAsciiUpper(Char c) {
  if (!(c >= 'A'))
    return false;
  return c <= 'Z';
}
// True for the ASCII lower-case letters 'a'..'z'.
template <typename Char>
inline bool IsAsciiLower(Char c) {
  const bool at_least_a = c >= 'a';
  const bool at_most_z = c <= 'z';
  return at_least_a && at_most_z;
}
// True for the ASCII decimal digits '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  if (!(c >= '0'))
    return false;
  return c <= '9';
}
// True for the printable ASCII range, from space through tilde inclusive.
template <typename Char>
inline bool IsAsciiPrintable(Char c) {
  const bool at_or_after_space = c >= ' ';
  const bool at_or_before_tilde = c <= '~';
  return at_or_after_space && at_or_before_tilde;
}
// True for '0'..'9', 'A'..'F' and 'a'..'f'.
template <typename Char>
inline bool IsHexDigit(Char c) {
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'A' && c <= 'F')
    return true;
  return c >= 'a' && c <= 'f';
}
// Returns the integer corresponding to the given hex character. For example:
// '4' -> 4
// 'a' -> 10
// 'B' -> 11
// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
BASE_EXPORT char HexDigitToInt(wchar_t c);
// Returns true if it's a Unicode whitespace character.
BASE_EXPORT bool IsUnicodeWhitespace(wchar_t c);
// Return a byte string in human-readable format with a unit suffix. Not
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
// highly recommended instead. TODO(avi): Figure out how to get callers to use
// FormatBytes instead; remove this.
BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes);
// Starting at |start_offset| (usually 0), replace the first instance of
// |find_this| with |replace_with|.
BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
base::string16* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with);
BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
std::string* str,
size_t start_offset,
StringPiece find_this,
StringPiece replace_with);
// Starting at |start_offset| (usually 0), look through |str| and replace all
// instances of |find_this| with |replace_with|.
//
// This does entire substrings; use std::replace in <algorithm> for single
// characters, for example:
// std::replace(str.begin(), str.end(), 'a', 'b');
BASE_EXPORT void ReplaceSubstringsAfterOffset(
string16* str,
size_t start_offset,
StringPiece16 find_this,
StringPiece16 replace_with);
BASE_EXPORT void ReplaceSubstringsAfterOffset(
std::string* str,
size_t start_offset,
StringPiece find_this,
StringPiece replace_with);
// Reserves enough memory in |str| to accommodate |length_with_null| characters,
// sets the size of |str| to |length_with_null - 1| characters, and returns a
// pointer to the underlying contiguous array of characters. This is typically
// used when calling a function that writes results into a character array, but
// the caller wants the data to be managed by a string-like object. It is
// convenient in that it can be used inline in the call, and fast in that it
// avoids copying the results of the call from a char* into a string.
//
// Internally, this takes linear time because the resize() call 0-fills the
// underlying array for potentially all
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
// could avoid this aspect of the resize() call, as we expect the caller to
// immediately write over this memory, but there is no other way to set the size
// of the string, and not doing that will mean people who access |str| rather
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
// Joins a vector or list of strings into a single string, inserting |separator|
// (which may be empty) in between all elements.
//
// Note this is inverse of SplitString()/SplitStringPiece() defined in
// string_split.h.
//
// If possible, callers should build a vector of StringPieces and use the
// StringPiece variant, so that they do not create unnecessary copies of
// strings. For example, instead of using SplitString, modifying the vector,
// then using JoinString, use SplitStringPiece followed by JoinString so that no
// copies of those strings are created until the final join operation.
//
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
StringPiece separator);
BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
StringPiece16 separator);
BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts,
StringPiece separator);
BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts,
StringPiece16 separator);
// Explicit initializer_list overloads are required to break ambiguity when used
// with a literal initializer list (otherwise the compiler would not be able to
// decide between the string and StringPiece overloads).
BASE_EXPORT std::string JoinString(std::initializer_list<StringPiece> parts,
StringPiece separator);
BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts,
StringPiece16 separator);
// Replace $1-$2-$3..$9 in the format string with values from |subst|.
// Additionally, any number of consecutive '$' characters is replaced by that
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
// NULL. This only allows you to use up to nine replacements.
BASE_EXPORT string16 ReplaceStringPlaceholders(
const string16& format_string,
const std::vector<string16>& subst,
std::vector<size_t>* offsets);
BASE_EXPORT std::string ReplaceStringPlaceholders(
StringPiece format_string,
const std::vector<std::string>& subst,
std::vector<size_t>* offsets);
// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
const string16& a,
size_t* offset);
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
TrimPositions positions,
std::wstring* output);
BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
TrimPositions positions);
BASE_EXPORT bool TrimString(WStringPiece input,
WStringPiece trim_chars,
std::wstring* output);
BASE_EXPORT WStringPiece TrimString(WStringPiece input,
WStringPiece trim_chars,
TrimPositions positions);
BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
#endif
} // namespace base
#if defined(OS_WIN)
#include "base/strings/string_util_win.h"
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
#include "base/strings/string_util_posix.h"
#else
#error Define string operations appropriately for your platform
#endif
#endif // BASE_STRINGS_STRING_UTIL_H_

View file

@ -0,0 +1,54 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/string_util.h"
namespace base {
// Each WHITESPACE_* macro below expands to a comma-separated list of code
// points (without surrounding braces) so it can be spliced into the array
// initializers at the end of this block.

// ASCII whitespace excluding LINE FEED and CARRIAGE RETURN.
#define WHITESPACE_ASCII_NO_CR_LF \
0x09, /* CHARACTER TABULATION */ \
0x0B, /* LINE TABULATION */ \
0x0C, /* FORM FEED (FF) */ \
0x20 /* SPACE */
// All ASCII whitespace: the list above plus LINE FEED and CARRIAGE RETURN.
#define WHITESPACE_ASCII \
WHITESPACE_ASCII_NO_CR_LF, /* Comment to make clang-format linebreak */ \
0x0A, /* LINE FEED (LF) */ \
0x0D /* CARRIAGE RETURN (CR) */
// Whitespace code points that lie outside the ASCII range.
#define WHITESPACE_UNICODE_NON_ASCII \
0x0085, /* NEXT LINE (NEL) */ \
0x00A0, /* NO-BREAK SPACE */ \
0x1680, /* OGHAM SPACE MARK */ \
0x2000, /* EN QUAD */ \
0x2001, /* EM QUAD */ \
0x2002, /* EN SPACE */ \
0x2003, /* EM SPACE */ \
0x2004, /* THREE-PER-EM SPACE */ \
0x2005, /* FOUR-PER-EM SPACE */ \
0x2006, /* SIX-PER-EM SPACE */ \
0x2007, /* FIGURE SPACE */ \
0x2008, /* PUNCTUATION SPACE */ \
0x2009, /* THIN SPACE */ \
0x200A, /* HAIR SPACE */ \
0x2028, /* LINE SEPARATOR */ \
0x2029, /* PARAGRAPH SEPARATOR */ \
0x202F, /* NARROW NO-BREAK SPACE */ \
0x205F, /* MEDIUM MATHEMATICAL SPACE */ \
0x3000 /* IDEOGRAPHIC SPACE */
// ASCII (minus CR/LF) followed by the non-ASCII whitespace code points.
#define WHITESPACE_UNICODE_NO_CR_LF \
WHITESPACE_ASCII_NO_CR_LF, WHITESPACE_UNICODE_NON_ASCII
// Every whitespace code point listed above, ASCII first.
#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII
// The arrays below all end with an explicit 0 element, so each can be used as
// a NUL-terminated string of whitespace characters.
// Full Unicode whitespace set as wchar_t.
const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0};
// Full Unicode whitespace set as char16.
const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0};
// Unicode whitespace set without LF and CR, as char16.
const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0};
// ASCII-only whitespace set as char.
const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0};
// ASCII-only whitespace set, stored as char16.
const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
// The three bytes EF BB BF (the UTF-8 encoding of the byte order mark).
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
} // namespace base

View file

@ -0,0 +1,37 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_
#define BASE_STRINGS_STRING_UTIL_POSIX_H_
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include "base/logging.h"
namespace base {
// Chromium code style is to not use malloc'd strings; this is only for use
// for interaction with APIs that require it.
// Forwards |str| to the global ::strdup() and returns its result unchanged.
// NOTE(review): per the usual strdup contract the caller presumably owns the
// returned buffer and must free() it -- not shown in this file.
inline char* strdup(const char* str) {
return ::strdup(str);
}
// base:: spelling of vsnprintf for POSIX builds: passes every argument
// straight through to the global ::vsnprintf() and returns its result.
inline int vsnprintf(char* buffer, size_t size,
const char* format, va_list arguments) {
return ::vsnprintf(buffer, size, format, arguments);
}
// base:: spelling of vswprintf for POSIX builds. Debug-checks |format| with
// IsWprintfFormatPortable() and then forwards every argument to the global
// ::vswprintf(), returning its result.
inline int vswprintf(wchar_t* buffer, size_t size,
const wchar_t* format, va_list arguments) {
DCHECK(IsWprintfFormatPortable(format));
return ::vswprintf(buffer, size, format, arguments);
}
} // namespace base
#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_

View file

@ -0,0 +1,44 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_
#define BASE_STRINGS_STRING_UTIL_WIN_H_
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include "base/logging.h"
namespace base {
// Chromium code style is to not use malloc'd strings; this is only for use
// for interaction with APIs that require it.
// Windows variant: forwards |str| to _strdup() and returns its result.
inline char* strdup(const char* str) {
return _strdup(str);
}
// Windows variant of base::vsnprintf. Formats into |buffer| (capacity |size|)
// with vsnprintf_s, allowing at most size - 1 characters. When that call
// reports a negative result, the value of _vscprintf(format, arguments) is
// returned instead; otherwise the vsnprintf_s result is returned.
// NOTE(review): presumably the fallback reproduces the C99 "return the length
// that would have been written" behaviour on truncation -- confirm against
// the CRT documentation.
// NOTE(review): size - 1 wraps around when |size| is 0; confirm callers never
// pass an empty buffer.
inline int vsnprintf(char* buffer, size_t size,
const char* format, va_list arguments) {
int length = vsnprintf_s(buffer, size, size - 1, format, arguments);
if (length < 0)
return _vscprintf(format, arguments);
return length;
}
// Windows variant of base::vswprintf. Debug-checks |format| with
// IsWprintfFormatPortable(), then formats into |buffer| (capacity |size|) with
// _vsnwprintf_s, allowing at most size - 1 characters. When that call reports
// a negative result, the value of _vscwprintf(format, arguments) is returned
// instead; otherwise the _vsnwprintf_s result is returned.
// NOTE(review): size - 1 wraps around when |size| is 0; confirm callers never
// pass an empty buffer.
inline int vswprintf(wchar_t* buffer, size_t size,
const wchar_t* format, va_list arguments) {
DCHECK(IsWprintfFormatPortable(format));
int length = _vsnwprintf_s(buffer, size, size - 1, format, arguments);
if (length < 0)
return _vscwprintf(format, arguments);
return length;
}
} // namespace base
#endif // BASE_STRINGS_STRING_UTIL_WIN_H_

View file

@ -0,0 +1,31 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines preprocessor macros for stringizing preprocessor
// symbols (or their output) and manipulating preprocessor symbols
// that define strings.
#ifndef BASE_STRINGS_STRINGIZE_MACROS_H_
#define BASE_STRINGS_STRINGIZE_MACROS_H_
#include "build/build_config.h"
// Applies the preprocessor '#' operator to |x|, producing a string literal of
// the argument exactly as written.
// This is not very useful as it does not expand defined symbols if
// called directly. Use its counterpart without the _NO_EXPANSION
// suffix, below.
#define STRINGIZE_NO_EXPANSION(x) #x
// Use this to quote the provided parameter, first expanding it if it
// is a preprocessor symbol. The expansion happens because |x| is passed
// through one extra macro level before '#' is applied to it.
//
// For example, if:
// #define A FOO
// #define B(x) myobj->FunctionCall(x)
//
// Then:
// STRINGIZE(A) produces "FOO"
// STRINGIZE(B(y)) produces "myobj->FunctionCall(y)"
#define STRINGIZE(x) STRINGIZE_NO_EXPANSION(x)
#endif // BASE_STRINGS_STRINGIZE_MACROS_H_

View file

@ -0,0 +1,225 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/stringprintf.h"
#include <errno.h>
#include <stddef.h>
#include <vector>
#include "base/scoped_clear_last_error.h"
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
namespace base {
namespace {
// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
// is the size of the buffer. These return the number of characters in the
// formatted string excluding the NUL terminator. If the buffer is not
// large enough to accommodate the formatted string without truncation, they
// return the number of characters that would be in the fully-formatted string
// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
//
// char overload: forwards to base::vsnprintf().
inline int vsnprintfT(char* buffer,
size_t buf_size,
const char* format,
va_list argptr) {
return base::vsnprintf(buffer, buf_size, format, argptr);
}
#if defined(OS_WIN)
// wchar_t overload (Windows only): forwards to base::vswprintf().
inline int vsnprintfT(wchar_t* buffer,
size_t buf_size,
const wchar_t* format,
va_list argptr) {
return base::vswprintf(buffer, buf_size, format, argptr);
}
// char16_t overload (Windows only): reinterprets |buffer| and |format| as
// wchar_t pointers and forwards to base::vswprintf().
// NOTE(review): this presumably relies on wchar_t and char16_t having the same
// size and representation on Windows -- confirm.
inline int vsnprintfT(char16_t* buffer,
size_t buf_size,
const char16_t* format,
va_list argptr) {
return base::vswprintf(reinterpret_cast<wchar_t*>(buffer), buf_size,
reinterpret_cast<const wchar_t*>(format), argptr);
}
#endif
// Templatized backend for StringPrintF/StringAppendF. This does not finalize
// the va_list, the caller is expected to do that.
//
// Formats |format| with |ap| and appends the result to |dst|. A 1024-element
// stack buffer is tried first; if the output does not fit, heap buffers are
// tried until one is large enough. Nothing is appended if formatting fails or
// the required buffer would exceed 32 * 1024 * 1024 elements.
template <class CharT>
static void StringAppendVT(std::basic_string<CharT>* dst,
const CharT* format,
va_list ap) {
// First try with a small fixed size buffer.
// This buffer size should be kept in sync with StringUtilTest.GrowBoundary
// and StringUtilTest.StringPrintfBounds.
CharT stack_buf[1024];
// Format from a copy so |ap| itself stays usable for the retries below.
va_list ap_copy;
va_copy(ap_copy, ap);
// NOTE(review): the errno checks in the loop below presumably depend on this
// object resetting errno before the first format call -- confirm in
// base/scoped_clear_last_error.h.
base::internal::ScopedClearLastError last_error;
int result = vsnprintfT(stack_buf, base::size(stack_buf), format, ap_copy);
va_end(ap_copy);
if (result >= 0 && result < static_cast<int>(base::size(stack_buf))) {
// It fit.
dst->append(stack_buf, result);
return;
}
// Repeatedly increase buffer size until it fits.
int mem_length = base::size(stack_buf);
while (true) {
if (result < 0) {
#if defined(OS_WIN)
// On Windows, vsnprintfT always returns the number of characters in a
// fully-formatted string, so if we reach this point, something else is
// wrong and no amount of buffer-doubling is going to fix it.
return;
#else
// A negative result with errno set to anything other than EOVERFLOW is
// treated as a real formatting error: give up without appending.
if (errno != 0 && errno != EOVERFLOW)
return;
// Try doubling the buffer size.
mem_length *= 2;
#endif
} else {
// We need exactly "result + 1" characters.
// NOTE(review): result + 1 would overflow int if result == INT_MAX; the
// size cap below is only checked after this addition.
mem_length = result + 1;
}
if (mem_length > 32 * 1024 * 1024) {
// That should be plenty, don't try anything larger. This protects
// against huge allocations when using vsnprintfT implementations that
// return -1 for reasons other than overflow without setting errno.
DLOG(WARNING) << "Unable to printf the requested string due to size.";
return;
}
std::vector<CharT> mem_buf(mem_length);
// NOTE: You can only use a va_list once. Since we're in a while loop, we
// need to make a new copy each time so we don't use up the original.
va_copy(ap_copy, ap);
result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
va_end(ap_copy);
if ((result >= 0) && (result < mem_length)) {
// It fit.
dst->append(&mem_buf[0], result);
return;
}
}
}
} // namespace
// Formats the variadic arguments according to |format| and returns the
// result as a new std::string. All formatting is delegated to
// StringAppendV() on an initially empty string.
std::string StringPrintf(const char* format, ...) {
  std::string formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}
#if defined(OS_WIN)
// Wide-character counterpart of StringPrintf(): formats the variadic
// arguments according to |format| and returns the result as a new
// std::wstring, delegating to StringAppendV().
std::wstring StringPrintf(const wchar_t* format, ...) {
  std::wstring formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}
// char16_t counterpart of StringPrintf(): formats the variadic arguments
// according to |format| and returns the result as a new std::u16string,
// delegating to StringAppendV().
std::u16string StringPrintf(const char16_t* format, ...) {
  std::u16string formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}
#endif
// va_list flavour of StringPrintf(): appends the formatted output to an empty
// string via StringAppendV() and returns it. va_end() is not called on |ap|
// here.
std::string StringPrintV(const char* format, va_list ap) {
std::string result;
StringAppendV(&result, format, ap);
return result;
}
// Replaces the contents of |*dst| with the formatted output and returns a
// reference to |*dst|. The string is emptied before any formatting happens.
const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
  std::string& target = *dst;
  target.clear();

  va_list args;
  va_start(args, format);
  StringAppendV(&target, format, args);
  va_end(args);

  return target;
}
#if defined(OS_WIN)
// Wide-character counterpart of SStringPrintf(): replaces the contents of
// |*dst| with the formatted output and returns a reference to |*dst|. The
// string is emptied before any formatting happens.
const std::wstring& SStringPrintf(std::wstring* dst,
                                  const wchar_t* format, ...) {
  std::wstring& target = *dst;
  target.clear();

  va_list args;
  va_start(args, format);
  StringAppendV(&target, format, args);
  va_end(args);

  return target;
}
// char16_t counterpart of SStringPrintf(): replaces the contents of |*dst|
// with the formatted output and returns a reference to |*dst|. The string is
// emptied before any formatting happens.
const std::u16string& SStringPrintf(std::u16string* dst,
                                    const char16_t* format,
                                    ...) {
  std::u16string& target = *dst;
  target.clear();

  va_list args;
  va_start(args, format);
  StringAppendV(&target, format, args);
  va_end(args);

  return target;
}
#endif
// Appends the formatted output to |*dst|, leaving its existing contents in
// place. Packages the variadic arguments and hands them to StringAppendV().
void StringAppendF(std::string* dst, const char* format, ...) {
  va_list args;
  va_start(args, format);
  {
    // All of the real work happens in the va_list overload.
    StringAppendV(dst, format, args);
  }
  va_end(args);
}
#if defined(OS_WIN)
// Wide-character counterpart of StringAppendF(): appends the formatted output
// to |*dst| by handing the packaged variadic arguments to StringAppendV().
void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
  va_list args;
  va_start(args, format);
  {
    // All of the real work happens in the va_list overload.
    StringAppendV(dst, format, args);
  }
  va_end(args);
}
// char16_t counterpart of StringAppendF(): appends the formatted output to
// |*dst| by handing the packaged variadic arguments to StringAppendV().
void StringAppendF(std::u16string* dst, const char16_t* format, ...) {
  va_list args;
  va_start(args, format);
  {
    // All of the real work happens in the va_list overload.
    StringAppendV(dst, format, args);
  }
  va_end(args);
}
#endif
// Public char entry point: forwards to the StringAppendVT template, which
// appends the formatted output to |*dst|. |ap| is not finalized here.
void StringAppendV(std::string* dst, const char* format, va_list ap) {
StringAppendVT(dst, format, ap);
}
#if defined(OS_WIN)
// Public wchar_t entry point (Windows only): forwards to the StringAppendVT
// template, which appends the formatted output to |*dst|.
void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
StringAppendVT(dst, format, ap);
}
// Public char16_t entry point (Windows only): forwards to the StringAppendVT
// template, which appends the formatted output to |*dst|.
void StringAppendV(std::u16string* dst, const char16_t* format, va_list ap) {
StringAppendVT(dst, format, ap);
}
#endif
} // namespace base

View file

@ -0,0 +1,74 @@
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_STRINGPRINTF_H_
#define BASE_STRINGS_STRINGPRINTF_H_
#include <stdarg.h> // va_list
#include <string>
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "build/build_config.h"
namespace base {
// Return a C++ string given printf-like input.
BASE_EXPORT std::string StringPrintf(const char* format, ...)
PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
#if defined(OS_WIN)
// Note: Unfortunately compile time checking of the format string for UTF-16
// strings is not supported by any compiler, thus these functions should be used
// carefully and sparingly. Also applies to SStringPrintf and StringAppendV
// below.
BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...)
WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
BASE_EXPORT std::u16string StringPrintf(const char16_t* format, ...)
WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
#endif
// Return a C++ string given vprintf-like input.
BASE_EXPORT std::string StringPrintV(const char* format, va_list ap)
PRINTF_FORMAT(1, 0) WARN_UNUSED_RESULT;
// Store result into a supplied string and return it.
BASE_EXPORT const std::string& SStringPrintf(std::string* dst,
const char* format,
...) PRINTF_FORMAT(2, 3);
#if defined(OS_WIN)
BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst,
const wchar_t* format,
...) WPRINTF_FORMAT(2, 3);
BASE_EXPORT const std::u16string& SStringPrintf(std::u16string* dst,
const char16_t* format,
...) WPRINTF_FORMAT(2, 3);
#endif
// Append result to a supplied string.
BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...)
PRINTF_FORMAT(2, 3);
#if defined(OS_WIN)
BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...)
WPRINTF_FORMAT(2, 3);
BASE_EXPORT void StringAppendF(std::u16string* dst, const char16_t* format, ...)
WPRINTF_FORMAT(2, 3);
#endif
// Lower-level routine that takes a va_list and appends to a specified
// string. All other routines are just convenience wrappers around it.
BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap)
PRINTF_FORMAT(2, 0);
#if defined(OS_WIN)
BASE_EXPORT void StringAppendV(std::wstring* dst,
const wchar_t* format,
va_list ap) WPRINTF_FORMAT(2, 0);
BASE_EXPORT void StringAppendV(std::u16string* dst,
const char16_t* format,
va_list ap) WPRINTF_FORMAT(2, 0);
#endif
} // namespace base
#endif // BASE_STRINGS_STRINGPRINTF_H_

View file

@ -0,0 +1,93 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_SYS_STRING_CONVERSIONS_H_
#define BASE_STRINGS_SYS_STRING_CONVERSIONS_H_
// Provides system-dependent string type conversions for cases where it's
// necessary to not use ICU. Generally, you should not need this in Chrome,
// but it is used in some shared code. Dependencies should be minimal.
#include <stdint.h>
#include <string>
#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
#include "build/build_config.h"
#if defined(OS_MACOSX)
#include <CoreFoundation/CoreFoundation.h>
#ifdef __OBJC__
@class NSString;
#else
class NSString;
#endif
#endif // OS_MACOSX
namespace base {
// Converts between wide and UTF-8 representations of a string. On error, the
// result is system-dependent.
BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide)
WARN_UNUSED_RESULT;
BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
// Converts between wide and the system multi-byte representations of a string.
// DANGER: This will lose information and can change (on Windows, this can
// change between reboots).
BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide)
WARN_UNUSED_RESULT;
BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb)
WARN_UNUSED_RESULT;
// Windows-specific ------------------------------------------------------------
#if defined(OS_WIN)
// Converts between 8-bit and wide strings, using the given code page. The
// code page identifier is one accepted by the Windows function
// MultiByteToWideChar().
BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page)
WARN_UNUSED_RESULT;
BASE_EXPORT std::string SysWideToMultiByte(const std::wstring& wide,
uint32_t code_page)
WARN_UNUSED_RESULT;
#endif // defined(OS_WIN)
// Mac-specific ----------------------------------------------------------------
#if defined(OS_MACOSX)
// Converts between STL strings and CFStringRefs/NSStrings.
// Creates a string, and returns it with a refcount of 1. You are responsible
// for releasing it. Returns NULL on failure.
BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8)
WARN_UNUSED_RESULT;
BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16)
WARN_UNUSED_RESULT;
// Same, but returns an autoreleased NSString.
BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8) WARN_UNUSED_RESULT;
BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16)
WARN_UNUSED_RESULT;
// Converts a CFStringRef to an STL string. Returns an empty string on failure.
BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref)
WARN_UNUSED_RESULT;
BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT;
// Same, but accepts NSString input. Converts nil NSString* to the appropriate
// string type of length 0.
BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT;
BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT;
#endif // defined(OS_MACOSX)
} // namespace base
#endif // BASE_STRINGS_SYS_STRING_CONVERSIONS_H_

View file

@ -0,0 +1,183 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/sys_string_conversions.h"
#import <Foundation/Foundation.h>
#include <stddef.h>
#include <vector>
#include "base/mac/foundation_util.h"
#include "base/mac/scoped_cftyperef.h"
#include "base/strings/string_piece.h"
namespace base {
namespace {
// Encodes |cfstring| as |encoding| and returns the resulting bytes packed
// into an STL string of type |StringType|. An empty string is returned when
// the input is empty or when either CFStringGetBytes() call converts nothing.
//
// Do not assert in this function since it is used by the assertion code!
template <typename StringType>
static StringType CFStringToSTLStringWithEncodingT(CFStringRef cfstring,
                                                   CFStringEncoding encoding) {
  using CharType = typename StringType::value_type;
  using SizeType = typename StringType::size_type;

  const CFIndex source_length = CFStringGetLength(cfstring);
  if (source_length == 0)
    return StringType();
  const CFRange full_range = CFRangeMake(0, source_length);

  // Sizing pass: no destination buffer is supplied, so the call only reports
  // the number of bytes the encoded form needs.
  CFIndex bytes_needed;
  const CFIndex measured = CFStringGetBytes(cfstring, full_range, encoding,
                                            0,      // lossByte
                                            false,  // isExternalRepresentation
                                            NULL,   // buffer
                                            0,      // maxBufLen
                                            &bytes_needed);
  if (measured == 0 || bytes_needed == 0)
    return StringType();

  // |bytes_needed| counts UInt8 units. The storage is declared in CharType
  // units so that it is correctly aligned for the string's element type; one
  // extra element is reserved for a NUL terminator.
  const SizeType unit_count =
      bytes_needed * sizeof(UInt8) / sizeof(CharType) + 1;
  std::vector<CharType> storage(unit_count);

  // Conversion pass: write the encoded bytes into the storage.
  const CFIndex written =
      CFStringGetBytes(cfstring, full_range, encoding,
                       0,      // lossByte
                       false,  // isExternalRepresentation
                       reinterpret_cast<UInt8*>(&storage[0]),
                       bytes_needed,
                       NULL);  // usedBufLen
  if (written == 0)
    return StringType();

  const SizeType payload_units = unit_count - 1;
  storage[payload_units] = '\0';
  return StringType(&storage[0], payload_units);
}
// Given an STL string |in| with an encoding specified by |in_encoding|,
// convert it to |out_encoding| and return it as an STL string of the
// |OutStringType| template type. Returns an empty string on failure.
//
// Do not assert in this function since it is used by the asssertion code!
template<typename InStringType, typename OutStringType>
static OutStringType STLStringToSTLStringWithEncodingsT(
const InStringType& in,
CFStringEncoding in_encoding,
CFStringEncoding out_encoding) {
typename InStringType::size_type in_length = in.length();
if (in_length == 0)
return OutStringType();
base::ScopedCFTypeRef<CFStringRef> cfstring(CFStringCreateWithBytesNoCopy(
NULL,
reinterpret_cast<const UInt8*>(in.data()),
in_length * sizeof(typename InStringType::value_type),
in_encoding,
false,
kCFAllocatorNull));
if (!cfstring)
return OutStringType();
return CFStringToSTLStringWithEncodingT<OutStringType>(cfstring,
out_encoding);
}
// Builds a CFString from the characters of |in|, interpreting them as
// |in_encoding|. An empty |in| yields the constant CFSTR(""); otherwise the
// result comes from CFStringCreateWithBytes(), which returns NULL on failure.
template <typename StringType>
static CFStringRef StringPieceToCFStringWithEncodingsT(
    BasicStringPiece<StringType> in,
    CFStringEncoding in_encoding) {
  using CharType = typename BasicStringPiece<StringType>::value_type;

  const auto unit_count = in.length();
  if (unit_count == 0)
    return CFSTR("");

  const UInt8* const raw_bytes = reinterpret_cast<const UInt8*>(in.data());
  return CFStringCreateWithBytes(kCFAllocatorDefault, raw_bytes,
                                 unit_count * sizeof(CharType), in_encoding,
                                 false);
}
// Specify the byte ordering explicitly, otherwise CFString will be confused
// when strings don't carry BOMs, as they typically won't.
// Narrow strings are UTF-8. The "medium" (UTF-16) and "wide" (UTF-32)
// encodings are picked to match whichever of __BIG_ENDIAN__ /
// __LITTLE_ENDIAN__ is defined; if neither macro is defined, the latter two
// constants are not declared at all.
static const CFStringEncoding kNarrowStringEncoding = kCFStringEncodingUTF8;
#ifdef __BIG_ENDIAN__
static const CFStringEncoding kMediumStringEncoding = kCFStringEncodingUTF16BE;
static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32BE;
#elif defined(__LITTLE_ENDIAN__)
static const CFStringEncoding kMediumStringEncoding = kCFStringEncodingUTF16LE;
static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32LE;
#endif // __LITTLE_ENDIAN__
} // namespace
// Converts |wide| from kWideStringEncoding to kNarrowStringEncoding (UTF-8)
// by round-tripping through a CFString.
// Do not assert in this function since it is used by the assertion code!
std::string SysWideToUTF8(const std::wstring& wide) {
return STLStringToSTLStringWithEncodingsT<std::wstring, std::string>(
wide, kWideStringEncoding, kNarrowStringEncoding);
}
// Converts |utf8| from kNarrowStringEncoding (UTF-8) to kWideStringEncoding
// by round-tripping through a CFString.
// Do not assert in this function since it is used by the assertion code!
std::wstring SysUTF8ToWide(StringPiece utf8) {
return STLStringToSTLStringWithEncodingsT<StringPiece, std::wstring>(
utf8, kNarrowStringEncoding, kWideStringEncoding);
}
// In this implementation the native multi-byte encoding is treated as UTF-8:
// simply delegates to SysWideToUTF8().
std::string SysWideToNativeMB(const std::wstring& wide) {
return SysWideToUTF8(wide);
}
// In this implementation the native multi-byte encoding is treated as UTF-8:
// simply delegates to SysUTF8ToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
return SysUTF8ToWide(native_mb);
}
// Creates a CFString from |utf8|, interpreting it as kNarrowStringEncoding
// (UTF-8), via StringPieceToCFStringWithEncodingsT().
CFStringRef SysUTF8ToCFStringRef(StringPiece utf8) {
return StringPieceToCFStringWithEncodingsT(utf8, kNarrowStringEncoding);
}
// Creates a CFString from |utf16|, interpreting it as kMediumStringEncoding
// (host-endian UTF-16), via StringPieceToCFStringWithEncodingsT().
CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16) {
return StringPieceToCFStringWithEncodingsT(utf16, kMediumStringEncoding);
}
// Builds a CFString with SysUTF8ToCFStringRef(), casts it to NSString* with
// mac::CFToNSCast() and sends it -autorelease before returning it.
NSString* SysUTF8ToNSString(StringPiece utf8) {
return [mac::CFToNSCast(SysUTF8ToCFStringRef(utf8)) autorelease];
}
// Builds a CFString with SysUTF16ToCFStringRef(), casts it to NSString* with
// mac::CFToNSCast() and sends it -autorelease before returning it.
NSString* SysUTF16ToNSString(StringPiece16 utf16) {
return [mac::CFToNSCast(SysUTF16ToCFStringRef(utf16)) autorelease];
}
// Returns the contents of |ref| encoded as kNarrowStringEncoding (UTF-8).
// |ref| is passed to the conversion helper without a NULL check.
std::string SysCFStringRefToUTF8(CFStringRef ref) {
return CFStringToSTLStringWithEncodingT<std::string>(ref,
kNarrowStringEncoding);
}
// Returns the contents of |ref| encoded as kMediumStringEncoding (host-endian
// UTF-16). |ref| is passed to the conversion helper without a NULL check.
string16 SysCFStringRefToUTF16(CFStringRef ref) {
return CFStringToSTLStringWithEncodingT<string16>(ref,
kMediumStringEncoding);
}
// Converts |nsstring| to a UTF-8 std::string. A nil |nsstring| yields an
// empty string; otherwise the pointer is reinterpreted as a CFStringRef and
// handed to SysCFStringRefToUTF8().
std::string SysNSStringToUTF8(NSString* nsstring) {
  return nsstring
             ? SysCFStringRefToUTF8(reinterpret_cast<CFStringRef>(nsstring))
             : std::string();
}
// Converts |nsstring| to a UTF-16 string16. A nil |nsstring| yields an empty
// string; otherwise the pointer is reinterpreted as a CFStringRef and handed
// to SysCFStringRefToUTF16().
string16 SysNSStringToUTF16(NSString* nsstring) {
  return nsstring
             ? SysCFStringRefToUTF16(reinterpret_cast<CFStringRef>(nsstring))
             : string16();
}
} // namespace base

View file

@ -0,0 +1,162 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/sys_string_conversions.h"
#include <stddef.h>
#include <wchar.h>
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
namespace base {
// POSIX implementation: returns WideToUTF8(wide).
std::string SysWideToUTF8(const std::wstring& wide) {
// In theory this should be using the system-provided conversion rather
// than our ICU, but this will do for now.
return WideToUTF8(wide);
}
// POSIX implementation: converts |utf8| with UTF8ToWide() and returns the
// output string. The return value of UTF8ToWide() is not inspected.
std::wstring SysUTF8ToWide(StringPiece utf8) {
// In theory this should be using the system-provided conversion rather
// than our ICU, but this will do for now.
std::wstring out;
UTF8ToWide(utf8.data(), utf8.size(), &out);
return out;
}
#if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
// TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
// support and a better understanding of what calls these routines.
// Variant used when SYSTEM_NATIVE_UTF8 or OS_ANDROID is defined: the native
// multi-byte encoding is taken to be UTF-8, so this is just WideToUTF8().
std::string SysWideToNativeMB(const std::wstring& wide) {
return WideToUTF8(wide);
}
// Variant used when SYSTEM_NATIVE_UTF8 or OS_ANDROID is defined: the native
// multi-byte encoding is taken to be UTF-8, so this is just SysUTF8ToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
return SysUTF8ToWide(native_mb);
}
#else
// Converts |wide| to the multi-byte encoding used by wcrtomb(). The input is
// walked twice: once to measure the output and once to fill it in. Returns an
// empty string if wcrtomb() reports an error for any character. Embedded NUL
// wide characters are never passed to wcrtomb(); each one becomes a single
// NUL byte in the output.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionError = static_cast<size_t>(-1);
  mbstate_t state;

  // Pass 1: count the output bytes. wcrtomb() does not compute a length when
  // given a NULL destination, so each character is written to a scratch
  // buffer that is then discarded.
  memset(&state, 0, sizeof(state));
  size_t total_bytes = 0;
  for (size_t idx = 0; idx < wide.size(); ++idx) {
    const wchar_t wc = wide[idx];
    char scratch[16];
    const size_t produced = wc ? wcrtomb(scratch, wc, &state) : 0;
    if (produced == kConversionError)
      return std::string();
    if (produced == 0) {
      // Embedded NUL: it still occupies one byte of output.
      ++total_bytes;
    } else {
      total_bytes += produced;
    }
  }

  if (total_bytes == 0)
    return std::string();

  std::string out;
  out.resize(total_bytes);

  // Pass 2: convert for real, starting again from the initial shift state and
  // writing each character directly at the current output position.
  memset(&state, 0, sizeof(state));
  size_t write_pos = 0;
  for (size_t idx = 0; idx < wide.size(); ++idx) {
    const wchar_t wc = wide[idx];
    const size_t produced = wc ? wcrtomb(&out[write_pos], wc, &state) : 0;
    if (produced == kConversionError)
      return std::string();
    if (produced == 0) {
      // resize() already zero-filled the output, so just step over the byte.
      ++write_pos;
    } else {
      write_pos += produced;
    }
  }

  return out;
}
// Converts |native_mb| from the multi-byte encoding understood by mbrtowc()
// to a wide string. The input is walked twice: once to count the wide
// characters and once to produce them. Returns an empty string if mbrtowc()
// reports an invalid (-1) or incomplete (-2) sequence. An embedded NUL byte
// consumes one input byte and yields one wide character.
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  const size_t kInvalidSequence = static_cast<size_t>(-1);
  const size_t kIncompleteSequence = static_cast<size_t>(-2);
  const size_t input_size = native_mb.size();
  mbstate_t state;

  // Pass 1: count the wide characters without storing them.
  memset(&state, 0, sizeof(state));
  size_t wide_count = 0;
  size_t read_pos = 0;
  while (read_pos < input_size) {
    const size_t consumed = mbrtowc(nullptr, native_mb.data() + read_pos,
                                    input_size - read_pos, &state);
    if (consumed == kIncompleteSequence || consumed == kInvalidSequence)
      return std::wstring();
    // mbrtowc() returns 0 for a NUL byte; step past that single byte.
    read_pos += (consumed == 0) ? 1 : consumed;
    ++wide_count;
  }

  if (wide_count == 0)
    return std::wstring();

  std::wstring out;
  out.resize(wide_count);

  // Pass 2: convert for real, starting again from the initial shift state.
  memset(&state, 0, sizeof(state));
  size_t write_pos = 0;
  read_pos = 0;
  while (read_pos < input_size) {
    const size_t consumed =
        mbrtowc(&out[write_pos], native_mb.data() + read_pos,
                input_size - read_pos, &state);
    if (consumed == kIncompleteSequence || consumed == kInvalidSequence)
      return std::wstring();
    read_pos += (consumed == 0) ? 1 : consumed;
    ++write_pos;
  }

  return out;
}
#endif // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
} // namespace base

View file

@ -0,0 +1,71 @@
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/sys_string_conversions.h"
#include <windows.h>
#include <stdint.h>
#include "base/strings/string_piece.h"
namespace base {
// Windows implementation: converts |wide| with SysWideToMultiByte() using the
// CP_UTF8 code page.
// Do not assert in this function since it is used by the assertion code!
std::string SysWideToUTF8(const std::wstring& wide) {
return SysWideToMultiByte(wide, CP_UTF8);
}
// Windows implementation: converts |utf8| with SysMultiByteToWide() using the
// CP_UTF8 code page.
// Do not assert in this function since it is used by the assertion code!
std::wstring SysUTF8ToWide(StringPiece utf8) {
return SysMultiByteToWide(utf8, CP_UTF8);
}
// Windows implementation: converts |wide| with SysWideToMultiByte() using the
// CP_ACP code page.
std::string SysWideToNativeMB(const std::wstring& wide) {
return SysWideToMultiByte(wide, CP_ACP);
}
// Windows implementation: converts |native_mb| with SysMultiByteToWide() using
// the CP_ACP code page.
std::wstring SysNativeMBToWide(StringPiece native_mb) {
return SysMultiByteToWide(native_mb, CP_ACP);
}
// Converts the multi-byte string |mb|, encoded in |code_page|, to a wide
// string using MultiByteToWideChar(). Returns an empty string when |mb| is
// empty or when the sizing call reports zero characters.
//
// Do not assert in this function since it is used by the assertion code!
std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page) {
  std::wstring wide;
  if (mb.empty())
    return wide;

  const int input_bytes = static_cast<int>(mb.length());

  // First call: no output buffer, just ask how many wide characters result.
  const int wide_chars =
      MultiByteToWideChar(code_page, 0, mb.data(), input_bytes, NULL, 0);
  if (wide_chars != 0) {
    // Second call: convert into storage of exactly the reported size.
    wide.resize(wide_chars);
    MultiByteToWideChar(code_page, 0, mb.data(), input_bytes, &wide[0],
                        wide_chars);
  }
  return wide;
}
// Converts |wide| to a multi-byte string in |code_page| using
// WideCharToMultiByte(). Returns an empty string when |wide| is empty or when
// the sizing call reports zero bytes.
//
// Do not assert in this function since it is used by the assertion code!
std::string SysWideToMultiByte(const std::wstring& wide, uint32_t code_page) {
  std::string mb;

  const int input_chars = static_cast<int>(wide.length());
  if (input_chars == 0)
    return mb;

  // First call: no output buffer, just ask how many bytes are needed.
  const int bytes_needed = WideCharToMultiByte(
      code_page, 0, wide.data(), input_chars, NULL, 0, NULL, NULL);
  if (bytes_needed != 0) {
    // Second call: convert into storage of exactly the reported size.
    mb.resize(bytes_needed);
    WideCharToMultiByte(code_page, 0, wide.data(), input_chars, &mb[0],
                        bytes_needed, NULL, NULL);
  }
  return mb;
}
} // namespace base

View file

@ -0,0 +1,264 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/utf_offset_string_conversions.h"
#include <stdint.h>
#include <algorithm>
#include <memory>
#include "base/logging.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversion_utils.h"
namespace base {
// Stores the three arguments in the same-named members.
// NOTE(review): judging by how AdjustOffset() reads these fields, an
// Adjustment presumably says that |original_length| characters starting at
// |original_offset| were replaced by |output_length| characters -- confirm
// against the class declaration in the header.
OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
size_t original_length,
size_t output_length)
: original_offset(original_offset),
original_length(original_length),
output_length(output_length) {
}
// static
// Applies AdjustOffset() with |adjustments| and |limit| to every element of
// |*offsets_for_adjustment|, updating each one in place.
void OffsetAdjuster::AdjustOffsets(const Adjustments& adjustments,
                                   std::vector<size_t>* offsets_for_adjustment,
                                   size_t limit) {
  DCHECK(offsets_for_adjustment);
  std::vector<size_t>& offsets = *offsets_for_adjustment;
  for (size_t idx = 0; idx < offsets.size(); ++idx)
    AdjustOffset(adjustments, &offsets[idx], limit);
}
// static
void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,
size_t* offset,
size_t limit) {
DCHECK(offset);
if (*offset == string16::npos)
return;
int adjustment = 0;
for (const auto& i : adjustments) {
if (*offset <= i.original_offset)
break;
if (*offset < (i.original_offset + i.original_length)) {
*offset = string16::npos;
return;
}
adjustment += static_cast<int>(i.original_length - i.output_length);
}
*offset -= adjustment;
if (*offset > limit)
*offset = string16::npos;
}
// static
// Applies UnadjustOffset() with |adjustments| to every element of
// |*offsets_for_unadjustment|, updating each one in place. Does nothing when
// the vector pointer is null or |adjustments| is empty.
void OffsetAdjuster::UnadjustOffsets(
    const Adjustments& adjustments,
    std::vector<size_t>* offsets_for_unadjustment) {
  if (offsets_for_unadjustment && !adjustments.empty()) {
    std::vector<size_t>& offsets = *offsets_for_unadjustment;
    for (size_t idx = 0; idx < offsets.size(); ++idx)
      UnadjustOffset(adjustments, &offsets[idx]);
  }
}
// static
void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
size_t* offset) {
if (*offset == string16::npos)
return;
int adjustment = 0;
for (const auto& i : adjustments) {
if (*offset + adjustment <= i.original_offset)
break;
adjustment += static_cast<int>(i.original_length - i.output_length);
if ((*offset + adjustment) < (i.original_offset + i.original_length)) {
*offset = string16::npos;
return;
}
}
*offset += adjustment;
}
// static
// Folds |first_adjustments| (made to the original string) into
// |adjustments_on_adjusted_string| (made afterwards, with offsets relative to
// the intermediate string). On return |*adjustments_on_adjusted_string| holds
// the merged list, with offsets expressed relative to the original string.
// The merge walks both inputs front to back exactly once; the checks below
// DCHECK that any |first_adjustments| entry overlapping a later adjustment is
// a collapse (original_length > output_length).
void OffsetAdjuster::MergeSequentialAdjustments(
const Adjustments& first_adjustments,
Adjustments* adjustments_on_adjusted_string) {
auto adjusted_iter = adjustments_on_adjusted_string->begin();
auto first_iter = first_adjustments.begin();
// Simultaneously iterate over all |adjustments_on_adjusted_string| and
// |first_adjustments|, pushing adjustments at the end of
// |adjustments_builder| as we go. |shift| keeps track of the current number
// of characters collapsed by |first_adjustments| up to this point.
// |currently_collapsing| keeps track of the number of characters collapsed by
// |first_adjustments| into the current |adjusted_iter|'s length. These are
// characters that will change |shift| as soon as we're done processing the
// current |adjusted_iter|; they are not yet reflected in |shift|.
size_t shift = 0;
size_t currently_collapsing = 0;
// While we *could* update |adjustments_on_adjusted_string| in place by
// inserting new adjustments into the middle, we would be repeatedly calling
// |std::vector::insert|. That would cost O(n) time per insert, relative to
// distance from end of the string. By instead allocating
// |adjustments_builder| and calling |std::vector::push_back|, we only pay
// amortized constant time per push. We are trading space for time.
Adjustments adjustments_builder;
while (adjusted_iter != adjustments_on_adjusted_string->end()) {
if ((first_iter == first_adjustments.end()) ||
((adjusted_iter->original_offset + shift +
adjusted_iter->original_length) <= first_iter->original_offset)) {
// Entire |adjusted_iter| (accounting for its shift and including its
// whole original length) comes before |first_iter|.
//
// Correct the offset at |adjusted_iter| and move onto the next
// adjustment that needs revising.
adjusted_iter->original_offset += shift;
// Anything folded into this adjustment's length now counts towards the
// shift applied to every later adjustment.
shift += currently_collapsing;
currently_collapsing = 0;
adjustments_builder.push_back(*adjusted_iter);
++adjusted_iter;
} else if ((adjusted_iter->original_offset + shift) >
first_iter->original_offset) {
// |first_iter| comes before the |adjusted_iter| (as adjusted by |shift|).
// It's not possible for the adjustments to overlap. (It shouldn't
// be possible that we have an |adjusted_iter->original_offset| that,
// when adjusted by the computed |shift|, is in the middle of
// |first_iter|'s output's length. After all, that would mean the
// current adjustment_on_adjusted_string somehow points to an offset
// that was supposed to have been eliminated by the first set of
// adjustments.)
DCHECK_LE(first_iter->original_offset + first_iter->output_length,
adjusted_iter->original_offset + shift);
// Add the |first_iter| to the full set of adjustments.
shift += first_iter->original_length - first_iter->output_length;
adjustments_builder.push_back(*first_iter);
++first_iter;
} else {
// The first adjustment adjusted something that then got further adjusted
// by the second set of adjustments. In other words, |first_iter| points
// to something in the range covered by |adjusted_iter|'s length (after
// accounting for |shift|). Precisely,
// adjusted_iter->original_offset + shift
// <=
// first_iter->original_offset
// <=
// adjusted_iter->original_offset + shift +
// adjusted_iter->original_length
// Modify the current |adjusted_iter| to include whatever collapsing
// happened in |first_iter|, then advance to the next |first_adjustments|
// because we dealt with the current one.
const int collapse = static_cast<int>(first_iter->original_length) -
static_cast<int>(first_iter->output_length);
// This function does not know how to deal with a string that expands and
// then gets modified, only strings that collapse and then get modified.
DCHECK_GT(collapse, 0);
adjusted_iter->original_length += collapse;
currently_collapsing += collapse;
++first_iter;
}
}
// Every pending collapse must have been moved into |shift| by the branch
// that emitted its |adjusted_iter|.
DCHECK_EQ(0u, currently_collapsing);
if (first_iter != first_adjustments.end()) {
// Only first adjustments are left. These do not need to be modified.
// (Their offsets are already correct with respect to the original string.)
// Append them all.
DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
adjustments_builder.insert(adjustments_builder.end(), first_iter,
first_adjustments.end());
}
// Hand the merged list back through the in/out parameter.
*adjustments_on_adjusted_string = std::move(adjustments_builder);
}
// Transcodes |src_len| code units starting at |src| into the encoding of
// |output|, one code point at a time. Converted code units are appended to
// |output|; the callers in this file clear and pre-size it beforehand. Input
// that cannot be decoded is written as U+FFFD and makes the function return
// false; otherwise it returns true. If non-NULL, |adjustments| is cleared
// and then receives one entry for every code point whose encoded length
// differs between source and destination. Entries are pushed in source
// order, so the list is sorted by increasing offset.
template <typename SrcChar, typename DestStdString>
bool ConvertUnicode(const SrcChar* src,
                    size_t src_len,
                    DestStdString* output,
                    OffsetAdjuster::Adjustments* adjustments) {
  if (adjustments)
    adjustments->clear();

  // ICU requires 32-bit numbers.
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  bool all_valid = true;
  int32_t i = 0;
  while (i < src_len32) {
    const size_t start = i;
    uint32_t code_point;
    const bool decoded = ReadUnicodeCharacter(src, src_len32, &i, &code_point);
    if (!decoded)
      all_valid = false;
    const size_t units_written =
        WriteUnicodeCharacter(decoded ? code_point : 0xFFFD, output);

    // ReadUnicodeCharacter() leaves |i| _on_ the last code unit it consumed
    // rather than one past it, hence the "+ 1" when counting what was read.
    const size_t units_read = i - start + 1;

    // Only length-changing conversions need to be recorded.
    if (adjustments && (units_read != units_written)) {
      adjustments->push_back(
          OffsetAdjuster::Adjustment(start, units_read, units_written));
    }
    ++i;
  }
  return all_valid;
}
// Converts |src_len| bytes of UTF-8 at |src| to UTF-16 in |*output|,
// recording length-changing conversions in |adjustments| when it is
// non-NULL. Returns the result of ConvertUnicode().
bool UTF8ToUTF16WithAdjustments(
    const char* src,
    size_t src_len,
    string16* output,
    base::OffsetAdjuster::Adjustments* adjustments) {
  // Empties |output| and reserves a guessed capacity before converting.
  PrepareForUTF16Or32Output(src, src_len, output);
  const bool fully_valid = ConvertUnicode(src, src_len, output, adjustments);
  return fully_valid;
}

// StringPiece flavour of the above. The success flag is dropped and the
// converted string is returned regardless.
string16 UTF8ToUTF16WithAdjustments(
    const base::StringPiece& utf8,
    base::OffsetAdjuster::Adjustments* adjustments) {
  string16 converted;
  UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &converted,
                             adjustments);
  return converted;
}
// Converts |utf8| to UTF-16 and rewrites every entry of
// |offsets_for_adjustment| from an offset into |utf8| to the matching offset
// into the returned string. Entries past the end of |utf8| become
// string16::npos before the conversion runs.
string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment) {
  const size_t input_length = utf8.length();
  for (size_t& offset : *offsets_for_adjustment) {
    if (offset > input_length)
      offset = string16::npos;
  }

  OffsetAdjuster::Adjustments recorded;
  string16 converted = UTF8ToUTF16WithAdjustments(utf8, &recorded);
  OffsetAdjuster::AdjustOffsets(recorded, offsets_for_adjustment);
  return converted;
}
// Converts |utf16| to UTF-8 and rewrites every entry of
// |offsets_for_adjustment| from an offset into |utf16| to the matching
// offset into the returned string. Entries past the end of |utf16| become
// string16::npos before the conversion runs.
std::string UTF16ToUTF8AndAdjustOffsets(
    const base::StringPiece16& utf16,
    std::vector<size_t>* offsets_for_adjustment) {
  const size_t input_length = utf16.length();
  for (size_t& offset : *offsets_for_adjustment) {
    if (offset > input_length)
      offset = string16::npos;
  }

  std::string converted;
  // Empties |converted| and reserves a guessed capacity.
  PrepareForUTF8Output(utf16.data(), utf16.length(), &converted);

  OffsetAdjuster::Adjustments recorded;
  // The validity flag is intentionally not inspected here.
  ConvertUnicode(utf16.data(), utf16.length(), &converted, &recorded);
  OffsetAdjuster::AdjustOffsets(recorded, offsets_for_adjustment);
  return converted;
}
} // namespace base

View file

@ -0,0 +1,114 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#include <stddef.h>
#include <string>
#include <vector>
#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
namespace base {
// A helper class and associated data structures to adjust offsets into a
// string in response to various adjustments one might do to that string
// (e.g., eliminating a range). For details on offsets, see the comments by
// the AdjustOffsets() function below.
class BASE_EXPORT OffsetAdjuster {
public:
// One recorded alteration: a run of |original_length| code units starting at
// |original_offset| in the original string was replaced by |output_length|
// code units in the output string.
struct BASE_EXPORT Adjustment {
Adjustment(size_t original_offset,
size_t original_length,
size_t output_length);
// Offset of the altered run, relative to the original string.
size_t original_offset;
// Number of code units the run occupied in the original string.
size_t original_length;
// Number of code units the run occupies in the output string.
size_t output_length;
};
typedef std::vector<Adjustment> Adjustments;
// Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
// recorded in |adjustments|. Adjusted offsets greater than |limit| will be
// set to string16::npos.
//
// Offsets represent insertion/selection points between characters: if |src|
// is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
// end of the string. Valid input offsets range from 0 to |src_len|. On
// exit, each offset will have been modified to point at the same logical
// position in the output string. If an offset cannot be successfully
// adjusted (e.g., because it points into the middle of a multibyte sequence),
// it will be set to string16::npos.
static void AdjustOffsets(const Adjustments& adjustments,
std::vector<size_t>* offsets_for_adjustment,
size_t limit = string16::npos);
// Adjusts the single |offset| to reflect the adjustments recorded in
// |adjustments|. As with AdjustOffsets(), a result greater than |limit| is
// replaced by string16::npos.
static void AdjustOffset(const Adjustments& adjustments,
size_t* offset,
size_t limit = string16::npos);
// Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
// of the adjustments recorded in |adjustments|. In other words, the offsets
// provided represent offsets into an adjusted string and the caller wants
// to know the offsets they correspond to in the original string. If an
// offset cannot be successfully unadjusted (e.g., because it points into
// the middle of a multibyte sequence), it will be set to string16::npos.
static void UnadjustOffsets(const Adjustments& adjustments,
std::vector<size_t>* offsets_for_unadjustment);
// Adjusts the single |offset| to reflect the reverse of the adjustments
// recorded in |adjustments|. An |offset| that is already string16::npos is
// left unchanged.
static void UnadjustOffset(const Adjustments& adjustments,
size_t* offset);
// Combines two sequential sets of adjustments, storing the combined revised
// adjustments in |adjustments_on_adjusted_string|. That is, suppose a
// string was altered in some way, with the alterations recorded as
// adjustments in |first_adjustments|. Then suppose the resulting string is
// further altered, with the alterations recorded as adjustments stored in
// |adjustments_on_adjusted_string|, with the offsets recorded in these
// adjustments being with respect to the intermediate string. This function
// combines the two sets of adjustments into one, storing the result in
// |adjustments_on_adjusted_string|, whose offsets are correct with respect
// to the original string.
//
// Assumes both parameters are sorted by increasing offset.
//
// WARNING: Only supports |first_adjustments| that involve collapsing ranges
// of text, not expanding ranges.
static void MergeSequentialAdjustments(
const Adjustments& first_adjustments,
Adjustments* adjustments_on_adjusted_string);
};
// Like the conversions in utf_string_conversions.h, but also fills in an
// |adjustments| parameter that reflects the alterations done to the string.
// It may be NULL, in which case no adjustments are recorded. The bool
// overload returns false if any part of the input could not be decoded.
BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
const char* src,
size_t src_len,
string16* output,
base::OffsetAdjuster::Adjustments* adjustments);
// As above, but returns the converted string and drops the success flag.
BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
const base::StringPiece& utf8,
base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT;
// As above, but instead internally examines the adjustments and applies them
// to |offsets_for_adjustment|. Input offsets greater than the length of the
// input string will be set to string16::npos. See comments by AdjustOffsets().
BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
const base::StringPiece& utf8,
std::vector<size_t>* offsets_for_adjustment);
// The UTF-16 to UTF-8 counterpart of UTF8ToUTF16AndAdjustOffsets(): offsets
// into |utf16| are rewritten as offsets into the returned UTF-8 string, and
// input offsets greater than the length of |utf16| are set to string16::npos.
BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
const base::StringPiece16& utf16,
std::vector<size_t>* offsets_for_adjustment);
} // namespace base
#endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_

View file

@ -0,0 +1,155 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/utf_string_conversion_utils.h"
#include "base/third_party/icu/icu_utf.h"
#include "build/build_config.h"
namespace base {
// ReadUnicodeCharacter --------------------------------------------------------
bool ReadUnicodeCharacter(const char* src,
                          int32_t src_len,
                          int32_t* char_index,
                          uint32_t* code_point_out) {
  int32_t& index = *char_index;

  // U8_NEXT expects to be able to use -1 to signal an error, so the value is
  // decoded into a signed temporary. The caller learns about errors from the
  // return value instead, which is why |code_point_out| can be unsigned.
  int32_t decoded;
  CBU8_NEXT(src, index, src_len, decoded);
  *code_point_out = static_cast<uint32_t>(decoded);

  // The ICU macro advanced the index past the sequence; step back so that it
  // refers to the last code unit consumed.
  index--;

  // Report whether the decoded value is acceptable.
  return IsValidCodepoint(decoded);
}
bool ReadUnicodeCharacter(const char16* src,
                          int32_t src_len,
                          int32_t* char_index,
                          uint32_t* code_point) {
  if (!CBU16_IS_SURROGATE(src[*char_index])) {
    // Not a surrogate, just one 16-bit word.
    *code_point = src[*char_index];
    return IsValidCodepoint(*code_point);
  }

  // A surrogate is only usable as a lead that is immediately followed, still
  // inside the buffer, by a trail. Anything else is an invalid pair;
  // |*code_point| and |*char_index| are left untouched in that case.
  const bool is_lead = CBU16_IS_SURROGATE_LEAD(src[*char_index]);
  if (!is_lead || *char_index + 1 >= src_len ||
      !CBU16_IS_TRAIL(src[*char_index + 1])) {
    return false;
  }

  // Valid surrogate pair: combine both halves and consume the lead, so that
  // |*char_index| ends up on the trail.
  *code_point =
      CBU16_GET_SUPPLEMENTARY(src[*char_index], src[*char_index + 1]);
  (*char_index)++;
  return IsValidCodepoint(*code_point);
}
#if defined(WCHAR_T_IS_UTF32)
bool ReadUnicodeCharacter(const wchar_t* src,
                          int32_t src_len,
                          int32_t* char_index,
                          uint32_t* code_point) {
  // The source is already 32-bit, so the element at |*char_index| is the code
  // point itself. |src_len| is not consulted and |*char_index| is unchanged.
  const wchar_t unit = src[*char_index];
  *code_point = unit;

  // Report whether that value is acceptable.
  return IsValidCodepoint(*code_point);
}
#endif // defined(WCHAR_T_IS_UTF32)
// WriteUnicodeCharacter -------------------------------------------------------
size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) {
  // Fast path the common case of one byte.
  if (code_point <= 0x7f) {
    output->push_back(static_cast<char>(code_point));
    return 1;
  }

  // Grow by the maximum CBU8_APPEND_UNSAFE may write, let the macro fill in
  // the bytes, then trim back to what was actually used.
  const size_t start = output->length();
  size_t end = start;
  output->resize(start + CBU8_MAX_LENGTH);
  CBU8_APPEND_UNSAFE(&(*output)[0], end, code_point);

  // The macro advanced |end| past the inserted character, so it is now the
  // new length of the string.
  output->resize(end);
  return end - start;
}
size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) {
  if (CBU16_LENGTH(code_point) != 1) {
    // Non-BMP characters use a double-character encoding: make room for both
    // code units and let the ICU macro fill them in.
    size_t write_pos = output->length();
    output->resize(write_pos + CBU16_MAX_LENGTH);
    CBU16_APPEND_UNSAFE(&(*output)[0], write_pos, code_point);
    return CBU16_MAX_LENGTH;
  }

  // The code point is in the Basic Multilingual Plane (BMP) and fits in a
  // single code unit.
  output->push_back(static_cast<char16>(code_point));
  return 1;
}
// Generalized Unicode converter -----------------------------------------------
// Empties |output| and reserves a guessed capacity for the UTF-8 form of the
// |src_len| code units at |src|. Only the first code unit is inspected.
template <typename CHAR>
void PrepareForUTF8Output(const CHAR* src,
                          size_t src_len,
                          std::string* output) {
  output->clear();
  if (src_len != 0) {
    // An ASCII first unit is taken as a hint that the whole input is ASCII
    // (one byte per unit); otherwise budget three bytes per unit.
    const size_t bytes_per_unit = (src[0] < 0x80) ? 1 : 3;
    output->reserve(src_len * bytes_per_unit);
  }
}
// Instantiate versions we know callers will need.
#if !defined(OS_WIN)
// wchar_t and char16 are the same thing on Windows.
template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*);
#endif
template void PrepareForUTF8Output(const char16*, size_t, std::string*);
// Empties |output| and reserves a guessed capacity for the UTF-16/UTF-32
// form of the |src_len| UTF-8 bytes at |src|. Only the first byte is
// inspected.
template <typename STRING>
void PrepareForUTF16Or32Output(const char* src,
                               size_t src_len,
                               STRING* output) {
  output->clear();
  if (src_len != 0) {
    // An ASCII first byte is taken as a hint that the input is all ASCII,
    // which means 1:1 correspondence. Otherwise assume each character takes
    // two UTF-8 bytes.
    const bool starts_with_ascii = static_cast<unsigned char>(src[0]) < 0x80;
    output->reserve(starts_with_ascii ? src_len : src_len / 2);
  }
}
// Instantiate versions we know callers will need.
#if !defined(OS_WIN)
// std::wstring and string16 are the same thing on Windows.
template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*);
#endif
template void PrepareForUTF16Or32Output(const char*, size_t, string16*);
} // namespace base

View file

@ -0,0 +1,103 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
#define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
// Low-level UTF handling functions. Most code will want to use the functions
// in utf_string_conversions.h
#include <stddef.h>
#include <stdint.h>
#include "base/base_export.h"
#include "base/strings/string16.h"
namespace base {
inline bool IsValidCodepoint(uint32_t code_point) {
  // Accepts exactly the Unicode scalar values
  // (https://unicode.org/glossary/#unicode_scalar_value): everything up to
  // 0x10FFFF, the highest codepoint allowed, except the surrogate code
  // points [0xD800, 0xDFFF]. Non-characters and unassigned code points are
  // allowed.
  if (code_point > 0x10FFFFu)
    return false;
  const bool is_surrogate = code_point >= 0xD800u && code_point <= 0xDFFFu;
  return !is_surrogate;
}
inline bool IsValidCharacter(uint32_t code_point) {
  // Rejects everything IsValidCodepoint() rejects and, in addition, the
  // non-characters (U+FDD0..U+FDEF, and all code points ending in 0xFFFE or
  // 0xFFFF).
  // https://unicode.org/faq/private_use.html#nonchar1
  if (code_point > 0x10FFFFu)
    return false;  // Beyond the highest code point.
  if (code_point >= 0xD800u && code_point <= 0xDFFFu)
    return false;  // Surrogate.
  if (code_point >= 0xFDD0u && code_point <= 0xFDEFu)
    return false;  // Contiguous non-character range.
  // What remains is valid unless its low 16 bits are 0xFFFE or 0xFFFF.
  return (code_point & 0xFFFEu) != 0xFFFEu;
}
// ReadUnicodeCharacter --------------------------------------------------------
// Reads a UTF-8 stream, placing the next code point into the given output
// |*code_point|. |src| represents the entire string to read, and |*char_index|
// is the character offset within the string to start reading at. |*char_index|
// will be updated to index the last character read, such that incrementing it
// (as in a for loop) will take the reader to the next character.
//
// Returns true on success. On false, |*code_point| will be invalid.
BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
int32_t src_len,
int32_t* char_index,
uint32_t* code_point_out);
// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
BASE_EXPORT bool ReadUnicodeCharacter(const char16* src,
int32_t src_len,
int32_t* char_index,
uint32_t* code_point);
#if defined(WCHAR_T_IS_UTF32)
// Reads UTF-32 character. The usage is the same as the 8-bit version above.
BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
int32_t src_len,
int32_t* char_index,
uint32_t* code_point);
#endif // defined(WCHAR_T_IS_UTF32)
// WriteUnicodeCharacter -------------------------------------------------------
// Appends a UTF-8 character to the given 8-bit string. Returns the number of
// bytes written.
BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
std::string* output);
// Appends the given code point as a UTF-16 character to the given 16-bit
// string. Returns the number of 16-bit values written.
BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output);
#if defined(WCHAR_T_IS_UTF32)
// Appends the given UTF-32 character to the given 32-bit string. Returns the
// number of 32-bit values written.
inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) {
  // With a 32-bit wchar_t every code point is exactly one element, so the
  // value is appended as is and a count of one is reported.
  const wchar_t unit = code_point;
  output->push_back(unit);
  return 1;
}
#endif // defined(WCHAR_T_IS_UTF32)
// Generalized Unicode converter -----------------------------------------------
// Guesses the length of the output in UTF-8 in bytes, clears that output
// string, and reserves that amount of space. We assume that the input
// character types are unsigned, which will be true for UTF-16 and -32 on our
// systems.
template<typename CHAR>
void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output);
// Prepares an output buffer (containing either UTF-16 or -32 data) given some
// UTF-8 input that will be converted to it. See PrepareForUTF8Output().
template<typename STRING>
void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output);
} // namespace base
#endif // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_

View file

@ -0,0 +1,342 @@
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/utf_string_conversions.h"
#include <limits.h>
#include <stdint.h>
#include <type_traits>
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/third_party/icu/icu_utf.h"
#include "build/build_config.h"
namespace base {
namespace {
constexpr int32_t kErrorCodePoint = 0xFFFD;
// Size coefficient ----------------------------------------------------------
// The maximum number of codeunits in the destination encoding corresponding to
// one codeunit in the source encoding.
// Primary template: used for every (source, destination) pair that has no
// explicit specialization below. The static_assert restricts it to
// conversions from a narrower code unit to a wider one.
template <typename SrcChar, typename DestChar>
struct SizeCoefficient {
static_assert(sizeof(SrcChar) < sizeof(DestChar),
"Default case: from a smaller encoding to the bigger one");
// ASCII symbols are encoded by one codeunit in all encodings.
static constexpr int value = 1;
};
// UTF-16 -> UTF-8.
template <>
struct SizeCoefficient<char16, char> {
// One UTF-16 codeunit corresponds to at most 3 codeunits in UTF-8.
static constexpr int value = 3;
};
#if defined(WCHAR_T_IS_UTF32)
// UTF-32 (wchar_t) -> UTF-8.
template <>
struct SizeCoefficient<wchar_t, char> {
// UTF-8 uses at most 4 codeunits per character.
static constexpr int value = 4;
};
// UTF-32 (wchar_t) -> UTF-16.
template <>
struct SizeCoefficient<wchar_t, char16> {
// UTF-16 uses at most 2 codeunits per character.
static constexpr int value = 2;
};
#endif // defined(WCHAR_T_IS_UTF32)
// Convenience accessor for SizeCoefficient<...>::value. Both character types
// are passed through std::decay_t before the lookup.
template <typename SrcChar, typename DestChar>
constexpr int size_coefficient_v =
SizeCoefficient<std::decay_t<SrcChar>, std::decay_t<DestChar>>::value;
// UnicodeAppendUnsafe --------------------------------------------------------
// Function overloads that write code_point to the output string. Output string
// has to have enough space for the codepoint.
// Convenience typedef that checks whether the passed in type is integral (i.e.
// bool, char, int or their extended versions) and is of the correct size.
// Resolves to |bool| only when |Char| is an integral type exactly |N| bits
// wide; otherwise substitution fails and the overload using it is dropped.
template <typename Char, size_t N>
using EnableIfBitsAre = std::enable_if_t<std::is_integral<Char>::value &&
CHAR_BIT * sizeof(Char) == N,
bool>;
// 8-bit destination: writes |code_point| as UTF-8 at |out[*size]| and advances
// |*size| by the number of code units written.
template <typename Char, EnableIfBitsAre<Char, 8> = true>
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
CBU8_APPEND_UNSAFE(out, *size, code_point);
}
// 16-bit destination: writes |code_point| as UTF-16 at |out[*size]| and
// advances |*size| by the number of code units written.
template <typename Char, EnableIfBitsAre<Char, 16> = true>
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
CBU16_APPEND_UNSAFE(out, *size, code_point);
}
// 32-bit destination: stores |code_point| in a single element and advances
// |*size| by one.
template <typename Char, EnableIfBitsAre<Char, 32> = true>
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
out[(*size)++] = code_point;
}
// DoUTFConversion ------------------------------------------------------------
// Main driver of UTFConversion specialized for different Src encodings.
// dest has to have enough room for the converted text.
// UTF-8 source. Decodes |src_len| bytes at |src| and appends each code point
// to |dest| through UnicodeAppendUnsafe(), which advances |*dest_len|.
// Undecodable input is written as kErrorCodePoint and makes the function
// return false.
template <typename DestChar>
bool DoUTFConversion(const char* src,
                     int32_t src_len,
                     DestChar* dest,
                     int32_t* dest_len) {
  bool all_valid = true;
  int32_t pos = 0;
  while (pos < src_len) {
    // CBU8_NEXT advances |pos| past the sequence it consumed.
    int32_t decoded;
    CBU8_NEXT(src, pos, src_len, decoded);

    if (!IsValidCodepoint(decoded)) {
      decoded = kErrorCodePoint;
      all_valid = false;
    }
    UnicodeAppendUnsafe(dest, dest_len, decoded);
  }
  return all_valid;
}
// UTF-16 source. Decodes |src_len| code units at |src| and appends each code
// point to |dest| through UnicodeAppendUnsafe(), which advances |*dest_len|.
// Unpaired surrogates are written as kErrorCodePoint and make the function
// return false.
template <typename DestChar>
bool DoUTFConversion(const char16* src,
                     int32_t src_len,
                     DestChar* dest,
                     int32_t* dest_len) {
  bool success = true;

  // Maps a code unit that stands on its own to a code point, substituting the
  // error code point (and clearing |success|) when it is not usable alone.
  auto convert_lone_unit = [&success](char16 unit) -> int32_t {
    if (CBU16_IS_SINGLE(unit) && IsValidCodepoint(unit))
      return unit;
    success = false;
    return kErrorCodePoint;
  };

  // Stop one short of the end so that |src[pos + 1]| can be read without a
  // bounds check in the middle of a surrogate pair.
  const int32_t last = src_len - 1;
  int32_t pos = 0;
  while (pos < last) {
    int32_t decoded;
    const bool is_pair =
        CBU16_IS_LEAD(src[pos]) && CBU16_IS_TRAIL(src[pos + 1]);
    if (is_pair) {
      decoded = CBU16_GET_SUPPLEMENTARY(src[pos], src[pos + 1]);
      if (!IsValidCodepoint(decoded)) {
        decoded = kErrorCodePoint;
        success = false;
      }
      pos += 2;
    } else {
      decoded = convert_lone_unit(src[pos]);
      ++pos;
    }
    UnicodeAppendUnsafe(dest, dest_len, decoded);
  }

  // At most one code unit is left over; it cannot be part of a pair.
  if (pos < src_len)
    UnicodeAppendUnsafe(dest, dest_len, convert_lone_unit(src[pos]));

  return success;
}
#if defined(WCHAR_T_IS_UTF32)
// UTF-32 (wchar_t) source. Every element of |src| is a code point on its own;
// each is appended to |dest| through UnicodeAppendUnsafe(), which advances
// |*dest_len|. Elements rejected by IsValidCodepoint() are written as
// kErrorCodePoint and make the function return false.
template <typename DestChar>
bool DoUTFConversion(const wchar_t* src,
                     int32_t src_len,
                     DestChar* dest,
                     int32_t* dest_len) {
  bool all_valid = true;
  int32_t pos = 0;
  while (pos < src_len) {
    const int32_t raw = src[pos];
    const bool valid = IsValidCodepoint(raw);
    if (!valid)
      all_valid = false;
    UnicodeAppendUnsafe(dest, dest_len, valid ? raw : kErrorCodePoint);
    ++pos;
  }
  return all_valid;
}
#endif // defined(WCHAR_T_IS_UTF32)
// UTFConversion --------------------------------------------------------------
// Function template for generating all UTF conversions.
template <typename InputString, typename DestString>
bool UTFConversion(const InputString& src_str, DestString* dest_str) {
if (IsStringASCII(src_str)) {
dest_str->assign(src_str.begin(), src_str.end());
return true;
}
dest_str->resize(src_str.length() *
size_coefficient_v<typename InputString::value_type,
typename DestString::value_type>);
// Empty string is ASCII => it OK to call operator[].
auto* dest = &(*dest_str)[0];
// ICU requires 32 bit numbers.
int32_t src_len32 = static_cast<int32_t>(src_str.length());
int32_t dest_len32 = 0;
bool res = DoUTFConversion(src_str.data(), src_len32, dest, &dest_len32);
dest_str->resize(dest_len32);
dest_str->shrink_to_fit();
return res;
}
} // namespace
// UTF16 <-> UTF8 --------------------------------------------------------------
bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
return UTFConversion(StringPiece(src, src_len), output);
}
string16 UTF8ToUTF16(StringPiece utf8) {
string16 ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
UTF8ToUTF16(utf8.data(), utf8.size(), &ret);
return ret;
}
bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
return UTFConversion(StringPiece16(src, src_len), output);
}
std::string UTF16ToUTF8(StringPiece16 utf16) {
std::string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
return ret;
}
// UTF-16 <-> Wide -------------------------------------------------------------
#if defined(WCHAR_T_IS_UTF16)
// When wide == UTF-16 the conversions are a NOP.
// wchar_t already holds UTF-16 in this configuration, so the code units are
// copied across unchanged and the conversion always reports success.
bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  const wchar_t* const src_end = src + src_len;
  output->assign(src, src_end);
  return true;
}

// Returns a string16 holding the same code units as |wide|.
string16 WideToUTF16(WStringPiece wide) {
  string16 copy(wide.begin(), wide.end());
  return copy;
}
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
output->assign(src, src + src_len);
return true;
}
std::wstring UTF16ToWide(StringPiece16 utf16) {
return std::wstring(utf16.begin(), utf16.end());
}
#elif defined(WCHAR_T_IS_UTF32)
bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
return UTFConversion(base::WStringPiece(src, src_len), output);
}
string16 WideToUTF16(WStringPiece wide) {
string16 ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
WideToUTF16(wide.data(), wide.length(), &ret);
return ret;
}
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
return UTFConversion(StringPiece16(src, src_len), output);
}
std::wstring UTF16ToWide(StringPiece16 utf16) {
std::wstring ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
UTF16ToWide(utf16.data(), utf16.length(), &ret);
return ret;
}
#endif // defined(WCHAR_T_IS_UTF32)
// UTF-8 <-> Wide --------------------------------------------------------------
// UTF8ToWide is the same code, regardless of whether wide is 16 or 32 bits
bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
return UTFConversion(StringPiece(src, src_len), output);
}
std::wstring UTF8ToWide(StringPiece utf8) {
std::wstring ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
UTF8ToWide(utf8.data(), utf8.length(), &ret);
return ret;
}
#if defined(WCHAR_T_IS_UTF16)
// Easy case since we can use the "utf" versions we already wrote above.
// wchar_t holds UTF-16 in this configuration, so both overloads view the
// input as char16 via as_u16cstr() and defer to UTF16ToUTF8().
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
  const auto* utf16 = as_u16cstr(src);
  return UTF16ToUTF8(utf16, src_len, output);
}

std::string WideToUTF8(WStringPiece wide) {
  const StringPiece16 utf16(as_u16cstr(wide), wide.size());
  return UTF16ToUTF8(utf16);
}
#elif defined(WCHAR_T_IS_UTF32)
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
return UTFConversion(WStringPiece(src, src_len), output);
}
std::string WideToUTF8(WStringPiece wide) {
std::string ret;
// Ignore the success flag of this call, it will do the best it can for
// invalid input, which is what we want here.
WideToUTF8(wide.data(), wide.length(), &ret);
return ret;
}
#endif // defined(WCHAR_T_IS_UTF32)
// Widens each character of |ascii| to one UTF-16 code unit. DCHECKs that the
// input really is ASCII.
string16 ASCIIToUTF16(StringPiece ascii) {
  DCHECK(IsStringASCII(ascii)) << ascii;
  string16 widened(ascii.begin(), ascii.end());
  return widened;
}
// Narrows each code unit of |utf16| to one char. DCHECKs that the input
// really is ASCII.
std::string UTF16ToASCII(StringPiece16 utf16) {
  DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
  std::string narrowed(utf16.begin(), utf16.end());
  return narrowed;
}
} // namespace base

View file

@ -0,0 +1,54 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
#include <stddef.h>
#include <string>
#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
namespace base {
// These convert between UTF-8, -16, and -32 strings. They are potentially slow,
// so avoid unnecessary conversions. The low-level versions return a boolean
// indicating whether the conversion was 100% valid. When it was not, they
// still do the best they can and put the result in the output buffer. The
// versions that return strings ignore this error and just return the best
// conversion possible.
//
// In the low-level versions, |src| and |src_len| describe the input in code
// units of the source encoding, and |output| receives the converted string.

// Wide -> UTF-8.
BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len,
std::string* output);
BASE_EXPORT std::string WideToUTF8(WStringPiece wide) WARN_UNUSED_RESULT;
// UTF-8 -> wide.
BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len,
std::wstring* output);
BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
// Wide -> UTF-16.
BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len,
string16* output);
BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT;
// UTF-16 -> wide.
BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len,
std::wstring* output);
BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT;
// UTF-8 -> UTF-16.
BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT;
// UTF-16 -> UTF-8.
BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len,
std::string* output);
BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT;
// This converts an ASCII string, typically a hardcoded constant, to a UTF16
// string. The input must be ASCII.
BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
// beforehand.
BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT;
} // namespace base
#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_

View file

@ -0,0 +1,57 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/macros.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
std::string output_std_string;
std::wstring output_std_wstring;
base::string16 output_string16;
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
size);
ignore_result(base::UTF8ToWide(string_piece_input));
base::UTF8ToWide(reinterpret_cast<const char*>(data), size,
&output_std_wstring);
ignore_result(base::UTF8ToUTF16(string_piece_input));
base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size,
&output_string16);
// Test for char16.
if (size % 2 == 0) {
base::StringPiece16 string_piece_input16(
reinterpret_cast<const base::char16*>(data), size / 2);
ignore_result(base::UTF16ToWide(output_string16));
base::UTF16ToWide(reinterpret_cast<const base::char16*>(data), size / 2,
&output_std_wstring);
ignore_result(base::UTF16ToUTF8(string_piece_input16));
base::UTF16ToUTF8(reinterpret_cast<const base::char16*>(data), size / 2,
&output_std_string);
}
// Test for wchar_t.
size_t wchar_t_size = sizeof(wchar_t);
if (size % wchar_t_size == 0) {
ignore_result(base::WideToUTF8(output_std_wstring));
base::WideToUTF8(reinterpret_cast<const wchar_t*>(data),
size / wchar_t_size, &output_std_string);
ignore_result(base::WideToUTF16(output_std_wstring));
base::WideToUTF16(reinterpret_cast<const wchar_t*>(data),
size / wchar_t_size, &output_string16);
}
// Test for ASCII. This condition is needed to avoid hitting instant CHECK
// failures.
if (base::IsStringASCII(string_piece_input)) {
output_string16 = base::ASCIIToUTF16(string_piece_input);
base::StringPiece16 string_piece_input16(output_string16);
ignore_result(base::UTF16ToASCII(string_piece_input16));
}
return 0;
}