Repo created
This commit is contained in:
parent
81b91f4139
commit
f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
2
TMessagesProj/jni/voip/webrtc/base/strings/OWNERS
Normal file
2
TMessagesProj/jni/voip/webrtc/base/strings/OWNERS
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
per-file safe_sprintf*=jln@chromium.org
|
||||
per-file safe_sprintf*=mdempsky@chromium.org
|
||||
92
TMessagesProj/jni/voip/webrtc/base/strings/char_traits.h
Normal file
92
TMessagesProj/jni/voip/webrtc/base/strings/char_traits.h
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright 2018 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_CHAR_TRAITS_H_
|
||||
#define BASE_STRINGS_CHAR_TRAITS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "base/compiler_specific.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// constexpr version of http://en.cppreference.com/w/cpp/string/char_traits.
|
||||
// This currently just implements the bits needed to support a (mostly)
|
||||
// constexpr StringPiece.
|
||||
//
|
||||
// TODO(dcheng): Once we switch to C++17, most methods will become constexpr and
|
||||
// we can switch over to using the one in the standard library.
|
||||
template <typename T>
struct CharTraits {
  // Lexicographically compares the first |n| characters of |s1| and |s2|.
  // Yields 0 when they are equal, -1 when |s1| orders before |s2|, and 1 when
  // |s1| orders after |s2|.
  static constexpr int compare(const T* s1, const T* s2, size_t n) noexcept;

  // Counts the characters of the null-terminated string |s|; the terminator
  // itself is not counted.
  static constexpr size_t length(const T* s) noexcept;
};

template <typename T>
constexpr int CharTraits<T>::compare(const T* s1,
                                     const T* s2,
                                     size_t n) noexcept {
  // The first differing position decides the result.
  for (size_t pos = 0; pos != n; ++pos) {
    if (s1[pos] < s2[pos])
      return -1;
    if (s1[pos] > s2[pos])
      return 1;
  }
  return 0;
}

template <typename T>
constexpr size_t CharTraits<T>::length(const T* s) noexcept {
  // Walk to the terminator and report how far we travelled.
  const T* cursor = s;
  while (*cursor)
    ++cursor;
  return static_cast<size_t>(cursor - s);
}
|
||||
|
||||
// char specialization of CharTraits that can use clang's constexpr intrinsics,
|
||||
// where available.
|
||||
template <>
|
||||
struct CharTraits<char> {
|
||||
static constexpr int compare(const char* s1,
|
||||
const char* s2,
|
||||
size_t n) noexcept;
|
||||
static constexpr size_t length(const char* s) noexcept;
|
||||
};
|
||||
|
||||
constexpr int CharTraits<char>::compare(const char* s1,
|
||||
const char* s2,
|
||||
size_t n) noexcept {
|
||||
#if HAS_FEATURE(cxx_constexpr_string_builtins)
|
||||
return __builtin_memcmp(s1, s2, n);
|
||||
#else
|
||||
for (; n; --n, ++s1, ++s2) {
|
||||
if (*s1 < *s2)
|
||||
return -1;
|
||||
if (*s1 > *s2)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
constexpr size_t CharTraits<char>::length(const char* s) noexcept {
|
||||
#if defined(__clang__)
|
||||
return __builtin_strlen(s);
|
||||
#else
|
||||
size_t i = 0;
|
||||
for (; *s; ++s)
|
||||
++i;
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_CHAR_TRAITS_H_
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/latin1_string_conversions.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Builds a string16 holding |length| code units taken from |latin1| when that
// pointer is non-null, otherwise from |utf16|. A zero |length| yields an empty
// string without reading either pointer.
string16 Latin1OrUTF16ToUTF16(size_t length,
                              const Latin1Char* latin1,
                              const char16* utf16) {
  if (length == 0)
    return string16();
  return latin1 ? string16(latin1, latin1 + length)
                : string16(utf16, utf16 + length);
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_
|
||||
#define BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// This definition of Latin1Char matches the definition of LChar in Blink. We
|
||||
// use unsigned char rather than char to make it less tempting to mix and match
|
||||
// Latin-1 and UTF-8 characters.
|
||||
typedef unsigned char Latin1Char;
|
||||
|
||||
// This somewhat odd function is designed to help us convert from Blink Strings
|
||||
// to string16. A Blink string is either backed by an array of Latin-1
|
||||
// characters or an array of UTF-16 characters. This function is called by
|
||||
// WebString::operator string16() to convert one or the other character array
|
||||
// to string16. This function is defined here rather than in WebString.h to
|
||||
// avoid binary bloat in all the callers of the conversion operator.
|
||||
BASE_EXPORT string16 Latin1OrUTF16ToUTF16(size_t length,
|
||||
const Latin1Char* latin1,
|
||||
const char16* utf16);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_LATIN1_STRING_CONVERSIONS_H_
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/nullable_string16.h"
|
||||
|
||||
#include <ostream>
|
||||
#include <utility>
|
||||
|
||||
namespace base {
|
||||
// The special member functions are defaulted here, out of line.
NullableString16::NullableString16() = default;
NullableString16::NullableString16(const NullableString16& other) = default;
NullableString16::NullableString16(NullableString16&& other) = default;

// Stores a copy of |string| unless |is_null| is set; in that case |string| is
// ignored and the object stays in the null state.
NullableString16::NullableString16(const string16& string, bool is_null) {
  if (is_null)
    return;
  string_.emplace(string);
}

// Adopts |optional_string16| as-is; an empty optional means "null".
NullableString16::NullableString16(Optional<string16> optional_string16)
    : string_(std::move(optional_string16)) {}

NullableString16::~NullableString16() = default;

NullableString16& NullableString16::operator=(const NullableString16& other) =
    default;
NullableString16& NullableString16::operator=(NullableString16&& other) =
    default;
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const NullableString16& value) {
|
||||
return value.is_null() ? out << "(null)" : out << value.string();
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_NULLABLE_STRING16_H_
|
||||
#define BASE_STRINGS_NULLABLE_STRING16_H_
|
||||
|
||||
#include <iosfwd>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/optional.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_util.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// This class is a simple wrapper for string16 which also contains a null
|
||||
// state. This should be used only where the difference between null and
|
||||
// empty is meaningful.
|
||||
class BASE_EXPORT NullableString16 {
|
||||
public:
|
||||
NullableString16();
|
||||
NullableString16(const NullableString16& other);
|
||||
NullableString16(NullableString16&& other);
|
||||
NullableString16(const string16& string, bool is_null);
|
||||
explicit NullableString16(Optional<string16> optional_string16);
|
||||
~NullableString16();
|
||||
|
||||
NullableString16& operator=(const NullableString16& other);
|
||||
NullableString16& operator=(NullableString16&& other);
|
||||
|
||||
const string16& string() const {
|
||||
return string_ ? *string_ : EmptyString16();
|
||||
}
|
||||
bool is_null() const { return !string_; }
|
||||
const Optional<string16>& as_optional_string16() const { return string_; }
|
||||
|
||||
private:
|
||||
Optional<string16> string_;
|
||||
};
|
||||
|
||||
inline bool operator==(const NullableString16& a, const NullableString16& b) {
|
||||
return a.as_optional_string16() == b.as_optional_string16();
|
||||
}
|
||||
|
||||
inline bool operator!=(const NullableString16& a, const NullableString16& b) {
|
||||
return !(a == b);
|
||||
}
|
||||
|
||||
BASE_EXPORT std::ostream& operator<<(std::ostream& out,
|
||||
const NullableString16& value);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_NULLABLE_STRING16_H_
|
||||
155
TMessagesProj/jni/voip/webrtc/base/strings/pattern.cc
Normal file
155
TMessagesProj/jni/voip/webrtc/base/strings/pattern.cc
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/pattern.h"
|
||||
|
||||
#include "base/third_party/icu/icu_utf.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// True for the two pattern metacharacters, '*' and '?'.
constexpr bool IsWildcard(base_icu::UChar32 character) {
  switch (character) {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Searches for the next subpattern of |pattern| in |string|, up to the given
|
||||
// |maximum_distance|. The subpattern extends from the start of |pattern| up to
|
||||
// the first wildcard character (or the end of the string). If the value of
|
||||
// |maximum_distance| is negative, the maximum distance is considered infinite.
|
||||
template <typename CHAR, typename NEXT>
|
||||
constexpr bool SearchForChars(const CHAR** pattern,
|
||||
const CHAR* pattern_end,
|
||||
const CHAR** string,
|
||||
const CHAR* string_end,
|
||||
int maximum_distance,
|
||||
NEXT next) {
|
||||
const CHAR* pattern_start = *pattern;
|
||||
const CHAR* string_start = *string;
|
||||
bool escape = false;
|
||||
while (true) {
|
||||
if (*pattern == pattern_end) {
|
||||
// If this is the end of the pattern, only accept the end of the string;
|
||||
// anything else falls through to the mismatch case.
|
||||
if (*string == string_end)
|
||||
return true;
|
||||
} else {
|
||||
// If we have found a wildcard, we're done.
|
||||
if (!escape && IsWildcard(**pattern))
|
||||
return true;
|
||||
|
||||
// Check if the escape character is found. If so, skip it and move to the
|
||||
// next character.
|
||||
if (!escape && **pattern == '\\') {
|
||||
escape = true;
|
||||
next(pattern, pattern_end);
|
||||
continue;
|
||||
}
|
||||
|
||||
escape = false;
|
||||
|
||||
if (*string == string_end)
|
||||
return false;
|
||||
|
||||
// Check if the chars match, if so, increment the ptrs.
|
||||
const CHAR* pattern_next = *pattern;
|
||||
const CHAR* string_next = *string;
|
||||
base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
|
||||
if (pattern_char == next(&string_next, string_end) &&
|
||||
pattern_char != CBU_SENTINEL) {
|
||||
*pattern = pattern_next;
|
||||
*string = string_next;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Mismatch. If we have reached the maximum distance, return false,
|
||||
// otherwise restart at the beginning of the pattern with the next character
|
||||
// in the string.
|
||||
// TODO(bauerb): This is a naive implementation of substring search, which
|
||||
// could be implemented with a more efficient algorithm, e.g.
|
||||
// Knuth-Morris-Pratt (at the expense of requiring preprocessing).
|
||||
if (maximum_distance == 0)
|
||||
return false;
|
||||
|
||||
// Because unlimited distance is represented as -1, this will never reach 0
|
||||
// and therefore fail the match above.
|
||||
maximum_distance--;
|
||||
*pattern = pattern_start;
|
||||
next(&string_start, string_end);
|
||||
*string = string_start;
|
||||
}
|
||||
}
|
||||
|
||||
// Consumes consecutive wildcard characters (? or *). Returns the maximum number
|
||||
// of characters matched by the sequence of wildcards, or -1 if the wildcards
|
||||
// match an arbitrary number of characters (which is the case if it contains at
|
||||
// least one *).
|
||||
// Advances |*pattern| past a leading run of '?' and '*' characters, stopping
// at the first other character or at |end|. |next| is used to step over each
// wildcard.
template <typename CHAR, typename NEXT>
constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
  int question_marks = 0;
  bool saw_asterisk = false;
  for (; *pattern != end; next(pattern, end)) {
    const CHAR current = **pattern;
    if (current == '?') {
      ++question_marks;
    } else if (current == '*') {
      saw_asterisk = true;
    } else {
      break;  // First non-wildcard: leave |*pattern| pointing at it.
    }
  }
  // A single '*' makes the run match arbitrarily many characters.
  if (saw_asterisk)
    return -1;
  return question_marks;
}
|
||||
|
||||
// Matches [eval, eval_end) against [pattern, pattern_end) by alternating
// between consuming a run of wildcards and locating the literal subpattern
// that follows it, until the pattern is exhausted.
template <typename CHAR, typename NEXT>
constexpr bool MatchPatternT(const CHAR* eval,
                             const CHAR* eval_end,
                             const CHAR* pattern,
                             const CHAR* pattern_end,
                             NEXT next) {
  while (true) {
    const int wildcard_budget = EatWildcards(&pattern, pattern_end, next);
    const bool found = SearchForChars(&pattern, pattern_end, &eval, eval_end,
                                      wildcard_budget, next);
    if (!found)
      return false;
    if (pattern == pattern_end)
      return true;
  }
}
|
||||
|
||||
struct NextCharUTF8 {
|
||||
base_icu::UChar32 operator()(const char** p, const char* end) {
|
||||
base_icu::UChar32 c;
|
||||
int offset = 0;
|
||||
CBU8_NEXT(*p, offset, end - *p, c);
|
||||
*p += offset;
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
struct NextCharUTF16 {
|
||||
base_icu::UChar32 operator()(const char16** p, const char16* end) {
|
||||
base_icu::UChar32 c;
|
||||
int offset = 0;
|
||||
CBU16_NEXT(*p, offset, end - *p, c);
|
||||
*p += offset;
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// UTF-8 entry point: matches |eval| against the wildcard |pattern|.
bool MatchPattern(StringPiece eval, StringPiece pattern) {
  const char* const eval_begin = eval.data();
  const char* const pattern_begin = pattern.data();
  return MatchPatternT(eval_begin, eval_begin + eval.size(), pattern_begin,
                       pattern_begin + pattern.size(), NextCharUTF8());
}
|
||||
|
||||
// UTF-16 entry point: matches |eval| against the wildcard |pattern|.
bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) {
  const char16* const eval_begin = eval.data();
  const char16* const pattern_begin = pattern.data();
  return MatchPatternT(eval_begin, eval_begin + eval.size(), pattern_begin,
                       pattern_begin + pattern.size(), NextCharUTF16());
}
|
||||
|
||||
} // namespace base
|
||||
23
TMessagesProj/jni/voip/webrtc/base/strings/pattern.h
Normal file
23
TMessagesProj/jni/voip/webrtc/base/strings/pattern.h
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_PATTERN_H_
|
||||
#define BASE_STRINGS_PATTERN_H_
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Returns true if the |string| passed in matches the |pattern|. The pattern
|
||||
// string can contain wildcards like * and ?.
|
||||
//
|
||||
// The backslash character (\) is an escape character for * and ?.
|
||||
// ? matches 0 or 1 character, while * matches 0 or more characters.
|
||||
BASE_EXPORT bool MatchPattern(StringPiece string, StringPiece pattern);
|
||||
BASE_EXPORT bool MatchPattern(StringPiece16 string, StringPiece16 pattern);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_PATTERN_H_
|
||||
682
TMessagesProj/jni/voip/webrtc/base/strings/safe_sprintf.cc
Normal file
682
TMessagesProj/jni/voip/webrtc/base/strings/safe_sprintf.cc
Normal file
|
|
@ -0,0 +1,682 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/safe_sprintf.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "base/macros.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
// In debug builds, we use RAW_CHECK() to print useful error messages, if
|
||||
// SafeSPrintf() is called with broken arguments.
|
||||
// As our contract promises that SafeSPrintf() can be called from any
|
||||
// restricted run-time context, it is not actually safe to call logging
|
||||
// functions from it; and we only ever do so for debug builds and hope for the
|
||||
// best. We should _never_ call any logging function other than RAW_CHECK(),
|
||||
// and we should _never_ include any logging code that is active in production
|
||||
// builds. Most notably, we should not include these logging functions in
|
||||
// unofficial release builds, even though those builds would otherwise have
|
||||
// DCHECKS() enabled.
|
||||
// In other words; please do not remove the #ifdef around this #include.
|
||||
// Instead, in production builds we opt for returning a degraded result,
|
||||
// whenever an error is encountered.
|
||||
// E.g. The broken function call
|
||||
// SafeSPrintf("errno = %d (%x)", errno, strerror(errno))
|
||||
// will print something like
|
||||
// errno = 13, (%x)
|
||||
// instead of
|
||||
// errno = 13 (Access denied)
|
||||
// In most of the anticipated use cases, that's probably the preferred
|
||||
// behavior.
|
||||
#include "base/logging.h"
|
||||
#define DEBUG_CHECK RAW_CHECK
|
||||
#else
|
||||
#define DEBUG_CHECK(x) do { if (x) { } } while (0)
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
namespace strings {
|
||||
|
||||
// The code in this file is extremely careful to be async-signal-safe.
|
||||
//
|
||||
// Most obviously, we avoid calling any code that could dynamically allocate
|
||||
// memory. Doing so would almost certainly result in bugs and dead-locks.
|
||||
// We also avoid calling any other STL functions that could have unintended
|
||||
// side-effects involving memory allocation or access to other shared
|
||||
// resources.
|
||||
//
|
||||
// But on top of that, we also avoid calling other library functions, as many
|
||||
// of them have the side-effect of calling getenv() (in order to deal with
|
||||
// localization) or accessing errno. The latter sounds benign, but there are
|
||||
// several execution contexts where it isn't even possible to safely read let
|
||||
// alone write errno.
|
||||
//
|
||||
// The stated design goal of the SafeSPrintf() function is that it can be
|
||||
// called from any context that can safely call C or C++ code (i.e. anything
|
||||
// that doesn't require assembly code).
|
||||
//
|
||||
// For a brief overview of some but not all of the issues with async-signal-
|
||||
// safety, refer to:
|
||||
// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
|
||||
|
||||
namespace {
// All bits of ssize_t(-1) reinterpreted as size_t and shifted right by one,
// i.e. the largest positive ssize_t value expressed as a size_t.
const size_t kSSizeMaxConst =
    static_cast<size_t>(static_cast<ssize_t>(-1)) >> 1;

// Digit tables indexed by digit value (0..15).
const char kUpCaseHexDigits[] = "0123456789ABCDEF";
const char kDownCaseHexDigits[] = "0123456789abcdef";
}  // namespace
|
||||
|
||||
#if defined(NDEBUG)
|
||||
// We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(),
|
||||
// but C++ doesn't allow us to do that for constants. Instead, we have to
|
||||
// use careful casting and shifting. We later use a static_assert to
|
||||
// verify that this worked correctly.
|
||||
namespace {
|
||||
const size_t kSSizeMax = kSSizeMaxConst;
|
||||
}
|
||||
#else // defined(NDEBUG)
|
||||
// For efficiency, we really need kSSizeMax to be a constant. But for unit
|
||||
// tests, it should be adjustable. This allows us to verify edge cases without
|
||||
// having to fill the entire available address space. As a compromise, we make
|
||||
// kSSizeMax adjustable in debug builds, and then only compile that particular
|
||||
// part of the unit test in debug builds.
|
||||
namespace {
|
||||
static size_t kSSizeMax = kSSizeMaxConst;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
void SetSafeSPrintfSSizeMaxForTest(size_t max) {
|
||||
kSSizeMax = max;
|
||||
}
|
||||
|
||||
size_t GetSafeSPrintfSSizeMaxForTest() {
|
||||
return kSSizeMax;
|
||||
}
|
||||
}
|
||||
#endif // defined(NDEBUG)
|
||||
|
||||
namespace {
|
||||
class Buffer {
|
||||
public:
|
||||
// |buffer| is caller-allocated storage that SafeSPrintf() writes to. It
|
||||
// has |size| bytes of writable storage. It is the caller's responsibility
|
||||
// to ensure that the buffer is at least one byte in size, so that it fits
|
||||
// the trailing NUL that will be added by the destructor. The buffer also
|
||||
// must be smaller or equal to kSSizeMax in size.
|
||||
Buffer(char* buffer, size_t size)
|
||||
: buffer_(buffer),
|
||||
size_(size - 1), // Account for trailing NUL byte
|
||||
count_(0) {
|
||||
// MSVS2013's standard library doesn't mark max() as constexpr yet. cl.exe
|
||||
// supports static_cast but doesn't really implement constexpr yet so it doesn't
|
||||
// complain, but clang does.
|
||||
#if __cplusplus >= 201103 && !(defined(__clang__) && defined(OS_WIN))
|
||||
static_assert(kSSizeMaxConst ==
|
||||
static_cast<size_t>(std::numeric_limits<ssize_t>::max()),
|
||||
"kSSizeMaxConst should be the max value of an ssize_t");
|
||||
#endif
|
||||
DEBUG_CHECK(size > 0);
|
||||
DEBUG_CHECK(size <= kSSizeMax);
|
||||
}
|
||||
|
||||
~Buffer() {
|
||||
// The code calling the constructor guaranteed that there was enough space
|
||||
// to store a trailing NUL -- and in debug builds, we are actually
|
||||
// verifying this with DEBUG_CHECK()s in the constructor. So, we can
|
||||
// always unconditionally write the NUL byte in the destructor. We do not
|
||||
// need to adjust the count_, as SafeSPrintf() copies snprintf() in not
|
||||
// including the NUL byte in its return code.
|
||||
*GetInsertionPoint() = '\000';
|
||||
}
|
||||
|
||||
// Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The
|
||||
// caller can now stop adding more data, as GetCount() has reached its
|
||||
// maximum possible value.
|
||||
inline bool OutOfAddressableSpace() const {
|
||||
return count_ == static_cast<size_t>(kSSizeMax - 1);
|
||||
}
|
||||
|
||||
// Returns the number of bytes that would have been emitted to |buffer_|
|
||||
// if it was sized sufficiently large. This number can be larger than
|
||||
// |size_|, if the caller provided an insufficiently large output buffer.
|
||||
// But it will never be bigger than |kSSizeMax-1|.
|
||||
inline ssize_t GetCount() const {
|
||||
DEBUG_CHECK(count_ < kSSizeMax);
|
||||
return static_cast<ssize_t>(count_);
|
||||
}
|
||||
|
||||
// Emits one |ch| character into the |buffer_| and updates the |count_| of
|
||||
// characters that are currently supposed to be in the buffer.
|
||||
// Returns "false", iff the buffer was already full.
|
||||
// N.B. |count_| increases even if no characters have been written. This is
|
||||
// needed so that GetCount() can return the number of bytes that should
|
||||
// have been allocated for the |buffer_|.
|
||||
inline bool Out(char ch) {
|
||||
if (size_ >= 1 && count_ < size_) {
|
||||
buffer_[count_] = ch;
|
||||
return IncrementCountByOne();
|
||||
}
|
||||
// |count_| still needs to be updated, even if the buffer has been
|
||||
// filled completely. This allows SafeSPrintf() to return the number of
|
||||
// bytes that should have been emitted.
|
||||
IncrementCountByOne();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Inserts |padding|-|len| bytes worth of padding into the |buffer_|.
|
||||
// |count_| will also be incremented by the number of bytes that were meant
|
||||
// to be emitted. The |pad| character is typically either a ' ' space
|
||||
// or a '0' zero, but other non-NUL values are legal.
|
||||
// Returns "false", iff the the |buffer_| filled up (i.e. |count_|
|
||||
// overflowed |size_|) at any time during padding.
|
||||
inline bool Pad(char pad, size_t padding, size_t len) {
|
||||
DEBUG_CHECK(pad);
|
||||
DEBUG_CHECK(padding <= kSSizeMax);
|
||||
for (; padding > len; --padding) {
|
||||
if (!Out(pad)) {
|
||||
if (--padding) {
|
||||
IncrementCount(padding-len);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// POSIX doesn't define any async-signal-safe function for converting
|
||||
// an integer to ASCII. Define our own version.
|
||||
//
|
||||
// This also gives us the ability to make the function a little more
|
||||
// powerful and have it deal with |padding|, with truncation, and with
|
||||
// predicting the length of the untruncated output.
|
||||
//
|
||||
// IToASCII() converts an integer |i| to ASCII.
|
||||
//
|
||||
// Unlike similar functions in the standard C library, it never appends a
|
||||
// NUL character. This is left for the caller to do.
|
||||
//
|
||||
// While the function signature takes a signed int64_t, the code decides at
|
||||
// run-time whether to treat the argument as signed (int64_t) or as unsigned
|
||||
// (uint64_t) based on the value of |sign|.
|
||||
//
|
||||
// It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have
|
||||
// a |sign|. Otherwise, |i| is treated as unsigned.
|
||||
//
|
||||
// For bases larger than 10, |upcase| decides whether lower-case or upper-
|
||||
// case letters should be used to designate digits greater than 10.
|
||||
//
|
||||
// Padding can be done with either '0' zeros or ' ' spaces. Padding has to
|
||||
// be positive and will always be applied to the left of the output.
|
||||
//
|
||||
// Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to
|
||||
// the left of |padding|, if |pad| is '0'; and to the right of |padding|
|
||||
// if |pad| is ' '.
|
||||
//
|
||||
// Returns "false", if the |buffer_| overflowed at any time.
|
||||
bool IToASCII(bool sign, bool upcase, int64_t i, int base,
|
||||
char pad, size_t padding, const char* prefix);
|
||||
|
||||
private:
|
||||
// Increments |count_| by |inc| unless this would cause |count_| to
|
||||
// overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected;
|
||||
// it then clamps |count_| to |kSSizeMax-1|.
|
||||
inline bool IncrementCount(size_t inc) {
|
||||
// "inc" is either 1 or a "padding" value. Padding is clamped at
|
||||
// run-time to at most kSSizeMax-1. So, we know that "inc" is always in
|
||||
// the range 1..kSSizeMax-1.
|
||||
// This allows us to compute "kSSizeMax - 1 - inc" without incurring any
|
||||
// integer overflows.
|
||||
DEBUG_CHECK(inc <= kSSizeMax - 1);
|
||||
if (count_ > kSSizeMax - 1 - inc) {
|
||||
count_ = kSSizeMax - 1;
|
||||
return false;
|
||||
}
|
||||
count_ += inc;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convenience method for the common case of incrementing |count_| by one.
|
||||
inline bool IncrementCountByOne() {
|
||||
return IncrementCount(1);
|
||||
}
|
||||
|
||||
// Return the current insertion point into the buffer. This is typically
|
||||
// at |buffer_| + |count_|, but could be before that if truncation
|
||||
// happened. It always points to one byte past the last byte that was
|
||||
// successfully placed into the |buffer_|.
|
||||
inline char* GetInsertionPoint() const {
|
||||
size_t idx = count_;
|
||||
if (idx > size_) {
|
||||
idx = size_;
|
||||
}
|
||||
return buffer_ + idx;
|
||||
}
|
||||
|
||||
// User-provided buffer that will receive the fully formatted output string.
|
||||
char* buffer_;
|
||||
|
||||
// Number of bytes that are available in the buffer excluding the trailing
|
||||
// NUL byte that will be added by the destructor.
|
||||
const size_t size_;
|
||||
|
||||
// Number of bytes that would have been emitted to the buffer, if the buffer
|
||||
// was sufficiently big. This number always excludes the trailing NUL byte
|
||||
// and it is guaranteed to never grow bigger than kSSizeMax-1.
|
||||
size_t count_;
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(Buffer);
|
||||
};
|
||||
|
||||
|
||||
bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base,
|
||||
char pad, size_t padding, const char* prefix) {
|
||||
// Sanity check for parameters. None of these should ever fail, but see
|
||||
// above for the rationale why we can't call CHECK().
|
||||
DEBUG_CHECK(base >= 2);
|
||||
DEBUG_CHECK(base <= 16);
|
||||
DEBUG_CHECK(!sign || base == 10);
|
||||
DEBUG_CHECK(pad == '0' || pad == ' ');
|
||||
DEBUG_CHECK(padding <= kSSizeMax);
|
||||
DEBUG_CHECK(!(sign && prefix && *prefix));
|
||||
|
||||
// Handle negative numbers, if the caller indicated that |i| should be
|
||||
// treated as a signed number; otherwise treat |i| as unsigned (even if the
|
||||
// MSB is set!)
|
||||
// Details are tricky, because of limited data-types, but equivalent pseudo-
|
||||
// code would look like:
|
||||
// if (sign && i < 0)
|
||||
// prefix = "-";
|
||||
// num = abs(i);
|
||||
int minint = 0;
|
||||
uint64_t num;
|
||||
if (sign && i < 0) {
|
||||
prefix = "-";
|
||||
|
||||
// Turn our number positive.
|
||||
if (i == std::numeric_limits<int64_t>::min()) {
|
||||
// The most negative integer needs special treatment.
|
||||
minint = 1;
|
||||
num = static_cast<uint64_t>(-(i + 1));
|
||||
} else {
|
||||
// "Normal" negative numbers are easy.
|
||||
num = static_cast<uint64_t>(-i);
|
||||
}
|
||||
} else {
|
||||
num = static_cast<uint64_t>(i);
|
||||
}
|
||||
|
||||
// If padding with '0' zero, emit the prefix or '-' character now. Otherwise,
|
||||
// make the prefix accessible in reverse order, so that we can later output
|
||||
// it right between padding and the number.
|
||||
// We cannot choose the easier approach of just reversing the number, as that
|
||||
// fails in situations where we need to truncate numbers that have padding
|
||||
// and/or prefixes.
|
||||
const char* reverse_prefix = nullptr;
|
||||
if (prefix && *prefix) {
|
||||
if (pad == '0') {
|
||||
while (*prefix) {
|
||||
if (padding) {
|
||||
--padding;
|
||||
}
|
||||
Out(*prefix++);
|
||||
}
|
||||
prefix = nullptr;
|
||||
} else {
|
||||
for (reverse_prefix = prefix; *reverse_prefix; ++reverse_prefix) {
|
||||
}
|
||||
}
|
||||
} else
|
||||
prefix = nullptr;
|
||||
const size_t prefix_length = reverse_prefix - prefix;
|
||||
|
||||
// Loop until we have converted the entire number. Output at least one
|
||||
// character (i.e. '0').
|
||||
size_t start = count_;
|
||||
size_t discarded = 0;
|
||||
bool started = false;
|
||||
do {
|
||||
// Make sure there is still enough space left in our output buffer.
|
||||
if (count_ >= size_) {
|
||||
if (start < size_) {
|
||||
// It is rare that we need to output a partial number. But if asked
|
||||
// to do so, we will still make sure we output the correct number of
|
||||
// leading digits.
|
||||
// Since we are generating the digits in reverse order, we actually
|
||||
// have to discard digits in the order that we have already emitted
|
||||
// them. This is essentially equivalent to:
|
||||
// memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1)
|
||||
for (char* move = buffer_ + start, *end = buffer_ + size_ - 1;
|
||||
move < end;
|
||||
++move) {
|
||||
*move = move[1];
|
||||
}
|
||||
++discarded;
|
||||
--count_;
|
||||
} else if (count_ - size_ > 1) {
|
||||
// Need to increment either |count_| or |discarded| to make progress.
|
||||
// The latter is more efficient, as it eventually triggers fast
|
||||
// handling of padding. But we have to ensure we don't accidentally
|
||||
// change the overall state (i.e. switch the state-machine from
|
||||
// discarding to non-discarding). |count_| needs to always stay
|
||||
// bigger than |size_|.
|
||||
--count_;
|
||||
++discarded;
|
||||
}
|
||||
}
|
||||
|
||||
// Output the next digit and (if necessary) compensate for the most
|
||||
// negative integer needing special treatment. This works because,
|
||||
// no matter the bit width of the integer, the lowest-most decimal
|
||||
// integer always ends in 2, 4, 6, or 8.
|
||||
if (!num && started) {
|
||||
if (reverse_prefix > prefix) {
|
||||
Out(*--reverse_prefix);
|
||||
} else {
|
||||
Out(pad);
|
||||
}
|
||||
} else {
|
||||
started = true;
|
||||
Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]);
|
||||
}
|
||||
|
||||
minint = 0;
|
||||
num /= base;
|
||||
|
||||
// Add padding, if requested.
|
||||
if (padding > 0) {
|
||||
--padding;
|
||||
|
||||
// Performance optimization for when we are asked to output excessive
|
||||
// padding, but our output buffer is limited in size. Even if we output
|
||||
// a 64bit number in binary, we would never write more than 64 plus
|
||||
// prefix non-padding characters. So, once this limit has been passed,
|
||||
// any further state change can be computed arithmetically; we know that
|
||||
// by this time, our entire final output consists of padding characters
|
||||
// that have all already been output.
|
||||
if (discarded > 8*sizeof(num) + prefix_length) {
|
||||
IncrementCount(padding);
|
||||
padding = 0;
|
||||
}
|
||||
}
|
||||
} while (num || padding || (reverse_prefix > prefix));
|
||||
|
||||
// Conversion to ASCII actually resulted in the digits being in reverse
|
||||
// order. We can't easily generate them in forward order, as we can't tell
|
||||
// the number of characters needed until we are done converting.
|
||||
// So, now, we reverse the string (except for the possible '-' sign).
|
||||
char* front = buffer_ + start;
|
||||
char* back = GetInsertionPoint();
|
||||
while (--back > front) {
|
||||
char ch = *back;
|
||||
*back = *front;
|
||||
*front++ = ch;
|
||||
}
|
||||
|
||||
IncrementCount(discarded);
|
||||
return !discarded;
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Formats |fmt| into |buf| (capacity |sz| bytes), expanding each '%' directive
// with the next unused entry of |args|; |max_args| is the number of entries in
// |args|.
//
// Returns -1 if |sz|, reinterpreted as ssize_t, is smaller than 1. Otherwise
// returns buffer.GetCount() once the format string has been consumed, the
// buffer reports OutOfAddressableSpace(), or a "%c" argument of value NUL is
// encountered.
//
// A directive whose argument is missing or has an unexpected type is not
// expanded; '%' and the format character are copied to the output instead
// (see the fail_to_expand label below).
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, const Arg* args,
|
||||
const size_t max_args) {
|
||||
// Make sure that at least one NUL byte can be written, and that the buffer
|
||||
// never overflows kSSizeMax. Not only does that use up most or all of the
|
||||
// address space, it also would result in a return code that cannot be
|
||||
// represented.
|
||||
if (static_cast<ssize_t>(sz) < 1)
|
||||
return -1;
|
||||
sz = std::min(sz, kSSizeMax);
|
||||
|
||||
// Iterate over format string and interpret '%' arguments as they are
|
||||
// encountered.
|
||||
Buffer buffer(buf, sz);
|
||||
// |padding| and |pad| are reset for every '%' directive before they are read.
size_t padding;
|
||||
char pad;
|
||||
for (unsigned int cur_arg = 0; *fmt && !buffer.OutOfAddressableSpace(); ) {
|
||||
if (*fmt++ == '%') {
|
||||
padding = 0;
|
||||
pad = ' ';
|
||||
char ch = *fmt++;
|
||||
// Re-entered via goto after a width parameter has been consumed; |ch| then
// holds the first non-digit character that followed the width.
format_character_found:
|
||||
switch (ch) {
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
// Found a width parameter. Convert to an integer value and store in
|
||||
// "padding". If the leading digit is a zero, change the padding
|
||||
// character from a space ' ' to a zero '0'.
|
||||
pad = ch == '0' ? '0' : ' ';
|
||||
for (;;) {
|
||||
// The maximum allowed padding fills all the available address
|
||||
// space and leaves just enough space to insert the trailing NUL.
|
||||
const size_t max_padding = kSSizeMax - 1;
|
||||
if (padding > max_padding/10 ||
|
||||
10*padding > max_padding - (ch - '0')) {
|
||||
DEBUG_CHECK(padding <= max_padding/10 &&
|
||||
10*padding <= max_padding - (ch - '0'));
|
||||
// Integer overflow detected. Skip the rest of the width until
|
||||
// we find the format character, then do the normal error handling.
|
||||
padding_overflow:
|
||||
padding = max_padding;
|
||||
while ((ch = *fmt++) >= '0' && ch <= '9') {
|
||||
}
|
||||
// The directive is not expanded, but one argument (if any is left) is
// still consumed for it.
if (cur_arg < max_args) {
|
||||
++cur_arg;
|
||||
}
|
||||
goto fail_to_expand;
|
||||
}
|
||||
padding = 10*padding + ch - '0';
|
||||
if (padding > max_padding) {
|
||||
// This doesn't happen for "sane" values of kSSizeMax. But once
|
||||
// kSSizeMax gets smaller than about 10, our earlier range checks
|
||||
// are incomplete. Unittests do trigger this artificial corner
|
||||
// case.
|
||||
DEBUG_CHECK(padding <= max_padding);
|
||||
goto padding_overflow;
|
||||
}
|
||||
ch = *fmt++;
|
||||
if (ch < '0' || ch > '9') {
|
||||
// Reached the end of the width parameter. This is where the format
|
||||
// character is found.
|
||||
goto format_character_found;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'c': { // Output an ASCII character.
|
||||
// Check that there are arguments left to be inserted.
|
||||
if (cur_arg >= max_args) {
|
||||
DEBUG_CHECK(cur_arg < max_args);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
// Check that the argument has the expected type.
|
||||
const Arg& arg = args[cur_arg++];
|
||||
if (arg.type != Arg::INT && arg.type != Arg::UINT) {
|
||||
DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
// Apply padding, if needed.
|
||||
// Note: "%c" always pads with ' ', even if the width had a leading '0'.
buffer.Pad(' ', padding, 1);
|
||||
|
||||
// Convert the argument to an ASCII character and output it.
|
||||
char as_char = static_cast<char>(arg.integer.i);
|
||||
// A NUL character ends formatting; the rest of |fmt| is not processed.
if (!as_char) {
|
||||
goto end_of_output_buffer;
|
||||
}
|
||||
buffer.Out(as_char);
|
||||
break; }
|
||||
case 'd': // Output a possibly signed decimal value.
|
||||
case 'o': // Output an unsigned octal value.
|
||||
case 'x': // Output an unsigned hexadecimal value.
|
||||
case 'X':
|
||||
case 'p': { // Output a pointer value.
|
||||
// Check that there are arguments left to be inserted.
|
||||
if (cur_arg >= max_args) {
|
||||
DEBUG_CHECK(cur_arg < max_args);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
const Arg& arg = args[cur_arg++];
|
||||
int64_t i;
|
||||
const char* prefix = nullptr;
|
||||
if (ch != 'p') {
|
||||
// Check that the argument has the expected type.
|
||||
if (arg.type != Arg::INT && arg.type != Arg::UINT) {
|
||||
DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
i = arg.integer.i;
|
||||
|
||||
if (ch != 'd') {
|
||||
// The Arg() constructor automatically performed sign expansion on
|
||||
// signed parameters. This is great when outputting a %d decimal
|
||||
// number, but can result in unexpected leading 0xFF bytes when
|
||||
// outputting a %x hexadecimal number. Mask bits, if necessary.
|
||||
// We have to do this here, instead of in the Arg() constructor, as
|
||||
// the Arg() constructor cannot tell whether we will output a %d
|
||||
// or a %x. Only the latter should experience masking.
|
||||
if (arg.integer.width < sizeof(int64_t)) {
|
||||
i &= (1LL << (8*arg.integer.width)) - 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Pointer values require an actual pointer or a string.
|
||||
if (arg.type == Arg::POINTER) {
|
||||
i = reinterpret_cast<uintptr_t>(arg.ptr);
|
||||
} else if (arg.type == Arg::STRING) {
|
||||
i = reinterpret_cast<uintptr_t>(arg.str);
|
||||
} else if (arg.type == Arg::INT &&
|
||||
arg.integer.width == sizeof(NULL) &&
|
||||
arg.integer.i == 0) { // Allow C++'s version of NULL
|
||||
i = 0;
|
||||
} else {
|
||||
DEBUG_CHECK(arg.type == Arg::POINTER || arg.type == Arg::STRING);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
// Pointers always include the "0x" prefix.
|
||||
prefix = "0x";
|
||||
}
|
||||
|
||||
// Use IToASCII() to convert to ASCII representation. For decimal
|
||||
// numbers, optionally print a sign. For hexadecimal numbers,
|
||||
// distinguish between upper and lower case. %p addresses are always
|
||||
// printed as upcase. Supports base 8, 10, and 16. Prints padding
|
||||
// and/or prefixes, if so requested.
|
||||
// The first argument is true only for "%d" with a signed (Arg::INT) value;
// the second is true for every format character except 'x'.
buffer.IToASCII(ch == 'd' && arg.type == Arg::INT,
|
||||
ch != 'x', i,
|
||||
ch == 'o' ? 8 : ch == 'd' ? 10 : 16,
|
||||
pad, padding, prefix);
|
||||
break; }
|
||||
case 's': {
|
||||
// Check that there are arguments left to be inserted.
|
||||
if (cur_arg >= max_args) {
|
||||
DEBUG_CHECK(cur_arg < max_args);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
// Check that the argument has the expected type.
|
||||
const Arg& arg = args[cur_arg++];
|
||||
const char *s;
|
||||
if (arg.type == Arg::STRING) {
|
||||
s = arg.str ? arg.str : "<NULL>";
|
||||
} else if (arg.type == Arg::INT && arg.integer.width == sizeof(NULL) &&
|
||||
arg.integer.i == 0) { // Allow C++'s version of NULL
|
||||
s = "<NULL>";
|
||||
} else {
|
||||
DEBUG_CHECK(arg.type == Arg::STRING);
|
||||
goto fail_to_expand;
|
||||
}
|
||||
|
||||
// Apply padding, if needed. This requires us to first check the
|
||||
// length of the string that we are outputting.
|
||||
if (padding) {
|
||||
size_t len = 0;
|
||||
for (const char* src = s; *src++; ) {
|
||||
++len;
|
||||
}
|
||||
buffer.Pad(' ', padding, len);
|
||||
}
|
||||
|
||||
// Printing a string involves nothing more than copying it into the
|
||||
// output buffer and making sure we don't output more bytes than
|
||||
// available space; Out() takes care of doing that.
|
||||
for (const char* src = s; *src; ) {
|
||||
buffer.Out(*src++);
|
||||
}
|
||||
break; }
|
||||
case '%':
|
||||
// Quoted percent '%' character.
|
||||
goto copy_verbatim;
|
||||
fail_to_expand:
|
||||
// C++ gives us tools to do type checking -- something that snprintf()
|
||||
// could never really do. So, whenever we see arguments that don't
|
||||
// match up with the format string, we refuse to output them. But
|
||||
// since we have to be extremely conservative about being async-
|
||||
// signal-safe, we are limited in the type of error handling that we
|
||||
// can do in production builds (in debug builds we can use
|
||||
// DEBUG_CHECK() and hope for the best). So, all we do is pass the
|
||||
// format string unchanged. That should eventually get the user's
|
||||
// attention; and in the meantime, it hopefully doesn't lose too much
|
||||
// data.
|
||||
default:
|
||||
// Unknown or unsupported format character. Just copy verbatim to
|
||||
// output.
|
||||
buffer.Out('%');
|
||||
DEBUG_CHECK(ch);
|
||||
// |ch| == NUL means the format string ended right after '%' (or after a
// width), so there is nothing further to copy or parse.
if (!ch) {
|
||||
goto end_of_format_string;
|
||||
}
|
||||
buffer.Out(ch);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Reached for ordinary characters and, via goto, for the quoted '%' case;
// fmt[-1] is the character that was consumed most recently.
copy_verbatim:
|
||||
buffer.Out(fmt[-1]);
|
||||
}
|
||||
}
|
||||
// Both labels share the single return statement below.
end_of_format_string:
|
||||
end_of_output_buffer:
|
||||
return buffer.GetCount();
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
|
||||
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt) {
|
||||
// Make sure that at least one NUL byte can be written, and that the buffer
|
||||
// never overflows kSSizeMax. Not only does that use up most or all of the
|
||||
// address space, it also would result in a return code that cannot be
|
||||
// represented.
|
||||
if (static_cast<ssize_t>(sz) < 1)
|
||||
return -1;
|
||||
sz = std::min(sz, kSSizeMax);
|
||||
|
||||
Buffer buffer(buf, sz);
|
||||
|
||||
// In the slow-path, we deal with errors by copying the contents of
|
||||
// "fmt" unexpanded. This means, if there are no arguments passed, the
|
||||
// SafeSPrintf() function always degenerates to a version of strncpy() that
|
||||
// de-duplicates '%' characters.
|
||||
const char* src = fmt;
|
||||
for (; *src; ++src) {
|
||||
buffer.Out(*src);
|
||||
DEBUG_CHECK(src[0] != '%' || src[1] == '%');
|
||||
if (src[0] == '%' && src[1] == '%') {
|
||||
++src;
|
||||
}
|
||||
}
|
||||
return buffer.GetCount();
|
||||
}
|
||||
|
||||
} // namespace strings
|
||||
} // namespace base
|
||||
246
TMessagesProj/jni/voip/webrtc/base/strings/safe_sprintf.h
Normal file
246
TMessagesProj/jni/voip/webrtc/base/strings/safe_sprintf.h
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_SAFE_SPRINTF_H_
|
||||
#define BASE_STRINGS_SAFE_SPRINTF_H_
|
||||
|
||||
#include "build/build_config.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(OS_POSIX) || defined(OS_FUCHSIA)
|
||||
// For ssize_t
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "base/base_export.h"
|
||||
|
||||
namespace base {
|
||||
namespace strings {
|
||||
|
||||
#if defined(COMPILER_MSVC)
|
||||
// Define ssize_t inside of our namespace.
|
||||
#if defined(_WIN64)
|
||||
typedef __int64 ssize_t;
|
||||
#else
|
||||
typedef long ssize_t;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// SafeSPrintf() is a type-safe and completely self-contained version of
|
||||
// snprintf().
|
||||
//
|
||||
// SafeSNPrintf() is an alternative function signature that can be used when
|
||||
// not dealing with fixed-sized buffers. When possible, SafeSPrintf() should
|
||||
// always be used instead of SafeSNPrintf()
|
||||
//
|
||||
// These functions allow for formatting complicated messages from contexts that
|
||||
// require strict async-signal-safety. In fact, it is safe to call them from
|
||||
// any low-level execution context, as they are guaranteed to make no library
|
||||
// or system calls. It deliberately never touches "errno", either.
|
||||
//
|
||||
// The only exception to this rule is that in debug builds the code calls
|
||||
// RAW_CHECK() to help diagnose problems when the format string does not
|
||||
// match the rest of the arguments. In release builds, no CHECK()s are used,
|
||||
// and SafeSPrintf() instead returns an output string that expands only
|
||||
// those arguments that match their format characters. Mismatched arguments
|
||||
// are ignored.
|
||||
//
|
||||
// The code currently only supports a subset of format characters:
|
||||
// %c, %o, %d, %x, %X, %p, and %s.
|
||||
//
|
||||
// SafeSPrintf() aims to be as liberal as reasonably possible. Integer-like
|
||||
// values of arbitrary width can be passed to all of the format characters
|
||||
// that expect integers. Thus, it is explicitly legal to pass an "int" to
|
||||
// "%c", and output will automatically look at the LSB only. It is also
|
||||
// explicitly legal to pass either signed or unsigned values, and the format
|
||||
// characters will automatically interpret the arguments accordingly.
|
||||
//
|
||||
// It is still not legal to mix-and-match integer-like values with pointer
|
||||
// values. For instance, you cannot pass a pointer to %x, nor can you pass an
|
||||
// integer to %p.
|
||||
//
|
||||
// The one exception is "0" zero being accepted by "%p". This works-around
|
||||
// the problem of C++ defining NULL as an integer-like value.
|
||||
//
|
||||
// All format characters take an optional width parameter. This must be a
|
||||
// positive integer. For %d, %o, %x, %X and %p, if the width starts with
|
||||
// a leading '0', padding is done with '0' instead of ' ' characters.
|
||||
//
|
||||
// There are a few features of snprintf()-style format strings, that
|
||||
// SafeSPrintf() does not support at this time.
|
||||
//
|
||||
// If an actual user showed up, there is no particularly strong reason they
|
||||
// couldn't be added. But that assumes that the trade-offs between complexity
|
||||
// and utility are favorable.
|
||||
//
|
||||
// For example, adding support for negative padding widths, and for %n are all
|
||||
// likely to be viewed positively. They are all clearly useful, low-risk, easy
|
||||
// to test, don't jeopardize the async-signal-safety of the code, and overall
|
||||
// have little impact on other parts of SafeSPrintf() function.
|
||||
//
|
||||
// On the other hand, adding support for alternate forms, positional
|
||||
// arguments, grouping, wide characters, localization or floating point numbers
|
||||
// are all unlikely to ever be added.
|
||||
//
|
||||
// SafeSPrintf() and SafeSNPrintf() mimic the behavior of snprintf() and they
|
||||
// return the number of bytes needed to store the untruncated output. This
|
||||
// does *not* include the terminating NUL byte.
|
||||
//
|
||||
// They return -1, iff a fatal error happened. This typically can only happen,
|
||||
// if the buffer size is a) negative, or b) zero (i.e. not even the NUL byte
|
||||
// can be written). The return value can never be larger than SSIZE_MAX-1.
|
||||
// This ensures that the caller can always add one to the signed return code
|
||||
// in order to determine the amount of storage that needs to be allocated.
|
||||
//
|
||||
// While the code supports type checking and while it is generally very careful
|
||||
// to avoid printing incorrect values, it tends to be conservative in printing
|
||||
// as much as possible, even when given incorrect parameters. Typically, in
|
||||
// case of an error, the format string will not be expanded. (i.e. something
|
||||
// like SafeSPrintf(buf, "%p %d", 1, 2) results in "%p 2"). See above for
|
||||
// the use of RAW_CHECK() in debug builds, though.
|
||||
//
|
||||
// Basic example:
|
||||
// char buf[20];
|
||||
// base::strings::SafeSPrintf(buf, "The answer: %2d", 42);
|
||||
//
|
||||
// Example with dynamically sized buffer (async-signal-safe). This code won't
|
||||
// work on Visual studio, as it requires dynamically allocating arrays on the
|
||||
// stack. Consider picking a smaller value for |kMaxSize| if stack size is
|
||||
// limited and known. On the other hand, if the parameters to SafeSNPrintf()
|
||||
// are trusted and not controllable by the user, you can consider eliminating
|
||||
// the check for |kMaxSize| altogether. The current value of SSIZE_MAX is
|
||||
// essentially a no-op that just illustrates how to implement an upper bound:
|
||||
// const size_t kInitialSize = 128;
|
||||
// const size_t kMaxSize = std::numeric_limits<ssize_t>::max();
|
||||
// size_t size = kInitialSize;
|
||||
// for (;;) {
|
||||
// char buf[size];
|
||||
// size = SafeSNPrintf(buf, size, "Error message \"%s\"\n", err) + 1;
|
||||
// if (sizeof(buf) < kMaxSize && size > kMaxSize) {
|
||||
// size = kMaxSize;
|
||||
// continue;
|
||||
// } else if (size > sizeof(buf))
|
||||
// continue;
|
||||
// write(2, buf, size-1);
|
||||
// break;
|
||||
// }
|
||||
|
||||
namespace internal {
|
||||
// Helpers that use C++ overloading, templates, and specializations to deduce
|
||||
// and record type information from function arguments. This allows us to
|
||||
// later write a type-safe version of snprintf().
|
||||
|
||||
// Type-erased record of one formatting argument. Implicit construction from
// the supported argument types captures both the value and a tag describing
// what kind of value it is, so the formatter can type-check each directive.
struct Arg {
  enum Type { INT, UINT, STRING, POINTER };

  // Integer-like values. The value is widened to 64 bits (sign-extended for
  // the signed overloads) and the size in bytes of the source type is
  // remembered in |integer.width|.
  Arg(signed char c)
      : integer{static_cast<int64_t>(c), sizeof(char)}, type(INT) {}
  Arg(unsigned char c)
      : integer{static_cast<int64_t>(c), sizeof(char)}, type(UINT) {}
  Arg(signed short j)
      : integer{static_cast<int64_t>(j), sizeof(short)}, type(INT) {}
  Arg(unsigned short j)
      : integer{static_cast<int64_t>(j), sizeof(short)}, type(UINT) {}
  Arg(signed int j)
      : integer{static_cast<int64_t>(j), sizeof(int)}, type(INT) {}
  Arg(unsigned int j)
      : integer{static_cast<int64_t>(j), sizeof(int)}, type(UINT) {}
  Arg(signed long j)
      : integer{static_cast<int64_t>(j), sizeof(long)}, type(INT) {}
  Arg(unsigned long j)
      : integer{static_cast<int64_t>(j), sizeof(long)}, type(UINT) {}
  Arg(signed long long j)
      : integer{static_cast<int64_t>(j), sizeof(long long)}, type(INT) {}
  Arg(unsigned long long j)
      : integer{static_cast<int64_t>(j), sizeof(long long)}, type(UINT) {}

  // C-style text strings (mutable or const).
  Arg(const char* s) : str(s), type(STRING) {}
  Arg(char* s) : str(s), type(STRING) {}

  // Every other object pointer is stored as an untyped address. The C-style
  // cast is kept on purpose: it also strips cv-qualifiers from |T|.
  template <class T>
  Arg(T* p) : ptr((void*)p), type(POINTER) {}

  // Payload; which member is meaningful is determined by |type|.
  union {
    // Valid for INT and UINT.
    struct {
      int64_t i;            // The value, widened to 64 bits.
      unsigned char width;  // sizeof() of the original integer type.
    } integer;

    // Valid for STRING.
    const char* str;

    // Valid for POINTER.
    const void* ptr;
  };
  const enum Type type;
};
|
||||
|
||||
// This is the internal function that performs the actual formatting of
|
||||
// an snprintf()-style format string.
|
||||
BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt,
|
||||
const Arg* args, size_t max_args);
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
// In debug builds, allow unit tests to artificially lower the kSSizeMax
|
||||
// constant that is used as a hard upper-bound for all buffers. In normal
|
||||
// use, this constant should always be std::numeric_limits<ssize_t>::max().
|
||||
BASE_EXPORT void SetSafeSPrintfSSizeMaxForTest(size_t max);
|
||||
BASE_EXPORT size_t GetSafeSPrintfSSizeMaxForTest();
|
||||
#endif
|
||||
|
||||
} // namespace internal
|
||||
|
||||
template<typename... Args>
|
||||
ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args... args) {
|
||||
// Use Arg() object to record type information and then copy arguments to an
|
||||
// array to make it easier to iterate over them.
|
||||
const internal::Arg arg_array[] = { args... };
|
||||
return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args));
|
||||
}
|
||||
|
||||
template<size_t N, typename... Args>
|
||||
ssize_t SafeSPrintf(char (&buf)[N], const char* fmt, Args... args) {
|
||||
// Use Arg() object to record type information and then copy arguments to an
|
||||
// array to make it easier to iterate over them.
|
||||
const internal::Arg arg_array[] = { args... };
|
||||
return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args));
|
||||
}
|
||||
|
||||
// Fast-path when we don't actually need to substitute any arguments.
|
||||
BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt);
|
||||
template<size_t N>
|
||||
inline ssize_t SafeSPrintf(char (&buf)[N], const char* fmt) {
|
||||
return SafeSNPrintf(buf, N, fmt);
|
||||
}
|
||||
|
||||
} // namespace strings
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_SAFE_SPRINTF_H_
|
||||
85
TMessagesProj/jni/voip/webrtc/base/strings/strcat.cc
Normal file
85
TMessagesProj/jni/voip/webrtc/base/strings/strcat.cc
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
// Copyright 2017 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/strcat.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// Reserves an additional amount of capacity in the given string, growing by at
|
||||
// least 2x if necessary. Used by StrAppendT().
|
||||
//
|
||||
// The "at least 2x" growing rule duplicates the exponential growth of
|
||||
// std::string. The problem is that most implementations of reserve() will grow
|
||||
// exactly to the requested amount instead of exponentially growing like would
|
||||
// happen when appending normally. If we didn't do this, an append after the
|
||||
// call to StrAppend() would definitely cause a reallocation, and loops with
|
||||
// StrAppend() calls would have O(n^2) complexity to execute. Instead, we want
|
||||
// StrAppend() to have the same semantics as std::string::append().
|
||||
// Ensures |str| has room for |additional| more characters. When the current
// capacity is insufficient, the new capacity is the larger of the exact
// requirement and twice the current capacity, so that repeated appends keep
// the amortized growth behavior described in the comment above.
template <typename String>
void ReserveAdditionalIfNeeded(String* str,
                               typename String::size_type additional) {
  const size_t needed = str->size() + additional;
  // Grow only if the existing allocation cannot hold the final size.
  if (needed > str->capacity()) {
    const size_t doubled = str->capacity() * 2;
    str->reserve(std::max(needed, doubled));
  }
}
|
||||
|
||||
template <typename DestString, typename InputString>
|
||||
void StrAppendT(DestString* dest, span<const InputString> pieces) {
|
||||
size_t additional_size = 0;
|
||||
for (const auto& cur : pieces)
|
||||
additional_size += cur.size();
|
||||
ReserveAdditionalIfNeeded(dest, additional_size);
|
||||
|
||||
for (const auto& cur : pieces)
|
||||
dest->append(cur.data(), cur.size());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string StrCat(span<const StringPiece> pieces) {
|
||||
std::string result;
|
||||
StrAppendT(&result, pieces);
|
||||
return result;
|
||||
}
|
||||
|
||||
string16 StrCat(span<const StringPiece16> pieces) {
|
||||
string16 result;
|
||||
StrAppendT(&result, pieces);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string StrCat(span<const std::string> pieces) {
|
||||
std::string result;
|
||||
StrAppendT(&result, pieces);
|
||||
return result;
|
||||
}
|
||||
|
||||
string16 StrCat(span<const string16> pieces) {
|
||||
string16 result;
|
||||
StrAppendT(&result, pieces);
|
||||
return result;
|
||||
}
|
||||
|
||||
void StrAppend(std::string* dest, span<const StringPiece> pieces) {
|
||||
StrAppendT(dest, pieces);
|
||||
}
|
||||
|
||||
void StrAppend(string16* dest, span<const StringPiece16> pieces) {
|
||||
StrAppendT(dest, pieces);
|
||||
}
|
||||
|
||||
void StrAppend(std::string* dest, span<const std::string> pieces) {
|
||||
StrAppendT(dest, pieces);
|
||||
}
|
||||
|
||||
void StrAppend(string16* dest, span<const string16> pieces) {
|
||||
StrAppendT(dest, pieces);
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
103
TMessagesProj/jni/voip/webrtc/base/strings/strcat.h
Normal file
103
TMessagesProj/jni/voip/webrtc/base/strings/strcat.h
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright 2017 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRCAT_H_
|
||||
#define BASE_STRINGS_STRCAT_H_
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/containers/span.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
#if defined(OS_WIN)
|
||||
// Guard against conflict with Win32 API StrCat macro:
|
||||
// check StrCat wasn't and will not be redefined.
|
||||
#define StrCat StrCat
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
|
||||
// StrCat ----------------------------------------------------------------------
|
||||
//
|
||||
// StrCat is a function to perform concatenation on a sequence of strings.
|
||||
// It is preferable to a sequence of "a + b + c" because it is both faster and
|
||||
// generates less code.
|
||||
//
|
||||
// std::string result = base::StrCat({"foo ", result, "\nfoo ", bar});
|
||||
//
|
||||
// To join an array of strings with a separator, see base::JoinString in
|
||||
// base/strings/string_util.h.
|
||||
//
|
||||
// MORE INFO
|
||||
//
|
||||
// StrCat can see all arguments at once, so it can allocate one return buffer
|
||||
// of exactly the right size and copy once, as opposed to a sequence of
|
||||
// operator+ which generates a series of temporary strings, copying as it goes.
|
||||
// And by using StringPiece arguments, StrCat can avoid creating temporary
|
||||
// string objects for char* constants.
|
||||
//
|
||||
// ALTERNATIVES
|
||||
//
|
||||
// Internal Google / Abseil has a similar StrCat function. That version takes
|
||||
// an overloaded number of arguments instead of initializer list (overflowing
|
||||
// to initializer list for many arguments). We don't have any legacy
|
||||
// requirements and using only initializer_list is simpler and generates
|
||||
// roughly the same amount of code at the call sites.
|
||||
//
|
||||
// Abseil's StrCat also allows numbers by using an intermediate class that can
|
||||
// be implicitly constructed from either a string or various number types. This
|
||||
// class formats the numbers into a static buffer for increased performance,
|
||||
// and the call sites look nice.
|
||||
//
|
||||
// As-written Abseil's helper class for numbers generates slightly more code
|
||||
// than the raw StringPiece version. We can de-inline the helper class'
|
||||
// constructors which will cause the StringPiece constructors to be de-inlined
|
||||
// for this call and generate slightly less code. This is something we can
|
||||
// explore more in the future.
|
||||
|
||||
BASE_EXPORT std::string StrCat(span<const StringPiece> pieces)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT string16 StrCat(span<const StringPiece16> pieces)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::string StrCat(span<const std::string> pieces)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT string16 StrCat(span<const string16> pieces) WARN_UNUSED_RESULT;
|
||||
|
||||
// Initializer list forwards to the array version.
|
||||
inline std::string StrCat(std::initializer_list<StringPiece> pieces) {
|
||||
return StrCat(make_span(pieces.begin(), pieces.size()));
|
||||
}
|
||||
inline string16 StrCat(std::initializer_list<StringPiece16> pieces) {
|
||||
return StrCat(make_span(pieces.begin(), pieces.size()));
|
||||
}
|
||||
|
||||
// StrAppend -------------------------------------------------------------------
|
||||
//
|
||||
// Appends a sequence of strings to a destination. Prefer:
|
||||
// StrAppend(&foo, ...);
|
||||
// over:
|
||||
// foo += StrCat(...);
|
||||
// because it avoids a temporary string allocation and copy.
|
||||
|
||||
BASE_EXPORT void StrAppend(std::string* dest, span<const StringPiece> pieces);
|
||||
BASE_EXPORT void StrAppend(string16* dest, span<const StringPiece16> pieces);
|
||||
BASE_EXPORT void StrAppend(std::string* dest, span<const std::string> pieces);
|
||||
BASE_EXPORT void StrAppend(string16* dest, span<const string16> pieces);
|
||||
|
||||
// Initializer list forwards to the array version.
|
||||
inline void StrAppend(std::string* dest,
|
||||
std::initializer_list<StringPiece> pieces) {
|
||||
return StrAppend(dest, make_span(pieces.begin(), pieces.size()));
|
||||
}
|
||||
inline void StrAppend(string16* dest,
|
||||
std::initializer_list<StringPiece16> pieces) {
|
||||
return StrAppend(dest, make_span(pieces.begin(), pieces.size()));
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRCAT_H_
|
||||
87
TMessagesProj/jni/voip/webrtc/base/strings/string16.cc
Normal file
87
TMessagesProj/jni/voip/webrtc/base/strings/string16.cc
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF16) && !defined(_AIX)
|
||||
|
||||
#error This file should not be used on 2-byte wchar_t systems
|
||||
// If this winds up being needed on 2-byte wchar_t systems, either the
|
||||
// definitions below can be used, or the host system's wide character
|
||||
// functions like wmemcmp can be wrapped.
|
||||
|
||||
#elif defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
#include <ostream>
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
int c16memcmp(const char16* s1, const char16* s2, size_t n) {
|
||||
// We cannot call memcmp because that changes the semantics.
|
||||
while (n-- > 0) {
|
||||
if (*s1 != *s2) {
|
||||
// We cannot use (*s1 - *s2) because char16 is unsigned.
|
||||
return ((*s1 < *s2) ? -1 : 1);
|
||||
}
|
||||
++s1;
|
||||
++s2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the number of char16 units in |s| before the terminating zero unit.
size_t c16len(const char16* s) {
  size_t length = 0;
  while (s[length]) {
    ++length;
  }
  return length;
}
|
||||
|
||||
// Scans the first |n| units of |s| for |c|. Returns a pointer to the first
// matching unit, or nullptr when none of the |n| units equals |c|.
const char16* c16memchr(const char16* s, char16 c, size_t n) {
  for (size_t index = 0; index < n; ++index) {
    if (s[index] == c)
      return s + index;
  }
  return nullptr;
}
|
||||
|
||||
// Moves |n| char16 units from |s2| to |s1| via memmove and returns |s1|.
char16* c16memmove(char16* s1, const char16* s2, size_t n) {
  const size_t byte_count = n * sizeof(char16);
  memmove(s1, s2, byte_count);  // memmove's return value is its destination.
  return s1;
}
|
||||
|
||||
// Copies |n| char16 units from |s2| to |s1| via memcpy and returns |s1|.
char16* c16memcpy(char16* s1, const char16* s2, size_t n) {
  const size_t byte_count = n * sizeof(char16);
  memcpy(s1, s2, byte_count);  // memcpy's return value is its destination.
  return s1;
}
|
||||
|
||||
// Stores |c| into each of the first |n| units of |s| and returns |s|.
char16* c16memset(char16* s, char16 c, size_t n) {
  for (size_t index = 0; index < n; ++index)
    s[index] = c;
  return s;
}
|
||||
|
||||
namespace string16_internals {
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const string16& str) {
|
||||
return out << base::StringPiece16(str);
|
||||
}
|
||||
|
||||
void PrintTo(const string16& str, std::ostream* out) {
|
||||
*out << str;
|
||||
}
|
||||
|
||||
} // namespace string16_internals
|
||||
|
||||
} // namespace base
|
||||
|
||||
template class std::
|
||||
basic_string<base::char16, base::string16_internals::string16_char_traits>;
|
||||
|
||||
#endif // WCHAR_T_IS_UTF32
|
||||
229
TMessagesProj/jni/voip/webrtc/base/strings/string16.h
Normal file
229
TMessagesProj/jni/voip/webrtc/base/strings/string16.h
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING16_H_
|
||||
#define BASE_STRINGS_STRING16_H_
|
||||
|
||||
// WHAT:
|
||||
// A version of std::basic_string that provides 2-byte characters even when
|
||||
// wchar_t is not implemented as a 2-byte type. You can access this class as
|
||||
// string16. We also define char16, which string16 is based upon.
|
||||
//
|
||||
// WHY:
|
||||
// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
|
||||
// data. Plenty of existing code operates on strings encoded as UTF-16.
|
||||
//
|
||||
// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
|
||||
// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
|
||||
// at run time, because it calls some functions (like wcslen) that come from
|
||||
// the system's native C library -- which was built with a 4-byte wchar_t!
|
||||
// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
|
||||
// entirely improper on those systems where the encoding of wchar_t is defined
|
||||
// as UTF-32.
|
||||
//
|
||||
// Here, we define string16, which is similar to std::wstring but replaces all
|
||||
// libc functions with custom, 2-byte-char compatible routines. It is capable
|
||||
// of carrying UTF-16-encoded data.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF16)
|
||||
|
||||
// Define a macro for wrapping construction of char16 arrays and string16s from
|
||||
// a literal string. This indirection allows for an easier migration of
|
||||
// base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one
|
||||
// character change to the macro will be necessary.
|
||||
// This macro does not exist when WCHAR_T_IS_UTF32, as it is currently not
|
||||
// possible to create a char array from a literal in this case.
|
||||
// TODO(https://crbug.com/911896): Remove this macro once base::char16 is
|
||||
// char16_t on all platforms.
|
||||
#define STRING16_LITERAL(x) L##x
|
||||
|
||||
namespace base {
|
||||
|
||||
typedef wchar_t char16;
|
||||
typedef std::wstring string16;
|
||||
|
||||
} // namespace base
|
||||
|
||||
#elif defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
#include <wchar.h> // for mbstate_t
|
||||
|
||||
namespace base {
|
||||
|
||||
typedef uint16_t char16;
|
||||
|
||||
// char16 versions of the functions required by string16_char_traits; these
|
||||
// are based on the wide character functions of similar names ("w" or "wcs"
|
||||
// instead of "c16").
|
||||
BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
|
||||
BASE_EXPORT size_t c16len(const char16* s);
|
||||
BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);
|
||||
BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);
|
||||
BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
|
||||
BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
|
||||
|
||||
// This namespace contains the implementation of base::string16 along with
|
||||
// things that need to be found via argument-dependent lookup from a
|
||||
// base::string16.
|
||||
namespace string16_internals {
|
||||
|
||||
struct string16_char_traits {
|
||||
typedef char16 char_type;
|
||||
typedef int int_type;
|
||||
|
||||
// int_type needs to be able to hold each possible value of char_type, and in
|
||||
// addition, the distinct value of eof().
|
||||
static_assert(sizeof(int_type) > sizeof(char_type),
|
||||
"int must be larger than 16 bits wide");
|
||||
|
||||
typedef std::streamoff off_type;
|
||||
typedef mbstate_t state_type;
|
||||
typedef std::fpos<state_type> pos_type;
|
||||
|
||||
static void assign(char_type& c1, const char_type& c2) {
|
||||
c1 = c2;
|
||||
}
|
||||
|
||||
static bool eq(const char_type& c1, const char_type& c2) {
|
||||
return c1 == c2;
|
||||
}
|
||||
static bool lt(const char_type& c1, const char_type& c2) {
|
||||
return c1 < c2;
|
||||
}
|
||||
|
||||
static int compare(const char_type* s1, const char_type* s2, size_t n) {
|
||||
return c16memcmp(s1, s2, n);
|
||||
}
|
||||
|
||||
static size_t length(const char_type* s) {
|
||||
return c16len(s);
|
||||
}
|
||||
|
||||
static const char_type* find(const char_type* s, size_t n,
|
||||
const char_type& a) {
|
||||
return c16memchr(s, a, n);
|
||||
}
|
||||
|
||||
static char_type* move(char_type* s1, const char_type* s2, size_t n) {
|
||||
return c16memmove(s1, s2, n);
|
||||
}
|
||||
|
||||
static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
|
||||
return c16memcpy(s1, s2, n);
|
||||
}
|
||||
|
||||
static char_type* assign(char_type* s, size_t n, char_type a) {
|
||||
return c16memset(s, a, n);
|
||||
}
|
||||
|
||||
static int_type not_eof(const int_type& c) {
|
||||
return eq_int_type(c, eof()) ? 0 : c;
|
||||
}
|
||||
|
||||
static char_type to_char_type(const int_type& c) {
|
||||
return char_type(c);
|
||||
}
|
||||
|
||||
static int_type to_int_type(const char_type& c) {
|
||||
return int_type(c);
|
||||
}
|
||||
|
||||
static bool eq_int_type(const int_type& c1, const int_type& c2) {
|
||||
return c1 == c2;
|
||||
}
|
||||
|
||||
static int_type eof() {
|
||||
return static_cast<int_type>(EOF);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace string16_internals
|
||||
|
||||
typedef std::basic_string<char16,
|
||||
base::string16_internals::string16_char_traits>
|
||||
string16;
|
||||
|
||||
namespace string16_internals {
|
||||
|
||||
BASE_EXPORT extern std::ostream& operator<<(std::ostream& out,
|
||||
const string16& str);
|
||||
|
||||
// This is required by googletest to print a readable output on test failures.
|
||||
BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out);
|
||||
|
||||
} // namespace string16_internals
|
||||
|
||||
} // namespace base
|
||||
|
||||
// The string class will be explicitly instantiated only once, in string16.cc.
|
||||
//
|
||||
// std::basic_string<> in GNU libstdc++ contains a static data member,
|
||||
// _S_empty_rep_storage, to represent empty strings. When an operation such
|
||||
// as assignment or destruction is performed on a string, causing its existing
|
||||
// data member to be invalidated, it must not be freed if this static data
|
||||
// member is being used. Otherwise, it counts as an attempt to free static
|
||||
// (and not allocated) data, which is a memory error.
|
||||
//
|
||||
// Generally, due to C++ template magic, _S_empty_rep_storage will be marked
|
||||
// as a coalesced symbol, meaning that the linker will combine multiple
|
||||
// instances into a single one when generating output.
|
||||
//
|
||||
// If a string class is used by multiple shared libraries, a problem occurs.
|
||||
// Each library will get its own copy of _S_empty_rep_storage. When strings
|
||||
// are passed across a library boundary for alteration or destruction, memory
|
||||
// errors will result. GNU libstdc++ contains a configuration option,
|
||||
// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
|
||||
// disables the static data member optimization, but it's a good optimization
|
||||
// and non-STL code is generally at the mercy of the system's STL
|
||||
// configuration. Fully-dynamic strings are not the default for GNU libstdc++
|
||||
// itself or for the libstdc++ installations on the systems we care
|
||||
// about, such as Mac OS X and relevant flavors of Linux.
|
||||
//
|
||||
// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
|
||||
//
|
||||
// To avoid problems, string classes need to be explicitly instantiated only
|
||||
// once, in exactly one library. All other string users see it via an "extern"
|
||||
// declaration. This is precisely how GNU libstdc++ handles
|
||||
// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
|
||||
//
|
||||
// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
|
||||
// in which the linker does not fully coalesce symbols when dead code
|
||||
// stripping is enabled. This bug causes the memory errors described above
|
||||
// to occur even when a std::basic_string<> does not cross shared library
|
||||
// boundaries, such as in statically-linked executables.
|
||||
//
|
||||
// TODO(mark): File this bug with Apple and update this note with a bug number.
|
||||
|
||||
extern template class BASE_EXPORT
|
||||
std::basic_string<base::char16,
|
||||
base::string16_internals::string16_char_traits>;
|
||||
|
||||
// Specialize std::hash for base::string16. Although the style guide forbids
|
||||
// this in general, it is necessary for consistency with WCHAR_T_IS_UTF16
|
||||
// platforms, where base::string16 is a type alias for std::wstring.
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<base::string16> {
|
||||
std::size_t operator()(const base::string16& s) const {
|
||||
std::size_t result = 0;
|
||||
for (base::char16 c : s)
|
||||
result = (result * 131) + c;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
#endif // WCHAR_T_IS_UTF32
|
||||
|
||||
#endif // BASE_STRINGS_STRING16_H_
|
||||
|
|
@ -0,0 +1,545 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/string_number_conversions.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/no_destructor.h"
|
||||
#include "base/numerics/safe_math.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "base/third_party/double_conversion/double-conversion/double-conversion.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename STR, typename INT>
|
||||
struct IntToStringT {
|
||||
static STR IntToString(INT value) {
|
||||
// log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
|
||||
// So round up to allocate 3 output characters per byte, plus 1 for '-'.
|
||||
const size_t kOutputBufSize =
|
||||
3 * sizeof(INT) + std::numeric_limits<INT>::is_signed;
|
||||
|
||||
// Create the string in a temporary buffer, write it back to front, and
|
||||
// then return the substr of what we ended up using.
|
||||
using CHR = typename STR::value_type;
|
||||
CHR outbuf[kOutputBufSize];
|
||||
|
||||
// The ValueOrDie call below can never fail, because UnsignedAbs is valid
|
||||
// for all valid inputs.
|
||||
typename std::make_unsigned<INT>::type res =
|
||||
CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie();
|
||||
|
||||
CHR* end = outbuf + kOutputBufSize;
|
||||
CHR* i = end;
|
||||
do {
|
||||
--i;
|
||||
DCHECK(i != outbuf);
|
||||
*i = static_cast<CHR>((res % 10) + '0');
|
||||
res /= 10;
|
||||
} while (res != 0);
|
||||
if (IsValueNegative(value)) {
|
||||
--i;
|
||||
DCHECK(i != outbuf);
|
||||
*i = static_cast<CHR>('-');
|
||||
}
|
||||
return STR(i, end);
|
||||
}
|
||||
};
|
||||
|
||||
// Utility to convert a character to a digit in a given base. The primary
// template is intentionally empty; the two partial specializations below
// (selected by whether BASE <= 10) provide the actual Convert() function.
template <typename CHAR, int BASE, bool BASE_LTE_10>
class BaseCharToDigit {};

// Faster specialization for bases <= 10: only the characters
// '0' .. '0' + BASE - 1 are digits.
template <typename CHAR, int BASE>
class BaseCharToDigit<CHAR, BASE, true> {
 public:
  static_assert(BASE >= 2 && BASE <= 10,
                "this specialization only handles bases 2 through 10");

  // Stores the digit value of |c| in |*digit| and returns true when |c| is a
  // valid digit in BASE; returns false (leaving |*digit| untouched) otherwise.
  static bool Convert(CHAR c, uint8_t* digit) {
    if (c >= '0' && c < '0' + BASE) {
      *digit = static_cast<uint8_t>(c - '0');
      return true;
    }
    return false;
  }
};

// Specialization for bases where 10 < base <= 36: digits are '0'-'9' followed
// by letters, accepted in either case.
template <typename CHAR, int BASE>
class BaseCharToDigit<CHAR, BASE, false> {
 public:
  // Without this bound a BASE above 36 would silently accept the characters
  // that follow 'z' / 'Z' as digits.
  static_assert(BASE > 10 && BASE <= 36,
                "this specialization only handles bases 11 through 36");

  // Stores the digit value of |c| in |*digit| and returns true when |c| is a
  // valid digit in BASE; returns false (leaving |*digit| untouched) otherwise.
  static bool Convert(CHAR c, uint8_t* digit) {
    // The arithmetic below is carried out in int; each result is known to
    // fit in a uint8_t, so the narrowing is made explicit.
    if (c >= '0' && c <= '9') {
      *digit = static_cast<uint8_t>(c - '0');
    } else if (c >= 'a' && c < 'a' + BASE - 10) {
      *digit = static_cast<uint8_t>(c - 'a' + 10);
    } else if (c >= 'A' && c < 'A' + BASE - 10) {
      *digit = static_cast<uint8_t>(c - 'A' + 10);
    } else {
      return false;
    }
    return true;
  }
};

// Converts |c| to its digit value in BASE, dispatching to the specialization
// that matches the base. Returns false when |c| is not a digit in BASE.
template <int BASE, typename CHAR>
bool CharToDigit(CHAR c, uint8_t* digit) {
  return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
}
|
||||
|
||||
// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
|
||||
// is locale independent, whereas the functions we are replacing were
|
||||
// locale-dependent. TBD what is desired, but for the moment let's not
|
||||
// introduce a change in behaviour.
|
||||
template<typename CHAR> class WhitespaceHelper {
|
||||
};
|
||||
|
||||
template<> class WhitespaceHelper<char> {
|
||||
public:
|
||||
static bool Invoke(char c) {
|
||||
return 0 != isspace(static_cast<unsigned char>(c));
|
||||
}
|
||||
};
|
||||
|
||||
template<> class WhitespaceHelper<char16> {
|
||||
public:
|
||||
static bool Invoke(char16 c) {
|
||||
return 0 != iswspace(c);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
|
||||
return WhitespaceHelper<CHAR>::Invoke(c);
|
||||
}
|
||||
|
||||
// IteratorRangeToNumberTraits should provide:
|
||||
// - a typedef for iterator_type, the iterator type used as input.
|
||||
// - a typedef for value_type, the target numeric type.
|
||||
// - static functions min, max (returning the minimum and maximum permitted
|
||||
// values)
|
||||
// - constant kBase, the base in which to interpret the input
|
||||
template<typename IteratorRangeToNumberTraits>
|
||||
class IteratorRangeToNumber {
|
||||
public:
|
||||
typedef IteratorRangeToNumberTraits traits;
|
||||
typedef typename traits::iterator_type const_iterator;
|
||||
typedef typename traits::value_type value_type;
|
||||
|
||||
// Generalized iterator-range-to-number conversion.
|
||||
//
|
||||
static bool Invoke(const_iterator begin,
|
||||
const_iterator end,
|
||||
value_type* output) {
|
||||
bool valid = true;
|
||||
|
||||
while (begin != end && LocalIsWhitespace(*begin)) {
|
||||
valid = false;
|
||||
++begin;
|
||||
}
|
||||
|
||||
if (begin != end && *begin == '-') {
|
||||
if (!std::numeric_limits<value_type>::is_signed) {
|
||||
*output = 0;
|
||||
valid = false;
|
||||
} else if (!Negative::Invoke(begin + 1, end, output)) {
|
||||
valid = false;
|
||||
}
|
||||
} else {
|
||||
if (begin != end && *begin == '+') {
|
||||
++begin;
|
||||
}
|
||||
if (!Positive::Invoke(begin, end, output)) {
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
|
||||
return valid;
|
||||
}
|
||||
|
||||
private:
|
||||
// Sign provides:
|
||||
// - a static function, CheckBounds, that determines whether the next digit
|
||||
// causes an overflow/underflow
|
||||
// - a static function, Increment, that appends the next digit appropriately
|
||||
// according to the sign of the number being parsed.
|
||||
template<typename Sign>
|
||||
class Base {
|
||||
public:
|
||||
static bool Invoke(const_iterator begin, const_iterator end,
|
||||
typename traits::value_type* output) {
|
||||
*output = 0;
|
||||
|
||||
if (begin == end) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note: no performance difference was found when using template
|
||||
// specialization to remove this check in bases other than 16
|
||||
if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
|
||||
(*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
|
||||
begin += 2;
|
||||
}
|
||||
|
||||
for (const_iterator current = begin; current != end; ++current) {
|
||||
uint8_t new_digit = 0;
|
||||
|
||||
if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (current != begin) {
|
||||
if (!Sign::CheckBounds(output, new_digit)) {
|
||||
return false;
|
||||
}
|
||||
*output *= traits::kBase;
|
||||
}
|
||||
|
||||
Sign::Increment(new_digit, output);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class Positive : public Base<Positive> {
|
||||
public:
|
||||
static bool CheckBounds(value_type* output, uint8_t new_digit) {
|
||||
if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
|
||||
(*output == static_cast<value_type>(traits::max() / traits::kBase) &&
|
||||
new_digit > traits::max() % traits::kBase)) {
|
||||
*output = traits::max();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static void Increment(uint8_t increment, value_type* output) {
|
||||
*output += increment;
|
||||
}
|
||||
};
|
||||
|
||||
class Negative : public Base<Negative> {
|
||||
public:
|
||||
static bool CheckBounds(value_type* output, uint8_t new_digit) {
|
||||
if (*output < traits::min() / traits::kBase ||
|
||||
(*output == traits::min() / traits::kBase &&
|
||||
new_digit > 0 - traits::min() % traits::kBase)) {
|
||||
*output = traits::min();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
static void Increment(uint8_t increment, value_type* output) {
|
||||
*output -= increment;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// Traits bundle for IteratorRangeToNumber: names the iterator and value
// types, exposes the value type's numeric limits, and records the base.
template <typename ITERATOR, typename VALUE, int BASE>
struct BaseIteratorRangeToNumberTraits {
  using iterator_type = ITERATOR;
  using value_type = VALUE;

  // Smallest value std::numeric_limits reports for value_type.
  static value_type min() { return (std::numeric_limits<value_type>::min)(); }

  // Largest value std::numeric_limits reports for value_type.
  static value_type max() { return (std::numeric_limits<value_type>::max)(); }

  // Base in which the input is interpreted.
  static const int kBase = BASE;
};
|
||||
|
||||
template<typename ITERATOR>
|
||||
class BaseHexIteratorRangeToIntTraits
|
||||
: public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
|
||||
};
|
||||
|
||||
template <typename ITERATOR>
|
||||
class BaseHexIteratorRangeToUIntTraits
|
||||
: public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {};
|
||||
|
||||
template <typename ITERATOR>
|
||||
class BaseHexIteratorRangeToInt64Traits
|
||||
: public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {};
|
||||
|
||||
template <typename ITERATOR>
|
||||
class BaseHexIteratorRangeToUInt64Traits
|
||||
: public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {};
|
||||
|
||||
typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>
|
||||
HexIteratorRangeToIntTraits;
|
||||
|
||||
typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator>
|
||||
HexIteratorRangeToUIntTraits;
|
||||
|
||||
typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>
|
||||
HexIteratorRangeToInt64Traits;
|
||||
|
||||
typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>
|
||||
HexIteratorRangeToUInt64Traits;
|
||||
|
||||
template <typename VALUE, int BASE>
|
||||
class StringPieceToNumberTraits
|
||||
: public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
|
||||
VALUE,
|
||||
BASE> {
|
||||
};
|
||||
|
||||
template <typename VALUE>
|
||||
bool StringToIntImpl(StringPiece input, VALUE* output) {
|
||||
return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
template <typename VALUE, int BASE>
|
||||
class StringPiece16ToNumberTraits
|
||||
: public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator,
|
||||
VALUE,
|
||||
BASE> {
|
||||
};
|
||||
|
||||
template <typename VALUE>
|
||||
bool String16ToIntImpl(StringPiece16 input, VALUE* output) {
|
||||
return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::string NumberToString(int value) {
|
||||
return IntToStringT<std::string, int>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(int value) {
|
||||
return IntToStringT<string16, int>::IntToString(value);
|
||||
}
|
||||
|
||||
std::string NumberToString(unsigned value) {
|
||||
return IntToStringT<std::string, unsigned>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(unsigned value) {
|
||||
return IntToStringT<string16, unsigned>::IntToString(value);
|
||||
}
|
||||
|
||||
std::string NumberToString(long value) {
|
||||
return IntToStringT<std::string, long>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(long value) {
|
||||
return IntToStringT<string16, long>::IntToString(value);
|
||||
}
|
||||
|
||||
std::string NumberToString(unsigned long value) {
|
||||
return IntToStringT<std::string, unsigned long>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(unsigned long value) {
|
||||
return IntToStringT<string16, unsigned long>::IntToString(value);
|
||||
}
|
||||
|
||||
std::string NumberToString(long long value) {
|
||||
return IntToStringT<std::string, long long>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(long long value) {
|
||||
return IntToStringT<string16, long long>::IntToString(value);
|
||||
}
|
||||
|
||||
std::string NumberToString(unsigned long long value) {
|
||||
return IntToStringT<std::string, unsigned long long>::IntToString(value);
|
||||
}
|
||||
|
||||
string16 NumberToString16(unsigned long long value) {
|
||||
return IntToStringT<string16, unsigned long long>::IntToString(value);
|
||||
}
|
||||
|
||||
// Returns the shared double-to-string converter. It is constructed on first
// use inside a NoDestructor wrapper and the same instance is returned by
// every call.
static const double_conversion::DoubleToStringConverter*
GetDoubleToStringConverter() {
  using Converter = double_conversion::DoubleToStringConverter;
  static NoDestructor<Converter> shared_converter(
      Converter::EMIT_POSITIVE_EXPONENT_SIGN, nullptr, nullptr, 'e', -6, 12, 0,
      0);
  return shared_converter.get();
}
|
||||
|
||||
std::string NumberToString(double value) {
|
||||
char buffer[32];
|
||||
double_conversion::StringBuilder builder(buffer, sizeof(buffer));
|
||||
GetDoubleToStringConverter()->ToShortest(value, &builder);
|
||||
return std::string(buffer, builder.position());
|
||||
}
|
||||
|
||||
// Formats |value| with the shared converter's ToShortest() into a stack
// buffer and returns the produced characters as a string16.
base::string16 NumberToString16(double value) {
  char scratch[32];
  double_conversion::StringBuilder out(scratch, sizeof(scratch));

  const double_conversion::DoubleToStringConverter* converter =
      GetDoubleToStringConverter();
  converter->ToShortest(value, &out);

  // The number will be ASCII. The iterator-range constructor is used so that
  // each 8-bit character is promoted to a 16-bit one by assignment.
  const char* const first = scratch;
  const char* const last = scratch + out.position();
  return base::string16(first, last);
}
|
||||
|
||||
bool StringToInt(StringPiece input, int* output) {
|
||||
return StringToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToInt(StringPiece16 input, int* output) {
|
||||
return String16ToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToUint(StringPiece input, unsigned* output) {
|
||||
return StringToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToUint(StringPiece16 input, unsigned* output) {
|
||||
return String16ToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToInt64(StringPiece input, int64_t* output) {
|
||||
return StringToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToInt64(StringPiece16 input, int64_t* output) {
|
||||
return String16ToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToUint64(StringPiece input, uint64_t* output) {
|
||||
return StringToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToUint64(StringPiece16 input, uint64_t* output) {
|
||||
return String16ToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToSizeT(StringPiece input, size_t* output) {
|
||||
return StringToIntImpl(input, output);
|
||||
}
|
||||
|
||||
bool StringToSizeT(StringPiece16 input, size_t* output) {
|
||||
return String16ToIntImpl(input, output);
|
||||
}
|
||||
|
||||
template <typename STRING, typename CHAR>
|
||||
bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) {
|
||||
static NoDestructor<double_conversion::StringToDoubleConverter> converter(
|
||||
double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
|
||||
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
|
||||
0.0, 0, nullptr, nullptr);
|
||||
|
||||
int processed_characters_count;
|
||||
*output = converter->StringToDouble(data, input.size(),
|
||||
&processed_characters_count);
|
||||
|
||||
// Cases to return false:
|
||||
// - If the input string is empty, there was nothing to parse.
|
||||
// - If the value saturated to HUGE_VAL.
|
||||
// - If the entire string was not processed, there are either characters
|
||||
// remaining in the string after a parsed number, or the string does not
|
||||
// begin with a parseable number.
|
||||
// - If the first character is a space, there was leading whitespace
|
||||
return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL &&
|
||||
static_cast<size_t>(processed_characters_count) == input.size() &&
|
||||
!IsUnicodeWhitespace(input[0]);
|
||||
}
|
||||
|
||||
bool StringToDouble(StringPiece input, double* output) {
|
||||
return StringToDoubleImpl(input, input.data(), output);
|
||||
}
|
||||
|
||||
bool StringToDouble(StringPiece16 input, double* output) {
|
||||
return StringToDoubleImpl(
|
||||
input, reinterpret_cast<const uint16_t*>(input.data()), output);
|
||||
}
|
||||
|
||||
// Returns the upper-case hexadecimal representation of the |size| bytes
// starting at |bytes|. Every input byte yields exactly two output characters,
// most significant nibble first; an empty input yields an empty string.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  // Read the input as unsigned bytes: shifting a negative (signed) char right
  // is implementation-defined before C++20, so the nibbles are extracted from
  // an unsigned value instead.
  const unsigned char* const data = static_cast<const unsigned char*>(bytes);

  // Each input byte creates two output hex characters.
  std::string ret(size * 2, '\0');

  for (size_t i = 0; i < size; ++i) {
    const unsigned char b = data[i];
    ret[(i * 2)] = kHexChars[(b >> 4) & 0xf];
    ret[(i * 2) + 1] = kHexChars[b & 0xf];
  }
  return ret;
}
|
||||
|
||||
// Span convenience overload: encodes the bytes the span refers to by
// forwarding its pointer and length to the pointer/size overload.
std::string HexEncode(base::span<const uint8_t> bytes) {
  const uint8_t* const first = bytes.data();
  const size_t length = bytes.size();
  return HexEncode(first, length);
}
|
||||
|
||||
bool HexStringToInt(StringPiece input, int* output) {
|
||||
return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
bool HexStringToUInt(StringPiece input, uint32_t* output) {
|
||||
return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
bool HexStringToInt64(StringPiece input, int64_t* output) {
|
||||
return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
bool HexStringToUInt64(StringPiece input, uint64_t* output) {
|
||||
return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke(
|
||||
input.begin(), input.end(), output);
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
static bool HexStringToByteContainer(StringPiece input, Container* output) {
|
||||
DCHECK_EQ(output->size(), 0u);
|
||||
size_t count = input.size();
|
||||
if (count == 0 || (count % 2) != 0)
|
||||
return false;
|
||||
for (uintptr_t i = 0; i < count / 2; ++i) {
|
||||
uint8_t msb = 0; // most significant 4 bits
|
||||
uint8_t lsb = 0; // least significant 4 bits
|
||||
if (!CharToDigit<16>(input[i * 2], &msb) ||
|
||||
!CharToDigit<16>(input[i * 2 + 1], &lsb)) {
|
||||
return false;
|
||||
}
|
||||
output->push_back((msb << 4) | lsb);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decodes |input| into a byte vector via HexStringToByteContainer.
bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) {
  return HexStringToByteContainer<std::vector<uint8_t>>(input, output);
}

// Decodes |input| into a std::string via HexStringToByteContainer.
bool HexStringToString(StringPiece input, std::string* output) {
  return HexStringToByteContainer<std::string>(input, output);
}
|
||||
|
||||
// Decodes the hex digits in |input| into |output|, one byte per digit pair.
// Returns false for an empty or odd-length input, when |output| does not hold
// exactly half as many bytes as |input| has characters, or as soon as a
// non-hex character is met; bytes written before that point stay written.
bool HexStringToSpan(StringPiece input, base::span<uint8_t> output) {
  const size_t hex_length = input.size();
  const bool has_whole_pairs = hex_length != 0 && (hex_length % 2) == 0;
  if (!has_whole_pairs)
    return false;

  // The destination has to match the decoded size exactly.
  if (output.size() != hex_length / 2)
    return false;

  for (size_t pos = 0; pos < hex_length; pos += 2) {
    uint8_t high_nibble = 0;  // most significant 4 bits
    uint8_t low_nibble = 0;   // least significant 4 bits
    const bool pair_ok = CharToDigit<16>(input[pos], &high_nibble) &&
                         CharToDigit<16>(input[pos + 1], &low_nibble);
    if (!pair_ok)
      return false;
    output[pos / 2] = (high_nibble << 4) | low_nibble;
  }
  return true;
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
|
||||
#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/containers/span.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// IMPORTANT MESSAGE FROM YOUR SPONSOR
|
||||
//
|
||||
// This file contains no "wstring" variants. New code should use string16. If
|
||||
// you need to make old code work, use the UTF8 version and convert. Please do
|
||||
// not add wstring variants.
|
||||
//
|
||||
// Please do not add "convenience" functions for converting strings to integers
|
||||
// that return the value and ignore success/failure. That encourages people to
|
||||
// write code that doesn't properly handle the error conditions.
|
||||
//
|
||||
// DO NOT use these functions in any UI unless it's NOT localized on purpose.
|
||||
// Instead, use base::MessageFormatter for a complex message with numbers
|
||||
// (integer, float, double) embedded or base::Format{Number,Double,Percent} to
|
||||
// just format a single number/percent. Note that some languages use native
|
||||
// digits instead of ASCII digits while others use a group separator or decimal
|
||||
// point different from ',' and '.'. Using these functions in the UI would lead
|
||||
// numbers to be formatted in a non-native way.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
namespace base {
|
||||
|
||||
// Number -> string conversions ------------------------------------------------
|
||||
|
||||
// Ignores locale! see warning above.
|
||||
BASE_EXPORT std::string NumberToString(int value);
|
||||
BASE_EXPORT string16 NumberToString16(int value);
|
||||
BASE_EXPORT std::string NumberToString(unsigned int value);
|
||||
BASE_EXPORT string16 NumberToString16(unsigned int value);
|
||||
BASE_EXPORT std::string NumberToString(long value);
|
||||
BASE_EXPORT string16 NumberToString16(long value);
|
||||
BASE_EXPORT std::string NumberToString(unsigned long value);
|
||||
BASE_EXPORT string16 NumberToString16(unsigned long value);
|
||||
BASE_EXPORT std::string NumberToString(long long value);
|
||||
BASE_EXPORT string16 NumberToString16(long long value);
|
||||
BASE_EXPORT std::string NumberToString(unsigned long long value);
|
||||
BASE_EXPORT string16 NumberToString16(unsigned long long value);
|
||||
BASE_EXPORT std::string NumberToString(double value);
|
||||
BASE_EXPORT string16 NumberToString16(double value);
|
||||
|
||||
// String -> number conversions ------------------------------------------------
|
||||
|
||||
// Perform a best-effort conversion of the input string to a numeric type,
|
||||
// setting |*output| to the result of the conversion. Returns true for
|
||||
// "perfect" conversions; returns false in the following cases:
|
||||
// - Overflow. |*output| will be set to the maximum value supported
|
||||
// by the data type.
|
||||
// - Underflow. |*output| will be set to the minimum value supported
|
||||
// by the data type.
|
||||
// - Trailing characters in the string after parsing the number. |*output|
|
||||
// will be set to the value of the number that was parsed.
|
||||
// - Leading whitespace in the string before parsing the number. |*output| will
|
||||
// be set to the value of the number that was parsed.
|
||||
// - No characters parseable as a number at the beginning of the string.
|
||||
// |*output| will be set to 0.
|
||||
// - Empty string. |*output| will be set to 0.
|
||||
// WARNING: Will write to |output| even when returning false.
|
||||
// Read the comments above carefully.
|
||||
BASE_EXPORT bool StringToInt(StringPiece input, int* output);
|
||||
BASE_EXPORT bool StringToInt(StringPiece16 input, int* output);
|
||||
|
||||
BASE_EXPORT bool StringToUint(StringPiece input, unsigned* output);
|
||||
BASE_EXPORT bool StringToUint(StringPiece16 input, unsigned* output);
|
||||
|
||||
BASE_EXPORT bool StringToInt64(StringPiece input, int64_t* output);
|
||||
BASE_EXPORT bool StringToInt64(StringPiece16 input, int64_t* output);
|
||||
|
||||
BASE_EXPORT bool StringToUint64(StringPiece input, uint64_t* output);
|
||||
BASE_EXPORT bool StringToUint64(StringPiece16 input, uint64_t* output);
|
||||
|
||||
BASE_EXPORT bool StringToSizeT(StringPiece input, size_t* output);
|
||||
BASE_EXPORT bool StringToSizeT(StringPiece16 input, size_t* output);
|
||||
|
||||
// For floating-point conversions, only conversions of input strings in decimal
|
||||
// form are defined to work. Behavior with strings representing floating-point
|
||||
// numbers in hexadecimal, and strings representing non-finite values (such as
|
||||
// NaN and inf) is undefined. Otherwise, these behave the same as the integral
|
||||
// variants. This expects the input string to NOT be specific to the locale.
|
||||
// If your input is locale specific, use ICU to read the number.
|
||||
// WARNING: Will write to |output| even when returning false.
|
||||
// Read the comments here and above StringToInt() carefully.
|
||||
BASE_EXPORT bool StringToDouble(StringPiece input, double* output);
|
||||
BASE_EXPORT bool StringToDouble(StringPiece16 input, double* output);
|
||||
|
||||
// Hex encoding ----------------------------------------------------------------
|
||||
|
||||
// Returns a hex string representation of a binary buffer. The returned hex
|
||||
// string will be in upper case. This function does not check if |size| is
|
||||
// within reasonable limits since it's written with trusted data in mind. If
|
||||
// you suspect that the data you want to format might be large, the absolute
|
||||
// max size for |size| is
|
||||
// std::numeric_limits<size_t>::max() / 2
|
||||
BASE_EXPORT std::string HexEncode(const void* bytes, size_t size);
|
||||
BASE_EXPORT std::string HexEncode(base::span<const uint8_t> bytes);
|
||||
|
||||
// Best effort conversion, see StringToInt above for restrictions.
|
||||
// Will only successfully parse hex values that will fit into |output|, i.e.
|
||||
// -0x80000000 <= |input| <= 0x7FFFFFFF.
|
||||
BASE_EXPORT bool HexStringToInt(StringPiece input, int* output);
|
||||
|
||||
// Best effort conversion, see StringToInt above for restrictions.
|
||||
// Will only successfully parse hex values that will fit into |output|, i.e.
|
||||
// 0x00000000 <= |input| <= 0xFFFFFFFF.
|
||||
// The string is not required to start with 0x.
|
||||
BASE_EXPORT bool HexStringToUInt(StringPiece input, uint32_t* output);
|
||||
|
||||
// Best effort conversion, see StringToInt above for restrictions.
|
||||
// Will only successfully parse hex values that will fit into |output|, i.e.
|
||||
// -0x8000000000000000 <= |input| <= 0x7FFFFFFFFFFFFFFF.
|
||||
BASE_EXPORT bool HexStringToInt64(StringPiece input, int64_t* output);
|
||||
|
||||
// Best effort conversion, see StringToInt above for restrictions.
|
||||
// Will only successfully parse hex values that will fit into |output|, i.e.
|
||||
// 0x0000000000000000 <= |input| <= 0xFFFFFFFFFFFFFFFF.
|
||||
// The string is not required to start with 0x.
|
||||
BASE_EXPORT bool HexStringToUInt64(StringPiece input, uint64_t* output);
|
||||
|
||||
// Similar to the previous functions, except that output is a vector of bytes.
|
||||
// |*output| will contain as many bytes as were successfully parsed prior to the
|
||||
// error. There is no overflow, but input.size() must be evenly divisible by 2.
|
||||
// Leading 0x or +/- are not allowed.
|
||||
BASE_EXPORT bool HexStringToBytes(StringPiece input,
|
||||
std::vector<uint8_t>* output);
|
||||
|
||||
// Same as HexStringToBytes, but for an std::string.
|
||||
BASE_EXPORT bool HexStringToString(StringPiece input, std::string* output);
|
||||
|
||||
// Decodes the hex string |input| into a presized |output|. The output buffer
|
||||
// must be sized exactly to |input.size() / 2| or decoding will fail and no
|
||||
// bytes will be written to |output|. Decoding an empty input is also
|
||||
// considered a failure. When decoding fails due to encountering invalid input
|
||||
// characters, |output| will have been filled with the decoded bytes up until
|
||||
// the failure.
|
||||
BASE_EXPORT bool HexStringToSpan(StringPiece input, base::span<uint8_t> output);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <string>
#include <vector>

#include "base/strings/string_number_conversions.h"
|
||||
|
||||
// Round-trips one fuzzer-chosen number through a number->string conversion and
// the matching string->number parser, CHECKing that the value survives.
//
// |data|, |size|:  raw fuzzer input; the first sizeof(NumberType) bytes are
//                  used as the number under test. Shorter inputs are ignored.
// |num_to_string|: conversion under test (e.g. base::NumberToString).
// |string_to_num|: parser under test (e.g. base::StringToInt).
template <class NumberType, class StringPieceType, class StringType>
void CheckRoundtripsT(const uint8_t* data,
                      const size_t size,
                      StringType (*num_to_string)(NumberType),
                      bool (*string_to_num)(StringPieceType, NumberType*)) {
  // Ensure we can read a NumberType from |data|.
  if (size < sizeof(NumberType))
    return;

  // Copy the bytes out rather than dereferencing a reinterpret_cast'ed
  // pointer: |data| carries no alignment guarantee for NumberType, and reading
  // a byte buffer through a NumberType lvalue violates strict aliasing.
  NumberType v1;
  memcpy(&v1, data, sizeof(NumberType));

  // Because we started with an arbitrary NumberType value, not an arbitrary
  // string, we expect that the function |string_to_num| (e.g. StringToInt) will
  // return true, indicating a perfect conversion.
  NumberType v2;
  CHECK(string_to_num(num_to_string(v1), &v2));

  // Given that this was a perfect conversion, we expect the original NumberType
  // value to equal the newly parsed one.
  CHECK_EQ(v1, v2);
}
|
||||
|
||||
template <class NumberType>
|
||||
void CheckRoundtrips(const uint8_t* data,
|
||||
const size_t size,
|
||||
bool (*string_to_num)(base::StringPiece, NumberType*)) {
|
||||
return CheckRoundtripsT<NumberType, base::StringPiece, std::string>(
|
||||
data, size, &base::NumberToString, string_to_num);
|
||||
}
|
||||
|
||||
template <class NumberType>
|
||||
void CheckRoundtrips16(const uint8_t* data,
|
||||
const size_t size,
|
||||
bool (*string_to_num)(base::StringPiece16,
|
||||
NumberType*)) {
|
||||
return CheckRoundtripsT<NumberType, base::StringPiece16, base::string16>(
|
||||
data, size, &base::NumberToString16, string_to_num);
|
||||
}
|
||||
|
||||
// Entry point for LibFuzzer.
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
// For each instantiation of NumberToString f and its corresponding StringTo*
|
||||
// function g, check that f(g(x)) = x holds for fuzzer-determined values of x.
|
||||
CheckRoundtrips<int>(data, size, &base::StringToInt);
|
||||
CheckRoundtrips16<int>(data, size, &base::StringToInt);
|
||||
CheckRoundtrips<unsigned int>(data, size, &base::StringToUint);
|
||||
CheckRoundtrips16<unsigned int>(data, size, &base::StringToUint);
|
||||
CheckRoundtrips<int64_t>(data, size, &base::StringToInt64);
|
||||
CheckRoundtrips16<int64_t>(data, size, &base::StringToInt64);
|
||||
CheckRoundtrips<uint64_t>(data, size, &base::StringToUint64);
|
||||
CheckRoundtrips16<uint64_t>(data, size, &base::StringToUint64);
|
||||
CheckRoundtrips<size_t>(data, size, &base::StringToSizeT);
|
||||
CheckRoundtrips16<size_t>(data, size, &base::StringToSizeT);
|
||||
|
||||
base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
|
||||
size);
|
||||
std::string string_input(reinterpret_cast<const char*>(data), size);
|
||||
|
||||
int out_int;
|
||||
base::StringToInt(string_piece_input, &out_int);
|
||||
unsigned out_uint;
|
||||
base::StringToUint(string_piece_input, &out_uint);
|
||||
int64_t out_int64;
|
||||
base::StringToInt64(string_piece_input, &out_int64);
|
||||
uint64_t out_uint64;
|
||||
base::StringToUint64(string_piece_input, &out_uint64);
|
||||
size_t out_size;
|
||||
base::StringToSizeT(string_piece_input, &out_size);
|
||||
|
||||
// Test for StringPiece16 if size is even.
|
||||
if (size % 2 == 0) {
|
||||
base::StringPiece16 string_piece_input16(
|
||||
reinterpret_cast<const base::char16*>(data), size / 2);
|
||||
|
||||
base::StringToInt(string_piece_input16, &out_int);
|
||||
base::StringToUint(string_piece_input16, &out_uint);
|
||||
base::StringToInt64(string_piece_input16, &out_int64);
|
||||
base::StringToUint64(string_piece_input16, &out_uint64);
|
||||
base::StringToSizeT(string_piece_input16, &out_size);
|
||||
}
|
||||
|
||||
double out_double;
|
||||
base::StringToDouble(string_input, &out_double);
|
||||
|
||||
base::HexStringToInt(string_piece_input, &out_int);
|
||||
base::HexStringToUInt(string_piece_input, &out_uint);
|
||||
base::HexStringToInt64(string_piece_input, &out_int64);
|
||||
base::HexStringToUInt64(string_piece_input, &out_uint64);
|
||||
std::vector<uint8_t> out_bytes;
|
||||
base::HexStringToBytes(string_piece_input, &out_bytes);
|
||||
|
||||
base::HexEncode(data, size);
|
||||
|
||||
// Convert the numbers back to strings.
|
||||
base::NumberToString(out_int);
|
||||
base::NumberToString16(out_int);
|
||||
base::NumberToString(out_uint);
|
||||
base::NumberToString16(out_uint);
|
||||
base::NumberToString(out_int64);
|
||||
base::NumberToString16(out_int64);
|
||||
base::NumberToString(out_uint64);
|
||||
base::NumberToString16(out_uint64);
|
||||
base::NumberToString(out_double);
|
||||
base::NumberToString16(out_double);
|
||||
|
||||
return 0;
|
||||
}
|
||||
412
TMessagesProj/jni/voip/webrtc/base/strings/string_piece.cc
Normal file
412
TMessagesProj/jni/voip/webrtc/base/strings/string_piece.cc
Normal file
|
|
@ -0,0 +1,412 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
// Copied from strings/stringpiece.cc with modifications
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <ostream>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
|
||||
namespace base {
|
||||
namespace {
|
||||
|
||||
// For each character in characters_wanted, sets the index corresponding
|
||||
// to the ASCII code of that character to 1 in table. This is used by
|
||||
// the find_.*_of methods below to tell whether or not a character is in
|
||||
// the lookup table in constant time.
|
||||
// The argument `table' must be an array that is large enough to hold all
|
||||
// the possible values of an unsigned char. Thus it should be be declared
|
||||
// as follows:
|
||||
// bool table[UCHAR_MAX + 1]
|
||||
inline void BuildLookupTable(const StringPiece& characters_wanted,
|
||||
bool* table) {
|
||||
const size_t length = characters_wanted.length();
|
||||
const char* const data = characters_wanted.data();
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
table[static_cast<unsigned char>(data[i])] = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// MSVC doesn't like complex extern templates and DLLs.
|
||||
#if !defined(COMPILER_MSVC)
|
||||
template class BasicStringPiece<std::string>;
|
||||
template class BasicStringPiece<string16>;
|
||||
#endif
|
||||
|
||||
std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
|
||||
o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
|
||||
return o;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& o, const StringPiece16& piece) {
|
||||
return o << UTF16ToUTF8(piece);
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename STR>
|
||||
size_t copyT(const BasicStringPiece<STR>& self,
|
||||
typename STR::value_type* buf,
|
||||
size_t n,
|
||||
size_t pos) {
|
||||
size_t ret = std::min(self.size() - pos, n);
|
||||
memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type));
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) {
|
||||
return copyT(self, buf, n, pos);
|
||||
}
|
||||
|
||||
size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) {
|
||||
return copyT(self, buf, n, pos);
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t findT(const BasicStringPiece<STR>& self,
|
||||
const BasicStringPiece<STR>& s,
|
||||
size_t pos) {
|
||||
if (pos > self.size())
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
typename BasicStringPiece<STR>::const_iterator result =
|
||||
std::search(self.begin() + pos, self.end(), s.begin(), s.end());
|
||||
const size_t xpos =
|
||||
static_cast<size_t>(result - self.begin());
|
||||
return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t find(const StringPiece& self, const StringPiece& s, size_t pos) {
|
||||
return findT(self, s, pos);
|
||||
}
|
||||
|
||||
size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) {
|
||||
return findT(self, s, pos);
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t findT(const BasicStringPiece<STR>& self,
|
||||
typename STR::value_type c,
|
||||
size_t pos) {
|
||||
if (pos >= self.size())
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
typename BasicStringPiece<STR>::const_iterator result =
|
||||
std::find(self.begin() + pos, self.end(), c);
|
||||
return result != self.end() ?
|
||||
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t find(const StringPiece& self, char c, size_t pos) {
|
||||
return findT(self, c, pos);
|
||||
}
|
||||
|
||||
size_t find(const StringPiece16& self, char16 c, size_t pos) {
|
||||
return findT(self, c, pos);
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t rfindT(const BasicStringPiece<STR>& self,
|
||||
const BasicStringPiece<STR>& s,
|
||||
size_t pos) {
|
||||
if (self.size() < s.size())
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
if (s.empty())
|
||||
return std::min(self.size(), pos);
|
||||
|
||||
typename BasicStringPiece<STR>::const_iterator last =
|
||||
self.begin() + std::min(self.size() - s.size(), pos) + s.size();
|
||||
typename BasicStringPiece<STR>::const_iterator result =
|
||||
std::find_end(self.begin(), last, s.begin(), s.end());
|
||||
return result != last ?
|
||||
static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) {
|
||||
return rfindT(self, s, pos);
|
||||
}
|
||||
|
||||
size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) {
|
||||
return rfindT(self, s, pos);
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t rfindT(const BasicStringPiece<STR>& self,
|
||||
typename STR::value_type c,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
for (size_t i = std::min(pos, self.size() - 1); ;
|
||||
--i) {
|
||||
if (self.data()[i] == c)
|
||||
return i;
|
||||
if (i == 0)
|
||||
break;
|
||||
}
|
||||
return BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t rfind(const StringPiece& self, char c, size_t pos) {
|
||||
return rfindT(self, c, pos);
|
||||
}
|
||||
|
||||
size_t rfind(const StringPiece16& self, char16 c, size_t pos) {
|
||||
return rfindT(self, c, pos);
|
||||
}
|
||||
|
||||
// 8-bit version using lookup table.
|
||||
size_t find_first_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0 || s.size() == 0)
|
||||
return StringPiece::npos;
|
||||
|
||||
// Avoid the cost of BuildLookupTable() for a single-character search.
|
||||
if (s.size() == 1)
|
||||
return find(self, s.data()[0], pos);
|
||||
|
||||
bool lookup[UCHAR_MAX + 1] = { false };
|
||||
BuildLookupTable(s, lookup);
|
||||
for (size_t i = pos; i < self.size(); ++i) {
|
||||
if (lookup[static_cast<unsigned char>(self.data()[i])]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return StringPiece::npos;
|
||||
}
|
||||
|
||||
// 16-bit brute force version.
|
||||
size_t find_first_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos) {
|
||||
// Use the faster std::find() if searching for a single character.
|
||||
StringPiece16::const_iterator found =
|
||||
s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0])
|
||||
: std::find_first_of(self.begin() + pos, self.end(),
|
||||
s.begin(), s.end());
|
||||
if (found == self.end())
|
||||
return StringPiece16::npos;
|
||||
return found - self.begin();
|
||||
}
|
||||
|
||||
// 8-bit version using lookup table.
|
||||
size_t find_first_not_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return StringPiece::npos;
|
||||
|
||||
if (s.size() == 0)
|
||||
return 0;
|
||||
|
||||
// Avoid the cost of BuildLookupTable() for a single-character search.
|
||||
if (s.size() == 1)
|
||||
return find_first_not_of(self, s.data()[0], pos);
|
||||
|
||||
bool lookup[UCHAR_MAX + 1] = { false };
|
||||
BuildLookupTable(s, lookup);
|
||||
for (size_t i = pos; i < self.size(); ++i) {
|
||||
if (!lookup[static_cast<unsigned char>(self.data()[i])]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return StringPiece::npos;
|
||||
}
|
||||
|
||||
// 16-bit brute-force version.
|
||||
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return StringPiece16::npos;
|
||||
|
||||
for (size_t self_i = pos; self_i < self.size(); ++self_i) {
|
||||
bool found = false;
|
||||
for (auto c : s) {
|
||||
if (self[self_i] == c) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
return self_i;
|
||||
}
|
||||
return StringPiece16::npos;
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t find_first_not_ofT(const BasicStringPiece<STR>& self,
|
||||
typename STR::value_type c,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
for (; pos < self.size(); ++pos) {
|
||||
if (self.data()[pos] != c) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
return BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t find_first_not_of(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos) {
|
||||
return find_first_not_ofT(self, c, pos);
|
||||
}
|
||||
|
||||
size_t find_first_not_of(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos) {
|
||||
return find_first_not_ofT(self, c, pos);
|
||||
}
|
||||
|
||||
// 8-bit version using lookup table.
|
||||
size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) {
|
||||
if (self.size() == 0 || s.size() == 0)
|
||||
return StringPiece::npos;
|
||||
|
||||
// Avoid the cost of BuildLookupTable() for a single-character search.
|
||||
if (s.size() == 1)
|
||||
return rfind(self, s.data()[0], pos);
|
||||
|
||||
bool lookup[UCHAR_MAX + 1] = { false };
|
||||
BuildLookupTable(s, lookup);
|
||||
for (size_t i = std::min(pos, self.size() - 1); ; --i) {
|
||||
if (lookup[static_cast<unsigned char>(self.data()[i])])
|
||||
return i;
|
||||
if (i == 0)
|
||||
break;
|
||||
}
|
||||
return StringPiece::npos;
|
||||
}
|
||||
|
||||
// 16-bit brute-force version.
|
||||
size_t find_last_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return StringPiece16::npos;
|
||||
|
||||
for (size_t self_i = std::min(pos, self.size() - 1); ;
|
||||
--self_i) {
|
||||
for (auto c : s) {
|
||||
if (self.data()[self_i] == c)
|
||||
return self_i;
|
||||
}
|
||||
if (self_i == 0)
|
||||
break;
|
||||
}
|
||||
return StringPiece16::npos;
|
||||
}
|
||||
|
||||
// 8-bit version using lookup table.
|
||||
size_t find_last_not_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return StringPiece::npos;
|
||||
|
||||
size_t i = std::min(pos, self.size() - 1);
|
||||
if (s.size() == 0)
|
||||
return i;
|
||||
|
||||
// Avoid the cost of BuildLookupTable() for a single-character search.
|
||||
if (s.size() == 1)
|
||||
return find_last_not_of(self, s.data()[0], pos);
|
||||
|
||||
bool lookup[UCHAR_MAX + 1] = { false };
|
||||
BuildLookupTable(s, lookup);
|
||||
for (; ; --i) {
|
||||
if (!lookup[static_cast<unsigned char>(self.data()[i])])
|
||||
return i;
|
||||
if (i == 0)
|
||||
break;
|
||||
}
|
||||
return StringPiece::npos;
|
||||
}
|
||||
|
||||
// 16-bit brute-force version.
|
||||
size_t find_last_not_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return StringPiece::npos;
|
||||
|
||||
for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) {
|
||||
bool found = false;
|
||||
for (auto c : s) {
|
||||
if (self.data()[self_i] == c) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
return self_i;
|
||||
if (self_i == 0)
|
||||
break;
|
||||
}
|
||||
return StringPiece16::npos;
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
size_t find_last_not_ofT(const BasicStringPiece<STR>& self,
|
||||
typename STR::value_type c,
|
||||
size_t pos) {
|
||||
if (self.size() == 0)
|
||||
return BasicStringPiece<STR>::npos;
|
||||
|
||||
for (size_t i = std::min(pos, self.size() - 1); ; --i) {
|
||||
if (self.data()[i] != c)
|
||||
return i;
|
||||
if (i == 0)
|
||||
break;
|
||||
}
|
||||
return BasicStringPiece<STR>::npos;
|
||||
}
|
||||
|
||||
size_t find_last_not_of(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos) {
|
||||
return find_last_not_ofT(self, c, pos);
|
||||
}
|
||||
|
||||
size_t find_last_not_of(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos) {
|
||||
return find_last_not_ofT(self, c, pos);
|
||||
}
|
||||
|
||||
template<typename STR>
|
||||
BasicStringPiece<STR> substrT(const BasicStringPiece<STR>& self,
|
||||
size_t pos,
|
||||
size_t n) {
|
||||
if (pos > self.size()) pos = self.size();
|
||||
if (n > self.size() - pos) n = self.size() - pos;
|
||||
return BasicStringPiece<STR>(self.data() + pos, n);
|
||||
}
|
||||
|
||||
StringPiece substr(const StringPiece& self,
|
||||
size_t pos,
|
||||
size_t n) {
|
||||
return substrT(self, pos, n);
|
||||
}
|
||||
|
||||
StringPiece16 substr(const StringPiece16& self,
|
||||
size_t pos,
|
||||
size_t n) {
|
||||
return substrT(self, pos, n);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace base
|
||||
519
TMessagesProj/jni/voip/webrtc/base/strings/string_piece.h
Normal file
519
TMessagesProj/jni/voip/webrtc/base/strings/string_piece.h
Normal file
|
|
@ -0,0 +1,519 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
// Copied from strings/stringpiece.h with modifications
|
||||
//
|
||||
// A string-like object that points to a sized piece of memory.
|
||||
//
|
||||
// You can use StringPiece as a function or method parameter. A StringPiece
|
||||
// parameter can receive a double-quoted string literal argument, a "const
|
||||
// char*" argument, a string argument, or a StringPiece argument with no data
|
||||
// copying. Systematic use of StringPiece for arguments reduces data
|
||||
// copies and strlen() calls.
|
||||
//
|
||||
// Prefer passing StringPieces by value:
|
||||
// void MyFunction(StringPiece arg);
|
||||
// If circumstances require, you may also pass by const reference:
|
||||
// void MyFunction(const StringPiece& arg); // not preferred
|
||||
// Both of these have the same lifetime semantics. Passing by value
|
||||
// generates slightly smaller code. For more discussion, Googlers can see
|
||||
// the thread go/stringpiecebyvalue on c-users.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_PIECE_H_
|
||||
#define BASE_STRINGS_STRING_PIECE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/char_traits.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece_forward.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// internal --------------------------------------------------------------------
|
||||
|
||||
// Many of the StringPiece functions use different implementations for the
|
||||
// 8-bit and 16-bit versions, and we don't want lots of template expansions in
|
||||
// this (very common) header that will slow down compilation.
|
||||
//
|
||||
// So here we define overloaded functions called by the StringPiece template.
|
||||
// For those that share an implementation, the two versions will expand to a
|
||||
// template internal to the .cc file.
|
||||
namespace internal {
|
||||
|
||||
BASE_EXPORT size_t copy(const StringPiece& self,
|
||||
char* buf,
|
||||
size_t n,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t copy(const StringPiece16& self,
|
||||
char16* buf,
|
||||
size_t n,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t find(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t rfind(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t rfind(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t rfind(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t rfind(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t find_first_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_first_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_first_not_of(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_first_not_of(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t find_last_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_of(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_of(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
|
||||
const StringPiece& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
|
||||
const StringPiece16& s,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_not_of(const StringPiece16& self,
|
||||
char16 c,
|
||||
size_t pos);
|
||||
BASE_EXPORT size_t find_last_not_of(const StringPiece& self,
|
||||
char c,
|
||||
size_t pos);
|
||||
|
||||
BASE_EXPORT StringPiece substr(const StringPiece& self,
|
||||
size_t pos,
|
||||
size_t n);
|
||||
BASE_EXPORT StringPiece16 substr(const StringPiece16& self,
|
||||
size_t pos,
|
||||
size_t n);
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// BasicStringPiece ------------------------------------------------------------
|
||||
|
||||
// Defines the types, methods, operators, and data members common to both
|
||||
// StringPiece and StringPiece16.
|
||||
//
|
||||
// This is templatized by string class type rather than character type, so
|
||||
// BasicStringPiece<std::string> or BasicStringPiece<base::string16>.
|
||||
template <typename STRING_TYPE> class BasicStringPiece {
|
||||
public:
|
||||
// Standard STL container boilerplate.
|
||||
typedef size_t size_type;
|
||||
typedef typename STRING_TYPE::value_type value_type;
|
||||
typedef const value_type* pointer;
|
||||
typedef const value_type& reference;
|
||||
typedef const value_type& const_reference;
|
||||
typedef ptrdiff_t difference_type;
|
||||
typedef const value_type* const_iterator;
|
||||
typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
|
||||
|
||||
static const size_type npos;
|
||||
|
||||
public:
|
||||
// We provide non-explicit singleton constructors so users can pass
|
||||
// in a "const char*" or a "string" wherever a "StringPiece" is
|
||||
// expected (likewise for char16, string16, StringPiece16).
|
||||
constexpr BasicStringPiece() : ptr_(NULL), length_(0) {}
|
||||
// TODO(crbug.com/1049498): Construction from nullptr is not allowed for
|
||||
// std::basic_string_view, so remove the special handling for it.
|
||||
// Note: This doesn't just use STRING_TYPE::traits_type::length(), since that
|
||||
// isn't constexpr until C++17.
|
||||
constexpr BasicStringPiece(const value_type* str)
|
||||
: ptr_(str), length_(!str ? 0 : CharTraits<value_type>::length(str)) {}
|
||||
// Explicitly disallow construction from nullptr. Note that this does not
|
||||
// catch construction from runtime strings that might be null.
|
||||
// Note: The following is just a more elaborate way of spelling
|
||||
// `BasicStringPiece(nullptr_t) = delete`, but unfortunately the terse form is
|
||||
// not supported by the PNaCl toolchain.
|
||||
// TODO(crbug.com/1049498): Remove once we CHECK(str) in the constructor
|
||||
// above.
|
||||
template <class T, class = std::enable_if_t<std::is_null_pointer<T>::value>>
|
||||
BasicStringPiece(T) {
|
||||
static_assert(sizeof(T) == 0, // Always false.
|
||||
"StringPiece does not support construction from nullptr, use "
|
||||
"the default constructor instead.");
|
||||
}
|
||||
BasicStringPiece(const STRING_TYPE& str)
|
||||
: ptr_(str.data()), length_(str.size()) {}
|
||||
constexpr BasicStringPiece(const value_type* offset, size_type len)
|
||||
: ptr_(offset), length_(len) {}
|
||||
BasicStringPiece(const typename STRING_TYPE::const_iterator& begin,
|
||||
const typename STRING_TYPE::const_iterator& end) {
|
||||
DCHECK(begin <= end) << "StringPiece iterators swapped or invalid.";
|
||||
length_ = static_cast<size_t>(std::distance(begin, end));
|
||||
|
||||
// The length test before assignment is to avoid dereferencing an iterator
|
||||
// that may point to the end() of a string.
|
||||
ptr_ = length_ > 0 ? &*begin : nullptr;
|
||||
}
|
||||
|
||||
// data() may return a pointer to a buffer with embedded NULs, and the
|
||||
// returned buffer may or may not be null terminated. Therefore it is
|
||||
// typically a mistake to pass data() to a routine that expects a NUL
|
||||
// terminated string.
|
||||
constexpr const value_type* data() const { return ptr_; }
|
||||
constexpr size_type size() const noexcept { return length_; }
|
||||
constexpr size_type length() const noexcept { return length_; }
|
||||
bool empty() const { return length_ == 0; }
|
||||
|
||||
constexpr value_type operator[](size_type i) const {
|
||||
CHECK(i < length_);
|
||||
return ptr_[i];
|
||||
}
|
||||
|
||||
value_type front() const {
|
||||
CHECK_NE(0UL, length_);
|
||||
return ptr_[0];
|
||||
}
|
||||
|
||||
value_type back() const {
|
||||
CHECK_NE(0UL, length_);
|
||||
return ptr_[length_ - 1];
|
||||
}
|
||||
|
||||
constexpr void remove_prefix(size_type n) {
|
||||
CHECK(n <= length_);
|
||||
ptr_ += n;
|
||||
length_ -= n;
|
||||
}
|
||||
|
||||
constexpr void remove_suffix(size_type n) {
|
||||
CHECK(n <= length_);
|
||||
length_ -= n;
|
||||
}
|
||||
|
||||
constexpr int compare(BasicStringPiece x) const noexcept {
|
||||
int r = CharTraits<value_type>::compare(
|
||||
ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_));
|
||||
if (r == 0) {
|
||||
if (length_ < x.length_) r = -1;
|
||||
else if (length_ > x.length_) r = +1;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// This is the style of conversion preferred by std::string_view in C++17.
|
||||
explicit operator STRING_TYPE() const { return as_string(); }
|
||||
|
||||
STRING_TYPE as_string() const {
|
||||
// std::string doesn't like to take a NULL pointer even with a 0 size.
|
||||
return empty() ? STRING_TYPE() : STRING_TYPE(data(), size());
|
||||
}
|
||||
|
||||
const_iterator begin() const { return ptr_; }
|
||||
const_iterator end() const { return ptr_ + length_; }
|
||||
const_reverse_iterator rbegin() const {
|
||||
return const_reverse_iterator(ptr_ + length_);
|
||||
}
|
||||
const_reverse_iterator rend() const {
|
||||
return const_reverse_iterator(ptr_);
|
||||
}
|
||||
|
||||
size_type max_size() const { return length_; }
|
||||
size_type capacity() const { return length_; }
|
||||
|
||||
size_type copy(value_type* buf, size_type n, size_type pos = 0) const {
|
||||
return internal::copy(*this, buf, n, pos);
|
||||
}
|
||||
|
||||
// Does "this" start with "x"
|
||||
constexpr bool starts_with(BasicStringPiece x) const noexcept {
|
||||
return (
|
||||
(this->length_ >= x.length_) &&
|
||||
(CharTraits<value_type>::compare(this->ptr_, x.ptr_, x.length_) == 0));
|
||||
}
|
||||
|
||||
// Does "this" end with "x"
|
||||
constexpr bool ends_with(BasicStringPiece x) const noexcept {
|
||||
return ((this->length_ >= x.length_) &&
|
||||
(CharTraits<value_type>::compare(
|
||||
this->ptr_ + (this->length_ - x.length_), x.ptr_, x.length_) ==
|
||||
0));
|
||||
}
|
||||
|
||||
// find: Search for a character or substring at a given offset.
|
||||
size_type find(const BasicStringPiece<STRING_TYPE>& s,
|
||||
size_type pos = 0) const {
|
||||
return internal::find(*this, s, pos);
|
||||
}
|
||||
size_type find(value_type c, size_type pos = 0) const {
|
||||
return internal::find(*this, c, pos);
|
||||
}
|
||||
|
||||
// rfind: Reverse find.
|
||||
size_type rfind(const BasicStringPiece& s,
|
||||
size_type pos = BasicStringPiece::npos) const {
|
||||
return internal::rfind(*this, s, pos);
|
||||
}
|
||||
size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const {
|
||||
return internal::rfind(*this, c, pos);
|
||||
}
|
||||
|
||||
// find_first_of: Find the first occurence of one of a set of characters.
|
||||
size_type find_first_of(const BasicStringPiece& s,
|
||||
size_type pos = 0) const {
|
||||
return internal::find_first_of(*this, s, pos);
|
||||
}
|
||||
size_type find_first_of(value_type c, size_type pos = 0) const {
|
||||
return find(c, pos);
|
||||
}
|
||||
|
||||
// find_first_not_of: Find the first occurence not of a set of characters.
|
||||
size_type find_first_not_of(const BasicStringPiece& s,
|
||||
size_type pos = 0) const {
|
||||
return internal::find_first_not_of(*this, s, pos);
|
||||
}
|
||||
size_type find_first_not_of(value_type c, size_type pos = 0) const {
|
||||
return internal::find_first_not_of(*this, c, pos);
|
||||
}
|
||||
|
||||
// find_last_of: Find the last occurence of one of a set of characters.
|
||||
size_type find_last_of(const BasicStringPiece& s,
|
||||
size_type pos = BasicStringPiece::npos) const {
|
||||
return internal::find_last_of(*this, s, pos);
|
||||
}
|
||||
size_type find_last_of(value_type c,
|
||||
size_type pos = BasicStringPiece::npos) const {
|
||||
return rfind(c, pos);
|
||||
}
|
||||
|
||||
// find_last_not_of: Find the last occurence not of a set of characters.
|
||||
size_type find_last_not_of(const BasicStringPiece& s,
|
||||
size_type pos = BasicStringPiece::npos) const {
|
||||
return internal::find_last_not_of(*this, s, pos);
|
||||
}
|
||||
size_type find_last_not_of(value_type c,
|
||||
size_type pos = BasicStringPiece::npos) const {
|
||||
return internal::find_last_not_of(*this, c, pos);
|
||||
}
|
||||
|
||||
// substr.
|
||||
BasicStringPiece substr(size_type pos,
|
||||
size_type n = BasicStringPiece::npos) const {
|
||||
return internal::substr(*this, pos, n);
|
||||
}
|
||||
|
||||
protected:
|
||||
const value_type* ptr_;
|
||||
size_type length_;
|
||||
};
|
||||
|
||||
template <typename STRING_TYPE>
|
||||
const typename BasicStringPiece<STRING_TYPE>::size_type
|
||||
BasicStringPiece<STRING_TYPE>::npos =
|
||||
typename BasicStringPiece<STRING_TYPE>::size_type(-1);
|
||||
|
||||
// MSVC doesn't like complex extern templates and DLLs.
|
||||
#if !defined(COMPILER_MSVC)
|
||||
extern template class BASE_EXPORT BasicStringPiece<std::string>;
|
||||
extern template class BASE_EXPORT BasicStringPiece<string16>;
|
||||
#endif
|
||||
|
||||
// Comparison operators --------------------------------------------------------
|
||||
// operator ==
|
||||
template <typename StringT>
|
||||
constexpr bool operator==(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
|
||||
}
|
||||
|
||||
// Here and below we make use of std::common_type_t to emulate an identity type
|
||||
// transformation. This creates a non-deduced context, so that we can compare
|
||||
// StringPieces with types that implicitly convert to StringPieces. See
|
||||
// https://wg21.link/n3766 for details.
|
||||
// Furthermore, we require dummy template parameters for these overloads to work
|
||||
// around a name mangling issue on Windows.
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator==(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator==(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return lhs.size() == rhs.size() && lhs.compare(rhs) == 0;
|
||||
}
|
||||
|
||||
// operator !=
|
||||
template <typename StringT>
|
||||
constexpr bool operator!=(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator!=(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator!=(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
// operator <
|
||||
template <typename StringT>
|
||||
constexpr bool operator<(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return lhs.compare(rhs) < 0;
|
||||
}
|
||||
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator<(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return lhs.compare(rhs) < 0;
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator<(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return lhs.compare(rhs) < 0;
|
||||
}
|
||||
|
||||
// operator >
|
||||
template <typename StringT>
|
||||
constexpr bool operator>(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return rhs < lhs;
|
||||
}
|
||||
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator>(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return rhs < lhs;
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator>(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return rhs < lhs;
|
||||
}
|
||||
|
||||
// operator <=
|
||||
template <typename StringT>
|
||||
constexpr bool operator<=(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(rhs < lhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator<=(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return !(rhs < lhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator<=(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(rhs < lhs);
|
||||
}
|
||||
|
||||
// operator >=
|
||||
template <typename StringT>
|
||||
constexpr bool operator>=(BasicStringPiece<StringT> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 1>
|
||||
constexpr bool operator>=(
|
||||
BasicStringPiece<StringT> lhs,
|
||||
std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept {
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
template <typename StringT, int = 2>
|
||||
constexpr bool operator>=(std::common_type_t<BasicStringPiece<StringT>> lhs,
|
||||
BasicStringPiece<StringT> rhs) noexcept {
|
||||
return !(lhs < rhs);
|
||||
}
|
||||
|
||||
BASE_EXPORT std::ostream& operator<<(std::ostream& o,
|
||||
const StringPiece& piece);
|
||||
|
||||
BASE_EXPORT std::ostream& operator<<(std::ostream& o,
|
||||
const StringPiece16& piece);
|
||||
|
||||
// Hashing ---------------------------------------------------------------------
|
||||
|
||||
// We provide appropriate hash functions so StringPiece and StringPiece16 can
|
||||
// be used as keys in hash sets and maps.
|
||||
|
||||
// This hash function is copied from base/strings/string16.h. We don't use the
|
||||
// ones already defined for string and string16 directly because it would
|
||||
// require the string constructors to be called, which we don't want.
|
||||
|
||||
// Hash functor for string-piece-like types. Folds every character of the
// piece into the running value as hash = hash * 131 + character, starting
// from 0, so an empty piece hashes to 0.
template <typename StringPieceType>
struct StringPieceHashImpl {
  std::size_t operator()(StringPieceType sp) const {
    std::size_t hash = 0;
    for (auto it = sp.begin(); it != sp.end(); ++it)
      hash = (hash * 131) + *it;
    return hash;
  }
};
|
||||
|
||||
using StringPieceHash = StringPieceHashImpl<StringPiece>;
|
||||
using StringPiece16Hash = StringPieceHashImpl<StringPiece16>;
|
||||
using WStringPieceHash = StringPieceHashImpl<WStringPiece>;
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_PIECE_H_
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright 2017 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Forward declaration of StringPiece types from base/strings/string_piece.h
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_
|
||||
#define BASE_STRINGS_STRING_PIECE_FORWARD_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
template <typename STRING_TYPE>
|
||||
class BasicStringPiece;
|
||||
typedef BasicStringPiece<std::string> StringPiece;
|
||||
typedef BasicStringPiece<string16> StringPiece16;
|
||||
typedef BasicStringPiece<std::wstring> WStringPiece;
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_PIECE_FORWARD_H_
|
||||
259
TMessagesProj/jni/voip/webrtc/base/strings/string_split.cc
Normal file
259
TMessagesProj/jni/voip/webrtc/base/strings/string_split.cc
Normal file
|
|
@ -0,0 +1,259 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/string_split.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/third_party/icu/icu_utf.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns either the ASCII or UTF-16 whitespace.
|
||||
template<typename Str> BasicStringPiece<Str> WhitespaceForType();
|
||||
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
|
||||
template <>
|
||||
WStringPiece WhitespaceForType<std::wstring>() {
|
||||
return kWhitespaceWide;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> StringPiece16 WhitespaceForType<string16>() {
|
||||
return kWhitespaceUTF16;
|
||||
}
|
||||
template<> StringPiece WhitespaceForType<std::string>() {
|
||||
return kWhitespaceASCII;
|
||||
}
|
||||
|
||||
// General string splitter template. Can take 8- or 16-bit input, can produce
|
||||
// the corresponding string or StringPiece output.
|
||||
template <typename OutputStringType, typename Str>
|
||||
static std::vector<OutputStringType> SplitStringT(
|
||||
BasicStringPiece<Str> str,
|
||||
BasicStringPiece<Str> delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
std::vector<OutputStringType> result;
|
||||
if (str.empty())
|
||||
return result;
|
||||
|
||||
size_t start = 0;
|
||||
while (start != Str::npos) {
|
||||
size_t end = str.find_first_of(delimiter, start);
|
||||
|
||||
BasicStringPiece<Str> piece;
|
||||
if (end == Str::npos) {
|
||||
piece = str.substr(start);
|
||||
start = Str::npos;
|
||||
} else {
|
||||
piece = str.substr(start, end - start);
|
||||
start = end + 1;
|
||||
}
|
||||
|
||||
if (whitespace == TRIM_WHITESPACE)
|
||||
piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
|
||||
|
||||
if (result_type == SPLIT_WANT_ALL || !piece.empty())
|
||||
result.emplace_back(piece);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Parses one "key<delimiter>value" entry out of |input| and appends it to
// |result|. A new pair is appended even when parsing fails: it stays
// ("", "") if |delimiter| is missing, and (key, "") if nothing but
// delimiters follows the key. Returns true only when both a delimiter and a
// value were found.
bool AppendStringKeyValue(StringPiece input,
                          char delimiter,
                          StringPairs* result) {
  // Create the output slot first; the strings are copied straight into it.
  result->emplace_back();
  auto& entry = result->back();

  // The key is everything before the first delimiter.
  const size_t key_end = input.find_first_of(delimiter);
  if (key_end == std::string::npos) {
    DVLOG(1) << "cannot find delimiter in: " << input;
    return false;  // No delimiter.
  }
  entry.first = std::string(input.substr(0, key_end));

  // The value starts at the first character after the run of delimiters that
  // follows the key.
  const StringPiece tail = input.substr(key_end);
  const size_t value_begin = tail.find_first_not_of(delimiter);
  if (value_begin == StringPiece::npos) {
    DVLOG(1) << "cannot parse value from input: " << input;
    return false;  // No value.
  }
  entry.second = std::string(tail.substr(value_begin));

  return true;
}
|
||||
|
||||
template <typename OutputStringType, typename Str>
|
||||
std::vector<OutputStringType> SplitStringUsingSubstrT(
|
||||
BasicStringPiece<Str> input,
|
||||
BasicStringPiece<Str> delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
using Piece = BasicStringPiece<Str>;
|
||||
using size_type = typename Piece::size_type;
|
||||
|
||||
std::vector<OutputStringType> result;
|
||||
if (delimiter.size() == 0) {
|
||||
result.emplace_back(input);
|
||||
return result;
|
||||
}
|
||||
|
||||
for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
|
||||
begin_index = end_index + delimiter.size()) {
|
||||
end_index = input.find(delimiter, begin_index);
|
||||
Piece term = end_index == Piece::npos
|
||||
? input.substr(begin_index)
|
||||
: input.substr(begin_index, end_index - begin_index);
|
||||
|
||||
if (whitespace == TRIM_WHITESPACE)
|
||||
term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
|
||||
|
||||
if (result_type == SPLIT_WANT_ALL || !term.empty())
|
||||
result.emplace_back(term);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<std::string> SplitString(StringPiece input,
|
||||
StringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<std::string>(input, separators, whitespace, result_type);
|
||||
}
|
||||
|
||||
std::vector<string16> SplitString(StringPiece16 input,
|
||||
StringPiece16 separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<string16>(input, separators, whitespace, result_type);
|
||||
}
|
||||
|
||||
std::vector<StringPiece> SplitStringPiece(StringPiece input,
|
||||
StringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<StringPiece>(input, separators, whitespace, result_type);
|
||||
}
|
||||
|
||||
std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
|
||||
StringPiece16 separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<StringPiece16>(input, separators, whitespace,
|
||||
result_type);
|
||||
}
|
||||
|
||||
bool SplitStringIntoKeyValuePairs(StringPiece input,
|
||||
char key_value_delimiter,
|
||||
char key_value_pair_delimiter,
|
||||
StringPairs* key_value_pairs) {
|
||||
return SplitStringIntoKeyValuePairsUsingSubstr(
|
||||
input, key_value_delimiter, StringPiece(&key_value_pair_delimiter, 1),
|
||||
key_value_pairs);
|
||||
}
|
||||
|
||||
// Splits |input| at every occurrence of |key_value_pair_delimiter| (trimming
// whitespace and skipping empty entries), then parses each entry as
// key<key_value_delimiter>value into |key_value_pairs|, which is cleared
// first. Every entry contributes a pair, including the ones that fail to
// parse; the return value is true only if all entries parsed successfully.
bool SplitStringIntoKeyValuePairsUsingSubstr(
    StringPiece input,
    char key_value_delimiter,
    StringPiece key_value_pair_delimiter,
    StringPairs* key_value_pairs) {
  key_value_pairs->clear();

  const std::vector<StringPiece> entries = SplitStringPieceUsingSubstr(
      input, key_value_pair_delimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
  key_value_pairs->reserve(entries.size());

  bool all_parsed = true;
  for (const StringPiece& entry : entries) {
    // The append is evaluated first so that every entry is processed even
    // after an earlier one failed; failures are only remembered, which allows
    // for pairs without an associated key or value.
    all_parsed =
        AppendStringKeyValue(entry, key_value_delimiter, key_value_pairs) &&
        all_parsed;
  }
  return all_parsed;
}
|
||||
|
||||
std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
|
||||
StringPiece16 delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace,
|
||||
result_type);
|
||||
}
|
||||
|
||||
std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
|
||||
StringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace,
|
||||
result_type);
|
||||
}
|
||||
|
||||
// UTF-16 variant: splits |input| at every occurrence of the whole
// |delimiter| string and returns pieces that point into |input|'s buffer.
// |whitespace| and |result_type| are passed through to
// SplitStringUsingSubstrT.
std::vector<StringPiece16> SplitStringPieceUsingSubstr(
    StringPiece16 input,
    StringPiece16 delimiter,
    WhitespaceHandling whitespace,
    SplitResult result_type) {
  // The previous version also declared a local |result| vector that was never
  // used; it has been removed.
  return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace,
                                                result_type);
}
|
||||
|
||||
std::vector<StringPiece> SplitStringPieceUsingSubstr(
|
||||
StringPiece input,
|
||||
StringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace,
|
||||
result_type);
|
||||
}
|
||||
|
||||
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
|
||||
std::vector<std::wstring> SplitString(WStringPiece input,
|
||||
WStringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<std::wstring>(input, separators, whitespace, result_type);
|
||||
}
|
||||
|
||||
std::vector<WStringPiece> SplitStringPiece(WStringPiece input,
|
||||
WStringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringT<WStringPiece>(input, separators, whitespace, result_type);
|
||||
}
|
||||
|
||||
std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input,
|
||||
WStringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace,
|
||||
result_type);
|
||||
}
|
||||
|
||||
std::vector<WStringPiece> SplitStringPieceUsingSubstr(
|
||||
WStringPiece input,
|
||||
WStringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) {
|
||||
return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace,
|
||||
result_type);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace base
|
||||
169
TMessagesProj/jni/voip/webrtc/base/strings/string_split.h
Normal file
169
TMessagesProj/jni/voip/webrtc/base/strings/string_split.h
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_SPLIT_H_
|
||||
#define BASE_STRINGS_STRING_SPLIT_H_
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
enum WhitespaceHandling {
|
||||
KEEP_WHITESPACE,
|
||||
TRIM_WHITESPACE,
|
||||
};
|
||||
|
||||
enum SplitResult {
|
||||
// Strictly return all results.
|
||||
//
|
||||
// If the input is ",," and the separator is ',' this will return a
|
||||
// vector of three empty strings.
|
||||
SPLIT_WANT_ALL,
|
||||
|
||||
// Only nonempty results will be added to the results. Multiple separators
|
||||
// will be coalesced. Separators at the beginning and end of the input will
|
||||
// be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped.
|
||||
//
|
||||
// If the input is ",," and the separator is ',', this will return an empty
|
||||
// vector.
|
||||
SPLIT_WANT_NONEMPTY,
|
||||
};
|
||||
|
||||
// Split the given string on ANY of the given separators, returning copies of
|
||||
// the result.
|
||||
//
|
||||
// Note this is inverse of JoinString() defined in string_util.h.
|
||||
//
|
||||
// To split on either commas or semicolons, keeping all whitespace:
|
||||
//
|
||||
// std::vector<std::string> tokens = base::SplitString(
|
||||
// input, ",;", base::KEEP_WHITESPACE,
|
||||
// base::SPLIT_WANT_ALL);
|
||||
BASE_EXPORT std::vector<std::string> SplitString(StringPiece input,
|
||||
StringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input,
|
||||
StringPiece16 separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
// Like SplitString above except it returns a vector of StringPieces which
|
||||
// reference the original buffer without copying. Although you have to be
|
||||
// careful to keep the original string unmodified, this provides an efficient
|
||||
// way to iterate through tokens in a string.
|
||||
//
|
||||
// Note this is inverse of JoinString() defined in string_util.h.
|
||||
//
|
||||
// To iterate through all whitespace-separated tokens in an input string:
|
||||
//
|
||||
// for (const auto& cur :
|
||||
// base::SplitStringPiece(input, base::kWhitespaceASCII,
|
||||
// base::KEEP_WHITESPACE,
|
||||
// base::SPLIT_WANT_NONEMPTY)) {
|
||||
// ...
|
||||
BASE_EXPORT std::vector<StringPiece> SplitStringPiece(
|
||||
StringPiece input,
|
||||
StringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::vector<StringPiece16> SplitStringPiece(
|
||||
StringPiece16 input,
|
||||
StringPiece16 separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
|
||||
using StringPairs = std::vector<std::pair<std::string, std::string>>;
|
||||
|
||||
// Splits |input| into key value pairs according to the given delimiters and
|
||||
// removes whitespace leading each key and trailing each value. Returns true
|
||||
// only if each pair has a non-empty key and value. |key_value_pairs| will
|
||||
// include ("","") pairs for entries without |key_value_delimiter|.
|
||||
BASE_EXPORT bool SplitStringIntoKeyValuePairs(StringPiece input,
|
||||
char key_value_delimiter,
|
||||
char key_value_pair_delimiter,
|
||||
StringPairs* key_value_pairs);
|
||||
|
||||
// Similar to SplitStringIntoKeyValuePairs, but use a substring
|
||||
// |key_value_pair_delimiter| instead of a single char.
|
||||
BASE_EXPORT bool SplitStringIntoKeyValuePairsUsingSubstr(
|
||||
StringPiece input,
|
||||
char key_value_delimiter,
|
||||
StringPiece key_value_pair_delimiter,
|
||||
StringPairs* key_value_pairs);
|
||||
|
||||
// Similar to SplitString, but use a substring delimiter instead of a list of
|
||||
// characters that are all possible delimiters.
|
||||
BASE_EXPORT std::vector<string16> SplitStringUsingSubstr(
|
||||
StringPiece16 input,
|
||||
StringPiece16 delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr(
|
||||
StringPiece input,
|
||||
StringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
|
||||
// Like SplitStringUsingSubstr above except it returns a vector of StringPieces
|
||||
// which reference the original buffer without copying. Although you have to be
|
||||
// careful to keep the original string unmodified, this provides an efficient
|
||||
// way to iterate through tokens in a string.
|
||||
//
|
||||
// To iterate through all newline-separated tokens in an input string:
|
||||
//
|
||||
// for (const auto& cur :
|
||||
// base::SplitStringPieceUsingSubstr(input, "\r\n",
|
||||
// base::KEEP_WHITESPACE,
|
||||
// base::SPLIT_WANT_NONEMPTY)) {
|
||||
// ...
|
||||
BASE_EXPORT std::vector<StringPiece16> SplitStringPieceUsingSubstr(
|
||||
StringPiece16 input,
|
||||
StringPiece16 delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr(
|
||||
StringPiece input,
|
||||
StringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
|
||||
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
|
||||
BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input,
|
||||
WStringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
BASE_EXPORT std::vector<WStringPiece> SplitStringPiece(
|
||||
WStringPiece input,
|
||||
WStringPiece separators,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
|
||||
BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr(
|
||||
WStringPiece input,
|
||||
WStringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
|
||||
BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr(
|
||||
WStringPiece input,
|
||||
WStringPiece delimiter,
|
||||
WhitespaceHandling whitespace,
|
||||
SplitResult result_type) WARN_UNUSED_RESULT;
|
||||
#endif
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_SPLIT_H_
|
||||
303
TMessagesProj/jni/voip/webrtc/base/strings/string_tokenizer.h
Normal file
303
TMessagesProj/jni/voip/webrtc/base/strings/string_tokenizer.h
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_TOKENIZER_H_
|
||||
#define BASE_STRINGS_STRING_TOKENIZER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// StringTokenizerT is a simple string tokenizer class. It works like an
|
||||
// iterator that with each step (see the Advance method) updates members that
|
||||
// refer to the next token in the input string. The user may optionally
|
||||
// configure the tokenizer to return delimiters.
|
||||
//
|
||||
// EXAMPLE 1:
|
||||
//
|
||||
// char input[] = "this is a test";
|
||||
// CStringTokenizer t(input, input + strlen(input), " ");
|
||||
// while (t.GetNext()) {
|
||||
// printf("%s\n", t.token().c_str());
|
||||
// }
|
||||
//
|
||||
// Output:
|
||||
//
|
||||
// this
|
||||
// is
|
||||
// a
|
||||
// test
|
||||
//
|
||||
//
|
||||
// EXAMPLE 2:
|
||||
//
|
||||
// std::string input = "no-cache=\"foo, bar\", private";
|
||||
// StringTokenizer t(input, ", ");
|
||||
// t.set_quote_chars("\"");
|
||||
// while (t.GetNext()) {
|
||||
// printf("%s\n", t.token().c_str());
|
||||
// }
|
||||
//
|
||||
// Output:
|
||||
//
|
||||
// no-cache="foo, bar"
|
||||
// private
|
||||
//
|
||||
//
|
||||
// EXAMPLE 3:
|
||||
//
|
||||
// bool next_is_option = false, next_is_value = false;
|
||||
// std::string input = "text/html; charset=UTF-8; foo=bar";
|
||||
// StringTokenizer t(input, "; =");
|
||||
// t.set_options(StringTokenizer::RETURN_DELIMS);
|
||||
// while (t.GetNext()) {
|
||||
// if (t.token_is_delim()) {
|
||||
// switch (*t.token_begin()) {
|
||||
// case ';':
|
||||
// next_is_option = true;
|
||||
// break;
|
||||
// case '=':
|
||||
// next_is_value = true;
|
||||
// break;
|
||||
// }
|
||||
// } else {
|
||||
// const char* label;
|
||||
// if (next_is_option) {
|
||||
// label = "option-name";
|
||||
// next_is_option = false;
|
||||
// } else if (next_is_value) {
|
||||
// label = "option-value";
|
||||
// next_is_value = false;
|
||||
// } else {
|
||||
// label = "mime-type";
|
||||
// }
|
||||
// printf("%s: %s\n", label, t.token().c_str());
|
||||
// }
|
||||
// }
|
||||
//
|
||||
//
|
||||
template <class str, class const_iterator>
|
||||
class StringTokenizerT {
|
||||
public:
|
||||
typedef typename str::value_type char_type;
|
||||
|
||||
// Options that may be pass to set_options()
|
||||
enum {
|
||||
// Specifies the delimiters should be returned as tokens
|
||||
RETURN_DELIMS = 1 << 0,
|
||||
|
||||
// Specifies that empty tokens should be returned. Treats the beginning and
|
||||
// ending of the string as implicit delimiters, though doesn't return them
|
||||
// as tokens if RETURN_DELIMS is also used.
|
||||
RETURN_EMPTY_TOKENS = 1 << 1,
|
||||
};
|
||||
|
||||
// The string object must live longer than the tokenizer. In particular, this
|
||||
// should not be constructed with a temporary. The deleted rvalue constructor
|
||||
// blocks the most obvious instances of this (e.g. passing a string literal to
|
||||
// the constructor), but caution must still be exercised.
|
||||
StringTokenizerT(const str& string,
|
||||
const str& delims) {
|
||||
Init(string.begin(), string.end(), delims);
|
||||
}
|
||||
|
||||
// Don't allow temporary strings to be used with string tokenizer, since
|
||||
// Init() would otherwise save iterators to a temporary string.
|
||||
StringTokenizerT(str&&, const str& delims) = delete;
|
||||
|
||||
StringTokenizerT(const_iterator string_begin,
|
||||
const_iterator string_end,
|
||||
const str& delims) {
|
||||
Init(string_begin, string_end, delims);
|
||||
}
|
||||
|
||||
// Set the options for this tokenizer. By default, this is 0.
|
||||
void set_options(int options) { options_ = options; }
|
||||
|
||||
// Set the characters to regard as quotes. By default, this is empty. When
|
||||
// a quote char is encountered, the tokenizer will switch into a mode where
|
||||
// it ignores delimiters that it finds. It switches out of this mode once it
|
||||
// finds another instance of the quote char. If a backslash is encountered
|
||||
// within a quoted string, then the next character is skipped.
|
||||
void set_quote_chars(const str& quotes) { quotes_ = quotes; }
|
||||
|
||||
// Call this method to advance the tokenizer to the next delimiter. This
|
||||
// returns false if the tokenizer is complete. This method must be called
|
||||
// before calling any of the token* methods.
|
||||
bool GetNext() {
|
||||
if (quotes_.empty() && options_ == 0)
|
||||
return QuickGetNext();
|
||||
else
|
||||
return FullGetNext();
|
||||
}
|
||||
|
||||
// Start iterating through tokens from the beginning of the string.
|
||||
void Reset() {
|
||||
token_end_ = start_pos_;
|
||||
}
|
||||
|
||||
// Returns true if token is a delimiter. When the tokenizer is constructed
|
||||
// with the RETURN_DELIMS option, this method can be used to check if the
|
||||
// returned token is actually a delimiter. Returns true before the first
|
||||
// time GetNext() has been called, and after GetNext() returns false.
|
||||
bool token_is_delim() const { return token_is_delim_; }
|
||||
|
||||
// If GetNext() returned true, then these methods may be used to read the
|
||||
// value of the token.
|
||||
const_iterator token_begin() const { return token_begin_; }
|
||||
const_iterator token_end() const { return token_end_; }
|
||||
str token() const { return str(token_begin_, token_end_); }
|
||||
BasicStringPiece<str> token_piece() const {
|
||||
return BasicStringPiece<str>(&*token_begin_,
|
||||
std::distance(token_begin_, token_end_));
|
||||
}
|
||||
|
||||
private:
|
||||
void Init(const_iterator string_begin,
|
||||
const_iterator string_end,
|
||||
const str& delims) {
|
||||
start_pos_ = string_begin;
|
||||
token_begin_ = string_begin;
|
||||
token_end_ = string_begin;
|
||||
end_ = string_end;
|
||||
delims_ = delims;
|
||||
options_ = 0;
|
||||
token_is_delim_ = true;
|
||||
}
|
||||
|
||||
// Implementation of GetNext() for when we have no quote characters. We have
|
||||
// two separate implementations because AdvanceOne() is a hot spot in large
|
||||
// text files with large tokens.
|
||||
bool QuickGetNext() {
|
||||
token_is_delim_ = false;
|
||||
for (;;) {
|
||||
token_begin_ = token_end_;
|
||||
if (token_end_ == end_) {
|
||||
token_is_delim_ = true;
|
||||
return false;
|
||||
}
|
||||
++token_end_;
|
||||
if (delims_.find(*token_begin_) == str::npos)
|
||||
break;
|
||||
// else skip over delimiter.
|
||||
}
|
||||
while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
|
||||
++token_end_;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Implementation of GetNext() for when we have to take quotes into account.
|
||||
bool FullGetNext() {
|
||||
AdvanceState state;
|
||||
|
||||
for (;;) {
|
||||
if (token_is_delim_) {
|
||||
// Last token was a delimiter. Note: This is also the case at the start.
|
||||
//
|
||||
// ... D T T T T D ...
|
||||
// ^ ^
|
||||
// | |
|
||||
// | |token_end_| : The next character to look at or |end_|.
|
||||
// |
|
||||
// |token_begin_| : Points to delimiter or |token_end_|.
|
||||
//
|
||||
// The next token is always a non-delimiting token. It could be empty,
|
||||
// however.
|
||||
token_is_delim_ = false;
|
||||
token_begin_ = token_end_;
|
||||
|
||||
// Slurp all non-delimiter characters into the token.
|
||||
while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) {
|
||||
++token_end_;
|
||||
}
|
||||
|
||||
// If it's non-empty, or empty tokens were requested, return the token.
|
||||
if (token_begin_ != token_end_ || (options_ & RETURN_EMPTY_TOKENS))
|
||||
return true;
|
||||
}
|
||||
|
||||
DCHECK(!token_is_delim_);
|
||||
// Last token was a regular token.
|
||||
//
|
||||
// ... T T T D T T ...
|
||||
// ^ ^
|
||||
// | |
|
||||
// | token_end_ : The next character to look at. Always one
|
||||
// | char beyond the token boundary.
|
||||
// |
|
||||
// token_begin_ : Points to beginning of token. Note: token could
|
||||
// be empty, in which case
|
||||
// token_begin_ == token_end_.
|
||||
//
|
||||
// The next token is always a delimiter. It could be |end_| however, but
|
||||
// |end_| is also an implicit delimiter.
|
||||
token_is_delim_ = true;
|
||||
token_begin_ = token_end_;
|
||||
|
||||
if (token_end_ == end_)
|
||||
return false;
|
||||
|
||||
// Look at the delimiter.
|
||||
++token_end_;
|
||||
if (options_ & RETURN_DELIMS)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsDelim(char_type c) const {
|
||||
return delims_.find(c) != str::npos;
|
||||
}
|
||||
|
||||
bool IsQuote(char_type c) const {
|
||||
return quotes_.find(c) != str::npos;
|
||||
}
|
||||
|
||||
struct AdvanceState {
|
||||
bool in_quote;
|
||||
bool in_escape;
|
||||
char_type quote_char;
|
||||
AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
|
||||
};
|
||||
|
||||
// Returns true if a delimiter was not hit.
|
||||
bool AdvanceOne(AdvanceState* state, char_type c) {
|
||||
if (state->in_quote) {
|
||||
if (state->in_escape) {
|
||||
state->in_escape = false;
|
||||
} else if (c == '\\') {
|
||||
state->in_escape = true;
|
||||
} else if (c == state->quote_char) {
|
||||
state->in_quote = false;
|
||||
}
|
||||
} else {
|
||||
if (IsDelim(c))
|
||||
return false;
|
||||
state->in_quote = IsQuote(state->quote_char = c);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const_iterator start_pos_;
|
||||
const_iterator token_begin_;
|
||||
const_iterator token_end_;
|
||||
const_iterator end_;
|
||||
str delims_;
|
||||
str quotes_;
|
||||
int options_;
|
||||
bool token_is_delim_;
|
||||
};
|
||||
|
||||
// Tokenizer over a std::string, walking it with std::string iterators.
using StringTokenizer =
    StringTokenizerT<std::string, std::string::const_iterator>;
// Tokenizer over a string16, walking it with string16 iterators.
using String16Tokenizer = StringTokenizerT<string16, string16::const_iterator>;
// Tokenizer over a raw [const char*, const char*) range; delimiters are still
// supplied as a std::string.
using CStringTokenizer = StringTokenizerT<std::string, const char*>;
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_TOKENIZER_H_
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2015 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <string>

#include "base/strings/string_tokenizer.h"
|
||||
|
||||
// Drains |t|: keeps calling GetNext() until it reports completion and
// materializes each token so that token() is exercised as well.
void GetAllTokens(base::StringTokenizer& t) {
  for (;;) {
    if (!t.GetNext())
      return;
    (void)t.token();
  }
}
|
||||
|
||||
// Entry point for LibFuzzer.
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
uint8_t size_t_bytes = sizeof(size_t);
|
||||
if (size < size_t_bytes + 1) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Calculate pattern size based on remaining bytes, otherwise fuzzing is
|
||||
// inefficient with bailouts in most cases.
|
||||
size_t pattern_size =
|
||||
*reinterpret_cast<const size_t*>(data) % (size - size_t_bytes);
|
||||
|
||||
std::string pattern(reinterpret_cast<const char*>(data + size_t_bytes),
|
||||
pattern_size);
|
||||
std::string input(
|
||||
reinterpret_cast<const char*>(data + size_t_bytes + pattern_size),
|
||||
size - pattern_size - size_t_bytes);
|
||||
|
||||
// Allow quote_chars and options to be set. Otherwise full coverage
|
||||
// won't be possible since IsQuote, FullGetNext and other functions
|
||||
// won't be called.
|
||||
for (bool return_delims : {false, true}) {
|
||||
for (bool return_empty_strings : {false, true}) {
|
||||
int options = 0;
|
||||
if (return_delims)
|
||||
options |= base::StringTokenizer::RETURN_DELIMS;
|
||||
if (return_empty_strings)
|
||||
options |= base::StringTokenizer::RETURN_EMPTY_TOKENS;
|
||||
|
||||
base::StringTokenizer t(input, pattern);
|
||||
t.set_options(options);
|
||||
GetAllTokens(t);
|
||||
|
||||
base::StringTokenizer t_quote(input, pattern);
|
||||
t_quote.set_quote_chars("\"");
|
||||
t_quote.set_options(options);
|
||||
GetAllTokens(t_quote);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
1129
TMessagesProj/jni/voip/webrtc/base/strings/string_util.cc
Normal file
1129
TMessagesProj/jni/voip/webrtc/base/strings/string_util.cc
Normal file
File diff suppressed because it is too large
Load diff
568
TMessagesProj/jni/voip/webrtc/base/strings/string_util.h
Normal file
568
TMessagesProj/jni/voip/webrtc/base/strings/string_util.h
Normal file
|
|
@ -0,0 +1,568 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
//
|
||||
// This file defines utility functions for working with strings.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_UTIL_H_
|
||||
#define BASE_STRINGS_STRING_UTIL_H_
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h> // va_list
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/compiler_specific.h"
|
||||
#include "base/stl_util.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h" // For implicit conversions.
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// C standard-library functions that aren't cross-platform are provided as
|
||||
// "base::...", and their prototypes are listed below. These functions are
|
||||
// then implemented as inline calls to the platform-specific equivalents in the
|
||||
// platform-specific headers.
|
||||
|
||||
// Wrapper for vsnprintf that always null-terminates and always returns the
|
||||
// number of characters that would be in an untruncated formatted
|
||||
// string, even when truncation occurs.
|
||||
int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
|
||||
PRINTF_FORMAT(3, 0);
|
||||
|
||||
// Some of these implementations need to be inlined.
|
||||
|
||||
// We separate the declaration from the implementation of this inline
|
||||
// function just so the PRINTF_FORMAT works.
|
||||
inline int snprintf(char* buffer, size_t size, const char* format, ...)
|
||||
PRINTF_FORMAT(3, 4);
|
||||
inline int snprintf(char* buffer, size_t size, const char* format, ...) {
|
||||
va_list arguments;
|
||||
va_start(arguments, format);
|
||||
int result = vsnprintf(buffer, size, format, arguments);
|
||||
va_end(arguments);
|
||||
return result;
|
||||
}
|
||||
|
||||
// BSD-style safe and consistent string copy functions.
|
||||
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
|
||||
// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
|
||||
// long as |dst_size| is not 0. Returns the length of |src| in characters.
|
||||
// If the return value is >= dst_size, then the output was truncated.
|
||||
// NOTE: All sizes are in number of characters, NOT in bytes.
|
||||
BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size);
|
||||
BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size);
|
||||
|
||||
// Scan a wprintf format string to determine whether it's portable across a
|
||||
// variety of systems. This function only checks that the conversion
|
||||
// specifiers used by the format string are supported and have the same meaning
|
||||
// on a variety of systems. It doesn't check for other errors that might occur
|
||||
// within a format string.
|
||||
//
|
||||
// Nonportable conversion specifiers for wprintf are:
|
||||
// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
|
||||
// data on all systems except Windows, which treat them as wchar_t data.
|
||||
// Use %ls and %lc for wchar_t data instead.
|
||||
// - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
|
||||
// which treat them as char data. Use %ls and %lc for wchar_t data
|
||||
// instead.
|
||||
// - 'F', which is not identified by Windows wprintf documentation.
|
||||
// - 'D', 'O', and 'U', which are deprecated and not available on all systems.
|
||||
// Use %ld, %lo, and %lu instead.
|
||||
//
|
||||
// Note that there is no portable conversion specifier for char data when
|
||||
// working with wprintf.
|
||||
//
|
||||
// This function is intended to be called from base::vswprintf.
|
||||
BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format);
|
||||
|
||||
// ASCII-only tolower. The C library's tolower depends on the current locale,
// which is not wanted here: only 'A'..'Z' are mapped, every other value is
// returned untouched.
inline char ToLowerASCII(char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return static_cast<char>(c + ('a' - 'A'));
}
|
||||
// char16 overload: maps 'A'..'Z' to 'a'..'z' and returns any other value
// unchanged.
inline char16 ToLowerASCII(char16 c) {
  if (c < 'A' || c > 'Z')
    return c;
  return static_cast<char16>(c + ('a' - 'A'));
}
|
||||
|
||||
// ASCII-only toupper. The C library's toupper depends on the current locale,
// which is not wanted here: only 'a'..'z' are mapped, every other value is
// returned untouched.
inline char ToUpperASCII(char c) {
  if (c < 'a' || c > 'z')
    return c;
  return static_cast<char>(c + ('A' - 'a'));
}
|
||||
// char16 overload: maps 'a'..'z' to 'A'..'Z' and returns any other value
// unchanged.
inline char16 ToUpperASCII(char16 c) {
  if (c < 'a' || c > 'z')
    return c;
  return static_cast<char16>(c + ('A' - 'a'));
}
|
||||
|
||||
// Converts the given string to its ASCII-lowercase equivalent.
|
||||
BASE_EXPORT std::string ToLowerASCII(StringPiece str);
|
||||
BASE_EXPORT string16 ToLowerASCII(StringPiece16 str);
|
||||
|
||||
// Converts the given string to its ASCII-uppercase equivalent.
|
||||
BASE_EXPORT std::string ToUpperASCII(StringPiece str);
|
||||
BASE_EXPORT string16 ToUpperASCII(StringPiece16 str);
|
||||
|
||||
// Functor for case-insensitive ASCII comparisons for STL algorithms like
|
||||
// std::search.
|
||||
//
|
||||
// Note that a full Unicode version of this functor is not possible to write
|
||||
// because case mappings might change the number of characters, depend on
|
||||
// context (combining accents), and require handling UTF-16. If you need
|
||||
// proper Unicode support, use base::i18n::ToLower/FoldCase and then just
|
||||
// use a normal operator== on the result.
|
||||
template <typename Char>
struct CaseInsensitiveCompareASCII {
  // Returns true when |x| and |y| are equal once both have been passed
  // through ToLowerASCII().
  bool operator()(Char x, Char y) const {
    const auto folded_x = ToLowerASCII(x);
    const auto folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
|
||||
|
||||
// Like strcasecmp for case-insensitive ASCII characters only. Returns:
|
||||
// -1 (a < b)
|
||||
// 0 (a == b)
|
||||
// 1 (a > b)
|
||||
// (unlike strcasecmp which can return values greater or less than 1/-1). For
|
||||
// full Unicode support, use base::i18n::ToLower or base::i18n::FoldCase
|
||||
// and then just call the normal string operators on the result.
|
||||
BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
|
||||
BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
|
||||
|
||||
// Equality for ASCII case-insensitive comparisons. For full Unicode support,
|
||||
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
|
||||
// == or !=.
|
||||
BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
|
||||
BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b);
|
||||
|
||||
// These threadsafe functions return references to globally unique empty
|
||||
// strings.
|
||||
//
|
||||
// It is likely faster to construct a new empty string object (just a few
|
||||
// instructions to set the length to 0) than to get the empty string instance
|
||||
// returned by these functions (which requires threadsafe static access).
|
||||
//
|
||||
// Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT
|
||||
// CONSTRUCTORS. There is only one case where you should use these: functions
|
||||
// which need to return a string by reference (e.g. as a class member
|
||||
// accessor), and don't have an empty string to use (e.g. in an error case).
|
||||
// These should not be used as initializers, function arguments, or return
|
||||
// values for functions which return by value or outparam.
|
||||
BASE_EXPORT const std::string& EmptyString();
|
||||
BASE_EXPORT const string16& EmptyString16();
|
||||
|
||||
// Contains the set of characters representing whitespace in the corresponding
|
||||
// encoding. Null-terminated. The ASCII versions are the whitespaces as defined
|
||||
// by HTML5, and don't include control characters.
|
||||
BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode.
|
||||
BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode.
|
||||
BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF.
|
||||
BASE_EXPORT extern const char kWhitespaceASCII[];
|
||||
BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode.
|
||||
|
||||
// Null-terminated string representing the UTF-8 byte order mark.
|
||||
BASE_EXPORT extern const char kUtf8ByteOrderMark[];
|
||||
|
||||
// Removes characters in |remove_chars| from anywhere in |input|. Returns true
|
||||
// if any characters were removed. |remove_chars| must be null-terminated.
|
||||
// NOTE: Safe to use the same variable for both |input| and |output|.
|
||||
BASE_EXPORT bool RemoveChars(const string16& input,
|
||||
StringPiece16 remove_chars,
|
||||
string16* output);
|
||||
BASE_EXPORT bool RemoveChars(const std::string& input,
|
||||
StringPiece remove_chars,
|
||||
std::string* output);
|
||||
|
||||
// Replaces characters in |replace_chars| from anywhere in |input| with
|
||||
// |replace_with|. Each character in |replace_chars| will be replaced with
|
||||
// the |replace_with| string. Returns true if any characters were replaced.
|
||||
// |replace_chars| must be null-terminated.
|
||||
// NOTE: Safe to use the same variable for both |input| and |output|.
|
||||
BASE_EXPORT bool ReplaceChars(const string16& input,
|
||||
StringPiece16 replace_chars,
|
||||
StringPiece16 replace_with,
|
||||
string16* output);
|
||||
BASE_EXPORT bool ReplaceChars(const std::string& input,
|
||||
StringPiece replace_chars,
|
||||
StringPiece replace_with,
|
||||
std::string* output);
|
||||
|
||||
enum TrimPositions {
|
||||
TRIM_NONE = 0,
|
||||
TRIM_LEADING = 1 << 0,
|
||||
TRIM_TRAILING = 1 << 1,
|
||||
TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,
|
||||
};
|
||||
|
||||
// Removes characters in |trim_chars| from the beginning and end of |input|.
|
||||
// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if
|
||||
// any characters were removed.
|
||||
//
|
||||
// It is safe to use the same variable for both |input| and |output| (this is
|
||||
// the normal usage to trim in-place).
|
||||
BASE_EXPORT bool TrimString(StringPiece16 input,
|
||||
StringPiece16 trim_chars,
|
||||
string16* output);
|
||||
BASE_EXPORT bool TrimString(StringPiece input,
|
||||
StringPiece trim_chars,
|
||||
std::string* output);
|
||||
|
||||
// StringPiece versions of the above. The returned pieces refer to the original
|
||||
// buffer.
|
||||
BASE_EXPORT StringPiece16 TrimString(StringPiece16 input,
|
||||
StringPiece16 trim_chars,
|
||||
TrimPositions positions);
|
||||
BASE_EXPORT StringPiece TrimString(StringPiece input,
|
||||
StringPiece trim_chars,
|
||||
TrimPositions positions);
|
||||
|
||||
// Truncates a string to the nearest UTF-8 character that will leave
|
||||
// the string less than or equal to the specified byte size.
|
||||
BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input,
|
||||
const size_t byte_size,
|
||||
std::string* output);
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF16)
|
||||
// Utility functions to access the underlying string buffer as a wide char
|
||||
// pointer.
|
||||
//
|
||||
// Note: These functions violate strict aliasing when char16 and wchar_t are
|
||||
// unrelated types. We thus pass -fno-strict-aliasing to the compiler on
|
||||
// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2].
|
||||
//
|
||||
// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244
|
||||
// [2]
|
||||
// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949
|
||||
inline wchar_t* as_writable_wcstr(char16* str) {
|
||||
return reinterpret_cast<wchar_t*>(str);
|
||||
}
|
||||
|
||||
inline wchar_t* as_writable_wcstr(string16& str) {
|
||||
return reinterpret_cast<wchar_t*>(data(str));
|
||||
}
|
||||
|
||||
inline const wchar_t* as_wcstr(const char16* str) {
|
||||
return reinterpret_cast<const wchar_t*>(str);
|
||||
}
|
||||
|
||||
inline const wchar_t* as_wcstr(StringPiece16 str) {
|
||||
return reinterpret_cast<const wchar_t*>(str.data());
|
||||
}
|
||||
|
||||
// Utility functions to access the underlying string buffer as a char16 pointer.
|
||||
inline char16* as_writable_u16cstr(wchar_t* str) {
|
||||
return reinterpret_cast<char16*>(str);
|
||||
}
|
||||
|
||||
inline char16* as_writable_u16cstr(std::wstring& str) {
|
||||
return reinterpret_cast<char16*>(data(str));
|
||||
}
|
||||
|
||||
inline const char16* as_u16cstr(const wchar_t* str) {
|
||||
return reinterpret_cast<const char16*>(str);
|
||||
}
|
||||
|
||||
inline const char16* as_u16cstr(WStringPiece str) {
|
||||
return reinterpret_cast<const char16*>(str.data());
|
||||
}
|
||||
|
||||
// Utility functions to convert between base::WStringPiece and
|
||||
// base::StringPiece16.
|
||||
inline WStringPiece AsWStringPiece(StringPiece16 str) {
|
||||
return WStringPiece(as_wcstr(str.data()), str.size());
|
||||
}
|
||||
|
||||
inline StringPiece16 AsStringPiece16(WStringPiece str) {
|
||||
return StringPiece16(as_u16cstr(str.data()), str.size());
|
||||
}
|
||||
|
||||
inline std::wstring AsWString(StringPiece16 str) {
|
||||
return std::wstring(as_wcstr(str.data()), str.size());
|
||||
}
|
||||
|
||||
inline string16 AsString16(WStringPiece str) {
|
||||
return string16(as_u16cstr(str.data()), str.size());
|
||||
}
|
||||
#endif // defined(WCHAR_T_IS_UTF16)
|
||||
|
||||
// Trims any whitespace from either end of the input string.
|
||||
//
|
||||
// The StringPiece versions return a substring referencing the input buffer.
|
||||
// The ASCII versions look only for ASCII whitespace.
|
||||
//
|
||||
// The std::string versions return where whitespace was found.
|
||||
// NOTE: Safe to use the same variable for both input and output.
|
||||
BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input,
|
||||
TrimPositions positions,
|
||||
string16* output);
|
||||
BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input,
|
||||
TrimPositions positions);
|
||||
BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input,
|
||||
TrimPositions positions,
|
||||
std::string* output);
|
||||
BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input,
|
||||
TrimPositions positions);
|
||||
|
||||
// Searches for CR or LF characters. Removes all contiguous whitespace
|
||||
// strings that contain them. This is useful when trying to deal with text
|
||||
// copied from terminals.
|
||||
// Returns |text|, with the following three transformations:
|
||||
// (1) Leading and trailing whitespace is trimmed.
|
||||
// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace
|
||||
// sequences containing a CR or LF are trimmed.
|
||||
// (3) All other whitespace sequences are converted to single spaces.
|
||||
BASE_EXPORT string16 CollapseWhitespace(
|
||||
const string16& text,
|
||||
bool trim_sequences_with_line_breaks);
|
||||
BASE_EXPORT std::string CollapseWhitespaceASCII(
|
||||
const std::string& text,
|
||||
bool trim_sequences_with_line_breaks);
|
||||
|
||||
// Returns true if |input| is empty or contains only characters found in
|
||||
// |characters|.
|
||||
BASE_EXPORT bool ContainsOnlyChars(StringPiece input, StringPiece characters);
|
||||
BASE_EXPORT bool ContainsOnlyChars(StringPiece16 input,
|
||||
StringPiece16 characters);
|
||||
|
||||
// Returns true if |str| is structurally valid UTF-8 and also doesn't
|
||||
// contain any non-character code point (e.g. U+10FFFE). Prohibiting
|
||||
// non-characters increases the likelihood of detecting non-UTF-8 in
|
||||
// real-world text, for callers which do not need to accept
|
||||
// non-characters in strings.
|
||||
BASE_EXPORT bool IsStringUTF8(StringPiece str);
|
||||
|
||||
// Returns true if |str| contains valid UTF-8, allowing non-character
|
||||
// code points.
|
||||
BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str);
|
||||
|
||||
// Returns true if |str| contains only valid ASCII character values.
|
||||
// Note 1: IsStringASCII executes in time determined solely by the
|
||||
// length of the string, not by its contents, so it is robust against
|
||||
// timing attacks for all strings of equal length.
|
||||
// Note 2: IsStringASCII assumes the input is likely all ASCII, and
|
||||
// does not leave early if it is not the case.
|
||||
BASE_EXPORT bool IsStringASCII(StringPiece str);
|
||||
BASE_EXPORT bool IsStringASCII(StringPiece16 str);
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
BASE_EXPORT bool IsStringASCII(WStringPiece str);
|
||||
#endif
|
||||
|
||||
// Compare the lower-case form of the given string against the given
|
||||
// previously-lower-cased ASCII string (typically a constant).
|
||||
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
|
||||
StringPiece lowecase_ascii);
|
||||
BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
|
||||
StringPiece lowecase_ascii);
|
||||
|
||||
// Performs a case-sensitive string compare of the given 16-bit string against
|
||||
// the given 8-bit ASCII string (typically a constant). The behavior is
|
||||
// undefined if the |ascii| string is not ASCII.
|
||||
BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii);
|
||||
|
||||
// Indicates case sensitivity of comparisons. Only ASCII case insensitivity
|
||||
// is supported. Full Unicode case-insensitive conversions would need to go in
|
||||
// base/i18n so it can use ICU.
|
||||
//
|
||||
// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's
|
||||
// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see
|
||||
// base/i18n/case_conversion.h for usage advice) on the arguments, and then use
|
||||
// the results to a case-sensitive comparison.
|
||||
enum class CompareCase {
|
||||
SENSITIVE,
|
||||
INSENSITIVE_ASCII,
|
||||
};
|
||||
|
||||
BASE_EXPORT bool StartsWith(StringPiece str,
|
||||
StringPiece search_for,
|
||||
CompareCase case_sensitivity);
|
||||
BASE_EXPORT bool StartsWith(StringPiece16 str,
|
||||
StringPiece16 search_for,
|
||||
CompareCase case_sensitivity);
|
||||
BASE_EXPORT bool EndsWith(StringPiece str,
|
||||
StringPiece search_for,
|
||||
CompareCase case_sensitivity);
|
||||
BASE_EXPORT bool EndsWith(StringPiece16 str,
|
||||
StringPiece16 search_for,
|
||||
CompareCase case_sensitivity);
|
||||
|
||||
// Determines the type of ASCII character, independent of locale (the C
|
||||
// library versions will change based on locale).
|
||||
// True for space, tab, CR, LF and form feed; false for every other value.
template <typename Char>
inline bool IsAsciiWhitespace(Char c) {
  if (c == ' ' || c == '\t')
    return true;
  return c == '\r' || c == '\n' || c == '\f';
}
|
||||
// True when |c| is an ASCII letter, i.e. in 'A'..'Z' or 'a'..'z'.
template <typename Char>
inline bool IsAsciiAlpha(Char c) {
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_upper || is_lower;
}
|
||||
// True when |c| lies in 'A'..'Z'.
template <typename Char>
inline bool IsAsciiUpper(Char c) {
  if (c < 'A')
    return false;
  return c <= 'Z';
}
|
||||
// True when |c| lies in 'a'..'z'.
template <typename Char>
inline bool IsAsciiLower(Char c) {
  if (c < 'a')
    return false;
  return c <= 'z';
}
|
||||
// True when |c| lies in '0'..'9'.
template <typename Char>
inline bool IsAsciiDigit(Char c) {
  if (c < '0')
    return false;
  return c <= '9';
}
|
||||
// True when |c| lies in ' '..'~'.
template <typename Char>
inline bool IsAsciiPrintable(Char c) {
  if (c < ' ')
    return false;
  return c <= '~';
}
|
||||
|
||||
// True when |c| is a hexadecimal digit: '0'..'9', 'A'..'F' or 'a'..'f'.
template <typename Char>
inline bool IsHexDigit(Char c) {
  const bool is_decimal = c >= '0' && c <= '9';
  const bool is_upper_hex = c >= 'A' && c <= 'F';
  const bool is_lower_hex = c >= 'a' && c <= 'f';
  return is_decimal || is_upper_hex || is_lower_hex;
}
|
||||
|
||||
// Returns the integer corresponding to the given hex character. For example:
|
||||
// '4' -> 4
|
||||
// 'a' -> 10
|
||||
// 'B' -> 11
|
||||
// Assumes the input is a valid hex character. DCHECKs in debug builds if not.
|
||||
BASE_EXPORT char HexDigitToInt(wchar_t c);
|
||||
|
||||
// Returns true if it's a Unicode whitespace character.
|
||||
BASE_EXPORT bool IsUnicodeWhitespace(wchar_t c);
|
||||
|
||||
// Return a byte string in human-readable format with a unit suffix. Not
|
||||
// appropriate for use in any UI; use of FormatBytes and friends in ui/base is
|
||||
// highly recommended instead. TODO(avi): Figure out how to get callers to use
|
||||
// FormatBytes instead; remove this.
|
||||
BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes);
|
||||
|
||||
// Starting at |start_offset| (usually 0), replace the first instance of
|
||||
// |find_this| with |replace_with|.
|
||||
BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
|
||||
base::string16* str,
|
||||
size_t start_offset,
|
||||
StringPiece16 find_this,
|
||||
StringPiece16 replace_with);
|
||||
BASE_EXPORT void ReplaceFirstSubstringAfterOffset(
|
||||
std::string* str,
|
||||
size_t start_offset,
|
||||
StringPiece find_this,
|
||||
StringPiece replace_with);
|
||||
|
||||
// Starting at |start_offset| (usually 0), look through |str| and replace all
|
||||
// instances of |find_this| with |replace_with|.
|
||||
//
|
||||
// This does entire substrings; use std::replace in <algorithm> for single
|
||||
// characters, for example:
|
||||
// std::replace(str.begin(), str.end(), 'a', 'b');
|
||||
BASE_EXPORT void ReplaceSubstringsAfterOffset(
|
||||
string16* str,
|
||||
size_t start_offset,
|
||||
StringPiece16 find_this,
|
||||
StringPiece16 replace_with);
|
||||
BASE_EXPORT void ReplaceSubstringsAfterOffset(
|
||||
std::string* str,
|
||||
size_t start_offset,
|
||||
StringPiece find_this,
|
||||
StringPiece replace_with);
|
||||
|
||||
// Reserves enough memory in |str| to accommodate |length_with_null| characters,
|
||||
// sets the size of |str| to |length_with_null - 1| characters, and returns a
|
||||
// pointer to the underlying contiguous array of characters. This is typically
|
||||
// used when calling a function that writes results into a character array, but
|
||||
// the caller wants the data to be managed by a string-like object. It is
|
||||
// convenient in that it can be used inline in the call, and fast in that it
|
||||
// avoids copying the results of the call from a char* into a string.
|
||||
//
|
||||
// Internally, this takes linear time because the resize() call 0-fills the
|
||||
// underlying array for potentially all
|
||||
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
|
||||
// could avoid this aspect of the resize() call, as we expect the caller to
|
||||
// immediately write over this memory, but there is no other way to set the size
|
||||
// of the string, and not doing that will mean people who access |str| rather
|
||||
// than str.c_str() will get back a string of whatever size |str| had on entry
|
||||
// to this function (probably 0).
|
||||
BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null);
|
||||
BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null);
|
||||
|
||||
// Joins a vector or list of strings into a single string, inserting |separator|
|
||||
// (which may be empty) in between all elements.
|
||||
//
|
||||
// Note this is inverse of SplitString()/SplitStringPiece() defined in
|
||||
// string_split.h.
|
||||
//
|
||||
// If possible, callers should build a vector of StringPieces and use the
|
||||
// StringPiece variant, so that they do not create unnecessary copies of
|
||||
// strings. For example, instead of using SplitString, modifying the vector,
|
||||
// then using JoinString, use SplitStringPiece followed by JoinString so that no
|
||||
// copies of those strings are created until the final join operation.
|
||||
//
|
||||
// Use StrCat (in base/strings/strcat.h) if you don't need a separator.
|
||||
BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts,
|
||||
StringPiece separator);
|
||||
BASE_EXPORT string16 JoinString(const std::vector<string16>& parts,
|
||||
StringPiece16 separator);
|
||||
BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts,
|
||||
StringPiece separator);
|
||||
BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts,
|
||||
StringPiece16 separator);
|
||||
// Explicit initializer_list overloads are required to break ambiguity when used
|
||||
// with a literal initializer list (otherwise the compiler would not be able to
|
||||
// decide between the string and StringPiece overloads).
|
||||
BASE_EXPORT std::string JoinString(std::initializer_list<StringPiece> parts,
|
||||
StringPiece separator);
|
||||
BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts,
|
||||
StringPiece16 separator);
|
||||
|
||||
// Replace $1-$2-$3..$9 in the format string with values from |subst|.
|
||||
// Additionally, any number of consecutive '$' characters is replaced by that
|
||||
// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be
|
||||
// NULL. This only allows you to use up to nine replacements.
|
||||
BASE_EXPORT string16 ReplaceStringPlaceholders(
|
||||
const string16& format_string,
|
||||
const std::vector<string16>& subst,
|
||||
std::vector<size_t>* offsets);
|
||||
|
||||
BASE_EXPORT std::string ReplaceStringPlaceholders(
|
||||
StringPiece format_string,
|
||||
const std::vector<std::string>& subst,
|
||||
std::vector<size_t>* offsets);
|
||||
|
||||
// Single-string shortcut for ReplaceStringPlaceholders. |offset| may be NULL.
|
||||
BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string,
|
||||
const string16& a,
|
||||
size_t* offset);
|
||||
|
||||
#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING)
|
||||
BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input,
|
||||
TrimPositions positions,
|
||||
std::wstring* output);
|
||||
|
||||
BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input,
|
||||
TrimPositions positions);
|
||||
|
||||
BASE_EXPORT bool TrimString(WStringPiece input,
|
||||
WStringPiece trim_chars,
|
||||
std::wstring* output);
|
||||
|
||||
BASE_EXPORT WStringPiece TrimString(WStringPiece input,
|
||||
WStringPiece trim_chars,
|
||||
TrimPositions positions);
|
||||
|
||||
BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null);
|
||||
#endif
|
||||
|
||||
} // namespace base
|
||||
|
||||
#if defined(OS_WIN)
|
||||
#include "base/strings/string_util_win.h"
|
||||
#elif defined(OS_POSIX) || defined(OS_FUCHSIA)
|
||||
#include "base/strings/string_util_posix.h"
|
||||
#else
|
||||
#error Define string operations appropriately for your platform
|
||||
#endif
|
||||
|
||||
#endif // BASE_STRINGS_STRING_UTIL_H_
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/string_util.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Comma-separated list of the ASCII whitespace code points other than LF and
// CR (tab, vertical tab, form feed, space), for use inside an array
// initializer.
#define WHITESPACE_ASCII_NO_CR_LF \
|
||||
0x09, /* CHARACTER TABULATION */ \
|
||||
0x0B, /* LINE TABULATION */ \
|
||||
0x0C, /* FORM FEED (FF) */ \
|
||||
0x20 /* SPACE */
|
||||
|
||||
// All ASCII whitespace code points: WHITESPACE_ASCII_NO_CR_LF followed by LF
// and CR.
#define WHITESPACE_ASCII \
|
||||
WHITESPACE_ASCII_NO_CR_LF, /* Comment to make clang-format linebreak */ \
|
||||
0x0A, /* LINE FEED (LF) */ \
|
||||
0x0D /* CARRIAGE RETURN (CR) */
|
||||
|
||||
// Comma-separated list of the whitespace code points above the ASCII range.
// Every value fits in 16 bits, so the list can initialize both wchar_t and
// char16 arrays.
#define WHITESPACE_UNICODE_NON_ASCII \
|
||||
0x0085, /* NEXT LINE (NEL) */ \
|
||||
0x00A0, /* NO-BREAK SPACE */ \
|
||||
0x1680, /* OGHAM SPACE MARK */ \
|
||||
0x2000, /* EN QUAD */ \
|
||||
0x2001, /* EM QUAD */ \
|
||||
0x2002, /* EN SPACE */ \
|
||||
0x2003, /* EM SPACE */ \
|
||||
0x2004, /* THREE-PER-EM SPACE */ \
|
||||
0x2005, /* FOUR-PER-EM SPACE */ \
|
||||
0x2006, /* SIX-PER-EM SPACE */ \
|
||||
0x2007, /* FIGURE SPACE */ \
|
||||
0x2008, /* PUNCTUATION SPACE */ \
|
||||
0x2009, /* THIN SPACE */ \
|
||||
0x200A, /* HAIR SPACE */ \
|
||||
0x2028, /* LINE SEPARATOR */ \
|
||||
0x2029, /* PARAGRAPH SEPARATOR */ \
|
||||
0x202F, /* NARROW NO-BREAK SPACE */ \
|
||||
0x205F, /* MEDIUM MATHEMATICAL SPACE */ \
|
||||
0x3000 /* IDEOGRAPHIC SPACE */
|
||||
|
||||
// Every whitespace code point listed above except LF and CR.
#define WHITESPACE_UNICODE_NO_CR_LF \
|
||||
WHITESPACE_ASCII_NO_CR_LF, WHITESPACE_UNICODE_NON_ASCII
|
||||
|
||||
// Every whitespace code point listed above: the ASCII set (including LF and
// CR) followed by the non-ASCII set.
#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII
|
||||
|
||||
// Each table below is the corresponding macro list followed by a terminating
// 0 element, so it can be used as a null-terminated string of its character
// type.

// WHITESPACE_UNICODE as wchar_t.
const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0};
|
||||
// WHITESPACE_UNICODE as char16.
const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0};
|
||||
// WHITESPACE_UNICODE_NO_CR_LF (no LF / CR) as char16.
const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0};
|
||||
// WHITESPACE_ASCII as char.
const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0};
|
||||
// WHITESPACE_ASCII widened to char16.
const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0};
|
||||
|
||||
// The three bytes EF BB BF, as a null-terminated char string.
const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF";
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_
|
||||
#define BASE_STRINGS_STRING_UTIL_POSIX_H_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Chromium code style is to not use malloc'd strings; this is only for use
|
||||
// for interaction with APIs that require it.
|
||||
inline char* strdup(const char* str) {
|
||||
return ::strdup(str);
|
||||
}
|
||||
|
||||
inline int vsnprintf(char* buffer, size_t size,
|
||||
const char* format, va_list arguments) {
|
||||
return ::vsnprintf(buffer, size, format, arguments);
|
||||
}
|
||||
|
||||
inline int vswprintf(wchar_t* buffer, size_t size,
|
||||
const wchar_t* format, va_list arguments) {
|
||||
DCHECK(IsWprintfFormatPortable(format));
|
||||
return ::vswprintf(buffer, size, format, arguments);
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_
|
||||
44
TMessagesProj/jni/voip/webrtc/base/strings/string_util_win.h
Normal file
44
TMessagesProj/jni/voip/webrtc/base/strings/string_util_win.h
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_
|
||||
#define BASE_STRINGS_STRING_UTIL_WIN_H_
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Chromium code style is to not use malloc'd strings; this is only for use
|
||||
// for interaction with APIs that require it.
|
||||
inline char* strdup(const char* str) {
|
||||
return _strdup(str);
|
||||
}
|
||||
|
||||
inline int vsnprintf(char* buffer, size_t size,
|
||||
const char* format, va_list arguments) {
|
||||
int length = vsnprintf_s(buffer, size, size - 1, format, arguments);
|
||||
if (length < 0)
|
||||
return _vscprintf(format, arguments);
|
||||
return length;
|
||||
}
|
||||
|
||||
inline int vswprintf(wchar_t* buffer, size_t size,
|
||||
const wchar_t* format, va_list arguments) {
|
||||
DCHECK(IsWprintfFormatPortable(format));
|
||||
|
||||
int length = _vsnwprintf_s(buffer, size, size - 1, format, arguments);
|
||||
if (length < 0)
|
||||
return _vscwprintf(format, arguments);
|
||||
return length;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRING_UTIL_WIN_H_
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
//
|
||||
// This file defines preprocessor macros for stringizing preprocessor
|
||||
// symbols (or their output) and manipulating preprocessor symbols
|
||||
// that define strings.
|
||||
|
||||
#ifndef BASE_STRINGS_STRINGIZE_MACROS_H_
|
||||
#define BASE_STRINGS_STRINGIZE_MACROS_H_
|
||||
|
||||
#include "build/build_config.h"
|
||||
|
||||
// This is not very useful as it does not expand defined symbols if
|
||||
// called directly. Use its counterpart without the _NO_EXPANSION
|
||||
// suffix, below.
|
||||
#define STRINGIZE_NO_EXPANSION(x) #x
|
||||
|
||||
// Use this to quote the provided parameter, first expanding it if it
|
||||
// is a preprocessor symbol.
|
||||
//
|
||||
// For example, if:
|
||||
// #define A FOO
|
||||
// #define B(x) myobj->FunctionCall(x)
|
||||
//
|
||||
// Then:
|
||||
// STRINGIZE(A) produces "FOO"
|
||||
// STRINGIZE(B(y)) produces "myobj->FunctionCall(y)"
|
||||
#define STRINGIZE(x) STRINGIZE_NO_EXPANSION(x)
|
||||
|
||||
#endif // BASE_STRINGS_STRINGIZE_MACROS_H_
|
||||
225
TMessagesProj/jni/voip/webrtc/base/strings/stringprintf.cc
Normal file
225
TMessagesProj/jni/voip/webrtc/base/strings/stringprintf.cc
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/stringprintf.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "base/scoped_clear_last_error.h"
|
||||
#include "base/stl_util.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
|
||||
// is the size of the buffer. These return the number of characters in the
|
||||
// formatted string excluding the NUL terminator. If the buffer is not
|
||||
// large enough to accommodate the formatted string without truncation, they
|
||||
// return the number of characters that would be in the fully-formatted string
|
||||
// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
|
||||
inline int vsnprintfT(char* buffer,
|
||||
size_t buf_size,
|
||||
const char* format,
|
||||
va_list argptr) {
|
||||
return base::vsnprintf(buffer, buf_size, format, argptr);
|
||||
}
|
||||
|
||||
#if defined(OS_WIN)
// Wide-character overload: delegates to base::vswprintf().
inline int vsnprintfT(wchar_t* buffer,
                      size_t buf_size,
                      const wchar_t* format,
                      va_list argptr) {
  const int formatted_length =
      base::vswprintf(buffer, buf_size, format, argptr);
  return formatted_length;
}

// UTF-16 overload: reinterprets the char16_t buffer and format as wchar_t and
// delegates to base::vswprintf().
// NOTE(review): presumably relies on wchar_t being 16 bits wide on Windows.
inline int vsnprintfT(char16_t* buffer,
                      size_t buf_size,
                      const char16_t* format,
                      va_list argptr) {
  wchar_t* const wide_buffer = reinterpret_cast<wchar_t*>(buffer);
  const wchar_t* const wide_format = reinterpret_cast<const wchar_t*>(format);
  return base::vswprintf(wide_buffer, buf_size, wide_format, argptr);
}
#endif
|
||||
|
||||
// Templatized backend for StringPrintF/StringAppendF. This does not finalize
|
||||
// the va_list, the caller is expected to do that.
|
||||
template <class CharT>
|
||||
static void StringAppendVT(std::basic_string<CharT>* dst,
|
||||
const CharT* format,
|
||||
va_list ap) {
|
||||
// First try with a small fixed size buffer.
|
||||
// This buffer size should be kept in sync with StringUtilTest.GrowBoundary
|
||||
// and StringUtilTest.StringPrintfBounds.
|
||||
CharT stack_buf[1024];
|
||||
|
||||
va_list ap_copy;
|
||||
va_copy(ap_copy, ap);
|
||||
|
||||
base::internal::ScopedClearLastError last_error;
|
||||
int result = vsnprintfT(stack_buf, base::size(stack_buf), format, ap_copy);
|
||||
va_end(ap_copy);
|
||||
|
||||
if (result >= 0 && result < static_cast<int>(base::size(stack_buf))) {
|
||||
// It fit.
|
||||
dst->append(stack_buf, result);
|
||||
return;
|
||||
}
|
||||
|
||||
// Repeatedly increase buffer size until it fits.
|
||||
int mem_length = base::size(stack_buf);
|
||||
while (true) {
|
||||
if (result < 0) {
|
||||
#if defined(OS_WIN)
|
||||
// On Windows, vsnprintfT always returns the number of characters in a
|
||||
// fully-formatted string, so if we reach this point, something else is
|
||||
// wrong and no amount of buffer-doubling is going to fix it.
|
||||
return;
|
||||
#else
|
||||
if (errno != 0 && errno != EOVERFLOW)
|
||||
return;
|
||||
// Try doubling the buffer size.
|
||||
mem_length *= 2;
|
||||
#endif
|
||||
} else {
|
||||
// We need exactly "result + 1" characters.
|
||||
mem_length = result + 1;
|
||||
}
|
||||
|
||||
if (mem_length > 32 * 1024 * 1024) {
|
||||
// That should be plenty, don't try anything larger. This protects
|
||||
// against huge allocations when using vsnprintfT implementations that
|
||||
// return -1 for reasons other than overflow without setting errno.
|
||||
DLOG(WARNING) << "Unable to printf the requested string due to size.";
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<CharT> mem_buf(mem_length);
|
||||
|
||||
// NOTE: You can only use a va_list once. Since we're in a while loop, we
|
||||
// need to make a new copy each time so we don't use up the original.
|
||||
va_copy(ap_copy, ap);
|
||||
result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
|
||||
va_end(ap_copy);
|
||||
|
||||
if ((result >= 0) && (result < mem_length)) {
|
||||
// It fit.
|
||||
dst->append(&mem_buf[0], result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Formats the variadic arguments according to |format| and returns the result
// as a new std::string.
std::string StringPrintf(const char* format, ...) {
  std::string formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}
|
||||
|
||||
#if defined(OS_WIN)
// Wide-character variant: formats the variadic arguments into a new
// std::wstring.
std::wstring StringPrintf(const wchar_t* format, ...) {
  std::wstring formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}

// UTF-16 variant: formats the variadic arguments into a new std::u16string.
std::u16string StringPrintf(const char16_t* format, ...) {
  std::u16string formatted;

  va_list args;
  va_start(args, format);
  StringAppendV(&formatted, format, args);
  va_end(args);

  return formatted;
}
#endif
|
||||
|
||||
// va_list flavour of StringPrintf(): formats |ap| according to |format| into a
// new std::string. The caller remains responsible for va_end() on |ap|.
std::string StringPrintV(const char* format, va_list ap) {
  std::string formatted;
  StringAppendV(&formatted, format, ap);
  return formatted;
}
|
||||
|
||||
// Replaces the contents of |*dst| with the formatted output and returns a
// reference to |*dst|.
const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
  // Discard the previous contents; the formatted text is then appended.
  dst->clear();

  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);

  return *dst;
}
|
||||
|
||||
#if defined(OS_WIN)
// Wide-character variant: replaces the contents of |*dst| with the formatted
// output and returns a reference to |*dst|.
const std::wstring& SStringPrintf(std::wstring* dst,
                                  const wchar_t* format, ...) {
  dst->clear();

  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);

  return *dst;
}

// UTF-16 variant: replaces the contents of |*dst| with the formatted output
// and returns a reference to |*dst|.
const std::u16string& SStringPrintf(std::u16string* dst,
                                    const char16_t* format,
                                    ...) {
  dst->clear();

  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);

  return *dst;
}
#endif
|
||||
|
||||
// Appends the formatted output to |*dst|, keeping its existing contents.
void StringAppendF(std::string* dst, const char* format, ...) {
  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);
}
|
||||
|
||||
#if defined(OS_WIN)
// Wide-character variant: appends the formatted output to |*dst|.
void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);
}

// UTF-16 variant: appends the formatted output to |*dst|.
void StringAppendF(std::u16string* dst, const char16_t* format, ...) {
  va_list args;
  va_start(args, format);
  StringAppendV(dst, format, args);
  va_end(args);
}
#endif
|
||||
|
||||
void StringAppendV(std::string* dst, const char* format, va_list ap) {
|
||||
StringAppendVT(dst, format, ap);
|
||||
}
|
||||
|
||||
#if defined(OS_WIN)
// Wide-character entry point; forwards to the templated backend.
void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
  StringAppendVT<wchar_t>(dst, format, ap);
}

// UTF-16 entry point; forwards to the templated backend.
void StringAppendV(std::u16string* dst, const char16_t* format, va_list ap) {
  StringAppendVT<char16_t>(dst, format, ap);
}
#endif
|
||||
|
||||
} // namespace base
|
||||
74
TMessagesProj/jni/voip/webrtc/base/strings/stringprintf.h
Normal file
74
TMessagesProj/jni/voip/webrtc/base/strings/stringprintf.h
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
// Copyright 2013 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_STRINGPRINTF_H_
|
||||
#define BASE_STRINGS_STRINGPRINTF_H_
|
||||
|
||||
#include <stdarg.h> // va_list
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/compiler_specific.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Return a C++ string given printf-like input.
|
||||
BASE_EXPORT std::string StringPrintf(const char* format, ...)
|
||||
PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
|
||||
#if defined(OS_WIN)
|
||||
// Note: Unfortunately compile time checking of the format string for UTF-16
|
||||
// strings is not supported by any compiler, thus these functions should be used
|
||||
// carefully and sparingly. Also applies to SStringPrintf and StringAppendV
|
||||
// below.
|
||||
BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...)
|
||||
WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::u16string StringPrintf(const char16_t* format, ...)
|
||||
WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT;
|
||||
#endif
|
||||
|
||||
// Return a C++ string given vprintf-like input.
|
||||
BASE_EXPORT std::string StringPrintV(const char* format, va_list ap)
|
||||
PRINTF_FORMAT(1, 0) WARN_UNUSED_RESULT;
|
||||
|
||||
// Store result into a supplied string and return it.
|
||||
BASE_EXPORT const std::string& SStringPrintf(std::string* dst,
|
||||
const char* format,
|
||||
...) PRINTF_FORMAT(2, 3);
|
||||
#if defined(OS_WIN)
|
||||
BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst,
|
||||
const wchar_t* format,
|
||||
...) WPRINTF_FORMAT(2, 3);
|
||||
BASE_EXPORT const std::u16string& SStringPrintf(std::u16string* dst,
|
||||
const char16_t* format,
|
||||
...) WPRINTF_FORMAT(2, 3);
|
||||
#endif
|
||||
|
||||
// Append result to a supplied string.
|
||||
BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...)
|
||||
PRINTF_FORMAT(2, 3);
|
||||
#if defined(OS_WIN)
|
||||
BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...)
|
||||
WPRINTF_FORMAT(2, 3);
|
||||
BASE_EXPORT void StringAppendF(std::u16string* dst, const char16_t* format, ...)
|
||||
WPRINTF_FORMAT(2, 3);
|
||||
#endif
|
||||
|
||||
// Lower-level routine that takes a va_list and appends to a specified
|
||||
// string. All other routines are just convenience wrappers around it.
|
||||
BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap)
|
||||
PRINTF_FORMAT(2, 0);
|
||||
#if defined(OS_WIN)
|
||||
BASE_EXPORT void StringAppendV(std::wstring* dst,
|
||||
const wchar_t* format,
|
||||
va_list ap) WPRINTF_FORMAT(2, 0);
|
||||
BASE_EXPORT void StringAppendV(std::u16string* dst,
|
||||
const char16_t* format,
|
||||
va_list ap) WPRINTF_FORMAT(2, 0);
|
||||
#endif
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_STRINGPRINTF_H_
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_SYS_STRING_CONVERSIONS_H_
|
||||
#define BASE_STRINGS_SYS_STRING_CONVERSIONS_H_
|
||||
|
||||
// Provides system-dependent string type conversions for cases where it's
|
||||
// necessary to not use ICU. Generally, you should not need this in Chrome,
|
||||
// but it is used in some shared code. Dependencies should be minimal.
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
#if defined(OS_MACOSX)
|
||||
#include <CoreFoundation/CoreFoundation.h>
|
||||
#ifdef __OBJC__
|
||||
@class NSString;
|
||||
#else
|
||||
class NSString;
|
||||
#endif
|
||||
#endif // OS_MACOSX
|
||||
|
||||
namespace base {
|
||||
|
||||
// Converts between wide and UTF-8 representations of a string. On error, the
|
||||
// result is system-dependent.
|
||||
BASE_EXPORT std::string SysWideToUTF8(const std::wstring& wide)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::wstring SysUTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
|
||||
|
||||
// Converts between wide and the system multi-byte representations of a string.
|
||||
// DANGER: This will lose information and can change (on Windows, this can
|
||||
// change between reboots).
|
||||
BASE_EXPORT std::string SysWideToNativeMB(const std::wstring& wide)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::wstring SysNativeMBToWide(StringPiece native_mb)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
// Windows-specific ------------------------------------------------------------
|
||||
|
||||
#if defined(OS_WIN)
|
||||
|
||||
// Converts between 8-bit and wide strings, using the given code page. The
|
||||
// code page identifier is one accepted by the Windows function
|
||||
// MultiByteToWideChar().
|
||||
BASE_EXPORT std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT std::string SysWideToMultiByte(const std::wstring& wide,
|
||||
uint32_t code_page)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
#endif // defined(OS_WIN)
|
||||
|
||||
// Mac-specific ----------------------------------------------------------------
|
||||
|
||||
#if defined(OS_MACOSX)
|
||||
|
||||
// Converts between STL strings and CFStringRefs/NSStrings.
|
||||
|
||||
// Creates a string, and returns it with a refcount of 1. You are responsible
|
||||
// for releasing it. Returns NULL on failure.
|
||||
BASE_EXPORT CFStringRef SysUTF8ToCFStringRef(StringPiece utf8)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
// Same, but returns an autoreleased NSString.
|
||||
BASE_EXPORT NSString* SysUTF8ToNSString(StringPiece utf8) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT NSString* SysUTF16ToNSString(StringPiece16 utf16)
|
||||
WARN_UNUSED_RESULT;
|
||||
|
||||
// Converts a CFStringRef to an STL string. Returns an empty string on failure.
|
||||
BASE_EXPORT std::string SysCFStringRefToUTF8(CFStringRef ref)
|
||||
WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT string16 SysCFStringRefToUTF16(CFStringRef ref) WARN_UNUSED_RESULT;
|
||||
|
||||
// Same, but accepts NSString input. Converts nil NSString* to the appropriate
|
||||
// string type of length 0.
|
||||
BASE_EXPORT std::string SysNSStringToUTF8(NSString* ref) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT string16 SysNSStringToUTF16(NSString* ref) WARN_UNUSED_RESULT;
|
||||
|
||||
#endif // defined(OS_MACOSX)
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_SYS_STRING_CONVERSIONS_H_
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "base/mac/foundation_util.h"
|
||||
#include "base/mac/scoped_cftyperef.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
// Convert the supplied CFString into the specified encoding, and return it as
|
||||
// an STL string of the template type. Returns an empty string on failure.
|
||||
//
|
||||
// Do not assert in this function since it is used by the assertion code!
|
||||
template<typename StringType>
static StringType CFStringToSTLStringWithEncodingT(CFStringRef cfstring,
                                                   CFStringEncoding encoding) {
  using CharType = typename StringType::value_type;

  const CFIndex char_count = CFStringGetLength(cfstring);
  if (char_count == 0)
    return StringType();

  const CFRange full_range = CFRangeMake(0, char_count);

  // First pass: no destination buffer is supplied, so the call only reports
  // through |byte_count| how many bytes the converted text occupies.
  CFIndex byte_count;
  CFIndex converted = CFStringGetBytes(cfstring,
                                       full_range,
                                       encoding,
                                       0,        // lossByte
                                       false,    // isExternalRepresentation
                                       nullptr,  // buffer
                                       0,        // maxBufLen
                                       &byte_count);
  if (converted == 0 || byte_count == 0)
    return StringType();

  // |byte_count| is measured in UInt8 units. A raw byte buffer might not be
  // suitably aligned for CharType, so allocate a vector of CharType instead
  // and translate the byte count into a CharType count, plus one slot for the
  // NUL terminator.
  const typename StringType::size_type unit_count =
      byte_count * sizeof(UInt8) / sizeof(CharType) + 1;
  std::vector<CharType> storage(unit_count);

  // Second pass: perform the actual conversion into |storage|.
  converted = CFStringGetBytes(cfstring,
                               full_range,
                               encoding,
                               0,      // lossByte
                               false,  // isExternalRepresentation
                               reinterpret_cast<UInt8*>(storage.data()),
                               byte_count,
                               nullptr);  // usedBufLen
  if (converted == 0)
    return StringType();

  storage[unit_count - 1] = '\0';
  return StringType(storage.data(), unit_count - 1);
}
|
||||
|
||||
// Given an STL string |in| with an encoding specified by |in_encoding|,
|
||||
// convert it to |out_encoding| and return it as an STL string of the
|
||||
// |OutStringType| template type. Returns an empty string on failure.
|
||||
//
|
||||
// Do not assert in this function since it is used by the assertion code!
|
||||
template<typename InStringType, typename OutStringType>
static OutStringType STLStringToSTLStringWithEncodingsT(
    const InStringType& in,
    CFStringEncoding in_encoding,
    CFStringEncoding out_encoding) {
  const typename InStringType::size_type char_count = in.length();
  if (char_count == 0)
    return OutStringType();

  // Wrap |in|'s bytes in a CFString via CFStringCreateWithBytesNoCopy, passing
  // kCFAllocatorNull as the contents deallocator.
  const UInt8* const raw_bytes = reinterpret_cast<const UInt8*>(in.data());
  base::ScopedCFTypeRef<CFStringRef> cfstring(CFStringCreateWithBytesNoCopy(
      nullptr,
      raw_bytes,
      char_count * sizeof(typename InStringType::value_type),
      in_encoding,
      false,
      kCFAllocatorNull));
  if (!cfstring)
    return OutStringType();

  // Re-encode the wrapped string into the requested output encoding.
  return CFStringToSTLStringWithEncodingT<OutStringType>(cfstring,
                                                         out_encoding);
}
|
||||
|
||||
// Given a StringPiece |in| with an encoding specified by |in_encoding|, return
|
||||
// it as a CFStringRef. Returns NULL on failure.
|
||||
template <typename StringType>
static CFStringRef StringPieceToCFStringWithEncodingsT(
    BasicStringPiece<StringType> in,
    CFStringEncoding in_encoding) {
  using PieceCharType = typename BasicStringPiece<StringType>::value_type;

  const auto char_count = in.length();
  // An empty input yields the constant empty CFString rather than a new
  // allocation.
  if (char_count == 0)
    return CFSTR("");

  const UInt8* const raw_bytes = reinterpret_cast<const UInt8*>(in.data());
  const auto byte_count = char_count * sizeof(PieceCharType);
  return CFStringCreateWithBytes(kCFAllocatorDefault, raw_bytes, byte_count,
                                 in_encoding, false);
}
|
||||
|
||||
// Specify the byte ordering explicitly, otherwise CFString will be confused
|
||||
// when strings don't carry BOMs, as they typically won't.
|
||||
// Encoding passed for narrow (UTF-8) strings.
static const CFStringEncoding kNarrowStringEncoding = kCFStringEncodingUTF8;
|
||||
// The 16-bit ("medium") and 32-bit ("wide") encodings are chosen to match the
// byte order the compiler reports. If neither endianness macro is defined,
// these two constants are not declared at all.
#ifdef __BIG_ENDIAN__
|
||||
static const CFStringEncoding kMediumStringEncoding = kCFStringEncodingUTF16BE;
|
||||
static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32BE;
|
||||
#elif defined(__LITTLE_ENDIAN__)
|
||||
static const CFStringEncoding kMediumStringEncoding = kCFStringEncodingUTF16LE;
|
||||
static const CFStringEncoding kWideStringEncoding = kCFStringEncodingUTF32LE;
|
||||
#endif // __LITTLE_ENDIAN__
|
||||
|
||||
} // namespace
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::string SysWideToUTF8(const std::wstring& wide) {
|
||||
return STLStringToSTLStringWithEncodingsT<std::wstring, std::string>(
|
||||
wide, kWideStringEncoding, kNarrowStringEncoding);
|
||||
}
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::wstring SysUTF8ToWide(StringPiece utf8) {
|
||||
return STLStringToSTLStringWithEncodingsT<StringPiece, std::wstring>(
|
||||
utf8, kNarrowStringEncoding, kWideStringEncoding);
|
||||
}
|
||||
|
||||
std::string SysWideToNativeMB(const std::wstring& wide) {
|
||||
return SysWideToUTF8(wide);
|
||||
}
|
||||
|
||||
// In this file the "native multi-byte" conversion is simply the UTF-8
// conversion, so this forwards to SysUTF8ToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  std::wstring wide = SysUTF8ToWide(native_mb);
  return wide;
}
|
||||
|
||||
// Returns a CFString holding the UTF-8 text in |utf8|; the result comes
// straight from StringPieceToCFStringWithEncodingsT().
CFStringRef SysUTF8ToCFStringRef(StringPiece utf8) {
  const CFStringEncoding source_encoding = kNarrowStringEncoding;
  return StringPieceToCFStringWithEncodingsT(utf8, source_encoding);
}
|
||||
|
||||
// Returns a CFString holding the UTF-16 text in |utf16|; the result comes
// straight from StringPieceToCFStringWithEncodingsT().
CFStringRef SysUTF16ToCFStringRef(StringPiece16 utf16) {
  const CFStringEncoding source_encoding = kMediumStringEncoding;
  return StringPieceToCFStringWithEncodingsT(utf16, source_encoding);
}
|
||||
|
||||
// Converts UTF-8 |utf8| to an NSString by creating a CFString, casting it to
// NSString and sending it -autorelease.
NSString* SysUTF8ToNSString(StringPiece utf8) {
  CFStringRef cf_string = SysUTF8ToCFStringRef(utf8);
  return [mac::CFToNSCast(cf_string) autorelease];
}
|
||||
|
||||
// Converts UTF-16 |utf16| to an NSString by creating a CFString, casting it
// to NSString and sending it -autorelease.
NSString* SysUTF16ToNSString(StringPiece16 utf16) {
  CFStringRef cf_string = SysUTF16ToCFStringRef(utf16);
  return [mac::CFToNSCast(cf_string) autorelease];
}
|
||||
|
||||
// Extracts the contents of |ref| as a UTF-8 std::string.
std::string SysCFStringRefToUTF8(CFStringRef ref) {
  const CFStringEncoding target_encoding = kNarrowStringEncoding;
  return CFStringToSTLStringWithEncodingT<std::string>(ref, target_encoding);
}
|
||||
|
||||
// Extracts the contents of |ref| as a UTF-16 string16.
string16 SysCFStringRefToUTF16(CFStringRef ref) {
  const CFStringEncoding target_encoding = kMediumStringEncoding;
  return CFStringToSTLStringWithEncodingT<string16>(ref, target_encoding);
}
|
||||
|
||||
// Converts |nsstring| to UTF-8. A nil |nsstring| yields an empty string.
std::string SysNSStringToUTF8(NSString* nsstring) {
  if (nsstring) {
    // The NSString pointer is reinterpreted as a CFStringRef and handed to the
    // CFString-based converter.
    CFStringRef as_cf = reinterpret_cast<CFStringRef>(nsstring);
    return SysCFStringRefToUTF8(as_cf);
  }
  return std::string();
}
|
||||
|
||||
// Converts |nsstring| to UTF-16. A nil |nsstring| yields an empty string.
string16 SysNSStringToUTF16(NSString* nsstring) {
  if (nsstring) {
    // The NSString pointer is reinterpreted as a CFStringRef and handed to the
    // CFString-based converter.
    CFStringRef as_cf = reinterpret_cast<CFStringRef>(nsstring);
    return SysCFStringRefToUTF16(as_cf);
  }
  return string16();
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
std::string SysWideToUTF8(const std::wstring& wide) {
|
||||
// In theory this should be using the system-provided conversion rather
|
||||
// than our ICU, but this will do for now.
|
||||
return WideToUTF8(wide);
|
||||
}
|
||||
// Converts UTF-8 |utf8| to a wide string using the project's own UTF8ToWide()
// rather than a system-provided conversion (which, in theory, is what should
// be used here). Whatever UTF8ToWide() wrote is returned; the result of the
// call itself is not inspected.
std::wstring SysUTF8ToWide(StringPiece utf8) {
  std::wstring wide;
  UTF8ToWide(utf8.data(), utf8.size(), &wide);
  return wide;
}
|
||||
|
||||
#if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
|
||||
// TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb()
|
||||
// support and a better understanding of what calls these routines.
|
||||
|
||||
std::string SysWideToNativeMB(const std::wstring& wide) {
|
||||
return WideToUTF8(wide);
|
||||
}
|
||||
|
||||
// Variant compiled when SYSTEM_NATIVE_UTF8 or OS_ANDROID is defined: the
// native multi-byte encoding is taken to be UTF-8.
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  std::wstring wide = SysUTF8ToWide(native_mb);
  return wide;
}
|
||||
|
||||
#else
|
||||
|
||||
// Converts |wide| to a multi-byte string using wcrtomb(), i.e. the encoding
// selected by the current C locale.
//
// Embedded NUL characters are preserved as single zero bytes. If any
// character cannot be converted, an empty string is returned.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // A value-initialized mbstate_t is the initial conversion state.
  mbstate_t ps = mbstate_t();

  std::string out;
  // Lower bound only: every input character contributes at least one byte.
  out.reserve(wide.size());

  for (const wchar_t src : wide) {
    // wcrtomb() needs a real destination buffer; one character is converted
    // at a time and then appended to |out|.
    char buf[16];
    // NULs bypass wcrtomb() to avoid its special handling of them.
    const size_t res = src ? wcrtomb(buf, src, &ps) : 0;
    if (res == static_cast<size_t>(-1)) {
      // Unconvertible character: report failure as an empty string.
      return std::string();
    }
    if (res == 0) {
      // Embedded NUL: emit exactly one zero byte and keep going.
      out.push_back('\0');
    } else {
      out.append(buf, res);
    }
  }

  return out;
}
|
||||
|
||||
// Converts the multi-byte string |native_mb| to a wide string using
// mbrtowc(), i.e. the encoding selected by the current C locale.
//
// The input is scanned twice: once to count the wide characters so the output
// can be sized exactly, and once to actually decode. Embedded NUL bytes are
// kept as L'\0'. An invalid or truncated sequence yields an empty string.
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  const char* const bytes = native_mb.data();
  const size_t byte_count = native_mb.size();
  const size_t kInvalidSequence = static_cast<size_t>(-1);
  const size_t kIncompleteSequence = static_cast<size_t>(-2);

  mbstate_t state;

  // Pass 1: count the wide characters without storing them.
  memset(&state, 0, sizeof(state));
  size_t wide_count = 0;
  size_t pos = 0;
  while (pos < byte_count) {
    const size_t consumed =
        mbrtowc(nullptr, bytes + pos, byte_count - pos, &state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence)
      return std::wstring();
    // A result of 0 means a NUL byte was decoded; it still occupies one input
    // byte and one output character.
    pos += consumed ? consumed : 1;
    ++wide_count;
  }

  if (!wide_count)
    return std::wstring();

  std::wstring out(wide_count, L'\0');

  // Pass 2: decode again from a fresh shift state, this time storing each
  // character in its slot of |out|.
  memset(&state, 0, sizeof(state));
  size_t written = 0;
  pos = 0;
  while (pos < byte_count) {
    const size_t consumed =
        mbrtowc(&out[written], bytes + pos, byte_count - pos, &state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence)
      return std::wstring();
    pos += consumed ? consumed : 1;
    ++written;
  }

  return out;
}
|
||||
|
||||
#endif // defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID)
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/sys_string_conversions.h"
|
||||
|
||||
#include <windows.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::string SysWideToUTF8(const std::wstring& wide) {
|
||||
return SysWideToMultiByte(wide, CP_UTF8);
|
||||
}
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::wstring SysUTF8ToWide(StringPiece utf8) {
|
||||
return SysMultiByteToWide(utf8, CP_UTF8);
|
||||
}
|
||||
|
||||
std::string SysWideToNativeMB(const std::wstring& wide) {
|
||||
return SysWideToMultiByte(wide, CP_ACP);
|
||||
}
|
||||
|
||||
// Converts |native_mb| from the ANSI code page (CP_ACP) to a wide string via
// SysMultiByteToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  std::wstring wide = SysMultiByteToWide(native_mb, CP_ACP);
  return wide;
}
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::wstring SysMultiByteToWide(StringPiece mb, uint32_t code_page) {
|
||||
if (mb.empty())
|
||||
return std::wstring();
|
||||
|
||||
int mb_length = static_cast<int>(mb.length());
|
||||
// Compute the length of the buffer.
|
||||
int charcount = MultiByteToWideChar(code_page, 0,
|
||||
mb.data(), mb_length, NULL, 0);
|
||||
if (charcount == 0)
|
||||
return std::wstring();
|
||||
|
||||
std::wstring wide;
|
||||
wide.resize(charcount);
|
||||
MultiByteToWideChar(code_page, 0, mb.data(), mb_length, &wide[0], charcount);
|
||||
|
||||
return wide;
|
||||
}
|
||||
|
||||
// Do not assert in this function since it is used by the asssertion code!
|
||||
std::string SysWideToMultiByte(const std::wstring& wide, uint32_t code_page) {
|
||||
int wide_length = static_cast<int>(wide.length());
|
||||
if (wide_length == 0)
|
||||
return std::string();
|
||||
|
||||
// Compute the length of the buffer we'll need.
|
||||
int charcount = WideCharToMultiByte(code_page, 0, wide.data(), wide_length,
|
||||
NULL, 0, NULL, NULL);
|
||||
if (charcount == 0)
|
||||
return std::string();
|
||||
|
||||
std::string mb;
|
||||
mb.resize(charcount);
|
||||
WideCharToMultiByte(code_page, 0, wide.data(), wide_length,
|
||||
&mb[0], charcount, NULL, NULL);
|
||||
|
||||
return mb;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,264 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/utf_offset_string_conversions.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "base/strings/utf_string_conversion_utils.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Stores the three values describing one adjustment verbatim in the
// identically named members.
OffsetAdjuster::Adjustment::Adjustment(size_t original_offset,
                                       size_t original_length,
                                       size_t output_length)
    : original_offset(original_offset),
      original_length(original_length),
      output_length(output_length) {}
|
||||
|
||||
// static
|
||||
void OffsetAdjuster::AdjustOffsets(const Adjustments& adjustments,
|
||||
std::vector<size_t>* offsets_for_adjustment,
|
||||
size_t limit) {
|
||||
DCHECK(offsets_for_adjustment);
|
||||
for (auto& i : *offsets_for_adjustment)
|
||||
AdjustOffset(adjustments, &i, limit);
|
||||
}
|
||||
|
||||
// static
|
||||
void OffsetAdjuster::AdjustOffset(const Adjustments& adjustments,
|
||||
size_t* offset,
|
||||
size_t limit) {
|
||||
DCHECK(offset);
|
||||
if (*offset == string16::npos)
|
||||
return;
|
||||
int adjustment = 0;
|
||||
for (const auto& i : adjustments) {
|
||||
if (*offset <= i.original_offset)
|
||||
break;
|
||||
if (*offset < (i.original_offset + i.original_length)) {
|
||||
*offset = string16::npos;
|
||||
return;
|
||||
}
|
||||
adjustment += static_cast<int>(i.original_length - i.output_length);
|
||||
}
|
||||
*offset -= adjustment;
|
||||
|
||||
if (*offset > limit)
|
||||
*offset = string16::npos;
|
||||
}
|
||||
|
||||
// static
|
||||
void OffsetAdjuster::UnadjustOffsets(
|
||||
const Adjustments& adjustments,
|
||||
std::vector<size_t>* offsets_for_unadjustment) {
|
||||
if (!offsets_for_unadjustment || adjustments.empty())
|
||||
return;
|
||||
for (auto& i : *offsets_for_unadjustment)
|
||||
UnadjustOffset(adjustments, &i);
|
||||
}
|
||||
|
||||
// static
|
||||
void OffsetAdjuster::UnadjustOffset(const Adjustments& adjustments,
|
||||
size_t* offset) {
|
||||
if (*offset == string16::npos)
|
||||
return;
|
||||
int adjustment = 0;
|
||||
for (const auto& i : adjustments) {
|
||||
if (*offset + adjustment <= i.original_offset)
|
||||
break;
|
||||
adjustment += static_cast<int>(i.original_length - i.output_length);
|
||||
if ((*offset + adjustment) < (i.original_offset + i.original_length)) {
|
||||
*offset = string16::npos;
|
||||
return;
|
||||
}
|
||||
}
|
||||
*offset += adjustment;
|
||||
}
|
||||
|
||||
// static
|
||||
void OffsetAdjuster::MergeSequentialAdjustments(
|
||||
const Adjustments& first_adjustments,
|
||||
Adjustments* adjustments_on_adjusted_string) {
|
||||
auto adjusted_iter = adjustments_on_adjusted_string->begin();
|
||||
auto first_iter = first_adjustments.begin();
|
||||
// Simultaneously iterate over all |adjustments_on_adjusted_string| and
|
||||
// |first_adjustments|, pushing adjustments at the end of
|
||||
// |adjustments_builder| as we go. |shift| keeps track of the current number
|
||||
// of characters collapsed by |first_adjustments| up to this point.
|
||||
// |currently_collapsing| keeps track of the number of characters collapsed by
|
||||
// |first_adjustments| into the current |adjusted_iter|'s length. These are
|
||||
// characters that will change |shift| as soon as we're done processing the
|
||||
// current |adjusted_iter|; they are not yet reflected in |shift|.
|
||||
size_t shift = 0;
|
||||
size_t currently_collapsing = 0;
|
||||
// While we *could* update |adjustments_on_adjusted_string| in place by
|
||||
// inserting new adjustments into the middle, we would be repeatedly calling
|
||||
// |std::vector::insert|. That would cost O(n) time per insert, relative to
|
||||
// distance from end of the string. By instead allocating
|
||||
// |adjustments_builder| and calling |std::vector::push_back|, we only pay
|
||||
// amortized constant time per push. We are trading space for time.
|
||||
Adjustments adjustments_builder;
|
||||
while (adjusted_iter != adjustments_on_adjusted_string->end()) {
|
||||
if ((first_iter == first_adjustments.end()) ||
|
||||
((adjusted_iter->original_offset + shift +
|
||||
adjusted_iter->original_length) <= first_iter->original_offset)) {
|
||||
// Entire |adjusted_iter| (accounting for its shift and including its
|
||||
// whole original length) comes before |first_iter|.
|
||||
//
|
||||
// Correct the offset at |adjusted_iter| and move onto the next
|
||||
// adjustment that needs revising.
|
||||
adjusted_iter->original_offset += shift;
|
||||
shift += currently_collapsing;
|
||||
currently_collapsing = 0;
|
||||
adjustments_builder.push_back(*adjusted_iter);
|
||||
++adjusted_iter;
|
||||
} else if ((adjusted_iter->original_offset + shift) >
|
||||
first_iter->original_offset) {
|
||||
// |first_iter| comes before the |adjusted_iter| (as adjusted by |shift|).
|
||||
|
||||
// It's not possible for the adjustments to overlap. (It shouldn't
|
||||
// be possible that we have an |adjusted_iter->original_offset| that,
|
||||
// when adjusted by the computed |shift|, is in the middle of
|
||||
// |first_iter|'s output's length. After all, that would mean the
|
||||
// current adjustment_on_adjusted_string somehow points to an offset
|
||||
// that was supposed to have been eliminated by the first set of
|
||||
// adjustments.)
|
||||
DCHECK_LE(first_iter->original_offset + first_iter->output_length,
|
||||
adjusted_iter->original_offset + shift);
|
||||
|
||||
// Add the |first_iter| to the full set of adjustments.
|
||||
shift += first_iter->original_length - first_iter->output_length;
|
||||
adjustments_builder.push_back(*first_iter);
|
||||
++first_iter;
|
||||
} else {
|
||||
// The first adjustment adjusted something that then got further adjusted
|
||||
// by the second set of adjustments. In other words, |first_iter| points
|
||||
// to something in the range covered by |adjusted_iter|'s length (after
|
||||
// accounting for |shift|). Precisely,
|
||||
// adjusted_iter->original_offset + shift
|
||||
// <=
|
||||
// first_iter->original_offset
|
||||
// <=
|
||||
// adjusted_iter->original_offset + shift +
|
||||
// adjusted_iter->original_length
|
||||
|
||||
// Modify the current |adjusted_iter| to include whatever collapsing
|
||||
// happened in |first_iter|, then advance to the next |first_adjustments|
|
||||
// because we dealt with the current one.
|
||||
const int collapse = static_cast<int>(first_iter->original_length) -
|
||||
static_cast<int>(first_iter->output_length);
|
||||
// This function does not know how to deal with a string that expands and
|
||||
// then gets modified, only strings that collapse and then get modified.
|
||||
DCHECK_GT(collapse, 0);
|
||||
adjusted_iter->original_length += collapse;
|
||||
currently_collapsing += collapse;
|
||||
++first_iter;
|
||||
}
|
||||
}
|
||||
DCHECK_EQ(0u, currently_collapsing);
|
||||
if (first_iter != first_adjustments.end()) {
|
||||
// Only first adjustments are left. These do not need to be modified.
|
||||
// (Their offsets are already correct with respect to the original string.)
|
||||
// Append them all.
|
||||
DCHECK(adjusted_iter == adjustments_on_adjusted_string->end());
|
||||
adjustments_builder.insert(adjustments_builder.end(), first_iter,
|
||||
first_adjustments.end());
|
||||
}
|
||||
*adjustments_on_adjusted_string = std::move(adjustments_builder);
|
||||
}
|
||||
|
||||
// Converts the given source Unicode character type to the given destination
|
||||
// Unicode character type as a STL string. The given input buffer and size
|
||||
// determine the source, and the given output STL string will be replaced by
|
||||
// the result. If non-NULL, |adjustments| is set to reflect the all the
|
||||
// alterations to the string that are not one-character-to-one-character.
|
||||
// It will always be sorted by increasing offset.
|
||||
template<typename SrcChar, typename DestStdString>
|
||||
bool ConvertUnicode(const SrcChar* src,
|
||||
size_t src_len,
|
||||
DestStdString* output,
|
||||
OffsetAdjuster::Adjustments* adjustments) {
|
||||
if (adjustments)
|
||||
adjustments->clear();
|
||||
// ICU requires 32-bit numbers.
|
||||
bool success = true;
|
||||
int32_t src_len32 = static_cast<int32_t>(src_len);
|
||||
for (int32_t i = 0; i < src_len32; i++) {
|
||||
uint32_t code_point;
|
||||
size_t original_i = i;
|
||||
size_t chars_written = 0;
|
||||
if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
|
||||
chars_written = WriteUnicodeCharacter(code_point, output);
|
||||
} else {
|
||||
chars_written = WriteUnicodeCharacter(0xFFFD, output);
|
||||
success = false;
|
||||
}
|
||||
|
||||
// Only bother writing an adjustment if this modification changed the
|
||||
// length of this character.
|
||||
// NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
|
||||
// character read, not after it (so that incrementing it in the loop
|
||||
// increment will place it at the right location), so we need to account
|
||||
// for that in determining the amount that was read.
|
||||
if (adjustments && ((i - original_i + 1) != chars_written)) {
|
||||
adjustments->push_back(OffsetAdjuster::Adjustment(
|
||||
original_i, i - original_i + 1, chars_written));
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
bool UTF8ToUTF16WithAdjustments(
|
||||
const char* src,
|
||||
size_t src_len,
|
||||
string16* output,
|
||||
base::OffsetAdjuster::Adjustments* adjustments) {
|
||||
PrepareForUTF16Or32Output(src, src_len, output);
|
||||
return ConvertUnicode(src, src_len, output, adjustments);
|
||||
}
|
||||
|
||||
// Convenience overload: converts |utf8| and returns the UTF-16 result by
// value. The success flag of the underlying conversion is not reported.
string16 UTF8ToUTF16WithAdjustments(
    const base::StringPiece& utf8,
    base::OffsetAdjuster::Adjustments* adjustments) {
  string16 utf16;
  UTF8ToUTF16WithAdjustments(utf8.data(), utf8.length(), &utf16, adjustments);
  return utf16;
}
|
||||
|
||||
// Converts |utf8| to UTF-16 and rewrites each entry of
// |offsets_for_adjustment| so that it refers to the same position in the
// result. Offsets beyond the end of |utf8| are set to string16::npos before
// the conversion.
string16 UTF8ToUTF16AndAdjustOffsets(
    const base::StringPiece& utf8,
    std::vector<size_t>* offsets_for_adjustment) {
  std::vector<size_t>& offsets = *offsets_for_adjustment;
  for (auto it = offsets.begin(); it != offsets.end(); ++it) {
    if (*it > utf8.length())
      *it = string16::npos;
  }

  OffsetAdjuster::Adjustments adjustments;
  string16 utf16 = UTF8ToUTF16WithAdjustments(utf8, &adjustments);
  OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
  return utf16;
}
|
||||
|
||||
std::string UTF16ToUTF8AndAdjustOffsets(
|
||||
const base::StringPiece16& utf16,
|
||||
std::vector<size_t>* offsets_for_adjustment) {
|
||||
for (size_t& offset : *offsets_for_adjustment) {
|
||||
if (offset > utf16.length())
|
||||
offset = string16::npos;
|
||||
}
|
||||
std::string result;
|
||||
PrepareForUTF8Output(utf16.data(), utf16.length(), &result);
|
||||
OffsetAdjuster::Adjustments adjustments;
|
||||
ConvertUnicode(utf16.data(), utf16.length(), &result, &adjustments);
|
||||
OffsetAdjuster::AdjustOffsets(adjustments, offsets_for_adjustment);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
|
||||
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// A helper class and associated data structures to adjust offsets into a
|
||||
// string in response to various adjustments one might do to that string
|
||||
// (e.g., eliminating a range). For details on offsets, see the comments by
|
||||
// the AdjustOffsets() function below.
|
||||
class BASE_EXPORT OffsetAdjuster {
|
||||
public:
|
||||
struct BASE_EXPORT Adjustment {
|
||||
Adjustment(size_t original_offset,
|
||||
size_t original_length,
|
||||
size_t output_length);
|
||||
|
||||
size_t original_offset;
|
||||
size_t original_length;
|
||||
size_t output_length;
|
||||
};
|
||||
typedef std::vector<Adjustment> Adjustments;
|
||||
|
||||
// Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
|
||||
// recorded in |adjustments|. Adjusted offsets greater than |limit| will be
|
||||
// set to string16::npos.
|
||||
//
|
||||
// Offsets represents insertion/selection points between characters: if |src|
|
||||
// is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
|
||||
// end of the string. Valid input offsets range from 0 to |src_len|. On
|
||||
// exit, each offset will have been modified to point at the same logical
|
||||
// position in the output string. If an offset cannot be successfully
|
||||
// adjusted (e.g., because it points into the middle of a multibyte sequence),
|
||||
// it will be set to string16::npos.
|
||||
static void AdjustOffsets(const Adjustments& adjustments,
|
||||
std::vector<size_t>* offsets_for_adjustment,
|
||||
size_t limit = string16::npos);
|
||||
|
||||
// Adjusts the single |offset| to reflect the adjustments recorded in
|
||||
// |adjustments|.
|
||||
static void AdjustOffset(const Adjustments& adjustments,
|
||||
size_t* offset,
|
||||
size_t limit = string16::npos);
|
||||
|
||||
// Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
|
||||
// of the adjustments recorded in |adjustments|. In other words, the offsets
|
||||
// provided represent offsets into an adjusted string and the caller wants
|
||||
// to know the offsets they correspond to in the original string. If an
|
||||
// offset cannot be successfully unadjusted (e.g., because it points into
|
||||
// the middle of a multibyte sequence), it will be set to string16::npos.
|
||||
static void UnadjustOffsets(const Adjustments& adjustments,
|
||||
std::vector<size_t>* offsets_for_unadjustment);
|
||||
|
||||
// Adjusts the single |offset| to reflect the reverse of the adjustments
|
||||
// recorded in |adjustments|.
|
||||
static void UnadjustOffset(const Adjustments& adjustments,
|
||||
size_t* offset);
|
||||
|
||||
// Combines two sequential sets of adjustments, storing the combined revised
|
||||
// adjustments in |adjustments_on_adjusted_string|. That is, suppose a
|
||||
// string was altered in some way, with the alterations recorded as
|
||||
// adjustments in |first_adjustments|. Then suppose the resulting string is
|
||||
// further altered, with the alterations recorded as adjustments scored in
|
||||
// |adjustments_on_adjusted_string|, with the offsets recorded in these
|
||||
// adjustments being with respect to the intermediate string. This function
|
||||
// combines the two sets of adjustments into one, storing the result in
|
||||
// |adjustments_on_adjusted_string|, whose offsets are correct with respect
|
||||
// to the original string.
|
||||
//
|
||||
// Assumes both parameters are sorted by increasing offset.
|
||||
//
|
||||
// WARNING: Only supports |first_adjustments| that involve collapsing ranges
|
||||
// of text, not expanding ranges.
|
||||
static void MergeSequentialAdjustments(
|
||||
const Adjustments& first_adjustments,
|
||||
Adjustments* adjustments_on_adjusted_string);
|
||||
};
|
||||
|
||||
// Like the conversions in utf_string_conversions.h, but also fills in an
|
||||
// |adjustments| parameter that reflects the alterations done to the string.
|
||||
// It may be NULL.
|
||||
BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
|
||||
const char* src,
|
||||
size_t src_len,
|
||||
string16* output,
|
||||
base::OffsetAdjuster::Adjustments* adjustments);
|
||||
BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
|
||||
const base::StringPiece& utf8,
|
||||
base::OffsetAdjuster::Adjustments* adjustments) WARN_UNUSED_RESULT;
|
||||
// As above, but instead internally examines the adjustments and applies them
|
||||
// to |offsets_for_adjustment|. Input offsets greater than the length of the
|
||||
// input string will be set to string16::npos. See comments by AdjustOffsets().
|
||||
BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
|
||||
const base::StringPiece& utf8,
|
||||
std::vector<size_t>* offsets_for_adjustment);
|
||||
BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
|
||||
const base::StringPiece16& utf16,
|
||||
std::vector<size_t>* offsets_for_adjustment);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/utf_string_conversion_utils.h"
|
||||
|
||||
#include "base/third_party/icu/icu_utf.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// ReadUnicodeCharacter --------------------------------------------------------
|
||||
|
||||
bool ReadUnicodeCharacter(const char* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point_out) {
|
||||
// U8_NEXT expects to be able to use -1 to signal an error, so we must
|
||||
// use a signed type for code_point. But this function returns false
|
||||
// on error anyway, so code_point_out is unsigned.
|
||||
int32_t code_point;
|
||||
CBU8_NEXT(src, *char_index, src_len, code_point);
|
||||
*code_point_out = static_cast<uint32_t>(code_point);
|
||||
|
||||
// The ICU macro above moves to the next char, we want to point to the last
|
||||
// char consumed.
|
||||
(*char_index)--;
|
||||
|
||||
// Validate the decoded value.
|
||||
return IsValidCodepoint(code_point);
|
||||
}
|
||||
|
||||
bool ReadUnicodeCharacter(const char16* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point) {
|
||||
if (CBU16_IS_SURROGATE(src[*char_index])) {
|
||||
if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
|
||||
*char_index + 1 >= src_len ||
|
||||
!CBU16_IS_TRAIL(src[*char_index + 1])) {
|
||||
// Invalid surrogate pair.
|
||||
return false;
|
||||
}
|
||||
|
||||
// Valid surrogate pair.
|
||||
*code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index],
|
||||
src[*char_index + 1]);
|
||||
(*char_index)++;
|
||||
} else {
|
||||
// Not a surrogate, just one 16-bit word.
|
||||
*code_point = src[*char_index];
|
||||
}
|
||||
|
||||
return IsValidCodepoint(*code_point);
|
||||
}
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
bool ReadUnicodeCharacter(const wchar_t* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point) {
|
||||
// Conversion is easy since the source is 32-bit.
|
||||
*code_point = src[*char_index];
|
||||
|
||||
// Validate the value.
|
||||
return IsValidCodepoint(*code_point);
|
||||
}
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// WriteUnicodeCharacter -------------------------------------------------------
|
||||
|
||||
// Appends |code_point| to |output| as UTF-8 and returns the number of bytes
// appended.
size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) {
  if (code_point > 0x7f) {
    // Make room for the longest possible sequence, let the macro write the
    // bytes, then shrink back to what was actually used.
    const size_t start = output->length();
    size_t end = start;
    output->resize(start + CBU8_MAX_LENGTH);

    // The macro advances |end| past the bytes it wrote, so afterwards |end|
    // is the new length of the string.
    CBU8_APPEND_UNSAFE(&(*output)[0], end, code_point);

    output->resize(end);
    return end - start;
  }

  // Single-byte fast path for the common case.
  output->push_back(static_cast<char>(code_point));
  return 1;
}
|
||||
|
||||
size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) {
|
||||
if (CBU16_LENGTH(code_point) == 1) {
|
||||
// Thie code point is in the Basic Multilingual Plane (BMP).
|
||||
output->push_back(static_cast<char16>(code_point));
|
||||
return 1;
|
||||
}
|
||||
// Non-BMP characters use a double-character encoding.
|
||||
size_t char_offset = output->length();
|
||||
output->resize(char_offset + CBU16_MAX_LENGTH);
|
||||
CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point);
|
||||
return CBU16_MAX_LENGTH;
|
||||
}
|
||||
|
||||
// Generalized Unicode converter -----------------------------------------------
|
||||
|
||||
// Empties |output| and reserves capacity for a UTF-8 conversion of the
// |src_len| units at |src|. The estimate is based on the first unit only: one
// byte per unit if it is below 0x80, otherwise three bytes per unit.
template<typename CHAR>
void PrepareForUTF8Output(const CHAR* src,
                          size_t src_len,
                          std::string* output) {
  output->clear();
  if (!src_len)
    return;

  const bool starts_with_ascii = src[0] < 0x80;
  const size_t bytes_per_unit = starts_with_ascii ? 1 : 3;
  output->reserve(src_len * bytes_per_unit);
}
|
||||
|
||||
// Instantiate versions we know callers will need.
|
||||
#if !defined(OS_WIN)
|
||||
// wchar_t and char16 are the same thing on Windows.
|
||||
template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*);
|
||||
#endif
|
||||
template void PrepareForUTF8Output(const char16*, size_t, std::string*);
|
||||
|
||||
// Empties |output| and reserves capacity for converting the |src_len| bytes of
// UTF-8 at |src|. The estimate is based on the first byte only: one output
// unit per byte if it is ASCII, otherwise one output unit per two bytes.
template<typename STRING>
void PrepareForUTF16Or32Output(const char* src,
                               size_t src_len,
                               STRING* output) {
  output->clear();
  if (!src_len)
    return;

  const bool starts_with_ascii = static_cast<unsigned char>(src[0]) < 0x80;
  output->reserve(starts_with_ascii ? src_len : src_len / 2);
}
|
||||
|
||||
// Instantiate versions we know callers will need.
|
||||
#if !defined(OS_WIN)
|
||||
// std::wstring and string16 are the same thing on Windows.
|
||||
template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*);
|
||||
#endif
|
||||
template void PrepareForUTF16Or32Output(const char*, size_t, string16*);
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
|
||||
#define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
|
||||
|
||||
// Low-level UTF handling functions. Most code will want to use the functions
|
||||
// in utf_string_conversions.h
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
inline bool IsValidCodepoint(uint32_t code_point) {
  // Accepts exactly the Unicode scalar values: anything up to 0x10FFFF (the
  // highest code point allowed) that is not a surrogate code point
  // ([0xD800, 0xDFFF]). Non-characters and unassigned code points are allowed.
  // https://unicode.org/glossary/#unicode_scalar_value
  const bool is_surrogate = code_point >= 0xD800u && code_point < 0xE000u;
  const bool in_unicode_range = code_point <= 0x10FFFFu;
  return in_unicode_range && !is_surrogate;
}
|
||||
|
||||
inline bool IsValidCharacter(uint32_t code_point) {
  // Rejects surrogates, values above 0x10FFFF, and the non-characters
  // (U+FDD0..U+FDEF, and all code points ending in 0xFFFE or 0xFFFF).
  // https://unicode.org/faq/private_use.html#nonchar1
  if (code_point < 0xD800u)
    return true;
  if (code_point < 0xE000u)
    return false;  // Surrogate range.
  if (code_point < 0xFDD0u)
    return true;
  if (code_point <= 0xFDEFu)
    return false;  // Non-character block U+FDD0..U+FDEF.
  if (code_point > 0x10FFFFu)
    return false;  // Beyond the last code point.
  // Masking off the lowest bit maps both ...FFFE and ...FFFF to ...FFFE.
  return (code_point & 0xFFFEu) != 0xFFFEu;
}
|
||||
|
||||
// ReadUnicodeCharacter --------------------------------------------------------
|
||||
|
||||
// Reads a UTF-8 stream, placing the next code point into the given output
|
||||
// |*code_point|. |src| represents the entire string to read, and |*char_index|
|
||||
// is the character offset within the string to start reading at. |*char_index|
|
||||
// will be updated to index the last character read, such that incrementing it
|
||||
// (as in a for loop) will take the reader to the next character.
|
||||
//
|
||||
// Returns true on success. On failure, |*code_point| will be invalid.
|
||||
BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point_out);
|
||||
|
||||
// Reads a UTF-16 character. The usage is the same as the 8-bit version above.
|
||||
BASE_EXPORT bool ReadUnicodeCharacter(const char16* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point);
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
// Reads a UTF-32 character. The usage is the same as the 8-bit version above.
|
||||
BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
|
||||
int32_t src_len,
|
||||
int32_t* char_index,
|
||||
uint32_t* code_point);
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// WriteUnicodeCharacter -------------------------------------------------------
|
||||
|
||||
// Appends a UTF-8 character to the given 8-bit string. Returns the number of
|
||||
// bytes written.
|
||||
BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point,
|
||||
std::string* output);
|
||||
|
||||
// Appends the given code point as a UTF-16 character to the given 16-bit
|
||||
// string. Returns the number of 16-bit values written.
|
||||
BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output);
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
// Appends the given UTF-32 character to the given 32-bit string. Returns the
|
||||
// number of 32-bit values written.
|
||||
inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) {
  // The code point is stored as a single wide character, so exactly one value
  // is appended.
  output->append(1, static_cast<wchar_t>(code_point));
  return 1;
}
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// Generalized Unicode converter -----------------------------------------------
|
||||
|
||||
// Guesses the length of the output in UTF-8 in bytes, clears that output
|
||||
// string, and reserves that amount of space. We assume that the input
|
||||
// character types are unsigned, which will be true for UTF-16 and -32 on our
|
||||
// systems.
|
||||
template<typename CHAR>
|
||||
void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output);
|
||||
|
||||
// Prepares an output buffer (containing either UTF-16 or -32 data) given some
|
||||
// UTF-8 input that will be converted to it. See PrepareForUTF8Output().
|
||||
template<typename STRING>
|
||||
void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output);
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
|
||||
|
|
@ -0,0 +1,342 @@
|
|||
// Copyright (c) 2018 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "base/strings/string_piece.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/utf_string_conversion_utils.h"
|
||||
#include "base/third_party/icu/icu_utf.h"
|
||||
#include "build/build_config.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int32_t kErrorCodePoint = 0xFFFD;
|
||||
|
||||
// Size coefficient ----------------------------------------------------------
|
||||
// The maximum number of codeunits in the destination encoding corresponding to
|
||||
// one codeunit in the source encoding.
|
||||
|
||||
// Default coefficient, used when converting from a narrower code unit to a
// wider one. ASCII symbols are encoded by one codeunit in all encodings, so
// the value is 1.
template <typename SrcChar, typename DestChar>
struct SizeCoefficient : std::integral_constant<int, 1> {
  static_assert(sizeof(SrcChar) < sizeof(DestChar),
                "Default case: from a smaller encoding to the bigger one");
};
|
||||
|
||||
template <>
|
||||
struct SizeCoefficient<char16, char> {
|
||||
// One UTF-16 codeunit corresponds to at most 3 codeunits in UTF-8.
|
||||
static constexpr int value = 3;
|
||||
};
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
template <>
|
||||
struct SizeCoefficient<wchar_t, char> {
|
||||
// UTF-8 uses at most 4 codeunits per character.
|
||||
static constexpr int value = 4;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SizeCoefficient<wchar_t, char16> {
|
||||
// UTF-16 uses at most 2 codeunits per character.
|
||||
static constexpr int value = 2;
|
||||
};
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
template <typename SrcChar, typename DestChar>
|
||||
constexpr int size_coefficient_v =
|
||||
SizeCoefficient<std::decay_t<SrcChar>, std::decay_t<DestChar>>::value;
|
||||
|
||||
// UnicodeAppendUnsafe --------------------------------------------------------
|
||||
// Function overloads that write code_point to the output string. Output string
|
||||
// has to have enough space for the codepoint.
|
||||
|
||||
// Convenience typedef that checks whether the passed in type is integral (i.e.
|
||||
// bool, char, int or their extended versions) and is of the correct size.
|
||||
template <typename Char, size_t N>
using EnableIfBitsAre =
    typename std::enable_if<std::is_integral<Char>::value &&
                                sizeof(Char) * CHAR_BIT == N,
                            bool>::type;
|
||||
|
||||
template <typename Char, EnableIfBitsAre<Char, 8> = true>
|
||||
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
|
||||
CBU8_APPEND_UNSAFE(out, *size, code_point);
|
||||
}
|
||||
|
||||
template <typename Char, EnableIfBitsAre<Char, 16> = true>
|
||||
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
|
||||
CBU16_APPEND_UNSAFE(out, *size, code_point);
|
||||
}
|
||||
|
||||
template <typename Char, EnableIfBitsAre<Char, 32> = true>
|
||||
void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) {
|
||||
out[(*size)++] = code_point;
|
||||
}
|
||||
|
||||
// DoUTFConversion ------------------------------------------------------------
|
||||
// Main driver of UTFConversion specialized for different Src encodings.
|
||||
// dest has to have enough room for the converted text.
|
||||
|
||||
template <typename DestChar>
|
||||
bool DoUTFConversion(const char* src,
|
||||
int32_t src_len,
|
||||
DestChar* dest,
|
||||
int32_t* dest_len) {
|
||||
bool success = true;
|
||||
|
||||
for (int32_t i = 0; i < src_len;) {
|
||||
int32_t code_point;
|
||||
CBU8_NEXT(src, i, src_len, code_point);
|
||||
|
||||
if (!IsValidCodepoint(code_point)) {
|
||||
success = false;
|
||||
code_point = kErrorCodePoint;
|
||||
}
|
||||
|
||||
UnicodeAppendUnsafe(dest, dest_len, code_point);
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
template <typename DestChar>
|
||||
bool DoUTFConversion(const char16* src,
|
||||
int32_t src_len,
|
||||
DestChar* dest,
|
||||
int32_t* dest_len) {
|
||||
bool success = true;
|
||||
|
||||
auto ConvertSingleChar = [&success](char16 in) -> int32_t {
|
||||
if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) {
|
||||
success = false;
|
||||
return kErrorCodePoint;
|
||||
}
|
||||
return in;
|
||||
};
|
||||
|
||||
int32_t i = 0;
|
||||
|
||||
// Always have another symbol in order to avoid checking boundaries in the
|
||||
// middle of the surrogate pair.
|
||||
while (i < src_len - 1) {
|
||||
int32_t code_point;
|
||||
|
||||
if (CBU16_IS_LEAD(src[i]) && CBU16_IS_TRAIL(src[i + 1])) {
|
||||
code_point = CBU16_GET_SUPPLEMENTARY(src[i], src[i + 1]);
|
||||
if (!IsValidCodepoint(code_point)) {
|
||||
code_point = kErrorCodePoint;
|
||||
success = false;
|
||||
}
|
||||
i += 2;
|
||||
} else {
|
||||
code_point = ConvertSingleChar(src[i]);
|
||||
++i;
|
||||
}
|
||||
|
||||
UnicodeAppendUnsafe(dest, dest_len, code_point);
|
||||
}
|
||||
|
||||
if (i < src_len)
|
||||
UnicodeAppendUnsafe(dest, dest_len, ConvertSingleChar(src[i]));
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
template <typename DestChar>
|
||||
bool DoUTFConversion(const wchar_t* src,
|
||||
int32_t src_len,
|
||||
DestChar* dest,
|
||||
int32_t* dest_len) {
|
||||
bool success = true;
|
||||
|
||||
for (int32_t i = 0; i < src_len; ++i) {
|
||||
int32_t code_point = src[i];
|
||||
|
||||
if (!IsValidCodepoint(code_point)) {
|
||||
success = false;
|
||||
code_point = kErrorCodePoint;
|
||||
}
|
||||
|
||||
UnicodeAppendUnsafe(dest, dest_len, code_point);
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// UTFConversion --------------------------------------------------------------
|
||||
// Function template for generating all UTF conversions.
|
||||
|
||||
template <typename InputString, typename DestString>
|
||||
bool UTFConversion(const InputString& src_str, DestString* dest_str) {
|
||||
if (IsStringASCII(src_str)) {
|
||||
dest_str->assign(src_str.begin(), src_str.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
dest_str->resize(src_str.length() *
|
||||
size_coefficient_v<typename InputString::value_type,
|
||||
typename DestString::value_type>);
|
||||
|
||||
// Empty string is ASCII => it OK to call operator[].
|
||||
auto* dest = &(*dest_str)[0];
|
||||
|
||||
// ICU requires 32 bit numbers.
|
||||
int32_t src_len32 = static_cast<int32_t>(src_str.length());
|
||||
int32_t dest_len32 = 0;
|
||||
|
||||
bool res = DoUTFConversion(src_str.data(), src_len32, dest, &dest_len32);
|
||||
|
||||
dest_str->resize(dest_len32);
|
||||
dest_str->shrink_to_fit();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// UTF16 <-> UTF8 --------------------------------------------------------------
|
||||
|
||||
bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
|
||||
return UTFConversion(StringPiece(src, src_len), output);
|
||||
}
|
||||
|
||||
string16 UTF8ToUTF16(StringPiece utf8) {
|
||||
string16 ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
UTF8ToUTF16(utf8.data(), utf8.size(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
|
||||
return UTFConversion(StringPiece16(src, src_len), output);
|
||||
}
|
||||
|
||||
std::string UTF16ToUTF8(StringPiece16 utf16) {
|
||||
std::string ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// UTF-16 <-> Wide -------------------------------------------------------------
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF16)
|
||||
// When wide == UTF-16 the conversions are a NOP.
|
||||
|
||||
bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
|
||||
output->assign(src, src + src_len);
|
||||
return true;
|
||||
}
|
||||
|
||||
string16 WideToUTF16(WStringPiece wide) {
|
||||
return string16(wide.begin(), wide.end());
|
||||
}
|
||||
|
||||
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
|
||||
output->assign(src, src + src_len);
|
||||
return true;
|
||||
}
|
||||
|
||||
std::wstring UTF16ToWide(StringPiece16 utf16) {
|
||||
return std::wstring(utf16.begin(), utf16.end());
|
||||
}
|
||||
|
||||
#elif defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
|
||||
return UTFConversion(base::WStringPiece(src, src_len), output);
|
||||
}
|
||||
|
||||
string16 WideToUTF16(WStringPiece wide) {
|
||||
string16 ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
WideToUTF16(wide.data(), wide.length(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
|
||||
return UTFConversion(StringPiece16(src, src_len), output);
|
||||
}
|
||||
|
||||
std::wstring UTF16ToWide(StringPiece16 utf16) {
|
||||
std::wstring ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
UTF16ToWide(utf16.data(), utf16.length(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// UTF-8 <-> Wide --------------------------------------------------------------
|
||||
|
||||
// UTF8ToWide is the same code, regardless of whether wide is 16 or 32 bits
|
||||
|
||||
bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
|
||||
return UTFConversion(StringPiece(src, src_len), output);
|
||||
}
|
||||
|
||||
std::wstring UTF8ToWide(StringPiece utf8) {
|
||||
std::wstring ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
UTF8ToWide(utf8.data(), utf8.length(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if defined(WCHAR_T_IS_UTF16)
|
||||
// Easy case since we can use the "utf" versions we already wrote above.
|
||||
|
||||
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
|
||||
return UTF16ToUTF8(as_u16cstr(src), src_len, output);
|
||||
}
|
||||
|
||||
std::string WideToUTF8(WStringPiece wide) {
|
||||
return UTF16ToUTF8(StringPiece16(as_u16cstr(wide), wide.size()));
|
||||
}
|
||||
|
||||
#elif defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
|
||||
return UTFConversion(WStringPiece(src, src_len), output);
|
||||
}
|
||||
|
||||
std::string WideToUTF8(WStringPiece wide) {
|
||||
std::string ret;
|
||||
// Ignore the success flag of this call, it will do the best it can for
|
||||
// invalid input, which is what we want here.
|
||||
WideToUTF8(wide.data(), wide.length(), &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif // defined(WCHAR_T_IS_UTF32)
|
||||
|
||||
// Widens each character of |ascii| into a 16-bit code unit. The input is
// DCHECKed to be ASCII.
string16 ASCIIToUTF16(StringPiece ascii) {
  DCHECK(IsStringASCII(ascii)) << ascii;
  string16 utf16(ascii.begin(), ascii.end());
  return utf16;
}
|
||||
|
||||
// Narrows each code unit of |utf16| into a char. The input is DCHECKed to be
// ASCII; on failure the DCHECK message carries the UTF-8 form of the input.
std::string UTF16ToASCII(StringPiece16 utf16) {
  DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
  std::string ascii(utf16.begin(), utf16.end());
  return ascii;
}
|
||||
|
||||
} // namespace base
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
|
||||
#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "base/base_export.h"
|
||||
#include "base/strings/string16.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// These convert between UTF-8, -16, and -32 strings. They are potentially slow,
|
||||
// so avoid unnecessary conversions. The low-level versions return a boolean
|
||||
// indicating whether the conversion was 100% valid. If not, it will still
|
||||
// do the best it can and put the result in the output buffer. The versions that
|
||||
// return strings ignore this error and just return the best conversion
|
||||
// possible.
|
||||
BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len,
|
||||
std::string* output);
|
||||
BASE_EXPORT std::string WideToUTF8(WStringPiece wide) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len,
|
||||
std::wstring* output);
|
||||
BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT;
|
||||
|
||||
BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len,
|
||||
string16* output);
|
||||
BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len,
|
||||
std::wstring* output);
|
||||
BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT;
|
||||
|
||||
BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
|
||||
BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT;
|
||||
BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len,
|
||||
std::string* output);
|
||||
BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT;
|
||||
|
||||
// This converts an ASCII string, typically a hardcoded constant, to a UTF16
|
||||
// string.
|
||||
BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT;
|
||||
|
||||
// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
|
||||
// beforehand.
|
||||
BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT;
|
||||
|
||||
} // namespace base
|
||||
|
||||
#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
// Copyright 2018 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "base/macros.h"
|
||||
#include "base/strings/string_util.h"
|
||||
#include "base/strings/utf_string_conversions.h"
|
||||
|
||||
std::string output_std_string;
|
||||
std::wstring output_std_wstring;
|
||||
base::string16 output_string16;
|
||||
|
||||
// Entry point for LibFuzzer.
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
base::StringPiece string_piece_input(reinterpret_cast<const char*>(data),
|
||||
size);
|
||||
|
||||
ignore_result(base::UTF8ToWide(string_piece_input));
|
||||
base::UTF8ToWide(reinterpret_cast<const char*>(data), size,
|
||||
&output_std_wstring);
|
||||
ignore_result(base::UTF8ToUTF16(string_piece_input));
|
||||
base::UTF8ToUTF16(reinterpret_cast<const char*>(data), size,
|
||||
&output_string16);
|
||||
|
||||
// Test for char16.
|
||||
if (size % 2 == 0) {
|
||||
base::StringPiece16 string_piece_input16(
|
||||
reinterpret_cast<const base::char16*>(data), size / 2);
|
||||
ignore_result(base::UTF16ToWide(output_string16));
|
||||
base::UTF16ToWide(reinterpret_cast<const base::char16*>(data), size / 2,
|
||||
&output_std_wstring);
|
||||
ignore_result(base::UTF16ToUTF8(string_piece_input16));
|
||||
base::UTF16ToUTF8(reinterpret_cast<const base::char16*>(data), size / 2,
|
||||
&output_std_string);
|
||||
}
|
||||
|
||||
// Test for wchar_t.
|
||||
size_t wchar_t_size = sizeof(wchar_t);
|
||||
if (size % wchar_t_size == 0) {
|
||||
ignore_result(base::WideToUTF8(output_std_wstring));
|
||||
base::WideToUTF8(reinterpret_cast<const wchar_t*>(data),
|
||||
size / wchar_t_size, &output_std_string);
|
||||
ignore_result(base::WideToUTF16(output_std_wstring));
|
||||
base::WideToUTF16(reinterpret_cast<const wchar_t*>(data),
|
||||
size / wchar_t_size, &output_string16);
|
||||
}
|
||||
|
||||
// Test for ASCII. This condition is needed to avoid hitting instant CHECK
|
||||
// failures.
|
||||
if (base::IsStringASCII(string_piece_input)) {
|
||||
output_string16 = base::ASCIIToUTF16(string_piece_input);
|
||||
base::StringPiece16 string_piece_input16(output_string16);
|
||||
ignore_result(base::UTF16ToASCII(string_piece_input16));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue