Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@ -0,0 +1,296 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <climits>
#include <cstddef>
#include <cstring>
#include <string>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
// # Table generated by this Python code (bit 0x02 is currently unused):
// TODO(mbar) Move Python code for generation of table to BUILD and link here.
// NOTE: The kPropertyBits table used within this code was generated by
// Python code of the following form. (Bit 0x02 is currently unused and
// available.)
//
// def Hex2(n):
// return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
// def IsPunct(ch):
// return (ord(ch) >= 32 and ord(ch) < 127 and
// not ch.isspace() and not ch.isalnum())
// def IsBlank(ch):
// return ch in ' \t'
// def IsCntrl(ch):
// return ord(ch) < 32 or ord(ch) == 127
// def IsXDigit(ch):
// return ch.isdigit() or ch.lower() in 'abcdef'
// for i in range(128):
// ch = chr(i)
// mask = ((ch.isalpha() and 0x01 or 0) |
// (ch.isalnum() and 0x04 or 0) |
// (ch.isspace() and 0x08 or 0) |
// (IsPunct(ch) and 0x10 or 0) |
// (IsBlank(ch) and 0x20 or 0) |
// (IsCntrl(ch) and 0x40 or 0) |
// (IsXDigit(ch) and 0x80 or 0))
// print Hex2(mask) + ',',
// if i % 16 == 7:
// print ' //', Hex2(i & 0x78)
// elif i % 16 == 15:
// print
// clang-format off
// Array of bitfields holding character information. Each bit value corresponds
// to a particular character feature. For readability, and because the value
// of these bits is tightly coupled to this implementation, the individual bits
// are not named. Note that bitfields for all characters above ASCII 127 are
// zero-initialized.
//
// Bit assignments (as produced by the generator script in the comment above
// and as consumed by the ascii_is*() predicates in ascii.h):
//   0x01 alpha    0x04 alnum    0x08 space    0x10 punct
//   0x20 blank    0x40 cntrl    0x80 xdigit   (0x02 unused)
//
// Only the first 128 entries are spelled out; the remaining 128 are left to
// aggregate initialization and are therefore zero.
ABSL_DLL const unsigned char kPropertyBits[256] = {
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00
0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10
0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30
0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70
0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,
};
// Array of characters for the ascii_tolower() function. For values 'A'
// through 'Z', return the lower-case character; otherwise, return the
// identity of the passed character.
//
// The table is indexed by the unsigned value of the input character. Every
// entry equals its own index except 0x41-0x5a ('A'-'Z'), which hold
// 'a'-'z'. Entries 0x80-0xff are identity mappings, so non-ASCII bytes pass
// through unchanged.
ABSL_DLL const char kToLower[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// Array of characters for the ascii_toupper() function. For values 'a'
// through 'z', return the upper-case character; otherwise, return the
// identity of the passed character.
//
// The table is indexed by the unsigned value of the input character. Every
// entry equals its own index except 0x61-0x7a ('a'-'z'), which hold
// 'A'-'Z'. Entries 0x80-0xff are identity mappings, so non-ASCII bytes pass
// through unchanged.
ABSL_DLL const char kToUpper[256] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
'\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
// clang-format on
// Reports whether `c` is a letter that the requested fold would change:
// 'a'..'z' when `ToUpper` is true, 'A'..'Z' otherwise.
//
// The check is a single signed comparison: subtracting `kShift` maps the
// first letter of the range onto SCHAR_MIN, so the 26 letters occupy
// [SCHAR_MIN, SCHAR_MIN + 26) and everything else lands at or above the limit.
template <bool ToUpper>
constexpr bool AsciiInAZRange(unsigned char c) {
  constexpr unsigned char kFirstLetter = ToUpper ? 'a' : 'A';
  constexpr unsigned char kShift =
      static_cast<unsigned char>(kFirstLetter - SCHAR_MIN);
  constexpr signed char kLimit =
      static_cast<signed char>(SCHAR_MIN + 26);  // 26 letters in the alphabet.
  // The subtraction is reduced modulo 256 by the conversion back to
  // unsigned char, so wrap-around is well defined.
  const unsigned char shifted = static_cast<unsigned char>(c - kShift);
  // A signed comparison is used because many SIMD instruction sets lack an
  // unsigned byte compare.
  return static_cast<signed char>(shifted) < kLimit;
}
// Straightforward two-comparison form of the same range test: true when `c`
// is in 'a'..'z' (`ToUpper`) or 'A'..'Z' (otherwise).
template <bool ToUpper>
constexpr bool AsciiInAZRangeNaive(unsigned char c) {
  constexpr unsigned char kFirst = ToUpper ? 'a' : 'A';
  constexpr unsigned char kLast = ToUpper ? 'z' : 'Z';
  return c >= kFirst && kLast >= c;
}
// Copies `size` bytes from `src` to `dst`, flipping the case of every byte
// that lies in the range selected by `ToUpper`. `Naive` picks which of the
// two range predicates performs the test. `dst` and `src` may be the same
// buffer; each byte is read before the same index is written.
template <bool ToUpper, bool Naive>
constexpr void AsciiStrCaseFoldImpl(absl::Nonnull<char*> dst,
                                    absl::Nullable<const char*> src,
                                    size_t size) {
  // Upper- and lowercase ASCII letters differ in exactly one bit, so xor-ing
  // with that bit toggles the case. 'a' and 'A' are just a convenient pair to
  // derive it from; any other letter pair yields the same value.
  constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';

  for (size_t pos = 0; pos < size; ++pos) {
    const unsigned char in = static_cast<unsigned char>(src[pos]);
    bool needs_flip = false;
    if ABSL_INTERNAL_CONSTEXPR_SINCE_CXX17 (Naive) {
      needs_flip = AsciiInAZRangeNaive<ToUpper>(in);
    } else {
      needs_flip = AsciiInAZRange<ToUpper>(in);
    }
    // Branchless select: xor with the case bit or with zero.
    dst[pos] = static_cast<char>(in ^ (needs_flip ? kAsciiCaseBitFlip : 0));
  }
}
// Dispatches to one of two instantiations of the fold loop depending on the
// input length, so that vectorization decisions can be made separately for
// short and long strings. The two instantiations deliberately use different
// range predicates so the compiler does not merge them into the same code;
// the non-naive predicate is the one needed for SIMD, which is unimportant
// for short inputs.
// `src` may be null iff `size` is zero.
template <bool ToUpper>
constexpr void AsciiStrCaseFold(absl::Nonnull<char*> dst,
                                absl::Nullable<const char*> src, size_t size) {
  constexpr size_t kShortStringLimit = 16;
  if (size < kShortStringLimit) {
    AsciiStrCaseFoldImpl<ToUpper, /*Naive=*/true>(dst, src, size);
  } else {
    AsciiStrCaseFoldImpl<ToUpper, /*Naive=*/false>(dst, src, size);
  }
}
// Out-of-line entry point for lowercasing: writes the ASCII-lowercased form
// of the `n` bytes at `src` into `dst`.
void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
                     size_t n) {
  constexpr bool kToUpper = false;
  AsciiStrCaseFold<kToUpper>(dst, src, n);
}
// Out-of-line entry point for uppercasing: writes the ASCII-uppercased form
// of the `n` bytes at `src` into `dst`.
void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
                     size_t n) {
  constexpr bool kToUpper = true;
  AsciiStrCaseFold<kToUpper>(dst, src, n);
}
// Compile-time self-test of the case-folding loop. Folds a buffer containing
// every `char` value once in each direction and compares the output against a
// direct 'a'..'z' / 'A'..'Z' mapping.
//
// Returns 0 when every value folds correctly. Otherwise returns the index of
// the first mismatch, with index 0 reported as `kCharCount` so that it cannot
// be confused with success.
static constexpr size_t ValidateAsciiCasefold() {
  constexpr size_t kCharCount = 1 + CHAR_MAX - CHAR_MIN;
  char folded_lower[kCharCount] = {};
  char folded_upper[kCharCount] = {};
  for (unsigned int code = 0; code < kCharCount; ++code) {
    const char ch = static_cast<char>(code);
    folded_lower[code] = ch;
    folded_upper[code] = ch;
  }

  // Fold in place, exactly as the std::string overloads do.
  AsciiStrCaseFold<false>(&folded_lower[0], &folded_lower[0], kCharCount);
  AsciiStrCaseFold<true>(&folded_upper[0], &folded_upper[0], kCharCount);

  for (size_t code = 0; code < kCharCount; ++code) {
    const char ch = static_cast<char>(code);
    const char want_upper = ('a' <= ch && ch <= 'z') ? 'A' + (ch - 'a') : ch;
    const char want_lower = ('A' <= ch && ch <= 'Z') ? 'a' + (ch - 'A') : ch;
    if (folded_upper[code] != want_upper || folded_lower[code] != want_lower) {
      return code > 0 ? code : kCharCount;
    }
  }
  return 0;
}
static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
} // namespace ascii_internal
// Lowercases `*s` in place by folding its own buffer onto itself.
void AsciiStrToLower(absl::Nonnull<std::string*> s) {
  std::string& text = *s;
  char* const buffer = &text[0];
  ascii_internal::AsciiStrCaseFold<false>(buffer, buffer, text.size());
}
// Uppercases `*s` in place by folding its own buffer onto itself.
void AsciiStrToUpper(absl::Nonnull<std::string*> s) {
  std::string& text = *s;
  char* const buffer = &text[0];
  ascii_internal::AsciiStrCaseFold<true>(buffer, buffer, text.size());
}
// Rewrites `*str` in place with leading and trailing ASCII whitespace removed
// and every internal run of whitespace collapsed to a single character (the
// last one of the run).
void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str) {
  const absl::string_view trimmed = StripAsciiWhitespace(*str);
  if (trimmed.empty()) {
    str->clear();
    return;
  }

  // `trimmed` views the string's own buffer. The write cursor never gets
  // ahead of the read position, so compacting in place is safe.
  char* const base = &(*str)[0];
  char* write = base;
  bool prev_was_space = false;
  for (const char ch : trimmed) {
    const bool cur_is_space =
        absl::ascii_isspace(static_cast<unsigned char>(ch));
    // Consecutive whitespace: step back so this character overwrites the
    // previous one, keeping only the last of the run.
    if (prev_was_space && cur_is_space) --write;
    *write = ch;
    ++write;
    prev_was_space = cur_is_space;
  }
  str->erase(static_cast<size_t>(write - base));
}
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,284 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: ascii.h
// -----------------------------------------------------------------------------
//
// This package contains functions operating on characters and strings
// restricted to standard ASCII. These include character classification
// functions analogous to those found in the ANSI C Standard Library <ctype.h>
// header file.
//
// C++ implementations provide <ctype.h> functionality based on their
// C environment locale. In general, reliance on such a locale is not ideal, as
// the locale standard is problematic (and may not return invariant information
// for the same character set, for example). These `ascii_*()` functions are
// hard-wired for standard ASCII, much faster, and guaranteed to behave
// consistently. They will never be overloaded, nor will their function
// signature change.
//
// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
// `ascii_isxdigit()`
// Analogous to the <ctype.h> functions with similar names, these
// functions take an unsigned char and return a bool, based on whether the
// character matches the condition specified.
//
// If the input character has a numerical value greater than 127, these
// functions return `false`.
//
// `ascii_tolower()`, `ascii_toupper()`
// Analogous to the <ctype.h> functions with similar names, these functions
// take an unsigned char and return a char.
//
// If the input character is not an ASCII {lower,upper}-case letter (including
// numerical values greater than 127) then the functions return the same value
// as the input character.
#ifndef ABSL_STRINGS_ASCII_H_
#define ABSL_STRINGS_ASCII_H_
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace ascii_internal {
// Declaration for an array of bitfields holding character information.
// Indexed by the unsigned value of a character; the ascii_is*() predicates
// below each test one bit of the selected entry.
ABSL_DLL extern const unsigned char kPropertyBits[256];
// Declaration for the array mapping each character to its upper-case
// equivalent (identity for everything except 'a'-'z').
ABSL_DLL extern const char kToUpper[256];
// Declaration for the array mapping each character to its lower-case
// equivalent (identity for everything except 'A'-'Z').
ABSL_DLL extern const char kToLower[256];
// Writes the ASCII-lowercased form of the `n` bytes at `src` to `dst`.
// `src` is nullable; the string_view overload of AsciiStrToLower() below
// passes `s.data()` together with `s.size()`.
void AsciiStrToLower(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
size_t n);
// Writes the ASCII-uppercased form of the `n` bytes at `src` to `dst`.
// Same pointer contract as the lowercasing variant above.
void AsciiStrToUpper(absl::Nonnull<char*> dst, absl::Nullable<const char*> src,
size_t n);
} // namespace ascii_internal
// ascii_isalpha()
//
// Returns true if `c` is an ASCII letter. Implemented as a lookup of the
// alphabetic bit (0x01) in the character property table.
inline bool ascii_isalpha(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x01) != 0;
}
// ascii_isalnum()
//
// Returns true if `c` is an ASCII letter or decimal digit. Implemented as a
// lookup of the alphanumeric bit (0x04) in the character property table.
inline bool ascii_isalnum(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x04) != 0;
}
// ascii_isspace()
//
// Returns true if `c` is an ASCII whitespace character: space, tab, vertical
// tab, formfeed, linefeed, or carriage return. Implemented as a lookup of the
// whitespace bit (0x08) in the character property table.
inline bool ascii_isspace(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x08) != 0;
}
// ascii_ispunct()
//
// Returns true if `c` is an ASCII punctuation character. Implemented as a
// lookup of the punctuation bit (0x10) in the character property table.
inline bool ascii_ispunct(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x10) != 0;
}
// ascii_isblank()
//
// Returns true if `c` is a blank character (tab or space). Implemented as a
// lookup of the blank bit (0x20) in the character property table.
inline bool ascii_isblank(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x20) != 0;
}
// ascii_iscntrl()
//
// Returns true if `c` is an ASCII control character. Implemented as a lookup
// of the control bit (0x40) in the character property table.
inline bool ascii_iscntrl(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x40) != 0;
}
// ascii_isxdigit()
//
// Returns true if `c` can be represented as a hexadecimal digit character,
// i.e. {0-9}, {A-F} or {a-f}. Implemented as a lookup of the hex-digit bit
// (0x80) in the character property table.
inline bool ascii_isxdigit(unsigned char c) {
  const unsigned char props = ascii_internal::kPropertyBits[c];
  return (props & 0x80) != 0;
}
// ascii_isdigit()
//
// Returns true if `c` is one of the decimal digit characters '0' through '9'.
inline constexpr bool ascii_isdigit(unsigned char c) {
  return '0' <= c && '9' >= c;
}
// ascii_isprint()
//
// Returns true if `c` is printable, i.e. lies in [32, 127). The space
// character counts as printable.
inline constexpr bool ascii_isprint(unsigned char c) {
  return !(c < 32 || c >= 127);
}
// ascii_isgraph()
//
// Returns true if `c` has a graphical representation, i.e. lies in
// (32, 127). Unlike ascii_isprint(), the space character is excluded.
inline constexpr bool ascii_isgraph(unsigned char c) {
  return !(c <= 32 || c >= 127);
}
// ascii_isupper()
//
// Returns true if `c` is one of the uppercase letters 'A' through 'Z'.
inline constexpr bool ascii_isupper(unsigned char c) {
  return 'A' <= c && 'Z' >= c;
}
// ascii_islower()
//
// Returns true if `c` is one of the lowercase letters 'a' through 'z'.
inline constexpr bool ascii_islower(unsigned char c) {
  return !(c < 'a' || c > 'z');
}
// ascii_isascii()
//
// Returns true if `c` is an ASCII character, i.e. its high bit is clear
// (value below 128).
inline constexpr bool ascii_isascii(unsigned char c) {
  return (c & 0x80) == 0;
}
// ascii_tolower()
//
// Returns the lowercase form of `c` if it is an uppercase ASCII letter, and
// `c` itself otherwise. Character values > 127 are returned unchanged. The
// result is read from a 256-entry lookup table.
inline char ascii_tolower(unsigned char c) {
  const char* const table = ascii_internal::kToLower;
  return table[c];
}
// Converts the characters in `s` to lowercase, changing the contents of `s`.
// The string's length is unchanged; only its bytes are rewritten in place.
void AsciiStrToLower(absl::Nonnull<std::string*> s);
// Returns a new string holding the lowercase form of `s`.
//
// The result buffer is sized without initializing its contents and is then
// filled directly by the conversion routine.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
  const size_t length = s.size();
  std::string lowered;
  strings_internal::STLStringResizeUninitialized(&lowered, length);
  ascii_internal::AsciiStrToLower(&lowered[0], s.data(), length);
  return lowered;
}
// Returns the lowercase form of an rvalue `std::string`, taking over its
// storage and converting it in place.
//
// (Template is used to lower priority of this overload.)
template <int&... DoNotSpecify>
ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(std::string&& s) {
  std::string lowered(std::move(s));
  absl::AsciiStrToLower(&lowered);
  return lowered;
}
// ascii_toupper()
//
// Returns the uppercase form of `c` if it is a lowercase ASCII letter, and
// `c` itself otherwise. Character values > 127 are returned unchanged. The
// result is read from a 256-entry lookup table.
inline char ascii_toupper(unsigned char c) {
  const char* const table = ascii_internal::kToUpper;
  return table[c];
}
// Converts the characters in `s` to uppercase, changing the contents of `s`.
// The string's length is unchanged; only its bytes are rewritten in place.
void AsciiStrToUpper(absl::Nonnull<std::string*> s);
// Returns a new string holding the uppercase form of `s`.
//
// The result buffer is sized without initializing its contents and is then
// filled directly by the conversion routine.
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
  const size_t length = s.size();
  std::string uppered;
  strings_internal::STLStringResizeUninitialized(&uppered, length);
  ascii_internal::AsciiStrToUpper(&uppered[0], s.data(), length);
  return uppered;
}
// Returns the uppercase form of an rvalue `std::string`, taking over its
// storage and converting it in place.
//
// (Template is used to lower priority of this overload.)
template <int&... DoNotSpecify>
ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(std::string&& s) {
  std::string uppered(std::move(s));
  absl::AsciiStrToUpper(&uppered);
  return uppered;
}
// Returns the suffix of `str` that starts at its first non-whitespace
// character (an empty view if `str` is entirely whitespace).
ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
    absl::string_view str) {
  size_t skip = 0;
  while (skip < str.size() &&
         absl::ascii_isspace(static_cast<unsigned char>(str[skip]))) {
    ++skip;
  }
  return str.substr(skip);
}
// Erases, in place, every whitespace character that precedes the first
// non-whitespace character of `*str`.
inline void StripLeadingAsciiWhitespace(absl::Nonnull<std::string*> str) {
  std::string& text = *str;
  size_t prefix = 0;
  while (prefix < text.size() &&
         absl::ascii_isspace(static_cast<unsigned char>(text[prefix]))) {
    ++prefix;
  }
  text.erase(0, prefix);
}
// Returns the prefix of `str` that ends at its last non-whitespace character
// (an empty view if `str` is entirely whitespace).
ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
    absl::string_view str) {
  size_t keep = str.size();
  while (keep > 0 &&
         absl::ascii_isspace(static_cast<unsigned char>(str[keep - 1]))) {
    --keep;
  }
  return str.substr(0, keep);
}
// Erases, in place, every whitespace character that follows the last
// non-whitespace character of `*str`.
inline void StripTrailingAsciiWhitespace(absl::Nonnull<std::string*> str) {
  std::string& text = *str;
  size_t keep = text.size();
  while (keep > 0 &&
         absl::ascii_isspace(static_cast<unsigned char>(text[keep - 1]))) {
    --keep;
  }
  text.erase(keep);
}
// Returns the sub-view of `str` with whitespace removed from both ends. The
// leading whitespace is dropped first, then the trailing whitespace.
ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
    absl::string_view str) {
  const absl::string_view without_prefix = StripLeadingAsciiWhitespace(str);
  return StripTrailingAsciiWhitespace(without_prefix);
}
// Removes, in place, the whitespace at both ends of `*str`. The trailing
// whitespace is erased first, then the leading whitespace.
inline void StripAsciiWhitespace(absl::Nonnull<std::string*> str) {
  // Trailing part: find the last non-whitespace character and cut after it.
  const auto last_kept =
      std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
  str->erase(static_cast<size_t>(str->rend() - last_kept));

  // Leading part: find the first non-whitespace character and cut before it.
  const auto first_kept =
      std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
  str->erase(str->begin(), first_kept);
}
// Removes leading, trailing, and consecutive internal whitespace.
// Operates on `str` in place; a string consisting only of whitespace becomes
// empty.
void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str);
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_ASCII_H_

View file

@ -0,0 +1,160 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <string>
#include <array>
#include <random>
#include "benchmark/benchmark.h"
namespace {
// Returns all 256 byte values exactly once, in a random order. The
// generator is seeded from std::random_device, so the order differs from run
// to run.
std::array<unsigned char, 256> MakeShuffledBytes() {
  std::array<unsigned char, 256> table;
  unsigned char next_value = 0;
  for (unsigned char& slot : table) {
    slot = next_value;
    ++next_value;  // Wraps to 0 after the last slot; the value is not reused.
  }

  // Gather eight words of entropy to seed the Mersenne Twister.
  std::random_device entropy_source;
  std::array<std::random_device::result_type, 8> entropy;
  for (auto& word : entropy) word = entropy_source();
  std::seed_seq seed(entropy.begin(), entropy.end());
  std::mt19937 engine(seed);

  std::shuffle(table.begin(), table.end(), engine);
  return table;
}
// Shared benchmark body: applies `f` to every byte value (in shuffled order)
// once per benchmark iteration and counts how many calls yield a non-zero
// result, so the calls cannot be discarded as dead code.
template <typename Function>
void AsciiBenchmark(benchmark::State& state, Function f) {
  const std::array<unsigned char, 256> inputs = MakeShuffledBytes();
  size_t hits = 0;
  for (auto _ : state) {
    for (const unsigned char byte : inputs) hits += f(byte) ? 1 : 0;
  }
  // Hand a copy of the counter to `DoNotOptimize` rather than the counter
  // itself, so `hits` can stay in a CPU register inside the loop above and
  // the loop's performance is not degraded.
  size_t hits_copy = hits;
  benchmark::DoNotOptimize(hits_copy);
  state.SetBytesProcessed(state.iterations() * inputs.size());
}
// BM_Ascii is an overload set of function templates, one per function-pointer
// signature that gets benchmarked. Each overload takes the function under
// test as a non-type template argument and forwards it to AsciiBenchmark().

// Signature of the <cctype> functions (e.g. std::isalpha, std::tolower).
using StdAsciiFunction = int (*)(int);
template <StdAsciiFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Signature of the absl::ascii_is*() classification functions.
using AbslAsciiIsFunction = bool (*)(unsigned char);
template <AbslAsciiIsFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Signature of absl::ascii_tolower() / absl::ascii_toupper() (and of Noop).
using AbslAsciiToFunction = char (*)(unsigned char);
template <AbslAsciiToFunction f>
void BM_Ascii(benchmark::State& state) {
AsciiBenchmark(state, f);
}
// Identity function with the same signature as the absl to-case functions;
// benchmarking it measures the cost of the surrounding loop alone.
inline char Noop(unsigned char b) {
  const char same = static_cast<char>(b);
  return same;
}
// Benchmark registrations. `Noop` comes first as the baseline; after that
// each C library function is immediately followed by its absl::ascii_*
// counterpart so the two appear next to each other in the report.
// (`isascii` is referenced unqualified, unlike the other C library functions.)
BENCHMARK_TEMPLATE(BM_Ascii, Noop);
BENCHMARK_TEMPLATE(BM_Ascii, std::isalpha);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalpha);
BENCHMARK_TEMPLATE(BM_Ascii, std::isdigit);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isdigit);
BENCHMARK_TEMPLATE(BM_Ascii, std::isalnum);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isalnum);
BENCHMARK_TEMPLATE(BM_Ascii, std::isspace);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isspace);
BENCHMARK_TEMPLATE(BM_Ascii, std::ispunct);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_ispunct);
BENCHMARK_TEMPLATE(BM_Ascii, std::isblank);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isblank);
BENCHMARK_TEMPLATE(BM_Ascii, std::iscntrl);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_iscntrl);
BENCHMARK_TEMPLATE(BM_Ascii, std::isxdigit);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isxdigit);
BENCHMARK_TEMPLATE(BM_Ascii, std::isprint);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isprint);
BENCHMARK_TEMPLATE(BM_Ascii, std::isgraph);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isgraph);
BENCHMARK_TEMPLATE(BM_Ascii, std::isupper);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isupper);
BENCHMARK_TEMPLATE(BM_Ascii, std::islower);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_islower);
BENCHMARK_TEMPLATE(BM_Ascii, isascii);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_isascii);
BENCHMARK_TEMPLATE(BM_Ascii, std::tolower);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_tolower);
BENCHMARK_TEMPLATE(BM_Ascii, std::toupper);
BENCHMARK_TEMPLATE(BM_Ascii, absl::ascii_toupper);
// Measures absl::AsciiStrToLower() on an lvalue string made of `range(0)`
// copies of 'X', so every character has to be converted.
static void BM_StrToLower(benchmark::State& state) {
  const auto length = static_cast<size_t>(state.range(0));
  std::string input(length, 'X');
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    std::string lowered = absl::AsciiStrToLower(input);
    benchmark::DoNotOptimize(lowered);
  }
}
// Sizes: every length from 0 to 32, then Range(64, 1 << 26) with a
// multiplier of 2.
BENCHMARK(BM_StrToLower)
    ->DenseRange(0, 32)
    ->RangeMultiplier(2)
    ->Range(64, 1 << 26);
// Measures absl::AsciiStrToUpper() on an lvalue string made of `range(0)`
// copies of 'x', so every character has to be converted.
static void BM_StrToUpper(benchmark::State& state) {
  const auto length = static_cast<size_t>(state.range(0));
  std::string input(length, 'x');
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    std::string uppered = absl::AsciiStrToUpper(input);
    benchmark::DoNotOptimize(uppered);
  }
}
// Sizes: every length from 0 to 32, then Range(64, 1 << 26) with a
// multiplier of 2.
BENCHMARK(BM_StrToUpper)
    ->DenseRange(0, 32)
    ->RangeMultiplier(2)
    ->Range(64, 1 << 26);
// Measures absl::AsciiStrToUpper() when it is handed a temporary
// std::string. Each iteration copies the source string to create that
// temporary, so the copy is part of the measured time. The source consists
// of 'X' characters, i.e. it is already uppercase.
static void BM_StrToUpperFromRvalref(benchmark::State& state) {
  const auto length = static_cast<size_t>(state.range(0));
  std::string input(length, 'X');
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    std::string uppered = absl::AsciiStrToUpper(std::string(input));
    benchmark::DoNotOptimize(uppered);
  }
}
// Sizes: every length from 0 to 32, then Range(64, 1 << 26) with a
// multiplier of 2.
BENCHMARK(BM_StrToUpperFromRvalref)
    ->DenseRange(0, 32)
    ->RangeMultiplier(2)
    ->Range(64, 1 << 26);
// Measures absl::AsciiStrToLower() when it is handed a temporary
// std::string. Each iteration copies the source string to create that
// temporary, so the copy is part of the measured time. The source consists
// of 'x' characters, i.e. it is already lowercase.
static void BM_StrToLowerFromRvalref(benchmark::State& state) {
  const auto length = static_cast<size_t>(state.range(0));
  std::string input(length, 'x');
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    std::string lowered = absl::AsciiStrToLower(std::string(input));
    benchmark::DoNotOptimize(lowered);
  }
}
// Sizes: every length from 0 to 32, then Range(64, 1 << 26) with a
// multiplier of 2.
BENCHMARK(BM_StrToLowerFromRvalref)
    ->DenseRange(0, 32)
    ->RangeMultiplier(2)
    ->Range(64, 1 << 26);
} // namespace

View file

@ -0,0 +1,374 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/ascii.h"
#include <algorithm>
#include <cctype>
#include <clocale>
#include <cstring>
#include <string>
#include "gtest/gtest.h"
#include "absl/base/macros.h"
#include "absl/strings/string_view.h"
namespace {
// Checks every absl::ascii_is*() predicate against an expectation computed
// directly from the character code, for all 256 byte values.
//
// The expected values are derived from `i` alone rather than from other
// absl predicates, so a defect in one predicate cannot hide a defect in
// another. Failure messages report the numeric code, because streaming an
// `unsigned char` would emit the raw (often unprintable) byte.
TEST(AsciiIsFoo, All) {
  for (int i = 0; i < 256; i++) {
    const auto c = static_cast<unsigned char>(i);

    const bool is_upper = i >= 'A' && i <= 'Z';
    const bool is_lower = i >= 'a' && i <= 'z';
    const bool is_digit = i >= '0' && i <= '9';
    const bool is_alpha = is_upper || is_lower;
    const bool is_alnum = is_alpha || is_digit;
    // strchr() would also match the terminating NUL, so exclude it explicitly.
    const bool is_space = i != '\0' && strchr(" \r\n\t\v\f", i) != nullptr;
    const bool is_print = i >= 32 && i < 127;
    const bool is_punct = is_print && !is_space && !is_alnum;
    const bool is_blank = i == ' ' || i == '\t';
    const bool is_cntrl = i < 32 || i == 127;
    const bool is_xdigit =
        is_digit || (i >= 'A' && i <= 'F') || (i >= 'a' && i <= 'f');
    const bool is_graph = i > 32 && i < 127;
    const bool is_ascii = i < 128;

    EXPECT_EQ(is_alpha, absl::ascii_isalpha(c)) << ": failed on " << i;
    EXPECT_EQ(is_digit, absl::ascii_isdigit(c)) << ": failed on " << i;
    EXPECT_EQ(is_alnum, absl::ascii_isalnum(c)) << ": failed on " << i;
    EXPECT_EQ(is_space, absl::ascii_isspace(c)) << ": failed on " << i;
    EXPECT_EQ(is_print, absl::ascii_isprint(c)) << ": failed on " << i;
    EXPECT_EQ(is_punct, absl::ascii_ispunct(c)) << ": failed on " << i;
    EXPECT_EQ(is_blank, absl::ascii_isblank(c)) << ": failed on " << i;
    EXPECT_EQ(is_cntrl, absl::ascii_iscntrl(c)) << ": failed on " << i;
    EXPECT_EQ(is_xdigit, absl::ascii_isxdigit(c)) << ": failed on " << i;
    EXPECT_EQ(is_graph, absl::ascii_isgraph(c)) << ": failed on " << i;
    EXPECT_EQ(is_upper, absl::ascii_isupper(c)) << ": failed on " << i;
    EXPECT_EQ(is_lower, absl::ascii_islower(c)) << ": failed on " << i;
    EXPECT_EQ(is_ascii, absl::ascii_isascii(c)) << ": failed on " << i;
  }
}
// Checks that absl::ascii_isfoo returns the same value as isfoo in the C
// locale.
TEST(AsciiIsFoo, SameAsIsFoo) {
#ifndef __ANDROID__
  // Record the active LC_CTYPE locale *before* switching. A call such as
  // setlocale(LC_CTYPE, "C") returns the name of the locale it has just
  // installed, not the previous one, and the returned buffer may be
  // overwritten by later setlocale() calls, so the name is queried with a
  // null locale argument and copied.
  const char* const current_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(current_locale != nullptr);
  const std::string old_locale(current_locale);
  // Temporarily change locale to C. It should already be C, but just for
  // safety.
  ASSERT_TRUE(setlocale(LC_CTYPE, "C") != nullptr);
#endif
  for (int i = 0; i < 256; i++) {
    const auto c = static_cast<unsigned char>(i);
    EXPECT_EQ(isalpha(c) != 0, absl::ascii_isalpha(c)) << c;
    EXPECT_EQ(isdigit(c) != 0, absl::ascii_isdigit(c)) << c;
    EXPECT_EQ(isalnum(c) != 0, absl::ascii_isalnum(c)) << c;
    EXPECT_EQ(isspace(c) != 0, absl::ascii_isspace(c)) << c;
    EXPECT_EQ(ispunct(c) != 0, absl::ascii_ispunct(c)) << c;
    EXPECT_EQ(isblank(c) != 0, absl::ascii_isblank(c)) << c;
    EXPECT_EQ(iscntrl(c) != 0, absl::ascii_iscntrl(c)) << c;
    EXPECT_EQ(isxdigit(c) != 0, absl::ascii_isxdigit(c)) << c;
    EXPECT_EQ(isprint(c) != 0, absl::ascii_isprint(c)) << c;
    EXPECT_EQ(isgraph(c) != 0, absl::ascii_isgraph(c)) << c;
    EXPECT_EQ(isupper(c) != 0, absl::ascii_isupper(c)) << c;
    EXPECT_EQ(islower(c) != 0, absl::ascii_islower(c)) << c;
    EXPECT_EQ(isascii(c) != 0, absl::ascii_isascii(c)) << c;
  }
#ifndef __ANDROID__
  // Restore the locale that was active when the test started.
  ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()) != nullptr);
#endif
}
// Checks absl::ascii_toupper / absl::ascii_tolower for every unsigned char
// value, both against the expected ASCII mapping and against the C library's
// toupper / tolower in the "C" locale.
TEST(AsciiToFoo, All) {
#ifndef __ANDROID__
  // Temporarily switch LC_CTYPE to "C". It should already be "C", but just
  // for safety.
  //
  // setlocale(category, "C") returns the name of the *new* locale, so the
  // previous locale must be queried first with a null locale argument. The
  // name is copied because the returned buffer may be overwritten by later
  // setlocale() calls.
  const char* const current_locale = setlocale(LC_CTYPE, nullptr);
  ASSERT_TRUE(current_locale != nullptr);
  const std::string old_locale(current_locale);
  ASSERT_TRUE(setlocale(LC_CTYPE, "C") != nullptr);
#endif

  for (int i = 0; i < 256; i++) {
    const auto c = static_cast<unsigned char>(i);
    // Lowercase letters map to their uppercase counterpart; everything else
    // is returned unchanged.
    if (absl::ascii_islower(c)) {
      EXPECT_EQ(absl::ascii_toupper(c), 'A' + (i - 'a')) << c;
    } else {
      EXPECT_EQ(absl::ascii_toupper(c), static_cast<char>(i)) << c;
    }
    // Uppercase letters map to their lowercase counterpart; everything else
    // is returned unchanged.
    if (absl::ascii_isupper(c)) {
      EXPECT_EQ(absl::ascii_tolower(c), 'a' + (i - 'A')) << c;
    } else {
      EXPECT_EQ(absl::ascii_tolower(c), static_cast<char>(i)) << c;
    }
    // These CHECKs only hold in a C locale.
    EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(c)) << c;
    EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(c)) << c;
  }

#ifndef __ANDROID__
  // Restore the locale that was active before the test.
  ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()) != nullptr);
#endif
}
// Exercises absl::AsciiStrToLower with several argument kinds (char array,
// const std::string, string_view, temporary std::string, std::string*), and
// absl::ascii_tolower as a std::transform operation.
TEST(AsciiStrTo, Lower) {
  const char char_array[] = "ABCDEF";
  const std::string const_string = "GHIJKL";
  const std::string view_backing = "MNOPQR";
  const absl::string_view view(view_backing);
  const std::string long_string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1!a";
  std::string in_place = "_`?@[{AMNOPQRSTUVWXYZ";
  auto make_temporary = []() -> std::string { return "PQRSTU"; };

  EXPECT_EQ("abcdef", absl::AsciiStrToLower(char_array));
  EXPECT_EQ("ghijkl", absl::AsciiStrToLower(const_string));
  EXPECT_EQ("mnopqr", absl::AsciiStrToLower(view));
  EXPECT_EQ("abcdefghijklmnopqrstuvwxyz1!a",
            absl::AsciiStrToLower(long_string));
  EXPECT_EQ("pqrstu", absl::AsciiStrToLower(make_temporary()));

  // An empty `string_view` specifically exercises the case where a null data
  // pointer is passed to internal functions.
  EXPECT_EQ("", absl::AsciiStrToLower(absl::string_view()));

  // In-place variant: only the uppercase letters change.
  absl::AsciiStrToLower(&in_place);
  EXPECT_EQ("_`?@[{amnopqrstuvwxyz", in_place);

  // Character-level function applied over a raw buffer (terminator excluded).
  char raw_buffer[] = "Mutable";
  char* const raw_end = raw_buffer + strlen(raw_buffer);
  std::transform(raw_buffer, raw_end, raw_buffer, absl::ascii_tolower);
  EXPECT_STREQ("mutable", raw_buffer);
}
// Exercises absl::AsciiStrToUpper with several argument kinds (char array,
// const std::string, string_view, temporary std::string), and
// absl::ascii_toupper as a std::transform operation.
TEST(AsciiStrTo, Upper) {
  const char char_array[] = "abcdef";
  const std::string const_string = "ghijkl";
  const std::string view_backing = "_`?@[{amnopqrstuvwxyz";
  const absl::string_view view(view_backing);
  const std::string long_string = "abcdefghijklmnopqrstuvwxyz1!A";
  auto make_temporary = []() -> std::string { return "pqrstu"; };

  EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(char_array));
  EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(const_string));
  EXPECT_EQ("_`?@[{AMNOPQRSTUVWXYZ", absl::AsciiStrToUpper(view));
  EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ1!A",
            absl::AsciiStrToUpper(long_string));
  EXPECT_EQ("PQRSTU", absl::AsciiStrToUpper(make_temporary()));

  // An empty `string_view` specifically exercises the case where a null data
  // pointer is passed to internal functions.
  EXPECT_EQ("", absl::AsciiStrToUpper(absl::string_view()));

  // Character-level function applied over a raw buffer (terminator excluded).
  char raw_buffer[] = "Mutable";
  char* const raw_end = raw_buffer + strlen(raw_buffer);
  std::transform(raw_buffer, raw_end, raw_buffer, absl::ascii_toupper);
  EXPECT_STREQ("MUTABLE", raw_buffer);
}
// The string_view overload drops leading whitespace only; interior and
// trailing whitespace is kept.
TEST(StripLeadingAsciiWhitespace, FromStringView) {
  using absl::string_view;

  // Default-constructed view.
  EXPECT_EQ(string_view{}, absl::StripLeadingAsciiWhitespace(string_view{}));
  // Nothing to strip.
  EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace(string_view("foo")));
  // Every kind of leading whitespace character.
  EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace(
                       string_view("\t \n\f\r\n\vfoo")));
  // Interior and trailing whitespace survive.
  EXPECT_EQ("foo foo\n ", absl::StripLeadingAsciiWhitespace(
                              string_view("\t \n\f\r\n\vfoo foo\n ")));
  // All-whitespace input collapses to an empty view.
  EXPECT_EQ(string_view{}, absl::StripLeadingAsciiWhitespace(
                               string_view("\t \n\f\r\v\n\t \n\f\r\v\n")));
}
// The std::string* overload modifies its argument, dropping leading
// whitespace only.
TEST(StripLeadingAsciiWhitespace, InPlace) {
  // Runs the in-place overload on a private copy and hands back the result.
  const auto strip = [](std::string text) {
    absl::StripLeadingAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("", strip(std::string()));
  EXPECT_EQ("foo", strip("foo"));
  EXPECT_EQ("foo", strip("\t \n\f\r\n\vfoo"));
  EXPECT_EQ("foo foo\n ", strip("\t \n\f\r\n\vfoo foo\n "));
  // All-whitespace input becomes empty.
  EXPECT_EQ(absl::string_view{}, strip("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// The string_view overload drops trailing whitespace only; leading and
// interior whitespace is kept.
TEST(StripTrailingAsciiWhitespace, FromStringView) {
  using absl::string_view;

  // Default-constructed view.
  EXPECT_EQ(string_view{}, absl::StripTrailingAsciiWhitespace(string_view{}));
  // Nothing to strip.
  EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace(string_view("foo")));
  // Every kind of trailing whitespace character.
  EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace(
                       string_view("foo\t \n\f\r\n\v")));
  // Leading and interior whitespace survive.
  EXPECT_EQ(" \nfoo foo", absl::StripTrailingAsciiWhitespace(
                              string_view(" \nfoo foo\t \n\f\r\n\v")));
  // All-whitespace input collapses to an empty view.
  EXPECT_EQ(string_view{}, absl::StripTrailingAsciiWhitespace(
                               string_view("\t \n\f\r\v\n\t \n\f\r\v\n")));
}
// The std::string* overload modifies its argument, dropping trailing
// whitespace only.
TEST(StripTrailingAsciiWhitespace, InPlace) {
  // Runs the in-place overload on a private copy and hands back the result.
  const auto strip = [](std::string text) {
    absl::StripTrailingAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("", strip(std::string()));
  EXPECT_EQ("foo", strip("foo"));
  EXPECT_EQ("foo", strip("foo\t \n\f\r\n\v"));
  EXPECT_EQ(" \nfoo foo", strip(" \nfoo foo\t \n\f\r\n\v"));
  // All-whitespace input becomes empty.
  EXPECT_EQ(absl::string_view{}, strip("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// The string_view overload drops whitespace at both ends; interior
// whitespace is kept.
TEST(StripAsciiWhitespace, FromStringView) {
  using absl::string_view;

  // Default-constructed view.
  EXPECT_EQ(string_view{}, absl::StripAsciiWhitespace(string_view{}));
  // Nothing to strip.
  EXPECT_EQ("foo", absl::StripAsciiWhitespace(string_view("foo")));
  // Whitespace on both sides.
  EXPECT_EQ("foo", absl::StripAsciiWhitespace(
                       string_view("\t \n\f\r\n\vfoo\t \n\f\r\n\v")));
  // Interior whitespace survives.
  EXPECT_EQ("foo foo", absl::StripAsciiWhitespace(
                           string_view("\t \n\f\r\n\vfoo foo\t \n\f\r\n\v")));
  // All-whitespace input collapses to an empty view.
  EXPECT_EQ(string_view{}, absl::StripAsciiWhitespace(
                               string_view("\t \n\f\r\v\n\t \n\f\r\v\n")));
}
// The std::string* overload modifies its argument, dropping whitespace at
// both ends.
TEST(StripAsciiWhitespace, InPlace) {
  // Runs the in-place overload on a private copy and hands back the result.
  const auto strip = [](std::string text) {
    absl::StripAsciiWhitespace(&text);
    return text;
  };

  EXPECT_EQ("", strip(std::string()));
  EXPECT_EQ("foo", strip("foo"));
  EXPECT_EQ("foo", strip("\t \n\f\r\n\vfoo\t \n\f\r\n\v"));
  EXPECT_EQ("foo foo", strip("\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"));
  // All-whitespace input becomes empty.
  EXPECT_EQ(absl::string_view{}, strip("\t \n\f\r\v\n\t \n\f\r\v\n"));
}
// Runs absl::RemoveExtraAsciiWhitespace over a table of inputs and compares
// each result with its expected output.
TEST(RemoveExtraAsciiWhitespace, InPlace) {
  struct Case {
    const char* input;
    const char* expected;
  };
  const Case kCases[] = {
      {"No extra space", "No extra space"},
      {" Leading whitespace", "Leading whitespace"},
      {"Trailing whitespace ", "Trailing whitespace"},
      {" Leading and trailing ", "Leading and trailing"},
      {" Whitespace \t in\v middle ", "Whitespace in middle"},
      {"'Eeeeep! \n Newlines!\n", "'Eeeeep! Newlines!"},
      {"nospaces", "nospaces"},
      {"", ""},
      {"\n\t a\t\n\nb \t\n", "a\nb"},
  };

  for (const Case& test_case : kCases) {
    std::string actual(test_case.input);
    absl::RemoveExtraAsciiWhitespace(&actual);
    EXPECT_EQ(test_case.expected, actual);
  }
}
} // namespace

View file

@ -0,0 +1,193 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This program tests the absl::SimpleAtod and absl::SimpleAtof functions. Run
// it as "atod_manual_test pnftd/data/*.txt" where the pnftd directory is a
// local checkout of the https://github.com/nigeltao/parse-number-fxx-test-data
// repository. The test suite lives in a separate repository because its more
// than 5 million test cases weigh over several hundred megabytes and because
// the test cases are also useful to other software projects, not just Abseil.
// Its data/*.txt files contain one test case per line, like:
//
// 3C00 3F800000 3FF0000000000000 1
// 3D00 3FA00000 3FF4000000000000 1.25
// 3D9A 3FB33333 3FF6666666666666 1.4
// 57B7 42F6E979 405EDD2F1A9FBE77 123.456
// 622A 44454000 4088A80000000000 789
// 7C00 7F800000 7FF0000000000000 123.456e789
//
// For each line (and using 0-based column indexes, with the end index
// exclusive), columns [5..13) and [14..30) contain the hex-encoded bits of
// the 32-bit float and 64-bit double result of parsing columns [31..] (up to
// the end of the line).
//
// For example, parsing "1.4" as a float gives the bits 0x3FB33333.
//
// In this 6-line example, the final line's float and double values are all
// infinity. The largest finite float and double values are approximately
// 3.40e+38 and 1.80e+308.
#include <cstdint>
#include <cstdio>
#include <string>
#include "absl/base/casts.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
// Lookup table mapping a byte value to the numeric value of the hexadecimal
// digit it represents. Only '0'..'9' and uppercase 'A'..'F' have non-zero
// entries; every other byte, including lowercase 'a'..'f', maps to 0, so an
// invalid digit cannot be told apart from '0' by looking at the table alone.
// Each row below covers 8 consecutive byte values.
static constexpr uint8_t kUnhex[256] = {
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, // '0' ..= '7'
0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // '8' ..= '9'
0x0, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, // 'A' ..= 'F'
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, //
};
// Reads the whole of `filename` (opened in binary mode) into a string.
//
// Returns absl::nullopt if the file cannot be opened, if its size cannot be
// determined (a seek or ftell failure), or if fewer bytes than the reported
// size could be read.
static absl::optional<std::string> ReadFileToString(const char* filename) {
  FILE* f = fopen(filename, "rb");
  if (!f) {
    return absl::nullopt;
  }

  // Determine the size by seeking to the end. ftell() reports failure as -1,
  // which must be rejected before converting to size_t: otherwise it would
  // turn into an enormous allocation request.
  long file_size = -1;  // NOLINT(runtime/int): matches ftell()'s return type.
  if (fseek(f, 0, SEEK_END) == 0) {
    file_size = ftell(f);
  }
  if (file_size < 0 || fseek(f, 0, SEEK_SET) != 0) {
    fclose(f);
    return absl::nullopt;
  }

  const size_t size = static_cast<size_t>(file_size);
  std::string s(size, '\x00');
  const size_t n = fread(&s[0], 1, size, f);
  fclose(f);
  if (n != size) {
    return absl::nullopt;
  }
  return s;
}
static bool ProcessOneTestFile(const char* filename) {
absl::optional<std::string> contents = ReadFileToString(filename);
if (!contents) {
absl::FPrintF(stderr, "Invalid file: %s\n", filename);
return false;
}
int num_cases = 0;
for (absl::string_view v(*contents); !v.empty();) {
size_t new_line = v.find('\n');
if ((new_line == absl::string_view::npos) || (new_line < 32)) {
break;
}
absl::string_view input = v.substr(31, new_line - 31);
// Test absl::SimpleAtof.
{
float f;
if (!absl::SimpleAtof(input, &f)) {
absl::FPrintF(stderr, "Could not parse \"%s\" in %s\n", input,
filename);
return false;
}
uint32_t have32 = absl::bit_cast<uint32_t>(f);
uint32_t want32 = 0;
for (int i = 0; i < 8; i++) {
want32 = (want32 << 4) | kUnhex[static_cast<unsigned char>(v[5 + i])];
}
if (have32 != want32) {
absl::FPrintF(stderr,
"absl::SimpleAtof failed parsing \"%s\" in %s\n have "
"%08X\n want %08X\n",
input, filename, have32, want32);
return false;
}
}
// Test absl::SimpleAtod.
{
double d;
if (!absl::SimpleAtod(input, &d)) {
absl::FPrintF(stderr, "Could not parse \"%s\" in %s\n", input,
filename);
return false;
}
uint64_t have64 = absl::bit_cast<uint64_t>(d);
uint64_t want64 = 0;
for (int i = 0; i < 16; i++) {
want64 = (want64 << 4) | kUnhex[static_cast<unsigned char>(v[14 + i])];
}
if (have64 != want64) {
absl::FPrintF(stderr,
"absl::SimpleAtod failed parsing \"%s\" in %s\n have "
"%016X\n want %016X\n",
input, filename, have64, want64);
return false;
}
}
num_cases++;
v = v.substr(new_line + 1);
}
printf("%8d OK in %s\n", num_cases, filename);
return true;
}
// Entry point: treats every command-line argument as a test-data file and
// processes them in order. Returns 1 when no file is given or as soon as one
// file fails, and 0 when every file passes.
int main(int argc, char** argv) {
  const bool has_input_files = argc >= 2;
  if (!has_input_files) {
    absl::FPrintF(
        stderr,
        "Usage: %s pnftd/data/*.txt\nwhere the pnftd directory is a local "
        "checkout of "
        "the\nhttps://github.com/nigeltao/parse-number-fxx-test-data "
        "repository.\n",
        argv[0]);
    return 1;
  }

  char** const args_end = argv + argc;
  for (char** arg = argv + 1; arg != args_end; ++arg) {
    const bool passed = ProcessOneTestFile(*arg);
    if (!passed) {
      return 1;
    }
  }
  return 0;
}

View file

@ -0,0 +1,169 @@
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstddef>
#include "gtest/gtest.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/substitute.h"
namespace {
// Plain `char`: only absl::Substitute is checked here; the StrCat and
// StrFormat expectations are kept as commented-out documentation.
TEST(CharFormatting, Char) {
  const char letter = 'A';

  // Desired behavior: does not compile:
  // EXPECT_EQ(absl::StrCat(letter, "B"), "AB");
  // EXPECT_EQ(absl::StrFormat("%vB", letter), "AB");

  // Legacy behavior: format as char:
  EXPECT_EQ(absl::Substitute("$0B", letter), "AB");
}
// Unscoped enum whose underlying type is plain `char`.
enum CharEnum : char {};

TEST(CharFormatting, CharEnum) {
  CharEnum letter = static_cast<CharEnum>('A');

  // Desired behavior: format as decimal
  EXPECT_EQ(absl::StrFormat("%vB", letter), "65B");
  EXPECT_EQ(absl::StrCat(letter, "B"), "65B");

  // Legacy behavior: format as character:
  // Some older versions of gcc behave differently in this one case, so the
  // check is skipped when building with gcc (but not with clang).
#if !defined(__GNUC__) || defined(__clang__)
  EXPECT_EQ(absl::Substitute("$0B", letter), "AB");
#endif
}
// Scoped enum whose underlying type is plain `char`.
enum class CharEnumClass : char {};

TEST(CharFormatting, CharEnumClass) {
  CharEnumClass letter = static_cast<CharEnumClass>('A');

  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrFormat("%vB", letter), "65B");
  EXPECT_EQ(absl::StrCat(letter, "B"), "65B");

  // Legacy behavior: format as character:
  EXPECT_EQ(absl::Substitute("$0B", letter), "AB");
}
// `unsigned char` is formatted as a decimal number by StrCat, Substitute and
// StrFormat("%v").
TEST(CharFormatting, UnsignedChar) {
  const unsigned char v = 'A';
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(v, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", v), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", v), "65B");

  // Signedness check: 255 must not be printed as a negative number.
  const unsigned char w = 255;
  EXPECT_EQ(absl::StrCat(w, "B"), "255B");
  EXPECT_EQ(absl::Substitute("$0B", w), "255B");
  // This assertion used to be disabled and formatted `v` (65) instead of `w`,
  // which could never produce "255B".
  EXPECT_EQ(absl::StrFormat("%vB", w), "255B");
}
// `signed char` is formatted as a decimal number by StrCat, Substitute and
// StrFormat("%v").
TEST(CharFormatting, SignedChar) {
  const signed char v = 'A';
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(v, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", v), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", v), "65B");

  // Signedness check: the minimum value must keep its sign.
  const signed char w = -128;
  EXPECT_EQ(absl::StrCat(w, "B"), "-128B");
  EXPECT_EQ(absl::Substitute("$0B", w), "-128B");
  // StrFormat is covered as well, mirroring the signed-char enum tests.
  EXPECT_EQ(absl::StrFormat("%vB", w), "-128B");
}
// Unscoped enum whose underlying type is `unsigned char`.
enum UnsignedCharEnum : unsigned char {};

TEST(CharFormatting, UnsignedCharEnum) {
  UnsignedCharEnum letter = static_cast<UnsignedCharEnum>('A');
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(letter, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", letter), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", letter), "65B");

  // Signedness check: 255 must not be printed as a negative number.
  UnsignedCharEnum max_value = static_cast<UnsignedCharEnum>(255);
  EXPECT_EQ(absl::StrCat(max_value, "B"), "255B");
  EXPECT_EQ(absl::Substitute("$0B", max_value), "255B");
  EXPECT_EQ(absl::StrFormat("%vB", max_value), "255B");
}
// Unscoped enum whose underlying type is `signed char`.
enum SignedCharEnum : signed char {};

TEST(CharFormatting, SignedCharEnum) {
  SignedCharEnum letter = static_cast<SignedCharEnum>('A');
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(letter, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", letter), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", letter), "65B");

  // Signedness check: the minimum value must keep its sign.
  SignedCharEnum min_value = static_cast<SignedCharEnum>(-128);
  EXPECT_EQ(absl::StrCat(min_value, "B"), "-128B");
  EXPECT_EQ(absl::Substitute("$0B", min_value), "-128B");
  EXPECT_EQ(absl::StrFormat("%vB", min_value), "-128B");
}
// Scoped enum whose underlying type is `unsigned char`.
enum class UnsignedCharEnumClass : unsigned char {};

TEST(CharFormatting, UnsignedCharEnumClass) {
  UnsignedCharEnumClass letter = static_cast<UnsignedCharEnumClass>('A');
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(letter, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", letter), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", letter), "65B");

  // Signedness check: 255 must not be printed as a negative number.
  UnsignedCharEnumClass max_value = static_cast<UnsignedCharEnumClass>(255);
  EXPECT_EQ(absl::StrCat(max_value, "B"), "255B");
  EXPECT_EQ(absl::Substitute("$0B", max_value), "255B");
  EXPECT_EQ(absl::StrFormat("%vB", max_value), "255B");
}
// Scoped enum whose underlying type is `signed char`.
//
// This was previously declared as a plain (unscoped) `enum`, which made the
// test below a duplicate of the SignedCharEnum test rather than a check of
// scoped-enum formatting.
enum class SignedCharEnumClass : signed char {};

TEST(CharFormatting, SignedCharEnumClass) {
  auto v = static_cast<SignedCharEnumClass>('A');
  // Desired behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(v, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", v), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", v), "65B");

  // Signedness check: the minimum value must keep its sign.
  auto w = static_cast<SignedCharEnumClass>(-128);
  EXPECT_EQ(absl::StrCat(w, "B"), "-128B");
  EXPECT_EQ(absl::Substitute("$0B", w), "-128B");
  EXPECT_EQ(absl::StrFormat("%vB", w), "-128B");
}
#ifdef __cpp_lib_byte
// std::byte, compiled only when the library advertises support for it.
TEST(CharFormatting, StdByte) {
  std::byte value = static_cast<std::byte>('A');

  // Desired behavior: format as 0xff
  // (No APIs do this today.)

  // Legacy behavior: format as decimal:
  EXPECT_EQ(absl::StrCat(value, "B"), "65B");
  EXPECT_EQ(absl::Substitute("$0B", value), "65B");
  EXPECT_EQ(absl::StrFormat("%vB", value), "65B");
}
#endif  // __cpp_lib_byte
} // namespace

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,123 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CHARCONV_H_
#define ABSL_STRINGS_CHARCONV_H_
#include <system_error> // NOLINT(build/c++11)
#include "absl/base/config.h"
#include "absl/base/nullability.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Workalike compatibility version of std::chars_format from C++17.
//
// This is a bitfield enumerator which can be passed to absl::from_chars to
// configure the string-to-float conversion. Each basic format occupies its
// own bit; `general` is the combination of `fixed` and `scientific`.
enum class chars_format {
scientific = 1,
fixed = 2,
hex = 4,
general = fixed | scientific,
};
// The return result of a string-to-number conversion.
//
// `ec` will be set to `invalid_argument` if a well-formed number was not found
// at the start of the input range, `result_out_of_range` if a well-formed
// number was found, but it was out of the representable range of the requested
// type, or to std::errc() otherwise.
//
// If a well-formed number was found, `ptr` is set to one past the sequence of
// characters that were successfully parsed. If none was found, `ptr` is set
// to the `first` argument to from_chars.
struct from_chars_result {
// One past the last parsed character, or `first` if nothing was parsed.
absl::Nonnull<const char*> ptr;
// Outcome of the conversion; std::errc() means success.
std::errc ec;
};
// Workalike compatibility version of std::from_chars from C++17. Currently
// this only supports the `double` and `float` types.
//
// This interface incorporates the proposed resolutions for library issues
// DR 3080 and DR 3081. If these are adopted with different wording,
// Abseil's behavior will change to match the standard. (The behavior most
// likely to change is for DR 3081, which says what `value` will be set to in
// the case of overflow and underflow. Code that wants to avoid possible
// breaking changes in this area should not depend on `value` when the returned
// from_chars_result indicates a range error.)
//
// Searches the range [first, last) for the longest matching pattern beginning
// at `first` that represents a floating point number. If one is found, store
// the result in `value`.
//
// The matching pattern format is almost the same as that of strtod(), except
// that (1) C locale is not respected, (2) an initial '+' character in the
// input range will never be matched, and (3) leading whitespaces are not
// ignored.
//
// If `fmt` is set, it must be one of the enumerator values of the chars_format.
// (This is despite the fact that chars_format is a bitmask type.) If set to
// `scientific`, a matching number must contain an exponent. If set to `fixed`,
// then an exponent will never match. (For example, the string "1e5" will be
// parsed as "1".) If set to `hex`, then a hexadecimal float is parsed in the
// format that strtod() accepts, except that a "0x" prefix is NOT matched.
// (In particular, in `hex` mode, the input "0xff" results in the largest
// matching pattern "0".)
//
// The returned from_chars_result carries the end-of-parse pointer and the
// error code. When `fmt` is omitted it defaults to chars_format::general.
absl::from_chars_result from_chars(absl::Nonnull<const char*> first,
absl::Nonnull<const char*> last,
double& value, // NOLINT
chars_format fmt = chars_format::general);
// Overload of the function above that stores the result in a `float`.
absl::from_chars_result from_chars(absl::Nonnull<const char*> first,
absl::Nonnull<const char*> last,
float& value, // NOLINT
chars_format fmt = chars_format::general);
// std::chars_format is specified as a bitmask type, which means the following
// operations must be provided. Each operator applies the matching integer
// operation to the `int` values of its operands and converts the result back
// to chars_format.
inline constexpr chars_format operator&(chars_format lhs, chars_format rhs) {
  const int bits = static_cast<int>(lhs) & static_cast<int>(rhs);
  return static_cast<chars_format>(bits);
}
inline constexpr chars_format operator|(chars_format lhs, chars_format rhs) {
  const int bits = static_cast<int>(lhs) | static_cast<int>(rhs);
  return static_cast<chars_format>(bits);
}
inline constexpr chars_format operator^(chars_format lhs, chars_format rhs) {
  const int bits = static_cast<int>(lhs) ^ static_cast<int>(rhs);
  return static_cast<chars_format>(bits);
}
inline constexpr chars_format operator~(chars_format arg) {
  const int bits = ~static_cast<int>(arg);
  return static_cast<chars_format>(bits);
}
// Compound assignments: update `lhs` in place and return a reference to it.
inline chars_format& operator&=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs & rhs;
}
inline chars_format& operator|=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs | rhs;
}
inline chars_format& operator^=(chars_format& lhs, chars_format rhs) {
  return lhs = lhs ^ rhs;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CHARCONV_H_

View file

@ -0,0 +1,204 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charconv.h"
#include <cstdlib>
#include <cstring>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Baseline: parses a short decimal literal as a double with strtod().
void BM_Strtod_Pi(benchmark::State& state) {
  const char* input = "3.14159";
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Pi);
// Parses a short decimal literal as a double with absl::from_chars().
void BM_Absl_Pi(benchmark::State& state) {
  const char* input = "3.14159";
  const char* input_end = input + std::strlen(input);
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    double parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Pi);
// Baseline: parses a short decimal literal as a float with strtof().
void BM_Strtod_Pi_float(benchmark::State& state) {
  const char* input = "3.14159";
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtof(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Pi_float);
// Parses a short decimal literal as a float with absl::from_chars().
void BM_Absl_Pi_float(benchmark::State& state) {
  const char* input = "3.14159";
  const char* input_end = input + std::strlen(input);
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    float parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Pi_float);
// Baseline: strtod() on an 18-digit mantissa with a large positive exponent.
void BM_Strtod_HardLarge(benchmark::State& state) {
  const char* input = "272104041512242479.e200";
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HardLarge);
// absl::from_chars() on an 18-digit mantissa with a large positive exponent.
void BM_Absl_HardLarge(benchmark::State& state) {
  const char* input = "272104041512242479.e200";
  const char* input_end = input + std::strlen(input);
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    double parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HardLarge);
// Baseline: strtod() on a 17-digit mantissa with a large negative exponent.
void BM_Strtod_HardSmall(benchmark::State& state) {
  const char* input = "94080055902682397.e-242";
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HardSmall);
// absl::from_chars() on a 17-digit mantissa with a large negative exponent.
void BM_Absl_HardSmall(benchmark::State& state) {
  const char* input = "94080055902682397.e-242";
  const char* input_end = input + std::strlen(input);
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    double parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HardSmall);
// Baseline: strtod() on a 200-digit integer made of the digit '3'.
void BM_Strtod_HugeMantissa(benchmark::State& state) {
  const std::string digits(200, '3');
  const char* input = digits.c_str();
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_HugeMantissa);
// absl::from_chars() on a 200-digit integer made of the digit '3'.
void BM_Absl_HugeMantissa(benchmark::State& state) {
  const std::string digits(200, '3');
  const char* input = digits.c_str();
  const char* input_end = input + digits.size();
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    double parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_HugeMantissa);
// Builds a hard-to-round decimal string: the first `length` characters of the
// halfway digit string below, followed by a final digit '1' and the exponent
// "e-297".
std::string MakeHardCase(int length) {
  // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and
  // the next larger representable number. Its digits are spelled out here and
  // then padded with 5000 zeros.
  std::string halfway_digits =
      "1."
      "152113937042223790993097181572444900347587985074226836242307364987727724"
      "831384300183638649152607195040591791364113930628852279348613864894524591"
      "272746490313676832900762939595690019745859128071117417798540258114233761"
      "012939937017879509401007964861774960297319002612457273148497158989073482"
      "171377406078223015359818300988676687994537274548940612510414856761641652"
      "513434981938564294004070500716200446656421722229202383105446378511678258"
      "370570631774499359748259931676320916632111681001853983492795053244971606"
      "922718923011680846577744433974087653954904214152517799883551075537146316"
      "168973685866425605046988661997658648354773076621610279716804960009043764"
      "038392994055171112475093876476783502487512538082706095923790634572014823"
      "78877699375152587890625";
  halfway_digits.append(5000, '0');

  // Generate the hard case on the upper side of the halfway point for the
  // given length. Lengths between 3 and 1000 are reasonable.
  std::string hard_case = halfway_digits.substr(0, length);
  hard_case += "1e-297";
  return hard_case;
}
// Baseline: strtod() on a MakeHardCase() input whose digit count is taken
// from the benchmark's range argument.
void BM_Strtod_Big_And_Difficult(benchmark::State& state) {
  const std::string hard_case = MakeHardCase(state.range(0));
  const char* input = hard_case.c_str();
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    benchmark::DoNotOptimize(strtod(input, nullptr));
  }
}
BENCHMARK(BM_Strtod_Big_And_Difficult)->Range(3, 5000);
// absl::from_chars() on a MakeHardCase() input whose digit count is taken
// from the benchmark's range argument.
void BM_Absl_Big_And_Difficult(benchmark::State& state) {
  const std::string hard_case = MakeHardCase(state.range(0));
  const char* input = hard_case.c_str();
  const char* input_end = input + hard_case.size();
  for (auto _ : state) {
    benchmark::DoNotOptimize(input);
    double parsed;
    absl::from_chars(input, input_end, parsed);
    benchmark::DoNotOptimize(parsed);
  }
}
BENCHMARK(BM_Absl_Big_And_Difficult)->Range(3, 5000);
} // namespace
// ------------------------------------------------------------------------
// Benchmark Time CPU Iterations
// ------------------------------------------------------------------------
// BM_Strtod_Pi 96 ns 96 ns 6337454
// BM_Absl_Pi 35 ns 35 ns 20031996
// BM_Strtod_Pi_float 91 ns 91 ns 7745851
// BM_Absl_Pi_float 35 ns 35 ns 20430298
// BM_Strtod_HardLarge 133 ns 133 ns 5288341
// BM_Absl_HardLarge 181 ns 181 ns 3855615
// BM_Strtod_HardSmall 279 ns 279 ns 2517243
// BM_Absl_HardSmall 287 ns 287 ns 2458744
// BM_Strtod_HugeMantissa 433 ns 433 ns 1604293
// BM_Absl_HugeMantissa 160 ns 160 ns 4403671
// BM_Strtod_Big_And_Difficult/3 236 ns 236 ns 2942496
// BM_Strtod_Big_And_Difficult/8 232 ns 232 ns 2983796
// BM_Strtod_Big_And_Difficult/64 437 ns 437 ns 1591951
// BM_Strtod_Big_And_Difficult/512 1738 ns 1738 ns 402519
// BM_Strtod_Big_And_Difficult/4096 3943 ns 3943 ns 176128
// BM_Strtod_Big_And_Difficult/5000 4397 ns 4397 ns 157878
// BM_Absl_Big_And_Difficult/3 39 ns 39 ns 17799583
// BM_Absl_Big_And_Difficult/8 43 ns 43 ns 16096859
// BM_Absl_Big_And_Difficult/64 550 ns 550 ns 1259717
// BM_Absl_Big_And_Difficult/512 4167 ns 4167 ns 171414
// BM_Absl_Big_And_Difficult/4096 9160 ns 9159 ns 76297
// BM_Absl_Big_And_Difficult/5000 9738 ns 9738 ns 70140

View file

@ -0,0 +1,787 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charconv.h"
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <limits>
#include <string>
#include <system_error> // NOLINT(build/c++11)
#include "gtest/gtest.h"
#include "absl/strings/internal/pow10_helper.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
// Feature switches for this test file. Both are 0 when _MSC_FULL_VER is
// defined and 1 otherwise.
//
// ABSL_COMPILER_DOES_EXACT_ROUNDING guards the helpers and tests below that
// compare absl::from_chars results with floating point literals converted by
// the compiler.
// NOTE(review): ABSL_STRTOD_HANDLES_NAN_CORRECTLY presumably guards
// comparisons against strtod()'s NaN handling; its uses are not visible in
// this part of the file. Confirm before relying on this.
#ifdef _MSC_FULL_VER
#define ABSL_COMPILER_DOES_EXACT_ROUNDING 0
#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 0
#else
#define ABSL_COMPILER_DOES_EXACT_ROUNDING 1
#define ABSL_STRTOD_HANDLES_NAN_CORRECTLY 1
#endif
namespace {
using absl::strings_internal::Pow10;
#if ABSL_COMPILER_DOES_EXACT_ROUNDING
// Verifies that absl::from_chars accepts all of `str` as a double: no error
// is reported, the whole string is consumed, and the parsed value compares
// equal to `expected_number`.
void TestDoubleParse(absl::string_view str, double expected_number) {
  SCOPED_TRACE(str);
  const char* begin = str.data();
  const char* end = begin + str.length();

  double parsed = 0.0;
  const absl::from_chars_result outcome = absl::from_chars(begin, end, parsed);

  EXPECT_EQ(outcome.ec, std::errc());
  EXPECT_EQ(outcome.ptr, end);
  EXPECT_EQ(parsed, expected_number);
}
// Verifies that absl::from_chars accepts all of `str` as a float: no error is
// reported, the whole string is consumed, and the parsed value compares equal
// to `expected_number`.
void TestFloatParse(absl::string_view str, float expected_number) {
  SCOPED_TRACE(str);
  const char* begin = str.data();
  const char* end = begin + str.length();

  float parsed = 0.0f;
  const absl::from_chars_result outcome = absl::from_chars(begin, end, parsed);

  EXPECT_EQ(outcome.ec, std::errc());
  EXPECT_EQ(outcome.ptr, end);
  EXPECT_EQ(parsed, expected_number);
}
// Tests that the given double or single precision floating point literal is
// parsed correctly by absl::from_chars.
//
// These convenience macros assume that the C++ compiler being used also does
// fully correct decimal-to-binary conversions.
//
// Each macro takes an unsuffixed, non-negative literal. The literal is
// stringized (#number) to form the input text, and the same literal, as
// converted by the compiler, is the expected value; the FLOAT variant pastes
// an `f` suffix onto it (number##f). Both the literal and its negation are
// checked.
//
// The expansion is a plain brace-enclosed block, so use these macros only as
// standalone statements (not as the unbraced body of an if/else).
#define FROM_CHARS_TEST_DOUBLE(number) \
{ \
TestDoubleParse(#number, number); \
TestDoubleParse("-" #number, -number); \
}
#define FROM_CHARS_TEST_FLOAT(number) \
{ \
TestFloatParse(#number, number##f); \
TestFloatParse("-" #number, -number##f); \
}
// Checks absl::from_chars on decimal inputs that are hard to round correctly.
//
// Every FROM_CHARS_TEST_* line checks the literal and its negation, and the
// expected value is whatever the compiler produces for that same literal. The
// literal is also stringified to form the parser input, so its spelling
// (including leading zeros in the exponent) is part of the test.
TEST(FromChars, NearRoundingCases) {
// Cases from "A Program for Testing IEEE Decimal-Binary Conversion"
// by Vern Paxson.
// Forms that should round towards zero. (These are the hardest cases for
// each decimal mantissa size.)
// Double precision, round towards zero.
FROM_CHARS_TEST_DOUBLE(5.e125);
FROM_CHARS_TEST_DOUBLE(69.e267);
FROM_CHARS_TEST_DOUBLE(999.e-026);
FROM_CHARS_TEST_DOUBLE(7861.e-034);
FROM_CHARS_TEST_DOUBLE(75569.e-254);
FROM_CHARS_TEST_DOUBLE(928609.e-261);
FROM_CHARS_TEST_DOUBLE(9210917.e080);
FROM_CHARS_TEST_DOUBLE(84863171.e114);
FROM_CHARS_TEST_DOUBLE(653777767.e273);
FROM_CHARS_TEST_DOUBLE(5232604057.e-298);
FROM_CHARS_TEST_DOUBLE(27235667517.e-109);
FROM_CHARS_TEST_DOUBLE(653532977297.e-123);
FROM_CHARS_TEST_DOUBLE(3142213164987.e-294);
FROM_CHARS_TEST_DOUBLE(46202199371337.e-072);
FROM_CHARS_TEST_DOUBLE(231010996856685.e-073);
FROM_CHARS_TEST_DOUBLE(9324754620109615.e212);
FROM_CHARS_TEST_DOUBLE(78459735791271921.e049);
FROM_CHARS_TEST_DOUBLE(272104041512242479.e200);
FROM_CHARS_TEST_DOUBLE(6802601037806061975.e198);
FROM_CHARS_TEST_DOUBLE(20505426358836677347.e-221);
FROM_CHARS_TEST_DOUBLE(836168422905420598437.e-234);
FROM_CHARS_TEST_DOUBLE(4891559871276714924261.e222);
// Single precision, round towards zero.
FROM_CHARS_TEST_FLOAT(5.e-20);
FROM_CHARS_TEST_FLOAT(67.e14);
FROM_CHARS_TEST_FLOAT(985.e15);
FROM_CHARS_TEST_FLOAT(7693.e-42);
FROM_CHARS_TEST_FLOAT(55895.e-16);
FROM_CHARS_TEST_FLOAT(996622.e-44);
FROM_CHARS_TEST_FLOAT(7038531.e-32);
FROM_CHARS_TEST_FLOAT(60419369.e-46);
FROM_CHARS_TEST_FLOAT(702990899.e-20);
FROM_CHARS_TEST_FLOAT(6930161142.e-48);
FROM_CHARS_TEST_FLOAT(25933168707.e-13);
FROM_CHARS_TEST_FLOAT(596428896559.e20);
// Similarly, forms that should round away from zero.
// Double precision, round away from zero.
FROM_CHARS_TEST_DOUBLE(9.e-265);
FROM_CHARS_TEST_DOUBLE(85.e-037);
FROM_CHARS_TEST_DOUBLE(623.e100);
FROM_CHARS_TEST_DOUBLE(3571.e263);
FROM_CHARS_TEST_DOUBLE(81661.e153);
FROM_CHARS_TEST_DOUBLE(920657.e-023);
FROM_CHARS_TEST_DOUBLE(4603285.e-024);
FROM_CHARS_TEST_DOUBLE(87575437.e-309);
FROM_CHARS_TEST_DOUBLE(245540327.e122);
FROM_CHARS_TEST_DOUBLE(6138508175.e120);
FROM_CHARS_TEST_DOUBLE(83356057653.e193);
FROM_CHARS_TEST_DOUBLE(619534293513.e124);
FROM_CHARS_TEST_DOUBLE(2335141086879.e218);
FROM_CHARS_TEST_DOUBLE(36167929443327.e-159);
FROM_CHARS_TEST_DOUBLE(609610927149051.e-255);
FROM_CHARS_TEST_DOUBLE(3743626360493413.e-165);
FROM_CHARS_TEST_DOUBLE(94080055902682397.e-242);
FROM_CHARS_TEST_DOUBLE(899810892172646163.e283);
FROM_CHARS_TEST_DOUBLE(7120190517612959703.e120);
FROM_CHARS_TEST_DOUBLE(25188282901709339043.e-252);
FROM_CHARS_TEST_DOUBLE(308984926168550152811.e-052);
FROM_CHARS_TEST_DOUBLE(6372891218502368041059.e064);
// Single precision, round away from zero.
FROM_CHARS_TEST_FLOAT(3.e-23);
FROM_CHARS_TEST_FLOAT(57.e18);
FROM_CHARS_TEST_FLOAT(789.e-35);
FROM_CHARS_TEST_FLOAT(2539.e-18);
FROM_CHARS_TEST_FLOAT(76173.e28);
FROM_CHARS_TEST_FLOAT(887745.e-11);
FROM_CHARS_TEST_FLOAT(5382571.e-37);
FROM_CHARS_TEST_FLOAT(82381273.e-35);
FROM_CHARS_TEST_FLOAT(750486563.e-38);
FROM_CHARS_TEST_FLOAT(3752432815.e-39);
FROM_CHARS_TEST_FLOAT(75224575729.e-45);
FROM_CHARS_TEST_FLOAT(459926601011.e15);
}
#undef FROM_CHARS_TEST_DOUBLE
#undef FROM_CHARS_TEST_FLOAT
#endif
// Parses `s` as a float with absl::from_chars and returns the parsed value.
//
// The from_chars status is not inspected. The result is seeded with NaN so
// that, should from_chars leave it unwritten, the caller receives a
// well-defined value that compares unequal to every expectation instead of an
// indeterminate one.
float ToFloat(absl::string_view s) {
  float f = std::numeric_limits<float>::quiet_NaN();
  absl::from_chars(s.data(), s.data() + s.size(), f);
  return f;
}
// Parses `s` as a double with absl::from_chars and returns the parsed value.
//
// The from_chars status is not inspected. The result is seeded with NaN so
// that, should from_chars leave it unwritten, the caller receives a
// well-defined value that compares unequal to every expectation instead of an
// indeterminate one.
double ToDouble(absl::string_view s) {
  double d = std::numeric_limits<double>::quiet_NaN();
  absl::from_chars(s.data(), s.data() + s.size(), d);
  return d;
}
// A duplication of the test cases in "NearRoundingCases" above, but with
// expected values expressed with integers, using ldexp/ldexpf. These test
// cases will work even on compilers that do not accurately round floating point
// literals.
//
// Each expectation has the form ldexp(mantissa, exponent), i.e. the integer
// mantissa scaled by a power of two. The cases appear in the same order as in
// "NearRoundingCases": doubles then floats that should round towards zero,
// followed by doubles then floats that should round away from zero.
TEST(FromChars, NearRoundingCasesExplicit) {
// Double precision, round towards zero.
EXPECT_EQ(ToDouble("5.e125"), ldexp(6653062250012735, 365));
EXPECT_EQ(ToDouble("69.e267"), ldexp(4705683757438170, 841));
EXPECT_EQ(ToDouble("999.e-026"), ldexp(6798841691080350, -129));
EXPECT_EQ(ToDouble("7861.e-034"), ldexp(8975675289889240, -153));
EXPECT_EQ(ToDouble("75569.e-254"), ldexp(6091718967192243, -880));
EXPECT_EQ(ToDouble("928609.e-261"), ldexp(7849264900213743, -900));
EXPECT_EQ(ToDouble("9210917.e080"), ldexp(8341110837370930, 236));
EXPECT_EQ(ToDouble("84863171.e114"), ldexp(4625202867375927, 353));
EXPECT_EQ(ToDouble("653777767.e273"), ldexp(5068902999763073, 884));
EXPECT_EQ(ToDouble("5232604057.e-298"), ldexp(5741343011915040, -1010));
EXPECT_EQ(ToDouble("27235667517.e-109"), ldexp(6707124626673586, -380));
EXPECT_EQ(ToDouble("653532977297.e-123"), ldexp(7078246407265384, -422));
EXPECT_EQ(ToDouble("3142213164987.e-294"), ldexp(8219991337640559, -988));
EXPECT_EQ(ToDouble("46202199371337.e-072"), ldexp(5224462102115359, -246));
EXPECT_EQ(ToDouble("231010996856685.e-073"), ldexp(5224462102115359, -247));
EXPECT_EQ(ToDouble("9324754620109615.e212"), ldexp(5539753864394442, 705));
EXPECT_EQ(ToDouble("78459735791271921.e049"), ldexp(8388176519442766, 166));
EXPECT_EQ(ToDouble("272104041512242479.e200"), ldexp(5554409530847367, 670));
EXPECT_EQ(ToDouble("6802601037806061975.e198"), ldexp(5554409530847367, 668));
EXPECT_EQ(ToDouble("20505426358836677347.e-221"),
ldexp(4524032052079546, -722));
EXPECT_EQ(ToDouble("836168422905420598437.e-234"),
ldexp(5070963299887562, -760));
EXPECT_EQ(ToDouble("4891559871276714924261.e222"),
ldexp(6452687840519111, 757));
// Single precision, round towards zero.
EXPECT_EQ(ToFloat("5.e-20"), ldexpf(15474250, -88));
EXPECT_EQ(ToFloat("67.e14"), ldexpf(12479722, 29));
EXPECT_EQ(ToFloat("985.e15"), ldexpf(14333636, 36));
EXPECT_EQ(ToFloat("7693.e-42"), ldexpf(10979816, -150));
EXPECT_EQ(ToFloat("55895.e-16"), ldexpf(12888509, -61));
EXPECT_EQ(ToFloat("996622.e-44"), ldexpf(14224264, -150));
EXPECT_EQ(ToFloat("7038531.e-32"), ldexpf(11420669, -107));
EXPECT_EQ(ToFloat("60419369.e-46"), ldexpf(8623340, -150));
EXPECT_EQ(ToFloat("702990899.e-20"), ldexpf(16209866, -61));
EXPECT_EQ(ToFloat("6930161142.e-48"), ldexpf(9891056, -150));
EXPECT_EQ(ToFloat("25933168707.e-13"), ldexpf(11138211, -32));
EXPECT_EQ(ToFloat("596428896559.e20"), ldexpf(12333860, 82));
// Double precision, round away from zero.
EXPECT_EQ(ToDouble("9.e-265"), ldexp(8168427841980010, -930));
EXPECT_EQ(ToDouble("85.e-037"), ldexp(6360455125664090, -169));
EXPECT_EQ(ToDouble("623.e100"), ldexp(6263531988747231, 289));
EXPECT_EQ(ToDouble("3571.e263"), ldexp(6234526311072170, 833));
EXPECT_EQ(ToDouble("81661.e153"), ldexp(6696636728760206, 472));
EXPECT_EQ(ToDouble("920657.e-023"), ldexp(5975405561110124, -109));
EXPECT_EQ(ToDouble("4603285.e-024"), ldexp(5975405561110124, -110));
EXPECT_EQ(ToDouble("87575437.e-309"), ldexp(8452160731874668, -1053));
EXPECT_EQ(ToDouble("245540327.e122"), ldexp(4985336549131723, 381));
EXPECT_EQ(ToDouble("6138508175.e120"), ldexp(4985336549131723, 379));
EXPECT_EQ(ToDouble("83356057653.e193"), ldexp(5986732817132056, 625));
EXPECT_EQ(ToDouble("619534293513.e124"), ldexp(4798406992060657, 399));
EXPECT_EQ(ToDouble("2335141086879.e218"), ldexp(5419088166961646, 713));
EXPECT_EQ(ToDouble("36167929443327.e-159"), ldexp(8135819834632444, -536));
EXPECT_EQ(ToDouble("609610927149051.e-255"), ldexp(4576664294594737, -850));
EXPECT_EQ(ToDouble("3743626360493413.e-165"), ldexp(6898586531774201, -549));
EXPECT_EQ(ToDouble("94080055902682397.e-242"), ldexp(6273271706052298, -800));
EXPECT_EQ(ToDouble("899810892172646163.e283"), ldexp(7563892574477827, 947));
EXPECT_EQ(ToDouble("7120190517612959703.e120"), ldexp(5385467232557565, 409));
EXPECT_EQ(ToDouble("25188282901709339043.e-252"),
ldexp(5635662608542340, -825));
EXPECT_EQ(ToDouble("308984926168550152811.e-052"),
ldexp(5644774693823803, -157));
EXPECT_EQ(ToDouble("6372891218502368041059.e064"),
ldexp(4616868614322430, 233));
// Single precision, round away from zero.
EXPECT_EQ(ToFloat("3.e-23"), ldexpf(9507380, -98));
EXPECT_EQ(ToFloat("57.e18"), ldexpf(12960300, 42));
EXPECT_EQ(ToFloat("789.e-35"), ldexpf(10739312, -130));
EXPECT_EQ(ToFloat("2539.e-18"), ldexpf(11990089, -72));
EXPECT_EQ(ToFloat("76173.e28"), ldexpf(9845130, 86));
EXPECT_EQ(ToFloat("887745.e-11"), ldexpf(9760860, -40));
EXPECT_EQ(ToFloat("5382571.e-37"), ldexpf(11447463, -124));
EXPECT_EQ(ToFloat("82381273.e-35"), ldexpf(8554961, -113));
EXPECT_EQ(ToFloat("750486563.e-38"), ldexpf(9975678, -120));
EXPECT_EQ(ToFloat("3752432815.e-39"), ldexpf(9975678, -121));
EXPECT_EQ(ToFloat("75224575729.e-45"), ldexpf(13105970, -137));
EXPECT_EQ(ToFloat("459926601011.e15"), ldexpf(12466336, 65));
}
// Shared logic for testing a decimal string that lies exactly halfway between
// two adjacent target floats, `expected_low` and `expected_high`.
//
// `mantissa` and `exponent` describe that exact halfway value; the text that is
// parsed for it is `StrCat(mantissa, "e", exponent)`.
//
// Three inputs are checked:
//   * slightly below halfway (last mantissa digit decremented, then 1000
//     nines appended), which must parse to `expected_low`;
//   * slightly above halfway (1000 zeros and a trailing 1 appended), which
//     must parse to `expected_high`;
//   * the halfway value itself, which must parse to `expected_half`. That
//     must be either `expected_low` or `expected_high`.
//
// `mantissa` must be non-empty and its last character is decremented as a
// character, so it should be a digit other than '0'.
template <typename FloatType>
void TestHalfwayValue(const std::string& mantissa, int exponent,
                      FloatType expected_low, FloatType expected_high,
                      FloatType expected_half) {
  const std::string exponent_suffix = absl::StrCat("e", exponent);

  // Just below the halfway point.
  std::string below = mantissa;
  below.back() -= 1;
  below.append(1000, '9');
  below += exponent_suffix;
  FloatType parsed_below = 0;
  absl::from_chars(below.data(), below.data() + below.size(), parsed_below);
  EXPECT_EQ(expected_low, parsed_below);

  // Just above the halfway point.
  std::string above = mantissa;
  above.append(1000, '0');
  above += "1";
  above += exponent_suffix;
  FloatType parsed_above = 0;
  absl::from_chars(above.data(), above.data() + above.size(), parsed_above);
  EXPECT_EQ(expected_high, parsed_above);

  // Exactly at the halfway point.
  const std::string exact = mantissa + exponent_suffix;
  FloatType parsed_exact = 0;
  absl::from_chars(exact.data(), exact.data() + exact.size(), parsed_exact);
  EXPECT_EQ(expected_half, parsed_exact);
}
// Checks rounding of doubles at and around the exact midpoint between
// neighbouring representable values, at the edges of the subnormal and normal
// ranges. The long digit strings are the exact decimal expansions of those
// midpoints and must not be altered.
TEST(FromChars, DoubleRounding) {
// Named neighbours used as expected results below.
const double zero = 0.0;
const double first_subnormal = nextafter(zero, 1.0);
const double second_subnormal = nextafter(first_subnormal, 1.0);
const double first_normal = DBL_MIN;
const double last_subnormal = nextafter(first_normal, 0.0);
const double second_normal = nextafter(first_normal, 1.0);
const double last_normal = DBL_MAX;
const double penultimate_normal = nextafter(last_normal, 0.0);
// Various test cases for numbers between two representable floats. Each
// call to TestHalfwayValue tests a number just below and just above the
// halfway point, as well as the number exactly between them.
// Test between zero and first_subnormal. Round-to-even tie rounds down.
TestHalfwayValue(
"2."
"470328229206232720882843964341106861825299013071623822127928412503377536"
"351043759326499181808179961898982823477228588654633283551779698981993873"
"980053909390631503565951557022639229085839244910518443593180284993653615"
"250031937045767824921936562366986365848075700158576926990370631192827955"
"855133292783433840935197801553124659726357957462276646527282722005637400"
"648549997709659947045402082816622623785739345073633900796776193057750674"
"017632467360096895134053553745851666113422376667860416215968046191446729"
"184030053005753084904876539171138659164623952491262365388187963623937328"
"042389101867234849766823508986338858792562830275599565752445550725518931"
"369083625477918694866799496832404970582102851318545139621383772282614543"
"7693412532098591327667236328125",
-324, zero, first_subnormal, zero);
// Test between first_subnormal and second_subnormal. Round-to-even tie
// rounds up.
TestHalfwayValue(
"7."
"410984687618698162648531893023320585475897039214871466383785237510132609"
"053131277979497545424539885696948470431685765963899850655339096945981621"
"940161728171894510697854671067917687257517734731555330779540854980960845"
"750095811137303474765809687100959097544227100475730780971111893578483867"
"565399878350301522805593404659373979179073872386829939581848166016912201"
"945649993128979841136206248449867871357218035220901702390328579173252022"
"052897402080290685402160661237554998340267130003581248647904138574340187"
"552090159017259254714629617513415977493871857473787096164563890871811984"
"127167305601704549300470526959016576377688490826798697257336652176556794"
"107250876433756084600398490497214911746308553955635418864151316847843631"
"3080237596295773983001708984375",
-324, first_subnormal, second_subnormal, second_subnormal);
// Test between last_subnormal and first_normal. Round-to-even tie rounds up.
TestHalfwayValue(
"2."
"225073858507201136057409796709131975934819546351645648023426109724822222"
"021076945516529523908135087914149158913039621106870086438694594645527657"
"207407820621743379988141063267329253552286881372149012981122451451889849"
"057222307285255133155755015914397476397983411801999323962548289017107081"
"850690630666655994938275772572015763062690663332647565300009245888316433"
"037779791869612049497390377829704905051080609940730262937128958950003583"
"799967207254304360284078895771796150945516748243471030702609144621572289"
"880258182545180325707018860872113128079512233426288368622321503775666622"
"503982534335974568884423900265498198385487948292206894721689831099698365"
"846814022854243330660339850886445804001034933970427567186443383770486037"
"86162277173854562306587467901408672332763671875",
-308, last_subnormal, first_normal, first_normal);
// Test between first_normal and second_normal. Round-to-even tie rounds
// down.
TestHalfwayValue(
"2."
"225073858507201630123055637955676152503612414573018013083228724049586647"
"606759446192036794116886953213985520549032000903434781884412325572184367"
"563347617020518175998922941393629966742598285899994830148971433555578567"
"693279306015978183162142425067962460785295885199272493577688320732492479"
"924816869232247165964934329258783950102250973957579510571600738343645738"
"494324192997092179207389919761694314131497173265255020084997973676783743"
"155205818804439163810572367791175177756227497413804253387084478193655533"
"073867420834526162513029462022730109054820067654020201547112002028139700"
"141575259123440177362244273712468151750189745559978653234255886219611516"
"335924167958029604477064946470184777360934300451421683607013647479513962"
"13837722826145437693412532098591327667236328125",
-308, first_normal, second_normal, first_normal);
// Test between penultimate_normal and last_normal. Round-to-even tie rounds
// down.
TestHalfwayValue(
"1."
"797693134862315608353258760581052985162070023416521662616611746258695532"
"672923265745300992879465492467506314903358770175220871059269879629062776"
"047355692132901909191523941804762171253349609463563872612866401980290377"
"995141836029815117562837277714038305214839639239356331336428021390916694"
"57927874464075218944",
308, penultimate_normal, last_normal, penultimate_normal);
}
// Same test cases as DoubleRounding, but at single precision: checks rounding
// of floats at and around the exact midpoint between neighbouring
// representable values. The digit strings are the exact decimal expansions of
// those midpoints and must not be altered.
TEST(FromChars, FloatRounding) {
// Named neighbours used as expected results below.
const float zero = 0.0;
const float first_subnormal = nextafterf(zero, 1.0);
const float second_subnormal = nextafterf(first_subnormal, 1.0);
const float first_normal = FLT_MIN;
const float last_subnormal = nextafterf(first_normal, 0.0);
const float second_normal = nextafterf(first_normal, 1.0);
const float last_normal = FLT_MAX;
const float penultimate_normal = nextafterf(last_normal, 0.0);
// Test between zero and first_subnormal. Round-to-even tie rounds down.
TestHalfwayValue(
"7."
"006492321624085354618647916449580656401309709382578858785341419448955413"
"42930300743319094181060791015625",
-46, zero, first_subnormal, zero);
// Test between first_subnormal and second_subnormal. Round-to-even tie
// rounds up.
TestHalfwayValue(
"2."
"101947696487225606385594374934874196920392912814773657635602425834686624"
"028790902229957282543182373046875",
-45, first_subnormal, second_subnormal, second_subnormal);
// Test between last_subnormal and first_normal. Round-to-even tie rounds up.
TestHalfwayValue(
"1."
"175494280757364291727882991035766513322858992758990427682963118425003064"
"9651730385585324256680905818939208984375",
-38, last_subnormal, first_normal, first_normal);
// Test between first_normal and second_normal. Round-to-even tie rounds
// down.
TestHalfwayValue(
"1."
"175494420887210724209590083408724842314472120785184615334540294131831453"
"9442813071445925743319094181060791015625",
-38, first_normal, second_normal, first_normal);
// Test between penultimate_normal and last_normal. Round-to-even tie rounds
// down.
TestHalfwayValue("3.40282336497324057985868971510891282432", 38,
penultimate_normal, last_normal, penultimate_normal);
}
TEST(FromChars, Underflow) {
  // Check that underflow is handled correctly, according to the specification
  // in DR 3081. For an input far too small to represent, the test expects the
  // whole number to be consumed, result_out_of_range to be reported, and the
  // stored value to carry the input's sign with a magnitude no greater than
  // std::numeric_limits<T>::min().
  //
  // Each output starts at 100 with the opposite sign, so an output that was
  // never written fails the sign and magnitude checks.
  {
    const std::string negative_underflow = "-1e-1000";
    const char* const begin = negative_underflow.data();
    const char* const end = begin + negative_underflow.size();

    double d = 100.0;
    absl::from_chars_result result = absl::from_chars(begin, end, d);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(d));  // negative
    EXPECT_GE(d, -std::numeric_limits<double>::min());

    float f = 100.0f;
    result = absl::from_chars(begin, end, f);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(f));  // negative
    EXPECT_GE(f, -std::numeric_limits<float>::min());
  }
  {
    const std::string positive_underflow = "1e-1000";
    const char* const begin = positive_underflow.data();
    const char* const end = begin + positive_underflow.size();

    double d = -100.0;
    absl::from_chars_result result = absl::from_chars(begin, end, d);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(d));  // positive
    EXPECT_LE(d, std::numeric_limits<double>::min());

    float f = -100.0f;
    result = absl::from_chars(begin, end, f);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(f));  // positive
    EXPECT_LE(f, std::numeric_limits<float>::min());
  }
}
TEST(FromChars, Overflow) {
  // Check that overflow is handled correctly, according to the specification
  // in DR 3081. For an input far too large to represent, the test expects the
  // whole number to be consumed, result_out_of_range to be reported, and the
  // stored value to be std::numeric_limits<T>::max() with the input's sign.
  //
  // Each output starts at 100 with the opposite sign, so an output that was
  // never written fails the sign and value checks.
  {
    const std::string negative_overflow = "-1e1000";
    const char* const begin = negative_overflow.data();
    const char* const end = begin + negative_overflow.size();

    double d = 100.0;
    absl::from_chars_result result = absl::from_chars(begin, end, d);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(d));  // negative
    EXPECT_EQ(d, -std::numeric_limits<double>::max());

    float f = 100.0f;
    result = absl::from_chars(begin, end, f);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_TRUE(std::signbit(f));  // negative
    EXPECT_EQ(f, -std::numeric_limits<float>::max());
  }
  {
    const std::string positive_overflow = "1e1000";
    const char* const begin = positive_overflow.data();
    const char* const end = begin + positive_overflow.size();

    double d = -100.0;
    absl::from_chars_result result = absl::from_chars(begin, end, d);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(d));  // positive
    EXPECT_EQ(d, std::numeric_limits<double>::max());

    float f = -100.0f;
    result = absl::from_chars(begin, end, f);
    EXPECT_EQ(result.ptr, end);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_FALSE(std::signbit(f));  // positive
    EXPECT_EQ(f, std::numeric_limits<float>::max());
  }
}
// Regression input: a hex float (0x21900000 scaled by 2^99) whose exponent is
// written with many leading zeros. Parsing it as a float is expected to report
// result_out_of_range; the parsed value itself is not examined.
TEST(FromChars, RegressionTestsFromFuzzer) {
  const absl::string_view input = "0x21900000p00000000099";
  const char* const first = input.data();
  const char* const last = first + input.size();
  float parsed;
  const auto outcome = absl::from_chars(first, last, parsed);
  EXPECT_EQ(outcome.ec, std::errc::result_out_of_range);
}
TEST(FromChars, ReturnValuePtr) {
  // Check that `ptr` points one past the number scanned, even if that number
  // is not representable. Only `ec` and the offset of `ptr` are examined; the
  // parsed value is ignored.
  double d;
  {
    // A valid number followed by junk: parsing stops after "3.14".
    const std::string normal = "3.14@#$%@#$%";
    const absl::from_chars_result result =
        absl::from_chars(normal.data(), normal.data() + normal.size(), d);
    EXPECT_EQ(result.ec, std::errc());
    EXPECT_EQ(result.ptr - normal.data(), 4);
  }
  {
    // An out-of-range number followed by junk: "1e1000" is still consumed.
    const std::string overflow = "1e1000@#$%@#$%";
    const absl::from_chars_result result = absl::from_chars(
        overflow.data(), overflow.data() + overflow.size(), d);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_EQ(result.ptr - overflow.data(), 6);
  }
  {
    // No number at all: nothing is consumed.
    const std::string garbage = "#$%@#$%";
    const absl::from_chars_result result =
        absl::from_chars(garbage.data(), garbage.data() + garbage.size(), d);
    EXPECT_EQ(result.ec, std::errc::invalid_argument);
    EXPECT_EQ(result.ptr - garbage.data(), 0);
  }
}
// Check, over a wide range of inputs, that strtod() and absl::from_chars()
// produce exactly the same double.
//
// Inputs are seven-digit integer mantissas (stepping by 501) combined with
// every decimal exponent in [-300, 300).
//
// This test assumes the platform's strtod() uses perfect round_to_nearest
// rounding.
TEST(FromChars, TestVersusStrtod) {
  for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) {
    for (int exponent = -300; exponent < 300; ++exponent) {
      const std::string candidate = absl::StrCat(mantissa, "e", exponent);
      const char* const first = candidate.data();
      const char* const last = first + candidate.size();

      double from_absl = 0;
      absl::from_chars(first, last, from_absl);
      const double from_libc = strtod(candidate.c_str(), nullptr);

      ASSERT_EQ(from_libc, from_absl) << candidate;
    }
  }
}
// Check, over a wide range of inputs, that strtof() and absl::from_chars()
// produce exactly the same float.
//
// Inputs are seven-digit integer mantissas (stepping by 501) combined with
// every decimal exponent in [-43, 32).
//
// This test assumes the platform's strtof() uses perfect round_to_nearest
// rounding.
TEST(FromChars, TestVersusStrtof) {
  for (int mantissa = 1000000; mantissa <= 9999999; mantissa += 501) {
    for (int exponent = -43; exponent < 32; ++exponent) {
      const std::string candidate = absl::StrCat(mantissa, "e", exponent);
      const char* const first = candidate.data();
      const char* const last = first + candidate.size();

      float from_absl = 0;
      absl::from_chars(first, last, from_absl);
      const float strtof_value = strtof(candidate.c_str(), nullptr);

      ASSERT_EQ(strtof_value, from_absl) << candidate;
    }
  }
}
// Returns true when `a` and `b` have byte-for-byte identical object
// representations. Unlike operator== (and therefore EXPECT_EQ), this treats a
// NaN as identical to a NaN with the same bits, and distinguishes +0.0 from
// -0.0.
template <typename Float>
bool Identical(Float a, Float b) {
  const int byte_difference = memcmp(&a, &b, sizeof(Float));
  return byte_difference == 0;
}
// Check that NaNs are parsed correctly. The spec requires that
// std::from_chars on "NaN(123abc)" return the same value as std::nan("123abc").
// How such an n-char-sequence affects the generated NaN is unspecified, so we
// just test for symmetry with std::nan and strtod here.
//
// (In Linux, this parses the value as a number and stuffs that number into the
// free bits of a quiet NaN.)
//
// Parse targets start at 0.0 so that, if from_chars ever leaves one unwritten,
// the bitwise and sign checks below fail on a defined value instead of reading
// an indeterminate one.
TEST(FromChars, NaNDoubles) {
  for (const char* n_char_sequence :
       {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000",
        "8000000000000", "abc123", "legal_but_unexpected",
        "99999999999999999999999", "_"}) {
    const std::string input = absl::StrCat("nan(", n_char_sequence, ")");
    SCOPED_TRACE(input);
    double from_chars_double = 0.0;
    absl::from_chars(input.data(), input.data() + input.size(),
                     from_chars_double);
    const double std_nan_double = std::nan(n_char_sequence);
    EXPECT_TRUE(Identical(from_chars_double, std_nan_double));

    // Also check that we match strtod()'s behavior.  This test assumes that the
    // platform has a compliant strtod().
#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY
    const double strtod_double = strtod(input.c_str(), nullptr);
    EXPECT_TRUE(Identical(from_chars_double, strtod_double));
#endif  // ABSL_STRTOD_HANDLES_NAN_CORRECTLY

    // Check that we can parse a negative NaN: it must have the sign bit set
    // and otherwise match the positive NaN bit for bit.
    const std::string negative_input = "-" + input;
    double negative_from_chars_double = 0.0;
    absl::from_chars(negative_input.data(),
                     negative_input.data() + negative_input.size(),
                     negative_from_chars_double);
    EXPECT_TRUE(std::signbit(negative_from_chars_double));
    EXPECT_FALSE(Identical(negative_from_chars_double, from_chars_double));
    from_chars_double = std::copysign(from_chars_double, -1.0);
    EXPECT_TRUE(Identical(negative_from_chars_double, from_chars_double));
  }
}
// Single precision version of the NaN parsing check: absl::from_chars on
// "nan(<n-char-sequence>)" must produce the same bits as std::nanf() (and, where
// enabled, strtof()), and the negated input must produce the same NaN with the
// sign bit set.
//
// Parse targets start at 0.0f so that, if from_chars ever leaves one
// unwritten, the bitwise and sign checks below fail on a defined value instead
// of reading an indeterminate one.
TEST(FromChars, NaNFloats) {
  for (const char* n_char_sequence :
       {"", "1", "2", "3", "fff", "FFF", "200000", "400000", "4000000000000",
        "8000000000000", "abc123", "legal_but_unexpected",
        "99999999999999999999999", "_"}) {
    const std::string input = absl::StrCat("nan(", n_char_sequence, ")");
    SCOPED_TRACE(input);
    float from_chars_float = 0.0f;
    absl::from_chars(input.data(), input.data() + input.size(),
                     from_chars_float);
    const float std_nan_float = std::nanf(n_char_sequence);
    EXPECT_TRUE(Identical(from_chars_float, std_nan_float));

    // Also check that we match strtof()'s behavior.  This test assumes that the
    // platform has a compliant strtof().
#if ABSL_STRTOD_HANDLES_NAN_CORRECTLY
    const float strtof_float = strtof(input.c_str(), nullptr);
    EXPECT_TRUE(Identical(from_chars_float, strtof_float));
#endif  // ABSL_STRTOD_HANDLES_NAN_CORRECTLY

    // Check that we can parse a negative NaN: it must have the sign bit set
    // and otherwise match the positive NaN bit for bit.
    const std::string negative_input = "-" + input;
    float negative_from_chars_float = 0.0f;
    absl::from_chars(negative_input.data(),
                     negative_input.data() + negative_input.size(),
                     negative_from_chars_float);
    EXPECT_TRUE(std::signbit(negative_from_chars_float));
    EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float));
    // Use the (float, float) overload of std::copysign to prevent narrowing;
    // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98251.
    from_chars_float = std::copysign(from_chars_float, -1.0f);
    EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float));
  }
}
// Returns the next stride after `step`: `step` plus a quarter of `step`
// (computed with a right shift by two) plus one. For non-negative `step` the
// result is strictly larger than `step`, so repeated application grows
// roughly geometrically.
int NextStep(int step) {
  const int quarter = step >> 2;
  return step + quarter + 1;
}
// Test a conversion on a family of input strings, checking that the calculation
// is correct for in-bounds values, and that overflow and underflow are done
// correctly for out-of-bounds values.
//
// input_generator maps from an integer index to a string to test.
// expected_generator maps from an integer index to an expected Float value.
// from_chars conversion of input_generator(i) should result in
// expected_generator(i).
//
// lower_bound and upper_bound denote the smallest and largest values for which
// the conversion is expected to succeed. Indices are sampled with strides that
// grow via NextStep(): densely near each bound, sparsely far from it.
// Out-of-range indices are sampled down to -1000000 and up to 1000000
// (exclusive).
//
// Every parse target is seeded with NaN. If from_chars ever leaves the target
// unwritten, each comparison below then fails on a well-defined value instead
// of reading an indeterminate one.
template <typename Float>
void TestOverflowAndUnderflow(
    const std::function<std::string(int)>& input_generator,
    const std::function<Float(int)>& expected_generator, int lower_bound,
    int upper_bound) {
  const Float unwritten = std::numeric_limits<Float>::quiet_NaN();

  // Test legal values near lower_bound, walking upwards.
  for (int index = lower_bound, step = 1; index < upper_bound;
       index += step, step = NextStep(step)) {
    const std::string input = input_generator(index);
    SCOPED_TRACE(input);
    const Float expected = expected_generator(index);
    Float actual = unwritten;
    auto result =
        absl::from_chars(input.data(), input.data() + input.size(), actual);
    EXPECT_EQ(result.ec, std::errc());
    EXPECT_EQ(expected, actual)
        << absl::StrFormat("%a vs %a", expected, actual);
  }

  // Test legal values near upper_bound, walking downwards.
  for (int index = upper_bound, step = 1; index > lower_bound;
       index -= step, step = NextStep(step)) {
    const std::string input = input_generator(index);
    SCOPED_TRACE(input);
    const Float expected = expected_generator(index);
    Float actual = unwritten;
    auto result =
        absl::from_chars(input.data(), input.data() + input.size(), actual);
    EXPECT_EQ(result.ec, std::errc());
    EXPECT_EQ(expected, actual)
        << absl::StrFormat("%a vs %a", expected, actual);
  }

  // Test underflow values below lower_bound.
  for (int index = lower_bound - 1, step = 1; index > -1000000;
       index -= step, step = NextStep(step)) {
    const std::string input = input_generator(index);
    SCOPED_TRACE(input);
    Float actual = unwritten;
    auto result =
        absl::from_chars(input.data(), input.data() + input.size(), actual);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_LT(actual, 1.0);  // check for underflow
  }

  // Test overflow values above upper_bound.
  for (int index = upper_bound + 1, step = 1; index < 1000000;
       index += step, step = NextStep(step)) {
    const std::string input = input_generator(index);
    SCOPED_TRACE(input);
    Float actual = unwritten;
    auto result =
        absl::from_chars(input.data(), input.data() + input.size(), actual);
    EXPECT_EQ(result.ec, std::errc::result_out_of_range);
    EXPECT_GT(actual, 1.0);  // check for overflow
  }
}
// Check that overflow and underflow are caught correctly for hex doubles.
//
// Inputs have the form "0x1.0p<N>" and are expected to parse to 2^N.
//
// The largest representable double is 0x1.fffffffffffffp+1023, and the
// smallest representable subnormal is 0x0.0000000000001p-1022, which equals
// 0x1p-1074.  Therefore 1023 and -1074 are the limits of acceptable exponents
// in this test.
TEST(FromChars, HexdecimalDoubleLimits) {
  const auto make_input = [](int binary_exponent) -> std::string {
    return absl::StrCat("0x1.0p", binary_exponent);
  };
  const auto make_expected = [](int binary_exponent) -> double {
    return std::ldexp(1.0, binary_exponent);
  };
  TestOverflowAndUnderflow<double>(make_input, make_expected, -1074, 1023);
}
// Check that overflow and underflow are caught correctly for hex floats.
//
// Inputs have the form "0x1.0p<N>" and are expected to parse to 2^N.
//
// The largest representable float is 0x1.fffffep+127, and the smallest
// representable subnormal is 0x0.000002p-126, which equals 0x1p-149.
// Therefore 127 and -149 are the limits of acceptable exponents in this test.
TEST(FromChars, HexdecimalFloatLimits) {
  const auto make_input = [](int binary_exponent) -> std::string {
    return absl::StrCat("0x1.0p", binary_exponent);
  };
  const auto make_expected = [](int binary_exponent) -> float {
    return std::ldexp(1.0f, binary_exponent);
  };
  TestOverflowAndUnderflow<float>(make_input, make_expected, -149, 127);
}
// Check that overflow and underflow are caught correctly for decimal doubles.
//
// Inputs have the form "1.0e<N>" and are compared against Pow10(N).
//
// The largest representable double is about 1.8e308, and the smallest
// representable subnormal is about 5e-324.  '1e-324' is less than half of that
// subnormal, so it rounds away from the smallest representable positive value
// rather than up to it.  -323 and 308 are the limits of acceptable exponents
// in this test.
TEST(FromChars, DecimalDoubleLimits) {
  const auto make_input = [](int decimal_exponent) {
    return absl::StrCat("1.0e", decimal_exponent);
  };
  const auto make_expected = [](int decimal_exponent) {
    return Pow10(decimal_exponent);
  };
  TestOverflowAndUnderflow<double>(make_input, make_expected, -323, 308);
}
// Check that overflow and underflow are caught correctly for decimal floats.
//
// Inputs have the form "1.0e<N>" and are compared against Pow10(N), converted
// to float when it is returned through the std::function<float(int)>
// parameter.
//
// The largest representable float is about 3.4e38, and the smallest
// representable subnormal is about 1.45e-45.  '1e-45' therefore rounds towards
// the smallest representable positive value.  -45 and 38 are the limits of
// acceptable exponents in this test.
TEST(FromChars, DecimalFloatLimits) {
  const auto make_input = [](int decimal_exponent) {
    return absl::StrCat("1.0e", decimal_exponent);
  };
  const auto make_expected = [](int decimal_exponent) {
    return Pow10(decimal_exponent);
  };
  TestOverflowAndUnderflow<float>(make_input, make_expected, -45, 38);
}
} // namespace

View file

@ -0,0 +1,163 @@
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: charset.h
// -----------------------------------------------------------------------------
//
// This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
// characters.
//
// Instances can be initialized as constexpr constants. For example:
//
// constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
// constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
// constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
//
// Multiple instances can be combined, and the result is still a constexpr
// expression. For example:
//
// constexpr absl::CharSet kLettersAndNumbers =
// absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
//
// Several pre-defined character classes are available that mirror the methods
// from <cctype>. For example:
//
// constexpr absl::CharSet kLettersAndWhitespace =
// absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
//
// To check membership, use the .contains method, e.g.
//
// absl::CharSet hex_letters("abcdef");
// hex_letters.contains('a'); // true
// hex_letters.contains('g'); // false
#ifndef ABSL_STRINGS_CHARSET_H_
#define ABSL_STRINGS_CHARSET_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// A set of 8-bit characters stored as a 256-bit vector split across four
// 64-bit words: bit `c % 64` of word `c / 64` is set when the character whose
// unsigned value is `c` is a member. Every member function is constexpr.
class CharSet {
 public:
  // Creates the empty set.
  constexpr CharSet() : m_() {}

  // Creates the set of every character that occurs in `str`.
  constexpr explicit CharSet(absl::string_view str) : m_() {
    for (const char ch : str) {
      SetChar(static_cast<unsigned char>(ch));
    }
  }

  // Returns true if `c` is a member of this set.
  constexpr bool contains(char c) const {
    const unsigned char index = static_cast<unsigned char>(c);
    return ((m_[index / 64] >> (index % 64)) & 0x1) != 0;
  }

  // Returns true if this set has no members.
  constexpr bool empty() const {
    return (m_[0] | m_[1] | m_[2] | m_[3]) == 0;
  }

  // Returns the set containing only the character `x`.
  static constexpr CharSet Char(char x) {
    return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
  }

  // Returns the set of all characters in the closed interval [lo, hi]. The
  // bounds are compared by their `unsigned char` values.
  static constexpr CharSet Range(char lo, char hi) {
    return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
  }

  // Set intersection.
  friend constexpr CharSet operator&(const CharSet& lhs, const CharSet& rhs) {
    return CharSet(lhs.m_[0] & rhs.m_[0], lhs.m_[1] & rhs.m_[1],
                   lhs.m_[2] & rhs.m_[2], lhs.m_[3] & rhs.m_[3]);
  }

  // Set union.
  friend constexpr CharSet operator|(const CharSet& lhs, const CharSet& rhs) {
    return CharSet(lhs.m_[0] | rhs.m_[0], lhs.m_[1] | rhs.m_[1],
                   lhs.m_[2] | rhs.m_[2], lhs.m_[3] | rhs.m_[3]);
  }

  // Set complement with respect to all 256 character values.
  friend constexpr CharSet operator~(const CharSet& set) {
    return CharSet(~set.m_[0], ~set.m_[1], ~set.m_[2], ~set.m_[3]);
  }

  // Pre-defined character classes mirroring the char-classifying predicates
  // in <cctype>.
  static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
  static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
  static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
  static constexpr CharSet AsciiAlphabet() {
    return AsciiLowercase() | AsciiUppercase();
  }
  static constexpr CharSet AsciiAlphanumerics() {
    return AsciiDigits() | AsciiAlphabet();
  }
  static constexpr CharSet AsciiHexDigits() {
    return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
  }
  // Space (0x20) through tilde (0x7e), inclusive.
  static constexpr CharSet AsciiPrintable() {
    return CharSet::Range(0x20, 0x7e);
  }
  static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
  // Printable characters that are neither whitespace nor alphanumeric.
  static constexpr CharSet AsciiPunctuation() {
    return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
  }

 private:
  // Builds a set directly from its four words; `b0` holds characters 0-63,
  // `b1` characters 64-127, and so on.
  constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
      : m_{b0, b1, b2, b3} {}

  // Returns the bits of word number `word` for the closed range [lo, hi],
  // computed as [0, hi + 1) minus [0, lo).
  static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
    const uint64_t up_to_hi =
        OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word);
    const uint64_t below_lo =
        OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
    return up_to_hi & ~below_lo;
  }

  // Returns the bits of word number `word` for the half-open range
  // [0, upper).
  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
                                                     uint64_t word) {
    const uint64_t all_ones = ~static_cast<uint64_t>(0);
    // The range ends at or before this word starts: nothing is set.
    if (upper <= 64 * word) return 0;
    // The range covers this word entirely.
    if (upper >= 64 * (word + 1)) return all_ones;
    // The range ends inside this word; here `upper % 64` is in [1, 63], so
    // the shift count is always less than 64.
    return all_ones >> (64 - upper % 64);
  }

  // Returns the single-bit mask for `x` if it falls in word number `word`,
  // and 0 otherwise.
  static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
    const unsigned char index = static_cast<unsigned char>(x);
    if (index / 64 != word) return 0;
    return static_cast<uint64_t>(1) << (index % 64);
  }

  // Adds `c` to this set.
  constexpr void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  // Membership bits, lowest character values first.
  uint64_t m_[4];
};
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CHARSET_H_

View file

@ -0,0 +1,57 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include "benchmark/benchmark.h"
#include "absl/log/check.h"
#include "absl/strings/charset.h"
namespace {
// Builds the fixed CharSet used by the benchmark. Membership for the 256
// character values comes from eight 32-bit words, each formed by repeating one
// hex nibble (0, 1, 2, 3, f, e, d, c) eight times.
absl::CharSet MakeBenchmarkMap() {
  uint32_t words[] = {0x0, 0x1, 0x2, 0x3, 0xf, 0xe, 0xd, 0xc};
  // Multiplying a nibble by 0x11111111 replicates it across the whole word.
  for (uint32_t& word : words) word *= static_cast<uint32_t>(0x11111111UL);

  absl::CharSet result;
  for (uint32_t i = 0; i < 256; ++i) {
    const bool is_member = ((words[i / 32] >> (i % 32)) & 1) != 0;
    if (is_member) result = result | absl::CharSet::Char(i);
  }
  return result;
}
// Micro-benchmark for absl::CharSet::contains.
static void BM_Contains(benchmark::State& state) {
// Loop-body replicated 10 times to increase time per iteration.
// Argument continuously changed to avoid generating common subexpressions.
// Final CHECK used to discourage unwanted optimization.
const absl::CharSet benchmark_map = MakeBenchmarkMap();
// `c` is an unsigned char, so repeated increments wrap from 255 back to 0 and
// keep cycling through every character value.
unsigned char c = 0;
// Accumulates the results of contains() so that they are observably used.
int ops = 0;
for (auto _ : state) {
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
ops += benchmark_map.contains(c++);
}
// `ops` only ever has non-negative values added to it; the check exists to
// consume the accumulated value, not to validate it.
CHECK_NE(ops, -1);
}
BENCHMARK(BM_Contains);
} // namespace

View file

@ -0,0 +1,181 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/charset.h"
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "absl/strings/ascii.h"
#include "absl/strings/string_view.h"
namespace {
// Set containing all 256 character values (complement of the empty set).
constexpr absl::CharSet everything_map = ~absl::CharSet();
// Default-constructed, empty set.
constexpr absl::CharSet nothing_map = absl::CharSet();
// Exercises construction from strings, contains(), empty() and operator&.
TEST(Charmap, AllTests) {
const absl::CharSet also_nothing_map("");
EXPECT_TRUE(everything_map.contains('\0'));
EXPECT_FALSE(nothing_map.contains('\0'));
EXPECT_FALSE(also_nothing_map.contains('\0'));
// Visits 1..255: `ch` is an unsigned char, so incrementing 255 wraps to 0 and
// ends the loop. Value 0 is covered by the checks above.
for (unsigned char ch = 1; ch != 0; ++ch) {
SCOPED_TRACE(ch);
EXPECT_TRUE(everything_map.contains(ch));
EXPECT_FALSE(nothing_map.contains(ch));
EXPECT_FALSE(also_nothing_map.contains(ch));
}
// Only the first 5 characters ("&@#@^") are part of the view, so '!' and '?'
// must not be members.
const absl::CharSet symbols(absl::string_view("&@#@^!@?", 5));
EXPECT_TRUE(symbols.contains('&'));
EXPECT_TRUE(symbols.contains('@'));
EXPECT_TRUE(symbols.contains('#'));
EXPECT_TRUE(symbols.contains('^'));
EXPECT_FALSE(symbols.contains('!'));
EXPECT_FALSE(symbols.contains('?'));
// '@' occurs twice in the input but is a single member: 4 distinct members.
int cnt = 0;
for (unsigned char ch = 1; ch != 0; ++ch) cnt += symbols.contains(ch);
EXPECT_EQ(cnt, 4);
// `lets` covers only "^ab" (length 3).
const absl::CharSet lets(absl::string_view("^abcde", 3));
// `lets2` is given an explicit length of 10, so it includes the embedded NUL
// and the characters after it up to 'n'.
const absl::CharSet lets2(absl::string_view("fghij\0klmnop", 10));
// `lets3` is built from the bare string literal with no explicit length; the
// expectation below checks that characters after the embedded NUL are absent.
const absl::CharSet lets3("fghij\0klmnop");
EXPECT_TRUE(lets2.contains('k'));
EXPECT_FALSE(lets3.contains('k'));
// `symbols` and `lets` share '^'; `lets` and `lets2` share nothing.
EXPECT_FALSE((symbols & lets).empty());
EXPECT_TRUE((lets2 & lets).empty());
EXPECT_FALSE((lets & symbols).empty());
EXPECT_TRUE((lets & lets2).empty());
EXPECT_TRUE(nothing_map.empty());
EXPECT_FALSE(lets.empty());
}
// Returns a string holding every member of `m`, in increasing order of
// character value (0 through 255).
std::string Members(const absl::CharSet& m) {
  std::string members;
  for (size_t value = 0; value < 256; ++value) {
    const char ch = static_cast<char>(value);
    if (m.contains(ch)) {
      members.push_back(ch);
    }
  }
  return members;
}
// Returns the characters lo, lo + 1, ..., hi as a string. The walk does not
// assume lo <= hi: it starts at `lo` and steps (with unsigned char wraparound)
// until it reaches `hi`.
std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
  std::string result(1, static_cast<char>(lo));
  for (unsigned char current = lo; current != hi;) {
    ++current;
    result.push_back(static_cast<char>(current));
  }
  return result;
}
// Verifies that each way of building or querying a CharSet can be evaluated in
// a constant expression: every value under test is declared `constexpr`, so
// this test fails to compile if any of them stops being constexpr-capable.
TEST(Charmap, Constexpr) {
constexpr absl::CharSet kEmpty = absl::CharSet();
EXPECT_EQ(Members(kEmpty), "");
constexpr absl::CharSet kA = absl::CharSet::Char('A');
EXPECT_EQ(Members(kA), "A");
constexpr absl::CharSet kAZ = absl::CharSet::Range('A', 'Z');
EXPECT_EQ(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
constexpr absl::CharSet kIdentifier =
absl::CharSet::Range('0', '9') | absl::CharSet::Range('A', 'Z') |
absl::CharSet::Range('a', 'z') | absl::CharSet::Char('_');
// Members() lists characters in increasing value order, so '_' appears
// between the uppercase and lowercase letters.
EXPECT_EQ(Members(kIdentifier),
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"_"
"abcdefghijklmnopqrstuvwxyz");
constexpr absl::CharSet kAll = ~absl::CharSet();
for (size_t i = 0; i < 256; ++i) {
SCOPED_TRACE(i);
EXPECT_TRUE(kAll.contains(i));
}
// Construction from a string: duplicates collapse and Members() reports the
// distinct characters in increasing value order.
constexpr absl::CharSet kHello = absl::CharSet("Hello, world!");
EXPECT_EQ(Members(kHello), " !,Hdelorw");
// Negation and intersection: A-Z minus D-Z leaves A, B, C.
constexpr absl::CharSet kABC =
absl::CharSet::Range('A', 'Z') & ~absl::CharSet::Range('D', 'Z');
EXPECT_EQ(Members(kABC), "ABC");
// contains() in a constant expression.
constexpr bool kContainsA = absl::CharSet("abc").contains('a');
EXPECT_TRUE(kContainsA);
constexpr bool kContainsD = absl::CharSet("abc").contains('d');
EXPECT_FALSE(kContainsD);
// empty() in a constant expression.
constexpr bool kEmptyIsEmpty = absl::CharSet().empty();
EXPECT_TRUE(kEmptyIsEmpty);
constexpr bool kNotEmptyIsEmpty = absl::CharSet("abc").empty();
EXPECT_FALSE(kNotEmptyIsEmpty);
}
// Checks CharSet::Range against ClosedRangeString for every ordered pair
// (lo <= hi) drawn from a list of boundary values.
TEST(Charmap, Range) {
  // Exhaustive testing takes too long, so only probe values around the
  // boundaries most likely to cause trouble.
  const std::vector<size_t> poi = {0,   1,   2,   3,   4,   7,   8,   9,  15,
                                   16,  17,  30,  31,  32,  33,  63,  64, 65,
                                   127, 128, 129, 223, 224, 225, 254, 255};
  for (size_t lo_index = 0; lo_index < poi.size(); ++lo_index) {
    const size_t lo = poi[lo_index];
    SCOPED_TRACE(lo);
    // Start at `lo_index` so that single-character ranges are covered too.
    for (size_t hi_index = lo_index; hi_index < poi.size(); ++hi_index) {
      const size_t hi = poi[hi_index];
      SCOPED_TRACE(hi);
      EXPECT_EQ(Members(absl::CharSet::Range(lo, hi)),
                ClosedRangeString(lo, hi));
    }
  }
}
// A string_view with an explicit length may contain a NUL byte; the NUL and
// the characters after it must all become members of the set.
TEST(Charmap, NullByteWithStringView) {
  const char characters[] = {'a', 'b', '\0', 'd', 'x'};
  const absl::string_view view(characters, sizeof(characters));
  const absl::CharSet tester(view);
  for (const char expected_member : characters) {
    SCOPED_TRACE(static_cast<int>(expected_member));
    EXPECT_TRUE(tester.contains(expected_member));
  }
  EXPECT_FALSE(tester.contains('c'));
}
// Each pre-defined CharSet class must agree with the corresponding
// absl::ascii_is*() predicate for all 256 character values.
TEST(CharmapCtype, Match) {
  const absl::CharSet upper = absl::CharSet::AsciiUppercase();
  const absl::CharSet lower = absl::CharSet::AsciiLowercase();
  const absl::CharSet digits = absl::CharSet::AsciiDigits();
  const absl::CharSet alphabet = absl::CharSet::AsciiAlphabet();
  const absl::CharSet alphanumerics = absl::CharSet::AsciiAlphanumerics();
  const absl::CharSet hex_digits = absl::CharSet::AsciiHexDigits();
  const absl::CharSet printable = absl::CharSet::AsciiPrintable();
  const absl::CharSet whitespace = absl::CharSet::AsciiWhitespace();
  const absl::CharSet punctuation = absl::CharSet::AsciiPunctuation();

  for (int c = 0; c < 256; ++c) {
    SCOPED_TRACE(c);
    SCOPED_TRACE(static_cast<char>(c));
    EXPECT_EQ(absl::ascii_isupper(c), upper.contains(c));
    EXPECT_EQ(absl::ascii_islower(c), lower.contains(c));
    EXPECT_EQ(absl::ascii_isdigit(c), digits.contains(c));
    EXPECT_EQ(absl::ascii_isalpha(c), alphabet.contains(c));
    EXPECT_EQ(absl::ascii_isalnum(c), alphanumerics.contains(c));
    EXPECT_EQ(absl::ascii_isxdigit(c), hex_digits.contains(c));
    EXPECT_EQ(absl::ascii_isprint(c), printable.contains(c));
    EXPECT_EQ(absl::ascii_isspace(c), whitespace.contains(c));
    EXPECT_EQ(absl::ascii_ispunct(c), punctuation.contains(c));
  }
}
} // namespace

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,196 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/cord_analysis.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <unordered_set>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Accounting mode for analyzing memory usage.
//   kFairShare:        each node's size is weighted by a cumulative inverse
//                      reference count before being added to the total.
//   kTotal:            every node that is reached is added at its full size.
//   kTotalMorePrecise: like kTotal, but a node reached more than once is only
//                      added the first time.
enum class Mode { kFairShare, kTotal, kTotalMorePrecise };
// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode,
// holds a 'fraction' representing a cumulative inverse refcount weight.
template <Mode mode>
struct CordRepRef {
// Instantiates a CordRepRef instance.
explicit CordRepRef(absl::Nonnull<const CordRep*> r) : rep(r) {}
// Creates a child reference holding the provided child.
// Overloaded to add cumulative reference count for kFairShare.
CordRepRef Child(absl::Nonnull<const CordRep*> child) const {
return CordRepRef(child);
}
absl::Nonnull<const CordRep*> rep;
};
// RawUsage accumulates the computed total number of bytes.
template <Mode mode>
struct RawUsage {
  // Running total, in bytes.
  size_t total{0};

  // Adds `size` to the total. The CordRepRef argument is accepted only so that
  // all RawUsage variants share one call signature; it is ignored here.
  void Add(size_t size, CordRepRef<mode> /*unused*/) { total += size; }
};
// Specialization of RawUsage that remembers which objects have already been
// counted, so that an object referenced more than once by the same Cord is
// only added to the total once.
template <>
struct RawUsage<Mode::kTotalMorePrecise> {
  // Running total, in bytes.
  size_t total{0};

  // Every node that has contributed to `total` so far.
  // TODO(b/289250880): Replace this with a flat_hash_set.
  std::unordered_set<absl::Nonnull<const CordRep*>> counted;

  // Adds `size` to the total unless `repref.rep` was already counted.
  void Add(size_t size, CordRepRef<Mode::kTotalMorePrecise> repref) {
    const bool first_visit = counted.insert(repref.rep).second;
    if (!first_visit) {
      return;
    }
    total += size;
  }
};
// Returns `d / refcount`, skipping the division in the common case where
// `refcount` is 1.
template <typename refcount_t>
double MaybeDiv(double d, refcount_t refcount) {
  if (refcount == 1) {
    return d;
  }
  return d / refcount;
}
// Overloaded 'kFairShare' specialization for CordRepRef. This class holds a
// `fraction` value which represents a cumulative inverse refcount weight.
// For example, a top node with a reference count of 2 will have a fraction
// value of 1/2 = 0.5, representing the 'fair share' of memory it references.
// A node below such a node with a reference count of 5 then has a fraction of
// 0.5 / 5 = 0.1 representing the fair share of memory below that node, etc.
template <>
struct CordRepRef<Mode::kFairShare> {
// Creates a CordRepRef with the provided rep and top (parent) fraction.
explicit CordRepRef(absl::Nonnull<const CordRep*> r, double frac = 1.0)
: rep(r), fraction(MaybeDiv(frac, r->refcount.Get())) {}
// Returns a CordRepRef with a fraction of `this->fraction / child.refcount`
CordRepRef Child(absl::Nonnull<const CordRep*> child) const {
return CordRepRef(child, fraction);
}
absl::Nonnull<const CordRep*> rep;
double fraction;
};
// 'kFairShare' specialization of RawUsage. The total is kept as a double
// because each contribution is scaled by a fractional weight.
template <>
struct RawUsage<Mode::kFairShare> {
// Running weighted total, in bytes.
double total = 0;
// Adds `size` multiplied by `rep.fraction` to the total size.
void Add(size_t size, CordRepRef<Mode::kFairShare> rep) {
total += static_cast<double>(size) * rep.fraction;
}
};
// Adds the estimated memory size of the provided data edge to `raw_usage`.
// External reps are assumed 'heap allocated at their exact size'.
template <Mode mode>
void AnalyzeDataEdge(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
  assert(IsDataEdge(rep.rep));

  // A SUBSTRING node is charged for its own struct, after which the analysis
  // continues with the node it refers to.
  if (rep.rep->tag == SUBSTRING) {
    raw_usage.Add(sizeof(CordRepSubstring), rep);
    rep = rep.Child(rep.rep->substring()->child);
  }

  // What remains is a FLAT (tag >= FLAT) or an EXTERNAL node.
  size_t size = 0;
  if (rep.rep->tag >= FLAT) {
    size = rep.rep->flat()->AllocatedSize();
  } else {
    size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>);
  }
  raw_usage.Add(size, rep);
}
// Adds the memory size of the provided Btree node, and recursively of
// everything below it, to `raw_usage`.
template <Mode mode>
void AnalyzeBtree(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) {
  raw_usage.Add(sizeof(CordRepBtree), rep);

  const CordRepBtree* tree = rep.rep->btree();
  // Edges of a node with height > 0 are themselves Btree nodes; edges of a
  // height-0 node are data edges.
  const bool edges_are_btrees = tree->height() > 0;
  for (CordRep* edge : tree->Edges()) {
    const CordRepRef<mode> child = rep.Child(edge);
    if (edges_are_btrees) {
      AnalyzeBtree(child, raw_usage);
    } else {
      AnalyzeDataEdge(child, raw_usage);
    }
  }
}
// Computes the memory usage of the tree rooted at `rep` using the accounting
// rules selected by `mode`, and returns the total converted to size_t.
template <Mode mode>
size_t GetEstimatedUsage(absl::Nonnull<const CordRep*> rep) {
  // Usage totals start at zero.
  RawUsage<mode> raw_usage;

  // Wrap the top level node (and, depending on mode, its refcount weight).
  CordRepRef<mode> repref(rep);

  // A top level CRC node is charged first; analysis then continues with its
  // child, if it has one.
  if (repref.rep->tag == CRC) {
    raw_usage.Add(sizeof(CordRepCrc), repref);
    const CordRep* const crc_child = repref.rep->crc()->child;
    if (crc_child == nullptr) {
      return static_cast<size_t>(raw_usage.total);
    }
    repref = repref.Child(crc_child);
  }

  if (IsDataEdge(repref.rep)) {
    AnalyzeDataEdge(repref, raw_usage);
  } else if (repref.rep->tag == BTREE) {
    AnalyzeBtree(repref, raw_usage);
  } else {
    // No other node type is expected at this point.
    assert(false);
  }

  return static_cast<size_t>(raw_usage.total);
}
} // namespace
// Estimates memory usage with `Mode::kTotal` accounting: every node reached
// from `rep` is added at its full size.
size_t GetEstimatedMemoryUsage(absl::Nonnull<const CordRep*> rep) {
return GetEstimatedUsage<Mode::kTotal>(rep);
}
// Estimates memory usage with `Mode::kFairShare` accounting: each node's size
// is weighted by its cumulative inverse reference count.
size_t GetEstimatedFairShareMemoryUsage(absl::Nonnull<const CordRep*> rep) {
return GetEstimatedUsage<Mode::kFairShare>(rep);
}
// Estimates memory usage with `Mode::kTotalMorePrecise` accounting: a node
// reached more than once from `rep` is only counted the first time.
size_t GetMorePreciseMemoryUsage(absl::Nonnull<const CordRep*> rep) {
return GetEstimatedUsage<Mode::kTotalMorePrecise>(rep);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,63 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CORD_ANALYSIS_H_
#define ABSL_STRINGS_CORD_ANALYSIS_H_
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Returns the *approximate* number of bytes held in full or in part by this
// Cord (which may not remain the same between invocations). Cords that share
// memory could each be "charged" independently for the same shared memory.
size_t GetEstimatedMemoryUsage(absl::Nonnull<const CordRep*> rep);
// Returns the *approximate* number of bytes held in full or in part by this
// Cord for the distinct memory held by this cord. This is similar to
// `GetEstimatedMemoryUsage()`, except that if the cord has multiple references
// to the same memory, that memory is only counted once.
//
// For example:
//   absl::Cord cord;
//   cord.Append(some_other_cord);
//   cord.Append(some_other_cord);
//   // Calls GetEstimatedMemoryUsage() and counts `some_other_cord` twice:
//   cord.EstimatedMemoryUsage(kTotal);
//   // Calls GetMorePreciseMemoryUsage() and counts `some_other_cord` once:
//   cord.EstimatedMemoryUsage(kTotalMorePrecise);
//
// This is more expensive than `GetEstimatedMemoryUsage()` as it requires
// deduplicating all memory references.
size_t GetMorePreciseMemoryUsage(absl::Nonnull<const CordRep*> rep);
// Returns the *approximate* number of bytes held in full or in part by this
// CordRep weighted by the sharing ratio of that data. For example, if some data
// edge is shared by 4 different Cords, then each cord is attributed 1/4th of
// the total memory usage as a 'fair share' of the total memory usage.
size_t GetEstimatedFairShareMemoryUsage(absl::Nonnull<const CordRep*> rep);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORD_ANALYSIS_H_

View file

@ -0,0 +1,30 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/cord_buffer.h"
#include <cstddef>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Namespace-scope definitions of CordBuffer's `static constexpr` data members.
// They are compiled only when ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL is
// defined.
// NOTE(review): presumably that macro is set for language modes (before C++17)
// in which such members are not implicitly inline and need an out-of-class
// definition when odr-used — confirm against absl/base/config.h.
#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t CordBuffer::kDefaultLimit;
constexpr size_t CordBuffer::kCustomLimit;
#endif
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,572 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: cord_buffer.h
// -----------------------------------------------------------------------------
//
// This file defines an `absl::CordBuffer` data structure to hold data for
// eventual inclusion within an existing `Cord` data structure. Cord buffers are
// useful for building large Cords that may require custom allocation of their
// associated memory.
//
#ifndef ABSL_STRINGS_CORD_BUFFER_H_
#define ABSL_STRINGS_CORD_BUFFER_H_
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <utility>
#include "absl/base/config.h"
#include "absl/base/macros.h"
#include "absl/numeric/bits.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
class Cord;
class CordBufferTestPeer;
// CordBuffer
//
// CordBuffer manages memory buffers for purposes such as zero-copy APIs as well
// as applications building cords with large data requiring granular control
// over the allocation and size of cord data. For example, a function creating
// a cord of random data could use a CordBuffer as follows:
//
// absl::Cord CreateRandomCord(size_t length) {
// absl::Cord cord;
// while (length > 0) {
// CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(length);
// absl::Span<char> data = buffer.available_up_to(length);
// FillRandomValues(data.data(), data.size());
// buffer.IncreaseLengthBy(data.size());
// cord.Append(std::move(buffer));
// length -= data.size();
// }
// return cord;
// }
//
// CordBuffer instances are by default limited to a capacity of `kDefaultLimit`
// bytes. `kDefaultLimit` is currently just under 4KiB, but this default may
// change in the future and/or for specific architectures. The default limit is
// aimed to provide a good trade-off between performance and memory overhead.
// Smaller buffers typically incur more compute cost while larger buffers are
// more CPU efficient but create significant memory overhead because of such
// allocations being less granular. Using larger buffers may also increase the
// risk of memory fragmentation.
//
// Applications create a buffer using one of the `CreateWithDefaultLimit()` or
// `CreateWithCustomLimit()` methods. The returned instance will have a non-zero
// capacity and a zero length. Applications use the `data()` method to set the
// contents of the managed memory, and once done filling the buffer, use the
// `IncreaseLengthBy()` or `SetLength()` method to specify the length of the
// initialized data before adding the buffer to a Cord.
//
// The `CreateWithCustomLimit()` method is intended for applications needing
// larger buffers than the default memory limit, allowing the allocation of up
// to a capacity of `kCustomLimit` bytes minus some minimum internal overhead.
// The usage of `CreateWithCustomLimit()` should be limited to only those use
// cases where the distribution of the input is relatively well known, and/or
// where the efficiency gains outweigh the risk of memory
// fragmentation. See the documentation for `CreateWithCustomLimit()` for more
// information on using larger custom limits.
//
// The capacity of a `CordBuffer` returned by one of the `Create` methods may
// be larger than the requested capacity due to rounding, alignment and
// granularity of the memory allocator. Applications should use the `capacity`
// method to obtain the effective capacity of the returned instance as
// demonstrated in the provided example above.
//
// CordBuffer is a move-only class. All references into the managed memory are
// invalidated when an instance is moved into either another CordBuffer instance
// or a Cord. Writing to a location obtained by a previous call to `data()`
// after an instance was moved will lead to undefined behavior.
//
// A `moved from` CordBuffer instance will have a valid, but empty state.
// CordBuffer is thread compatible.
class CordBuffer {
public:
// kDefaultLimit
//
// Default capacity limits of allocated CordBuffers.
// See the class comments for more information on allocation limits.
static constexpr size_t kDefaultLimit = cord_internal::kMaxFlatLength;
// kCustomLimit
//
// Maximum size for CreateWithCustomLimit() allocated buffers.
// Note that the effective capacity may be slightly less
// because of internal overhead of internal cord buffers.
static constexpr size_t kCustomLimit = 64U << 10;
// Constructors, Destructors and Assignment Operators
// Creates an empty CordBuffer.
CordBuffer() = default;
// Destroys this CordBuffer instance and, if not empty, releases any memory
// managed by this instance, invalidating previously returned references.
~CordBuffer();
// CordBuffer is move-only
CordBuffer(CordBuffer&& rhs) noexcept;
CordBuffer& operator=(CordBuffer&&) noexcept;
CordBuffer(const CordBuffer&) = delete;
CordBuffer& operator=(const CordBuffer&) = delete;
// CordBuffer::MaximumPayload()
//
// Returns the guaranteed maximum payload for a CordBuffer returned by the
// `CreateWithDefaultLimit()` method. While small, each internal buffer inside
// a Cord incurs an overhead to manage the length, type and reference count
// for the buffer managed inside the cord tree. Applications can use this
// method to get approximate number of buffers required for a given byte
// size, etc.
//
// For example:
// const size_t payload = absl::CordBuffer::MaximumPayload();
// const size_t buffer_count = (total_size + payload - 1) / payload;
// buffers.reserve(buffer_count);
static constexpr size_t MaximumPayload();
// Overload to the above `MaximumPayload()` except that it returns the
// maximum payload for a CordBuffer returned by the `CreateWithCustomLimit()`
// method given the provided `block_size`.
static constexpr size_t MaximumPayload(size_t block_size);
// CordBuffer::CreateWithDefaultLimit()
//
// Creates a CordBuffer instance of the desired `capacity`, capped at the
// default limit `kDefaultLimit`. The returned buffer has a guaranteed
// capacity of at least `min(kDefaultLimit, capacity)`. See the class comments
// for more information on buffer capacities and intended usage.
static CordBuffer CreateWithDefaultLimit(size_t capacity);
// CordBuffer::CreateWithCustomLimit()
//
// Creates a CordBuffer instance of the desired `capacity` rounded to an
// appropriate power of 2 size less than, or equal to `block_size`.
// Requires `block_size` to be a power of 2.
//
// If `capacity` is less than or equal to `kDefaultLimit`, then this method
// behaves identical to `CreateWithDefaultLimit`, which means that the caller
// is guaranteed to get a buffer of at least the requested capacity.
//
// If `capacity` is greater than or equal to `block_size`, then this method
// returns a buffer with an `allocated size` of `block_size` bytes. Otherwise,
// this method returns a buffer with a suitable smaller power of 2 block size
// to satisfy the request. The actual size depends on a number of factors, and
// is typically (but not necessarily) the highest or second highest power of 2
// value less than or equal to `capacity`.
//
// The 'allocated size' includes a small amount of overhead required for
// internal state, which is currently 13 bytes on 64-bit platforms. For
// example: a buffer created with `block_size` and `capacity` set to 8KiB
// will have an allocated size of 8KiB, and an effective internal `capacity`
// of 8KiB - 13 = 8179 bytes.
//
// To demonstrate this in practice, let's assume we want to read data from
// somewhat larger files using approximately 64KiB buffers:
//
// absl::Cord ReadFromFile(int fd, size_t n) {
// absl::Cord cord;
// while (n > 0) {
// CordBuffer buffer = CordBuffer::CreateWithCustomLimit(64 << 10, n);
// absl::Span<char> data = buffer.available_up_to(n);
// ReadFileDataOrDie(fd, data.data(), data.size());
// buffer.IncreaseLengthBy(data.size());
// cord.Append(std::move(buffer));
// n -= data.size();
// }
// return cord;
// }
//
// If we'd use this function to read a file of 659KiB, we may get the
// following pattern of allocated cord buffer sizes:
//
// CreateWithCustomLimit(64KiB, 674816) --> ~64KiB (65523)
// CreateWithCustomLimit(64KiB, 674816) --> ~64KiB (65523)
// ...
// CreateWithCustomLimit(64KiB, 19586) --> ~16KiB (16371)
// CreateWithCustomLimit(64KiB, 3215) --> 3215 (at least 3215)
//
// The reason the method returns a 16K buffer instead of a roughly 19K buffer
// is to reduce memory overhead and fragmentation risks. Using carefully
// chosen power of 2 values reduces the entropy of allocated memory sizes.
//
// Additionally, let's assume we'd use the above function on files that are
// generally smaller than 64K. If we'd use 'precise' sized buffers for such
// files, then we'd get a very wide distribution of allocated memory sizes
// rounded to 4K page sizes, and we'd end up with a lot of unused capacity.
//
// In general, applications should only use custom sizes if the data they are
// consuming or storing is expected to be many times the chosen block size,
// and be based on objective data and performance metrics. For example, a
// compress function may work faster and consume less CPU when using larger
// buffers. Such an application should pick a size offering a reasonable
// trade-off between expected data size, compute savings with larger buffers,
// and the cost or fragmentation effect of larger buffers.
// Applications must pick a reasonable spot on that curve, and make sure their
// data meets their expectations in size distributions such as "mostly large".
static CordBuffer CreateWithCustomLimit(size_t block_size, size_t capacity);
// CordBuffer::available()
//
// Returns the span delineating the available capacity in this buffer
// which is defined as `{ data() + length(), capacity() - length() }`.
absl::Span<char> available();
// CordBuffer::available_up_to()
//
// Returns the span delineating the available capacity in this buffer limited
// to `size` bytes. This is equivalent to `available().subspan(0, size)`.
absl::Span<char> available_up_to(size_t size);
// CordBuffer::data()
//
// Returns a non-null reference to the data managed by this instance.
// Applications are allowed to write up to `capacity` bytes of instance data.
// CordBuffer data is uninitialized by default. Reading data from an instance
// that has not yet been initialized will lead to undefined behavior.
char* data();
const char* data() const;
// CordBuffer::length()
//
// Returns the length of this instance. The default length of a CordBuffer is
// 0, indicating an 'empty' CordBuffer. Applications must specify the length
// of the data in a CordBuffer before adding it to a Cord.
size_t length() const;
// CordBuffer::capacity()
//
// Returns the capacity of this instance. All instances have a non-zero
// capacity: default and `moved from` instances have a small internal buffer.
size_t capacity() const;
// CordBuffer::IncreaseLengthBy()
//
// Increases the length of this buffer by the specified 'n' bytes.
// Applications must make sure all data in this buffer up to the new length
// has been initialized before adding a CordBuffer to a Cord: failure to do so
// will lead to undefined behavior. Requires `length() + n <= capacity()`.
// Typically, applications will use `available_up_to()` to get a span of the
// desired capacity, and use `span.size()` to increase the length as in:
// absl::Span<char> span = buffer.available_up_to(desired);
// buffer.IncreaseLengthBy(span.size());
// memcpy(span.data(), src, span.size());
// etc...
void IncreaseLengthBy(size_t n);
// CordBuffer::SetLength()
//
// Sets the data length of this instance. Applications must make sure all data
// of the specified length has been initialized before adding a CordBuffer to
// a Cord: failure to do so will lead to undefined behavior.
// Setting the length to a small value or zero does not release any memory
// held by this CordBuffer instance. Requires `length <= capacity()`.
// Applications should preferably use the `IncreaseLengthBy()` method above
// in combination with the 'available()` or `available_up_to()` methods.
void SetLength(size_t length);
private:
// Make sure we don't accidentally over promise: the advertised custom limit
// must not exceed the largest flat size the internal representation supports.
static_assert(kCustomLimit <= cord_internal::kMaxLargeFlatSize, "");
// Assume the cost of an 'uprounded' allocation to CeilPow2(size) versus
// the cost of allocating at least 1 extra flat <= 4KB:
// - Flat overhead = 13 bytes
// - Btree amortized cost / node =~ 13 bytes
// - 64 byte granularity of tcmalloc at 4K =~ 32 byte average
// CPU cost and efficiency requires we should at least 'save' something by
// splitting, as a poor man's measure, we say the slop needs to be
// at least double the cost offset to make it worth splitting: ~128 bytes.
static constexpr size_t kMaxPageSlop = 128;
// Overhead for allocating a flat.
static constexpr size_t kOverhead = cord_internal::kFlatOverhead;
// Shorthand for the flat node type used for externally allocated buffers.
using CordRepFlat = cord_internal::CordRepFlat;
// `Rep` is the internal data representation of a CordBuffer. The internal
// representation has an internal small size optimization similar to
// std::string (SSO).
//
// A `Rep` is a union of a `Long` form (a pointer to an allocated CordRepFlat)
// and a `Short` form (inline bytes plus a tagged size byte). The least
// significant bit of the `raw_size` byte tells the two forms apart: it is set
// for the short form, and the remaining bits hold the inline length.
struct Rep {
// Inline SSO size of a CordBuffer
static constexpr size_t kInlineCapacity = sizeof(intptr_t) * 2 - 1;
// Creates a default instance with kInlineCapacity.
Rep() : short_rep{} {}
// Creates an instance managing an allocated non zero CordRep.
explicit Rep(cord_internal::CordRepFlat* rep) : long_rep{rep} {
assert(rep != nullptr);
}
// Returns true if this instance manages the SSO internal buffer.
// Inspects the byte at the offset of `Short::raw_size` through a `char`
// pointer and tests its least significant bit.
bool is_short() const {
constexpr size_t offset = offsetof(Short, raw_size);
return (reinterpret_cast<const char*>(this)[offset] & 1) != 0;
}
// Returns the available area of the internal SSO data
absl::Span<char> short_available() {
const size_t length = short_length();
return absl::Span<char>(short_rep.data + length,
kInlineCapacity - length);
}
// Returns the available area of the data held by the managed CordRepFlat,
// i.e. the bytes between its current length and its capacity.
absl::Span<char> long_available() const {
assert(!is_short());
const size_t length = long_rep.rep->length;
return absl::Span<char>(long_rep.rep->Data() + length,
long_rep.rep->Capacity() - length);
}
// Returns the length of the internal SSO data.
// The length is stored shifted left by one, above the tag bit.
size_t short_length() const {
assert(is_short());
return static_cast<size_t>(short_rep.raw_size >> 1);
}
// Sets the length of the internal SSO data.
// Disregards any previously set CordRep instance.
// The `+ 1` sets the tag bit that marks this instance as short.
void set_short_length(size_t length) {
short_rep.raw_size = static_cast<char>((length << 1) + 1);
}
// Adds `n` to the current short length.
// `n` is shifted past the tag bit, so the tag bit itself is left unchanged.
void add_short_length(size_t n) {
assert(is_short());
short_rep.raw_size += static_cast<char>(n << 1);
}
// Returns reference to the internal SSO data buffer.
char* data() {
assert(is_short());
return short_rep.data;
}
const char* data() const {
assert(is_short());
return short_rep.data;
}
// Returns a pointer to the external CordRep managed by this instance.
cord_internal::CordRepFlat* rep() const {
assert(!is_short());
return long_rep.rep;
}
// The internal representation takes advantage of the fact that allocated
// memory is always on an even address, and uses the least significant bit
// of the first or last byte (depending on endianness) as the inline size
// indicator overlapping with the least significant byte of the CordRep*.
#if defined(ABSL_IS_BIG_ENDIAN)
// Big endian: the pointer comes last so that its least significant byte is
// the last byte, which is where `Short::raw_size` is placed.
struct Long {
explicit Long(cord_internal::CordRepFlat* rep_arg) : rep(rep_arg) {}
void* padding;
cord_internal::CordRepFlat* rep;
};
struct Short {
char data[sizeof(Long) - 1];
char raw_size = 1;
};
#else
// Little endian: the pointer comes first so that its least significant byte
// is the first byte, which is where `Short::raw_size` is placed.
struct Long {
explicit Long(cord_internal::CordRepFlat* rep_arg) : rep(rep_arg) {}
cord_internal::CordRepFlat* rep;
void* padding;
};
struct Short {
char raw_size = 1;
char data[sizeof(Long) - 1];
};
#endif
// Overlapping storage for the two representations.
union {
Long long_rep;
Short short_rep;
};
};
// Power2 functions
//
// Returns true if `size` has exactly one bit set.
static bool IsPow2(size_t size) {
  const bool single_bit = absl::has_single_bit(size);
  return single_bit;
}
// Returns the bit width of `size` minus one, converted to size_t.
static size_t Log2Floor(size_t size) {
  const auto width = absl::bit_width(size);
  return static_cast<size_t>(width - 1);
}
// Returns the bit width of `size - 1`, converted to size_t.
static size_t Log2Ceil(size_t size) {
  const size_t one_below = size - 1;
  return static_cast<size_t>(absl::bit_width(one_below));
}
// Implementation of `CreateWithCustomLimit()`.
// This implementation allows for future memory allocation hints to
// be passed down into the CordRepFlat allocation function.
// `hints` are forwarded unchanged as trailing arguments of the flat
// allocation call; `CreateWithCustomLimit()` passes none.
template <typename... AllocationHints>
static CordBuffer CreateWithCustomLimitImpl(size_t block_size,
size_t capacity,
AllocationHints... hints);
// Hands the contents of this buffer over to the caller and resets the buffer.
//
// If this instance manages a CordRep*, that (non-null) pointer is returned and
// `short_value` is left untouched. If this instance is an SSO instance, the
// method returns nullptr and points `short_value` at the inlined data.
// Either way this instance ends up as an empty SSO instance of length zero.
// This method is intended to be used by Cord internal functions only.
cord_internal::CordRep* ConsumeValue(absl::string_view& short_value) {
  cord_internal::CordRep* consumed = nullptr;
  if (!rep_.is_short()) {
    consumed = rep_.rep();
  } else {
    const size_t inline_length = rep_.short_length();
    short_value = absl::string_view(rep_.data(), inline_length);
  }
  // Resetting the tagged size byte drops the reference to any CordRep.
  rep_.set_short_length(0);
  return consumed;
}
// Internal constructor.
explicit CordBuffer(cord_internal::CordRepFlat* rep) : rep_(rep) {
assert(rep != nullptr);
}
// The tagged short / long representation of this buffer.
Rep rep_;
// Classes granted access to the private members of CordBuffer.
friend class Cord;
friend class CordBufferTestPeer;
};
// Returns the maximum payload for the default limit, which is
// `cord_internal::kMaxFlatLength`.
inline constexpr size_t CordBuffer::MaximumPayload() {
return cord_internal::kMaxFlatLength;
}
inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) {
return (std::min)(kCustomLimit, block_size) - cord_internal::kFlatOverhead;
}
// Creates a buffer for `capacity` bytes. Requests that fit in the inline
// capacity yield a default constructed buffer; larger requests allocate a
// flat whose length is reset to zero.
inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) {
  if (capacity <= Rep::kInlineCapacity) {
    return CordBuffer();
  }
  auto* flat = cord_internal::CordRepFlat::New(capacity);
  flat->length = 0;
  return CordBuffer(flat);
}
// Picks an allocation size for the requested `capacity` within `block_size`
// (which must be a power of two) and allocates a large flat of that size
// minus the flat overhead. `hints` are passed through to the allocation.
template <typename... AllocationHints>
inline CordBuffer CordBuffer::CreateWithCustomLimitImpl(
    size_t block_size, size_t capacity, AllocationHints... hints) {
  assert(IsPow2(block_size));
  // Neither value may exceed the custom limit.
  block_size = (std::min)(block_size, kCustomLimit);
  capacity = (std::min)(capacity, kCustomLimit);
  if (capacity + kOverhead >= block_size) {
    // The request fills the block: use the whole block.
    capacity = block_size;
  } else if (capacity <= kDefaultLimit) {
    capacity += kOverhead;
  } else if (!IsPow2(capacity)) {
    // Check if rounded up to next power 2 is a good enough fit
    // with limited waste making it an acceptable direct fit.
    const size_t pow2_above = size_t{1} << Log2Ceil(capacity);
    const size_t waste = pow2_above - capacity;
    const bool acceptable_fit =
        waste >= kOverhead && waste <= kMaxPageSlop + kOverhead;
    // Otherwise round down to highest power of 2 <= capacity.
    // Consider a more aggressive step down if that may reduce the
    // risk of fragmentation where 'people are holding it wrong'.
    capacity = acceptable_fit ? pow2_above
                              : (size_t{1} << Log2Floor(capacity));
  }
  auto* flat =
      CordRepFlat::New(CordRepFlat::Large(), capacity - kOverhead, hints...);
  flat->length = 0;
  return CordBuffer(flat);
}
// Forwards to `CreateWithCustomLimitImpl()` without any allocation hints.
inline CordBuffer CordBuffer::CreateWithCustomLimit(size_t block_size,
size_t capacity) {
return CreateWithCustomLimitImpl(block_size, capacity);
}
// Releases the managed flat, if any. SSO instances own nothing.
inline CordBuffer::~CordBuffer() {
  if (rep_.is_short()) return;
  cord_internal::CordRepFlat::Delete(rep_.rep());
}
// Move constructor: copies the representation of `rhs` and then resets `rhs`
// to an empty SSO instance so that it no longer refers to the flat.
inline CordBuffer::CordBuffer(CordBuffer&& rhs) noexcept : rep_(rhs.rep_) {
rhs.rep_.set_short_length(0);
}
// Move assignment: releases any flat currently managed by this instance,
// takes over the representation of `rhs`, and resets `rhs` to an empty SSO
// instance.
inline CordBuffer& CordBuffer::operator=(CordBuffer&& rhs) noexcept {
  if (!rep_.is_short()) {
    CordRepFlat::Delete(rep_.rep());
  }
  rep_ = rhs.rep_;
  rhs.rep_.set_short_length(0);
  return *this;
}
// Returns the unused tail of the inline buffer or of the managed flat.
inline absl::Span<char> CordBuffer::available() {
  if (rep_.is_short()) {
    return rep_.short_available();
  }
  return rep_.long_available();
}
// Returns `available()` limited to at most `size` bytes.
inline absl::Span<char> CordBuffer::available_up_to(size_t size) {
  const absl::Span<char> remaining = available();
  return remaining.subspan(0, size);
}
// Returns the start of the inline buffer or of the managed flat's data.
inline char* CordBuffer::data() {
  if (rep_.is_short()) {
    return rep_.data();
  }
  return rep_.rep()->Data();
}
// Const overload: returns the start of the inline buffer or of the managed
// flat's data.
inline const char* CordBuffer::data() const {
  if (rep_.is_short()) {
    return rep_.data();
  }
  return rep_.rep()->Data();
}
// Returns the inline capacity for SSO instances, else the flat's capacity.
inline size_t CordBuffer::capacity() const {
  if (rep_.is_short()) {
    return Rep::kInlineCapacity;
  }
  return rep_.rep()->Capacity();
}
// Returns the inline length for SSO instances, else the flat's length.
inline size_t CordBuffer::length() const {
  if (rep_.is_short()) {
    return rep_.short_length();
  }
  return rep_.rep()->length;
}
// Sets the data length. `length` must not exceed `capacity()`.
inline void CordBuffer::SetLength(size_t length) {
  ABSL_HARDENING_ASSERT(length <= capacity());
  if (!rep_.is_short()) {
    rep_.rep()->length = length;
    return;
  }
  rep_.set_short_length(length);
}
// Grows the data length by `n`. The resulting length must not exceed
// `capacity()`; `n` alone is checked first so the sum is only trusted for
// values of `n` within capacity.
inline void CordBuffer::IncreaseLengthBy(size_t n) {
  ABSL_HARDENING_ASSERT(n <= capacity() && length() + n <= capacity());
  if (!rep_.is_short()) {
    rep_.rep()->length += n;
    return;
  }
  rep_.add_short_length(n);
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORD_BUFFER_H_

View file

@ -0,0 +1,322 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/cord_buffer.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
using testing::Eq;
using testing::Ge;
using testing::Le;
using testing::Ne;
namespace absl {
ABSL_NAMESPACE_BEGIN
// Test-only accessor for the private `CordBuffer::ConsumeValue()` method.
class CordBufferTestPeer {
 public:
  // Forwards to `buffer.ConsumeValue(short_value)` and returns its result.
  static cord_internal::CordRep* ConsumeValue(CordBuffer& buffer,
                                              absl::string_view& short_value) {
    cord_internal::CordRep* const consumed = buffer.ConsumeValue(short_value);
    return consumed;
  }
};
namespace {
using ::absl::cordrep_testing::CordToString;
// Expected inline (SSO) capacity: every byte of a CordBuffer except one.
constexpr size_t kInlinedSize = sizeof(CordBuffer) - 1;
// Local shorthands for limits and sizes defined by the library.
constexpr size_t kDefaultLimit = CordBuffer::kDefaultLimit;
constexpr size_t kCustomLimit = CordBuffer::kCustomLimit;
constexpr size_t kMaxFlatSize = cord_internal::kMaxFlatSize;
constexpr size_t kMaxFlatLength = cord_internal::kMaxFlatLength;
constexpr size_t kFlatOverhead = cord_internal::kFlatOverhead;
// Byte sizes used as block sizes and requested capacities in the tests.
constexpr size_t k8KiB = 8 << 10;
constexpr size_t k16KiB = 16 << 10;
constexpr size_t k64KiB = 64 << 10;
constexpr size_t k1MB = 1 << 20;
// Fixture parameterized on the requested buffer capacity.
class CordBufferTest : public testing::TestWithParam<size_t> {};
// Requested capacities around the inline size and up to the default limit.
INSTANTIATE_TEST_SUITE_P(MediumSize, CordBufferTest,
testing::Values(1, kInlinedSize - 1, kInlinedSize,
kInlinedSize + 1, kDefaultLimit - 1,
kDefaultLimit));
// Checks the default maximum payload and the payload for explicit block
// sizes; a 1 MB block is expected to yield the same payload as a 64 KiB one.
TEST_P(CordBufferTest, MaximumPayload) {
  const size_t default_payload = CordBuffer::MaximumPayload();
  EXPECT_THAT(default_payload, Eq(kMaxFlatLength));

  const size_t payload_512 = CordBuffer::MaximumPayload(512);
  EXPECT_THAT(payload_512, Eq(512 - kFlatOverhead));

  const size_t payload_64k = CordBuffer::MaximumPayload(k64KiB);
  EXPECT_THAT(payload_64k, Eq(k64KiB - kFlatOverhead));

  const size_t payload_1mb = CordBuffer::MaximumPayload(k1MB);
  EXPECT_THAT(payload_1mb, Eq(k64KiB - kFlatOverhead));
}
// A default constructed buffer is empty, has the inline capacity, and its
// whole capacity is available and writable.
TEST(CordBufferTest, ConstructDefault) {
  CordBuffer empty;
  EXPECT_THAT(empty.capacity(), Eq(sizeof(CordBuffer) - 1));
  EXPECT_THAT(empty.length(), Eq(0));
  EXPECT_THAT(empty.data(), Ne(nullptr));
  const absl::Span<char> free_space = empty.available();
  EXPECT_THAT(free_space.data(), Eq(empty.data()));
  EXPECT_THAT(free_space.size(), Eq(empty.capacity()));
  // Touch every byte so that memory checkers can flag an invalid capacity.
  memset(empty.data(), 0xCD, empty.capacity());
}
// A 3 byte request is served from the inline buffer: consuming the value
// returns no CordRep and exposes the bytes through `short_value`.
TEST(CordBufferTest, CreateSsoWithDefaultLimit) {
  CordBuffer sso = CordBuffer::CreateWithDefaultLimit(3);
  EXPECT_THAT(sso.capacity(), Ge(3));
  EXPECT_THAT(sso.capacity(), Le(sizeof(CordBuffer)));
  EXPECT_THAT(sso.length(), Eq(0));

  memset(sso.data(), 0xCD, sso.capacity());
  memcpy(sso.data(), "Abc", 3);
  sso.SetLength(3);
  EXPECT_THAT(sso.length(), Eq(3));

  absl::string_view short_value;
  cord_internal::CordRep* const rep =
      CordBufferTestPeer::ConsumeValue(sso, short_value);
  EXPECT_THAT(rep, Eq(nullptr));
  EXPECT_THAT(absl::string_view(sso.data(), 3), Eq("Abc"));
  EXPECT_THAT(short_value, Eq("Abc"));
}
// `available()` starts at `data() + length()` and spans the rest of the
// capacity, both before and after setting a length.
TEST_P(CordBufferTest, Available) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(want);

  absl::Span<char> free_space = buffer.available();
  EXPECT_THAT(free_space.data(), Eq(buffer.data()));
  EXPECT_THAT(free_space.size(), Eq(buffer.capacity()));

  buffer.SetLength(2);
  free_space = buffer.available();
  EXPECT_THAT(free_space.data(), Eq(buffer.data() + 2));
  EXPECT_THAT(free_space.size(), Eq(buffer.capacity() - 2));
}
// Successive `IncreaseLengthBy()` calls accumulate.
TEST_P(CordBufferTest, IncreaseLengthBy) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(want);

  buffer.IncreaseLengthBy(2);
  EXPECT_THAT(buffer.length(), Eq(2));

  buffer.IncreaseLengthBy(5);
  EXPECT_THAT(buffer.length(), Eq(7));
}
// `available_up_to(3)` yields at most 3 bytes, and fewer when less capacity
// remains.
TEST_P(CordBufferTest, AvailableUpTo) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(want);

  size_t expected_size = std::min<size_t>(3, buffer.capacity());
  absl::Span<char> window = buffer.available_up_to(3);
  EXPECT_THAT(window.data(), Eq(buffer.data()));
  window = buffer.available_up_to(3);
  EXPECT_THAT(window.size(), Eq(expected_size));

  buffer.SetLength(2);
  expected_size = std::min<size_t>(3, buffer.capacity() - 2);
  window = buffer.available_up_to(3);
  EXPECT_THAT(window.data(), Eq(buffer.data() + 2));
  window = buffer.available_up_to(3);
  EXPECT_THAT(window.size(), Eq(expected_size));
}
// Returns the maximum capacity a buffer may report for a given block_size.
//
// The maximum returned size is always capped at block_size - kFlatOverhead,
// independent of the requested size. `requested` is kept in the signature so
// that call sites document which request they are bounding, but it does not
// influence the result.
size_t MaxCapacityFor(size_t block_size, size_t requested) {
  static_cast<void>(requested);
  return block_size - kFlatOverhead;
}
// A buffer created with the default limit has at least the requested
// capacity, starts empty, and round-trips `requested` bytes of data.
TEST_P(CordBufferTest, CreateWithDefaultLimit) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(want);
  EXPECT_THAT(buffer.capacity(), Ge(want));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(kMaxFlatSize, want)));
  EXPECT_THAT(buffer.length(), Eq(0));

  memset(buffer.data(), 0xCD, buffer.capacity());

  // `want - 1` characters plus the terminating zero make up `want` bytes.
  std::string payload(want - 1, 'x');
  memcpy(buffer.data(), payload.c_str(), want);
  buffer.SetLength(want);

  EXPECT_THAT(buffer.length(), Eq(want));
  EXPECT_THAT(absl::string_view(buffer.data()), Eq(payload));
}
// Asking for 2 GiB with the default limit must still yield a small buffer.
TEST(CordBufferTest, CreateWithDefaultLimitAskingFor2GB) {
  constexpr size_t k2GiB = 1U << 31;
  CordBuffer huge_request = CordBuffer::CreateWithDefaultLimit(k2GiB);
  // Expect to never be awarded more than a reasonable memory size, even in
  // cases where a (debug) memory allocator may grant us somewhat more memory
  // than `kDefaultLimit` which should be no more than `2 * kDefaultLimit`
  EXPECT_THAT(huge_request.capacity(), Le(2 * CordBuffer::kDefaultLimit));
  EXPECT_THAT(huge_request.length(), Eq(0));
  EXPECT_THAT(huge_request.data(), Ne(nullptr));
  memset(huge_request.data(), 0xCD, huge_request.capacity());
}
// Move construction transfers capacity, length and contents, and leaves the
// source with length zero.
TEST_P(CordBufferTest, MoveConstruct) {
  const size_t want = GetParam();
  CordBuffer from = CordBuffer::CreateWithDefaultLimit(want);
  const size_t original_capacity = from.capacity();
  // Four bytes: "Abc" plus its terminating zero.
  memcpy(from.data(), "Abc", 4);
  from.SetLength(4);

  CordBuffer to(std::move(from));
  EXPECT_THAT(to.capacity(), Eq(original_capacity));
  EXPECT_THAT(to.length(), Eq(4));
  EXPECT_THAT(absl::string_view(to.data()), Eq("Abc"));

  EXPECT_THAT(from.length(), Eq(0));  // NOLINT
}
// Move assignment transfers capacity, length and contents, and leaves the
// source with length zero.
TEST_P(CordBufferTest, MoveAssign) {
  const size_t want = GetParam();
  CordBuffer from = CordBuffer::CreateWithDefaultLimit(want);
  const size_t original_capacity = from.capacity();
  // Four bytes: "Abc" plus its terminating zero.
  memcpy(from.data(), "Abc", 4);
  from.SetLength(4);

  CordBuffer to;
  to = std::move(from);
  EXPECT_THAT(to.capacity(), Eq(original_capacity));
  EXPECT_THAT(to.length(), Eq(4));
  EXPECT_THAT(absl::string_view(to.data()), Eq("Abc"));

  EXPECT_THAT(from.length(), Eq(0));  // NOLINT
}
// Consuming a buffer yields its contents either as a CordRep or as an inline
// value, and always leaves the buffer with length zero.
TEST_P(CordBufferTest, ConsumeValue) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(want);
  memcpy(buffer.data(), "Abc", 4);
  buffer.SetLength(3);

  absl::string_view short_value;
  cord_internal::CordRep* const rep =
      CordBufferTestPeer::ConsumeValue(buffer, short_value);
  if (rep == nullptr) {
    EXPECT_THAT(short_value, Eq("Abc"));
  } else {
    EXPECT_THAT(CordToString(rep), Eq("Abc"));
    // The consumed rep is now owned by the test and must be released.
    cord_internal::CordRep::Unref(rep);
  }
  EXPECT_THAT(buffer.length(), Eq(0));
}
// A custom-limit buffer with a block size of `kMaxFlatSize` has at least the
// requested capacity, starts empty, and round-trips `requested` bytes.
TEST_P(CordBufferTest, CreateWithCustomLimitWithinDefaultLimit) {
  const size_t want = GetParam();
  CordBuffer buffer = CordBuffer::CreateWithCustomLimit(kMaxFlatSize, want);
  EXPECT_THAT(buffer.capacity(), Ge(want));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(kMaxFlatSize, want)));
  EXPECT_THAT(buffer.length(), Eq(0));

  memset(buffer.data(), 0xCD, buffer.capacity());

  // `want - 1` characters plus the terminating zero make up `want` bytes.
  std::string payload(want - 1, 'x');
  memcpy(buffer.data(), payload.c_str(), want);
  buffer.SetLength(want);

  EXPECT_THAT(buffer.length(), Eq(want));
  EXPECT_THAT(absl::string_view(buffer.data()), Eq(payload));
}
// Requests at or below the default limit are honored even when the block
// size is larger.
TEST(CordLargeBufferTest, CreateAtOrBelowDefaultLimit) {
  CordBuffer buffer = CordBuffer::CreateWithCustomLimit(k64KiB, kDefaultLimit);
  EXPECT_THAT(buffer.capacity(), Ge(kDefaultLimit));
  const size_t upper_bound = MaxCapacityFor(kMaxFlatSize, kDefaultLimit);
  EXPECT_THAT(buffer.capacity(), Le(upper_bound));

  buffer = CordBuffer::CreateWithCustomLimit(k64KiB, 3178);
  EXPECT_THAT(buffer.capacity(), Ge(3178));
}
// For every power-of-two block size from `kMaxFlatSize` up to `kCustomLimit`,
// requesting a full block yields a capacity of the block size minus overhead.
TEST(CordLargeBufferTest, CreateWithCustomLimit) {
  // The doubling loop below only visits valid block sizes if the starting
  // value is a power of two. The explanation is streamed as a failure message
  // rather than being passed in the matcher position of ASSERT_THAT.
  ASSERT_TRUE((kMaxFlatSize & (kMaxFlatSize - 1)) == 0)
      << "Must be power of 2";
  for (size_t size = kMaxFlatSize; size <= kCustomLimit; size *= 2) {
    CordBuffer buffer = CordBuffer::CreateWithCustomLimit(size, size);
    size_t expected = size - kFlatOverhead;
    ASSERT_THAT(buffer.capacity(), Ge(expected));
    EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(size, expected)));
  }
}
// A request larger than the block size is served with a full block.
TEST(CordLargeBufferTest, CreateWithTooLargeLimit) {
  CordBuffer capped = CordBuffer::CreateWithCustomLimit(k64KiB, k1MB);
  ASSERT_THAT(capped.capacity(), Ge(k64KiB - kFlatOverhead));
  const size_t upper_bound = MaxCapacityFor(k64KiB, k1MB);
  EXPECT_THAT(capped.capacity(), Le(upper_bound));
}
// Requests within 32 bytes of the maximum size_t value must not overflow:
// they are served like any other oversized request.
TEST(CordLargeBufferTest, CreateWithHugeValueForOverFlowHardening) {
  for (size_t dist_from_max = 0; dist_from_max <= 32; ++dist_from_max) {
    const size_t capacity = std::numeric_limits<size_t>::max() - dist_from_max;

    CordBuffer with_default = CordBuffer::CreateWithDefaultLimit(capacity);
    ASSERT_THAT(with_default.capacity(), Ge(kDefaultLimit));
    EXPECT_THAT(with_default.capacity(),
                Le(MaxCapacityFor(kMaxFlatSize, capacity)));

    for (size_t limit = kMaxFlatSize; limit <= kCustomLimit; limit *= 2) {
      CordBuffer with_custom = CordBuffer::CreateWithCustomLimit(limit, capacity);
      ASSERT_THAT(with_custom.capacity(), Ge(limit - kFlatOverhead));
      EXPECT_THAT(with_custom.capacity(), Le(MaxCapacityFor(limit, capacity)));
    }
  }
}
// With a 512 byte block size, requests near or above the block size are all
// served with a full block.
TEST(CordLargeBufferTest, CreateWithSmallLimit) {
  const size_t full_block = 512 - kFlatOverhead;

  CordBuffer buffer = CordBuffer::CreateWithCustomLimit(512, 1024);
  ASSERT_THAT(buffer.capacity(), Ge(full_block));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 1024)));

  // Ask for precise block size, should return size - kOverhead
  buffer = CordBuffer::CreateWithCustomLimit(512, 512);
  ASSERT_THAT(buffer.capacity(), Ge(full_block));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 512)));

  // Corner case: 511 < block_size, but 511 + kOverhead is above
  buffer = CordBuffer::CreateWithCustomLimit(512, 511);
  ASSERT_THAT(buffer.capacity(), Ge(full_block));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 511)));

  // Corner case: 498 + kOverhead < block_size
  buffer = CordBuffer::CreateWithCustomLimit(512, 498);
  ASSERT_THAT(buffer.capacity(), Ge(full_block));
  EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 498)));
}
// A request that would waste too much of a 16 KiB block is rounded down.
TEST(CordLargeBufferTest, CreateWasteFull) {
  // 15 KiB gets rounded down to next pow2 value.
  const size_t want = (15 << 10);
  CordBuffer rounded_down = CordBuffer::CreateWithCustomLimit(k16KiB, want);
  ASSERT_THAT(rounded_down.capacity(), Ge(k8KiB - kFlatOverhead));
  const size_t upper_bound = MaxCapacityFor(k8KiB, want);
  EXPECT_THAT(rounded_down.capacity(), Le(upper_bound));
}
// A request just short of a 16 KiB block is served with the full block.
TEST(CordLargeBufferTest, CreateSmallSlop) {
  const size_t want = k16KiB - 2 * kFlatOverhead;
  CordBuffer rounded_up = CordBuffer::CreateWithCustomLimit(k16KiB, want);
  ASSERT_THAT(rounded_up.capacity(), Ge(k16KiB - kFlatOverhead));
  const size_t upper_bound = MaxCapacityFor(k16KiB, want);
  EXPECT_THAT(rounded_up.capacity(), Le(upper_bound));
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,122 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_
#define ABSL_STRINGS_CORD_TEST_HELPERS_H_
#include <cstdint>
#include <iostream>
#include <string>
#include "absl/base/config.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Cord sizes relevant for testing.
// Each enumerator's numeric value is the length in bytes it stands for; see
// `Length()`.
enum class TestCordSize {
// An empty value
kEmpty = 0,
// An inlined string value
kInlined = cord_internal::kMaxInline / 2 + 1,
// 'Well known' SSO lengths (excluding terminating zero).
// libstdcxx has a maximum SSO of 15, libc++ has a maximum SSO of 22.
kStringSso1 = 15,
kStringSso2 = 22,
// A string value which is too large to fit in inlined data, but small enough
// such that Cord prefers copying the value if possible, i.e.: not stealing
// std::string inputs, or referencing existing CordReps on Append, etc.
kSmall = cord_internal::kMaxBytesToCopy / 2 + 1,
// A string value large enough that Cord prefers to reference or steal from
// existing inputs rather than copying contents of the input.
kMedium = cord_internal::kMaxFlatLength / 2 + 1,
// A string value large enough to cause it to be stored in multiple flats.
kLarge = cord_internal::kMaxFlatLength * 4
};
// Returns a short human readable name for `size`, or "???" for a value that
// is not one of the enumerators.
inline absl::string_view ToString(TestCordSize size) {
  // Cases are listed in the order in which the enumerators are declared.
  switch (size) {
    case TestCordSize::kEmpty:
      return "Empty";
    case TestCordSize::kInlined:
      return "Inlined";
    case TestCordSize::kStringSso1:
      return "StringSso1";
    case TestCordSize::kStringSso2:
      return "StringSso2";
    case TestCordSize::kSmall:
      return "Small";
    case TestCordSize::kMedium:
      return "Medium";
    case TestCordSize::kLarge:
      return "Large";
  }
  return "???";
}
// Returns the length in bytes that `size` stands for, which is the numeric
// value of the enumerator.
inline size_t Length(TestCordSize size) {
  const size_t length = static_cast<size_t>(size);
  return length;
}
// Streams the name returned by `ToString(size)`.
inline std::ostream& operator<<(std::ostream& stream, TestCordSize size) {
  stream << ToString(size);
  return stream;
}
// Creates a multi-segment Cord from an iterable container of strings. The
// resulting Cord is guaranteed to have one segment for every string in the
// container. This allows code to be unit tested with multi-segment Cord
// inputs.
//
// Example:
//
//   absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"});
//   EXPECT_FALSE(c.GetFlat(&unused));
//
// The mechanism by which this Cord is created is an implementation detail. Any
// implementation that produces a multi-segment Cord may produce a flat Cord in
// the future as new optimizations are added to the Cord class.
// MakeFragmentedCord will, however, always be updated to return a multi-segment
// Cord.
template <typename Container>
Cord MakeFragmentedCord(const Container& c) {
  Cord fragmented;
  for (const auto& piece : c) {
    // Each piece lives in its own heap string, which the releaser deletes
    // when the Cord no longer needs it.
    std::string* const owned = new std::string(piece);
    Cord segment = absl::MakeCordFromExternal(
        *owned, [owned](absl::string_view) { delete owned; });
    // Put everything accumulated so far in front of the new segment.
    segment.Prepend(fragmented);
    fragmented = segment;
  }
  return fragmented;
}
// Overload for braced lists of string views: forwards to the container
// template instantiated for `std::initializer_list<absl::string_view>`.
inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) {
return MakeFragmentedCord<std::initializer_list<absl::string_view>>(list);
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_

View file

@ -0,0 +1,468 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstddef>
#include <cstring>
#include <ostream>
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/cord.h"
#include "absl/strings/cord_buffer.h"
#include "absl/strings/cord_test_helpers.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
using testing::Eq;
using testing::AnyOf;
namespace absl {
ABSL_NAMESPACE_BEGIN
using cord_internal::CordzInfo;
using cord_internal::CordzSampleToken;
using cord_internal::CordzStatistics;
using cord_internal::CordzUpdateTracker;
using Method = CordzUpdateTracker::MethodIdentifier;
// Do not print cord contents, we only care about 'size' perhaps.
// Note that this method must be inside the named namespace.
// Writes nothing when `s` is null.
inline void PrintTo(const Cord& cord, std::ostream* s) {
  if (s == nullptr) return;
  *s << "Cord[" << cord.size() << "]";
}
namespace {
// Local shorthand for `cord_internal::kMaxInline`.
auto constexpr kMaxInline = cord_internal::kMaxInline;
// Returns a string_view value of the specified length
// We do this to avoid 'consuming' large strings in Cord by default.
// The view refers to a thread-local string that is overwritten by the next
// call on the same thread.
absl::string_view MakeString(size_t size) {
  thread_local std::string backing;
  backing.assign(size, '.');
  return backing;
}
// Returns a string_view whose length is the one `size` stands for.
absl::string_view MakeString(TestCordSize size) {
  const size_t length = Length(size);
  return MakeString(length);
}
// Returns a cord with a sampled method of kAppendString.
// The sampling interval is set to 1 for the duration of this function and an
// initially empty cord is filled through `Append()`.
absl::Cord MakeAppendStringCord(TestCordSize size) {
  CordzSamplingIntervalHelper sample_always{1};
  absl::Cord appended;
  appended.Append(MakeString(size));
  return appended;
}
// Names a parameterized test instance "On<Size>Cord".
std::string TestParamToString(::testing::TestParamInfo<TestCordSize> size) {
  const absl::string_view size_name = ToString(size.param);
  return absl::StrCat("On", size_name, "Cord");
}
// Fixture providing a cord of the parameterized size, constructed while the
// sampling interval is set to 1.
class CordzUpdateTest : public testing::TestWithParam<TestCordSize> {
 public:
  // The cord under test.
  Cord& cord() { return cord_; }

  // Returns kConstructorString when the parameterized size is above
  // `kInlined`, else the provided `method`.
  Method InitialOr(Method method) const {
    if (GetParam() > TestCordSize::kInlined) {
      return Method::kConstructorString;
    }
    return method;
  }

 private:
  // Declared before `cord_` so that it is in effect when `cord_` is built.
  CordzSamplingIntervalHelper sample_every_{1};
  Cord cord_{MakeString(GetParam())};
};
// Names a parameterized test instance after `ToString()` of its parameter.
template <typename T>
std::string ParamToString(::testing::TestParamInfo<T> param) {
  const auto name = ToString(param.param);
  return std::string(name);
}
// Runs the CordzUpdateTest cases on empty, inlined and large cords; instances
// are named through `TestParamToString`.
INSTANTIATE_TEST_SUITE_P(WithParam, CordzUpdateTest,
testing::Values(TestCordSize::kEmpty,
TestCordSize::kInlined,
TestCordSize::kLarge),
TestParamToString);
// Fixture parameterized on a cord size, with the sampling interval set to 1
// for the lifetime of each test.
class CordzStringTest : public testing::TestWithParam<TestCordSize> {
private:
CordzSamplingIntervalHelper sample_every_{1};
};
// Runs the CordzStringTest cases on inlined, SSO-sized, small and large
// values; instances are named through `ParamToString`.
INSTANTIATE_TEST_SUITE_P(WithParam, CordzStringTest,
testing::Values(TestCordSize::kInlined,
TestCordSize::kStringSso1,
TestCordSize::kStringSso2,
TestCordSize::kSmall,
TestCordSize::kLarge),
ParamToString<TestCordSize>);
// A cord constructed from a small string view is sampled as
// kConstructorString.
TEST(CordzTest, ConstructSmallArray) {
  CordzSamplingIntervalHelper sample_every{1};
  const absl::string_view contents = MakeString(TestCordSize::kSmall);
  Cord cord(contents);
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
}
// A cord constructed from a large string view is sampled as
// kConstructorString.
TEST(CordzTest, ConstructLargeArray) {
  CordzSamplingIntervalHelper sample_every{1};
  const absl::string_view contents = MakeString(TestCordSize::kLarge);
  Cord cord(contents);
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
}
// A cord constructed from a std::string longer than the inline limit is
// sampled as kConstructorString. Shorter strings are not checked.
TEST_P(CordzStringTest, ConstructString) {
  CordzSamplingIntervalHelper sample_every{1};
  const size_t length = Length(GetParam());
  Cord cord(std::string(length, '.'));
  if (length > kMaxInline) {
    EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
  }
}
// Copying an unsampled cord yields a cord without cordz info, even when the
// sampling interval is 1.
TEST(CordzTest, CopyConstructFromUnsampled) {
  CordzSamplingIntervalHelper sample_every{1};
  Cord source = UnsampledCord(MakeString(TestCordSize::kLarge));
  Cord copy(source);
  EXPECT_THAT(GetCordzInfoForTesting(copy), Eq(nullptr));
}
// Copying a sampled cord yields a cord sampled as kConstructorCord whose
// statistics record the source's kAppendString method as parent.
TEST(CordzTest, CopyConstructFromSampled) {
  CordzSamplingIntervalHelper sample_never{99999};
  Cord source = MakeAppendStringCord(TestCordSize::kLarge);
  Cord copy(source);
  ASSERT_THAT(copy, HasValidCordzInfoOf(Method::kConstructorCord));

  const CordzInfo* copy_info = GetCordzInfoForTesting(copy);
  CordzStatistics stats = copy_info->GetCordzStatistics();
  EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
  EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
}
// Move construction keeps the source's kConstructorString sampling info.
TEST(CordzTest, MoveConstruct) {
  CordzSamplingIntervalHelper sample_every{1};
  Cord source(MakeString(TestCordSize::kLarge));
  Cord moved(std::move(source));
  EXPECT_THAT(moved, HasValidCordzInfoOf(Method::kConstructorString));
}
// Assigning an unsampled cord leaves the target without cordz info, and the
// target's previous info is no longer listed.
TEST_P(CordzUpdateTest, AssignUnsampledCord) {
  Cord source = UnsampledCord(MakeString(TestCordSize::kLarge));
  // Captured before the assignment; only compared afterwards.
  const CordzInfo* previous_info = GetCordzInfoForTesting(cord());
  cord() = source;
  EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr));
  EXPECT_FALSE(CordzInfoIsListed(previous_info));
}
// Assigning a sampled cord yields a target sampled as kAssignCord whose
// statistics are those of the source (kAppendString), not of the target's
// original construction.
TEST_P(CordzUpdateTest, AssignSampledCord) {
  Cord source = MakeAppendStringCord(TestCordSize::kLarge);
  cord() = source;
  ASSERT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignCord));

  const CordzInfo* assigned_info = GetCordzInfoForTesting(cord());
  CordzStatistics stats = assigned_info->GetCordzStatistics();
  EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
  EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
  EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0));
}
// Assigning a sampled cord to a default constructed cord yields a target
// sampled as kAssignCord, even with a very large sampling interval.
TEST(CordzUpdateTest, AssignSampledCordToInlined) {
  CordzSamplingIntervalHelper sample_never{99999};
  Cord target;
  Cord source = MakeAppendStringCord(TestCordSize::kLarge);
  target = source;
  ASSERT_THAT(target, HasValidCordzInfoOf(Method::kAssignCord));

  const CordzInfo* assigned_info = GetCordzInfoForTesting(target);
  CordzStatistics stats = assigned_info->GetCordzStatistics();
  EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
  EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
  EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0));
}
// Assigning a sampled cord to an unsampled large cord yields a target
// sampled as kAssignCord, even with a very large sampling interval.
TEST(CordzUpdateTest, AssignSampledCordToUnsampledCord) {
  CordzSamplingIntervalHelper sample_never{99999};
  Cord target = UnsampledCord(MakeString(TestCordSize::kLarge));
  Cord source = MakeAppendStringCord(TestCordSize::kLarge);
  target = source;
  ASSERT_THAT(target, HasValidCordzInfoOf(Method::kAssignCord));

  const CordzInfo* assigned_info = GetCordzInfoForTesting(target);
  CordzStatistics stats = assigned_info->GetCordzStatistics();
  EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
  EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
  EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0));
}
// With a very large sampling interval, assigning an unsampled cord to a
// sampled cord removes the target's cordz info.
TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithoutSampling) {
  CordzSamplingIntervalHelper sample_never{99999};
  Cord target = MakeAppendStringCord(TestCordSize::kLarge);
  // Captured before the assignment; only compared afterwards.
  const CordzInfo* previous_info = GetCordzInfoForTesting(target);
  Cord source = UnsampledCord(MakeString(TestCordSize::kLarge));
  target = source;
  EXPECT_THAT(GetCordzInfoForTesting(target), Eq(nullptr));
  EXPECT_FALSE(CordzInfoIsListed(previous_info));
}
// With a sampling interval of 1, assigning an unsampled cord to a sampled
// cord still removes the target's cordz info.
TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithSampling) {
  CordzSamplingIntervalHelper sample_every{1};
  Cord target = MakeAppendStringCord(TestCordSize::kLarge);
  // Captured before the assignment; only compared afterwards.
  const CordzInfo* previous_info = GetCordzInfoForTesting(target);
  Cord source = UnsampledCord(MakeString(TestCordSize::kLarge));
  target = source;
  EXPECT_THAT(GetCordzInfoForTesting(target), Eq(nullptr));
  EXPECT_FALSE(CordzInfoIsListed(previous_info));
}
// With the sampling interval set to 1, copy-assigns the cord returned by
// MakeAppendStringCord over a cord constructed from a large string. The target
// must report kAssignCord with kAppendString as parent, and must not carry a
// kConstructorString count.
TEST(CordzUpdateTest, AssignSampledCordToSampledCord) {
CordzSamplingIntervalHelper sample_every{1};
Cord src = MakeAppendStringCord(TestCordSize::kLarge);
Cord cord(MakeString(TestCordSize::kLarge));
cord = src;
// ASSERT rather than EXPECT: the info pointer is dereferenced on the next line.
ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord));
CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics();
EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0));
}
// With the sampling interval set to 1, copy-assigns the cord returned by
// MakeAppendStringCord over a cord constructed from a large string and checks
// the resulting statistics.
// NOTE(review): the body is statement-for-statement identical to
// AssignSampledCordToSampledCord, and `src` is not created through
// UnsampledCord despite the test name -- confirm whether this duplication is
// intentional.
TEST(CordzUpdateTest, AssignUnsampledCordToSampledCord) {
CordzSamplingIntervalHelper sample_every{1};
Cord src = MakeAppendStringCord(TestCordSize::kLarge);
Cord cord(MakeString(TestCordSize::kLarge));
cord = src;
// ASSERT rather than EXPECT: the info pointer is dereferenced on the next line.
ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord));
CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics();
EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0));
}
// Copy-assigns an inline-sized UnsampledCord over a cord constructed from a
// large string while the sampling interval is 1. The target is expected to end
// up without cordz info, and its previous info must no longer be listed.
TEST(CordzTest, AssignInlinedCordToSampledCord) {
// NOTE(review): presumably held so the old CordzInfo is not freed while
// `info` is still inspected below -- confirm against CordzSampleToken.
CordzSampleToken token;
CordzSamplingIntervalHelper sample_every{1};
Cord cord(MakeString(TestCordSize::kLarge));
const CordzInfo* info = GetCordzInfoForTesting(cord);
Cord src = UnsampledCord(MakeString(TestCordSize::kInlined));
cord = src;
EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr));
EXPECT_FALSE(CordzInfoIsListed(info));
}
// Move-assigns a cord constructed from a large string into an empty cord and
// expects the target to report kConstructorString (the source's method)
// rather than an assignment method.
TEST(CordzUpdateTest, MoveAssignCord) {
CordzSamplingIntervalHelper sample_every{1};
Cord cord;
Cord src(MakeString(TestCordSize::kLarge));
cord = std::move(src);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
}
// Assigns a large (non-inlined) character array to the fixture's cord and
// expects a sampled, listed CordzInfo with exactly one kAssignString update.
TEST_P(CordzUpdateTest, AssignLargeArray) {
  // Previously this used TestCordSize::kSmall, which made the test an exact
  // duplicate of AssignSmallArray and left the large-array path untested.
  cord() = MakeString(TestCordSize::kLarge);
  EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString));
}
// Assigns a kSmall-sized character array to the fixture's cord and expects a
// valid CordzInfo with one kAssignString update.
TEST_P(CordzUpdateTest, AssignSmallArray) {
cord() = MakeString(TestCordSize::kSmall);
EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString));
}
// Assigns a kInlined-sized character array to the fixture's cord and expects
// the cord to have no cordz info afterwards.
TEST_P(CordzUpdateTest, AssignInlinedArray) {
cord() = MakeString(TestCordSize::kInlined);
EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr));
}
// Assigns a std::string of the parameterized length to an empty cord. Only
// lengths above kMaxInline are checked: those must yield a valid CordzInfo
// tagged kAssignString.
TEST_P(CordzStringTest, AssignStringToInlined) {
  Cord cord;
  const auto length = Length(GetParam());
  cord = std::string(length, '.');
  // Strings that fit inline carry no expectation in this test.
  if (length <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAssignString));
}
// Assigns a std::string of the parameterized length to a cord that was
// constructed from a large string. For lengths above kMaxInline the cord must
// still report kConstructorString and exactly one kAssignString update.
TEST_P(CordzStringTest, AssignStringToCord) {
  Cord cord(MakeString(TestCordSize::kLarge));
  const auto length = Length(GetParam());
  cord = std::string(length, '.');
  // Strings that fit inline carry no expectation in this test.
  if (length <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
  EXPECT_THAT(cord, CordzMethodCountEq(Method::kAssignString, 1));
}
// Assigns a std::string of kInlined length to the fixture's cord and expects
// the cord to have no cordz info afterwards.
TEST_P(CordzUpdateTest, AssignInlinedString) {
cord() = std::string(Length(TestCordSize::kInlined), '.');
EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr));
}
// Appends an lvalue UnsampledCord to the fixture's cord and expects a valid
// CordzInfo for InitialOr(kAppendCord).
TEST_P(CordzUpdateTest, AppendCord) {
Cord src = UnsampledCord(MakeString(TestCordSize::kLarge));
cord().Append(src);
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord)));
}
// Appends a temporary UnsampledCord (rvalue) to the fixture's cord and expects
// a valid CordzInfo for InitialOr(kAppendCord).
TEST_P(CordzUpdateTest, MoveAppendCord) {
cord().Append(UnsampledCord(MakeString(TestCordSize::kLarge)));
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord)));
}
// Appends a kSmall-sized character array to the fixture's cord and expects a
// valid CordzInfo for InitialOr(kAppendString).
TEST_P(CordzUpdateTest, AppendSmallArray) {
cord().Append(MakeString(TestCordSize::kSmall));
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString)));
}
// Appends a kLarge-sized character array to the fixture's cord and expects a
// valid CordzInfo for InitialOr(kAppendString).
TEST_P(CordzUpdateTest, AppendLargeArray) {
cord().Append(MakeString(TestCordSize::kLarge));
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString)));
}
// Appends a std::string of the parameterized length to an empty cord. Only
// lengths above kMaxInline are checked: those must yield a valid CordzInfo
// tagged kAppendString.
TEST_P(CordzStringTest, AppendStringToEmpty) {
  Cord cord;
  const auto length = Length(GetParam());
  cord.Append(std::string(length, '.'));
  // Results that fit inline carry no expectation in this test.
  if (length <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString));
}
// Appends a std::string of the parameterized length to an inline-sized cord.
// When the combined length exceeds kMaxInline the cord must have a valid
// CordzInfo tagged kAppendString.
TEST_P(CordzStringTest, AppendStringToInlined) {
  Cord cord(MakeString(TestCordSize::kInlined));
  const auto appended = Length(GetParam());
  cord.Append(std::string(appended, '.'));
  const auto combined = Length(TestCordSize::kInlined) + appended;
  // Results that still fit inline carry no expectation in this test.
  if (combined <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString));
}
// Appends a std::string of the parameterized length to a cord constructed from
// a large string. The cord must keep kConstructorString as its method and
// record exactly one kAppendString update.
TEST_P(CordzStringTest, AppendStringToCord) {
Cord cord(MakeString(TestCordSize::kLarge));
cord.Append(std::string(Length(GetParam()), '.'));
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kAppendString, 1));
}
// A cord built by MakeCordFromExternal over non-empty data (with a no-op
// releaser) must have a valid CordzInfo tagged kMakeCordFromExternal.
TEST(CordzTest, MakeCordFromExternal) {
CordzSamplingIntervalHelper sample_every{1};
Cord cord = MakeCordFromExternal("Hello world", [](absl::string_view) {});
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kMakeCordFromExternal));
}
// A cord built by MakeCordFromExternal over empty data must have no cordz
// info, even with the sampling interval set to 1.
TEST(CordzTest, MakeCordFromEmptyExternal) {
CordzSamplingIntervalHelper sample_every{1};
Cord cord = MakeCordFromExternal({}, [](absl::string_view) {});
EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr));
}
// Prepends an UnsampledCord to the fixture's cord and expects a valid
// CordzInfo for InitialOr(kPrependCord).
TEST_P(CordzUpdateTest, PrependCord) {
Cord src = UnsampledCord(MakeString(TestCordSize::kLarge));
cord().Prepend(src);
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependCord)));
}
// Prepends a kSmall-sized character array to the fixture's cord and expects a
// valid CordzInfo for InitialOr(kPrependString).
TEST_P(CordzUpdateTest, PrependSmallArray) {
cord().Prepend(MakeString(TestCordSize::kSmall));
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString)));
}
// Prepends a kLarge-sized character array to the fixture's cord and expects a
// valid CordzInfo for InitialOr(kPrependString).
TEST_P(CordzUpdateTest, PrependLargeArray) {
cord().Prepend(MakeString(TestCordSize::kLarge));
EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString)));
}
// Prepends a std::string of the parameterized length to an empty cord. Only
// lengths above kMaxInline are checked: those must yield a valid CordzInfo
// tagged kPrependString.
TEST_P(CordzStringTest, PrependStringToEmpty) {
  Cord cord;
  const auto length = Length(GetParam());
  cord.Prepend(std::string(length, '.'));
  // Results that fit inline carry no expectation in this test.
  if (length <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString));
}
// Prepends a std::string of the parameterized length to an inline-sized cord.
// When the combined length exceeds kMaxInline the cord must have a valid
// CordzInfo tagged kPrependString.
TEST_P(CordzStringTest, PrependStringToInlined) {
  Cord cord(MakeString(TestCordSize::kInlined));
  const auto prepended = Length(GetParam());
  cord.Prepend(std::string(prepended, '.'));
  const auto combined = Length(TestCordSize::kInlined) + prepended;
  // Results that still fit inline carry no expectation in this test.
  if (combined <= kMaxInline) return;
  EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString));
}
// Prepends a std::string of the parameterized length to a cord constructed
// from a large string. The cord must keep kConstructorString as its method and
// record exactly one kPrependString update.
TEST_P(CordzStringTest, PrependStringToCord) {
Cord cord(MakeString(TestCordSize::kLarge));
cord.Prepend(std::string(Length(GetParam()), '.'));
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kPrependString, 1));
}
// Removes prefixes from a sampled cord in three steps (half, down to
// kMaxInline bytes, then everything) and checks that the kRemovePrefix count
// increases each time until the emptied cord no longer has cordz info.
TEST(CordzTest, RemovePrefix) {
CordzSamplingIntervalHelper sample_every(1);
Cord cord(MakeString(TestCordSize::kLarge));
// Half the cord
cord.RemovePrefix(cord.size() / 2);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 1));
// TODO(mvels): RemovePrefix does not reset to inlined, except if empty?
// Leaves exactly kMaxInline bytes; the cord is still expected to be sampled.
cord.RemovePrefix(cord.size() - kMaxInline);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 2));
// Removing everything leaves an empty cord without cordz info.
cord.RemovePrefix(cord.size());
EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr));
}
// Removes suffixes from a sampled cord in three steps (half, down to
// kMaxInline bytes, then everything) and checks that the kRemoveSuffix count
// increases each time until the emptied cord no longer has cordz info.
TEST(CordzTest, RemoveSuffix) {
CordzSamplingIntervalHelper sample_every(1);
Cord cord(MakeString(TestCordSize::kLarge));
// Half the cord
cord.RemoveSuffix(cord.size() / 2);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 1));
// TODO(mvels): RemoveSuffix does not reset to inlined, except if empty?
// Leaves exactly kMaxInline bytes; the cord is still expected to be sampled.
cord.RemoveSuffix(cord.size() - kMaxInline);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString));
EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 2));
// Removing everything leaves an empty cord without cordz info.
cord.RemoveSuffix(cord.size());
EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr));
}
// A Subcord taken from an UnsampledCord is expected to have no cordz info,
// even with the sampling interval set to 1.
TEST(CordzTest, SubCordFromUnsampledCord) {
CordzSamplingIntervalHelper sample_every{1};
Cord src = UnsampledCord(MakeString(TestCordSize::kLarge));
Cord cord = src.Subcord(10, src.size() / 2);
EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr));
}
// A Subcord taken from the cord returned by MakeAppendStringCord must report
// kSubCord, with kAppendString recorded as the parent method, even though the
// sampling interval is 99999.
TEST(CordzTest, SubCordFromSampledCord) {
CordzSamplingIntervalHelper sample_never{99999};
Cord src = MakeAppendStringCord(TestCordSize::kLarge);
Cord cord = src.Subcord(10, src.size() / 2);
// ASSERT rather than EXPECT: the info pointer is dereferenced on the next line.
ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord));
CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics();
EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString));
EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1));
}
// A Subcord of kMaxInline + 1 bytes (one byte more than kMaxInline) taken from
// the cord returned by MakeAppendStringCord must report kSubCord.
TEST(CordzTest, SmallSubCord) {
CordzSamplingIntervalHelper sample_never{99999};
Cord src = MakeAppendStringCord(TestCordSize::kLarge);
Cord cord = src.Subcord(10, kMaxInline + 1);
EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord));
}
} // namespace
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_INTERNAL_CORDZ_ENABLED

View file

@ -0,0 +1,153 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_CORDZ_TEST_HELPERS_H_
#define ABSL_STRINGS_CORDZ_TEST_HELPERS_H_
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Looks up the CordzInfo attached to `cord`. Yields nullptr when the cord's
// contents are not a tree; otherwise yields whatever `cordz_info()` reports
// for those contents.
inline absl::Nullable<const cord_internal::CordzInfo*> GetCordzInfoForTesting(
    const Cord& cord) {
  const auto& contents = cord.contents_;
  return contents.is_tree() ? contents.cordz_info() : nullptr;
}
// Reports whether `cordz_info` is among the CordzInfo instances reachable by
// iterating `token`. Entries are matched by address only; `cordz_info` itself
// is never dereferenced.
inline bool CordzInfoIsListed(
    absl::Nonnull<const cord_internal::CordzInfo*> cordz_info,
    cord_internal::CordzSampleToken token = {}) {
  auto current = token.begin();
  auto last = token.end();
  for (; current != last; ++current) {
    const cord_internal::CordzInfo& listed = *current;
    if (&listed == cordz_info) return true;
  }
  return false;
}
// Cord matcher. A cord matches only if every check below passes, evaluated in
// this order (the first failure is reported through the result listener):
//   1. the cord has a CordzInfo (it is sampled)
//   2. that CordzInfo is listed / discoverable
//   3. the size reported by CordzStatistics equals the cord's size
//   4. the update tracker holds a count of exactly 1 for `method`
MATCHER_P(HasValidCordzInfoOf, method, "CordzInfo matches cord") {
  const cord_internal::CordzInfo* info = GetCordzInfoForTesting(arg);
  if (info == nullptr) {
    *result_listener << "cord is not sampled";
    return false;
  }

  if (!CordzInfoIsListed(info)) {
    *result_listener << "cord is sampled, but not listed";
    return false;
  }

  cord_internal::CordzStatistics statistics = info->GetCordzStatistics();
  const auto cord_size = arg.size();
  if (statistics.size != cord_size) {
    *result_listener << "cordz size " << statistics.size
                     << " does not match cord size " << cord_size;
    return false;
  }

  const auto method_count = statistics.update_tracker.Value(method);
  if (method_count != 1) {
    *result_listener << "Expected method count 1 for " << method << ", found "
                     << method_count;
    return false;
  }

  return true;
}
// Cord matcher that passes when the cord has a CordzInfo whose update tracker
// reports a count of exactly `n` for `method`. Unlike HasValidCordzInfoOf, it
// checks neither listing nor size.
MATCHER_P2(CordzMethodCountEq, method, n,
           absl::StrCat("CordzInfo method count equals ", n)) {
  const cord_internal::CordzInfo* info = GetCordzInfoForTesting(arg);
  if (info == nullptr) {
    *result_listener << "cord is not sampled";
    return false;
  }

  cord_internal::CordzStatistics statistics = info->GetCordzStatistics();
  const auto method_count = statistics.update_tracker.Value(method);
  if (method_count != n) {
    *result_listener << "Expected method count " << n << " for " << method
                     << ", found " << method_count;
    return false;
  }

  return true;
}
// Cordz will only update with a new rate once the previously scheduled event
// has fired. When we disable Cordz, a long delay takes place where we won't
// consider profiling new Cords. CordzSamplingIntervalHelper will burn through
// that interval and allow for testing that assumes that the average sampling
// interval is a particular value.
//
// The helper is a scoped guard: the constructor records the current mean
// interval and installs `interval`; the destructor re-installs the recorded
// value.
class CordzSamplingIntervalHelper {
 public:
  // Installs `interval` as both the mean interval and the next-sample value.
  explicit CordzSamplingIntervalHelper(int32_t interval)
      : orig_mean_interval_(absl::cord_internal::get_cordz_mean_interval()) {
    absl::cord_internal::set_cordz_mean_interval(interval);
    absl::cord_internal::cordz_set_next_sample_for_testing(interval);
  }

  // Not copyable: each instance restores the interval it captured, so a copy
  // (or an assignment between helpers) could restore a stale value.
  CordzSamplingIntervalHelper(const CordzSamplingIntervalHelper&) = delete;
  CordzSamplingIntervalHelper& operator=(const CordzSamplingIntervalHelper&) =
      delete;

  // Restores the interval that was in effect at construction time.
  ~CordzSamplingIntervalHelper() {
    absl::cord_internal::set_cordz_mean_interval(orig_mean_interval_);
    absl::cord_internal::cordz_set_next_sample_for_testing(orig_mean_interval_);
  }

 private:
  int32_t orig_mean_interval_;
};
// Wrapper struct managing a small CordRep `rep`: a 100-byte flat whose data
// bytes are all set to 1. The destructor drops the reference taken at
// construction.
struct TestCordRep {
  absl::Nonnull<cord_internal::CordRepFlat*> rep;

  TestCordRep() {
    rep = cord_internal::CordRepFlat::New(100);
    rep->length = 100;
    memset(rep->Data(), 1, 100);
  }

  // Not copyable: a copy would share `rep` without taking a reference, and
  // both destructors would then Unref the same node.
  TestCordRep(const TestCordRep&) = delete;
  TestCordRep& operator=(const TestCordRep&) = delete;

  ~TestCordRep() { cord_internal::CordRep::Unref(rep); }
};
// Wrapper struct managing a small CordRep `rep`, and
// an InlineData `data` initialized with that CordRep.
// Member order matters: `rep` is declared first so it is constructed before
// `data`, whose initializer reads `rep.rep`.
struct TestCordData {
TestCordRep rep;
cord_internal::InlineData data{rep.rep};
};
// Creates a Cord that is not sampled
// The Cord is constructed from `args` while a CordzSamplingIntervalHelper with
// interval 9999 is in scope; ABSL_ASSERT then checks that the new cord has no
// cordz info. The helper's destructor restores the previous interval on
// return.
template <typename... Args>
Cord UnsampledCord(Args... args) {
CordzSamplingIntervalHelper never(9999);
Cord cord(std::forward<Args>(args)...);
ABSL_ASSERT(GetCordzInfoForTesting(cord) == nullptr);
return cord;
}
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_CORDZ_TEST_HELPERS_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,186 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: escaping.h
// -----------------------------------------------------------------------------
//
// This header file contains string utilities involved in escaping and
// unescaping strings in various ways.
#ifndef ABSL_STRINGS_ESCAPING_H_
#define ABSL_STRINGS_ESCAPING_H_
#include <cstddef>
#include <string>
#include <vector>
#include "absl/base/attributes.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/strings/ascii.h"
#include "absl/strings/str_join.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// CUnescape()
//
// Unescapes a `source` string and copies it into `dest`, rewriting C-style
// escape sequences (https://en.cppreference.com/w/cpp/language/escape) into
// their proper code point equivalents, returning `true` if successful.
//
// The following unescape sequences can be handled:
//
// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents
// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must
// resolve to a single byte or an error will occur. E.g. values greater than
// 0xff will produce an error.
// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary
// number of following digits are allowed, the unescaped value must resolve
// to a single byte or an error will occur. E.g. '\x0045' is equivalent to
// '\x45', but '\x1234' will produce an error.
// * Unicode escape sequences ('\unnnn' for exactly four hex digits or
// '\Unnnnnnnn' for exactly eight hex digits), which will be encoded in
// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and
// 0x99).
//
// If any errors are encountered, this function returns `false`, leaving the
// `dest` output parameter in an unspecified state, and stores the first
// encountered error in `error`. To disable error reporting, set `error` to
// `nullptr` or use the overload with no error reporting below.
//
// Example:
//
// std::string s = "foo\\rbar\\nbaz\\t";
// std::string unescaped_s;
// if (!absl::CUnescape(s, &unescaped_s)) {
// ...
// }
// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t");
bool CUnescape(absl::string_view source, absl::Nonnull<std::string*> dest,
absl::Nullable<std::string*> error);
// Overload of `CUnescape()` with no error reporting.
// Forwards to the three-argument overload with a null `error` pointer and
// returns its result.
inline bool CUnescape(absl::string_view source,
absl::Nonnull<std::string*> dest) {
return CUnescape(source, dest, nullptr);
}
// CEscape()
//
// Escapes a 'src' string using C-style escape sequences
// (https://en.cppreference.com/w/cpp/language/escape), escaping other
// non-printable/non-whitespace bytes as octal sequences (e.g. "\377").
//
// Example:
//
// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
// std::string escaped_s = absl::CEscape(s);
// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n");
std::string CEscape(absl::string_view src);
// CHexEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping
// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g.
// "\xFF").
//
// Example:
//
// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
// std::string escaped_s = absl::CHexEscape(s);
// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n");
std::string CHexEscape(absl::string_view src);
// Utf8SafeCEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping bytes as
// octal sequences, and passing through UTF-8 characters without conversion.
// I.e., when encountering any bytes with their high bit set, this function
// will not escape those values, whether or not they are valid UTF-8.
std::string Utf8SafeCEscape(absl::string_view src);
// Utf8SafeCHexEscape()
//
// Escapes a 'src' string using C-style escape sequences, escaping bytes as
// hexadecimal sequences, and passing through UTF-8 characters without
// conversion.
std::string Utf8SafeCHexEscape(absl::string_view src);
// Base64Escape()
//
// Encodes a `src` string into a base64-encoded 'dest' string with padding
// characters. This function conforms with RFC 4648 section 4 (base64) and RFC
// 2045.
void Base64Escape(absl::string_view src, absl::Nonnull<std::string*> dest);
std::string Base64Escape(absl::string_view src);
// WebSafeBase64Escape()
//
// Encodes a `src` string into a base64 string, like Base64Escape() does, but
// outputs '-' instead of '+' and '_' instead of '/', and does not pad 'dest'.
// This function conforms with RFC 4648 section 5 (base64url).
void WebSafeBase64Escape(absl::string_view src,
absl::Nonnull<std::string*> dest);
std::string WebSafeBase64Escape(absl::string_view src);
// Base64Unescape()
//
// Converts a `src` string encoded in Base64 (RFC 4648 section 4) to its binary
// equivalent, writing it to a `dest` buffer, returning `true` on success. If
// `src` contains invalid characters, `dest` is cleared and returns `false`.
// If padding is included (note that `Base64Escape()` does produce it), it must
// be correct. In the padding, '=' and '.' are treated identically.
bool Base64Unescape(absl::string_view src, absl::Nonnull<std::string*> dest);
// WebSafeBase64Unescape()
//
// Converts a `src` string encoded in "web safe" Base64 (RFC 4648 section 5) to
// its binary equivalent, writing it to a `dest` buffer. If `src` contains
// invalid characters, `dest` is cleared and returns `false`. If padding is
// included (note that `WebSafeBase64Escape()` does not produce it), it must be
// correct. In the padding, '=' and '.' are treated identically.
bool WebSafeBase64Unescape(absl::string_view src,
absl::Nonnull<std::string*> dest);
// HexStringToBytes()
//
// Converts the hexadecimal encoded data in `hex` into raw bytes in the `bytes`
// output string. If `hex` does not consist of valid hexadecimal data, this
// function returns false and leaves `bytes` in an unspecified state. Returns
// true on success.
ABSL_MUST_USE_RESULT bool HexStringToBytes(absl::string_view hex,
absl::Nonnull<std::string*> bytes);
// HexStringToBytes()
//
// Converts an ASCII hex string into bytes, returning binary data of length
// `from.size()/2`. The input must be valid hexadecimal data, otherwise the
// return value is unspecified.
ABSL_DEPRECATED("Use the HexStringToBytes() that returns a bool")
std::string HexStringToBytes(absl::string_view from);
// BytesToHexString()
//
// Converts binary data into an ASCII text string, returning a string of size
// `2*from.size()`.
std::string BytesToHexString(absl::string_view from);
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_ESCAPING_H_

View file

@ -0,0 +1,125 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <cstdint>
#include <memory>
#include <random>
#include <string>
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/escaping_test_common.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace {
// Benchmarks absl::CUnescape on an input made of 50 repetitions of the
// four-character hex escape sequence \x55, writing into a fresh destination
// string on every iteration.
void BM_CUnescapeHexString(benchmark::State& state) {
  constexpr int kRepetitions = 50;
  std::string input;
  for (int remaining = kRepetitions; remaining > 0; --remaining) {
    input.append("\\x55");
  }

  for (auto _ : state) {
    std::string unescaped;
    benchmark::DoNotOptimize(input);
    const bool ok = absl::CUnescape(input, &unescaped);
    benchmark::DoNotOptimize(ok);
    benchmark::DoNotOptimize(unescaped);
  }
}
BENCHMARK(BM_CUnescapeHexString);
// Benchmarks absl::WebSafeBase64Escape on the concatenation of every
// `plaintext` entry from base64_strings(), repeated ten times. A fresh output
// string is used on every iteration.
void BM_WebSafeBase64Escape_string(benchmark::State& state) {
  std::string plaintext;
  for (int round = 0; round != 10; ++round) {
    for (const auto& entry : absl::strings_internal::base64_strings()) {
      plaintext.append(std::string(entry.plaintext));
    }
  }

  for (auto _ : state) {
    std::string encoded;
    benchmark::DoNotOptimize(plaintext);
    absl::WebSafeBase64Escape(plaintext, &encoded);
    benchmark::DoNotOptimize(encoded);
  }
}
BENCHMARK(BM_WebSafeBase64Escape_string);
// Benchmarks the bool-returning absl::HexStringToBytes on valid input: the
// two-character pair "1c" repeated state.range(0) times. The output string is
// reused across iterations.
void BM_HexStringToBytes(benchmark::State& state) {
const int size = state.range(0);
std::string input, output;
for (int i = 0; i < size; ++i) input += "1c";
for (auto _ : state) {
benchmark::DoNotOptimize(input);
bool result = absl::HexStringToBytes(input, &output);
benchmark::DoNotOptimize(result);
benchmark::DoNotOptimize(output);
}
}
// Runs with repetition counts from 1 up to 256.
BENCHMARK(BM_HexStringToBytes)->Range(1, 1 << 8);
// Benchmarks absl::HexStringToBytes on two malformed inputs: one with an odd
// number of characters and one ending in non-hex characters ("**").
void BM_HexStringToBytes_Fail(benchmark::State& state) {
std::string binary;
// Seven characters: odd length.
absl::string_view hex_input1 = "1c2f003";
// Even length, but the last two characters are not hex digits.
absl::string_view hex_input2 = "1c2f0032f40123456789abcdef**";
for (auto _ : state) {
benchmark::DoNotOptimize(hex_input1);
bool result1 = absl::HexStringToBytes(hex_input1, &binary);
benchmark::DoNotOptimize(result1);
benchmark::DoNotOptimize(binary);
benchmark::DoNotOptimize(hex_input2);
bool result2 = absl::HexStringToBytes(hex_input2, &binary);
benchmark::DoNotOptimize(result2);
benchmark::DoNotOptimize(binary);
}
}
BENCHMARK(BM_HexStringToBytes_Fail);
// Used for the CEscape benchmarks
// Each constant is the repeating unit that CEscapeBenchmarkHelper appends to
// build its input string.
// Digits only.
const char kStringValueNoEscape[] = "1234567890";
// Mostly digits, plus a newline and the high byte 0xA1.
const char kStringValueSomeEscaped[] = "123\n56789\xA1";
// Mostly high bytes and control characters, with only 'y' and 'z' as letters.
const char kStringValueMostEscaped[] = "\xA1\xA2\ny\xA4\xA5\xA6z\b\r";
// Benchmarks absl::CEscape on an input built by appending `string_value`
// repeatedly until the input is at least `max_len` bytes long.
//
// `string_value` must be a non-empty NUL-terminated string, otherwise the
// input never grows. A `max_len` of zero or less yields an empty input.
void CEscapeBenchmarkHelper(benchmark::State& state, const char* string_value,
                            int max_len) {
  // Compare lengths as unsigned values. The previous `src.size() < max_len`
  // mixed unsigned and signed operands, so a negative `max_len` would have
  // been converted to a huge unsigned bound.
  const std::string::size_type target_len =
      max_len > 0 ? static_cast<std::string::size_type>(max_len) : 0;
  std::string src;
  while (src.size() < target_len) {
    absl::StrAppend(&src, string_value);
  }
  for (auto _ : state) {
    benchmark::DoNotOptimize(src);
    std::string result = absl::CEscape(src);
    benchmark::DoNotOptimize(result);
  }
}
// CEscape benchmark over input built from kStringValueNoEscape, with the
// minimum input length taken from state.range(0).
void BM_CEscape_NoEscape(benchmark::State& state) {
CEscapeBenchmarkHelper(state, kStringValueNoEscape, state.range(0));
}
BENCHMARK(BM_CEscape_NoEscape)->Range(1, 1 << 14);
// CEscape benchmark over input built from kStringValueSomeEscaped, with the
// minimum input length taken from state.range(0).
void BM_CEscape_SomeEscaped(benchmark::State& state) {
CEscapeBenchmarkHelper(state, kStringValueSomeEscaped, state.range(0));
}
BENCHMARK(BM_CEscape_SomeEscaped)->Range(1, 1 << 14);
// CEscape benchmark over input built from kStringValueMostEscaped, with the
// minimum input length taken from state.range(0).
void BM_CEscape_MostEscaped(benchmark::State& state) {
CEscapeBenchmarkHelper(state, kStringValueMostEscaped, state.range(0));
}
BENCHMARK(BM_CEscape_MostEscaped)->Range(1, 1 << 14);
} // namespace

View file

@ -0,0 +1,750 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/escaping.h"
#include <array>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/internal/escaping_test_common.h"
#include "absl/strings/string_view.h"
namespace {
// Test vector pairing an escaped string with its unescaped counterpart.
struct epair {
// Text in escaped form (what the escape functions are expected to produce).
std::string escaped;
// The raw text that `escaped` corresponds to.
std::string unescaped;
};
// Round-trip test: every input is escaped with each of the four escape
// functions and must unescape back to the original through CUnescape (with and
// without error reporting, and in place). Afterwards every two-byte string is
// round-tripped through CHexEscape.
TEST(CEscape, EscapeAndUnescape) {
const std::string inputs[] = {
std::string("foo\nxx\r\b\0023"),
std::string(""),
std::string("abc"),
std::string("\1chad_rules"),
std::string("\1arnar_drools"),
std::string("xxxx\r\t'\"\\"),
std::string("\0xx\0", 4),
std::string("\x01\x31"),
std::string("abc\xb\x42\141bc"),
std::string("123\1\x31\x32\x33"),
std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
std::string(
"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
};
// Do this four times, once per escape function: CEscape (0), CHexEscape (1),
// Utf8SafeCEscape (2) and Utf8SafeCHexEscape (3).
for (int kind = 0; kind < 4; kind++) {
for (const std::string& original : inputs) {
std::string escaped;
switch (kind) {
case 0:
escaped = absl::CEscape(original);
break;
case 1:
escaped = absl::CHexEscape(original);
break;
case 2:
escaped = absl::Utf8SafeCEscape(original);
break;
case 3:
escaped = absl::Utf8SafeCHexEscape(original);
break;
}
std::string unescaped_str;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
EXPECT_EQ(unescaped_str, original);
unescaped_str.erase();
// Same round trip through the overload that reports errors; no error is
// expected.
std::string error;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error));
EXPECT_EQ(error, "");
// Check in-place unescaping
std::string s = escaped;
EXPECT_TRUE(absl::CUnescape(s, &s));
ASSERT_EQ(s, original);
}
}
// Check that all possible two character strings can be escaped then
// unescaped successfully.
for (int char0 = 0; char0 < 256; char0++) {
for (int char1 = 0; char1 < 256; char1++) {
char chars[2];
chars[0] = char0;
chars[1] = char1;
std::string s(chars, 2);
std::string escaped = absl::CHexEscape(s);
std::string unescaped;
EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
EXPECT_EQ(s, unescaped);
}
}
}
// Checks the exact output of the four escape functions against fixed
// {escaped, unescaped} pairs: each `unescaped` value is escaped and compared
// with the expected `escaped` text.
TEST(CEscape, BasicEscaping) {
// Expected output of CEscape (octal escapes).
epair oct_values[] = {
{"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
{"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
"'full of \"sound\" and \"fury\"'"},
{"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
{"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
};
// Expected output of CHexEscape (hex escapes).
epair hex_values[] = {
{"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
{"I\\\'ve just seen a \\\"face\\\"",
"I've just seen a \"face\""},
{"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
{"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
};
// Expected output of Utf8SafeCEscape: bytes with the high bit set appear
// unchanged in the escaped text.
epair utf8_oct_values[] = {
{"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
"\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
{"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
"\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
{"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
{"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
"\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
};
// Expected output of Utf8SafeCHexEscape: bytes with the high bit set appear
// unchanged in the escaped text.
epair utf8_hex_values[] = {
{"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
"\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
{"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
"\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
{"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
"\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
{"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
"\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
};
for (const epair& val : oct_values) {
std::string escaped = absl::CEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : hex_values) {
std::string escaped = absl::CHexEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : utf8_oct_values) {
std::string escaped = absl::Utf8SafeCEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
for (const epair& val : utf8_hex_values) {
std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped);
EXPECT_EQ(escaped, val.escaped);
}
}
// Checks CUnescape on Unicode escapes: valid \u / \U sequences must produce
// the listed bytes, and each malformed or out-of-range sequence must be
// rejected by both overloads, with a non-empty error message from the
// error-reporting one.
TEST(Unescape, BasicFunction) {
epair tests[] =
{{"", ""},
{"\\u0030", "0"},
{"\\u00A3", "\xC2\xA3"},
{"\\u22FD", "\xE2\x8B\xBD"},
{"\\U00010000", "\xF0\x90\x80\x80"},
{"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}};
for (const epair& val : tests) {
std::string out;
EXPECT_TRUE(absl::CUnescape(val.escaped, &out));
EXPECT_EQ(out, val.unescaped);
}
std::string bad[] = {"\\u1", // too short
"\\U1", // too short
"\\Uffffff", // exceeds 0x10ffff (largest Unicode)
"\\U00110000", // exceeds 0x10ffff (largest Unicode)
"\\uD835", // surrogate character (D800-DFFF)
"\\U0000DD04", // surrogate character (D800-DFFF)
"\\777", // exceeds 0xff
"\\xABCD"}; // exceeds 0xff
for (const std::string& e : bad) {
std::string error;
std::string out;
EXPECT_FALSE(absl::CUnescape(e, &out, &error));
EXPECT_FALSE(error.empty());
// The overload without error reporting must fail as well.
out.erase();
EXPECT_FALSE(absl::CUnescape(e, &out));
}
}
// Fixture for the CUnescape null-escape tests. It provides escaped inputs
// containing several null escapes (octal, hex and Unicode forms) and a string
// that receives the unescaped result.
class CUnescapeTest : public testing::Test {
protected:
static const char kStringWithMultipleOctalNulls[];
static const char kStringWithMultipleHexNulls[];
static const char kStringWithMultipleUnicodeNulls[];
// Destination passed to absl::CUnescape by the tests.
std::string result_string_;
};
const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
"\\0\\n" // null escape \0 plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\00\\12" // null escape \00 plus octal newline code
"\\000"; // null escape \000
// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
const char CUnescapeTest::kStringWithMultipleHexNulls[] =
"\\x0\\n"
"0\\n"
"\\x00\\xa"
"\\x000";
// Escaped input that combines the 4-digit (\u) and 8-digit (\U) Unicode null
// escapes with a newline escape and a literal '0' character.
const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
"\\u0000\\n" // short-form (4-digit) null escape plus newline
"0\\n" // just a number 0 (not a null escape) plus newline
"\\U00000000"; // long-form (8-digit) null escape
// A one-digit octal escape ("\0") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
  const std::string escaped_input = "\\0";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A two-digit octal escape ("\00") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
  const std::string escaped_input = "\\00";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A three-digit octal escape ("\000") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
  const std::string escaped_input = "\\000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A one-digit hex escape ("\x0") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
  const std::string escaped_input = "\\x0";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A two-digit hex escape ("\x00") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
  const std::string escaped_input = "\\x00";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// A three-digit hex escape ("\x000") must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
  const std::string escaped_input = "\\x000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// The 4-digit Unicode escape "\u0000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
  const std::string escaped_input = "\\u0000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// The 8-digit Unicode escape "\U00000000" must decode to a single NUL byte.
TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
  const std::string escaped_input = "\\U00000000";
  const std::string single_nul(1, '\0');
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(single_nul, result_string_);
}
// Every escape in the octal fixture, newlines and nulls alike, must be
// replaced by the character it denotes.
TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
  // Seven bytes in total; the explicit length keeps the embedded NULs.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  const std::string escaped_input(kStringWithMultipleOctalNulls);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
// Every escape in the hex fixture must be replaced by the character it
// denotes; the expected bytes match the octal variant of this test.
TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
  // Seven bytes in total; the explicit length keeps the embedded NULs.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0\n"
      "\0",
      7);
  const std::string escaped_input(kStringWithMultipleHexNulls);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
// Both the short (\u) and long (\U) Unicode null escapes in the fixture must
// decode to NUL bytes, with the literal '0' and newlines preserved.
TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
  // Five bytes in total; the explicit length keeps the embedded NULs.
  const std::string expected(
      "\0\n"
      "0\n"
      "\0",
      5);
  const std::string escaped_input(kStringWithMultipleUnicodeNulls);
  EXPECT_TRUE(absl::CUnescape(escaped_input, &result_string_));
  EXPECT_EQ(expected, result_string_);
}
static struct {
absl::string_view plaintext;
absl::string_view cyphertext;
} const base64_tests[] = {
// Empty string.
{{"", 0}, {"", 0}},
{{nullptr, 0},
{"", 0}}, // if length is zero, plaintext ptr must be ignored!
// Basic bit patterns;
// values obtained with "echo -n '...' | uuencode -m test"
{{"\000", 1}, "AA=="},
{{"\001", 1}, "AQ=="},
{{"\002", 1}, "Ag=="},
{{"\004", 1}, "BA=="},
{{"\010", 1}, "CA=="},
{{"\020", 1}, "EA=="},
{{"\040", 1}, "IA=="},
{{"\100", 1}, "QA=="},
{{"\200", 1}, "gA=="},
{{"\377", 1}, "/w=="},
{{"\376", 1}, "/g=="},
{{"\375", 1}, "/Q=="},
{{"\373", 1}, "+w=="},
{{"\367", 1}, "9w=="},
{{"\357", 1}, "7w=="},
{{"\337", 1}, "3w=="},
{{"\277", 1}, "vw=="},
{{"\177", 1}, "fw=="},
{{"\000\000", 2}, "AAA="},
{{"\000\001", 2}, "AAE="},
{{"\000\002", 2}, "AAI="},
{{"\000\004", 2}, "AAQ="},
{{"\000\010", 2}, "AAg="},
{{"\000\020", 2}, "ABA="},
{{"\000\040", 2}, "ACA="},
{{"\000\100", 2}, "AEA="},
{{"\000\200", 2}, "AIA="},
{{"\001\000", 2}, "AQA="},
{{"\002\000", 2}, "AgA="},
{{"\004\000", 2}, "BAA="},
{{"\010\000", 2}, "CAA="},
{{"\020\000", 2}, "EAA="},
{{"\040\000", 2}, "IAA="},
{{"\100\000", 2}, "QAA="},
{{"\200\000", 2}, "gAA="},
{{"\377\377", 2}, "//8="},
{{"\377\376", 2}, "//4="},
{{"\377\375", 2}, "//0="},
{{"\377\373", 2}, "//s="},
{{"\377\367", 2}, "//c="},
{{"\377\357", 2}, "/+8="},
{{"\377\337", 2}, "/98="},
{{"\377\277", 2}, "/78="},
{{"\377\177", 2}, "/38="},
{{"\376\377", 2}, "/v8="},
{{"\375\377", 2}, "/f8="},
{{"\373\377", 2}, "+/8="},
{{"\367\377", 2}, "9/8="},
{{"\357\377", 2}, "7/8="},
{{"\337\377", 2}, "3/8="},
{{"\277\377", 2}, "v/8="},
{{"\177\377", 2}, "f/8="},
{{"\000\000\000", 3}, "AAAA"},
{{"\000\000\001", 3}, "AAAB"},
{{"\000\000\002", 3}, "AAAC"},
{{"\000\000\004", 3}, "AAAE"},
{{"\000\000\010", 3}, "AAAI"},
{{"\000\000\020", 3}, "AAAQ"},
{{"\000\000\040", 3}, "AAAg"},
{{"\000\000\100", 3}, "AABA"},
{{"\000\000\200", 3}, "AACA"},
{{"\000\001\000", 3}, "AAEA"},
{{"\000\002\000", 3}, "AAIA"},
{{"\000\004\000", 3}, "AAQA"},
{{"\000\010\000", 3}, "AAgA"},
{{"\000\020\000", 3}, "ABAA"},
{{"\000\040\000", 3}, "ACAA"},
{{"\000\100\000", 3}, "AEAA"},
{{"\000\200\000", 3}, "AIAA"},
{{"\001\000\000", 3}, "AQAA"},
{{"\002\000\000", 3}, "AgAA"},
{{"\004\000\000", 3}, "BAAA"},
{{"\010\000\000", 3}, "CAAA"},
{{"\020\000\000", 3}, "EAAA"},
{{"\040\000\000", 3}, "IAAA"},
{{"\100\000\000", 3}, "QAAA"},
{{"\200\000\000", 3}, "gAAA"},
{{"\377\377\377", 3}, "////"},
{{"\377\377\376", 3}, "///+"},
{{"\377\377\375", 3}, "///9"},
{{"\377\377\373", 3}, "///7"},
{{"\377\377\367", 3}, "///3"},
{{"\377\377\357", 3}, "///v"},
{{"\377\377\337", 3}, "///f"},
{{"\377\377\277", 3}, "//+/"},
{{"\377\377\177", 3}, "//9/"},
{{"\377\376\377", 3}, "//7/"},
{{"\377\375\377", 3}, "//3/"},
{{"\377\373\377", 3}, "//v/"},
{{"\377\367\377", 3}, "//f/"},
{{"\377\357\377", 3}, "/+//"},
{{"\377\337\377", 3}, "/9//"},
{{"\377\277\377", 3}, "/7//"},
{{"\377\177\377", 3}, "/3//"},
{{"\376\377\377", 3}, "/v//"},
{{"\375\377\377", 3}, "/f//"},
{{"\373\377\377", 3}, "+///"},
{{"\367\377\377", 3}, "9///"},
{{"\357\377\377", 3}, "7///"},
{{"\337\377\377", 3}, "3///"},
{{"\277\377\377", 3}, "v///"},
{{"\177\377\377", 3}, "f///"},
// Random numbers: values obtained with
//
// #! /bin/bash
// dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
// od -N $1 -t o1 /tmp/bar.random
// uuencode -m test < /tmp/bar.random
//
// where $1 is the number of bytes (2, 3)
{{"\243\361", 2}, "o/E="},
{{"\024\167", 2}, "FHc="},
{{"\313\252", 2}, "y6o="},
{{"\046\041", 2}, "JiE="},
{{"\145\236", 2}, "ZZ4="},
{{"\254\325", 2}, "rNU="},
{{"\061\330", 2}, "Mdg="},
{{"\245\032", 2}, "pRo="},
{{"\006\000", 2}, "BgA="},
{{"\375\131", 2}, "/Vk="},
{{"\303\210", 2}, "w4g="},
{{"\040\037", 2}, "IB8="},
{{"\261\372", 2}, "sfo="},
{{"\335\014", 2}, "3Qw="},
{{"\233\217", 2}, "m48="},
{{"\373\056", 2}, "+y4="},
{{"\247\232", 2}, "p5o="},
{{"\107\053", 2}, "Rys="},
{{"\204\077", 2}, "hD8="},
{{"\276\211", 2}, "vok="},
{{"\313\110", 2}, "y0g="},
{{"\363\376", 2}, "8/4="},
{{"\251\234", 2}, "qZw="},
{{"\103\262", 2}, "Q7I="},
{{"\142\312", 2}, "Yso="},
{{"\067\211", 2}, "N4k="},
{{"\220\001", 2}, "kAE="},
{{"\152\240", 2}, "aqA="},
{{"\367\061", 2}, "9zE="},
{{"\133\255", 2}, "W60="},
{{"\176\035", 2}, "fh0="},
{{"\032\231", 2}, "Gpk="},
{{"\013\007\144", 3}, "Cwdk"},
{{"\030\112\106", 3}, "GEpG"},
{{"\047\325\046", 3}, "J9Um"},
{{"\310\160\022", 3}, "yHAS"},
{{"\131\100\237", 3}, "WUCf"},
{{"\064\342\134", 3}, "NOJc"},
{{"\010\177\004", 3}, "CH8E"},
{{"\345\147\205", 3}, "5WeF"},
{{"\300\343\360", 3}, "wOPw"},
{{"\061\240\201", 3}, "MaCB"},
{{"\225\333\044", 3}, "ldsk"},
{{"\215\137\352", 3}, "jV/q"},
{{"\371\147\160", 3}, "+Wdw"},
{{"\030\320\051", 3}, "GNAp"},
{{"\044\174\241", 3}, "JHyh"},
{{"\260\127\037", 3}, "sFcf"},
{{"\111\045\033", 3}, "SSUb"},
{{"\202\114\107", 3}, "gkxH"},
{{"\057\371\042", 3}, "L/ki"},
{{"\223\247\244", 3}, "k6ek"},
{{"\047\216\144", 3}, "J45k"},
{{"\203\070\327", 3}, "gzjX"},
{{"\247\140\072", 3}, "p2A6"},
{{"\124\115\116", 3}, "VE1O"},
{{"\157\162\050", 3}, "b3Io"},
{{"\357\223\004", 3}, "75ME"},
{{"\052\117\156", 3}, "Kk9u"},
{{"\347\154\000", 3}, "52wA"},
{{"\303\012\142", 3}, "wwpi"},
{{"\060\035\362", 3}, "MB3y"},
{{"\130\226\361", 3}, "WJbx"},
{{"\173\013\071", 3}, "ews5"},
{{"\336\004\027", 3}, "3gQX"},
{{"\357\366\234", 3}, "7/ac"},
{{"\353\304\111", 3}, "68RJ"},
{{"\024\264\131", 3}, "FLRZ"},
{{"\075\114\251", 3}, "PUyp"},
{{"\315\031\225", 3}, "zRmV"},
{{"\154\201\276", 3}, "bIG+"},
{{"\200\066\072", 3}, "gDY6"},
{{"\142\350\267", 3}, "Yui3"},
{{"\033\000\166", 3}, "GwB2"},
{{"\210\055\077", 3}, "iC0/"},
{{"\341\037\124", 3}, "4R9U"},
{{"\161\103\152", 3}, "cUNq"},
{{"\270\142\131", 3}, "uGJZ"},
{{"\337\076\074", 3}, "3z48"},
{{"\375\106\362", 3}, "/Uby"},
{{"\227\301\127", 3}, "l8FX"},
{{"\340\002\234", 3}, "4AKc"},
{{"\121\064\033", 3}, "UTQb"},
{{"\157\134\143", 3}, "b1xj"},
{{"\247\055\327", 3}, "py3X"},
{{"\340\142\005", 3}, "4GIF"},
{{"\060\260\143", 3}, "MLBj"},
{{"\075\203\170", 3}, "PYN4"},
{{"\143\160\016", 3}, "Y3AO"},
{{"\313\013\063", 3}, "ywsz"},
{{"\174\236\135", 3}, "fJ5d"},
{{"\103\047\026", 3}, "QycW"},
{{"\365\005\343", 3}, "9QXj"},
{{"\271\160\223", 3}, "uXCT"},
{{"\362\255\172", 3}, "8q16"},
{{"\113\012\015", 3}, "SwoN"},
// various lengths, generated by this python script:
//
// from string import lowercase as lc
// for i in range(27):
// print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
// lc[:i].encode('base64').strip())
{{"", 0}, {"", 0}},
{"a", "YQ=="},
{"ab", "YWI="},
{"abc", "YWJj"},
{"abcd", "YWJjZA=="},
{"abcde", "YWJjZGU="},
{"abcdef", "YWJjZGVm"},
{"abcdefg", "YWJjZGVmZw=="},
{"abcdefgh", "YWJjZGVmZ2g="},
{"abcdefghi", "YWJjZGVmZ2hp"},
{"abcdefghij", "YWJjZGVmZ2hpag=="},
{"abcdefghijk", "YWJjZGVmZ2hpams="},
{"abcdefghijkl", "YWJjZGVmZ2hpamts"},
{"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
{"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
{"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
{"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
{"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
{"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
{"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
{"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
{"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
{"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
{"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
{"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
{"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
{"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
};
// Round-trips the base64 fixtures through the standard and web-safe
// escape/unescape APIs, using `StringType` for every destination string, and
// then checks that invalid input is rejected.
template <typename StringType>
void TestEscapeAndUnescape() {
  using Index = typename StringType::size_type;

  // Short strings: this exercises the length math and the boundaries.
  for (const auto& test_case : base64_tests) {
    // Plain base64. Destinations start out holding unrelated text that the
    // calls are expected to replace.
    StringType encoded("this junk should be ignored");
    absl::Base64Escape(test_case.plaintext, &encoded);
    EXPECT_EQ(encoded, test_case.cyphertext);
    EXPECT_EQ(absl::Base64Escape(test_case.plaintext), test_case.cyphertext);

    StringType decoded("this junk should be ignored");
    EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
    EXPECT_EQ(decoded, test_case.plaintext);

    // Map the expected output onto the web-safe alphabet ('+' -> '-',
    // '/' -> '_'). Padding ('=') is intentionally kept at this stage.
    StringType websafe_with_padding(test_case.cyphertext);
    for (auto& symbol : websafe_with_padding) {
      if (symbol == '+') {
        symbol = '-';
      } else if (symbol == '/') {
        symbol = '_';
      }
    }

    // Plain web-safe output has no padding: cut at the first '=' if any.
    StringType websafe(websafe_with_padding);
    Index cut = 0;
    while (cut < websafe.size() && websafe[cut] != '=') {
      ++cut;
    }
    websafe.resize(cut);

    encoded = "this junk should be ignored";
    absl::WebSafeBase64Escape(test_case.plaintext, &encoded);
    EXPECT_EQ(encoded, websafe);
    EXPECT_EQ(absl::WebSafeBase64Escape(test_case.plaintext), websafe);

    decoded = "this junk should be ignored";
    EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
    EXPECT_EQ(decoded, test_case.plaintext);
  }

  // Long strings: this tests the streaming.
  for (const auto& long_case : absl::strings_internal::base64_strings()) {
    StringType encoded_long;
    absl::WebSafeBase64Escape(long_case.plaintext, &encoded_long);
    EXPECT_EQ(long_case.cyphertext, encoded_long);
    EXPECT_EQ(absl::WebSafeBase64Escape(long_case.plaintext),
              long_case.cyphertext);
  }

  // Bad data: both decoders must fail and leave the destination empty.
  absl::string_view invalid_inputs[] = {"ab-/",
                                        absl::string_view("\0bcd", 4),
                                        absl::string_view("abc.\0", 5)};
  for (absl::string_view invalid : invalid_inputs) {
    StringType scratch;
    EXPECT_FALSE(absl::Base64Unescape(invalid, &scratch));
    EXPECT_FALSE(absl::WebSafeBase64Unescape(invalid, &scratch));
    EXPECT_TRUE(scratch.empty());
  }
}
// Runs the shared base64 round-trip checks with std::string as the
// destination string type.
TEST(Base64, EscapeAndUnescape) {
TestEscapeAndUnescape<std::string>();
}
// Checks which padding suffixes the base64 decoders accept for the encoding
// of "a" ("YQ").
TEST(Base64, Padding) {
  // Padding is optional, and '.' is as acceptable a padding character as '='.
  const absl::string_view kAccepted[] = {
      "YQ", "YQ==", "YQ=.", "YQ.=", "YQ..",
  };
  for (const absl::string_view encoded : kAccepted) {
    std::string standard_result;
    EXPECT_TRUE(absl::Base64Unescape(encoded, &standard_result));
    EXPECT_EQ(standard_result, "a");

    std::string websafe_result;
    EXPECT_TRUE(absl::WebSafeBase64Unescape(encoded, &websafe_result));
    EXPECT_EQ(websafe_result, "a");
  }

  // Suffixes with the wrong number of padding characters must be rejected by
  // both decoders.
  const absl::string_view kRejected[] = {
      "YQ=",    "YQ.",    "YQ===",   "YQ==.",   "YQ=.=",
      "YQ=..",  "YQ.==",  "YQ.=.",   "YQ..=",   "YQ...",
      "YQ====", "YQ....", "YQ=====", "YQ.....",
  };
  for (const absl::string_view encoded : kRejected) {
    std::string standard_result;
    EXPECT_FALSE(absl::Base64Unescape(encoded, &standard_result));

    std::string websafe_result;
    EXPECT_FALSE(absl::WebSafeBase64Unescape(encoded, &websafe_result));
  }
}
// Round-trips a 3,000,000,000-byte string of 'x' through Base64Escape and
// Base64Unescape. The DISABLED_ prefix in the name is preserved.
TEST(Base64, DISABLED_HugeData) {
  const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
  static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  const size_t kGroups = kSize / 3;

  const std::string huge(kSize, 'x');
  std::string escaped;
  absl::Base64Escape(huge, &escaped);

  // Every three-byte group "xxx" encodes to "eHh4", so the expected output is
  // that quad repeated once per group.
  std::string expected_encoding;
  expected_encoding.reserve(kGroups * 4);
  for (size_t remaining = kGroups; remaining > 0; --remaining) {
    expected_encoding.append("eHh4");
  }
  EXPECT_EQ(expected_encoding, escaped);

  std::string unescaped;
  EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
  EXPECT_EQ(huge, unescaped);
}
// Exercises the bool-returning HexStringToBytes overload in both directions
// with BytesToHexString, plus its failure and empty-input cases.
TEST(Escaping, HexStringToBytesBackToHex) {
  constexpr absl::string_view kTestHexLower = "1c2f0032f40123456789abcdef";
  constexpr absl::string_view kTestHexUpper = "1C2F0032F40123456789ABCDEF";
  constexpr absl::string_view kTestBytes = absl::string_view(
      "\x1c\x2f\x00\x32\xf4\x01\x23\x45\x67\x89\xab\xcd\xef", 13);

  // Lower- and upper-case hex digits decode to the same bytes.
  std::string bytes;
  EXPECT_TRUE(absl::HexStringToBytes(kTestHexLower, &bytes));
  EXPECT_EQ(bytes, kTestBytes);
  EXPECT_TRUE(absl::HexStringToBytes(kTestHexUpper, &bytes));
  EXPECT_EQ(bytes, kTestBytes);

  // Encoding produces lower-case hex.
  std::string hex = absl::BytesToHexString(kTestBytes);
  EXPECT_EQ(hex, kTestHexLower);

  // Same buffer for input and output.
  // The contract does not promise that this works, so the result is ignored.
  // The purpose is to see whether the program crashes or whether sanitizers
  // catch anything.
  bytes = std::string(kTestHexUpper);
  (void)absl::HexStringToBytes(bytes, &bytes);

  // Length not a multiple of two.
  EXPECT_FALSE(absl::HexStringToBytes("1c2f003", &bytes));

  // Not hex.
  EXPECT_FALSE(absl::HexStringToBytes("1c2f00ft", &bytes));

  // Empty input succeeds and results in empty output.
  bytes = "abc";
  EXPECT_TRUE(absl::HexStringToBytes("", &bytes));
  EXPECT_EQ("", bytes);
}
// Exercises the string-returning HexStringToBytes overload on whole strings
// and on sub-ranges of larger buffers, then converts the bytes back to hex.
TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  const std::string hex_mixed = "0123456789abcdefABCDEF";
  const std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";
  const std::string hex_only_lower = "0123456789abcdefabcdef";

  // Whole string, mixed case.
  const std::string whole_result = absl::HexStringToBytes(hex_mixed);
  EXPECT_EQ(bytes_expected, whole_result);

  // View that stops just before a trailing non-hex character.
  const std::string prefix_valid = hex_mixed + "?";
  const absl::string_view prefix_view(prefix_valid.data(),
                                      prefix_valid.size() - 1);
  const std::string prefix_result = absl::HexStringToBytes(prefix_view);
  EXPECT_EQ(bytes_expected, prefix_result);

  // View into the middle of a buffer with non-hex characters on both sides.
  const std::string infix_valid = "?" + hex_mixed + "???";
  const absl::string_view infix_view(infix_valid.data() + 1, hex_mixed.size());
  const std::string infix_result = absl::HexStringToBytes(infix_view);
  EXPECT_EQ(bytes_expected, infix_result);

  // Converting back yields lower-case hex only.
  const std::string hex_result = absl::BytesToHexString(bytes_expected);
  EXPECT_EQ(hex_only_lower, hex_result);
}
} // namespace

View file

@ -0,0 +1,64 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_HAS_ABSL_STRINGIFY_H_
#define ABSL_STRINGS_HAS_ABSL_STRINGIFY_H_
#include <type_traits>
#include <utility>
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// This is an empty class not intended to be used. It exists so that
// `HasAbslStringify` can reference a universal class rather than needing to be
// copied for each new sink.
//
// The members below are declarations only; no definitions appear in this
// header. `HasAbslStringify` names this type solely inside `decltype`.
class UnimplementedSink {
public:
// Appends `count` copies of `ch`.
void Append(size_t count, char ch);
// Appends the characters of `v`.
void Append(string_view v);
// Support `absl::Format(&sink, format, args...)`.
friend void AbslFormatFlush(UnimplementedSink* sink, absl::string_view v);
};
} // namespace strings_internal
// `HasAbslStringify<T>` is a type trait that reports whether type `T`
// supports the `AbslStringify()` customization point (documented at
// https://abseil.io/docs/cpp/guides/format#abslstringify).
//
// The trait derives from `std::true_type` exactly when the unqualified call
// `AbslStringify(sink, value)`, with a `strings_internal::UnimplementedSink&`
// sink and a `const T&` value, is well-formed and has type `void`.
//
// Note that some types can be `StrCat`-ed without going through the
// `AbslStringify` customization point (for example, `int`).
//
// Primary template: selected when the specialization below is not viable.
template <typename T, typename Enable = void>
struct HasAbslStringify : std::false_type {};

// Specialization: participates only when the call expression compiles and
// returns `void`.
template <typename T>
struct HasAbslStringify<
    T, typename std::enable_if<std::is_void<decltype(AbslStringify(
           std::declval<strings_internal::UnimplementedSink&>(),
           std::declval<const T&>()))>::value>::type> : std::true_type {};
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_HAS_ABSL_STRINGIFY_H_

View file

@ -0,0 +1,40 @@
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/has_absl_stringify.h"
#include <string>
#include "gtest/gtest.h"
#include "absl/types/optional.h"
namespace {
struct TypeWithoutAbslStringify {};
// Test type that opts in to the AbslStringify customization point with a
// no-op hidden friend accepting any sink type.
struct TypeWithAbslStringify {
  template <typename SinkT>
  friend void AbslStringify(SinkT& /*sink*/,
                            const TypeWithAbslStringify& /*value*/) {}
};
// Only the type that declares an AbslStringify overload is detected; built-in,
// standard-library, plain, and optional-wrapped types are not.
TEST(HasAbslStringifyTest, Works) {
  using OptionalOfStringifiable = absl::optional<TypeWithAbslStringify>;

  EXPECT_FALSE(absl::HasAbslStringify<int>::value);
  EXPECT_FALSE(absl::HasAbslStringify<std::string>::value);
  EXPECT_FALSE(absl::HasAbslStringify<TypeWithoutAbslStringify>::value);
  EXPECT_TRUE(absl::HasAbslStringify<TypeWithAbslStringify>::value);
  EXPECT_FALSE(absl::HasAbslStringify<OptionalOfStringifiable>::value);
}
} // namespace

View file

@ -0,0 +1,42 @@
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_HAS_OSTREAM_OPERATOR_H_
#define ABSL_STRINGS_HAS_OSTREAM_OPERATOR_H_
#include <ostream>
#include <type_traits>
#include <utility>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
// Type trait reporting whether a `const T&` can be streamed into a
// `std::ostream` with `operator<<`.
//
// Primary template: selected when the specialization below is not viable.
template <typename T, typename Enable = void>
struct HasOstreamOperator : std::false_type {};

// Specialization: participates only when `os << value` compiles for a
// `std::ostream&` and a `const T&`, and the expression has type
// `std::ostream&`.
template <typename T>
struct HasOstreamOperator<
    T, typename std::enable_if<std::is_same<
           std::ostream&, decltype(std::declval<std::ostream&>()
                                   << std::declval<const T&>())>::value>::type>
    : std::true_type {};
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_HAS_OSTREAM_OPERATOR_H_

View file

@ -0,0 +1,41 @@
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/has_ostream_operator.h"
#include <ostream>
#include <string>
#include "gtest/gtest.h"
#include "absl/types/optional.h"
namespace {
struct TypeWithoutOstreamOp {};
// Test type that is streamable: its hidden-friend operator<< writes nothing
// and hands the stream back.
struct TypeWithOstreamOp {
  friend std::ostream& operator<<(std::ostream& stream,
                                  const TypeWithOstreamOp& /*value*/) {
    return stream;
  }
};
// Built-in, standard-library, and user-defined streamable types are detected;
// types without an operator<< are not.
TEST(HasOstreamOperatorTest, Works) {
  using OptionalInt = absl::optional<int>;

  // Streamable.
  EXPECT_TRUE(absl::HasOstreamOperator<int>::value);
  EXPECT_TRUE(absl::HasOstreamOperator<std::string>::value);
  EXPECT_TRUE(absl::HasOstreamOperator<TypeWithOstreamOp>::value);

  // Not streamable.
  EXPECT_FALSE(absl::HasOstreamOperator<OptionalInt>::value);
  EXPECT_FALSE(absl::HasOstreamOperator<TypeWithoutOstreamOp>::value);
}
} // namespace

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <algorithm>
#include <cassert>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// Table containing some large powers of 5, for fast computation.
// Constant step size for entries in the kLargePowersOfFive table. Each entry
// is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep
// (or 5**27).
//
// In other words, the Nth entry in the table is 5**(27*N).
//
// 5**27 is the largest power of 5 that fits in 64 bits.
constexpr int kLargePowerOfFiveStep = 27;
// The largest legal index into the kLargePowersOfFive table.
//
// In other words, the largest precomputed power of 5 is 5**(27*20).
constexpr int kLargestPowerOfFiveIndex = 20;
// Table of powers of (5**27), up to (5**27)**20 == 5**540.
//
// Used to generate large powers of 5 while limiting the number of repeated
// multiplications required.
//
// clang-format off
const uint32_t kLargePowersOfFive[] = {
// 5**27 (i=1), start=0, end=2
0xfa10079dU, 0x6765c793U,
// 5**54 (i=2), start=2, end=6
0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U,
// 5**81 (i=3), start=6, end=12
0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U,
// 5**108 (i=4), start=12, end=20
0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU,
0x20d3846fU, 0x06d00f73U,
// 5**135 (i=5), start=20, end=30
0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U,
0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU,
// 5**162 (i=6), start=30, end=42
0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU,
0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU,
// 5**189 (i=7), start=42, end=56
0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U,
0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U,
0x94151217U, 0x0072e9f7U,
// 5**216 (i=8), start=56, end=72
0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU,
0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU,
0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U,
// 5**243 (i=9), start=72, end=90
0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U,
0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U,
0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U,
// 5**270 (i=10), start=90, end=110
0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U,
0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U,
0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U,
0x0d39e796U, 0x00079250U,
// 5**297 (i=11), start=110, end=132
0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U,
0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U,
0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U,
0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U,
// 5**324 (i=12), start=132, end=156
0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U,
0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU,
0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U,
0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U,
// 5**351 (i=13), start=156, end=182
0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU,
0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U,
0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U,
0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU,
0x859a4940U, 0x00007fb6U,
// 5**378 (i=14), start=182, end=210
0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U,
0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU,
0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU,
0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U,
0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U,
// 5**405 (i=15), start=210, end=240
0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U,
0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U,
0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U,
0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU,
0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U,
// 5**432 (i=16), start=240, end=272
0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU,
0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U,
0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U,
0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU,
0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U,
0x3364ea62U, 0x0000086aU,
// 5**459 (i=17), start=272, end=306
0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU,
0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U,
0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U,
0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U,
0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU,
0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U,
// 5**486 (i=18), start=306, end=342
0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU,
0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U,
0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU,
0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U,
0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU,
0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU,
// 5**513 (i=19), start=342, end=380
0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U,
0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU,
0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU,
0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U,
0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U,
0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU,
0xf0046d27U, 0x0000008dU,
// 5**540 (i=20), start=380, end=420
0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU,
0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U,
0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U,
0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U,
0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U,
0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU,
0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U,
};
// clang-format on
// Returns a pointer to the first word of the big integer (5**27)**i inside
// kLargePowersOfFive. i must be between 1 and 20, inclusive.
//
// Entry j of the table occupies 2*j words, so the entries preceding entry i
// take up 2 * (1 + 2 + ... + (i - 1)) == i * (i - 1) words in total.
const uint32_t* LargePowerOfFiveData(int i) {
  const int first_word = (i - 1) * i;
  return &kLargePowersOfFive[first_word];
}
// Returns how many 32-bit words the table entry for (5**27)**i occupies.
// i must be between 1 and 20, inclusive.
int LargePowerOfFiveSize(int i) {
  // Each successive entry is two words longer than the previous one.
  const int words = i * 2;
  return words;
}
} // namespace
// Small powers of five: kFiveToNth[n] == 5**n for n in [0, 13].
ABSL_DLL const uint32_t kFiveToNth[14] = {
1, 5, 25, 125, 625, 3125, 15625,
78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125,
};
// Small powers of ten: kTenToNth[n] == 10**n for n in [0, 9].
ABSL_DLL const uint32_t kTenToNth[10] = {
1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000,
};
// Loads the mantissa described by `fp` into this BigUnsigned and returns the
// exponent that goes with the stored value.
//
// When `fp.subrange_begin` is set, the digits in
// [fp.subrange_begin, fp.subrange_end) are re-read via ReadDigits(), keeping
// at most `significant_digits` of them. Otherwise the 64-bit `fp.mantissa` is
// copied directly.
template <int max_words>
int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp,
                                              int significant_digits) {
  SetToZero();
  assert(fp.type == FloatType::kNumber);

  if (fp.subrange_begin != nullptr) {
    // The digit text must be read; fold the adjustment reported by
    // ReadDigits() into the literal exponent.
    const int exponent_adjust =
        ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits);
    return fp.literal_exponent + exponent_adjust;
  }

  // We already exactly parsed the mantissa, so no more work is necessary
  // beyond splitting it into its low and high 32-bit words.
  words_[0] = fp.mantissa & 0xffffffffu;
  words_[1] = fp.mantissa >> 32;
  if (words_[1]) {
    size_ = 2;
  } else if (words_[0]) {
    size_ = 1;
  }
  return fp.exponent;
}
// Reads the decimal digits in [begin, end), which may contain a single '.',
// into this BigUnsigned as an integer, consuming at most `significant_digits`
// digits after leading and trailing zeroes have been discarded.
//
// Returns the decimal exponent adjustment that accounts for what was not
// stored as-is: it is decremented for each fractional digit consumed and
// incremented for each integer-part digit dropped (trailing zeroes or digits
// beyond `significant_digits`).
template <int max_words>
int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end,
int significant_digits) {
assert(significant_digits <= Digits10() + 1);
SetToZero();
bool after_decimal_point = false;
// Discard any leading zeroes before the decimal point
while (begin < end && *begin == '0') {
++begin;
}
int dropped_digits = 0;
// Discard any trailing zeroes. These may or may not be after the decimal
// point.
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
if (begin < end && *std::prev(end) == '.') {
// If the string ends in '.', either before or after dropping zeroes, then
// drop the decimal point and look for more digits to drop.
dropped_digits = 0;
--end;
while (begin < end && *std::prev(end) == '0') {
--end;
++dropped_digits;
}
} else if (dropped_digits) {
// We dropped digits, and aren't sure if they're before or after the decimal
// point. Figure that out now.
const char* dp = std::find(begin, end, '.');
if (dp != end) {
// The dropped trailing digits were after the decimal point, so don't
// count them.
dropped_digits = 0;
}
}
// Any non-fraction digits we dropped need to be accounted for in our exponent
// adjustment.
int exponent_adjust = dropped_digits;
// Digits are accumulated in `queued` and flushed into the big integer in
// batches of kMaxSmallPowerOfTen digits, one multiply-and-add per batch.
uint32_t queued = 0;
int digits_queued = 0;
for (; begin != end && significant_digits > 0; ++begin) {
if (*begin == '.') {
after_decimal_point = true;
continue;
}
if (after_decimal_point) {
// For each fractional digit we emit in our parsed integer, adjust our
// decimal exponent to compensate.
--exponent_adjust;
}
char digit = (*begin - '0');
--significant_digits;
if (significant_digits == 0 && std::next(begin) != end &&
(digit == 0 || digit == 5)) {
// If this is the very last significant digit, but insignificant digits
// remain, we know that the last of those remaining significant digits is
// nonzero. (If it wasn't, we would have stripped it before we got here.)
// So if this final digit is a 0 or 5, adjust it upward by 1.
//
// This adjustment is what allows incredibly large mantissas ending in
// 500000...000000000001 to correctly round up, rather than to nearest.
++digit;
}
queued = 10 * queued + static_cast<uint32_t>(digit);
++digits_queued;
if (digits_queued == kMaxSmallPowerOfTen) {
MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]);
AddWithCarry(0, queued);
queued = digits_queued = 0;
}
}
// Encode any remaining digits.
if (digits_queued) {
MultiplyBy(kTenToNth[digits_queued]);
AddWithCarry(0, queued);
}
// If any insignificant digits remain, we will drop them. But if we have not
// yet read the decimal point, then we have to adjust the exponent to account
// for the dropped digits.
if (begin < end && !after_decimal_point) {
// This call to std::find will result in a pointer either to the decimal
// point, or to the end of our buffer if there was none.
//
// Either way, [begin, decimal_point) will contain the set of dropped digits
// that require an exponent adjustment.
const char* decimal_point = std::find(begin, end, '.');
exponent_adjust += (decimal_point - begin);
}
return exponent_adjust;
}
// Returns 5**n.  As much of the exponent as possible is consumed through the
// table of large powers of five; whatever is left over is applied with
// MultiplyByFiveToTheNth().
template <int max_words>
/* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth(
    int n) {
  BigUnsigned result(1u);

  // The first table entry is copied straight in, which avoids a pointless
  // multiplication by 1.
  if (n >= kLargePowerOfFiveStep) {
    const int table_index =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    std::copy_n(LargePowerOfFiveData(table_index),
                LargePowerOfFiveSize(table_index), result.words_);
    result.size_ = LargePowerOfFiveSize(table_index);
    n -= kLargePowerOfFiveStep * table_index;
  }

  // Any further table-sized chunks are multiplied in.
  while (n >= kLargePowerOfFiveStep) {
    const int table_index =
        std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex);
    result.MultiplyBy(LargePowerOfFiveSize(table_index),
                      LargePowerOfFiveData(table_index));
    n -= kLargePowerOfFiveStep * table_index;
  }

  result.MultiplyByFiveToTheNth(n);
  return result;
}
// Computes word `step` of the in-place product.  Every pair of word indices
// (one into *this, one into `other_words`) that sums to `step` contributes a
// 64-bit partial product; the low 32 bits of the total become words_[step] and
// the overflow is carried into words_[step + 1] and above.
template <int max_words>
void BigUnsigned<max_words>::MultiplyStep(int original_size,
                                          const uint32_t* other_words,
                                          int other_size, int step) {
  int lhs_index = std::min(original_size - 1, step);
  int rhs_index = step - lhs_index;
  uint64_t low_sum = 0;   // running total destined for words_[step]
  uint64_t high_sum = 0;  // overflow destined for words_[step + 1]
  while (lhs_index >= 0 && rhs_index < other_size) {
    const uint64_t partial =
        static_cast<uint64_t>(words_[lhs_index]) * other_words[rhs_index];
    low_sum += partial;
    high_sum += low_sum >> 32;
    low_sum &= 0xffffffff;
    --lhs_index;
    ++rhs_index;
  }
  AddWithCarry(step + 1, high_sum);
  words_[step] = static_cast<uint32_t>(low_sum & 0xffffffff);
  if (low_sum > 0 && size_ <= step) {
    size_ = step + 1;
  }
}
// Renders this integer as a decimal string by repeatedly dividing a scratch
// copy by ten.  The value zero is rendered as "0".
template <int max_words>
std::string BigUnsigned<max_words>::ToString() const {
  BigUnsigned<max_words> remaining = *this;
  std::string reversed_digits;
  // Each division yields the next least-significant digit.
  while (remaining.size() > 0) {
    const uint32_t digit = remaining.DivMod<10>();
    reversed_digits.push_back(static_cast<char>('0' + digit));
  }
  if (reversed_digits.empty()) {
    return std::string(1, '0');
  }
  return std::string(reversed_digits.rbegin(), reversed_digits.rend());
}
// Explicit instantiation definitions for the two word counts the class
// accepts (its static_assert allows only 4 and 84).  These pair with the
// `extern template` declarations in charconv_bigint.h.
template class BigUnsigned<4>;
template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,433 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
#include <algorithm>
#include <cstdint>
#include <ostream>
#include <string>
#include "absl/base/config.h"
#include "absl/strings/ascii.h"
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The largest power that 5 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfFive = 13;
// The largest power that 10 can be raised to and still fit in a uint32_t.
constexpr int kMaxSmallPowerOfTen = 9;
// Lookup tables of small powers: kFiveToNth[n] is 5**n and kTenToNth[n] is
// 10**n.  Both are defined in the .cc file.
ABSL_DLL extern const uint32_t
kFiveToNth[kMaxSmallPowerOfFive + 1];
ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
// Large, fixed-width unsigned integer.
//
// Exact rounding for decimal-to-binary floating point conversion requires very
// large integer math, but a design goal of absl::from_chars is to avoid
// allocating memory. The integer precision needed for decimal-to-binary
// conversions is large but bounded, so a huge fixed-width integer class
// suffices.
//
// This is an intentionally limited big integer class. Only needed operations
// are implemented. All storage lives in an array data member, and all
// arithmetic is done in-place, to avoid requiring separate storage for operand
// and result.
//
// This is an internal class. Some methods live in the .cc file, and are
// instantiated only for the values of max_words we need.
template <int max_words>
class BigUnsigned {
 public:
  static_assert(max_words == 4 || max_words == 84,
                "unsupported max_words value");

  // Constructs a BigUnsigned holding the value 0.
  BigUnsigned() : size_(0), words_{} {}

  // Constructs a BigUnsigned holding `v`.  The low 32 bits are stored in
  // words_[0] and the high 32 bits in words_[1].
  explicit constexpr BigUnsigned(uint64_t v)
      : size_((v >> 32) ? 2 : v ? 1 : 0),
        words_{static_cast<uint32_t>(v & 0xffffffffu),
               static_cast<uint32_t>(v >> 32)} {}

  // Constructs a BigUnsigned from the given string_view containing a decimal
  // value.  If the input string is not a decimal integer, constructs a 0
  // instead.
  explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
    // Check for valid input, returning a 0 otherwise.  This is reasonable
    // behavior only because this constructor is for unit tests.
    if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
        sv.empty()) {
      return;
    }
    int exponent_adjust =
        ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
    if (exponent_adjust > 0) {
      MultiplyByTenToTheNth(exponent_adjust);
    }
  }

  // Loads the mantissa value of a previously-parsed float.
  //
  // Returns the associated decimal exponent.  The value of the parsed float is
  // exactly *this * 10**exponent.
  int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);

  // Returns the number of decimal digits of precision this type provides.  All
  // numbers with this many decimal digits or fewer are representable by this
  // type.
  //
  // Analogous to std::numeric_limits<BigUnsigned>::digits10.
  static constexpr int Digits10() {
    // 9975007/1035508 is very slightly less than log10(2**32).
    return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
  }

  // Shifts left by the given number of bits.
  //
  // A count of zero or less is a no-op.  Bits shifted past the top word are
  // discarded, and a count of max_words * 32 or more leaves the value 0.
  void ShiftLeft(int count) {
    if (count > 0) {
      const int word_shift = count / 32;
      if (word_shift >= max_words) {
        SetToZero();
        return;
      }
      size_ = (std::min)(size_ + word_shift, max_words);
      count %= 32;
      if (count == 0) {
        // Whole-word shift: just move the words up.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=warray-bounds
// shows a lot of bogus -Warray-bounds warnings under GCC.
// This is not the only one in Abseil.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
        std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(14, 0)
#pragma GCC diagnostic pop
#endif
      } else {
        // Each destination word combines the high part of one source word
        // with the bits spilled out of the source word below it.  Working
        // from the top down keeps unread source words intact.
        for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
          words_[i] = (words_[i - word_shift] << count) |
                      (words_[i - word_shift - 1] >> (32 - count));
        }
        words_[word_shift] = words_[0] << count;
        // Grow size_ if necessary.
        if (size_ < max_words && words_[size_]) {
          ++size_;
        }
      }
      // The vacated low words become zero.
      std::fill_n(words_, word_shift, 0u);
    }
  }

  // Multiplies by v in-place.  Carry out of the top word is discarded.
  void MultiplyBy(uint32_t v) {
    if (size_ == 0 || v == 1) {
      return;
    }
    if (v == 0) {
      SetToZero();
      return;
    }
    const uint64_t factor = v;
    uint64_t window = 0;
    for (int i = 0; i < size_; ++i) {
      window += factor * words_[i];
      words_[i] = window & 0xffffffff;
      window >>= 32;
    }
    // If carry bits remain and there's space for them, grow size_.
    if (window && size_ < max_words) {
      words_[size_] = window & 0xffffffff;
      ++size_;
    }
  }

  // Multiplies by the 64-bit value v in-place.  Values that fit in 32 bits
  // take the single-word path above.
  void MultiplyBy(uint64_t v) {
    uint32_t words[2];
    words[0] = static_cast<uint32_t>(v);
    words[1] = static_cast<uint32_t>(v >> 32);
    if (words[1] == 0) {
      MultiplyBy(words[0]);
    } else {
      MultiplyBy(2, words);
    }
  }

  // Multiplies in place by 5 to the power of n.  n must be non-negative.
  void MultiplyByFiveToTheNth(int n) {
    while (n >= kMaxSmallPowerOfFive) {
      MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
      n -= kMaxSmallPowerOfFive;
    }
    if (n > 0) {
      MultiplyBy(kFiveToNth[n]);
    }
  }

  // Multiplies in place by 10 to the power of n.  n must be non-negative.
  void MultiplyByTenToTheNth(int n) {
    if (n > kMaxSmallPowerOfTen) {
      // For large n, raise to a power of 5, then shift left by the same amount.
      // (10**n == 5**n * 2**n.)  This requires fewer multiplications overall.
      MultiplyByFiveToTheNth(n);
      ShiftLeft(n);
    } else if (n > 0) {
      // We can do this more quickly for very small N by using a single
      // multiplication.
      MultiplyBy(kTenToNth[n]);
    }
  }

  // Returns the value of 5**n, for non-negative n.  This implementation uses
  // a lookup table, and is faster than seeding a BigUnsigned with 1 and calling
  // MultiplyByFiveToTheNth().
  static BigUnsigned FiveToTheNth(int n);

  // Multiplies by another BigUnsigned, in-place.
  template <int M>
  void MultiplyBy(const BigUnsigned<M>& other) {
    MultiplyBy(other.size(), other.words());
  }

  // Resets the value to 0, clearing every word that was in use.
  void SetToZero() {
    std::fill_n(words_, size_, 0u);
    size_ = 0;
  }

  // Returns the value of the nth word of this BigUnsigned.  This is
  // range-checked, and returns 0 on out-of-bounds accesses.
  uint32_t GetWord(int index) const {
    if (index < 0 || index >= size_) {
      return 0;
    }
    return words_[index];
  }

  // Returns this integer as a decimal string.  This is not used in the decimal-
  // to-binary conversion; it is intended to aid in testing.
  std::string ToString() const;

  // Returns the number of words that may hold significant values.
  int size() const { return size_; }

  // Returns the word array, least-significant word first.
  const uint32_t* words() const { return words_; }

 private:
  // Reads the number between [begin, end), possibly containing a decimal point,
  // into this BigUnsigned.
  //
  // Callers are required to ensure [begin, end) contains a valid number, with
  // one or more decimal digits and at most one decimal point.  This routine
  // will behave unpredictably if these preconditions are not met.
  //
  // Only the first `significant_digits` digits are read.  Digits beyond this
  // limit are "sticky": If the final significant digit is 0 or 5, and if any
  // dropped digit is nonzero, then that final significant digit is adjusted up
  // to 1 or 6.  This adjustment allows for precise rounding.
  //
  // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
  // account for the decimal point and for dropped significant digits.  After
  // this function returns,
  //   actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
  int ReadDigits(const char* begin, const char* end, int significant_digits);

  // Performs a step of big integer multiplication.  This computes the full
  // (64-bit-wide) values that should be added at the given index (step), and
  // adds to that location in-place.
  //
  // Because our math all occurs in place, we must multiply starting from the
  // highest word working downward.  (This is a bit more expensive due to the
  // extra carries involved.)
  //
  // This must be called in steps, for each word to be calculated, starting from
  // the high end and working down to 0.  The first value of `step` should be
  //   `std::min(original_size + other.size_ - 2, max_words - 1)`.
  // The reason for this expression is that multiplying the i'th word from one
  // multiplicand and the j'th word of another multiplicand creates a
  // two-word-wide value to be stored at the (i+j)'th element.  The highest
  // word indices we will access are `original_size - 1` from this object, and
  // `other.size_ - 1` from our operand.  Therefore,
  // `original_size + other.size_ - 2` is the first step we should calculate,
  // but limited on an upper bound by max_words.
  // Working from high-to-low ensures that we do not overwrite the portions of
  // the initial value of *this which are still needed for later steps.
  //
  // Once called with step == 0, *this contains the result of the
  // multiplication.
  //
  // `original_size` is the size_ of *this before the first call to
  // MultiplyStep().  `other_words` and `other_size` are the contents of our
  // operand.  `step` is the step to perform, as described above.
  void MultiplyStep(int original_size, const uint32_t* other_words,
                    int other_size, int step);

  // Multiplies in place by the `other_size`-word integer stored at
  // `other_words` (least-significant word first).
  void MultiplyBy(int other_size, const uint32_t* other_words) {
    if (other_size <= 0) {
      // An operand with no words is the value 0.  The step loop below only
      // rewrites words [0, original_size + other_size - 2], so without this
      // guard the top word of *this would survive and x * 0 would come out
      // nonzero.
      SetToZero();
      return;
    }
    const int original_size = size_;
    const int first_step =
        (std::min)(original_size + other_size - 2, max_words - 1);
    for (int step = first_step; step >= 0; --step) {
      MultiplyStep(original_size, other_words, other_size, step);
    }
  }

  // Adds a 32-bit value to the index'th word, with carry.  A carry out of the
  // top word is discarded.
  void AddWithCarry(int index, uint32_t value) {
    if (value) {
      while (index < max_words && value > 0) {
        words_[index] += value;
        // carry if we overflowed in this word:
        if (value > words_[index]) {
          value = 1;
          ++index;
        } else {
          value = 0;
        }
      }
      size_ = (std::min)(max_words, (std::max)(index + 1, size_));
    }
  }

  // Adds a 64-bit value at the index'th word: the low half goes into
  // words_[index] and the high half (plus any carry) into words_[index + 1]
  // and above.
  void AddWithCarry(int index, uint64_t value) {
    if (value && index < max_words) {
      uint32_t high = value >> 32;
      uint32_t low = value & 0xffffffff;
      words_[index] += low;
      if (words_[index] < low) {
        ++high;
        if (high == 0) {
          // Carry from the low word caused our high word to overflow.
          // Short circuit here to do the right thing.
          AddWithCarry(index + 2, static_cast<uint32_t>(1));
          return;
        }
      }
      if (high > 0) {
        AddWithCarry(index + 1, high);
      } else {
        // Normally 32-bit AddWithCarry() sets size_, but since we don't call
        // it when `high` is 0, do it ourselves here.
        size_ = (std::min)(max_words, (std::max)(index + 1, size_));
      }
    }
  }

  // Divide this in place by a constant divisor.  Returns the remainder of the
  // division.
  template <uint32_t divisor>
  uint32_t DivMod() {
    uint64_t accumulator = 0;
    for (int i = size_ - 1; i >= 0; --i) {
      accumulator <<= 32;
      accumulator += words_[i];
      // The previous remainder is less than divisor, so accumulator is less
      // than divisor * 2**32 and the quotient always fits in a uint32_t.
      words_[i] = static_cast<uint32_t>(accumulator / divisor);
      accumulator = accumulator % divisor;
    }
    // Drop high words that became zero.
    while (size_ > 0 && words_[size_ - 1] == 0) {
      --size_;
    }
    return static_cast<uint32_t>(accumulator);
  }

  // The number of elements in words_ that may carry significant values.
  // All elements beyond this point are 0.
  //
  // When size_ is 0, this BigUnsigned stores the value 0.
  // When size_ is nonzero, it is *not* guaranteed that words_[size_ - 1] is
  // nonzero.  This can occur due to overflow truncation.
  // In particular, x.size_ != y.size_ does *not* imply x != y.
  int size_;
  uint32_t words_[max_words];
};
// Three-way comparison of two big integers, which may have different widths.
//
// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
template <int N, int M>
int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  // Scan from the most significant word down; the first differing word
  // decides the result.  GetWord() yields 0 past the end of the shorter value.
  int index = (std::max)(lhs.size(), rhs.size());
  while (--index >= 0) {
    const uint32_t left = lhs.GetWord(index);
    const uint32_t right = rhs.GetWord(index);
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  return 0;
}
// Two big integers are equal when every word matches; words past the end of
// the shorter value read as 0.
template <int N, int M>
bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int word_count = (std::max)(lhs.size(), rhs.size());
  int index = 0;
  while (index < word_count && lhs.GetWord(index) == rhs.GetWord(index)) {
    ++index;
  }
  // Reaching the end means no mismatch was found.
  return index >= word_count;
}
// Inequality is the negation of operator==.
template <int N, int M>
bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  if (lhs == rhs) {
    return false;
  }
  return true;
}
// True when lhs is strictly smaller than rhs (Compare() reports -1).
template <int N, int M>
bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  const int ordering = Compare(lhs, rhs);
  return ordering == -1;
}
// True when lhs is strictly larger than rhs, i.e. rhs compares below lhs.
template <int N, int M>
bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return Compare(rhs, lhs) == -1;
}
// True unless rhs compares strictly below lhs.
template <int N, int M>
bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return Compare(rhs, lhs) != -1;
}
// True unless lhs compares strictly below rhs.
template <int N, int M>
bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
  return Compare(lhs, rhs) != -1;
}
// Streams the decimal representation of `num`.  For testing purposes only.
template <int N>
std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
  os << num.ToString();
  return os;
}
// Explicit instantiation declarations for the sizes of BigUnsigned that we
// are using.  The matching explicit instantiation definitions are in the .cc
// file, which is also where the out-of-line member functions live.
//
// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
// still bigger than an int128, and 84 is a large value we will want to use
// in the from_chars implementation.
//
// Comments justifying the use of 84 belong in the from_chars implementation,
// and will be added in a follow-up CL.
extern template class BigUnsigned<4>;
extern template class BigUnsigned<84>;
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_

View file

@ -0,0 +1,260 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_bigint.h"
#include <limits>
#include <string>
#include "gtest/gtest.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Exercises BigUnsigned::ShiftLeft(): exact results, overflow truncation,
// incremental versus one-shot shifting, and out-of-range shift counts.
TEST(BigUnsigned, ShiftLeft) {
  {
    // Check that 3 * 2**100 is calculated correctly
    BigUnsigned<4> num(3u);
    num.ShiftLeft(100);
    EXPECT_EQ(num, BigUnsigned<4>("3802951800684688204490109616128"));
  }
  {
    // Test that overflow is truncated properly.
    // 15 is 4 bits long, and BigUnsigned<4> is a 128-bit bigint.
    // Shifting left by 125 bits should truncate off the high bit, so that
    //   15 << 125 == 7 << 125
    // after truncation.
    BigUnsigned<4> a(15u);
    BigUnsigned<4> b(7u);
    BigUnsigned<4> c(3u);
    a.ShiftLeft(125);
    b.ShiftLeft(125);
    c.ShiftLeft(125);
    EXPECT_EQ(a, b);
    EXPECT_NE(a, c);
  }
  {
    // Same test, larger bigint:
    BigUnsigned<84> a(15u);
    BigUnsigned<84> b(7u);
    BigUnsigned<84> c(3u);
    a.ShiftLeft(84 * 32 - 3);
    b.ShiftLeft(84 * 32 - 3);
    c.ShiftLeft(84 * 32 - 3);
    EXPECT_EQ(a, b);
    EXPECT_NE(a, c);
  }
  {
    // Check that incrementally shifting has the same result as doing it all at
    // once (attempting to capture corner cases.)
    const std::string seed = "1234567890123456789012345678901234567890";
    BigUnsigned<84> a(seed);
    for (int i = 1; i <= 84 * 32; ++i) {
      a.ShiftLeft(1);
      BigUnsigned<84> b(seed);
      b.ShiftLeft(i);
      EXPECT_EQ(a, b);
    }
    // And we should have fully rotated all bits off by now:
    EXPECT_EQ(a, BigUnsigned<84>(0u));
  }
  {
    // Bit shifting large and small numbers by large and small offsets.
    // Intended to exercise bounds-checking corner on ShiftLeft() (directly
    // and under asan).

    // 2**(32*84)-1
    const BigUnsigned<84> all_bits_one(
        "1474444211396924248063325089479706787923460402125687709454567433186613"
        "6228083464060749874845919674257665016359189106695900028098437021384227"
        "3285029708032466536084583113729486015826557532750465299832071590813090"
        "2011853039837649252477307070509704043541368002938784757296893793903797"
        "8180292336310543540677175225040919704702800559606097685920595947397024"
        "8303316808753252115729411497720357971050627997031988036134171378490368"
        "6008000778741115399296162550786288457245180872759047016734959330367829"
        "5235612397427686310674725251378116268607113017720538636924549612987647"
        "5767411074510311386444547332882472126067840027882117834454260409440463"
        "9345147252664893456053258463203120637089916304618696601333953616715125"
        "2115882482473279040772264257431663818610405673876655957323083702713344"
        "4201105427930770976052393421467136557055");
    const BigUnsigned<84> zero(0u);
    const BigUnsigned<84> one(1u);
    // in bounds shifts
    for (int i = 1; i < 84*32; ++i) {
      // shifting all_bits_one to the left should result in a smaller number,
      // since the high bits rotate off and the low bits are replaced with
      // zeroes.
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(i);
      EXPECT_GT(all_bits_one, big_shifted);
      // Shifting 1 to the left should instead result in a larger number.
      BigUnsigned<84> small_shifted = one;
      small_shifted.ShiftLeft(i);
      EXPECT_LT(one, small_shifted);
    }
    // Shifting by zero or a negative number has no effect
    for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) {
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(no_op_shift);
      EXPECT_EQ(all_bits_one, big_shifted);
      // Shift the small value itself; shifting big_shifted a second time here
      // would leave small_shifted untested.
      BigUnsigned<84> small_shifted = one;
      small_shifted.ShiftLeft(no_op_shift);
      EXPECT_EQ(one, small_shifted);
    }
    // Shifting by an amount greater than or equal to the number of bits should
    // result in zero.
    for (int out_of_bounds_shift :
         {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) {
      BigUnsigned<84> big_shifted = all_bits_one;
      big_shifted.ShiftLeft(out_of_bounds_shift);
      EXPECT_EQ(zero, big_shifted);
      BigUnsigned<84> small_shifted = one;
      small_shifted.ShiftLeft(out_of_bounds_shift);
      EXPECT_EQ(zero, small_shifted);
    }
  }
}
// Builds 100! one 32-bit factor at a time and checks it against the known
// decimal value.
TEST(BigUnsigned, MultiplyByUint32) {
  const BigUnsigned<84> factorial_100(
      "933262154439441526816992388562667004907159682643816214685929638952175999"
      "932299156089414639761565182862536979208272237582511852109168640000000000"
      "00000000000000");
  BigUnsigned<84> product(1u);
  uint32_t factor = 1;
  while (factor <= 100) {
    product.MultiplyBy(factor);
    ++factor;
  }
  EXPECT_EQ(product, BigUnsigned<84>(factorial_100));
}
// Checks BigUnsigned-by-BigUnsigned multiplication against known products.
TEST(BigUnsigned, MultiplyByBigUnsigned) {
  {
    // Split the factors of 200! into the odd ones and the even ones, then
    // multiply the two partial products together.
    const BigUnsigned<84> factorial_200(
        "7886578673647905035523632139321850622951359776871732632947425332443594"
        "4996340334292030428401198462390417721213891963883025764279024263710506"
        "1926624952829931113462857270763317237396988943922445621451664240254033"
        "2918641312274282948532775242424075739032403212574055795686602260319041"
        "7032406235170085879617892222278962370389737472000000000000000000000000"
        "0000000000000000000000000");
    BigUnsigned<84> evens(1u);
    BigUnsigned<84> odds(1u);
    for (uint32_t even = 2; even <= 200; even += 2) {
      odds.MultiplyBy(even - 1);
      evens.MultiplyBy(even);
    }
    evens.MultiplyBy(odds);
    EXPECT_EQ(evens, factorial_200);
  }
  {
    // (3 * 10**a) * (2 * 10**b) must equal 6 * 10**(a + b) for a range of
    // exponent pairs.
    for (int a = 0; a < 700; a += 25) {
      SCOPED_TRACE(a);
      const std::string a_digits = "3" + std::string(a, '0');
      BigUnsigned<84> a_value(a_digits);
      for (int b = 0; b < (700 - a); b += 25) {
        SCOPED_TRACE(b);
        const std::string b_digits = "2" + std::string(b, '0');
        const std::string product_digits = "6" + std::string(a + b, '0');
        BigUnsigned<84> b_value(b_digits);
        BigUnsigned<84> expected_product(product_digits);
        b_value.MultiplyBy(a_value);
        EXPECT_EQ(b_value, expected_product);
      }
    }
  }
}
// Checks that products too large for the type wrap around predictably.
TEST(BigUnsigned, MultiplyByOverflow) {
  {
    // 2**128 - 1 is congruent to -1 modulo 2**128, so squaring it must leave
    // exactly 1 once the overflow is truncated.
    BigUnsigned<4> all_bits_on("340282366920938463463374607431768211455");
    all_bits_on.MultiplyBy(all_bits_on);
    EXPECT_EQ(all_bits_on, BigUnsigned<4>(1u));
  }
  {
    // Multiplying a large value by 2**50 must agree with shifting it left by
    // 50 bits, including the bits that fall off the top.
    BigUnsigned<4> two_to_fiftieth(1u);
    two_to_fiftieth.ShiftLeft(50);

    BigUnsigned<4> value_1("12345678901234567890123456789012345678");
    value_1.ShiftLeft(50);

    BigUnsigned<4> value_2("12345678901234567890123456789012345678");
    value_2.MultiplyBy(two_to_fiftieth);

    EXPECT_EQ(value_1, value_2);
  }
}
// Cross-checks the two ways of producing powers of five against repeated
// multiplication, for exponents large enough to overflow the type.
TEST(BigUnsigned, FiveToTheNth) {
  {
    // MultiplyByFiveToTheNth(i) must match multiplying by 5 a total of i
    // times.
    for (int i = 0; i < 1160; ++i) {
      SCOPED_TRACE(i);
      BigUnsigned<84> value_1(123u);
      value_1.MultiplyByFiveToTheNth(i);

      BigUnsigned<84> value_2(123u);
      for (int remaining = i; remaining > 0; --remaining) {
        value_2.MultiplyBy(5u);
      }
      EXPECT_EQ(value_1, value_2);
    }
  }
  {
    // The table-driven static FiveToTheNth(i) must match multiplying 1 in
    // place by 5**i.
    for (int i = 0; i < 1160; ++i) {
      SCOPED_TRACE(i);
      BigUnsigned<84> value_1(1u);
      value_1.MultiplyByFiveToTheNth(i);
      const BigUnsigned<84> value_2 = BigUnsigned<84>::FiveToTheNth(i);
      EXPECT_EQ(value_1, value_2);
    }
  }
}
// Cross-checks MultiplyByTenToTheNth() against repeated multiplication and
// against the decimal string constructor.
TEST(BigUnsigned, TenToTheNth) {
  {
    // MultiplyByTenToTheNth(i) must match multiplying by 10 a total of i
    // times.
    for (int i = 0; i < 800; ++i) {
      SCOPED_TRACE(i);
      BigUnsigned<84> value_1(123u);
      value_1.MultiplyByTenToTheNth(i);

      BigUnsigned<84> value_2(123u);
      for (int remaining = i; remaining > 0; --remaining) {
        value_2.MultiplyBy(10u);
      }
      EXPECT_EQ(value_1, value_2);
    }
  }
  {
    // 135 * 10**i must match the value parsed from "135" followed by i
    // zeroes.
    for (int i = 0; i < 200; ++i) {
      SCOPED_TRACE(i);
      BigUnsigned<84> value_1(135u);
      value_1.MultiplyByTenToTheNth(i);
      const std::string decimal_text = "135" + std::string(i, '0');
      BigUnsigned<84> value_2(decimal_text);
      EXPECT_EQ(value_1, value_2);
    }
  }
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,504 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include "absl/strings/charconv.h"
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/strings/internal/memutil.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {
// ParseFloat<10> will read the first 19 significant digits of the mantissa.
// This number was chosen for multiple reasons.
//
// (a) First, for whatever integer type we choose to represent the mantissa, we
// want to choose the largest possible number of decimal digits for that integer
// type. We are using uint64_t, which can express any 19-digit unsigned
// integer.
//
// (b) Second, we need to parse enough digits that the binary value of any
// mantissa we capture has more bits of resolution than the mantissa
// representation in the target float. Our algorithm requires at least 3 bits
// of headway, but 19 decimal digits give a little more than that.
//
// The following static assertions verify the above comments:
constexpr int kDecimalMantissaDigitsMax = 19;
static_assert(kDecimalMantissaDigitsMax ==
                  std::numeric_limits<uint64_t>::digits10,
              "(a) above");
// Abseil assumes IEEE doubles, whose mantissa has 53 binary digits.
static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed");
static_assert(2 == std::numeric_limits<double>::radix, "IEEE double fact");
static_assert(53 == std::numeric_limits<double>::digits, "IEEE double fact");
// Even the smallest 19-digit decimal mantissa (10**18) exceeds 2**(53 + 3),
// so it carries enough information to reconstruct a binary mantissa.
static_assert((uint64_t{1} << (53 + 3)) < 1000000000000000000u, "(b) above");
// ParseFloat<16> will read the first 15 significant digits of the mantissa.
//
// Because a base-16-to-base-2 conversion can be done exactly, we do not need
// to maximize the number of scanned hex digits to improve our conversion. What
// is required is to scan two more bits than the mantissa can represent, so that
// we always round correctly.
//
// (One extra bit does not suffice to perform correct rounding, since a number
// exactly halfway between two representable floats has unique rounding rules,
// so we need to differentiate between a "halfway between" number and a "closer
// to the larger value" number.)
constexpr int kHexadecimalMantissaDigitsMax = 15;
// The fewest significant bits that kHexadecimalMantissaDigitsMax hex digits
// can supply.  Three bits are subtracted because the leading digit may be a
// "1", which contributes only one significant bit instead of four.
constexpr int kGuaranteedHexadecimalMantissaBitPrecision =
    kHexadecimalMantissaDigitsMax * 4 - 3;
static_assert(std::numeric_limits<double>::digits + 2 <
                  kGuaranteedHexadecimalMantissaBitPrecision,
              "kHexadecimalMantissaDigitsMax too small");
// We also impose a limit on the number of significant digits we will read from
// an exponent, to avoid having to deal with integer overflow. We use 9 for
// this purpose.
//
// If we read a 9 digit exponent, the end result of the conversion will
// necessarily be infinity or zero, depending on the sign of the exponent.
// Therefore we can just drop extra digits on the floor without any extra
// logic.
constexpr int kDecimalExponentDigitsMax = 9;
// Every exponent with this many digits must be representable as an int.
static_assert(kDecimalExponentDigitsMax <= std::numeric_limits<int>::digits10,
              "int type too small");
// To avoid incredibly large inputs causing integer overflow for our exponent,
// we impose an arbitrary but very large limit on the number of significant
// digits we will accept. The implementation refuses to match a string with
// more consecutive significant mantissa digits than this.
constexpr int kDecimalDigitLimit = 50 * 1000 * 1000;
// The matching limit for hexadecimal input.  It is a quarter of
// kDecimalDigitLimit because every dropped hexadecimal digit shifts the
// binary exponent by 4.
constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4;
// The largest exponent we can read is 999999999 (per
// kDecimalExponentDigitsMax), and the largest exponent adjustment we can get
// from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these
// comfortably fits in an integer.
//
// We count kDecimalDigitLimit twice because there are independent limits for
// numbers before and after the decimal point. (In the case where there are no
// significant digits before the decimal point, there are independent limits for
// post-decimal-point leading zeroes and for significant digits.)
// The largest readable exponent plus the largest possible adjustment from
// dropped digits must stay below INT_MAX, for both decimal and hexadecimal
// input.
static_assert(std::numeric_limits<int>::max() >
                  999999999 + 2 * kDecimalDigitLimit,
              "int type too small");
static_assert(std::numeric_limits<int>::max() >
                  999999999 + 2 * (4 * kHexadecimalDigitLimit),
              "int type too small");
// Reports whether `flags` permits an exponent (e.g., "1.5e100") in the input.
// An exponent is allowed when the scientific bit is set, or when the fixed bit
// is not set.
bool AllowExponent(chars_format flags) {
  if ((flags & chars_format::scientific) == chars_format::scientific) {
    return true;
  }
  return (flags & chars_format::fixed) != chars_format::fixed;
}
// Reports whether `flags` makes an exponent mandatory: the scientific bit is
// set and the fixed bit is not.
bool RequireExponent(chars_format flags) {
  if ((flags & chars_format::scientific) != chars_format::scientific) {
    return false;
  }
  return (flags & chars_format::fixed) != chars_format::fixed;
}
// Lookup table indexed by unsigned byte value. Entries for '0'-'9' hold 0-9,
// entries for 'A'-'F' and 'a'-'f' hold 10-15, and every other entry is -1.
// Used by the base-16 IsDigit/ToDigit specializations.
const int8_t kAsciiToInt[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
9, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1};
// The helpers declared below are parameterized on the numeric base of the
// mantissa digits. Only the base 10 and base 16 specializations are defined
// in this file.
//
// Returns true if `ch` is a digit in the given base.
template <int base>
bool IsDigit(char ch);
// Converts a valid `ch` to its digit value in the given base. `ch` must
// satisfy IsDigit<base>(ch).
template <int base>
unsigned ToDigit(char ch);
// Returns true if `ch` is the exponent delimiter for the given base.
template <int base>
bool IsExponentCharacter(char ch);
// Returns the maximum number of significant digits we will read for a float
// in the given base.
template <int base>
constexpr int MantissaDigitsMax();
// Returns the largest consecutive run of digits we will accept when parsing a
// number in the given base.
template <int base>
constexpr int DigitLimit();
// Returns the amount the exponent must be adjusted by for each dropped digit.
// (For decimal this is 1, since the digits are in base 10 and the exponent base
// is also 10, but for hexadecimal this is 4, since the digits are base 16 but
// the exponent base is 2.)
template <int base>
constexpr int DigitMagnitude();
// Decimal digits are exactly the characters '0' through '9'.
template <>
bool IsDigit<10>(char ch) {
  return '0' <= ch && ch <= '9';
}
// Hexadecimal digits are the characters with a non-negative kAsciiToInt entry.
template <>
bool IsDigit<16>(char ch) {
  const int8_t table_value = kAsciiToInt[static_cast<unsigned char>(ch)];
  return !(table_value < 0);
}
// Decimal digit value: the distance of `ch` from '0'.
template <>
unsigned ToDigit<10>(char ch) {
  const int distance_from_zero = ch - '0';
  return static_cast<unsigned>(distance_from_zero);
}
// Hexadecimal digit value: looked up in kAsciiToInt.
template <>
unsigned ToDigit<16>(char ch) {
  const int8_t table_value = kAsciiToInt[static_cast<unsigned char>(ch)];
  return static_cast<unsigned>(table_value);
}
// Decimal floats introduce their exponent with 'e' or 'E'.
template <>
bool IsExponentCharacter<10>(char ch) {
  switch (ch) {
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
// Hexadecimal floats introduce their exponent with 'p' or 'P'.
template <>
bool IsExponentCharacter<16>(char ch) {
  switch (ch) {
    case 'p':
    case 'P':
      return true;
    default:
      return false;
  }
}
// Per-base constants. Each specialization simply forwards the matching
// file-level constant (or literal) for base 10 or base 16.
//
// Maximum number of significant mantissa digits read, per base.
template <>
constexpr int MantissaDigitsMax<10>() {
return kDecimalMantissaDigitsMax;
}
template <>
constexpr int MantissaDigitsMax<16>() {
return kHexadecimalMantissaDigitsMax;
}
// Longest run of consecutive digits accepted, per base.
template <>
constexpr int DigitLimit<10>() {
return kDecimalDigitLimit;
}
template <>
constexpr int DigitLimit<16>() {
return kHexadecimalDigitLimit;
}
// Exponent adjustment per dropped digit: 1 for decimal, 4 for hexadecimal.
template <>
constexpr int DigitMagnitude<10>() {
return 1;
}
template <>
constexpr int DigitMagnitude<16>() {
return 4;
}
// Scans a run of base-`base` digits at the start of [begin, end), folding
// them into *out, and returns how many characters were consumed (including
// any skipped leading zeros).
//
// At most `max_digits` digits are accumulated into *out. Digits beyond that
// are still consumed but no longer change *out; if any such dropped digit is
// nonzero, *dropped_nonzero_digit is set to true (when the pointer is
// non-null). Otherwise *dropped_nonzero_digit is left as it was.
//
// When no digit is matched the result is 0 and *out keeps its value.
//
// No overflow protection is applied to *out: the caller must pick a
// `max_digits` that is safe for type T.
template <int base, typename T>
int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out,
                  bool* dropped_nonzero_digit) {
  if (base == 10) {
    assert(max_digits <= std::numeric_limits<T>::digits10);
  } else if (base == 16) {
    assert(max_digits * 4 <= std::numeric_limits<T>::digits);
  }

  const char* cursor = begin;

  // Leading zeros cannot overflow the accumulator, so they do not count
  // against `max_digits` -- but they are only "leading" when nothing nonzero
  // has been accumulated so far.
  if (!*out) {
    while (cursor != end && *cursor == '0') {
      ++cursor;
    }
  }

  // Work out where accumulation has to stop.
  const char* accumulate_end = end;
  if (end - cursor > max_digits) {
    accumulate_end = cursor + max_digits;
  }

  T value = *out;
  for (; cursor < accumulate_end && IsDigit<base>(*cursor); ++cursor) {
    // Overflow is not guarded against here (max_digits is chosen to rule it
    // out); the asserts exist to catch mistakes in debug builds.
    const T digit = static_cast<T>(ToDigit<base>(*cursor));
    assert(value * base >= value);
    value *= base;
    assert(value + digit >= value);
    value += digit;
  }

  // Swallow any remaining digits, remembering whether precision was lost.
  bool lost_nonzero = false;
  for (; cursor < end && IsDigit<base>(*cursor); ++cursor) {
    if (*cursor != '0') {
      lost_nonzero = true;
    }
  }
  if (dropped_nonzero_digit != nullptr && lost_nonzero) {
    *dropped_nonzero_digit = true;
  }

  *out = value;
  return static_cast<int>(cursor - begin);
}
// Tells whether `v` may appear inside the parenthesized sequence that can
// follow a NaN: an underscore, an ASCII digit, or an ASCII letter.
bool IsNanChar(char v) {
  if (v == '_') {
    return true;
  }
  if ('0' <= v && v <= '9') {
    return true;
  }
  if ('a' <= v && v <= 'z') {
    return true;
  }
  return 'A' <= v && v <= 'Z';
}
// Looks for a strtod()-formatted infinity or NaN at the start of
// [begin, end). Matching is case insensitive.
//
// On a match, fills in `out->type` and `out->end` (and, for a NaN followed by
// a well-formed parenthesized sequence, `out->subrange_begin` /
// `out->subrange_end`) and returns true. Returns false without touching
// `out` when neither pattern matches.
bool ParseInfinityOrNan(const char* begin, const char* end,
                        strings_internal::ParsedFloat* out) {
  // Both "inf" and "nan" need at least three characters.
  if (end - begin < 3) {
    return false;
  }
  const char first = *begin;

  if (first == 'i' || first == 'I') {
    // Infinity is spelled "inf" or "infinity".
    if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kInfinity;
    // Consume the long spelling when all eight characters are present;
    // otherwise stop right after "inf".
    const bool is_long_form =
        end - begin >= 8 &&
        strings_internal::memcasecmp(begin + 3, "inity", 5) == 0;
    out->end = begin + (is_long_form ? 8 : 3);
    return true;
  }

  if (first == 'n' || first == 'N') {
    // NaN is spelled "nan", optionally followed by a parenthesized run of
    // zero or more characters from [a-zA-Z0-9_].
    if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) {
      return false;
    }
    out->type = strings_internal::FloatType::kNan;
    const char* const after_nan = begin + 3;
    out->end = after_nan;
    if (after_nan < end && *after_nan == '(') {
      const char* const payload_begin = after_nan + 1;
      const char* payload_end = payload_begin;
      while (payload_end < end && IsNanChar(*payload_end)) {
        ++payload_end;
      }
      // Only accept the parenthesized part if it is properly closed; an
      // illegal character or a missing ')' leaves the match at plain "nan".
      if (payload_end < end && *payload_end == ')') {
        out->subrange_begin = payload_begin;
        out->subrange_end = payload_end;
        out->end = payload_end + 1;
      }
    }
    return true;
  }

  return false;
}
} // namespace
namespace strings_internal {
// Scans [begin, end) for an infinity, a NaN, or a number whose mantissa digits
// are in `base`, and returns the decomposed result.
//
// On success `result.end` points one past the last consumed character. On
// failure the returned ParsedFloat has `end == nullptr` (other fields may
// already have been written and must not be relied on).
//
// A leading '+' or '-' before the mantissa is not consumed here: input that
// starts with a sign yields no mantissa and therefore fails to parse.
template <int base>
strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end,
chars_format format_flags) {
strings_internal::ParsedFloat result;
// Exit early if we're given an empty range.
if (begin == end) return result;
// Handle the infinity and NaN cases.
if (ParseInfinityOrNan(begin, end, &result)) {
return result;
}
// Remember where the mantissa starts: it is used both to detect an empty
// match and to report the full digit range for inexact decimal input.
const char* const mantissa_begin = begin;
while (begin < end && *begin == '0') {
++begin;  // skip leading zeros
}
uint64_t mantissa = 0;
// Counted in digits of `base`; scaled by DigitMagnitude<base>() at the end.
int exponent_adjustment = 0;
bool mantissa_is_inexact = false;
int pre_decimal_digits = ConsumeDigits<base>(
begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact);
begin += pre_decimal_digits;
// Number of significant digits that may still be accumulated into `mantissa`
// after the radix point.
int digits_left;
if (pre_decimal_digits >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
} else if (pre_decimal_digits > MantissaDigitsMax<base>()) {
// We dropped some non-fraction digits on the floor.  Adjust our exponent
// to compensate.
exponent_adjustment =
static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>());
digits_left = 0;
} else {
digits_left =
static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits);
}
if (begin < end && *begin == '.') {
++begin;
if (mantissa == 0) {
// If we haven't seen any nonzero digits yet, keep skipping zeros.  We
// have to adjust the exponent to reflect the changed place value.
const char* begin_zeros = begin;
while (begin < end && *begin == '0') {
++begin;
}
int zeros_skipped = static_cast<int>(begin - begin_zeros);
if (zeros_skipped >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
}
exponent_adjustment -= static_cast<int>(zeros_skipped);
}
int post_decimal_digits = ConsumeDigits<base>(
begin, end, digits_left, &mantissa, &mantissa_is_inexact);
begin += post_decimal_digits;
// Since `mantissa` is an integer, each significant digit we read after
// the decimal point requires an adjustment to the exponent. "1.23e0" will
// be stored as `mantissa` == 123 and `exponent` == -2 (that is,
// "123e-2").
if (post_decimal_digits >= DigitLimit<base>()) {
// refuse to parse pathological inputs
return result;
} else if (post_decimal_digits > digits_left) {
// Only `digits_left` of the fractional digits made it into `mantissa`;
// the rest were dropped and need no place-value adjustment.
exponent_adjustment -= digits_left;
} else {
exponent_adjustment -= post_decimal_digits;
}
}
// If we've found no mantissa whatsoever, this isn't a number.
if (mantissa_begin == begin) {
return result;
}
// A bare "." doesn't count as a mantissa either.
if (begin - mantissa_begin == 1 && *mantissa_begin == '.') {
return result;
}
if (mantissa_is_inexact) {
// We dropped significant digits on the floor.  Handle this appropriately.
if (base == 10) {
// If we truncated significant decimal digits, store the full range of the
// mantissa for future big integer math for exact rounding.
result.subrange_begin = mantissa_begin;
result.subrange_end = begin;
} else if (base == 16) {
// If we truncated hex digits, reflect this fact by setting the low
// ("sticky") bit.  This allows for correct rounding in all cases.
mantissa |= 1;
}
}
result.mantissa = mantissa;
// Kept so the exponent character (and sign) can be un-consumed if no
// exponent digits follow it.
const char* const exponent_begin = begin;
result.literal_exponent = 0;
bool found_exponent = false;
if (AllowExponent(format_flags) && begin < end &&
IsExponentCharacter<base>(*begin)) {
bool negative_exponent = false;
++begin;
if (begin < end && *begin == '-') {
negative_exponent = true;
++begin;
} else if (begin < end && *begin == '+') {
++begin;
}
const char* const exponent_digits_begin = begin;
// Exponent is always expressed in decimal, even for hexadecimal floats.
begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax,
&result.literal_exponent, nullptr);
if (begin == exponent_digits_begin) {
// there were no digits where we expected an exponent.  We failed to read
// an exponent and should not consume the 'e' after all.  Rewind 'begin'.
found_exponent = false;
begin = exponent_begin;
} else {
found_exponent = true;
if (negative_exponent) {
result.literal_exponent = -result.literal_exponent;
}
}
}
if (!found_exponent && RequireExponent(format_flags)) {
// Provided flags required an exponent, but none was found.  This results
// in a failure to scan.
return result;
}
// Success!
result.type = strings_internal::FloatType::kNumber;
// A zero mantissa always reports exponent 0, whatever the literal exponent
// and adjustments were.
if (result.mantissa > 0) {
result.exponent = result.literal_exponent +
(DigitMagnitude<base>() * exponent_adjustment);
} else {
result.exponent = 0;
}
result.end = begin;
return result;
}
// Explicit instantiations for the two supported bases (decimal and
// hexadecimal).
template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
chars_format format_flags);
template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,99 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#define ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/charconv.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Classifies a parsed float as an ordinary number or as one of the special
// values (infinity, NaN).
enum class FloatType {
  kNumber,
  kInfinity,
  kNan,
};
// The decomposed parts of a parsed `float` or `double`.
//
// A default-constructed ParsedFloat has `end == nullptr`, which is how a
// failed parse is reported.
struct ParsedFloat {
// Representation of the parsed mantissa, with the decimal point adjusted to
// make it an integer.
//
// During decimal scanning, this contains 19 significant digits worth of
// mantissa value.  If digits beyond this point are found, they
// are truncated, and if any of these dropped digits are nonzero, then
// `mantissa` is inexact, and the full mantissa is stored in [subrange_begin,
// subrange_end).
//
// During hexadecimal scanning, this contains 15 significant hex digits worth
// of mantissa value.  Digits beyond this point are sticky -- they are
// truncated, but if any dropped digits are nonzero, the low bit of mantissa
// will be set.  (This allows for precise rounding, and avoids the need
// to store the full mantissa in [subrange_begin, subrange_end).)
uint64_t mantissa = 0;
// Floating point exponent.  This reflects any decimal point adjustments and
// any truncated digits from the mantissa.  The absolute value of the parsed
// number is represented by mantissa * (base ** exponent), where base==10 for
// decimal floats, and base==2 for hexadecimal floats.
int exponent = 0;
// The literal exponent value scanned from the input, or 0 if none was
// present.  This does not reflect any adjustments applied to mantissa.
int literal_exponent = 0;
// The type of number scanned.
FloatType type = FloatType::kNumber;
// When non-null, [subrange_begin, subrange_end) marks a range of characters
// that require further processing.  The meaning is dependent on float type.
// If type == kNumber and this is set, this is a "wide input": the input
// mantissa contained more than 19 digits.  The range contains the full
// mantissa.  It plus `literal_exponent` need to be examined to find the best
// floating point match.
// If type == kNan and this is set, the range marks the contents of a
// matched parenthesized character region after the NaN.
const char* subrange_begin = nullptr;
const char* subrange_end = nullptr;
// One-past-the-end of the successfully parsed region, or nullptr if no
// matching pattern was found.
const char* end = nullptr;
};
// Read the floating point number in the provided range, and populate
// ParsedFloat accordingly.
//
// format_flags is a bitmask value specifying what patterns this API will match.
// `scientific` and `fixed` are honored per std::from_chars rules
// ([utility.from.chars], C++17): if exactly one of these bits is set, then an
// exponent is required, or disallowed, respectively.
//
// Template parameter `base` must be either 10 or 16.  For base 16, a "0x" is
// *not* consumed.  The `hex` bit from format_flags is ignored by ParseFloat.
//
// If no pattern matches, the returned ParsedFloat has `end == nullptr`.
template <int base>
ParsedFloat ParseFloat(const char* begin, const char* end,
absl::chars_format format_flags);
// The base 10 and base 16 instantiations are declared extern here.
extern template ParsedFloat ParseFloat<10>(const char* begin, const char* end,
absl::chars_format format_flags);
extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end,
absl::chars_format format_flags);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_

View file

@ -0,0 +1,357 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/charconv_parse.h"
#include <string>
#include <utility>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/log/check.h"
#include "absl/strings/str_cat.h"
using absl::chars_format;
using absl::strings_internal::FloatType;
using absl::strings_internal::ParsedFloat;
using absl::strings_internal::ParseFloat;
namespace {
// Check that a given string input is parsed to the expected type, mantissa
// and exponent.
//
// Input string `s` must contain a '$' character.  It marks the end of the
// characters that should be consumed by the match.  It is stripped from the
// input to ParseFloat.
//
// If input string `s` contains '[' and ']' characters, these mark the region
// of characters that should be marked as the "subrange".  For NaNs, this is
// the location of the extended NaN string.  For numbers, this is the location
// of the full, over-large mantissa.  Both brackets are stripped before
// parsing.  Because the subrange indices are computed before the '$' is
// removed, the '$' must not appear before the closing ']'.
//
// `expected_mantissa` and `expected_exponent` are only checked when the parse
// yields FloatType::kNumber.  `expected_literal_exponent` is only checked when
// it differs from the sentinel default of -999.
template <int base>
void ExpectParsedFloat(std::string s, absl::chars_format format_flags,
                       FloatType expected_type, uint64_t expected_mantissa,
                       int expected_exponent,
                       int expected_literal_exponent = -999) {
  SCOPED_TRACE(s);

  int begin_subrange = -1;
  int end_subrange = -1;
  // If s contains '[' and ']', then strip these characters and set the subrange
  // indices appropriately.
  std::string::size_type open_bracket_pos = s.find('[');
  if (open_bracket_pos != std::string::npos) {
    begin_subrange = static_cast<int>(open_bracket_pos);
    s.replace(open_bracket_pos, 1, "");
    // With '[' already removed, the position of ']' is exactly the
    // one-past-the-end index of the subrange in the final string.
    std::string::size_type close_bracket_pos = s.find(']');
    CHECK_NE(close_bracket_pos, std::string::npos)
        << "Test input contains [ without matching ]";
    end_subrange = static_cast<int>(close_bracket_pos);
    s.replace(close_bracket_pos, 1, "");
  }

  const std::string::size_type expected_characters_matched = s.find('$');
  CHECK_NE(expected_characters_matched, std::string::npos)
      << "Input string must contain $";
  s.replace(expected_characters_matched, 1, "");

  ParsedFloat parsed =
      ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);

  EXPECT_NE(parsed.end, nullptr);
  if (parsed.end == nullptr) {
    return;  // The following tests are not useful if we fully failed to parse
  }
  EXPECT_EQ(parsed.type, expected_type);
  if (begin_subrange == -1) {
    EXPECT_EQ(parsed.subrange_begin, nullptr);
    EXPECT_EQ(parsed.subrange_end, nullptr);
  } else {
    EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange);
    EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange);
  }
  if (parsed.type == FloatType::kNumber) {
    EXPECT_EQ(parsed.mantissa, expected_mantissa);
    EXPECT_EQ(parsed.exponent, expected_exponent);
    if (expected_literal_exponent != -999) {
      EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent);
    }
  }
  // `parsed.end` is non-null here and points into `s`, so the difference is
  // non-negative.  Compare it in the same unsigned type as the expected count
  // to avoid a mixed signed/unsigned comparison inside EXPECT_EQ.
  const auto characters_matched =
      static_cast<std::string::size_type>(parsed.end - s.data());
  EXPECT_EQ(characters_matched, expected_characters_matched);
}
// Check that a given string input is parsed to the expected mantissa and
// exponent.
//
// Input string `s` must contain a '$' character.  It marks the end of the
// characters that were consumed by the match.
//
// This is a convenience wrapper that forwards to ExpectParsedFloat<base> with
// the expected type fixed to FloatType::kNumber.
template <int base>
void ExpectNumber(std::string s, absl::chars_format format_flags,
uint64_t expected_mantissa, int expected_exponent,
int expected_literal_exponent = -999) {
ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber,
expected_mantissa, expected_exponent,
expected_literal_exponent);
}
// Check that a given string input is parsed to the given special value.
//
// This tests against both number bases, since infinities and NaNs have
// identical representations in both modes.
//
// `s` follows the ExpectParsedFloat conventions ('$' marks the end of the
// match; optional '[' and ']' mark the expected subrange).  The mantissa and
// exponent arguments are passed as 0; ExpectParsedFloat only checks them for
// FloatType::kNumber results.
void ExpectSpecial(const std::string& s, absl::chars_format format_flags,
FloatType type) {
ExpectParsedFloat<10>(s, format_flags, type, 0, 0);
ExpectParsedFloat<16>(s, format_flags, type, 0, 0);
}
// Check that a given input string is not matched by ParseFloat.
//
// Unlike ExpectParsedFloat, `s` is handed to ParseFloat unmodified: a '$' in
// `s` is not stripped and is part of the parsed input.
template <int base>
void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) {
ParsedFloat parsed =
ParseFloat<base>(s.data(), s.data() + s.size(), format_flags);
EXPECT_EQ(parsed.end, nullptr);
}
// Exercises ordinary decimal and hexadecimal inputs: equivalent spellings of
// the same value, trailing non-matching characters, full-width mantissas, and
// the various spellings of zero.
TEST(ParseFloat, SimpleValue) {
// Test that various forms of floating point numbers all parse correctly.
ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3);
ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8);
// ExpectNumber does not attempt to drop trailing zeroes.
ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900,
-5);
ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general,
0x1234abcdef000, -20);
// Ensure non-matching characters after a number are ignored, even when they
// look like potentially matching characters.
ExpectNumber<10>("1.23456789e5$   ", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3);
ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789,
-3);
ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3);
ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3);
ExpectNumber<16>("1.234abcdefp28$   ", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general,
0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef,
-8);
ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8);
ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8);
// Ensure we can read a full resolution mantissa without overflow.
ExpectNumber<10>("9999999999999999999$", chars_format::general,
9999999999999999999u, 0);
ExpectNumber<16>("fffffffffffffff$", chars_format::general,
0xfffffffffffffffu, 0);
// Check that zero is consistently read.  A zero mantissa always reports an
// exponent of 0, even when a literal exponent is present.
ExpectNumber<10>("0$", chars_format::general, 0, 0);
ExpectNumber<16>("0$", chars_format::general, 0, 0);
ExpectNumber<10>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("000000000000000000000000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<16>("0000000000000000000000.000000000000000000$",
chars_format::general, 0, 0);
ExpectNumber<10>("0.00000000000000000000000000000000e123456$",
chars_format::general, 0, 0);
ExpectNumber<16>("0.00000000000000000000000000000000p123456$",
chars_format::general, 0, 0);
}
TEST(ParseFloat, LargeDecimalMantissa) {
// After 19 significant decimal digits in the mantissa, ParsedFloat will
// truncate additional digits.  We need to test that:
//   1) the truncation to 19 digits happens
//   2) the returned exponent reflects the dropped significant digits
//   3) a correct literal_exponent is set
//
// If and only if a significant (nonzero) digit is found after 19 digits, the
// entirety of the mantissa is captured as the subrange, in case the exact
// value is needed to make a rounding decision.  The [ and ] characters below
// denote where such a subregion is expected to be marked by ParseFloat.  They
// are not part of the input.
// Mark a capture group only if a dropped digit is significant (nonzero).
ExpectNumber<10>("100000000000000000000000000$", chars_format::general,
1000000000000000000,
/* adjusted exponent */ 8);
ExpectNumber<10>("123456789123456789100000000$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8);
ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
// Leading zeroes should not count towards the 19 significant digit limit
ExpectNumber<10>("[00000000123456789123456789123456789]$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8,
/* literal exponent */ 0);
ExpectNumber<10>("00000000123456789123456789100000000$",
chars_format::general, 1234567891234567891,
/* adjusted exponent */ 8);
// Truncated digits after the decimal point should not cause a further
// exponent adjustment.
ExpectNumber<10>("1.234567891234567891e123$", chars_format::general,
1234567891234567891, 105);
ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general,
1234567891234567891,
/* adjusted exponent */ 105,
/* literal exponent */ 123);
// Ensure we truncate, and not round.  (The from_chars algorithm we use
// depends on our guess missing low, if it misses, so we need the rounding
// error to be downward.)
ExpectNumber<10>("[1999999999999999999999]$", chars_format::general,
1999999999999999999,
/* adjusted exponent */ 3,
/* literal exponent */ 0);
}
TEST(ParseFloat, LargeHexadecimalMantissa) {
// After 15 significant hex digits in the mantissa, ParsedFloat will treat
// additional digits as sticky.  We need to test that:
//   1) The truncation to 15 digits happens
//   2) The returned exponent reflects the dropped significant digits
//   3) If a nonzero digit is dropped, the low bit of mantissa is set.
//
// Each dropped hex digit adjusts the binary exponent by 4, so 15 dropped
// digits give the expected exponent of 60 below.
ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general,
0x123456789abcdef, 60);
// Leading zeroes should not count towards the 15 significant digit limit
ExpectNumber<16>("000000123456789abcdef123456789abcdef$",
chars_format::general, 0x123456789abcdef, 60);
// Truncated digits after the radix point should not cause a further
// exponent adjustment.
ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general,
0x123456789abcdef, 44);
ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$",
chars_format::general, 0x123456789abcdef, 44);
// Test sticky digit behavior.  The low bit should be set iff any dropped
// digit is nonzero.
ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general,
0x123456789abcdef, 60);
ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general,
0x123456789abcdee, 60);
}
// Verifies how the `fixed` and `scientific` format flags affect exponent
// handling.
TEST(ParseFloat, ScientificVsFixed) {
  // In fixed mode, an exponent is never matched (but the remainder of the
  // number will be matched.)
  ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8);
  ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3);
  ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36);
  ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8);

  // In scientific mode, numbers don't match *unless* they have an exponent.
  //
  // ExpectFailedParse does not strip the '$'; here it simply acts as a
  // trailing character that is not an exponent delimiter.  The failing inputs
  // deliberately start with a valid mantissa: ParseFloat does not consume a
  // leading sign, so a signed input would fail before the exponent
  // requirement is ever checked and the test would pass for the wrong reason.
  ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3);
  ExpectFailedParse<10>("123456.789$", chars_format::scientific);
  ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef,
                   -8);
  ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific);
}
// Infinity is matched case-insensitively as "inf" or "infinity"; anything
// shorter than "inf" fails, and a partial "infinity" only consumes "inf".
TEST(ParseFloat, Infinity) {
// Too short, or wrong third character: no match in either base.
ExpectFailedParse<10>("in", chars_format::general);
ExpectFailedParse<16>("in", chars_format::general);
ExpectFailedParse<10>("inx", chars_format::general);
ExpectFailedParse<16>("inx", chars_format::general);
ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity);
// "infinite" is not "infinity": only the leading "inf" is consumed.
ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity);
ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity);
ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity);
}
// NaN is matched case-insensitively as "nan", optionally followed by a
// parenthesized sequence of [a-zA-Z0-9_] characters.
TEST(ParseFloat, NaN) {
// Too short, or wrong third character: no match in either base.
ExpectFailedParse<10>("na", chars_format::general);
ExpectFailedParse<16>("na", chars_format::general);
ExpectFailedParse<10>("nah", chars_format::general);
ExpectFailedParse<16>("nah", chars_format::general);
ExpectSpecial("nan$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$", chars_format::general, FloatType::kNan);
ExpectSpecial("nAn$", chars_format::general, FloatType::kNan);
ExpectSpecial("NAN$", chars_format::general, FloatType::kNan);
ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan);
// A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to
// appear after a NaN.  Check that this is allowed, and that the correct
// characters are grouped.
//
// (The characters [ and ] in the pattern below delimit the expected matched
// subgroup; they are not part of the input passed to ParseFloat.)
ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan);
ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan);
// If the subgroup contains illegal characters, don't match it at all.
ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan);
// Also cope with a missing close paren.
ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan);
}
} // namespace

View file

@ -0,0 +1,63 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
#define ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_
#include <cassert>
#include <cstddef>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node
// holding a FLAT or EXTERNAL child rep. Requires `rep != nullptr`.
inline bool IsDataEdge(const CordRep* edge) {
assert(edge != nullptr);
// The fast path is that `edge` is an EXTERNAL or FLAT node, making the below
// if a single, well predicted branch. We then repeat the FLAT or EXTERNAL
// check in the slow path of the SUBSTRING check to optimize for the hot path.
if (edge->tag == EXTERNAL || edge->tag >= FLAT) return true;
if (edge->tag == SUBSTRING) edge = edge->substring()->child;
return edge->tag == EXTERNAL || edge->tag >= FLAT;
}
// Returns the bytes referenced by the data edge `edge` as an
// `absl::string_view`. For a SUBSTRING edge the view starts at the substring's
// `start` offset inside its child; the view length is always `edge->length`.
// Requires `IsDataEdge(edge) == true`.
inline absl::string_view EdgeData(const CordRep* edge) {
  assert(IsDataEdge(edge));

  // Capture the length before `edge` is possibly redirected to its child.
  const size_t length = edge->length;
  size_t offset = 0;
  if (edge->IsSubstring()) {
    offset = edge->substring()->start;
    edge = edge->substring()->child;
  }

  if (edge->tag >= FLAT) {
    return absl::string_view{edge->flat()->Data() + offset, length};
  }
  return absl::string_view{edge->external()->base + offset, length};
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_

View file

@ -0,0 +1,130 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_data_edge.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::absl::cordrep_testing::MakeExternal;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::MakeSubstring;
// A plain FLAT node must be reported as a data edge.
TEST(CordDataEdgeTest, IsDataEdgeOnFlat) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  EXPECT_TRUE(IsDataEdge(flat));
  CordRep::Unref(flat);
}
// A plain EXTERNAL node must be reported as a data edge.
TEST(CordDataEdgeTest, IsDataEdgeOnExternal) {
  CordRep* external =
      MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
  EXPECT_TRUE(IsDataEdge(external));
  CordRep::Unref(external);
}
// A SUBSTRING node directly over a FLAT node is a data edge.
TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfFlat) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  CordRep* edge = MakeSubstring(1, 20, flat);
  EXPECT_TRUE(IsDataEdge(edge));
  // Only the substring is released here, as in every substring test.
  CordRep::Unref(edge);
}
// A SUBSTRING node directly over an EXTERNAL node is a data edge.
TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfExternal) {
  CordRep* external =
      MakeExternal("Lorem ipsum dolor sit amet, consectetur ...");
  CordRep* edge = MakeSubstring(1, 20, external);
  EXPECT_TRUE(IsDataEdge(edge));
  CordRep::Unref(edge);
}
// A BTREE node is not a data edge, even when it holds a single FLAT.
TEST(CordDataEdgeTest, IsDataEdgeOnBtree) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  CordRepBtree* btree = CordRepBtree::New(flat);
  EXPECT_FALSE(IsDataEdge(btree));
  CordRep::Unref(btree);
}
// A SUBSTRING whose child is itself a SUBSTRING is not a data edge.
TEST(CordDataEdgeTest, IsDataEdgeOnBadSubstr) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  CordRep* inner = MakeSubstring(1, 20, flat);
  CordRep* nested = MakeSubstring(1, 18, inner);
  EXPECT_FALSE(IsDataEdge(nested));
  CordRep::Unref(nested);
}
// EdgeData() on a FLAT node yields the full flat contents.
TEST(CordDataEdgeTest, EdgeDataOnFlat) {
  absl::string_view text = "Lorem ipsum dolor sit amet, consectetur ...";
  CordRep* flat = MakeFlat(text);
  EXPECT_EQ(EdgeData(flat), text);
  CordRep::Unref(flat);
}
// EdgeData() on an EXTERNAL node yields the full external contents.
TEST(CordDataEdgeTest, EdgeDataOnExternal) {
  absl::string_view text = "Lorem ipsum dolor sit amet, consectetur ...";
  CordRep* external = MakeExternal(text);
  EXPECT_EQ(EdgeData(external), text);
  CordRep::Unref(external);
}
// EdgeData() on a substring of a FLAT yields exactly the [1, 21) sub range.
TEST(CordDataEdgeTest, EdgeDataOnSubstringOfFlat) {
  absl::string_view text = "Lorem ipsum dolor sit amet, consectetur ...";
  CordRep* flat = MakeFlat(text);
  CordRep* edge = MakeSubstring(1, 20, flat);
  EXPECT_EQ(EdgeData(edge), text.substr(1, 20));
  CordRep::Unref(edge);
}
// EdgeData() on a substring of an EXTERNAL yields exactly the [1, 21) sub
// range.
TEST(CordDataEdgeTest, EdgeDataOnSubstringOfExternal) {
  absl::string_view text = "Lorem ipsum dolor sit amet, consectetur ...";
  CordRep* external = MakeExternal(text);
  CordRep* edge = MakeSubstring(1, 20, external);
  EXPECT_EQ(EdgeData(edge), text.substr(1, 20));
  CordRep::Unref(edge);
}
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
// IsDataEdge() asserts `edge != nullptr`, so a null argument must terminate
// the process. Only compiled when death tests are available and NDEBUG is not
// defined (see the enclosing #if).
TEST(CordDataEdgeTest, IsDataEdgeOnNullPtr) {
EXPECT_DEATH(IsDataEdge(nullptr), ".*");
}
// EdgeData() asserts `IsDataEdge(edge)`, which in turn asserts a non-null
// edge, so a null argument must terminate the process in assert-enabled
// builds.
TEST(CordDataEdgeTest, EdgeDataOnNullPtr) {
EXPECT_DEATH(EdgeData(nullptr), ".*");
}
// EdgeData() requires a data edge; a BTREE node must trip its assertion.
TEST(CordDataEdgeTest, EdgeDataOnBtree) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  CordRepBtree* btree = CordRepBtree::New(flat);
  EXPECT_DEATH(EdgeData(btree), ".*");
  CordRep::Unref(btree);
}
// EdgeData() requires a data edge; a substring-of-substring must trip its
// assertion.
TEST(CordDataEdgeTest, EdgeDataOnBadSubstr) {
  CordRep* flat = MakeFlat("Lorem ipsum dolor sit amet, consectetur ...");
  CordRep* inner = MakeSubstring(1, 20, flat);
  CordRep* nested = MakeSubstring(1, 18, inner);
  EXPECT_DEATH(EdgeData(nested), ".*");
  CordRep::Unref(nested);
}
#endif // GTEST_HAS_DEATH_TEST && !NDEBUG
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,70 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_internal.h"
#include <atomic>
#include <cassert>
#include <memory>
#include "absl/base/internal/raw_logging.h"
#include "absl/container/inlined_vector.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/str_cat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Definition of the namespace-scope shallow-subcords toggle, constant
// initialized to `kCordShallowSubcordsDefault`.
ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled(
kCordShallowSubcordsDefault);
// Logs "Unexpected node type: <tag>" at FATAL severity, where <tag> is the
// numeric value of `rep->tag`.
void LogFatalNodeType(CordRep* rep) {
  const int tag = static_cast<int>(rep->tag);
  ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ", tag));
}
// Destroys `rep`, dispatching on its tag to the matching node-type deleter.
// A SUBSTRING node is deleted in place and its child reference is dropped; if
// that was the child's last reference, the loop continues and destroys the
// child as well. Requires `rep != nullptr` and a non-immortal refcount.
void CordRep::Destroy(CordRep* rep) {
  assert(rep != nullptr);

  for (;;) {
    assert(!rep->refcount.IsImmortal());
    switch (rep->tag) {
      case BTREE:
        CordRepBtree::Destroy(rep->btree());
        return;

      case EXTERNAL:
        CordRepExternal::Delete(rep);
        return;

      case SUBSTRING: {
        CordRepSubstring* rep_substring = rep->substring();
        rep = rep_substring->child;
        delete rep_substring;
        // Child still referenced elsewhere: nothing more to destroy.
        if (rep->refcount.Decrement()) return;
        break;  // Last reference dropped: destroy the child next iteration.
      }

      case CRC:
        CordRepCrc::Destroy(rep->crc());
        return;

      default:
        // Every remaining valid tag denotes a flat node.
        assert(rep->IsFlat());
        CordRepFlat::Delete(rep);
        return;
    }
  }
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,944 @@
// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/invoke.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/strings/string_view.h"
// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
(defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif
#define ABSL_CORD_INTERNAL_NO_SANITIZE \
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte. Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepBtree;
class CordzInfo;
// Compile-time default for the shallow-subcords feature toggle.
enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };

// Runtime toggle for the shallow-subcords feature.
extern std::atomic<bool> shallow_subcords_enabled;

// Sets the shallow-subcords toggle to `enable`. The store is relaxed: it
// imposes no ordering on surrounding memory operations.
inline void enable_shallow_subcords(bool enable) {
  std::atomic_store_explicit(&shallow_subcords_enabled, enable,
                             std::memory_order_relaxed);
}
// Tuning constants shared by the Cord implementation.
enum Constants {
  // Inlined capacity used with absl::InlinedVector.
  //
  // The InlinedVectors in this file (and in cord.h) are not required to share
  // one inlined size; that they do is historical, and each could use a value
  // tuned to its own usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Blocks of at most this many bytes are preferably copied; larger blocks
  // are preferably shared through reference counting.
  kMaxBytesToCopy = 511,
};
// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
[[noreturn]] void LogFatalNodeType(CordRep* rep);
// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions: in every branch all source bytes are loaded
// into locals before the first byte of `dst` is written.
//
// If `nullify_tail` is true, the destination is padded with '\0' up to 15
// bytes, i.e. bytes [n, 15) of `dst` are zeroed, so `dst` must then have room
// for 15 bytes.
//
// Requires `n <= 15`.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    // Copy as two (possibly overlapping) 8 byte words: the first 8 and the
    // last 8 bytes of the source.
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      // Zero bytes [7, 15); the stores below rewrite the part that holds data.
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
#if defined(ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION)
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#if defined(ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION)
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
#endif
  } else if (n >= 4) {
    // Same scheme with two (possibly overlapping) 4 byte words.
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      // Together these zero bytes [4, 15).
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      // Indices 0, n / 2 and n - 1 cover every byte for n in [1, 3]. Load all
      // three before storing any of them: interleaving loads and stores would
      // read already overwritten source bytes when `dst` overlaps `src` from
      // above (e.g. `dst == src + 1`).
      const char first = src[0];
      const char middle = src[n / 2];
      const char last = src[n - 1];
      dst[0] = first;
      dst[n / 2] = middle;
      dst[n - 1] = last;
    }
    if (nullify_tail) {
      // Together these zero bytes [n, 15).
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed:
// the lowest bit holds the immortal flag and the remaining bits hold the
// reference count, so one reference equals `kRefIncrement`.
class RefcountAndFlags {
 public:
  // Creates an instance holding exactly one reference and no flags.
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}

  // Tag type selecting the immortal constructor.
  struct Immortal {};

  // Creates an immortal instance: only the immortal flag is set.
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.
  //
  // Always returns true when the immortal bit is set: the stored value is then
  // odd and can never equal `kRefIncrement`, so an immortal instance never
  // reports that its last reference was dropped.
  bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalFlag);
    // A sole owner skips the atomic read-modify-write entirely.
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1: the count is
  // always decremented, without the preceding load.
  bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics. The flag bits
  // are shifted out of the returned value.
  size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the reference count is exactly 1 with no flags set.
  // If the reference count is used in the conventional way, a reference count
  // of 1 implies that the current thread owns the reference and no other
  // thread shares it. This call performs the test for a reference count of
  // one, and performs the memory barrier needed for the owning thread to act
  // on the object, knowing that it has exclusive access to the object.
  // Always returns false when the immortal bit is set.
  bool IsOne() const {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  // Returns true if the immortal flag is set. Imposes no memory ordering.
  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bit for flags.
  // kImmortalFlag indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  enum Flags {
    kNumFlags = 1,

    kImmortalFlag = 0x1,
    kRefIncrement = (1 << kNumFlags),
  };

  std::atomic<int32_t> count_;
};
// Node kinds stored in `CordRep::tag`. Values below FLAT identify a node type;
// values from FLAT upward identify a flat node and encode its size.
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // Flat arrays of different sizes get different tags, starting at FLAT and
  // limited to MAX_FLAT_TAG. These values map to an allocated range of
  // 32 bytes to 256 KB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32 - 512]
  // - 64 byte granularity for flat sizes in (512 - 8KiB]
  // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG'
  // should be adjusted as well as the Tag <---> Size mapping logic so that
  // FLAT still represents the minimum flat allocation size (32 bytes as of
  // now).
  FLAT = 6,
  MAX_FLAT_TAG = 248,
};
// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
// Common header of every cord tree node. The node type is identified by `tag`
// instead of a vtable; the typed accessors below (`substring()`, `flat()`,
// ...) are only declared here and defined elsewhere.
struct CordRep {
// Result from an `extract edge` operation. Contains the (possibly changed)
// tree node as well as the extracted edge, or {tree, nullptr} if no edge
// could be extracted.
// On success, the returned `tree` value is null if `extracted` was the only
// data edge inside the tree, a data edge if there were only two data edges in
// the tree, or the (possibly new / smaller) remaining tree with the extracted
// data edge removed.
struct ExtractResult {
CordRep* tree;
CordRep* extracted;
};
CordRep() = default;
// Constant-expression constructor for immortal nodes: sets `length` to `l`,
// marks the refcount immortal, tags the node EXTERNAL and zeroes `storage`.
constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
: length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
// The following fields (`length`, `refcount`, `tag`, `storage`) have to be
// less than 32 bytes since that is the smallest supported flat node size.
// Some code optimizations rely on the specific layout of these fields.
// Notably: the non-trivial field `refcount` being preceded by `length`, and
// being tailed by POD data members only.
// LINT.IfChange
size_t length;
RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
// Otherwise, the node type is CordRepFlat and the tag is the encoded size.
uint8_t tag;
// `storage` provides two main purposes:
// - the starting point for CordRepFlat::Data() [flexible-array-member]
// - 3 bytes of additional storage for use by derived classes.
// The latter is used by CordRepConcat and CordRepBtree. CordRepConcat stores
// a 'depth' value in storage[0], and the (future) CordRepBtree class stores
// `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
// allocate room for these in the derived class, as not all compilers reuse
// padding space from the base class (clang and gcc do, MSVC does not, etc)
uint8_t storage[3];
// LINT.ThenChange(cord_rep_btree.h:copy_raw)
// Returns true if this instance's tag matches the requested type. Note that
// IsFlat() matches every tag value from FLAT upward.
constexpr bool IsSubstring() const { return tag == SUBSTRING; }
constexpr bool IsCrc() const { return tag == CRC; }
constexpr bool IsExternal() const { return tag == EXTERNAL; }
constexpr bool IsFlat() const { return tag >= FLAT; }
constexpr bool IsBtree() const { return tag == BTREE; }
// Downcasting helpers to the concrete node types, in mutable and const
// flavors.
inline CordRepSubstring* substring();
inline const CordRepSubstring* substring() const;
inline CordRepCrc* crc();
inline const CordRepCrc* crc() const;
inline CordRepExternal* external();
inline const CordRepExternal* external() const;
inline CordRepFlat* flat();
inline const CordRepFlat* flat() const;
inline CordRepBtree* btree();
inline const CordRepBtree* btree() const;
// --------------------------------------------------------------------
// Memory management
// Destroys the provided `rep`.
static void Destroy(CordRep* rep);
// Increments the reference count of `rep`.
// Requires `rep` to be a non-null pointer value.
static inline CordRep* Ref(CordRep* rep);
// Decrements the reference count of `rep`. Destroys rep if count reaches
// zero. Requires `rep` to be a non-null pointer value.
static inline void Unref(CordRep* rep);
};
// Node representing the sub range [start, start + length) of `child`.
struct CordRepSubstring : public CordRep {
size_t start; // Starting offset of substring in child
CordRep* child; // Node this substring refers to
// Creates a substring on `child`, adopting a reference on `child`.
// Requires `child` to be either a flat or external node, and `pos` and `n` to
// form a non-empty partial sub range of `child`, i.e.:
// `n > 0 && n < length && n + pos <= length`
static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);
// Creates a substring of `rep`. Does not adopt a reference on `rep`.
// Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
// If `n == rep->length` then this method returns `CordRep::Ref(rep)`
// If `rep` is a substring of a flat or external node, then this method will
// return a new substring of that flat or external node with `pos` adjusted
// with the original `start` position.
static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);
// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
CordRepExternal() = default;
// Constant-expression constructor: creates an immortal external node that
// refers to `str` and has no releaser (`releaser_invoker` is null).
explicit constexpr CordRepExternal(absl::string_view str)
: CordRep(RefcountAndFlags::Immortal{}, str.size()),
base(str.data()),
releaser_invoker(nullptr) {}
// Start of the externally owned character data.
const char* base;
// Pointer to function that knows how to call and destroy the releaser.
ExternalReleaserInvoker releaser_invoker;
// Deletes (releases) the external rep.
// Requires rep != nullptr and rep->IsExternal()
static void Delete(CordRep* rep);
};
// Ranked-overload dispatch tags (go/ranked-overloads). Passing `Rank1{}`
// prefers an overload taking `Rank1` and falls back to one taking `Rank0`
// through the derived-to-base conversion.
struct Rank0 {};
struct Rank1 : public Rank0 {};
// Preferred overload: invokes `releaser` with `data`. The defaulted template
// argument removes this overload from consideration unless `Releaser` is
// invocable with an `absl::string_view`.
template <typename Releaser, typename = ::absl::base_internal::invoke_result_t<
Releaser, absl::string_view>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view data) {
::absl::base_internal::invoke(std::forward<Releaser>(releaser), data);
}
// Fallback overload: invokes `releaser` without arguments and ignores the
// data. Only viable when `Releaser` is invocable with no arguments; it is
// selected for a `Rank1{}` argument only when the overload above is not
// viable.
template <typename Releaser,
typename = ::absl::base_internal::invoke_result_t<Releaser>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view) {
::absl::base_internal::invoke(std::forward<Releaser>(releaser));
}
// Concrete external node that stores its releaser of type `Releaser`.
// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
: public CordRepExternal,
public ::absl::container_internal::CompressedTuple<Releaser> {
// Stores `releaser` and registers `Release` as this node's type erased
// releaser invoker.
// The extra int arg is so that we can avoid interfering with copy/move
// constructors while still benefitting from perfect forwarding.
template <typename T>
CordRepExternalImpl(T&& releaser, int)
: CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
this->releaser_invoker = &Release;
}
// Invokes the stored releaser, moved out of the tuple, offering it the
// external data `[base, base + length)`.
~CordRepExternalImpl() {
InvokeReleaser(Rank1{}, std::move(this->template get<0>()),
absl::string_view(base, length));
}
// Deletes `rep` as a `CordRepExternalImpl<Releaser>`, which runs the
// destructor above. `rep` must have been created as this exact type.
static void Release(CordRepExternal* rep) {
delete static_cast<CordRepExternalImpl*>(rep);
}
};
// Allocates a SUBSTRING node covering `[pos, pos + n)` of `child`. The new
// node stores `child` as is, without taking an extra reference. Aborts through
// LogFatalNodeType() if `child` is neither an external nor a flat node.
inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // Move to strategical places inside the Cord logic and make this an assert.
  const bool is_plain_data = child->IsExternal() || child->IsFlat();
  if (ABSL_PREDICT_FALSE(!is_plain_data)) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->tag = SUBSTRING;
  rep->length = n;
  rep->child = child;
  rep->start = pos;
  return rep;
}
// Returns a node for `[pos, pos + n)` of `rep`, taking its own reference on
// the node it ends up referring to. A full-length request returns `rep` itself
// with an added reference. If `rep` is a SUBSTRING, the new node refers
// directly to its child, with `pos` shifted by the existing `start`.
inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);

  if (n == rep->length) return CordRep::Ref(rep);

  // Never stack substrings: refer to the underlying child instead.
  CordRep* target = rep;
  size_t start = pos;
  if (rep->IsSubstring()) {
    start += rep->substring()->start;
    target = rep->substring()->child;
  }

  CordRepSubstring* substr = new CordRepSubstring();
  substr->tag = SUBSTRING;
  substr->length = n;
  substr->start = start;
  substr->child = CordRep::Ref(target);
  return substr;
}
// Releases the external node `rep` by calling its type erased releaser
// invoker. Requires a non-null EXTERNAL node with a non-null invoker.
inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  CordRepExternal* const rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  (*rep_external->releaser_invoker)(rep_external);
}
// Holds one constant-initialized `CordRepExternal` per `Str` type. `value` is
// constructed from `Str::value` through the `absl::string_view` constructor of
// `CordRepExternal`.
template <typename Str>
struct ConstInitExternalStorage {
ABSL_CONST_INIT static CordRepExternal value;
};
// Out-of-class definition of the static member declared above.
template <typename Str>
ABSL_CONST_INIT CordRepExternal
ConstInitExternalStorage<Str>::value(Str::value);
// Maximum number of characters that can be stored inline.
enum { kMaxInline = 15 };
// Returns `data[pos]`, or '\0' when `pos` is at or past the end of `data`.
constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos >= data.size() ? '\0' : data[pos];
}
// We store cordz_info as 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;
// Assert that the `cordz_info` pointer value perfectly overlaps the first half
// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
// LittleEndianByte() returns a `cordz_info_t` whose first byte in the host's
// memory representation holds `value`, with all other bytes being 0. On
// little endian hosts that is `value` itself; on big endian hosts `value` is
// shifted into the most significant byte.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if !defined(ABSL_IS_BIG_ENDIAN)
  return value;
#else
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#endif
}
class InlineData {
public:
// DefaultInitType forces the use of the default initialization constructor.
enum DefaultInitType { kDefaultInit };
// kNullCordzInfo holds the little endian representation of intptr_t(1).
// This is the 'null' / initial value of 'cordz_info'. The null value
// is specifically little endian 1 because the first byte of cordz_info
// overlaps with the byte holding the tag (see kTagOffset), whose lowest
// bit marks the instance as holding a tree value.
static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
// kTagOffset contains the offset of the control byte / tag. This constant is
// intended mostly for debugging purposes: do not remove this constant as it
// is actively inspected and used by gdb pretty printing code.
static constexpr size_t kTagOffset = 0;
// Implement `~InlineData()` conditionally: we only need this destructor to
// unpoison poisoned instances under *SAN, and it will only compile correctly
// if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
~InlineData() noexcept { unpoison(); }
#endif
constexpr InlineData() noexcept { poison_this(); }
explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
poison_this();
}
explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
ABSL_ASSERT(rep != nullptr);
}
// Explicit constexpr constructor to create a constexpr InlineData
// value. Creates an inlined SSO value if `rep` is null, otherwise
// creates a tree instance value.
constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
: rep_(rep ? Rep(rep) : Rep(sv)) {
poison();
}
constexpr InlineData(const InlineData& rhs) noexcept;
InlineData& operator=(const InlineData& rhs) noexcept;
friend void swap(InlineData& lhs, InlineData& rhs) noexcept;
friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
const Rep l = lhs.rep_.SanitizerSafeCopy();
const Rep r = rhs.rep_.SanitizerSafeCopy();
return memcmp(&l, &r, sizeof(l)) == 0;
#else
return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
}
friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
return !operator==(lhs, rhs);
}
// Poisons the unused inlined SSO data if the current instance
// is inlined, else un-poisons the entire instance.
constexpr void poison();
// Un-poisons this instance.
constexpr void unpoison();
// Poisons the current instance. This is used on default initialization.
constexpr void poison_this();
// Returns true if the current instance is empty.
// The 'empty value' is an inlined data value of zero length.
bool is_empty() const { return rep_.tag() == 0; }
// Returns true if the current instance holds a tree value.
bool is_tree() const { return (rep_.tag() & 1) != 0; }
// Returns true if the current instance holds a cordz_info value.
// Requires the current instance to hold a tree value.
bool is_profiled() const {
assert(is_tree());
return rep_.cordz_info() != kNullCordzInfo;
}
// Returns true if either of the provided instances hold a cordz_info value.
// This method is more efficient than the equivalent `data1.is_profiled() ||
// data2.is_profiled()`. Requires both arguments to hold a tree.
static bool is_either_profiled(const InlineData& data1,
const InlineData& data2) {
assert(data1.is_tree() && data2.is_tree());
return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
kNullCordzInfo;
}
// Returns the cordz_info sampling instance for this instance, or nullptr
// if the current instance is not sampled and does not have CordzInfo data.
// Requires the current instance to hold a tree value.
CordzInfo* cordz_info() const {
  assert(is_tree());
  // The value is stored in little endian order with its lowest bit set;
  // convert it to host order and strip that bit to recover the pointer. The
  // 'null' value therefore decodes to nullptr.
  const uint64_t stored = static_cast<uint64_t>(rep_.cordz_info());
  intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(stored));
  assert(info & 1);
  return reinterpret_cast<CordzInfo*>(info - 1);
}
// Sets the current cordz_info sampling instance for this instance, or nullptr
// if the current instance is not sampled and does not have CordzInfo data.
// Requires the current instance to hold a tree value.
void set_cordz_info(CordzInfo* cordz_info) {
  assert(is_tree());
  // Set the lowest bit, then store the value in little endian order.
  const uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
  const auto encoded =
      static_cast<cordz_info_t>(absl::little_endian::FromHost64(info));
  rep_.set_cordz_info(encoded);
}
// Resets the current cordz_info to null / empty.
void clear_cordz_info() {
assert(is_tree());
rep_.set_cordz_info(kNullCordzInfo);
}
// Returns a read only pointer to the character data inside this instance.
// Requires the current instance to hold inline data.
const char* as_chars() const {
assert(!is_tree());
return rep_.as_chars();
}
// Returns a mutable pointer to the character data inside this instance.
// Should be used for 'write only' operations setting an inlined value.
// Applications can set the value of inlined data either before or after
// setting the inlined size, i.e., both of the below are valid:
//
// // Set inlined data and inline size
// memcpy(data_.as_chars(), data, size);
// data_.set_inline_size(size);
//
// // Set inlined size and inline data
// data_.set_inline_size(size);
// memcpy(data_.as_chars(), data, size);
//
// It's an error to read from the returned pointer without a preceding write
// if the current instance does not hold inline data, i.e.: is_tree() == true.
char* as_chars() { return rep_.as_chars(); }
// Returns the tree value of this value.
// Requires the current instance to hold a tree value.
CordRep* as_tree() const {
assert(is_tree());
return rep_.tree();
}
void set_inline_data(const char* data, size_t n) {
  ABSL_ASSERT(n <= kMaxInline);
  // The inline tag is the size shifted left by one, leaving the lowest
  // ('is_tree') bit clear.
  const int8_t tag = static_cast<int8_t>(n << 1);
  unpoison();
  rep_.set_tag(tag);
  // Copies `n` bytes and zero pads the rest of the inline buffer.
  SmallMemmove<true>(rep_.as_chars(), data, n);
  poison();
}
void CopyInlineToString(absl::Nonnull<std::string*> dst) const {
assert(!is_tree());
// As Cord can store only 15 bytes it is smaller than std::string's
// small string optimization buffer size. Therefore we will always trigger
// the fast assign short path.
//
// Copying with a size equal to the maximum allows more efficient, wider
// stores to be used and no branching.
dst->assign(rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
// After the copy we then change the size and put in a 0 byte.
dst->erase(inline_size());
}
void copy_max_inline_to(char* dst) const {
assert(!is_tree());
memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
}
// Initialize this instance to holding the tree value `rep`,
// initializing the cordz_info to null, i.e.: 'not profiled'.
void make_tree(CordRep* rep) {
unpoison();
rep_.make_tree(rep);
}
// Set the tree value of this instance to 'rep`.
// Requires the current instance to already hold a tree value.
// Does not affect the value of cordz_info.
void set_tree(CordRep* rep) {
assert(is_tree());
rep_.set_tree(rep);
}
// Returns the size of the inlined character data inside this instance.
// Requires the current instance to hold inline data.
size_t inline_size() const { return rep_.inline_size(); }
// Sets the size of the inlined character data inside this instance.
// Requires `size` to be <= kMaxInline.
// See the documentation on 'as_chars()' for more information and examples.
void set_inline_size(size_t size) {
unpoison();
rep_.set_inline_size(size);
poison();
}
// Compares 'this' inlined data with rhs. The comparison is a straightforward
// lexicographic comparison. `Compare()` returns values as follows:
//
// -1 'this' InlineData instance is smaller
// 0 the InlineData instances are equal
// 1 'this' InlineData instance larger
int Compare(const InlineData& rhs) const {
return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
}
private:
struct Rep {
// Raw storage behind `InlineData`. The same bytes are viewed either as
// `data` (a tag byte followed by inline characters) or as `as_tree`
// (a `cordz_info` value followed by a `CordRep*`). Bit 0 of the first byte
// of the object (see `tag()` / `is_tree()`) tells which view is active.
//
// See cordz_info_t for forced alignment and size of `cordz_info` details.
struct AsTree {
explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
: rep(tree) {}
// NOTE(review): `tag()` reads byte 0 of the enclosing `Rep`, which overlays
// this first member, so the "is tree" bit presumably lives in the
// `cordz_info` value — confirm against the definition of `cordz_info_t`.
cordz_info_t cordz_info = kNullCordzInfo;
absl::cord_internal::CordRep* rep;
};
// Performs no initialization of the storage.
explicit Rep(DefaultInitType) {}
// Zero-fills `data`: the tag is 0, i.e. inline data of size 0.
constexpr Rep() : data{0} {}
constexpr Rep(const Rep&) = default;
constexpr Rep& operator=(const Rep&) = default;
// Tree representation holding `rep`; `cordz_info` starts as kNullCordzInfo.
explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}
// Inline representation: the tag byte is `chars.size() << 1`, followed by one
// `GetOrNull(chars, i)` value for each of the character slots.
// NOTE(review): `GetOrNull` presumably yields '\0' for indexes beyond
// `chars.size()` — confirm against its definition.
explicit constexpr Rep(absl::string_view chars)
: data{static_cast<char>((chars.size() << 1)),
GetOrNull(chars, 0),
GetOrNull(chars, 1),
GetOrNull(chars, 2),
GetOrNull(chars, 3),
GetOrNull(chars, 4),
GetOrNull(chars, 5),
GetOrNull(chars, 6),
GetOrNull(chars, 7),
GetOrNull(chars, 8),
GetOrNull(chars, 9),
GetOrNull(chars, 10),
GetOrNull(chars, 11),
GetOrNull(chars, 12),
GetOrNull(chars, 13),
GetOrNull(chars, 14)} {}
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
// Break compiler optimization for cases when value is allocated on the
// stack. Compiler assumes that the variable is fully accessible
// regardless of our poisoning.
// Missing report: https://github.com/llvm/llvm-project/issues/100640
const Rep* self() const {
const Rep* volatile ptr = this;
return ptr;
}
Rep* self() {
Rep* volatile ptr = this;
return ptr;
}
#else
// Without sanitizer support `self()` is simply `this`.
constexpr const Rep* self() const { return this; }
constexpr Rep* self() { return this; }
#endif
// Disable sanitizer as we must always be able to read `tag`.
// The tag is the first byte of the object.
ABSL_CORD_INTERNAL_NO_SANITIZE
int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(self())[0] = rhs; }
// Inline characters start directly after the tag byte.
char* as_chars() { return self()->data + 1; }
const char* as_chars() const { return self()->data + 1; }
// True if bit 0 of the tag is set.
bool is_tree() const { return (self()->tag() & 1) != 0; }
// Inline size is the tag shifted right by one bit. Asserts (ABSL_ASSERT)
// that this instance does not hold a tree.
size_t inline_size() const {
ABSL_ASSERT(!self()->is_tree());
return static_cast<size_t>(self()->tag()) >> 1;
}
// Stores `size << 1` in the tag, which leaves bit 0 (the tree bit) clear.
// Asserts (ABSL_ASSERT) that `size <= kMaxInline`.
void set_inline_size(size_t size) {
ABSL_ASSERT(size <= kMaxInline);
self()->set_tag(static_cast<int8_t>(size << 1));
}
// Accessors for the tree view. None of these check `is_tree()`.
CordRep* tree() const { return self()->as_tree.rep; }
void set_tree(CordRep* rhs) { self()->as_tree.rep = rhs; }
cordz_info_t cordz_info() const { return self()->as_tree.cordz_info; }
void set_cordz_info(cordz_info_t rhs) { self()->as_tree.cordz_info = rhs; }
// Stores `tree` and resets `cordz_info` to kNullCordzInfo.
void make_tree(CordRep* tree) {
self()->as_tree.rep = tree;
self()->as_tree.cordz_info = kNullCordzInfo;
}
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
// Returns a copy of this value that only reads bytes in use. At runtime a
// tree value is copied whole; an inline value copies just the tag and the
// first `inline_size()` characters into a zero-initialized `Rep`. During
// constant evaluation the value is copied as is.
constexpr Rep SanitizerSafeCopy() const {
if (!absl::is_constant_evaluated()) {
Rep res;
if (is_tree()) {
res = *this;
} else {
res.set_tag(tag());
memcpy(res.as_chars(), as_chars(), inline_size());
}
return res;
} else {
return *this;
}
}
#else
// Without sanitizer support no copy is needed: returns `*this` by reference.
constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif
// If the data has length <= kMaxInline, we store it in `data`, and
// store the size in the first char of `data` shifted left by one bit
// (see `set_inline_size()`).
// Else we store it in a tree and store a pointer to that tree in
// `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
union {
char data[kMaxInline + 1];
AsTree as_tree;
};
// TODO(b/145829486): see swap(InlineData, InlineData) for more info.
// Copies the current bytes of `*this` into `refrhs`, then overwrites `*this`
// with the by-value copy `rhs`.
inline void SwapValue(Rep rhs, Rep& refrhs) {
memcpy(&refrhs, this, sizeof(*this));
memcpy(this, &rhs, sizeof(*this));
}
};
// Private implementation of `Compare()`
static inline int Compare(const Rep& lhs, const Rep& rhs) {
  // The character area is compared as two 8-byte words: the first starts at
  // offset 0, the second at offset 7 (overlapping the first by one byte).
  // NOTE(review): this presumably relies on bytes past the inline size being
  // identical (zero) in both operands — confirm against InlineData invariants.
  uint64_t lhs_word;
  uint64_t rhs_word;
  memcpy(&lhs_word, lhs.as_chars(), sizeof(lhs_word));
  memcpy(&rhs_word, rhs.as_chars(), sizeof(rhs_word));
  if (lhs_word == rhs_word) {
    memcpy(&lhs_word, lhs.as_chars() + 7, sizeof(lhs_word));
    memcpy(&rhs_word, rhs.as_chars() + 7, sizeof(rhs_word));
    if (lhs_word == rhs_word) {
      // All characters match: the shorter value orders first.
      const size_t lhs_size = lhs.inline_size();
      const size_t rhs_size = rhs.inline_size();
      if (lhs_size == rhs_size) return 0;
      return lhs_size < rhs_size ? -1 : 1;
    }
  }
  // The words differ. Convert both to big-endian order so that the unsigned
  // integer comparison ranks the earliest differing byte as most significant.
  const uint64_t lhs_ordered = absl::big_endian::FromHost64(lhs_word);
  const uint64_t rhs_ordered = absl::big_endian::FromHost64(rhs_word);
  return lhs_ordered < rhs_ordered ? -1 : 1;
}
Rep rep_;
};
static_assert(sizeof(InlineData) == kMaxInline + 1, "");
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
// Sanitizer build: copies `rhs` through `SanitizerSafeCopy()` so that only
// bytes in use are read, then poisons this object according to its contents.
constexpr InlineData::InlineData(const InlineData& rhs) noexcept
: rep_(rhs.rep_.SanitizerSafeCopy()) {
poison();
}
// Sanitizer build: unpoisons this object before overwriting it with a
// sanitizer-safe copy of `rhs`, then re-poisons it according to the new
// contents.
inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
unpoison();
rep_ = rhs.rep_.SanitizerSafeCopy();
poison();
return *this;
}
// Poisons the whole object for the sanitizer. Does nothing during constant
// evaluation.
constexpr void InlineData::poison_this() {
  if (absl::is_constant_evaluated()) return;
  container_internal::SanitizerPoisonObject(this);
}
// Removes sanitizer poisoning from the whole object. Does nothing during
// constant evaluation.
constexpr void InlineData::unpoison() {
  if (absl::is_constant_evaluated()) return;
  container_internal::SanitizerUnpoisonObject(this);
}
// Applies sanitizer poisoning that matches the current contents:
//  - tree value: the whole object is unpoisoned;
//  - empty inline value: the whole object is poisoned;
//  - partially filled inline value: only the unused tail of the character
//    area is poisoned;
//  - completely filled inline value: nothing is changed.
// Does nothing during constant evaluation.
constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else {
      const size_t used = inline_size();
      if (used == 0) {
        container_internal::SanitizerPoisonObject(this);
      } else if (used < kMaxInline) {
        const char* unused_begin = rep_.as_chars() + used;
        container_internal::SanitizerPoisonMemoryRegion(unused_begin,
                                                        kMaxInline - used);
      }
    }
  }
}
#else // ABSL_INTERNAL_CORD_HAVE_SANITIZER
// Without sanitizer support, copy construction and assignment are the
// compiler-generated defaults and the poisoning hooks are empty.
constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;
constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}
#endif // ABSL_INTERNAL_CORD_HAVE_SANITIZER
// Returns this node viewed as a CordRepSubstring.
// Asserts that `IsSubstring()` holds.
inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  auto* const as_substring = static_cast<CordRepSubstring*>(this);
  return as_substring;
}
// Const overload: returns this node viewed as a const CordRepSubstring.
// Asserts that `IsSubstring()` holds.
inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  const auto* const as_substring = static_cast<const CordRepSubstring*>(this);
  return as_substring;
}
// Returns this node viewed as a CordRepExternal.
// Asserts that `IsExternal()` holds.
inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  auto* const as_external = static_cast<CordRepExternal*>(this);
  return as_external;
}
// Const overload: returns this node viewed as a const CordRepExternal.
// Asserts that `IsExternal()` holds.
inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  const auto* const as_external = static_cast<const CordRepExternal*>(this);
  return as_external;
}
// Increments the reference count of `rep` and returns `rep`.
// `rep` must not be null.
inline CordRep* CordRep::Ref(CordRep* rep) {
// ABSL_ASSUME is a workaround for
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
ABSL_ASSUME(rep != nullptr);
rep->refcount.Increment();
return rep;
}
// Drops one reference from `rep` and destroys it when the decrement reports
// that it should no longer be kept alive. `rep` must not be null.
inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // The destroy path is annotated as unlikely: the common case is expected to
  // be a node that still has other references after this decrement.
  const bool survives = rep->refcount.DecrementExpectHighRefcount();
  if (ABSL_PREDICT_FALSE(!survives)) {
    Destroy(rep);
  }
}
// Exchanges the contents of `lhs` and `rhs`. Both objects are unpoisoned
// before their raw bytes are exchanged and re-poisoned afterwards according
// to their new contents.
inline void swap(InlineData& lhs, InlineData& rhs) noexcept {
lhs.unpoison();
rhs.unpoison();
// TODO(b/145829486): `std::swap(lhs.rep_, rhs.rep_)` results in bad codegen
// on clang, spilling the temporary swap value on the stack. Since `Rep` is
// trivial, we can make clang DTRT by calling a hand-rolled `SwapValue` where
// we pass `rhs` both by value (register allocated) and by reference. The IR
// then folds and inlines correctly into an optimized swap without spill.
lhs.rep_.SwapValue(rhs.rep_, rhs.rep_);
rhs.poison();
lhs.poison();
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,944 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_
#include <cassert>
#include <cstdint>
#include <iosfwd>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/optimization.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// `SetCordBtreeExhaustiveValidation()` can be set to force exhaustive
// validation in debug assertions, and code that calls `IsValid()`
// explicitly. By default, assertions should be relatively cheap and
// AssertValid() can easily lead to O(n^2) complexity as recursive / full tree
// validation is O(n).
void SetCordBtreeExhaustiveValidation(bool do_exaustive_validation);
bool IsCordBtreeExhaustiveValidationEnabled();
class CordRepBtreeNavigator;
// CordRepBtree is, as the name implies, a btree implementation of a CordRep tree.
// Data is stored at the leaf level only, non leaf nodes contain down pointers
// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT
// or EXTERNAL nodes. The implementation allows for data to be added to either
// end of the tree only, it does not provide any 'insert' logic. This has the
// benefit that we can expect good fill ratios: all nodes except the outer
// 'legs' will have 100% fill ratios for trees built using Append/Prepend
// methods. Merged trees will typically have a fill ratio well above 50% as in a
// similar fashion, one side of the merged tree will typically have a 100% fill
// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or
// better, and the tree never needs balancing.
//
// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that
// input unless explicitly stated otherwise. All functions returning a CordRep*
// or CordRepBtree* instance transfer a reference back to the caller.
// Simplified, callers both 'donate' and 'consume' a reference count on each
// call, simplifying the API. An example of building a tree:
//
// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello"));
// tree = CordRepBtree::Append(tree, MakeFlat("world"));
//
// In the above example, all inputs are consumed, making each call affecting
// `tree` reference count neutral. The returned `tree` value can be different
// from the input if the input is shared with other threads, or if the tree
// grows in height, but callers typically never have to concern themselves with
// that and trust that all methods DTRT at all times.
class CordRepBtree : public CordRep {
public:
// EdgeType identifies `front` and `back` enum values.
// Various implementations in CordRepBtree such as `Add` and `Edge` are
// generic and templated on operating on either of the boundary edges.
// For more information on the possible edges contained in a CordRepBtree
// instance see the documentation for `edges_`.
enum class EdgeType { kFront, kBack };
// Convenience constants into `EdgeType`
static constexpr EdgeType kFront = EdgeType::kFront;
static constexpr EdgeType kBack = EdgeType::kBack;
// Maximum number of edges: based on experiments and performance data, we can
// pick suitable values resulting in optimum cacheline aligned values. The
// preferred values are based on 64-bit systems where we aim to align this
// class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc.
// TODO(b/192061034): experiment with alternative sizes.
static constexpr size_t kMaxCapacity = 6;
// Reasonable maximum height of the btree. We can expect a fill ratio of at
// least 50%: trees are always expanded at the front or back. Concatenating
// trees will then typically fold at the top most node, where the lower nodes
// are at least at capacity on one side of joined inputs. At a lower fill
// rate of 4 edges per node, we have capacity for ~16 million leaf nodes.
// We will fail / abort if an application ever exceeds this height, which
// should be extremely rare (near impossible) and be an indication of an
// application error: we do not assume it reasonable for any application to
// operate correctly with such monster trees.
// Another compelling reason for the number `12` is that any contextual stack
// required for navigation or insertion requires 12 words and 12 bytes, which
// fits inside 2 cache lines with some room to spare, and is reasonable as a
// local stack variable compared to Cord's current near 400 bytes stack use.
// The maximum `height` value of a node is then `kMaxDepth - 1` as node height
// values start with a value of 0 for leaf nodes.
static constexpr size_t kMaxDepth = 12;
// See comments on height() for why this is an int and not a size_t.
static constexpr int kMaxHeight = static_cast<int>(kMaxDepth - 1);
// `Action` defines the action for unwinding changes done at the btree's leaf
// level that need to be propagated up to the parent node(s). Each operation
// on a node has an effect / action defined as follows:
// - kSelf
// The operation (add / update, etc) was performed directly on the node as
// the node is private to the current thread (i.e.: not shared directly or
// indirectly through a refcount > 1). Changes can be propagated directly to
// all parent nodes as all parent nodes are also then private to the current
// thread.
// - kCopied
// The operation (add / update, etc) was performed on a copy of the original
// node, as the node is (potentially) directly or indirectly shared with
// other threads. Changes need to be propagated into the parent nodes where
// the old down pointer must be unreffed and replaced with this new copy.
// Such changes to parent nodes may themselves require a copy if the parent
// node is also shared. A kCopied action can propagate all the way to the
// top node where we then must unref the `tree` input provided by the
// caller, and return the new copy.
// - kPopped
// The operation (typically add) could not be satisfied due to insufficient
// capacity in the targeted node, and a new 'leg' was created that needs to
// be added into the parent node. For example, adding a FLAT inside a leaf
// node that is at capacity will create a new leaf node containing that
// FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can
// cascade up the tree if parent nodes are also at capacity. A 'Popped'
// action propagating all the way to the top of the tree will result in
// the tree becoming one level higher than the current tree through a final
// `CordRepBtree::New(tree, popped)` call, resulting in a new top node
// referencing the old tree and the new (fully popped upwards) 'leg'.
enum Action { kSelf, kCopied, kPopped };
// Result of an operation on a node. See the `Action` enum for details.
struct OpResult {
CordRepBtree* tree;
Action action;
};
// Return value of the CopyPrefix and CopySuffix methods which can
// return a node or data edge at any height inside the tree.
// A height of 0 defines the lowest (leaf) node, a height of -1 identifies
// `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof.
struct CopyResult {
CordRep* edge;
int height;
};
// Logical position inside a node:
// - index: index of the edge.
// - n: size or offset value depending on context.
struct Position {
size_t index;
size_t n;
};
// Creates a btree from the given input. Adopts a ref of `rep`.
// If the input `rep` is itself a btree, i.e., `IsBtree()`, then this
// function immediately returns `rep->btree()`. If the input is a valid data
// edge (see IsDataEdge()), then a new leaf node is returned containing `rep`
// as the sole data edge. Else, the input is assumed to be a (legacy) concat
// tree, and the input is consumed and transformed into a btree().
static CordRepBtree* Create(CordRep* rep);
// Destroys the provided tree. Should only be called by cord internal API's,
// typically after a ref_count.Decrement() on the last reference count.
static void Destroy(CordRepBtree* tree);
// Destruction
static void Delete(CordRepBtree* tree) {
  // Releases only the node object itself; no edges are unreffed here.
  delete tree;
}
// Use CordRep::Unref() as we overload for absl::Span<CordRep* const>.
using CordRep::Unref;
// Unrefs all edges in `edges` which are assumed to be 'likely one'.
static void Unref(absl::Span<CordRep* const> edges);
// Appends / Prepends an existing CordRep instance to this tree.
// The below methods accept three types of input:
// 1) `rep` is a data edge (See `IsDataEdge` for valid data edges).
// `rep` is appended or prepended to this tree 'as is'.
// 2) `rep` is a BTREE.
// `rep` is merged into `tree` respecting the Append/Prepend order.
// 3) `rep` is some other (legacy) type.
// `rep` is converted in place and added to `tree`
// Requires `tree` and `rep` to be not null.
static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep);
static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep);
// Append/Prepend the data in `data` to this tree.
// The `extra` parameter defines how much extra capacity should be allocated
// for any additional FLAT being allocated. This is an optimization hint from
// the caller. For example, a caller may need to add 2 string_views of data
// "abc" and "defghi" which are not consecutive. The caller can in this case
// invoke `Append(tree, "abc", 6)`, and any newly added flat is allocated
// where possible with at least 6 bytes of extra capacity beyond `length`.
// This helps avoiding data getting fragmented over multiple flats.
// There is no limit on the size of `data`. If `data` can not be stored inside
// a single flat, then the function will iteratively add flats until all data
// has been consumed and appended or prepended to the tree.
static CordRepBtree* Append(CordRepBtree* tree, string_view data,
size_t extra = 0);
static CordRepBtree* Prepend(CordRepBtree* tree, string_view data,
size_t extra = 0);
// Returns a new tree, containing `n` bytes of data from this instance
// starting at offset `offset`. Where possible, the returned tree shares
// (re-uses) data edges and nodes with this instance to minimize the
// combined memory footprint of both trees.
// Requires `offset + n <= length`. Returns `nullptr` if `n` is zero.
CordRep* SubTree(size_t offset, size_t n);
// Removes `n` trailing bytes from `tree`, and returns the resulting tree
// or data edge. Returns `tree` if n is zero, and nullptr if n == length.
// This function is logically identical to:
// result = tree->SubTree(0, tree->length - n);
// Unref(tree);
// return result;
// However, the actual implementation will as much as possible perform 'in
// place' modifications on the tree on all nodes and edges that are mutable.
// For example, in a fully privately owned tree with the last edge being a
// flat of length 12, RemoveSuffix(1) will simply set the length of that data
// edge to 11, and reduce the length of all nodes on the edge path by 1.
static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n);
// Returns the character at the given offset.
char GetCharacter(size_t offset) const;
// Returns true if this node holds a single data edge, and if so, sets
// `fragment` to reference the contained data. `fragment` is an optional
// output parameter and allowed to be null.
bool IsFlat(absl::string_view* fragment) const;
// Returns true if the data of `n` bytes starting at offset `offset`
// is contained in a single data edge, and if so, sets fragment to reference
// the contained data. `fragment` is an optional output parameter and allowed
// to be null.
bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const;
// Returns a span (mutable range of bytes) of up to `size` bytes into the
// last FLAT data edge inside this tree under the following conditions:
// - none of the nodes down into the FLAT node are shared.
// - the last data edge in this tree is a non-shared FLAT.
// - the referenced FLAT has additional capacity available.
// If all these conditions are met, a non-empty span is returned, and the
// length of the flat node and involved tree nodes have been increased by
// `span.length()`. The caller is responsible for immediately assigning values
// to all uninitialized data referenced by the returned span.
// Requires `this->refcount.IsOne()`: this function forces the caller to do
// this fast path check on the top level node, as this is the most commonly
// shared node of a cord tree.
Span<char> GetAppendBuffer(size_t size);
// Extracts the right-most data edge from this tree iff:
// - the tree and all internal edges to the right-most node are not shared.
// - the right-most node is a FLAT node and not shared.
// - the right-most node has at least the desired extra capacity.
//
// Returns {tree, nullptr} if any of the above conditions are not met.
// This method effectively removes data from the tree. The intent of this
// method is to allow applications appending small string data to use
// pre-existing capacity, and add the modified rep back to the tree.
//
// Simplified such code would look similar to this:
// void MyTreeBuilder::Append(string_view data) {
// ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1);
// if (CordRep* rep = result.extracted) {
// size_t available = rep->Capacity() - rep->length;
// size_t n = std::min(data.size(), available);
// memcpy(rep->Data(), data.data(), n);
// rep->length += n;
// data.remove_prefix(n);
// if (!result.tree->IsBtree()) {
// tree_ = CordRepBtree::Create(result.tree);
// }
// tree_ = CordRepBtree::Append(tree_, rep);
// }
// ...
// // Remaining edge in `result.tree`.
// }
static ExtractResult ExtractAppendBuffer(CordRepBtree* tree,
size_t extra_capacity = 1);
// Returns the `height` of the tree. The height of a tree is limited to
// kMaxHeight. `height` is implemented as an `int` as in some places we
// use negative (-1) values for 'data edges'.
int height() const {
  // The height is kept in slot 0 of `storage`.
  const int node_height = static_cast<int>(storage[0]);
  return node_height;
}
// Properties: begin, back, end, front/back boundary indexes.
size_t begin() const {
  // Index of the first edge, kept in slot 1 of `storage`.
  const size_t first_index = static_cast<size_t>(storage[1]);
  return first_index;
}
size_t back() const { return static_cast<size_t>(storage[2]) - 1; }
size_t end() const {
  // Index one past the last edge, kept in slot 2 of `storage`.
  const size_t end_index = static_cast<size_t>(storage[2]);
  return end_index;
}
size_t index(EdgeType edge) const {
  // Maps the boundary selector onto the matching edge index.
  if (edge == kFront) {
    return begin();
  }
  return back();
}
// Properties: size and capacity.
// `capacity` contains the current capacity of this instance, where
// `kMaxCapacity` contains the maximum capacity of a btree node.
// For now, `capacity` and `kMaxCapacity` return the same value, but this may
// change in the future if we see benefit in dynamically sizing 'small' nodes
// to 'large' nodes for large data trees.
size_t size() const {
  // Number of edges in the half-open index range [begin(), end()).
  const size_t first = begin();
  const size_t last = end();
  return last - first;
}
size_t capacity() const {
  // Currently every node has the maximum capacity.
  return kMaxCapacity;
}
// Edge access
inline CordRep* Edge(size_t index) const;
inline CordRep* Edge(EdgeType edge_type) const;
inline absl::Span<CordRep* const> Edges() const;
inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const;
// Returns reference to the data edge at `index`.
// Requires this instance to be a leaf node, and `index` to be valid index.
inline absl::string_view Data(size_t index) const;
// Diagnostics: returns true if `tree` is valid and internally consistent.
// If `shallow` is false, then the provided top level node and all child nodes
// below it are recursively checked. If `shallow` is true, only the provided
// node in `tree` and the cumulative length, type and height of the direct
// child nodes of `tree` are checked. The value of `shallow` is ignored if the
// internal `cord_btree_exhaustive_validation` diagnostics variable is true,
// in which case the performed validations works as if `shallow` were false.
// This function is intended for debugging and testing purposes only.
static bool IsValid(const CordRepBtree* tree, bool shallow = false);
// Diagnostics: asserts that the provided tree is valid.
// `AssertValid()` performs a shallow validation by default. `shallow` can be
// set to false in which case an exhaustive validation is performed. This
// function is implemented in terms of calling `IsValid()` and asserting the
// return value to be true. See `IsValid()` for more information.
// This function is intended for debugging and testing purposes only.
static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true);
static const CordRepBtree* AssertValid(const CordRepBtree* tree,
bool shallow = true);
// Diagnostics: dump the contents of this tree to `stream`.
// This function is intended for debugging and testing purposes only.
static void Dump(const CordRep* rep, std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
std::ostream& stream);
static void Dump(const CordRep* rep, absl::string_view label,
bool include_contents, std::ostream& stream);
// Adds the edge `edge` to this node if possible. `owned` indicates if the
// current node is potentially shared or not with other threads. Returns:
// - {kSelf, <this>}
// The edge was directly added to this node.
// - {kCopied, <node>}
// The edge was added to a copy of this node.
// - {kPopped, New(edge, height())}
// A new leg with the edge was created as this node has no extra capacity.
template <EdgeType edge_type>
inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta);
// Replaces the front or back edge with the provided new edge. Returns:
// - {kSelf, <this>}
// The edge was directly set in this node. The old edge is unreffed.
// - {kCopied, <node>}
// A copy of this node was created with the new edge value.
// In both cases, the function adopts a reference on `edge`.
template <EdgeType edge_type>
OpResult SetEdge(bool owned, CordRep* edge, size_t delta);
// Creates a new empty node at the specified height.
static CordRepBtree* New(int height = 0);
// Creates a new node containing `rep`, with the height being computed
// automatically based on the type of `rep`.
static CordRepBtree* New(CordRep* rep);
// Creates a new node containing both `front` and `back` at height
// `front.height() + 1`. Requires `back.height() == front.height()`.
static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back);
// Creates a fully balanced tree from the provided tree by rebuilding a new
// tree from all data edges in the input. This function is automatically
// invoked internally when the tree exceeds the maximum height.
static CordRepBtree* Rebuild(CordRepBtree* tree);
private:
CordRepBtree() = default;
~CordRepBtree() = default;
// Initializes the main properties `tag`, `begin`, `end`, `height`.
inline void InitInstance(int height, size_t begin = 0, size_t end = 0);
// Direct property access begin / end
void set_begin(size_t begin) {
  // The index is narrowed to a single byte before being stored in slot 1.
  const uint8_t narrowed = static_cast<uint8_t>(begin);
  storage[1] = narrowed;
}
void set_end(size_t end) {
  // The index is narrowed to a single byte before being stored in slot 2.
  const uint8_t narrowed = static_cast<uint8_t>(end);
  storage[2] = narrowed;
}
// Decreases the value of `begin` by `n`, and returns the new value. Notice
// how this returns the new value unlike atomic::fetch_add which returns the
// old value. This is because this is used to prepend edges at 'begin - 1'.
size_t sub_fetch_begin(size_t n) {
  // Update the stored `begin` index first, then report the updated value.
  storage[1] -= static_cast<uint8_t>(n);
  const size_t new_begin = storage[1];
  return new_begin;
}
// Increases the value of `end` by `n`, and returns the previous value. This
// function is typically used to append edges at 'end'.
size_t fetch_add_end(size_t n) {
  // Remember the `end` index as it was before the update; that is the value
  // handed back to the caller.
  const uint8_t previous_end = storage[2];
  storage[2] = static_cast<uint8_t>(previous_end + n);
  return previous_end;
}
// Returns the index of the last edge starting on, or before `offset`, with
// `n` containing the relative offset of `offset` inside that edge.
// Requires `offset` < length.
Position IndexOf(size_t offset) const;
// Returns the index of the last edge starting before `offset`, with `n`
// containing the relative offset of `offset` inside that edge.
// This function is useful to find the edges for some span of bytes ending at
// `offset` (i.e., `n` bytes). For example:
//
// Position pos = IndexBefore(n)
// edges = Edges(begin(), pos.index) // All full edges (may be empty)
// last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty)
//
// Requires 0 < `offset` <= length.
Position IndexBefore(size_t offset) const;
// Returns the index of the edge ending at (or on) length `length`, and the
// number of bytes inside that edge up to `length`. For example, if we have a
// Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27)
// will return {1, 17}, and IndexOfLength(10) will return {0, 10}.
Position IndexOfLength(size_t n) const;
// Identical to `IndexBefore(offset)` except starting from the position
// `front`. This function is equivalent to `IndexBefore(front.n + offset)`,
// with the difference that this function is optimized to start at
// `front.index`.
Position IndexBefore(Position front, size_t offset) const;
// Returns the index of the edge directly beyond the edge containing offset
// `offset`, with `n` containing the distance of that edge from `offset`.
// This function is useful for iteratively finding suffix nodes and remaining
// partial bytes in left-most suffix nodes as for example in CopySuffix.
// Requires `offset` < length.
Position IndexBeyond(size_t offset) const;
// Creates a new leaf node containing as much data as possible from `data`.
// The data is added either forwards or reversed depending on `edge_type`.
// Callers must check the length of the returned node to determine if all data
// was copied or not.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
// Creates a raw copy of this Btree node with the specified length, copying
// all properties, but without adding any references to existing edges.
CordRepBtree* CopyRaw(size_t new_length) const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
// Creates a partial copy of this Btree node, copying all edges up to `end`,
// adding a reference on each copied edge, and sets the length of the newly
// created copy to `new_length`.
CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const;
// Returns a tree containing the edges [tree->begin(), end) and length
// of `new_length`. This method consumes a reference on the provided
// tree, and logically performs the following operation:
// result = tree->CopyBeginTo(end, new_length);
// CordRep::Unref(tree);
// return result;
static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end,
size_t new_length);
// Creates a partial copy of this Btree node, copying all edges starting at
// `begin`, adding a reference on each copied edge, and sets the length of
// the newly created copy to `new_length`.
CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const;
// Extracts and returns the front edge from the provided tree.
// This method consumes a reference on the provided tree, and logically
// performs the following operation:
// edge = CordRep::Ref(tree->Edge(kFront));
// CordRep::Unref(tree);
// return edge;
static CordRep* ExtractFront(CordRepBtree* tree);
// Returns a tree containing the result of appending `right` to `left`.
static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right);
// Fallback functions for `Create()`, `Append()` and `Prepend()` which
// deal with legacy / non conforming input, i.e.: CONCAT trees.
static CordRepBtree* CreateSlow(CordRep* rep);
static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep);
static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep);
// Recursively rebuilds `tree` into `stack`. If `consume` is set to true, the
// function will consume a reference on `tree`. `stack` is a null terminated
// array containing the new tree's state, with the current leaf node at
// stack[0], and parent nodes above that, or null for 'top of tree'.
static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume);
// Aligns existing edges to start at index 0, to allow for a new edge to be
// added to the back of the current edges.
inline void AlignBegin();
// Aligns existing edges to end at `capacity`, to allow for a new edge to be
// added in front of the current edges.
inline void AlignEnd();
// Adds the provided edge to this node.
// Requires this node to have capacity for the edge. Realigns / moves
// existing edges as needed to prepend or append the new edge.
template <EdgeType edge_type>
inline void Add(CordRep* rep);
// Adds the provided edges to this node.
// Requires this node to have capacity for the edges. Realigns / moves
// existing edges as needed to prepend or append the new edges.
template <EdgeType edge_type>
inline void Add(absl::Span<CordRep* const>);
// Adds data from `data` to this node until either all data has been consumed,
// or there is no more capacity for additional flat nodes inside this node.
// Requires the current node to be a leaf node, data to be non empty, and the
// current node to have capacity for at least one more data edge.
// Returns any remaining data from `data` that was not added, which is,
// depending on the edge type (front / back), either the remaining prefix or
// suffix of the input.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
absl::string_view AddData(absl::string_view data, size_t extra);
// Replace the front or back edge with the provided value.
// Adopts a reference on `edge` and unrefs the old edge.
template <EdgeType edge_type>
inline void SetEdge(CordRep* edge);
// Returns a partial copy of the current tree containing the first `n` bytes
// of data. `CopyResult` contains both the resulting edge and its height. The
// resulting tree may be less high than the current tree, or even be a single
// matching data edge if `allow_folding` is set to true.
// For example, if `n == 1`, then the result will be the single data edge, and
// height will be set to -1 (one below the owning leaf node). If n == 0, this
// function returns null. Requires `n <= length`
CopyResult CopyPrefix(size_t n, bool allow_folding = true);
// Returns a partial copy of the current tree containing all data starting
// after `offset`. `CopyResult` contains both the resulting edge and its
// height. The resulting tree may be less high than the current tree, or even
// be a single matching data edge. For example, if `offset == length - 1`,
// then the result will be a single data edge, and height will be set to -1
// (one below the owning leaf node).
// Requires `offset < length`
CopyResult CopySuffix(size_t offset);
// Returns a OpResult value of {this, kSelf} or {Copy(), kCopied}
// depending on the value of `owned`.
inline OpResult ToOpResult(bool owned);
// Adds `rep` to the specified tree, returning the modified tree.
template <EdgeType edge_type>
static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep);
// Adds `data` to the specified tree, returning the modified tree.
// See the `Append/Prepend` function for the meaning and purpose of `extra`.
template <EdgeType edge_type>
static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data,
size_t extra = 0);
// Merges `src` into `dst` with `src` being added either before (kFront) or
// after (kBack) `dst`. Requires the height of `dst` to be greater than or
// equal to the height of `src`.
template <EdgeType edge_type>
static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src);
// Fallback version of GetAppendBuffer for large trees: GetAppendBuffer()
// implements an inlined version for trees of limited height (3 levels),
// GetAppendBufferSlow implements the logic for large trees.
Span<char> GetAppendBufferSlow(size_t size);
// `edges_` contains all edges starting from this instance.
// These are explicitly `child` edges only, a cord btree (or any cord tree in
// that respect) does not store `parent` pointers anywhere: multiple trees /
// parents can reference the same shared child edge. The type of these edges
// depends on the height of the node. `Leaf nodes` (height == 0) contain `data
// edges` (external or flat nodes, or sub-strings thereof). All other nodes
// (height > 0) contain pointers to BTREE nodes with a height of `height - 1`.
CordRep* edges_[kMaxCapacity];
friend class CordRepBtreeTestPeer;
friend class CordRepBtreeNavigator;
};
// Downcasts this rep to a CordRepBtree. Asserts that IsBtree() holds.
inline CordRepBtree* CordRep::btree() {
  assert(IsBtree());
  CordRepBtree* const self = static_cast<CordRepBtree*>(this);
  return self;
}
// Const overload: downcasts this rep to a const CordRepBtree. Asserts that
// IsBtree() holds.
inline const CordRepBtree* CordRep::btree() const {
  assert(IsBtree());
  const CordRepBtree* const self = static_cast<const CordRepBtree*>(this);
  return self;
}
// Marks this node as a BTREE node and records `height`, `begin` and `end` in
// storage[0], storage[1] and storage[2] respectively, each narrowed to
// uint8_t.
inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) {
  const uint8_t packed_height = static_cast<uint8_t>(height);
  const uint8_t packed_begin = static_cast<uint8_t>(begin);
  const uint8_t packed_end = static_cast<uint8_t>(end);
  tag = BTREE;
  storage[0] = packed_height;
  storage[1] = packed_begin;
  storage[2] = packed_end;
}
// Returns the edge stored at `index`.
// Requires `begin() <= index < end()` (asserted).
inline CordRep* CordRepBtree::Edge(size_t index) const {
  assert(index >= begin());
  assert(index < end());
  CordRep* const edge = edges_[index];
  return edge;
}
// Returns the first edge for `kFront`, and the last edge for any other value.
inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const {
  if (edge_type == kFront) {
    return edges_[begin()];
  }
  return edges_[back()];
}
// Returns a view over all edges of this node: `size()` entries starting at
// index `begin()`.
inline absl::Span<CordRep* const> CordRepBtree::Edges() const {
  CordRep* const* const first = edges_ + begin();
  return absl::Span<CordRep* const>(first, size());
}
// Returns a view over the edges in the index range [begin, end).
// Requires `this->begin() <= begin <= end <= this->end()` (asserted).
inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin,
                                                      size_t end) const {
  assert(begin <= end);
  assert(begin >= this->begin());
  assert(end <= this->end());
  const size_t count = static_cast<size_t>(end - begin);
  CordRep* const* const first = edges_ + begin;
  return absl::Span<CordRep* const>(first, count);
}
// Returns the data of the edge at `index` via EdgeData().
// Requires this node to be a leaf (height() == 0, asserted).
inline absl::string_view CordRepBtree::Data(size_t index) const {
  assert(height() == 0);
  CordRep* const data_edge = Edge(index);
  return EdgeData(data_edge);
}
// Allocates a node of the given `height` with a length of zero. `begin` and
// `end` take whatever defaults the InitInstance() declaration supplies.
inline CordRepBtree* CordRepBtree::New(int height) {
  CordRepBtree* node = new CordRepBtree;
  node->InitInstance(height);
  node->length = 0;
  return node;
}
// Allocates a node holding `rep` as its single edge at index 0. The new node
// is one level above `rep` if `rep` is a btree, and a leaf (height 0)
// otherwise. The pointer is stored as-is; no reference is added here.
inline CordRepBtree* CordRepBtree::New(CordRep* rep) {
  CordRepBtree* node = new CordRepBtree;
  int node_height = 0;
  if (rep->IsBtree()) {
    node_height = rep->btree()->height() + 1;
  }
  node->length = rep->length;
  node->InitInstance(node_height, /*begin=*/0, /*end=*/1);
  node->edges_[0] = rep;
  return node;
}
// Allocates a node one level above `front` / `back`, holding them as edges 0
// and 1, with a length equal to the sum of both lengths.
// Requires both inputs to have the same height (asserted). The pointers are
// stored as-is; no references are added here.
inline CordRepBtree* CordRepBtree::New(CordRepBtree* front,
                                       CordRepBtree* back) {
  assert(front->height() == back->height());
  CordRepBtree* parent = new CordRepBtree;
  parent->length = front->length + back->length;
  const int parent_height = front->height() + 1;
  parent->InitInstance(parent_height, /*begin=*/0, /*end=*/2);
  parent->edges_[0] = front;
  parent->edges_[1] = back;
  return parent;
}
// Decrements the reference count of every edge in `edges`, destroying each
// edge for which Decrement() returns false.
inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
  const size_t count = edges.size();
  for (size_t i = 0; i < count; ++i) {
    CordRep* const edge = edges[i];
    const bool keep = edge->refcount.Decrement();
    if (ABSL_PREDICT_FALSE(!keep)) {
      CordRep::Destroy(edge);
    }
  }
}
// Allocates a new node with `length` set to `new_length` and a byte-wise copy
// of this node's contents from `tag` up to the end of the object.
// No edge references are taken here: the copied edge pointers are not
// ref'ed. The callers in this file (Copy, CopyToEndFrom, CopyBeginTo) call
// CordRep::Ref on the edges they keep.
inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
CordRepBtree* tree = new CordRepBtree;
// `length` and `refcount` are the first members of `CordRepBtree`.
// We initialize `length` using the given length, have `refcount` be set to
// ref = 1 through its default constructor, and copy all data beyond
// 'refcount' which starts with `tag` using a single memcpy: all contents
// except `refcount` is trivially copyable, and the compiler does not
// efficiently coalesce member-wise copy of these members.
// See https://gcc.godbolt.org/z/qY8zsca6z
// LINT.IfChange(copy_raw)
tree->length = new_length;
uint8_t* dst = &tree->tag;
const uint8_t* src = &tag;
// `offset` is the byte offset of `tag` inside the object, so the memcpy below
// covers exactly the bytes from `tag` through the end of the object.
const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
return tree;
// LINT.ThenChange()
}
// Returns a copy of this node with the same length, adding a reference to
// every edge of this node.
inline CordRepBtree* CordRepBtree::Copy() const {
  CordRepBtree* copy = CopyRaw(length);
  const absl::Span<CordRep* const> shared_edges = Edges();
  for (size_t i = 0; i < shared_edges.size(); ++i) {
    CordRep::Ref(shared_edges[i]);
  }
  return copy;
}
// Returns a copy of this node that starts at edge index `begin` and carries
// the caller-supplied `new_length`. A reference is added to each edge that
// remains in the copy.
// Requires `this->begin() <= begin <= this->end()` (asserted).
inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
                                                 size_t new_length) const {
  assert(begin >= this->begin());
  assert(begin <= this->end());
  CordRepBtree* copy = CopyRaw(new_length);
  copy->set_begin(begin);
  const absl::Span<CordRep* const> kept = copy->Edges();
  for (size_t i = 0; i < kept.size(); ++i) {
    CordRep::Ref(kept[i]);
  }
  return copy;
}
// Returns a copy of this node that ends at edge index `end` (exclusive) and
// carries the caller-supplied `new_length`. A reference is added to each edge
// that remains in the copy.
// Requires `this->begin() <= end <= capacity()` (asserted).
inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
                                               size_t new_length) const {
  assert(end <= capacity());
  assert(end >= this->begin());
  CordRepBtree* copy = CopyRaw(new_length);
  copy->set_end(end);
  const absl::Span<CordRep* const> kept = copy->Edges();
  for (size_t i = 0; i < kept.size(); ++i) {
    CordRep::Ref(kept[i]);
  }
  return copy;
}
// Moves the existing edges down so that the first edge sits at index 0,
// preserving their order. Does nothing if `begin()` is already 0.
inline void CordRepBtree::AlignBegin() {
// The below code itself does not need to be fast as typically we have
// mono-directional append/prepend calls, and `begin` / `end` are typically
// adjusted no more than once. But we want to avoid potential register clobber
// effects, making the compiler emit register save/store/spills, and minimize
// the size of code.
const size_t delta = begin();
if (ABSL_PREDICT_FALSE(delta != 0)) {
const size_t new_end = end() - delta;
set_begin(0);
set_end(new_end);
// TODO(mvels): we can write this using 2 loads / 2 stores depending on
// total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on
// size, and then do overlapping load/store of up to 4 pointers (inlined as
// XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a)
// compact and b) not clobbering any registers.
ABSL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
// Copy in ascending order: each destination index `i` is below its source
// index `i + delta`, so no source slot is overwritten before it is read.
for (size_t i = 0; i < new_end; ++i) {
edges_[i] = edges_[i + delta];
}
}
}
// Moves the existing edges up so that the last edge sits at index
// `capacity() - 1`, preserving their order. Does nothing if `end()` already
// equals `capacity()`.
inline void CordRepBtree::AlignEnd() {
// See comments in `AlignBegin` for motivation on the hand-rolled for loops.
const size_t delta = capacity() - end();
if (delta != 0) {
const size_t new_begin = begin() + delta;
const size_t new_end = end() + delta;
set_begin(new_begin);
set_end(new_end);
ABSL_ASSUME(new_end <= kMaxCapacity);
#ifdef __clang__
#pragma unroll 1
#endif
// Copy in descending order: each destination index `i` is above its source
// index `i - delta`, so no source slot is overwritten before it is read.
// `new_begin` is at least 1 here (it is `begin() + delta` with `delta != 0`),
// so the unsigned `i >= new_begin` test terminates before `i` can wrap.
for (size_t i = new_end - 1; i >= new_begin; --i) {
edges_[i] = edges_[i - delta];
}
}
}
// Appends `rep` behind the current edges. The edges are first aligned to
// index 0 to make room at the back.
// Requires this node to have capacity for one more edge.
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) {
  AlignBegin();
  const size_t slot = fetch_add_end(1);
  edges_[slot] = rep;
}
// Appends all `edges`, in order, behind the current edges. The existing
// edges are first aligned to index 0 to make room at the back.
// Requires this node to have capacity for `edges.size()` more edges.
template <>
inline void CordRepBtree::Add<CordRepBtree::kBack>(
    absl::Span<CordRep* const> edges) {
  AlignBegin();
  const size_t first = end();
  const size_t count = edges.size();
  for (size_t i = 0; i < count; ++i) {
    edges_[first + i] = edges[i];
  }
  set_end(first + count);
}
// Prepends `rep` in front of the current edges. The edges are first aligned
// to the end of the array to make room at the front.
// Requires this node to have capacity for one more edge.
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) {
  AlignEnd();
  const size_t slot = sub_fetch_begin(1);
  edges_[slot] = rep;
}
// Prepends all `edges`, in order, in front of the current edges. The existing
// edges are first aligned to the end of the array to make room at the front.
// Requires this node to have capacity for `edges.size()` more edges.
template <>
inline void CordRepBtree::Add<CordRepBtree::kFront>(
    absl::Span<CordRep* const> edges) {
  AlignEnd();
  const size_t count = edges.size();
  const size_t first = begin() - count;
  set_begin(first);
  for (size_t i = 0; i < count; ++i) {
    edges_[first + i] = edges[i];
  }
}
// Replaces the front (`kFront`) or back (any other edge type) edge with
// `edge`. The old edge is unref'ed; the pointer to `edge` is stored as-is, so
// the caller's reference is taken over by this node.
template <CordRepBtree::EdgeType edge_type>
inline void CordRepBtree::SetEdge(CordRep* edge) {
  // `begin()` / `back()` are used as `size_t` indexes everywhere else in this
  // file; keep the index unsigned instead of narrowing it into an `int`.
  const size_t idx = edge_type == kFront ? begin() : back();
  CordRep::Unref(edges_[idx]);
  edges_[idx] = edge;
}
// Returns {this, kSelf} when `owned` is true, and {Copy(), kCopied}
// otherwise.
inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) {
  if (owned) {
    return OpResult{this, kSelf};
  }
  return OpResult{Copy(), kCopied};
}
// Returns the index of the edge containing byte `offset`, together with the
// offset relative to the start of that edge.
// Requires `offset < length` (asserted).
inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const {
  assert(offset < length);
  size_t index = begin();
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (offset < edge_length) break;
    offset -= edge_length;
    ++index;
  }
  return {index, offset};
}
// Returns the index of the edge holding the byte just before `offset`,
// together with the number of bytes of that edge that lie before `offset`
// (a value in the range [1, edge length]).
// Requires `0 < offset <= length` (asserted).
inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const {
  assert(offset > 0);
  assert(offset <= length);
  size_t index = begin();
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (offset <= edge_length) break;
    offset -= edge_length;
    ++index;
  }
  return {index, offset};
}
// Variant of IndexBefore() that starts the search at position `front`
// instead of at begin(): `offset` is taken relative to `front`, i.e. the
// search runs from edge `front.index` with `front.n` added to `offset`.
inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front,
                                                        size_t offset) const {
  size_t index = front.index;
  size_t remaining = offset + front.n;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (remaining <= edge_length) break;
    remaining -= edge_length;
    ++index;
  }
  return {index, remaining};
}
// Locates the position `n` bytes into this node by scanning backwards from
// the last edge: whole edges are stripped while they fit into the
// `length - n` bytes that must be removed. Returns the index of the edge
// where stripping stops and the number of bytes of that edge that remain.
// Requires `n <= length` (asserted).
inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const {
  assert(n <= length);
  size_t index = back();
  size_t strip = length - n;
  for (;;) {
    const size_t edge_length = edges_[index]->length;
    if (strip < edge_length) break;
    strip -= edge_length;
    --index;
  }
  return {index, edges_[index]->length - strip};
}
// Returns the index of the first edge whose starting offset is at or beyond
// `offset`, together with the distance from `offset` to that edge's start.
inline CordRepBtree::Position CordRepBtree::IndexBeyond(
    const size_t offset) const {
  // The `offset -= length` scheme of IndexOf does not work here because the
  // match is on an edge's starting offset. Instead, `edge_start` accumulates
  // the starting offset of the edge at `index` while walking the edges.
  size_t edge_start = 0;
  size_t index = begin();
  for (; edge_start < offset; ++index) {
    edge_start += edges_[index]->length;
  }
  return {index, edge_start - offset};
}
// Creates a tree from `rep`: a data edge is wrapped directly in a new node
// through New(rep); anything else is handed to CreateSlow().
inline CordRepBtree* CordRepBtree::Create(CordRep* rep) {
  return IsDataEdge(rep) ? New(rep) : CreateSlow(rep);
}
// Inlined fast path for trees of height 0 through 3; any other height is
// delegated to GetAppendBufferSlow().
// Walks down the back-most path to the last data edge and returns a span of
// up to `size` bytes of unused capacity directly behind that edge's data.
// The span's size is added to `length` of the edge and of every node on the
// path. Returns an empty span if a node on the path or the edge itself has a
// refcount other than one, if the edge's tag is below FLAT, or if the edge
// has no capacity left.
// Requires this node's refcount to be one (asserted).
inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) {
assert(refcount.IsOne());
CordRepBtree* tree = this;
const int height = this->height();
// n1..n3 remember the nodes above the leaf so that their lengths can be
// updated once the buffer size is known. Entries that are not assigned for
// the current height keep pointing at `this` and are not touched by the
// second switch below.
CordRepBtree* n1 = tree;
CordRepBtree* n2 = tree;
CordRepBtree* n3 = tree;
// Descend along the back edges; each case handles one level and falls
// through to the next lower one.
switch (height) {
case 3:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n2 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 2:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
n1 = tree;
ABSL_FALLTHROUGH_INTENDED;
case 1:
tree = tree->Edge(kBack)->btree();
if (!tree->refcount.IsOne()) return {};
ABSL_FALLTHROUGH_INTENDED;
case 0:
// `tree` is the leaf node at this point.
CordRep* edge = tree->Edge(kBack);
if (!edge->refcount.IsOne()) return {};
if (edge->tag < FLAT) return {};
size_t avail = edge->flat()->Capacity() - edge->length;
if (avail == 0) return {};
size_t delta = (std::min)(size, avail);
Span<char> span = {edge->flat()->Data() + edge->length, delta};
edge->length += delta;
// Propagate the added length to every node on the path, top to bottom.
switch (height) {
case 3:
n3->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 2:
n2->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 1:
n1->length += delta;
ABSL_FALLTHROUGH_INTENDED;
case 0:
tree->length += delta;
return span;
}
break;
}
// Reached when `height` matches none of the cases above.
return GetAppendBufferSlow(size);
}
// Explicit instantiation declarations for both edge types of AddCordRep():
// these tell the compiler not to instantiate the template implicitly in
// translation units that include this header.
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>(
CordRepBtree* tree, CordRep* rep);
extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>(
CordRepBtree* tree, CordRep* rep);
// Appends `rep` to `tree`, returning the resulting tree. Data edges, the
// expected common case, are added directly at the back; anything else goes
// through AppendSlow().
inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) {
  const bool is_data_edge = IsDataEdge(rep);
  if (ABSL_PREDICT_TRUE(is_data_edge)) {
    return CordRepBtree::AddCordRep<kBack>(tree, rep);
  }
  return AppendSlow(tree, rep);
}
// Prepends `rep` to `tree`, returning the resulting tree. Data edges, the
// expected common case, are added directly at the front; anything else goes
// through PrependSlow().
inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) {
  const bool is_data_edge = IsDataEdge(rep);
  if (ABSL_PREDICT_TRUE(is_data_edge)) {
    return CordRepBtree::AddCordRep<kFront>(tree, rep);
  }
  return PrependSlow(tree, rep);
}
#ifdef NDEBUG
// With NDEBUG defined, both AssertValid() overloads perform no checks: they
// ignore `shallow` and return `tree` unchanged.
inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree,
                                               bool shallow) {
  static_cast<void>(shallow);
  return tree;
}
inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree,
                                                     bool shallow) {
  static_cast<void>(shallow);
  return tree;
}
#endif
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_

View file

@ -0,0 +1,187 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include <cassert>
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
using ReadResult = CordRepBtreeNavigator::ReadResult;
namespace {
// Returns a rep covering `n` bytes of `rep` starting at `offset`.
//  - `n == 0`: returns nullptr.
//  - `n == rep->length`: returns `rep` itself with one reference added.
//  - otherwise: returns a new `CordRepSubstring`. If `rep` is itself a
//    substring, the new node points at that substring's child with the start
//    offsets combined, so substrings are never stacked.
// The returned rep holds its own reference (taken through `CordRep::Ref`);
// the reference held by the caller on `rep` is not consumed.
// Requires `rep` to be a valid data edge, `offset < rep->length` and
// `offset + n <= rep->length` (all asserted).
// TODO(192061034): move to utility library in internal and optimize for small
// substrings of larger reps.
inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) {
  assert(n <= rep->length);
  assert(offset < rep->length);
  assert(offset <= rep->length - n);
  assert(IsDataEdge(rep));

  if (n == 0) return nullptr;
  if (n == rep->length) return CordRep::Ref(rep);

  // Resolve through an existing substring so the result points at the
  // underlying flat / external rep.
  size_t start = offset;
  CordRep* child = rep;
  if (rep->tag == SUBSTRING) {
    CordRepSubstring* const outer = rep->substring();
    start += outer->start;
    child = outer->child;
  }
  assert(child->IsExternal() || child->IsFlat());

  CordRepSubstring* result = new CordRepSubstring();
  result->length = n;
  result->tag = SUBSTRING;
  result->start = start;
  result->child = CordRep::Ref(child);
  return result;
}
// Returns the tail of `rep` starting at `offset`, i.e. all bytes from
// `offset` up to the end of `rep`.
inline CordRep* Substring(CordRep* rep, size_t offset) {
  const size_t tail_length = rep->length - offset;
  return Substring(rep, offset, tail_length);
}
} // namespace
// Skips `n` bytes forward, counted from the start of the current data edge.
// Returns the data edge containing the target position and the offset of that
// position inside the edge. If the skip runs past the end of the tree, this
// returns a null edge with the number of bytes that could not be skipped; in
// that case no navigator state has been written, because the search loop
// below only updates local variables.
CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) {
int height = 0;
size_t index = index_[0];
CordRepBtree* node = node_[0];
CordRep* edge = node->Edge(index);
// Overall logic: Find an edge of at least the length we need to skip.
// We consume all edges which are smaller (i.e., must be 100% skipped).
// If we exhausted all edges on the current level, we move one level
// up the tree, and repeat until we either find the edge, or until we hit
// the top of the tree meaning the skip exceeds tree->length.
while (n >= edge->length) {
n -= edge->length;
// Advance to the next edge; each time a node is exhausted, continue with the
// edge following the saved position in its parent.
while (++index == node->end()) {
if (++height > height_) return {nullptr, n};
node = node_[height];
index = index_[height];
}
edge = node->Edge(index);
}
// If we moved up the tree, descend down to the leaf level, consuming all
// edges that must be skipped.
while (height > 0) {
node = edge->btree();
// Record the path taken: the index chosen at this level and the child node
// one level down.
index_[height] = static_cast<uint8_t>(index);
node_[--height] = node;
index = node->begin();
edge = node->Edge(index);
while (n >= edge->length) {
n -= edge->length;
++index;
assert(index != node->end());
edge = node->Edge(index);
}
}
index_[0] = static_cast<uint8_t>(index);
return {edge, n};
}
// Reads `n` bytes starting at `edge_offset` inside the current data edge and
// returns them as a newly built rep, together with the number of bytes used
// from the last edge that was touched.
// If the whole read fits strictly inside the current edge, the result is a
// single substring (see Substring()) and the navigator is not moved.
// If the read runs past the end of the tree, the partially built tree is
// released and `{nullptr, <bytes that could not be read>}` is returned. Note
// that `index_` entries have already been overwritten on that path.
// Requires `edge_offset` to be less than the current edge's length (asserted).
ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) {
int height = 0;
// `length` is the number of bytes still to cover, measured from the start of
// `edge` (so it initially includes the skipped `edge_offset` prefix).
size_t length = edge_offset + n;
size_t index = index_[0];
CordRepBtree* node = node_[0];
CordRep* edge = node->Edge(index);
assert(edge_offset < edge->length);
if (length < edge->length) {
return {Substring(edge, edge_offset, n), length};
}
// Similar to 'Skip', we consume all edges that are inside the 'length' of
// data that needs to be read. If we exhaust the current level, we move one
// level up the tree and repeat until we hit the final edge that must be
// (partially) read. We consume all edges into `subtree`.
CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset));
// `subtree_end` tracks the number of edges written into `subtree`; it is
// committed with set_end() whenever `subtree` is finished or replaced.
size_t subtree_end = 1;
do {
length -= edge->length;
while (++index == node->end()) {
index_[height] = static_cast<uint8_t>(index);
if (++height > height_) {
// Ran off the top of the tree: success only if exactly everything was read.
subtree->set_end(subtree_end);
if (length == 0) return {subtree, 0};
CordRep::Unref(subtree);
return {nullptr, length};
}
if (length != 0) {
// More data is needed from the level above: wrap what was collected so far
// in a new parent node, so that siblings on that level can be added to it.
subtree->set_end(subtree_end);
subtree = CordRepBtree::New(subtree);
subtree_end = 1;
}
node = node_[height];
index = index_[height];
}
edge = node->Edge(index);
if (length >= edge->length) {
// The edge is needed in full: share it by reference.
subtree->length += edge->length;
subtree->edges_[subtree_end++] = CordRep::Ref(edge);
}
} while (length >= edge->length);
// `tree` is the top of the result; `subtree` keeps tracking the node that is
// currently being filled while descending.
CordRepBtree* tree = subtree;
subtree->length += length;
// If we moved up the tree, descend down to the leaf level, consuming all
// edges that must be read, adding 'down' nodes to `subtree`.
while (height > 0) {
node = edge->btree();
index_[height] = static_cast<uint8_t>(index);
node_[--height] = node;
index = node->begin();
edge = node->Edge(index);
if (length != 0) {
CordRepBtree* right = CordRepBtree::New(height);
right->length = length;
subtree->edges_[subtree_end++] = right;
subtree->set_end(subtree_end);
subtree = right;
subtree_end = 0;
while (length >= edge->length) {
subtree->edges_[subtree_end++] = CordRep::Ref(edge);
length -= edge->length;
edge = node->Edge(++index);
}
}
}
// Add any (partial) edge still remaining at the leaf level.
if (length != 0) {
subtree->edges_[subtree_end++] = Substring(edge, 0, length);
}
subtree->set_end(subtree_end);
index_[0] = static_cast<uint8_t>(index);
return {tree, length};
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,267 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_
#include <cassert>
#include <iostream>
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepBtreeNavigator is a bi-directional navigator allowing callers to
// navigate all the (leaf) data edges in a CordRepBtree instance.
//
// A CordRepBtreeNavigator instance is by default empty. Callers initialize a
// navigator instance by calling one of `InitFirst()`, `InitLast()` or
// `InitOffset()`, which establishes a current position. Callers can then
// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods.
//
// The navigator instance does not take or adopt a reference on the provided
// `tree` on any of the initialization calls. Callers are responsible for
// guaranteeing the lifecycle of the provided tree. A navigator instance can
// be reset to the empty state by calling `Reset`.
//
// A navigator only keeps positional state on the 'current data edge', it does
// explicitly not keep any 'offset' state. The class does accept and return
// offsets in the `Read()`, `Skip()` and `Seek()` methods as these would
// otherwise put a big burden on callers. Callers are expected to maintain
// (returned) offset info if they require such granular state.
class CordRepBtreeNavigator {
public:
// The logical position as returned by the Seek() and Skip() functions.
// Returns the current leaf edge for the desired seek or skip position and
// the offset of that position inside that edge.
struct Position {
CordRep* edge;
size_t offset;
};
// The read result as returned by the Read() function.
// `tree` contains the resulting tree which is identical to the result
// of calling CordRepBtree::SubTree(...) on the tree being navigated.
// `n` contains the number of bytes used from the last navigated to
// edge of the tree.
struct ReadResult {
CordRep* tree;
size_t n;
};
// Returns true if this instance is not empty.
explicit operator bool() const;
// Returns the tree for this instance or nullptr if empty.
CordRepBtree* btree() const;
// Returns the data edge of the current position.
// Requires this instance to not be empty.
CordRep* Current() const;
// Resets this navigator to `tree`, returning the first data edge in the tree.
CordRep* InitFirst(CordRepBtree* tree);
// Resets this navigator to `tree`, returning the last data edge in the tree.
CordRep* InitLast(CordRepBtree* tree);
// Resets this navigator to `tree` returning the data edge at position
// `offset` and the relative offset of `offset` into that data edge.
// Returns `Position.edge = nullptr` if the provided offset is greater
// than or equal to the length of the tree, in which case the state of
// the navigator instance remains unchanged.
Position InitOffset(CordRepBtree* tree, size_t offset);
// Navigates to the next data edge.
// Returns the next data edge or nullptr if there is no next data edge, in
// which case the current position remains unchanged.
CordRep* Next();
// Navigates to the previous data edge.
// Returns the previous data edge or nullptr if there is no previous data
// edge, in which case the current position remains unchanged.
CordRep* Previous();
// Navigates to the data edge at position `offset`. Returns the navigated to
// data edge in `Position.edge` and the relative offset of `offset` into that
// data edge in `Position.offset`. Returns `Position.edge = nullptr` if the
// provided offset is greater than or equal to the tree's length.
Position Seek(size_t offset);
// Reads `n` bytes of data starting at offset `edge_offset` of the current
// data edge, and returns the result in `ReadResult.tree`. `ReadResult.n`
// contains the 'bytes used' from the last / current data edge in the tree.
// This allows users that mix regular navigation (using string views) and
// 'read into cord' navigation to keep track of the current state, and which
// bytes have been consumed from a navigator.
// This function returns `ReadResult.tree = nullptr` if the requested length
// exceeds the length of the tree starting at the current data edge.
ReadResult Read(size_t edge_offset, size_t n);
// Skips `n` bytes forward from the current data edge, returning the navigated
// to data edge in `Position.edge` and `Position.offset` containing the offset
// inside that data edge. Note that the state of the navigator is left
// unchanged if `n` is smaller than the length of the current data edge.
Position Skip(size_t n);
// Resets this instance to the default / empty state.
void Reset();
private:
// Slow path for Next() if Next() reached the end of a leaf node. Backtracks
// up the stack until it finds a node that has a 'next' position available,
// and then does a 'front dive' towards the next leaf node.
CordRep* NextUp();
// Slow path for Previous() if Previous() reached the beginning of a leaf
// node. Backtracks up the stack until it finds a node that has a 'previous'
// position available, and then does a 'back dive' towards the previous leaf
// node.
CordRep* PreviousUp();
// Generic implementation of InitFirst() and InitLast().
template <CordRepBtree::EdgeType edge_type>
CordRep* Init(CordRepBtree* tree);
// `height_` contains the height of the current tree, or -1 if empty.
int height_ = -1;
// `index_` and `node_` contain the navigation state as the 'path' to the
// current data edge which is at `node_[0]->Edge(index_[0])`. The contents
// of these are undefined until the instance is initialized (`height_ >= 0`).
// Both arrays are indexed by node height: slot 0 is the leaf level and slot
// `height_` is the root of the tree being navigated.
uint8_t index_[CordRepBtree::kMaxDepth];
CordRepBtree* node_[CordRepBtree::kMaxDepth];
};
// A navigator is empty while `height_` is negative; returns true once it has
// been initialized on a tree.
inline CordRepBtreeNavigator::operator bool() const {
  return !(height_ < 0);
}
// Returns the root of the tree being navigated (the node stored at the top
// of the path), or nullptr if this navigator is empty.
inline CordRepBtree* CordRepBtreeNavigator::btree() const {
  if (height_ < 0) {
    return nullptr;
  }
  return node_[height_];
}
// Returns the data edge at the current position.
// Requires this navigator to be non-empty (asserted).
inline CordRep* CordRepBtreeNavigator::Current() const {
  assert(height_ >= 0);
  const CordRepBtree* const leaf = node_[0];
  return leaf->Edge(index_[0]);
}
// Returns this navigator to the empty state. Only `height_` is reset; the
// path arrays are left as they are.
inline void CordRepBtreeNavigator::Reset() {
  height_ = -1;
}
// Positions this navigator on the first data edge of `tree` and returns that
// edge.
inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) {
  CordRep* const first = Init<CordRepBtree::kFront>(tree);
  return first;
}
// Positions this navigator on the last data edge of `tree` and returns that
// edge.
inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) {
  CordRep* const last = Init<CordRepBtree::kBack>(tree);
  return last;
}
// Shared implementation of InitFirst() / InitLast(): records the path from
// the root of `tree` down to a leaf, choosing the `edge_type` side (front or
// back) at every level, and returns the data edge reached.
// Requires a non-null, non-empty tree whose height does not exceed
// CordRepBtree::kMaxHeight (asserted).
template <CordRepBtree::EdgeType edge_type>
inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) {
  assert(tree != nullptr);
  assert(tree->size() > 0);
  assert(tree->height() <= CordRepBtree::kMaxHeight);

  height_ = tree->height();
  CordRepBtree* node = tree;
  size_t index = node->index(edge_type);
  node_[height_] = node;
  index_[height_] = static_cast<uint8_t>(index);

  for (int level = height_ - 1; level >= 0; --level) {
    node = node->Edge(index)->btree();
    node_[level] = node;
    index = node->index(edge_type);
    index_[level] = static_cast<uint8_t>(index);
  }
  return node_[0]->Edge(index);
}
// Moves this navigator to the data edge containing byte `offset` of the tree,
// and returns that edge with the offset relative to its start.
// Returns a null edge, leaving the navigator untouched, if `offset` is at or
// beyond the length of the tree.
// Requires this navigator to be non-empty (asserted).
inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek(
    size_t offset) {
  assert(btree() != nullptr);
  CordRepBtree* node = node_[height_];
  if (ABSL_PREDICT_FALSE(offset >= node->length)) return {nullptr, 0};

  CordRepBtree::Position pos = node->IndexOf(offset);
  index_[height_] = static_cast<uint8_t>(pos.index);

  // Walk down to the leaf level, re-resolving the remaining offset inside
  // each child node.
  for (int level = height_ - 1; level >= 0; --level) {
    node = node->Edge(pos.index)->btree();
    node_[level] = node;
    pos = node->IndexOf(pos.n);
    index_[level] = static_cast<uint8_t>(pos.index);
  }
  return {node->Edge(pos.index), pos.n};
}
// Resets this navigator to `tree` and seeks to byte `offset`.
// Returns a null edge, leaving the navigator untouched, if `offset` is at or
// beyond the length of `tree`.
// Requires a non-null tree whose height does not exceed
// CordRepBtree::kMaxHeight (asserted).
inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset(
    CordRepBtree* tree, size_t offset) {
  assert(tree != nullptr);
  assert(tree->height() <= CordRepBtree::kMaxHeight);
  if (ABSL_PREDICT_FALSE(offset >= tree->length)) {
    return Position{nullptr, 0};
  }
  const int root_height = tree->height();
  height_ = root_height;
  node_[root_height] = tree;
  return Seek(offset);
}
// Advances to the next data edge. Stays inside the current leaf when
// possible; otherwise defers to NextUp().
inline CordRep* CordRepBtreeNavigator::Next() {
  CordRepBtree* leaf = node_[0];
  if (index_[0] == leaf->back()) {
    return NextUp();
  }
  ++index_[0];
  return leaf->Edge(index_[0]);
}
// Moves back to the previous data edge. Stays inside the current leaf when
// possible; otherwise defers to PreviousUp().
inline CordRep* CordRepBtreeNavigator::Previous() {
  CordRepBtree* leaf = node_[0];
  if (index_[0] == leaf->begin()) {
    return PreviousUp();
  }
  --index_[0];
  return leaf->Edge(index_[0]);
}
// Slow path of Next(), used when the current position is the last edge of
// its leaf. Climbs until an ancestor has an edge after the recorded position,
// then descends along the front edges to the next leaf. Returns nullptr,
// without modifying the navigator, if no ancestor has a next edge.
inline CordRep* CordRepBtreeNavigator::NextUp() {
  assert(index_[0] == node_[0]->back());
  CordRepBtree* node = nullptr;
  size_t index = 0;
  int height = 0;

  // Climb: find the lowest ancestor that still has an edge to the right.
  for (;;) {
    ++height;
    if (height > height_) return nullptr;
    node = node_[height];
    index = index_[height] + 1;
    if (index != node->end()) break;
  }
  index_[height] = static_cast<uint8_t>(index);

  // Descend: `height` is at least 1 here, so this runs at least once.
  while (height > 0) {
    --height;
    node = node->Edge(index)->btree();
    node_[height] = node;
    index = node->begin();
    index_[height] = static_cast<uint8_t>(index);
  }
  return node->Edge(index);
}
// Slow path of Previous(), used when the current position is the first edge
// of its leaf. Climbs until an ancestor has an edge before the recorded
// position, then descends along the back edges to the previous leaf. Returns
// nullptr, without modifying the navigator, if no ancestor has a previous
// edge.
inline CordRep* CordRepBtreeNavigator::PreviousUp() {
  assert(index_[0] == node_[0]->begin());
  CordRepBtree* node = nullptr;
  size_t index = 0;
  int height = 0;

  // Climb: find the lowest ancestor that still has an edge to the left.
  for (;;) {
    ++height;
    if (height > height_) return nullptr;
    node = node_[height];
    index = index_[height];
    if (index != node->begin()) break;
  }
  --index;
  index_[height] = static_cast<uint8_t>(index);

  // Descend: `height` is at least 1 here, so this runs at least once.
  while (height > 0) {
    --height;
    node = node->Edge(index)->btree();
    node_[height] = node;
    index = node->back();
    index_[height] = static_cast<uint8_t>(index);
  }
  return node->Edge(index);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_

View file

@ -0,0 +1,346 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::Ne;
using ::absl::cordrep_testing::CordRepBtreeFromFlats;
using ::absl::cordrep_testing::CordToString;
using ::absl::cordrep_testing::CreateFlatsFromString;
using ::absl::cordrep_testing::CreateRandomString;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::MakeSubstring;
using ReadResult = CordRepBtreeNavigator::ReadResult;
using Position = CordRepBtreeNavigator::Position;
// Test fixture that builds a tree for the navigation tests. The test
// parameter (see `count()`) is the number of data edges in that tree; each
// edge carries `kCharsPerFlat` characters of a random string.
class CordRepBtreeNavigatorTest : public testing::TestWithParam<size_t> {
 public:
  using Flats = std::vector<CordRep*>;
  static constexpr size_t kCharsPerFlat = 3;

  CordRepBtreeNavigatorTest() {
    data_ = CreateRandomString(count() * kCharsPerFlat);
    flats_ = CreateFlatsFromString(data_, kCharsPerFlat);

    // Replace one flat with an equivalent substring of a flat holding the
    // full data, so partial reads on substrings are covered: flat 1 when
    // there is more than one flat, flat 0 otherwise.
    const size_t target = count() > 1 ? 1 : 0;
    CordRep::Unref(flats_[target]);
    flats_[target] = MakeSubstring(target * kCharsPerFlat, kCharsPerFlat,
                                   MakeFlat(data_));

    tree_ = CordRepBtreeFromFlats(flats_);
  }

  ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); }

  // Number of data edges in the tree (the test parameter).
  size_t count() const { return GetParam(); }
  // The tree under test.
  CordRepBtree* tree() { return tree_; }
  // The random string the flats were created from.
  const std::string& data() const { return data_; }
  // The data edges the tree was built from.
  const std::vector<CordRep*>& flats() const { return flats_; }

  // Names a parameterized test instance as "<count>_Flats".
  static std::string ToString(testing::TestParamInfo<size_t> param) {
    return absl::StrCat(param.param, "_Flats");
  }

 private:
  std::string data_;
  Flats flats_;
  CordRepBtree* tree_;
};
// Instantiates the fixture for edge counts of 1, around kMaxCapacity, around
// kMaxCapacity squared, and 2 * kMaxCapacity squared + 17.
// NOTE(review): these presumably bracket the points where the test tree gains
// a level -- confirm against CordRepBtreeFromFlats.
INSTANTIATE_TEST_SUITE_P(
WithParam, CordRepBtreeNavigatorTest,
testing::Values(1, CordRepBtree::kMaxCapacity - 1,
CordRepBtree::kMaxCapacity,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1,
CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 +
17),
CordRepBtreeNavigatorTest::ToString);
// A default constructed navigator is empty and has no tree. In builds with
// asserts enabled, Current() on an empty navigator must die.
TEST(CordRepBtreeNavigatorTest, Uninitialized) {
  CordRepBtreeNavigator nav;

  EXPECT_FALSE(nav);
  EXPECT_THAT(nav.btree(), Eq(nullptr));

#if !defined(NDEBUG) && defined(GTEST_HAS_DEATH_TEST)
  EXPECT_DEATH(nav.Current(), ".*");
#endif
}
// InitFirst() must bind the navigator to the tree, position it on the first
// data edge, and return that edge.
TEST_P(CordRepBtreeNavigatorTest, InitFirst) {
  CordRepBtreeNavigator nav;
  CordRep* const returned = nav.InitFirst(tree());
  CordRep* const first_edge = flats().front();

  EXPECT_TRUE(nav);
  EXPECT_THAT(nav.btree(), Eq(tree()));
  EXPECT_THAT(nav.Current(), Eq(first_edge));
  EXPECT_THAT(returned, Eq(first_edge));
}
// InitLast() must bind the navigator to the tree, position it on the last
// data edge, and return that edge.
TEST_P(CordRepBtreeNavigatorTest, InitLast) {
  CordRepBtreeNavigator nav;
  CordRep* const returned = nav.InitLast(tree());
  CordRep* const last_edge = flats().back();

  EXPECT_TRUE(nav);
  EXPECT_THAT(nav.btree(), Eq(tree()));
  EXPECT_THAT(nav.Current(), Eq(last_edge));
  EXPECT_THAT(returned, Eq(last_edge));
}
// Starting at the first edge: walks forward over every edge with Next(), then
// back to the front with Previous(). Stepping past either end must return
// nullptr and leave the navigator where it was.
TEST_P(CordRepBtreeNavigatorTest, NextPrev) {
  const Flats& edges = flats();
  const size_t edge_count = edges.size();

  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());

  // Already at the front: there is no previous edge.
  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.front()));

  // Forward pass over edges 1 .. edge_count - 1.
  for (size_t at = 1; at < edge_count; ++at) {
    ASSERT_THAT(nav.Next(), Eq(edges[at]));
    EXPECT_THAT(nav.Current(), Eq(edges[at]));
  }

  // At the back: there is no next edge.
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.back()));

  // Backward pass over edges edge_count - 2 .. 0.
  for (size_t at = edge_count - 1; at-- > 0;) {
    ASSERT_THAT(nav.Previous(), Eq(edges[at]));
    EXPECT_THAT(nav.Current(), Eq(edges[at]));
  }

  // Back at the front again.
  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.front()));
}
// Starting at the last edge: walks backward over every edge with Previous(),
// then forward to the back with Next(). Stepping past either end must return
// nullptr and leave the navigator where it was.
TEST_P(CordRepBtreeNavigatorTest, PrevNext) {
  const Flats& edges = flats();
  const size_t edge_count = edges.size();

  CordRepBtreeNavigator nav;
  nav.InitLast(tree());

  // Already at the back: there is no next edge.
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.back()));

  // Backward pass over edges edge_count - 2 .. 0.
  for (size_t at = edge_count - 1; at-- > 0;) {
    ASSERT_THAT(nav.Previous(), Eq(edges[at]));
    EXPECT_THAT(nav.Current(), Eq(edges[at]));
  }

  // At the front: there is no previous edge.
  EXPECT_THAT(nav.Previous(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.front()));

  // Forward pass over edges 1 .. edge_count - 1.
  for (size_t at = 1; at < edge_count; ++at) {
    ASSERT_THAT(nav.Next(), Eq(edges[at]));
    EXPECT_THAT(nav.Current(), Eq(edges[at]));
  }

  // At the back again.
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Current(), Eq(edges.back()));
}
// Reset() on an initialized navigator must return it to the empty state: it
// converts to false and no longer references a tree.
TEST(CordRepBtreeNavigatorTest, Reset) {
CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
CordRepBtreeNavigator nav;
nav.InitFirst(tree);
nav.Reset();
EXPECT_FALSE(nav);
EXPECT_THAT(nav.btree(), Eq(nullptr));
// Current() on a reset navigator is only expected to die when death tests are
// available and NDEBUG is not defined.
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
EXPECT_DEATH(nav.Current(), ".*");
#endif
// The navigator does not own the tree; release it here.
CordRep::Unref(tree);
}
// Exercises Skip(): first with skips shorter than one edge, then with every
// pair of consecutive skips landing on edge `from` and subsequently on edge
// `to` (from <= to), at every offset inside an edge.
TEST_P(CordRepBtreeNavigatorTest, Skip) {
  const size_t edge_count = count();
  const Flats& edges = flats();

  CordRepBtreeNavigator head_nav;
  head_nav.InitFirst(tree());

  // Skips shorter than one edge must stay on the first edge and report the
  // skipped amount as the offset into that edge.
  for (size_t within = 0; within < kCharsPerFlat; ++within) {
    const Position at = head_nav.Skip(within);
    EXPECT_THAT(at.edge, Eq(head_nav.Current()));
    EXPECT_THAT(at.edge, Eq(edges[0]));
    EXPECT_THAT(at.offset, Eq(within));
  }

  for (size_t from = 0; from < edge_count; ++from) {
    const size_t from_start = from * kCharsPerFlat;
    for (size_t to = from; to < edge_count; ++to) {
      const size_t to_start = to * kCharsPerFlat;
      for (size_t within = 0; within < kCharsPerFlat; ++within) {
        CordRepBtreeNavigator nav;
        nav.InitFirst(tree());

        // First skip: from the start of the tree into edge `from`.
        const Position first = nav.Skip(from_start + within);
        ASSERT_THAT(first.edge, Eq(edges[from]));
        ASSERT_THAT(first.edge, Eq(nav.Current()));
        ASSERT_THAT(first.offset, Eq(within));

        // Second skip: onwards from edge `from` into edge `to`.
        const Position second = nav.Skip(to_start - from_start + within);
        ASSERT_THAT(second.edge, Eq(edges[to]));
        ASSERT_THAT(second.edge, Eq(nav.Current()));
        ASSERT_THAT(second.offset, Eq(within));
      }
    }
  }
}
// Exercises Seek() on a single navigator: first with offsets inside the first
// edge, then with every absolute offset in the tree in increasing order.
TEST_P(CordRepBtreeNavigatorTest, Seek) {
  const size_t total_chars = count() * kCharsPerFlat;
  const Flats& edges = flats();

  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());

  // Offsets inside the first edge.
  for (size_t within = 0; within < kCharsPerFlat; ++within) {
    const Position at = nav.Seek(within);
    EXPECT_THAT(at.edge, Eq(nav.Current()));
    EXPECT_THAT(at.edge, Eq(edges[0]));
    EXPECT_THAT(at.offset, Eq(within));
  }

  // Every absolute offset: the edge index is the quotient and the offset
  // inside that edge is the remainder of the division by the edge size.
  for (size_t absolute = 0; absolute < total_chars; ++absolute) {
    const size_t edge_index = absolute / kCharsPerFlat;
    const size_t within = absolute % kCharsPerFlat;
    const Position at = nav.Seek(absolute);
    ASSERT_THAT(at.edge, Eq(edges[edge_index]));
    ASSERT_THAT(at.edge, Eq(nav.Current()));
    ASSERT_THAT(at.offset, Eq(within));
  }
}
// Whitebox: InitOffset() is implemented in terms of Seek(), which is tested
// exhaustively elsewhere. This only checks that it initializes the navigator
// and forwards the offset properly.
TEST(CordRepBtreeNavigatorTest, InitOffset) {
  // Two edges of three bytes each: "abc" and "def".
  CordRepBtree* tree =
      CordRepBtree::Append(CordRepBtree::Create(MakeFlat("abc")),
                           MakeFlat("def"));

  CordRepBtreeNavigator nav;
  const Position landed = nav.InitOffset(tree, 5);

  EXPECT_TRUE(nav);
  EXPECT_THAT(nav.btree(), Eq(tree));
  // Offset 5 is byte 2 of the second edge.
  EXPECT_THAT(landed.edge, Eq(tree->Edges()[1]));
  EXPECT_THAT(landed.edge, Eq(nav.Current()));
  EXPECT_THAT(landed.offset, Eq(2u));

  CordRep::Unref(tree);
}
// Seek() and InitOffset() with an offset at or beyond the length of the tree
// must return a null edge and leave the navigator state untouched.
TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) {
CordRepBtree* tree1 = CordRepBtree::Create(MakeFlat("abc"));
CordRepBtree* tree2 = CordRepBtree::Create(MakeFlat("def"));
CordRepBtreeNavigator nav;
nav.InitFirst(tree1);
// Offset 3 is exactly the length of tree1, 100 is well beyond it.
EXPECT_THAT(nav.Seek(3).edge, Eq(nullptr));
EXPECT_THAT(nav.Seek(100).edge, Eq(nullptr));
// The failed seeks must not have moved the navigator.
EXPECT_THAT(nav.btree(), Eq(tree1));
EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front()));
// A failed InitOffset() on another tree must not rebind the navigator: it is
// still expected to reference tree1 afterwards.
EXPECT_THAT(nav.InitOffset(tree2, 3).edge, Eq(nullptr));
EXPECT_THAT(nav.InitOffset(tree2, 100).edge, Eq(nullptr));
EXPECT_THAT(nav.btree(), Eq(tree1));
EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front()));
CordRep::Unref(tree1);
CordRep::Unref(tree2);
}
// Exhaustively reads every (start, length) range of the test data through
// Read() and verifies the returned tree, the reported trailing offset, and
// the position of the navigator after the read.
TEST_P(CordRepBtreeNavigatorTest, Read) {
  const Flats& edges = flats();
  const std::string& content = data();
  const size_t total = content.size();

  for (size_t start = 0; start < total; ++start) {
    for (size_t len = 1; len <= total - start; ++len) {
      CordRepBtreeNavigator nav;
      nav.InitFirst(tree());

      // Move to the edge holding `start`, then read from inside that edge.
      const size_t offset_in_edge = nav.Skip(start).offset;
      ReadResult result = nav.Read(offset_in_edge, len);

      ASSERT_THAT(result.tree, Ne(nullptr));
      EXPECT_THAT(result.tree->length, Eq(len));
      if (result.tree->tag == BTREE) {
        ASSERT_TRUE(CordRepBtree::IsValid(result.tree->btree()));
      }

      // The returned tree must hold exactly the requested range.
      const std::string read_back = CordToString(result.tree);
      EXPECT_THAT(read_back, Eq(content.substr(start, len)));

      // `result.n` must report how far the read ended into its last edge.
      const size_t end = start + len;
      const size_t trailing = end % kCharsPerFlat;
      ASSERT_THAT(result.n, Eq(trailing));

      // Unless the read ran to the end of the data, the navigator must be
      // positioned on the edge holding the first unread byte.
      if (end < total) {
        const size_t end_edge = end / kCharsPerFlat;
        EXPECT_THAT(nav.Current(), Eq(edges[end_edge]));
      }

      CordRep::Unref(result.tree);
    }
  }
}
// A read of the full tree length that starts 2 bytes into the first edge runs
// past the end of the tree and must yield a null tree.
TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) {
  CordRepBtreeNavigator nav;
  nav.InitFirst(tree());

  const size_t whole_tree = tree()->length;
  const ReadResult outcome = nav.Read(2, whole_tree);
  ASSERT_THAT(outcome.tree, Eq(nullptr));
}
// Navigation must work on a tree of the maximum supported height: a two edge
// leaf is wrapped in new parent nodes until kMaxHeight is reached, after which
// both edges are visited forwards and backwards.
TEST(CordRepBtreeNavigatorTest, NavigateMaximumTreeDepth) {
  CordRepFlat* const hello = MakeFlat("Hello world");
  CordRepFlat* const world = MakeFlat("World Hello");

  CordRepBtree* root = CordRepBtree::Create(hello);
  root = CordRepBtree::Append(root, world);
  for (; root->height() < CordRepBtree::kMaxHeight;
       root = CordRepBtree::New(root)) {
  }

  CordRepBtreeNavigator nav;
  CordRep* const first = nav.InitFirst(root);
  EXPECT_THAT(first, Eq(hello));
  EXPECT_THAT(nav.Next(), Eq(world));
  EXPECT_THAT(nav.Next(), Eq(nullptr));
  EXPECT_THAT(nav.Previous(), Eq(hello));
  EXPECT_THAT(nav.Previous(), Eq(nullptr));

  CordRep::Unref(root);
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,69 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Reads `n` bytes into `tree` and returns the chunk of data directly following
// the bytes read. The read starts `chunk_size` bytes before the end of the
// current edge, or at the start of the next edge if `chunk_size` is zero.
absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size,
                                           CordRep*& tree) {
  assert(chunk_size <= navigator_.Current()->length);

  // A non-zero `chunk_size` means the read begins inside the edge returned
  // last; otherwise it begins at the first byte of the next data edge.
  CordRep* edge;
  size_t offset;
  if (chunk_size != 0) {
    edge = navigator_.Current();
    offset = edge->length - chunk_size;
  } else {
    edge = navigator_.Next();
    offset = 0;
  }

  // Hand the actual read to the navigator and pass the sub tree to the caller.
  const ReadResult result = navigator_.Read(offset, n);
  tree = result.tree;

  // The read was satisfied entirely from the `chunk_size` bytes of the edge we
  // were already on, so no further data was consumed: the next chunk is simply
  // the remainder of that same edge. `navigator_.Next()` can not have been
  // called on this path, as that requires `chunk_size` to be zero.
  if (n < chunk_size) return EdgeData(edge).substr(result.n);

  // `chunk_size` bytes came from the previously returned edge and `result.n`
  // is the offset into the current edge directly after the read data (possibly
  // 0). The read may have consumed everything that was left, so `remaining_`
  // must be checked before `navigator_.Current()` can safely be used.
  const size_t consumed_by_read = n - chunk_size - result.n;
  if (consumed_by_read < remaining_) {
    // Data is left: the next chunk is the unread tail of the current edge.
    edge = navigator_.Current();
    remaining_ -= consumed_by_read + edge->length;
    return EdgeData(edge).substr(result.n);
  }

  // Everything was consumed: signal EOF.
  remaining_ = 0;
  return {};
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,212 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_
#include <cassert>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_btree_navigator.h"
#include "absl/strings/internal/cord_rep_flat.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepBtreeReader implements logic to iterate over cord btrees.
// References to the underlying data are returned as absl::string_view values.
// The most typical use case is a forward only iteration over tree data.
// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to
// CordRepBtreeNavigator that allow more advanced navigation.
//
// Example: iterate over all data inside a cord btree:
//
//   CordRepBtreeReader reader;
//   for (absl::string_view sv = reader.Init(tree); !sv.empty();
//        sv = reader.Next()) {
//     DoSomethingWithDataIn(sv);
//   }
//
// All navigation methods always return the next 'chunk' of data. The class
// assumes that all data is directly 'consumed' by the caller. For example:
// invoking `Skip()` will skip the desired number of bytes, and directly
// read and return the next chunk of data directly after the skipped bytes.
//
// Example: iterate over all data inside a btree skipping the first 100 bytes:
//
//   CordRepBtreeReader reader;
//   absl::string_view sv = reader.Init(tree);
//   if (sv.length() > 100) {
//     sv.remove_prefix(100);
//   } else {
//     sv = reader.Skip(100 - sv.length());
//   }
//   while (!sv.empty()) {
//     DoSomethingWithDataIn(sv);
//     sv = reader.Next();
//   }
//
// It is important to notice that `remaining` is based on the end position of
// the last data edge returned to the caller, not the cumulative data returned
// to the caller which can be less in cases of skipping or seeking over data.
//
// For example, consider a cord btree with five data edges: "abc", "def", "ghi",
// "jkl" and "mno":
//
//   absl::string_view sv;
//   CordRepBtreeReader reader;
//
//   sv = reader.Init(tree); // sv = "abc", remaining = 12
//   sv = reader.Skip(4);    // sv = "hi",  remaining = 6
//   sv = reader.Skip(2);    // sv = "l",   remaining = 3
//   sv = reader.Next();     // sv = "mno", remaining = 0
//   sv = reader.Seek(1);    // sv = "bc",  remaining = 12
//
class CordRepBtreeReader {
 public:
  using ReadResult = CordRepBtreeNavigator::ReadResult;
  using Position = CordRepBtreeNavigator::Position;

  // Returns true if this instance is not empty.
  explicit operator bool() const { return navigator_.btree() != nullptr; }

  // Returns the tree referenced by this instance or nullptr if empty.
  CordRepBtree* btree() const { return navigator_.btree(); }

  // Returns the current data edge inside the referenced btree.
  // Requires that the current instance is not empty.
  CordRep* node() const { return navigator_.Current(); }

  // Returns the length of the referenced tree.
  // Requires that the current instance is not empty.
  size_t length() const;

  // Returns the number of remaining bytes available for iteration, which is the
  // number of bytes directly following the end of the last chunk returned.
  // This value will be zero if we iterated over the last edge in the bound
  // tree, in which case any call to Next() or Skip() will return an empty
  // string_view reflecting the EOF state.
  // Note that a call to `Seek()` resets `remaining` to a value based on the
  // end position of the chunk returned by that call.
  size_t remaining() const { return remaining_; }

  // Resets this instance to an empty value. `remaining()` is cleared as well,
  // so that it does not keep reporting bytes of a tree that is no longer
  // bound to this instance.
  void Reset() {
    navigator_.Reset();
    remaining_ = 0;
  }

  // Initializes this instance with `tree`. `tree` must not be null.
  // Returns a reference to the first data edge of the provided tree.
  absl::string_view Init(CordRepBtree* tree);

  // Navigates to and returns the next data edge of the referenced tree.
  // Returns an empty string_view if an attempt is made to read beyond the end
  // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition.
  // Requires that the current instance is not empty.
  absl::string_view Next();

  // Skips the provided amount of bytes and returns a reference to the data
  // directly following the skipped bytes.
  absl::string_view Skip(size_t skip);

  // Reads `n` bytes into `tree`.
  // If `chunk_size` is zero, starts reading at the next data edge. If
  // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of
  // the last returned data edge. Effectively, this means that the read starts
  // at offset `length() - remaining() - chunk_size`.
  // Requires that `chunk_size` is less than or equal to the length of the
  // last returned data edge. The purpose of `chunk_size` is to simplify code
  // partially consuming a returned chunk and wanting to include the remaining
  // bytes in the Read call. For example, the below code will read 1000 bytes of
  // data into a cord tree if the first chunk starts with "big:":
  //
  //   CordRepBtreeReader reader;
  //   absl::string_view sv = reader.Init(tree);
  //   if (absl::StartsWith(sv, "big:")) {
  //     CordRep* data;
  //     sv = reader.Read(1000, sv.size() - 4 /* "big:" */, data);
  //   }
  //
  // This method will return an empty string view if all remaining data was
  // read. If `n` exceeded the amount of remaining data this function will
  // return an empty string view and `tree` will be set to nullptr.
  // In both cases the reader is left at EOF, i.e. `remaining()` will be zero.
  absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree);

  // Navigates to the chunk at offset `offset`.
  // Returns a reference into the navigated to chunk, adjusted for the relative
  // position of `offset` into that chunk. For example, calling `Seek(13)` on a
  // cord tree containing 2 chunks of 10 and 20 bytes respectively will return
  // a string view into the second chunk starting at offset 3 with a size of 17.
  // Returns an empty string view if `offset` is equal to or greater than the
  // length of the referenced tree.
  absl::string_view Seek(size_t offset);

 private:
  // Number of bytes following the end of the last chunk handed to the caller.
  size_t remaining_ = 0;
  // Tracks the position of the last returned data edge inside the tree.
  CordRepBtreeNavigator navigator_;
};
inline size_t CordRepBtreeReader::length() const {
assert(btree() != nullptr);
return btree()->length;
}
// Binds the reader to `tree`, positions it on the first data edge, and returns
// the data of that edge. Everything after the first edge counts as remaining.
inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) {
  assert(tree != nullptr);
  const CordRep* first_edge = navigator_.InitFirst(tree);
  const size_t after_first_edge = tree->length - first_edge->length;
  remaining_ = after_first_edge;
  return EdgeData(first_edge);
}
// Advances to the next data edge and returns its data, or returns an empty
// view without touching the navigator if no data remains.
inline absl::string_view CordRepBtreeReader::Next() {
  if (remaining_ != 0) {
    const CordRep* next_edge = navigator_.Next();
    assert(next_edge != nullptr);
    remaining_ -= next_edge->length;
    return EdgeData(next_edge);
  }
  return {};
}
// Skips `skip` bytes past the end of the last returned chunk and returns the
// data directly following the skipped bytes, or an empty view (with
// `remaining_` set to zero) if the navigator reports no edge at that position.
inline absl::string_view CordRepBtreeReader::Skip(size_t skip) {
  // The navigator still sits on the edge that was returned last, all of which
  // counts as consumed, so that entire edge is skipped on top of `skip`.
  const size_t distance = skip + navigator_.Current()->length;
  CordRepBtreeNavigator::Position landed = navigator_.Skip(distance);
  if (ABSL_PREDICT_FALSE(landed.edge == nullptr)) {
    remaining_ = 0;
    return {};
  }

  // Edges passed over before `landed.edge` add up to `skip - landed.offset`
  // bytes; those are consumed, and so is the edge we landed on.
  const size_t newly_consumed = skip - landed.offset + landed.edge->length;
  remaining_ -= newly_consumed;
  return EdgeData(landed.edge).substr(landed.offset);
}
// Repositions the reader at absolute offset `offset` and returns the data of
// the edge holding that offset, starting at that offset. Returns an empty view
// (with `remaining_` set to zero) if the navigator finds no edge there.
inline absl::string_view CordRepBtreeReader::Seek(size_t offset) {
  const CordRepBtreeNavigator::Position target = navigator_.Seek(offset);
  if (ABSL_PREDICT_FALSE(target.edge == nullptr)) {
    remaining_ = 0;
    return {};
  }

  // `remaining_` counts what follows the end of the returned chunk.
  const absl::string_view tail = EdgeData(target.edge).substr(target.offset);
  remaining_ = length() - offset - tail.length();
  return tail;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_

View file

@ -0,0 +1,293 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_btree_reader.h"
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_test_util.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::Ne;
using ::testing::Not;
using ::absl::cordrep_testing::CordRepBtreeFromFlats;
using ::absl::cordrep_testing::MakeFlat;
using ::absl::cordrep_testing::CordToString;
using ::absl::cordrep_testing::CreateFlatsFromString;
using ::absl::cordrep_testing::CreateRandomString;
using ReadResult = CordRepBtreeReader::ReadResult;
// Iterates over trees of various sizes with Init() / Next() and verifies each
// returned chunk, and `remaining()` after each step, against the source data.
TEST(CordRepBtreeReaderTest, Next) {
  constexpr size_t kEdgeSize = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  const size_t edge_counts[] = {1,         2,
                                cap,       cap * cap,
                                cap * cap + 1, cap * cap * 2 + 17};

  for (const size_t edge_count : edge_counts) {
    std::string data = CreateRandomString(edge_count * kEdgeSize);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kEdgeSize);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    CordRepBtreeReader reader;
    size_t left = data.length();

    // First chunk comes from Init().
    absl::string_view chunk = reader.Init(node);
    EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length())));
    left -= chunk.length();
    EXPECT_THAT(reader.remaining(), Eq(left));

    // All further chunks come from Next().
    while (left > 0) {
      const size_t chunk_start = data.length() - left;
      chunk = reader.Next();
      EXPECT_THAT(chunk, Eq(data.substr(chunk_start, chunk.length())));
      left -= chunk.length();
      EXPECT_THAT(reader.remaining(), Eq(left));
    }

    EXPECT_THAT(reader.remaining(), Eq(0u));

    // Reading beyond EOF must return an empty string_view.
    EXPECT_THAT(reader.Next(), testing::IsEmpty());

    CordRep::Unref(node);
  }
}
// For trees of various sizes, performs two consecutive Skip() calls for every
// combination of skip amounts and verifies the returned chunks, plus
// `remaining()` after the first skip, against the source data.
TEST(CordRepBtreeReaderTest, Skip) {
  constexpr size_t kEdgeSize = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  const size_t edge_counts[] = {1,         2,
                                cap,       cap * cap,
                                cap * cap + 1, cap * cap * 2 + 17};

  for (const size_t edge_count : edge_counts) {
    std::string data = CreateRandomString(edge_count * kEdgeSize);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kEdgeSize);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    // Skip amounts never reach into the last edge's worth of data.
    const size_t skip_limit = data.length() - kEdgeSize;
    for (size_t first_skip = 0; first_skip < skip_limit; ++first_skip) {
      for (size_t second_skip = 0; second_skip < skip_limit; ++second_skip) {
        CordRepBtreeReader reader;
        size_t left = data.length();
        absl::string_view chunk = reader.Init(node);
        left -= chunk.length();

        // First skip, relative to the end of the chunk Init() returned.
        chunk = reader.Skip(first_skip);
        size_t consumed = data.length() - left;
        ASSERT_THAT(chunk,
                    Eq(data.substr(consumed + first_skip, chunk.length())));
        left -= chunk.length() + first_skip;
        ASSERT_THAT(reader.remaining(), Eq(left));

        if (left == 0) continue;

        // Second skip, capped so that at least one byte is left to return.
        const size_t capped_skip = std::min(left - 1, second_skip);
        chunk = reader.Skip(capped_skip);
        consumed = data.length() - left;
        ASSERT_THAT(chunk,
                    Eq(data.substr(consumed + capped_skip, chunk.length())));
      }
    }

    CordRep::Unref(node);
  }
}
// Skipping more bytes than the tree holds must return an empty chunk and leave
// the reader with nothing remaining.
TEST(CordRepBtreeReaderTest, SkipBeyondLength) {
// Six bytes in two edges: "abc" and "def".
CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
tree = CordRepBtree::Append(tree, MakeFlat("def"));
CordRepBtreeReader reader;
reader.Init(tree);
EXPECT_THAT(reader.Skip(100), IsEmpty());
EXPECT_THAT(reader.remaining(), Eq(0u));
CordRep::Unref(tree);
}
// For trees of various sizes, seeks a fresh reader to each offset below
// `data.length() - 1` and verifies the returned chunk and `remaining()`.
TEST(CordRepBtreeReaderTest, Seek) {
  constexpr size_t kEdgeSize = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  const size_t edge_counts[] = {1,         2,
                                cap,       cap * cap,
                                cap * cap + 1, cap * cap * 2 + 17};

  for (const size_t edge_count : edge_counts) {
    std::string data = CreateRandomString(edge_count * kEdgeSize);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kEdgeSize);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    const size_t seek_limit = data.length() - 1;
    for (size_t target = 0; target < seek_limit; ++target) {
      CordRepBtreeReader reader;
      reader.Init(node);

      const absl::string_view chunk = reader.Seek(target);
      ASSERT_THAT(chunk, Not(IsEmpty()));
      ASSERT_THAT(chunk, Eq(data.substr(target, chunk.length())));

      // What remains is whatever follows the end of the returned chunk.
      const size_t expected_left = data.length() - target - chunk.length();
      ASSERT_THAT(reader.remaining(), Eq(expected_left));
    }

    CordRep::Unref(node);
  }
}
// Seeking to an offset equal to or beyond the length of the tree must return
// an empty chunk and leave the reader with nothing remaining.
TEST(CordRepBtreeReaderTest, SeekBeyondLength) {
// Six bytes in two edges: "abc" and "def".
CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc"));
tree = CordRepBtree::Append(tree, MakeFlat("def"));
CordRepBtreeReader reader;
reader.Init(tree);
// Offset 6 is exactly the length of the tree.
EXPECT_THAT(reader.Seek(6), IsEmpty());
EXPECT_THAT(reader.remaining(), Eq(0u));
// Offset 100 is well beyond the length of the tree.
EXPECT_THAT(reader.Seek(100), IsEmpty());
EXPECT_THAT(reader.remaining(), Eq(0u));
CordRep::Unref(tree);
}
// Exercises CordRepBtreeReader::Read() on a 15 byte tree made of three 5 byte
// edges ("abcde", "fghij", "klmno"), covering reads that start inside the
// current chunk, start at the next chunk, span chunks, and end exactly on an
// edge boundary. The reader is re-initialized before every scenario.
TEST(CordRepBtreeReaderTest, Read) {
  std::string data = "abcdefghijklmno";
  std::vector<CordRep*> flats = CreateFlatsFromString(data, 5);
  CordRepBtree* node = CordRepBtreeFromFlats(flats);

  CordRep* tree;
  CordRepBtreeReader reader;
  absl::string_view chunk;

  // Read zero bytes
  chunk = reader.Init(node);
  chunk = reader.Read(0, chunk.length(), tree);
  EXPECT_THAT(tree, Eq(nullptr));
  EXPECT_THAT(chunk, Eq("abcde"));
  EXPECT_THAT(reader.remaining(), Eq(10u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));

  // Read in full
  chunk = reader.Init(node);
  chunk = reader.Read(15, chunk.length(), tree);
  // ASSERT rather than EXPECT, as in the scenarios below: `tree` is
  // dereferenced and unreffed right after this check.
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno"));
  EXPECT_THAT(chunk, Eq(""));
  EXPECT_THAT(reader.remaining(), Eq(0u));
  CordRep::Unref(tree);

  // Read < chunk bytes
  chunk = reader.Init(node);
  chunk = reader.Read(3, chunk.length(), tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("abc"));
  EXPECT_THAT(chunk, Eq("de"));
  EXPECT_THAT(reader.remaining(), Eq(10u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));
  CordRep::Unref(tree);

  // Read < chunk bytes at offset
  chunk = reader.Init(node);
  chunk = reader.Read(2, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cd"));
  EXPECT_THAT(chunk, Eq("e"));
  EXPECT_THAT(reader.remaining(), Eq(10u));
  EXPECT_THAT(reader.Next(), Eq("fghij"));
  CordRep::Unref(tree);

  // Read from consumed chunk
  chunk = reader.Init(node);
  chunk = reader.Read(3, 0, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("fgh"));
  EXPECT_THAT(chunk, Eq("ij"));
  EXPECT_THAT(reader.remaining(), Eq(5u));
  EXPECT_THAT(reader.Next(), Eq("klmno"));
  CordRep::Unref(tree);

  // Read across chunks
  chunk = reader.Init(node);
  chunk = reader.Read(12, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn"));
  EXPECT_THAT(chunk, Eq("o"));
  EXPECT_THAT(reader.remaining(), Eq(0u));
  CordRep::Unref(tree);

  // Read across chunks landing on exact edge boundary
  chunk = reader.Init(node);
  chunk = reader.Read(10 - 2, chunk.length() - 2, tree);
  ASSERT_THAT(tree, Ne(nullptr));
  EXPECT_THAT(CordToString(tree), Eq("cdefghij"));
  EXPECT_THAT(chunk, Eq("klmno"));
  EXPECT_THAT(reader.remaining(), Eq(0u));
  CordRep::Unref(tree);

  CordRep::Unref(node);
}
// Reads trees of various sizes front to back in fixed size steps using Read(),
// always passing the full length of the previously returned chunk as
// `chunk_size`, and verifies every returned tree and follow-up chunk.
TEST(CordRepBtreeReaderTest, ReadExhaustive) {
  constexpr size_t kEdgeSize = 3;
  const size_t cap = CordRepBtree::kMaxCapacity;
  const size_t edge_counts[] = {1, 2, cap, cap * cap + 1,
                                cap * cap * cap * 2 + 17};

  for (const size_t edge_count : edge_counts) {
    std::string data = CreateRandomString(edge_count * kEdgeSize);
    std::vector<CordRep*> flats = CreateFlatsFromString(data, kEdgeSize);
    CordRepBtree* node = CordRepBtreeFromFlats(flats);

    for (size_t step : {kEdgeSize - 1, kEdgeSize, kEdgeSize + 7, cap * cap}) {
      CordRepBtreeReader reader;
      absl::string_view chunk = reader.Init(node);

      // `read_so_far` is the end of the data consumed through Read() so far,
      // which is also where the pending chunk starts, since every Read() is
      // issued with `chunk_size = chunk.length()`.
      size_t read_so_far = 0;
      size_t left = data.length();
      while (left > 0) {
        CordRep* tree;
        const size_t n = (std::min)(left, step);
        chunk = reader.Read(n, chunk.length(), tree);

        EXPECT_THAT(tree, Ne(nullptr));
        if (tree) {
          EXPECT_THAT(CordToString(tree), Eq(data.substr(read_so_far, n)));
          CordRep::Unref(tree);
        }

        read_so_far += n;
        left -= n;
        EXPECT_THAT(reader.remaining(), Eq(left - chunk.length()));

        if (left > 0) {
          // More data follows: the returned chunk must be its beginning.
          ASSERT_FALSE(chunk.empty());
          ASSERT_THAT(chunk, Eq(data.substr(read_so_far, chunk.length())));
        } else {
          // Everything was read: the returned chunk must be empty.
          ASSERT_TRUE(chunk.empty()) << chunk;
        }
      }
    }

    CordRep::Unref(node);
  }
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,64 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_consume.h"
#include <array>
#include <utility>
#include "absl/container/inlined_vector.h"
#include "absl/functional/function_ref.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Drops the caller's reference on `substring` and returns `substring->child`.
// If `substring` is not shared, the node is deleted and the reference it held
// on the child is handed to the caller; otherwise a new reference is taken on
// the child before `substring` is unreffed.
CordRep* ClipSubstring(CordRepSubstring* substring) {
  CordRep* const child = substring->child;
  if (!substring->refcount.IsOne()) {
    // Shared: the substring keeps its own reference on the child.
    CordRep::Ref(child);
    CordRep::Unref(substring);
    return child;
  }
  // Sole owner: take over the substring's reference on the child.
  delete substring;
  return child;
}
} // namespace
// Invokes `consume_fn(node, offset, length)` exactly once for `rep`. If `rep`
// is a SUBSTRING node it is clipped first: `node` is then the substring's
// child and `offset` the substring's start; otherwise `node` is `rep` itself
// and `offset` is zero. `length` is always the length of the original `rep`.
void Consume(CordRep* rep,
             FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
  const size_t length = rep->length;
  size_t offset = 0;

  if (rep->tag == SUBSTRING) {
    CordRepSubstring* const substring = rep->substring();
    offset = substring->start;
    rep = ClipSubstring(substring);
  }

  consume_fn(rep, offset, length);
}
// Forwards to Consume(): as exactly one node is reported to `consume_fn`,
// forward and reverse order are the same.
void ReverseConsume(CordRep* rep,
                    FunctionRef<void(CordRep*, size_t, size_t)> consume_fn) {
  Consume(rep, consume_fn);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,47 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_
#include <functional>
#include "absl/functional/function_ref.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Consume() and ReverseConsume() consume CONCAT based trees and invoke the
// provided functor with the contained nodes in the proper forward or reverse
// order, which is used to convert CONCAT trees into other tree or cord data.
// All CONCAT and SUBSTRING nodes are processed internally. The 'offset`
// parameter of the functor is non-zero for any nodes below SUBSTRING nodes.
// It's up to the caller to form these back into SUBSTRING nodes or otherwise
// store offset / prefix information. These functions are intended to be used
// only for migration / transitional code where due to factors such as ODR
// violations, we can not 100% guarantee that all code respects 'new format'
// settings and flags, so we need to be able to parse old data on the fly until
// all old code is deprecated / no longer the default format.
//
// NOTE(review): the implementation in cord_rep_consume.cc only unwraps a top
// level SUBSTRING node and invokes the functor exactly once; it contains no
// CONCAT traversal, and ReverseConsume() simply forwards to Consume(). The
// CONCAT wording above looks stale -- confirm and update.
//
// The functor is invoked as `consume_fn(node, offset, length)`.
void Consume(CordRep* rep,
FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
void ReverseConsume(CordRep* rep,
FunctionRef<void(CordRep*, size_t, size_t)> consume_fn);
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_

View file

@ -0,0 +1,56 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_crc.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include "absl/base/config.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Wraps `child` in a CordRepCrc node carrying `state`, consuming the caller's
// reference on `child`. `child` may be null (empty cord), in which case the
// returned node has a length of 0. Never returns null.
//
// CRC nodes are never nested: if `child` is itself a CordRepCrc node, an
// unshared node is updated in place and returned, while a shared node is
// released and its payload is wrapped in a newly allocated node.
CordRepCrc* CordRepCrc::New(CordRep* child, crc_internal::CrcCordState state) {
  if (child != nullptr && child->IsCrc()) {
    if (child->refcount.IsOne()) {
      // Sole owner: overwrite the state in place and hand the same node back.
      child->crc()->crc_cord_state = std::move(state);
      return child->crc();
    }
    // The existing CRC node is shared and must not be modified. Take our own
    // reference on its payload before dropping our reference on the wrapper.
    CordRep* old = child;
    child = old->crc()->child;
    // A CRC node wrapping an empty cord has a null child; there is nothing to
    // reference in that case.
    if (child != nullptr) CordRep::Ref(child);
    CordRep::Unref(old);
  }
  auto* new_cordrep = new CordRepCrc;
  new_cordrep->length = child != nullptr ? child->length : 0;
  new_cordrep->tag = cord_internal::CRC;
  new_cordrep->child = child;
  new_cordrep->crc_cord_state = std::move(state);
  return new_cordrep;
}
// Releases the reference `node` holds on its child (if it has one) and then
// deletes `node` itself. `node` must not be null.
void CordRepCrc::Destroy(CordRepCrc* node) {
  CordRep* const wrapped = node->child;
  if (wrapped != nullptr) CordRep::Unref(wrapped);
  delete node;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,103 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_
#include <cassert>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordRepCrc is a CordRep node intended only to appear at the top level of a
// cord tree. It associates an "expected CRC" with the contained data, to allow
// for easy passage of checksum data in Cord data flows.
//
// From Cord's perspective, the crc value has no semantics; any validation of
// the contained checksum is the user's responsibility.
struct CordRepCrc : public CordRep {
// The wrapped tree. Null only when the cord is empty.
CordRep* child;
// The "expected CRC" state associated with the data in `child`.
absl::crc_internal::CrcCordState crc_cord_state;
// Consumes `child` and returns a CordRepCrc prefixed tree containing `child`.
// If the specified `child` is itself a CordRepCrc node, then this method
// either replaces the existing node, or directly updates the crc state in it
// depending on the node being shared or not, i.e.: refcount.IsOne().
// `child` must only be null if the Cord is empty. Never returns null.
static CordRepCrc* New(CordRep* child, crc_internal::CrcCordState state);
// Destroys (deletes) the provided node. `node` must not be null.
static void Destroy(CordRepCrc* node);
};
// Consumes `rep` and returns a CordRep* with any outer CordRepCrc wrapper
// removed. This is usually a no-op (returning `rep`), but this will remove and
// unref an outer CordRepCrc node.
//
// `rep` must not be null. The returned value is null if `rep` is a CordRepCrc
// node wrapping an empty cord (i.e., its child is null).
inline CordRep* RemoveCrcNode(CordRep* rep) {
  assert(rep != nullptr);
  if (ABSL_PREDICT_FALSE(rep->IsCrc())) {
    CordRep* child = rep->crc()->child;
    if (rep->refcount.IsOne()) {
      // Sole owner: the wrapper's reference on `child` transfers to the caller,
      // so delete the wrapper directly instead of unreffing the child.
      delete rep->crc();
    } else {
      // The wrapper stays alive for its other owners, so the caller needs its
      // own reference on the child. A null child has nothing to reference.
      if (child != nullptr) CordRep::Ref(child);
      CordRep::Unref(rep);
    }
    return child;
  }
  return rep;
}
// Looks through an outer CordRepCrc node: yields the child of `rep` when `rep`
// is a CordRepCrc node, and `rep` itself otherwise. Reference counts are left
// untouched for both `rep` and the returned value. `rep` must not be null.
inline CordRep* SkipCrcNode(CordRep* rep) {
  assert(rep != nullptr);
  return ABSL_PREDICT_FALSE(rep->IsCrc()) ? rep->crc()->child : rep;
}
// Const overload: yields the child of `rep` when `rep` is a CordRepCrc node,
// and `rep` itself otherwise. `rep` must not be null.
inline const CordRep* SkipCrcNode(const CordRep* rep) {
  assert(rep != nullptr);
  return ABSL_PREDICT_FALSE(rep->IsCrc()) ? rep->crc()->child : rep;
}
// Now that CordRepCrc is defined, define CordRep's helper cast: returns this
// node as a CordRepCrc. Asserts that this node is a CRC node.
inline CordRepCrc* CordRep::crc() {
assert(IsCrc());
return static_cast<CordRepCrc*>(this);
}
// Const overload: returns this node as a const CordRepCrc. Asserts that this
// node is a CRC node.
inline const CordRepCrc* CordRep::crc() const {
assert(IsCrc());
return static_cast<const CordRepCrc*>(this);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_

View file

@ -0,0 +1,130 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cord_rep_crc.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_test_util.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::absl::cordrep_testing::MakeFlat;
using ::testing::Eq;
using ::testing::IsNull;
using ::testing::Ne;
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
// RemoveCrcNode() asserts that its argument is non-null. The surrounding
// preprocessor guard restricts this test to builds without NDEBUG, where
// assert() is active.
TEST(CordRepCrc, RemoveCrcWithNullptr) {
EXPECT_DEATH(RemoveCrcNode(nullptr), "");
}
#endif // !NDEBUG && GTEST_HAS_DEATH_TEST
// Builds a CrcCordState holding a single prefix entry (length 42) whose
// checksum is `crc`.
absl::crc_internal::CrcCordState MakeCrcCordState(uint32_t crc) {
  crc_internal::CrcCordState state;
  auto& prefixes = state.mutable_rep()->prefix_crc;
  prefixes.push_back(crc_internal::CrcCordState::PrefixCrc(42, crc32c_t{crc}));
  return state;
}
// New() wraps a flat in a fresh, unshared CRC node without adding a reference
// on the flat; Destroy() releases both.
TEST(CordRepCrc, NewDestroy) {
  CordRep* flat = MakeFlat("Hello world");
  CordRepCrc* node = CordRepCrc::New(flat, MakeCrcCordState(12345));

  EXPECT_TRUE(node->refcount.IsOne());
  EXPECT_THAT(node->child, Eq(flat));
  EXPECT_THAT(node->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
  EXPECT_TRUE(flat->refcount.IsOne());

  CordRepCrc::Destroy(node);
}
// Passing an unshared CRC node to New() reuses that node and only replaces the
// checksum state stored in it.
TEST(CordRepCrc, NewExistingCrcNotShared) {
  CordRep* flat = MakeFlat("Hello world");
  CordRepCrc* original = CordRepCrc::New(flat, MakeCrcCordState(12345));
  CordRepCrc* updated = CordRepCrc::New(original, MakeCrcCordState(54321));

  EXPECT_THAT(updated, Eq(original));
  EXPECT_TRUE(updated->refcount.IsOne());
  EXPECT_THAT(updated->child, Eq(flat));
  EXPECT_THAT(updated->crc_cord_state.Checksum(), Eq(crc32c_t{54321u}));
  EXPECT_TRUE(flat->refcount.IsOne());

  CordRepCrc::Destroy(updated);
}
// Passing a shared CRC node to New() leaves that node untouched and returns a
// new node that shares the same child.
TEST(CordRepCrc, NewExistingCrcShared) {
  CordRep* flat = MakeFlat("Hello world");
  CordRepCrc* shared = CordRepCrc::New(flat, MakeCrcCordState(12345));
  // Take a second reference so the node is shared and cannot be reused.
  CordRep::Ref(shared);
  CordRepCrc* fresh = CordRepCrc::New(shared, MakeCrcCordState(54321));

  EXPECT_THAT(fresh, Ne(shared));
  EXPECT_TRUE(fresh->refcount.IsOne());
  EXPECT_TRUE(shared->refcount.IsOne());
  EXPECT_FALSE(flat->refcount.IsOne());
  EXPECT_THAT(shared->child, Eq(flat));
  EXPECT_THAT(fresh->child, Eq(flat));
  EXPECT_THAT(shared->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
  EXPECT_THAT(fresh->crc_cord_state.Checksum(), Eq(crc32c_t{54321u}));

  CordRep::Unref(shared);
  CordRep::Unref(fresh);
}
// New() accepts a null child (empty cord) and produces an unshared node with a
// null child and a length of zero.
TEST(CordRepCrc, NewEmpty) {
  CordRepCrc* crc = CordRepCrc::New(nullptr, MakeCrcCordState(12345));
  EXPECT_TRUE(crc->refcount.IsOne());
  EXPECT_THAT(crc->child, IsNull());
  EXPECT_THAT(crc->length, Eq(0u));
  EXPECT_THAT(crc->crc_cord_state.Checksum(), Eq(crc32c_t{12345u}));
  // The original repeated the refcount expectation here verbatim; there is no
  // child whose refcount could be checked instead, so the duplicate is dropped.
  CordRepCrc::Destroy(crc);
}
// RemoveCrcNode() returns a non-CRC rep unchanged.
TEST(CordRepCrc, RemoveCrcNotCrc) {
  CordRep* flat = MakeFlat("Hello world");
  CordRep* stripped = RemoveCrcNode(flat);
  EXPECT_THAT(stripped, Eq(flat));
  CordRep::Unref(stripped);
}
// Removing an unshared CRC node yields its child, which is still singly
// referenced afterwards.
TEST(CordRepCrc, RemoveCrcNotShared) {
  CordRep* flat = MakeFlat("Hello world");
  CordRepCrc* node = CordRepCrc::New(flat, MakeCrcCordState(12345));
  CordRep* stripped = RemoveCrcNode(node);

  EXPECT_THAT(stripped, Eq(flat));
  EXPECT_TRUE(flat->refcount.IsOne());

  CordRep::Unref(stripped);
}
// Removing a shared CRC node yields its child with an additional reference,
// while the CRC node itself stays alive for its remaining owner.
TEST(CordRepCrc, RemoveCrcShared) {
  CordRep* flat = MakeFlat("Hello world");
  CordRepCrc* node = CordRepCrc::New(flat, MakeCrcCordState(12345));
  // Take a second reference so the node is shared.
  CordRep::Ref(node);
  CordRep* stripped = RemoveCrcNode(node);

  EXPECT_THAT(stripped, Eq(flat));
  EXPECT_FALSE(flat->refcount.IsOne());

  CordRep::Unref(stripped);
  CordRep::Unref(node);
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,195 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include "absl/base/config.h"
#include "absl/base/macros.h"
#include "absl/strings/internal/cord_internal.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Note: all constants below are never ODR used and internal to cord, we define
// these as static constexpr to avoid 'in struct' definition and usage clutter.
// Largest and smallest flat node lengths we are willing to allocate
// Flat allocation size is stored in tag. Sizes up to 512 bytes are encoded as
// a multiple of 8 bytes, sizes up to 8K as a multiple of 64 bytes, and larger
// sizes as a multiple of 4K.
// If we allow for larger sizes, the encoding granularity needs to change.
// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and
// ideally a 'nice' size aligning with allocation and cacheline sizes like 32.
// kMaxFlatSize is bounded by the size resulting in a computed tag no greater
// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values.
// Number of bytes in a CordRep that precede its `storage` member.
static constexpr size_t kFlatOverhead = offsetof(CordRep, storage);
// Smallest and largest allocation sizes of a regular flat node.
static constexpr size_t kMinFlatSize = 32;
static constexpr size_t kMaxFlatSize = 4096;
// Allocation sizes above, minus the per-node overhead.
static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead;
static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead;
// Largest allocation size of a 'large' flat node, and the same minus overhead.
static constexpr size_t kMaxLargeFlatSize = 256 * 1024;
static constexpr size_t kMaxLargeFlatLength = kMaxLargeFlatSize - kFlatOverhead;
// kTagBase should make the Size <--> Tag computation resilient
// against changes to the value of FLAT when we add a new tag.
static constexpr uint8_t kTagBase = FLAT - 4;
// Converts the provided rounded size to the corresponding tag.
// Three ranges are encoded with increasing granularity: sizes up to 512 bytes
// in steps of 8 bytes, sizes up to 8192 bytes in steps of 64 bytes, and larger
// sizes in steps of 4096 bytes. Each range continues at the tag value where
// the previous one ended. The result is not checked against MAX_FLAT_TAG.
constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) {
return static_cast<uint8_t>(size <= 512 ? kTagBase + size / 8
: size <= 8192
? kTagBase + 512 / 8 + size / 64 - 512 / 64
: kTagBase + 512 / 8 + ((8192 - 512) / 64) +
size / 4096 - 8192 / 4096);
}
// Converts the provided tag to the corresponding allocated size.
// This is the inverse of AllocatedSizeToTagUnchecked() for the same three
// ranges: tags up to `kTagBase + 512 / 8` map to multiples of 8 bytes, tags up
// to `kTagBase + 512 / 8 + (8192 - 512) / 64` map to multiples of 64 bytes
// above 512, and higher tags map to multiples of 4096 bytes above 8192.
constexpr size_t TagToAllocatedSize(uint8_t tag) {
return (tag <= kTagBase + 512 / 8) ? tag * 8 - kTagBase * 8
: (tag <= kTagBase + (512 / 8) + ((8192 - 512) / 64))
? 512 + tag * 64 - kTagBase * 64 - 512 / 8 * 64
: 8192 + tag * 4096 - kTagBase * 4096 -
((512 / 8) + ((8192 - 512) / 64)) * 4096;
}
// Enforce that the smallest flat size maps to the FLAT tag, and that the
// largest 'large' flat size maps to MAX_FLAT_TAG.
static_assert(AllocatedSizeToTagUnchecked(kMinFlatSize) == FLAT, "");
static_assert(AllocatedSizeToTagUnchecked(kMaxLargeFlatSize) == MAX_FLAT_TAG,
"");
// Rounds `n` up to the nearest multiple of `m`, i.e. computes
// `((n + m - 1) / m) * m`. Requires `m` to be a power of 2, which allows the
// division and multiplication to be replaced by masking off the low bits.
constexpr size_t RoundUp(size_t n, size_t m) {
  return (n + (m - 1)) & ~(m - 1);
}
// Rounds `size` up to the nearest value that a tag can represent exactly:
// a multiple of 8 for sizes up to 512, a multiple of 64 for sizes up to 8192,
// and a multiple of 4096 beyond that.
inline size_t RoundUpForTag(size_t size) {
  if (size <= 512) return RoundUp(size, 8);
  if (size <= 8192) return RoundUp(size, 64);
  return RoundUp(size, 4096);
}
// Converts the allocated size to a tag, rounding down if the size
// does not exactly match a 'tag expressible' size value. The result is
// undefined if the size exceeds the maximum size that can be encoded in
// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>).
// Builds with assertions enabled check the computed tag against MAX_FLAT_TAG.
inline uint8_t AllocatedSizeToTag(size_t size) {
const uint8_t tag = AllocatedSizeToTagUnchecked(size);
assert(tag <= MAX_FLAT_TAG);
return tag;
}
// Converts the provided tag to the corresponding available data length, i.e.
// the allocated size for that tag minus kFlatOverhead.
constexpr size_t TagToLength(uint8_t tag) {
return TagToAllocatedSize(tag) - kFlatOverhead;
}
// Enforce that MAX_FLAT_TAG maps back to exactly kMaxLargeFlatSize.
static_assert(TagToAllocatedSize(MAX_FLAT_TAG) == kMaxLargeFlatSize,
"Bad tag logic");
// CordRepFlat is a CordRep whose data is stored in `storage`. The allocation
// size of each instance is encoded in its tag.
struct CordRepFlat : public CordRep {
// Tag for explicit 'large flat' allocation
struct Large {};
// Creates a new flat node with room for `len` bytes of data, where `len` is
// first clamped to the range [kMinFlatLength, max_flat_size - kFlatOverhead].
// This function does not assign `length`; see Create() for a variant that
// copies data and sets the length.
template <size_t max_flat_size, typename... Args>
static CordRepFlat* NewImpl(size_t len, Args... args ABSL_ATTRIBUTE_UNUSED) {
if (len <= kMinFlatLength) {
len = kMinFlatLength;
} else if (len > max_flat_size - kFlatOverhead) {
len = max_flat_size - kFlatOverhead;
}
// Round size up so it matches a size we can exactly express in a tag.
const size_t size = RoundUpForTag(len + kFlatOverhead);
void* const raw_rep = ::operator new(size);
// GCC 13 has a false-positive -Wstringop-overflow warning here.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
CordRepFlat* rep = new (raw_rep) CordRepFlat();
rep->tag = AllocatedSizeToTag(size);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(13, 0)
#pragma GCC diagnostic pop
#endif
return rep;
}
// Creates a new flat node, with its allocation capped at kMaxFlatSize.
static CordRepFlat* New(size_t len) { return NewImpl<kMaxFlatSize>(len); }
// Creates a new 'large' flat node, with its allocation capped at
// kMaxLargeFlatSize.
static CordRepFlat* New(Large, size_t len) {
return NewImpl<kMaxLargeFlatSize>(len);
}
// Deletes a CordRepFlat instance created previously through a call to New().
// Flat CordReps are allocated and constructed with raw ::operator new and
// placement new, and must be destructed and deallocated accordingly.
// When sized deallocation is available, the allocation size is recovered
// from the tag before the destructor runs.
static void Delete(CordRep*rep) {
assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG);
#if defined(__cpp_sized_deallocation)
size_t size = TagToAllocatedSize(rep->tag);
rep->~CordRep();
::operator delete(rep, size);
#else
rep->~CordRep();
::operator delete(rep);
#endif
}
// Create a CordRepFlat containing `data`, with an optional additional
// extra capacity of up to `extra` bytes. Requires that `data.size()`
// is at most kMaxFlatLength.
static CordRepFlat* Create(absl::string_view data, size_t extra = 0) {
assert(data.size() <= kMaxFlatLength);
// New() clamps the requested length, so the extra capacity is best effort.
CordRepFlat* flat = New(data.size() + (std::min)(extra, kMaxFlatLength));
memcpy(flat->Data(), data.data(), data.size());
flat->length = data.size();
return flat;
}
// Returns a pointer to the data inside this flat rep.
char* Data() { return reinterpret_cast<char*>(storage); }
const char* Data() const { return reinterpret_cast<const char*>(storage); }
// Returns the maximum capacity (payload size) of this instance.
size_t Capacity() const { return TagToLength(tag); }
// Returns the allocated size (payload + overhead) of this instance.
size_t AllocatedSize() const { return TagToAllocatedSize(tag); }
};
// Now that CordRepFlat is defined, we can define CordRep's helper casts:
// Returns this node as a CordRepFlat. Asserts that the tag is a flat tag.
// CordRepFlat derives from CordRep, so a static_cast expresses the downcast
// (matching CordRep::crc()) without resorting to reinterpret_cast.
inline CordRepFlat* CordRep::flat() {
  assert(tag >= FLAT && tag <= MAX_FLAT_TAG);
  return static_cast<CordRepFlat*>(this);
}
// Const overload: returns this node as a const CordRepFlat. Asserts that the
// tag is a flat tag. CordRepFlat derives from CordRep, so a static_cast
// expresses the downcast without resorting to reinterpret_cast.
inline const CordRepFlat* CordRep::flat() const {
  assert(tag >= FLAT && tag <= MAX_FLAT_TAG);
  return static_cast<const CordRepFlat*>(this);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_

View file

@ -0,0 +1,205 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_
#include <cassert>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cordrep_testing {
// Creates a SUBSTRING node referencing `rep`, starting at offset `start`.
// A `len` of 0 selects everything from `start` up to the end of `rep`.
// No reference is added on `rep`.
inline cord_internal::CordRepSubstring* MakeSubstring(
    size_t start, size_t len, cord_internal::CordRep* rep) {
  auto* substring = new cord_internal::CordRepSubstring;
  substring->tag = cord_internal::SUBSTRING;
  substring->start = start;
  // `len` is unsigned, so 0 is the only value selecting the 'remainder' case.
  substring->length = (len == 0) ? rep->length - start : len;
  substring->child = rep;
  return substring;
}
// Creates a flat node holding a copy of `value`, which must not be longer
// than kMaxFlatLength.
inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) {
  const size_t size = value.length();
  assert(size <= cord_internal::kMaxFlatLength);
  cord_internal::CordRepFlat* result = cord_internal::CordRepFlat::New(size);
  memcpy(result->Data(), value.data(), size);
  result->length = size;
  return result;
}
// Creates an external node for testing.
// The returned node owns a private copy of `s`, so the caller's string does
// not need to outlive it. The node's releaser deletes the node.
inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) {
struct Rep : public cord_internal::CordRepExternal {
// Owned copy of the data that `base` points into.
std::string s;
explicit Rep(absl::string_view sv) : s(sv) {
this->tag = cord_internal::EXTERNAL;
// Here `s` is the member above, not the parameter of MakeExternal().
this->base = s.data();
this->length = s.length();
// Cast back to Rep so that deleting the node also destroys the string.
this->releaser_invoker = [](cord_internal::CordRepExternal* self) {
delete static_cast<Rep*>(self);
};
}
};
return new Rep(s);
}
inline std::string CreateRandomString(size_t n) {
absl::string_view data =
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"0123456789~!@#$%^&*()_+=-<>?:\"{}[]|";
std::minstd_rand rnd;
std::uniform_int_distribution<size_t> dist(0, data.size() - 1);
std::string s(n, ' ');
for (size_t i = 0; i < n; ++i) {
s[i] = data[dist(rnd)];
}
return s;
}
// Creates an array of flats from the provided string, chopping
// the provided string up into flats of size `chunk_size` characters
// resulting in roughly `data.size() / chunk_size` total flats. The last flat
// holds the remainder if `data.size()` is not a multiple of `chunk_size`.
// Requires `chunk_size` > 0.
inline std::vector<cord_internal::CordRep*> CreateFlatsFromString(
    absl::string_view data, size_t chunk_size) {
  assert(chunk_size > 0);
  std::vector<cord_internal::CordRep*> flats;
  flats.reserve(data.size() / chunk_size + 1);
  while (!data.empty()) {
    // Never consume more than what is left: remove_prefix() requires its
    // argument to be no larger than the size of the view, which the final,
    // shorter chunk would otherwise violate.
    const size_t n = data.size() < chunk_size ? data.size() : chunk_size;
    flats.push_back(MakeFlat(data.substr(0, n)));
    data.remove_prefix(n);
  }
  return flats;
}
// Builds a btree from `flats` by creating it from the first flat and then
// appending the remaining flats in order. `flats` must not be empty.
inline cord_internal::CordRepBtree* CordRepBtreeFromFlats(
    absl::Span<cord_internal::CordRep* const> flats) {
  using cord_internal::CordRepBtree;
  assert(!flats.empty());
  CordRepBtree* tree = CordRepBtree::Create(flats.front());
  for (cord_internal::CordRep* flat : flats.subspan(1)) {
    tree = CordRepBtree::Append(tree, flat);
  }
  return tree;
}
// Invokes `fn` on `rep` and on every node reachable from it: first `rep`
// itself, then each child along a chain of SUBSTRING nodes, and finally,
// recursively, all edges if the node at the end of that chain is a BTREE.
template <typename Fn>
inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) {
  fn(rep);
  while (rep->tag == cord_internal::SUBSTRING) {
    cord_internal::CordRep* const child = rep->substring()->child;
    fn(child);
    rep = child;
  }
  if (rep->tag != cord_internal::BTREE) return;
  for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
    CordVisitReps(edge, fn);
  }
}
template <typename Predicate>
inline std::vector<cord_internal::CordRep*> CordCollectRepsIf(
Predicate&& predicate, cord_internal::CordRep* rep) {
std::vector<cord_internal::CordRep*> reps;
CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) {
if (predicate(rep)) reps.push_back(rep);
});
return reps;
}
// Returns, in visiting order, all nodes visited by CordVisitReps() on `rep`.
inline std::vector<cord_internal::CordRep*> CordCollectReps(
    cord_internal::CordRep* rep) {
  std::vector<cord_internal::CordRep*> collected;
  CordVisitReps(rep, [&collected](cord_internal::CordRep* node) {
    collected.push_back(node);
  });
  return collected;
}
// Appends the data referenced by `rep` to `s`.
// Chains of SUBSTRING nodes are collapsed by accumulating their `start`
// offsets; the number of bytes appended is the length of the outermost node.
// Logs a fatal error for tags other than SUBSTRING, BTREE, EXTERNAL and FLAT.
// NOTE(review): in the BTREE branch the accumulated offset and length are not
// applied -- every edge is appended in full -- so a SUBSTRING of a BTREE
// appears to be stringified as the whole tree. Confirm callers never pass one.
inline void CordToString(cord_internal::CordRep* rep, std::string& s) {
size_t offset = 0;
size_t length = rep->length;
while (rep->tag == cord_internal::SUBSTRING) {
offset += rep->substring()->start;
rep = rep->substring()->child;
}
if (rep->tag == cord_internal::BTREE) {
for (cord_internal::CordRep* edge : rep->btree()->Edges()) {
CordToString(edge, s);
}
} else if (rep->tag >= cord_internal::FLAT) {
s.append(rep->flat()->Data() + offset, length);
} else if (rep->tag == cord_internal::EXTERNAL) {
s.append(rep->external()->base + offset, length);
} else {
ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag);
}
}
// Returns the data referenced by `rep` as a newly created string.
inline std::string CordToString(cord_internal::CordRep* rep) {
  std::string result;
  // The final size is known up front, so allocate once.
  result.reserve(rep->length);
  CordToString(rep, result);
  return result;
}
// RAII Helper class to automatically unref reps on destruction.
//
// An AutoUnref instance owns one reference on each rep registered with it.
// It is not copyable: a copy would release those same references a second
// time when it is destroyed.
class AutoUnref {
 public:
  AutoUnref() = default;
  AutoUnref(const AutoUnref&) = delete;
  AutoUnref& operator=(const AutoUnref&) = delete;

  ~AutoUnref() {
    for (CordRep* rep : unrefs_) CordRep::Unref(rep);
  }

  // Adds `rep` to the list of reps to be unreffed at destruction.
  // Returns `rep`.
  template <typename CordRepType>
  CordRepType* Add(CordRepType* rep) {
    unrefs_.push_back(rep);
    return rep;
  }

  // Increments the reference count of `rep` by one, and adds it to
  // the list of reps to be unreffed at destruction. Returns `rep`.
  template <typename CordRepType>
  CordRepType* Ref(CordRepType* rep) {
    unrefs_.push_back(CordRep::Ref(rep));
    return rep;
  }

  // Increments the reference count of `rep` by one if `condition` is true,
  // and adds it to the list of reps to be unreffed at destruction.
  // Does nothing if `condition` is false. Returns `rep` in both cases.
  template <typename CordRepType>
  CordRepType* RefIf(bool condition, CordRepType* rep) {
    if (condition) unrefs_.push_back(CordRep::Ref(rep));
    return rep;
  }

 private:
  using CordRep = absl::cord_internal::CordRep;

  // Reps to unref (once per entry) when this instance is destroyed.
  std::vector<CordRep*> unrefs_;
};
} // namespace cordrep_testing
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_

View file

@ -0,0 +1,102 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_functions.h"
#include <atomic>
#include <cmath>
#include <limits>
#include <random>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/profiling/internal/exponential_biased.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// The average interval until the next sample. A value of 0 (or less) disables
// profiling while a value of 1 will profile all Cords.
std::atomic<int> g_cordz_mean_interval(50000);
} // namespace
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// Special negative 'not initialized' per thread value for cordz_next_sample.
static constexpr int64_t kInitCordzNextSample = -1;
// Per thread sampling state, starting out with the 'not initialized' marker
// as its next_sample value and a sample_stride of 1.
ABSL_CONST_INIT thread_local SamplingState cordz_next_sample = {
kInitCordzNextSample, 1};
// kIntervalIfDisabled is the number of profile-eligible events that need to
// occur before the code will confirm that cordz is still disabled.
constexpr int64_t kIntervalIfDisabled = 1 << 16;
// Slow path of cordz_should_profile(): decides whether the current event is
// sampled and updates `state` for the next sampling period.
// Returns 0 if the event should not be sampled. Otherwise returns 1 if all
// events are sampled (mean interval of 1), or else the stride that was stored
// in `state` before it was replaced.
// NOTE(review): the `<= 0` check and the 'initialized' check below read the
// thread local `cordz_next_sample` directly rather than `state`. The only
// caller visible here passes `cordz_next_sample` as `state`; confirm before
// calling this with any other state.
ABSL_ATTRIBUTE_NOINLINE int64_t
cordz_should_profile_slow(SamplingState& state) {
thread_local absl::profiling_internal::ExponentialBiased
exponential_biased_generator;
int32_t mean_interval = get_cordz_mean_interval();
// Check if we disabled profiling. If so, set the next sample to a "large"
// number to minimize the overhead of the should_profile codepath.
if (mean_interval <= 0) {
state = {kIntervalIfDisabled, kIntervalIfDisabled};
return 0;
}
// Check if we're always sampling.
if (mean_interval == 1) {
state = {1, 1};
return 1;
}
if (cordz_next_sample.next_sample <= 0) {
// If first check on current thread, check cordz_should_profile()
// again using the created (initial) stride in cordz_next_sample.
const bool initialized =
cordz_next_sample.next_sample != kInitCordzNextSample;
// Remember the stride of the period that just ended; it is the value
// reported to the caller if this event is sampled.
auto old_stride = state.sample_stride;
auto stride = exponential_biased_generator.GetStride(mean_interval);
state = {stride, stride};
bool should_sample = initialized || cordz_should_profile() > 0;
return should_sample ? old_stride : 0;
}
// Not yet at a sampling point: count this event and do not sample.
--state.next_sample;
return 0;
}
// Test-only hook: overwrites both fields of the calling thread's sampling
// state with `next_sample`.
void cordz_set_next_sample_for_testing(int64_t next_sample) {
  cordz_next_sample.next_sample = next_sample;
  cordz_next_sample.sample_stride = next_sample;
}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
// Returns the current mean sampling interval, using an acquire load that
// pairs with the release store in set_cordz_mean_interval().
int32_t get_cordz_mean_interval() {
return g_cordz_mean_interval.load(std::memory_order_acquire);
}
// Sets the mean sampling interval, using a release store that pairs with the
// acquire load in get_cordz_mean_interval().
void set_cordz_mean_interval(int32_t mean_interval) {
g_cordz_mean_interval.store(mean_interval, std::memory_order_release);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,87 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_FUNCTIONS_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_FUNCTIONS_H_
#include <stdint.h>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/optimization.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Returns the current sample rate. This represents the average interval
// between samples.
int32_t get_cordz_mean_interval();
// Sets the sample rate with the average interval between samples.
void set_cordz_mean_interval(int32_t mean_interval);
// Cordz is only enabled on Linux with thread_local support.
#if defined(ABSL_INTERNAL_CORDZ_ENABLED)
#error ABSL_INTERNAL_CORDZ_ENABLED cannot be set directly
#elif defined(__linux__) && defined(ABSL_HAVE_THREAD_LOCAL)
#define ABSL_INTERNAL_CORDZ_ENABLED 1
#endif
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// Per-thread state used to decide when the next sample is taken.
struct SamplingState {
// Number of events until the next sample event.
int64_t next_sample;
// Stride of the current sampling period; cordz_should_profile_slow() returns
// the previous period's stride when it decides to sample.
int64_t sample_stride;
};
// cordz_next_sample is the number of events until the next sample event. If
// the value is 1 or less, the code will check on the next event if cordz is
// enabled, and if so, will sample the Cord. cordz is only enabled when we can
// use thread locals.
ABSL_CONST_INIT extern thread_local SamplingState cordz_next_sample;
// Determines if the next sample should be profiled.
// Returns:
// 0: Do not sample
// >0: Sample with the stride of the last sampling period
int64_t cordz_should_profile_slow(SamplingState& state);
// Determines if the next sample should be profiled.
// Returns:
// 0: Do not sample
// >0: Sample with the stride of the last sampling period
inline int64_t cordz_should_profile() {
if (ABSL_PREDICT_TRUE(cordz_next_sample.next_sample > 1)) {
cordz_next_sample.next_sample--;
return 0;
}
return cordz_should_profile_slow(cordz_next_sample);
}
// Sets the interval until the next sample (for testing only)
void cordz_set_next_sample_for_testing(int64_t next_sample);
#else // ABSL_INTERNAL_CORDZ_ENABLED
// Cordz is disabled in this configuration: nothing is ever sampled, and
// setting the next sample is a no-op.
inline int64_t cordz_should_profile() { return 0; }
inline void cordz_set_next_sample_for_testing(int64_t) {}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_FUNCTIONS_H_

View file

@ -0,0 +1,147 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_functions.h"
#include <thread> // NOLINT we need real clean new threads
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::Eq;
using ::testing::Ge;
using ::testing::Le;
// The mean interval that was set is the one that is read back.
TEST(CordzFunctionsTest, SampleRate) {
  const int32_t saved_interval = get_cordz_mean_interval();
  const int32_t new_interval = 123;
  set_cordz_mean_interval(new_interval);
  EXPECT_THAT(get_cordz_mean_interval(), Eq(new_interval));
  // Restore the global setting for subsequent tests.
  set_cordz_mean_interval(saved_interval);
}
// Cordz is disabled when we don't have thread_local. All calls to
// should_profile will return false when cordz is disabled, so we might want to
// avoid those tests.
#ifdef ABSL_INTERNAL_CORDZ_ENABLED
// With a mean interval of 0, nothing is sampled and the next check is pushed
// out by the 'disabled' interval.
TEST(CordzFunctionsTest, ShouldProfileDisable) {
  const int32_t saved_interval = get_cordz_mean_interval();
  set_cordz_mean_interval(0);
  cordz_set_next_sample_for_testing(0);

  EXPECT_EQ(cordz_should_profile(), 0);
  // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc.
  EXPECT_THAT(cordz_next_sample.next_sample, Eq(1 << 16));

  // Restore the global setting for subsequent tests.
  set_cordz_mean_interval(saved_interval);
}
// With a mean interval of 1, every event is sampled and the next sample stays
// due immediately.
TEST(CordzFunctionsTest, ShouldProfileAlways) {
  const int32_t saved_interval = get_cordz_mean_interval();
  set_cordz_mean_interval(1);
  cordz_set_next_sample_for_testing(1);

  EXPECT_GT(cordz_should_profile(), 0);
  EXPECT_THAT(cordz_next_sample.next_sample, Le(1));

  // Restore the global setting for subsequent tests.
  set_cordz_mean_interval(saved_interval);
}
// The very first check on a new thread must not always result in a sample:
// keeps starting fresh threads until one of them does not sample its first
// event, failing if that takes more than 1000 tries.
TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) {
  // Restores the process-wide mean interval on every exit path, including the
  // early return taken when the ASSERT_THAT below fails, so that this test
  // does not leak its setting into the tests that run after it.
  struct MeanIntervalRestorer {
    ~MeanIntervalRestorer() { set_cordz_mean_interval(saved); }
    const int32_t saved = get_cordz_mean_interval();
  } restore_mean_interval;

  // Set large enough interval such that the chance of 'tons' of threads
  // randomly sampling the first call is infinitely small.
  set_cordz_mean_interval(10000);
  int tries = 0;
  bool sampled = false;
  do {
    ++tries;
    ASSERT_THAT(tries, Le(1000));
    // A new thread starts out with uninitialized thread local sampling state.
    std::thread thread([&sampled] { sampled = cordz_should_profile() > 0; });
    thread.join();
  } while (sampled);
}
// Statistical check of the generated sampling intervals: the sum of kSamples
// freshly generated next_sample values must fall within a precomputed
// confidence interval for the configured mean interval.
TEST(CordzFunctionsTest, ShouldProfileRate) {
static constexpr int kDesiredMeanInterval = 1000;
static constexpr int kSamples = 10000;
int32_t orig_sample_rate = get_cordz_mean_interval();
set_cordz_mean_interval(kDesiredMeanInterval);
int64_t sum_of_intervals = 0;
for (int i = 0; i < kSamples; i++) {
// Setting next_sample to 0 will force cordz_should_profile to generate a
// new value for next_sample each iteration.
cordz_set_next_sample_for_testing(0);
cordz_should_profile();
sum_of_intervals += cordz_next_sample.next_sample;
}
// The sum of independent exponential variables is an Erlang distribution,
// which is a gamma distribution where the shape parameter is equal to the
// number of summands. The distribution used for cordz_should_profile is
// actually floor(Exponential(1/mean)) which introduces bias. However, we can
// apply the squint-really-hard correction factor. That is, when mean is
// large, then if we squint really hard the shape of the distribution between
// N and N+1 looks like a uniform distribution. On average, each value for
// next_sample will be about 0.5 lower than we would expect from an
// exponential distribution. This squint-really-hard correction approach won't
// work when mean is smaller than about 10 but works fine when mean is 1000.
//
// We can use R to calculate a confidence interval. This
// shows how to generate a confidence interval with a false positive rate of
// one in a billion.
//
// $ R -q
// > mean = 1000
// > kSamples = 10000
// > errorRate = 1e-9
// > correction = -kSamples / 2
// > low = qgamma(errorRate/2, kSamples, 1/mean) + correction
// > high = qgamma(1 - errorRate/2, kSamples, 1/mean) + correction
// > low
// [1] 9396115
// > high
// [1] 10618100
EXPECT_THAT(sum_of_intervals, Ge(9396115));
EXPECT_THAT(sum_of_intervals, Le(10618100));
// Restore the global setting for subsequent tests.
set_cordz_mean_interval(orig_sample_rate);
}
#else // ABSL_INTERNAL_CORDZ_ENABLED
// Built only when ABSL_INTERNAL_CORDZ_ENABLED is not set: even with a mean
// interval of 1 and a zeroed next-sample counter, cordz_should_profile() must
// report that no sample should be taken.
TEST(CordzFunctionsTest, ShouldProfileDisabled) {
  const int32_t saved_mean_interval = get_cordz_mean_interval();
  set_cordz_mean_interval(1);
  cordz_set_next_sample_for_testing(0);

  const auto should_profile = cordz_should_profile();
  EXPECT_FALSE(should_profile);

  // Restore the mean interval that was active before this test.
  set_cordz_mean_interval(saved_mean_interval);
}
#endif // ABSL_INTERNAL_CORDZ_ENABLED
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,165 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_handle.h"
#include <atomic>
#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK
#include "absl/base/no_destructor.h"
#include "absl/synchronization/mutex.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// State of the delete queue: the mutex protecting the queue links and the
// pointer to the most recently queued CordzHandle (the tail).
struct Queue {
  Queue() = default;

  absl::Mutex mutex;
  std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr};

  // Reports whether the delete queue currently has no entries. The mutex is
  // intentionally not taken; the tail is observed with an acquire load
  // instead. Delete() relies on this to test for a delete queue without
  // locking. This is only sound because the caller is already 'being deleted'
  // and therefore can not be newly discovered by a snapshot that is
  // concurrently 'being constructed': any such discovery ('find', 'first',
  // 'next', etc) must itself provide proper 'happens before / after'
  // semantics and atomic fences.
  bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
    CordzHandle* const tail = dq_tail.load(std::memory_order_acquire);
    return tail == nullptr;
  }
};
// Returns the Queue instance used by all CordzHandle code in this file. The
// instance is a function-local static wrapped in absl::NoDestructor.
static Queue& GlobalQueue() {
  static absl::NoDestructor<Queue> queue_instance;
  Queue& queue = *queue_instance;
  return queue;
}
} // namespace
// Constructs a handle. A snapshot handle appends itself to the tail of the
// global delete queue; a regular handle is not linked into the queue here.
CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) {
  Queue& queue = GlobalQueue();
  if (!is_snapshot) return;

  MutexLock lock(&queue.mutex);
  CordzHandle* const old_tail = queue.dq_tail.load(std::memory_order_acquire);
  if (old_tail != nullptr) {
    // Link ourselves behind the current tail.
    dq_prev_ = old_tail;
    old_tail->dq_next_ = this;
  }
  queue.dq_tail.store(this, std::memory_order_release);
}
// Destroys a handle. A snapshot handle unlinks itself from the delete queue
// and, when it was the head of the queue, also deletes the run of
// non-snapshot handles that directly followed it. Non-snapshot handles have
// nothing to do here.
CordzHandle::~CordzHandle() {
  Queue& queue = GlobalQueue();
  if (!is_snapshot_) return;

  // Handles are collected under the lock but deleted after it is released.
  std::vector<CordzHandle*> doomed;
  {
    MutexLock lock(&queue.mutex);
    CordzHandle* successor = dq_next_;
    if (dq_prev_ != nullptr) {
      // An older entry precedes us, so nothing may be deleted yet; simply
      // bypass this node in the forward direction.
      dq_prev_->dq_next_ = successor;
    } else {
      // We were the head of the queue: every handle up to the next snapshot
      // (or the end of the list) can now be released.
      for (; successor != nullptr && !successor->is_snapshot_;
           successor = successor->dq_next_) {
        doomed.push_back(successor);
      }
    }
    if (successor != nullptr) {
      successor->dq_prev_ = dq_prev_;
    } else {
      // Nothing remains behind us: our predecessor (possibly null) becomes
      // the new tail.
      queue.dq_tail.store(dq_prev_, std::memory_order_release);
    }
  }
  for (CordzHandle* victim : doomed) {
    delete victim;
  }
}
// Returns true for snapshot handles, and for any other handle when the global
// delete queue is observed to be empty.
bool CordzHandle::SafeToDelete() const {
  if (is_snapshot_) return true;
  return GlobalQueue().IsEmpty();
}
// Deletes `handle` right away when that is safe; otherwise appends it to the
// tail of the delete queue so that it is released later by the destructor of
// a snapshot handle. `handle` must not be null (asserted); a null handle is
// ignored when assertions are compiled out.
void CordzHandle::Delete(CordzHandle* handle) {
  assert(handle);
  if (handle == nullptr) return;

  Queue& queue = GlobalQueue();
  if (!handle->SafeToDelete()) {
    MutexLock lock(&queue.mutex);
    CordzHandle* const tail = queue.dq_tail.load(std::memory_order_acquire);
    if (tail != nullptr) {
      handle->dq_prev_ = tail;
      tail->dq_next_ = handle;
      queue.dq_tail.store(handle, std::memory_order_release);
      return;
    }
    // The queue drained between the lock-free check and taking the lock, so
    // fall through and delete directly once the lock is released.
  }
  delete handle;
}
std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() {
std::vector<const CordzHandle*> handles;
Queue& global_queue = GlobalQueue();
MutexLock lock(&global_queue.mutex);
CordzHandle* dq_tail = global_queue.dq_tail.load(std::memory_order_acquire);
for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) {
handles.push_back(p);
}
return handles;
}
// Returns whether `handle` may be inspected under the protection of this
// snapshot handle:
//  - false when this instance is not a snapshot;
//  - true when `handle` is null;
//  - false when `handle` is itself a snapshot;
//  - otherwise the delete queue is walked from the tail towards the head:
//    true when `handle` is reached before this snapshot (it was queued after
//    the snapshot was created), false when it is reached after this snapshot
//    (it was queued before the snapshot existed), and true when `handle` is
//    not on the delete queue at all.
bool CordzHandle::DiagnosticsHandleIsSafeToInspect(
    const CordzHandle* handle) const {
  if (!is_snapshot_) return false;
  if (handle == nullptr) return true;
  if (handle->is_snapshot_) return false;
  bool snapshot_found = false;
  Queue& global_queue = GlobalQueue();
  MutexLock lock(&global_queue.mutex);
  // Read the tail with an explicit acquire load, matching every other reader
  // of dq_tail in this file, instead of the implicit std::atomic conversion.
  const CordzHandle* p = global_queue.dq_tail.load(std::memory_order_acquire);
  for (; p; p = p->dq_prev_) {
    if (p == handle) return !snapshot_found;
    if (p == this) snapshot_found = true;
  }
  ABSL_ASSERT(snapshot_found);  // Assert that 'this' is in delete queue.
  return true;
}
std::vector<const CordzHandle*>
CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() {
std::vector<const CordzHandle*> handles;
if (!is_snapshot()) {
return handles;
}
Queue& global_queue = GlobalQueue();
MutexLock lock(&global_queue.mutex);
for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) {
if (!p->is_snapshot()) {
handles.push_back(p);
}
}
return handles;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,98 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_HANDLE_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_HANDLE_H_
#include <atomic>
#include <vector>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// This base class allows multiple types of object (CordzInfo and
// CordzSnapshot) to exist simultaneously on the delete queue, a doubly linked
// list that is traversed using dq_prev_ and dq_next_ and whose tail pointer
// is owned by the implementation file. The delete queue guarantees that once
// a profiler has created a CordzSnapshot and has gained visibility into a
// CordzInfo object, that CordzInfo object will not be deleted prematurely.
// This allows the profiler to inspect all CordzInfo objects that are alive
// without needing to hold a global lock.
class ABSL_DLL CordzHandle {
public:
// Creates a regular (non-snapshot) handle.
CordzHandle() : CordzHandle(false) {}
// Returns true if this handle was constructed as a snapshot.
bool is_snapshot() const { return is_snapshot_; }
// Returns true if this instance is safe to be deleted because it is either a
// snapshot, which is always safe to delete, or not included in the global
// delete queue and thus not included in any snapshot.
// Callers are responsible for making sure this instance can not be newly
// discovered by other threads. For example, CordzInfo instances first de-list
// themselves from the global CordzInfo list before determining if they are
// safe to be deleted directly.
// If SafeToDelete returns false, callers MUST use the Delete() method to
// safely queue CordzHandle instances for deletion.
bool SafeToDelete() const;
// Deletes the provided instance, or puts it on the delete queue to be deleted
// once there are no more snapshot instances potentially referencing the
// instance. `handle` should not be null.
static void Delete(CordzHandle* handle);
// Returns the current entries in the delete queue in LIFO order (most
// recently queued entry first).
static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue();
// Returns true if the provided handle is nullptr or guarded by this handle.
// Always returns false when this handle is not a snapshot, or when `handle`
// is itself a snapshot.
// Since the CordzSnapshot token is itself a CordzHandle, this method will
// allow tests to check if that token is keeping an arbitrary CordzHandle
// alive.
bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const;
// Returns the current entries in the delete queue that are protected by
// this. CordzHandle objects are only placed on the delete queue after
// CordzHandle::Delete is called with them as an argument. Only CordzHandle
// objects that are not also CordzSnapshot objects will be included in the
// return vector. For each of the handles in the return vector, the earliest
// that their memory can be freed is when this CordzSnapshot object is
// deleted. Returns an empty vector if this handle is not a snapshot.
// NOTE(review): this comment used to promise LIFO order, but the
// implementation follows dq_next_ starting at this handle, which yields
// entries in the order they were queued (oldest first) -- confirm which
// order callers expect.
std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles();
protected:
// Creates a handle; a snapshot handle (`is_snapshot` == true) is added to the
// delete queue on construction.
explicit CordzHandle(bool is_snapshot);
// Protected: instances are destroyed through CordzHandle::Delete() or through
// the destructor of a derived class.
virtual ~CordzHandle();
private:
// Fixed at construction; true for snapshot handles.
const bool is_snapshot_;
// dq_prev_ and dq_next_ require the global queue mutex to be held.
// Unfortunately that mutex lives in the implementation file, so thread
// annotations can not express this requirement here.
CordzHandle* dq_prev_ = nullptr;
CordzHandle* dq_next_ = nullptr;
};
// A CordzHandle that is constructed in snapshot mode, i.e. is_snapshot()
// returns true for it.
class CordzSnapshot : public CordzHandle {
 public:
  CordzSnapshot() : CordzHandle(/*is_snapshot=*/true) {}
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_HANDLE_H_

View file

@ -0,0 +1,265 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_handle.h"
#include <random>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::SizeIs;
// Shorthand for CordzHandle::DiagnosticsGetDeleteQueue() that keeps the
// expectations in the tests below compact.
std::vector<const CordzHandle*> DeleteQueue() {
  std::vector<const CordzHandle*> queue =
      CordzHandle::DiagnosticsGetDeleteQueue();
  return queue;
}
// Non-snapshot CordzHandle that sets `*deleted` to true from its destructor,
// so tests can observe exactly when an instance is destroyed.
struct CordzHandleDeleteTracker : public CordzHandle {
  bool* deleted;

  explicit CordzHandleDeleteTracker(bool* deleted_flag)
      : deleted(deleted_flag) {}

  ~CordzHandleDeleteTracker() override { *deleted = true; }
};
// The delete queue is expected to hold no entries when nothing was queued.
TEST(CordzHandleTest, DeleteQueueIsEmpty) {
  const std::vector<const CordzHandle*> queue = DeleteQueue();
  EXPECT_THAT(queue, SizeIs(0));
}
// Without any snapshot alive, a regular handle is safe to delete and
// CordzHandle::Delete() destroys it immediately without queueing it.
TEST(CordzHandleTest, CordzHandleCreateDelete) {
  bool was_deleted = false;
  CordzHandleDeleteTracker* tracker =
      new CordzHandleDeleteTracker(&was_deleted);
  EXPECT_FALSE(tracker->is_snapshot());
  EXPECT_TRUE(tracker->SafeToDelete());
  EXPECT_THAT(DeleteQueue(), SizeIs(0));

  CordzHandle::Delete(tracker);
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_TRUE(was_deleted);
}
// A snapshot is placed on the delete queue while it is alive and removes
// itself from the queue again when it is destroyed.
TEST(CordzHandleTest, CordzSnapshotCreateDelete) {
  CordzSnapshot* token = new CordzSnapshot();
  EXPECT_TRUE(token->is_snapshot());
  EXPECT_TRUE(token->SafeToDelete());
  EXPECT_THAT(DeleteQueue(), ElementsAre(token));

  delete token;
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
}
// While a snapshot is alive, Delete() only queues a regular handle; the
// handle is destroyed once the snapshot itself is destroyed.
TEST(CordzHandleTest, CordzHandleCreateDeleteWithSnapshot) {
  bool was_deleted = false;
  CordzSnapshot* token = new CordzSnapshot();
  CordzHandleDeleteTracker* tracker =
      new CordzHandleDeleteTracker(&was_deleted);
  EXPECT_FALSE(tracker->SafeToDelete());

  // Deletion is deferred: the handle ends up queued behind the snapshot.
  CordzHandle::Delete(tracker);
  EXPECT_THAT(DeleteQueue(), ElementsAre(tracker, token));
  EXPECT_FALSE(was_deleted);
  EXPECT_FALSE(tracker->SafeToDelete());

  // Destroying the snapshot releases the queued handle.
  delete token;
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_TRUE(was_deleted);
}
// Interleaves three snapshots with three deleted handles and verifies that a
// queued handle is only destroyed once every snapshot that was queued before
// it has been destroyed.
TEST(CordzHandleTest, MultiSnapshot) {
  bool deleted[3] = {false, false, false};
  CordzSnapshot* snapshot[3];
  CordzHandleDeleteTracker* handle[3];
  for (int i = 0; i < 3; ++i) {
    snapshot[i] = new CordzSnapshot();
    handle[i] = new CordzHandleDeleteTracker(&deleted[i]);
    CordzHandle::Delete(handle[i]);
  }
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1],
                                         snapshot[1], handle[0], snapshot[0]));
  EXPECT_THAT(deleted, ElementsAre(false, false, false));

  // Removing a snapshot from the middle of the queue frees nothing, because
  // snapshot[0] is still ahead of all queued handles.
  delete snapshot[1];
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1],
                                         handle[0], snapshot[0]));
  EXPECT_THAT(deleted, ElementsAre(false, false, false));

  // Removing the head snapshot frees every handle up to the next snapshot.
  delete snapshot[0];
  EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2]));
  EXPECT_THAT(deleted, ElementsAre(true, true, false));

  // Removing the last snapshot frees the remaining handle. The third matcher
  // used to be the `deleted` array itself, which only matched because the
  // decayed, non-null pointer converts to `true`.
  delete snapshot[2];
  EXPECT_THAT(DeleteQueue(), SizeIs(0));
  EXPECT_THAT(deleted, ElementsAre(true, true, true));
}
// Exercises DiagnosticsHandleIsSafeToInspect() with two snapshots and two
// handles: a handle that was sent to Delete() before a snapshot was created
// is not safe to inspect through that snapshot.
TEST(CordzHandleTest, DiagnosticsHandleIsSafeToInspect) {
  CordzSnapshot first_snapshot;
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(nullptr));

  CordzHandle* first_handle = new CordzHandle();
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(first_handle));
  CordzHandle::Delete(first_handle);
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(first_handle));

  CordzSnapshot second_snapshot;
  CordzHandle* second_handle = new CordzHandle();
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(first_handle));
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(second_handle));
  EXPECT_FALSE(second_snapshot.DiagnosticsHandleIsSafeToInspect(first_handle));
  EXPECT_TRUE(second_snapshot.DiagnosticsHandleIsSafeToInspect(second_handle));

  CordzHandle::Delete(second_handle);
  EXPECT_TRUE(first_snapshot.DiagnosticsHandleIsSafeToInspect(first_handle));
}
// Verifies which deleted handles each snapshot reports as safe to inspect.
TEST(CordzHandleTest, DiagnosticsGetSafeToInspectDeletedHandles) {
  EXPECT_THAT(DeleteQueue(), IsEmpty());

  CordzHandle* tracked = new CordzHandle();
  CordzSnapshot* older_snapshot = new CordzSnapshot();

  // The older snapshot should be able to see the handle.
  EXPECT_THAT(DeleteQueue(), ElementsAre(older_snapshot));
  EXPECT_TRUE(older_snapshot->DiagnosticsHandleIsSafeToInspect(tracked));
  EXPECT_THAT(older_snapshot->DiagnosticsGetSafeToInspectDeletedHandles(),
              IsEmpty());

  // From here on the handle stays safe to inspect for as long as the older
  // snapshot is alive. Only that snapshot can vouch for it, so the handle
  // remains hidden from the newer snapshot created below.
  CordzHandle::Delete(tracked);

  // The newer snapshot must not see the handle: it had already been sent to
  // Delete by the time this snapshot was created.
  CordzSnapshot* newer_snapshot = new CordzSnapshot();

  // DeleteQueue elements are in LIFO order.
  EXPECT_THAT(DeleteQueue(),
              ElementsAre(newer_snapshot, tracked, older_snapshot));
  EXPECT_TRUE(older_snapshot->DiagnosticsHandleIsSafeToInspect(tracked));
  EXPECT_FALSE(newer_snapshot->DiagnosticsHandleIsSafeToInspect(tracked));
  EXPECT_THAT(older_snapshot->DiagnosticsGetSafeToInspectDeletedHandles(),
              ElementsAre(tracked));
  EXPECT_THAT(newer_snapshot->DiagnosticsGetSafeToInspectDeletedHandles(),
              IsEmpty());

  CordzHandle::Delete(older_snapshot);
  EXPECT_THAT(DeleteQueue(), ElementsAre(newer_snapshot));
  CordzHandle::Delete(newer_snapshot);
  EXPECT_THAT(DeleteQueue(), IsEmpty());
}
// Hammers creation and deletion of CordzHandle and CordzSnapshot objects from
// several threads at once, giving tsan some time to chew on it and look for
// memory problems.
TEST(CordzHandleTest, MultiThreaded) {
  Notification stop;
  static constexpr int kNumThreads = 4;
  // The number of handles is kept small on purpose so that the delete queue
  // naturally drains to empty during the run. With, say, 100 handles that
  // would virtually never happen. With 10 handles and around 50k iterations
  // in each of 4 threads, the delete queue appears to become empty around 200
  // times.
  static constexpr int kNumHandles = 10;

  // Every thread picks a random slot and atomically swaps its own CordzHandle
  // with whatever the slot holds. That way no thread manipulates a
  // CordzHandle that another thread might be operating on.
  std::vector<std::atomic<CordzHandle*>> handles(kNumHandles);

  // Set as soon as any thread obtained some 'safe to inspect' handles. On
  // some platforms and OSS tests, pool threads may be starved or stalled, or
  // may simply get unlucky 'handle' coin tosses, so the test is satisfied
  // with 'some' thread having done something meaningful, which should
  // minimize the potential for flakes.
  std::atomic<bool> found_safe_to_inspect(false);

  {
    absl::synchronization_internal::ThreadPool pool(kNumThreads);
    for (int worker = 0; worker < kNumThreads; ++worker) {
      pool.Schedule([&stop, &handles, &found_safe_to_inspect]() {
        std::minstd_rand rng;
        std::uniform_int_distribution<int> pick_kind(0, 2);
        std::uniform_int_distribution<int> pick_slot(0, kNumHandles - 1);

        while (!stop.HasBeenNotified()) {
          // 0: regular handle, 1: snapshot, anything else: empty slot.
          const int kind = pick_kind(rng);
          CordzHandle* fresh = nullptr;
          if (kind == 0) {
            fresh = new CordzHandle();
          } else if (kind == 1) {
            fresh = new CordzSnapshot();
          }

          CordzHandle* const displaced =
              handles[pick_slot(rng)].exchange(fresh);
          if (displaced == nullptr) continue;

          const std::vector<const CordzHandle*> inspectable =
              displaced->DiagnosticsGetSafeToInspectDeletedHandles();
          for (const CordzHandle* entry : inspectable) {
            // This is a tight loop, so stop at the first failure rather than
            // flooding the log with error messages.
            ASSERT_FALSE(entry->is_snapshot());
          }
          if (!inspectable.empty()) {
            found_safe_to_inspect.store(true);
          }
          CordzHandle::Delete(displaced);
        }

        // Every thread tries to clean up everything. Whichever thread gets
        // here last is guaranteed to finish the job, since by then nothing is
        // left that could create new handles.
        for (std::atomic<CordzHandle*>& slot : handles) {
          CordzHandle* const leftover = slot.exchange(nullptr);
          if (leftover != nullptr) {
            CordzHandle::Delete(leftover);
          }
        }
      });
    }

    // Let the threads hammer away for a little while so tsan can spot errors.
    absl::SleepFor(absl::Seconds(3));
    stop.Notify();
  }

  // Confirm that the test did *something*. This is satisfied as long as any
  // thread has deleted a CordzSnapshot object and a non-snapshot CordzHandle
  // was deleted after that CordzSnapshot was created.
  // See also the comments on `found_safe_to_inspect`.
  EXPECT_TRUE(found_safe_to_inspect.load());
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,422 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_info.h"
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/internal/spinlock.h"
#include "absl/container/inlined_vector.h"
#include "absl/debugging/stacktrace.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t CordzInfo::kMaxStackDepth;
#endif
ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{absl::kConstInit};
namespace {
// CordRepAnalyzer performs the analysis of a cord.
//
// It computes absolute node counts and total memory usage, and an 'estimated
// fair share memory usage' statistic.
// Conceptually, it divides the 'memory usage' at each location in the 'cord
// graph' by the cumulative reference count of that location. The cumulative
// reference count is the factored total of all edges leading into that node.
//
// The top level node is treated specially: we assume the current thread
// (typically called from the CordzHandler) to hold a reference purely to
// perform a safe analysis, and not being part of the application. So we
// subtract 1 from the reference count of the top node to compute the
// 'application fair share' excluding the reference of the current thread.
//
// An example of fair sharing, and why we multiply reference counts:
// Assume we have 2 CordReps, both being a Substring referencing a Flat:
// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2)
// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2)
//
// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not
// referenced directly anywhere else. Translated into a 'fair share', we then
// attribute 50% of the memory (memory / refcount = 2) to each incoming edge.
// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the
// memory cost below it, i.e.: the fair share of Rep A of the memory used by C
// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'.
// It is also easy to see how all incoming edges add up to 100%.
class CordRepAnalyzer {
 public:
  // Binds the analyzer to `statistics`; all results are accumulated into it.
  explicit CordRepAnalyzer(CordzStatistics& statistics)
      : statistics_(statistics) {}

  // Walks `rep` and adds its node counts and memory figures to the bound
  // statistics. Nothing is reset first: computed values are added on top of
  // whatever `statistics` already holds.
  void AnalyzeCordRep(const CordRep* rep) {
    ABSL_ASSERT(rep != nullptr);

    // Per the class comments, the top level node is assumed to carry one
    // reference that exists only for this analysis, so use refcount - 1 for
    // it (but never less than 1).
    const size_t top_refs = rep->refcount.Get();
    RepRef current{rep, (top_refs > 1) ? top_refs - 1 : 1};

    // An optional CRC node sits on top of everything else.
    if (current.tag() == CRC) {
      ++statistics_.node_count;
      ++statistics_.node_counts.crc;
      memory_usage_.Add(sizeof(CordRepCrc), current.refcount);
      current = current.Child(current.rep->crc()->child);
    }

    // Consume the top level substrings / flat / external nodes. Whatever is
    // left over must be either null or a btree.
    current = CountLinearReps(current, memory_usage_);
    if (current.tag() == CordRepKind::BTREE) {
      AnalyzeBtree(current);
    } else {
      ABSL_ASSERT(current.tag() == CordRepKind::UNUSED_0);
    }

    // Publish the accumulated memory figures.
    statistics_.estimated_memory_usage += memory_usage_.total;
    statistics_.estimated_fair_share_memory_usage +=
        static_cast<size_t>(memory_usage_.fair_share);
  }

 private:
  // A CordRep* paired with its cumulative reference count, i.e. the product
  // of its own refcount and those of the reps above it. For example, a
  // substring with a refcount of 3 over a flat with a refcount of 4 yields
  // RepRef refcounts of 3 and 12 respectively.
  struct RepRef {
    const CordRep* rep;
    size_t refcount;

    // Returns the RepRef for `child`, whose cumulative refcount is this
    // instance's refcount multiplied by the child's own refcount. A null
    // `child` yields a null RepRef with a refcount of 0.
    RepRef Child(const CordRep* child) const {
      return child == nullptr
                 ? RepRef{nullptr, 0}
                 : RepRef{child, refcount * child->refcount.Get()};
    }

    // Returns the tag of the referenced rep, or UNUSED_0 for a null RepRef.
    constexpr CordRepKind tag() const {
      ABSL_ASSERT(rep == nullptr || rep->tag != CordRepKind::UNUSED_0);
      return rep ? static_cast<CordRepKind>(rep->tag) : CordRepKind::UNUSED_0;
    }
  };

  // Running totals of raw and fair-share memory usage.
  struct MemoryUsage {
    size_t total = 0;
    double fair_share = 0.0;

    // Accounts for `size` bytes that are shared through a cumulative
    // (recursive) reference count of `refcount`.
    void Add(size_t size, size_t refcount) {
      total += size;
      fair_share += static_cast<double>(size) / static_cast<double>(refcount);
    }
  };

  // Counts one flat node with the provided allocated size, and files it under
  // the matching size bucket. Flats larger than 1024 bytes only show up in
  // the overall flat count.
  void CountFlat(size_t size) {
    ++statistics_.node_count;
    ++statistics_.node_counts.flat;
    if (size <= 64) {
      ++statistics_.node_counts.flat_64;
      return;
    }
    if (size <= 128) {
      ++statistics_.node_counts.flat_128;
      return;
    }
    if (size <= 256) {
      ++statistics_.node_counts.flat_256;
      return;
    }
    if (size <= 512) {
      ++statistics_.node_counts.flat_512;
      return;
    }
    if (size <= 1024) {
      ++statistics_.node_counts.flat_1k;
    }
  }

  // Processes the 'linear' reps (substrings, then an optional flat or
  // external) that need neither iteration nor recursion. Returns a null
  // RepRef when the chain was consumed completely; otherwise returns the
  // first rep that is not a substring, flat or external (the callers in this
  // class expect that to be a btree). Node counts are added to `statistics_`
  // and memory usage is added to `memory_usage`.
  RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) {
    // Peel off all substrings.
    while (rep.tag() == SUBSTRING) {
      ++statistics_.node_count;
      ++statistics_.node_counts.substring;
      memory_usage.Add(sizeof(CordRepSubstring), rep.refcount);
      rep = rep.Child(rep.rep->substring()->child);
    }

    // A flat terminates the chain.
    if (rep.tag() >= FLAT) {
      const size_t allocated = rep.rep->flat()->AllocatedSize();
      CountFlat(allocated);
      memory_usage.Add(allocated, rep.refcount);
      return RepRef{nullptr, 0};
    }

    // An external terminates the chain as well.
    if (rep.tag() == EXTERNAL) {
      ++statistics_.node_count;
      ++statistics_.node_counts.external;
      const size_t external_size =
          rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>);
      memory_usage.Add(external_size, rep.refcount);
      return RepRef{nullptr, 0};
    }

    return rep;
  }

  // Accounts for the btree node `rep` and everything below it. Edges of
  // nodes above height 0 are btree nodes themselves and are analyzed
  // recursively; edges of height 0 nodes are counted as linear reps.
  void AnalyzeBtree(RepRef rep) {
    ++statistics_.node_count;
    ++statistics_.node_counts.btree;
    memory_usage_.Add(sizeof(CordRepBtree), rep.refcount);

    const CordRepBtree* tree = rep.rep->btree();
    const bool has_subtrees = tree->height() > 0;
    for (CordRep* edge : tree->Edges()) {
      if (has_subtrees) {
        AnalyzeBtree(rep.Child(edge));
      } else {
        CountLinearReps(rep.Child(edge), memory_usage_);
      }
    }
  }

  CordzStatistics& statistics_;
  MemoryUsage memory_usage_;
};
} // namespace
// Returns the current head of the global CordzInfo list, or null when the
// list is empty. `snapshot` must be a snapshot handle.
CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) {
  ABSL_ASSERT(snapshot.is_snapshot());

  // No lock is needed for this 'unsafe' load: the provided CordzSnapshot is
  // guaranteed to keep the instance that 'head' points to alive, so an
  // acquire load of the current value is sufficient.
  CordzInfo* const first = global_list_.head.load(std::memory_order_acquire);

  // DEBUG builds additionally enforce that the value is covered by the
  // snapshot: ODR violations may lead to 'snapshot' and 'global_list_' living
  // in different libraries / modules.
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(first));
  return first;
}
// Returns the CordzInfo that follows this one in the global list, or null at
// the end of the list. `snapshot` must be a snapshot handle.
CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const {
  ABSL_ASSERT(snapshot.is_snapshot());

  // As in Head(), no mutex is needed: an acquire load is sufficient.
  CordzInfo* const successor = ci_next_.load(std::memory_order_acquire);
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this));
  ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(successor));
  return successor;
}
// Starts tracking `cord`: creates a CordzInfo without a parent, stores it in
// `cord` and adds it to the global list. `cord` must hold a tree and must not
// be profiled yet (both asserted).
void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method,
                          int64_t sampling_stride) {
  assert(cord.is_tree());
  assert(!cord.is_profiled());

  CordzInfo* const info = new CordzInfo(cord.as_tree(), /*src=*/nullptr,
                                        method, sampling_stride);
  cord.set_cordz_info(info);
  info->Track();
}
// Starts tracking `cord` as a sample derived from `src`: any CordzInfo that
// `cord` already has is untracked first, then a new CordzInfo is created with
// the CordzInfo of `src` as its parent and with the parent's sampling stride.
// Both `cord` and `src` must hold a tree (asserted).
// NOTE(review): src.cordz_info() is dereferenced unconditionally to read the
// sampling stride, so `src` is presumably required to be profiled. The caller
// visible in this file (MaybeTrackCordImpl) checks src.is_profiled() first --
// confirm for other callers.
void CordzInfo::TrackCord(InlineData& cord, const InlineData& src,
                          MethodIdentifier method) {
  assert(cord.is_tree());
  assert(src.is_tree());

  // Unsample the current cord: it is being replaced with 'src', so any method
  // history it collected is no longer relevant.
  if (CordzInfo* const previous = cord.cordz_info()) {
    previous->Untrack();
  }

  // Start the new cord sample.
  auto* const parent = src.cordz_info();
  CordzInfo* const info = new CordzInfo(cord.as_tree(), parent, method,
                                        parent->sampling_stride());
  cord.set_cordz_info(info);
  info->Track();
}
// Makes the profiling state of `cord` follow that of `src`: when `src` is
// profiled, `cord` is (re)tracked as derived from `src`; otherwise any
// existing tracking of `cord` is stopped and its CordzInfo pointer cleared.
void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src,
                                   MethodIdentifier method) {
  if (src.is_profiled()) {
    TrackCord(cord, src, method);
    return;
  }
  if (cord.is_profiled()) {
    cord.cordz_info()->Untrack();
    cord.clear_cordz_info();
  }
}
// Returns the method to record as 'parent method' for a CordzInfo created
// from `src`: kUnknown when there is no `src`, otherwise the parent method of
// `src` if it has one, else the method of `src` itself.
CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) {
  if (src == nullptr) return MethodIdentifier::kUnknown;
  if (src->parent_method_ == MethodIdentifier::kUnknown) {
    return src->method_;
  }
  return src->parent_method_;
}
// Copies the stack to record as 'parent stack' for a CordzInfo created from
// `src` into `stack`, and returns the number of frames copied. The parent
// stack of `src` is used when it is non-empty, otherwise the own stack of
// `src`. Returns 0 without touching `stack` when `src` is null. `stack` must
// not be null (asserted).
size_t CordzInfo::FillParentStack(const CordzInfo* src, void** stack) {
  assert(stack);
  if (src == nullptr) return 0;

  const void* frames;
  size_t depth;
  if (src->parent_stack_depth_) {
    frames = src->parent_stack_;
    depth = src->parent_stack_depth_;
  } else {
    frames = src->stack_;
    depth = src->stack_depth_;
  }
  memcpy(stack, frames, depth * sizeof(void*));
  return depth;
}
// Creates the tracking info for `rep`.
//  - `src` is the optional CordzInfo this sample is derived from. When it is
//    non-null, its stack and method are recorded as the parent stack / parent
//    method (see FillParentStack and GetParentMethod) and its update counters
//    are added to the counters of this instance.
//  - `method` is recorded as the creating method and counted once in the
//    update tracker.
//  - `sampling_stride` is stored as is.
// The creation stack is captured into stack_ by absl::GetStackTrace with a
// skip count of 1 (presumably to leave out this constructor's own frame --
// confirm), and the creation time is taken from absl::Now().
// This constructor does not add the instance to the global list; see Track().
CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src,
MethodIdentifier method, int64_t sampling_stride)
: rep_(rep),
stack_depth_(
static_cast<size_t>(absl::GetStackTrace(stack_,
/*max_depth=*/kMaxStackDepth,
/*skip_count=*/1))),
parent_stack_depth_(FillParentStack(src, parent_stack_)),
method_(method),
parent_method_(GetParentMethod(src)),
create_time_(absl::Now()),
sampling_stride_(sampling_stride) {
// Count the creating method itself.
update_tracker_.LossyAdd(method);
if (src) {
// Copy parent counters.
update_tracker_.LossyAdd(src->update_tracker_);
}
}
// Releases the reference on `rep_` if one is still held. That only happens
// when this CordzInfo was kept alive as part of a collection snapshot, which
// should be rare.
CordzInfo::~CordzInfo() {
  const bool holds_rep = (rep_ != nullptr);
  if (ABSL_PREDICT_FALSE(holds_rep)) {
    CordRep::Unref(rep_);
  }
}
void CordzInfo::Track() {
SpinLockHolder l(&list_->mutex);
CordzInfo* const head = list_->head.load(std::memory_order_acquire);
if (head != nullptr) {
head->ci_prev_.store(this, std::memory_order_release);
}
ci_next_.store(head, std::memory_order_release);
list_->head.store(this, std::memory_order_release);
}
// Removes this instance from the list referenced by `list_` and then disposes
// of it: either by deleting it right away, or by handing it to
// CordzHandle::Delete(). In both cases the caller must not touch this
// instance after Untrack() returns.
void CordzInfo::Untrack() {
ODRCheck();
{
// Unlink from the doubly linked list while holding the list spinlock. Only
// the neighbours (and the list head) are updated; this instance's own
// ci_next_ / ci_prev_ values are left as they are.
SpinLockHolder l(&list_->mutex);
CordzInfo* const head = list_->head.load(std::memory_order_acquire);
CordzInfo* const next = ci_next_.load(std::memory_order_acquire);
CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire);
if (next) {
ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this);
next->ci_prev_.store(prev, std::memory_order_release);
}
if (prev) {
ABSL_ASSERT(head != this);
ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this);
prev->ci_next_.store(next, std::memory_order_release);
} else {
// No predecessor: this instance was the head of the list.
ABSL_ASSERT(head == this);
list_->head.store(next, std::memory_order_release);
}
}
// We can no longer be discovered: perform a fast path check if we are not
// listed on any delete queue, so we can directly delete this instance.
if (SafeToDelete()) {
// Clear rep_ first so that the destructor does not Unref a CordRep for which
// this instance never took a reference.
UnsafeSetCordRep(nullptr);
delete this;
return;
}
// We are likely part of a snapshot, extend the life of the CordRep by taking
// a reference on it; the destructor releases that reference again.
{
absl::MutexLock lock(&mutex_);
if (rep_) CordRep::Ref(rep_);
}
// Either deletes this instance now or queues it on the delete queue.
CordzHandle::Delete(this);
}
// Acquires `mutex_` and records `method` in the update tracker. The lock is
// still held on return; release it with Unlock(). Asserts that a CordRep is
// set.
void CordzInfo::Lock(MethodIdentifier method)
    ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) {
  mutex_.Lock();
  update_tracker_.LossyAdd(method);
  assert(rep_ != nullptr);
}
// Releases `mutex_`. If `rep_` was found to be null while the lock was still
// held, the instance is untracked after the lock has been released; Untrack()
// disposes of the instance, so it must not be used afterwards in that case.
void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) {
  // Sample rep_ before unlocking.
  const bool rep_cleared = (rep_ == nullptr);
  mutex_.Unlock();
  if (rep_cleared) {
    Untrack();
  }
}
// Returns a view of the first `stack_depth_` frames stored in `stack_`.
absl::Span<void* const> CordzInfo::GetStack() const {
  absl::Span<void* const> frames = absl::MakeConstSpan(stack_, stack_depth_);
  return frames;
}
// Returns a view of the first `parent_stack_depth_` frames stored in
// `parent_stack_`.
absl::Span<void* const> CordzInfo::GetParentStack() const {
  absl::Span<void* const> frames =
      absl::MakeConstSpan(parent_stack_, parent_stack_depth_);
  return frames;
}
// Builds a CordzStatistics for this instance. The method, parent method and
// update tracker are always filled in. The size and the CordRepAnalyzer
// results are only filled in when RefCordRep() returns a non-null rep; that
// rep is passed to CordRep::Unref once the analysis is done.
CordzStatistics CordzInfo::GetCordzStatistics() const {
  CordzStatistics stats;
  stats.method = method_;
  stats.parent_method = parent_method_;
  stats.update_tracker = update_tracker_;

  CordRep* const rep = RefCordRep();
  if (rep == nullptr) return stats;

  stats.size = rep->length;
  CordRepAnalyzer analyzer(stats);
  analyzer.AnalyzeCordRep(rep);
  CordRep::Unref(rep);
  return stats;
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,303 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_INFO_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_INFO_H_
#include <atomic>
#include <cstdint>
#include <functional>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/thread_annotations.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_functions.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzInfo tracks a profiled Cord. While a Cord is being tracked, its
// CordzInfo is linked into the global list (`global_list_`) and can be
// discovered through `Head()` and `Next()`. Once the Cord stops being tracked
// (see `Untrack()`), the CordzInfo is unlinked from that list and is either
// deleted right away or, when it may still be observed by a snapshot, handed
// to `CordzHandle::Delete()` for deferred cleanup.
class ABSL_LOCKABLE CordzInfo : public CordzHandle {
public:
using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;
// TrackCord creates a CordzInfo instance which tracks important metrics of
// a sampled cord, and stores the created CordzInfo instance into `cord`. All
// CordzInfo instances are placed in a global list which is used to discover
// and snapshot all actively tracked cords. Callers are responsible for
// calling Untrack() before the tracked Cord instance is deleted, or to
// stop tracking the sampled Cord. Callers are also responsible for guarding
// changes to the 'tree' value of a Cord (InlineData.tree) through the Lock()
// and Unlock() calls. Any change resulting in a new tree value for the cord
// requires a call to SetCordRep() before the old tree has been unreffed
// and/or deleted. `method` identifies the Cord public API method initiating
// the cord to be sampled.
// Requires `cord` to hold a tree, and `cord.cordz_info()` to be null.
static void TrackCord(InlineData& cord, MethodIdentifier method,
int64_t sampling_stride);
// Identical to TrackCord(), except that this function fills the
// `parent_stack` and `parent_method` properties of the returned CordzInfo
// instance from the provided `src` instance if `src` is sampled.
// This function should be used for sampling 'copy constructed' and 'copy
// assigned' cords. This function allows `cord` to be already sampled, in
// which case the CordzInfo will be newly created from `src`.
static void TrackCord(InlineData& cord, const InlineData& src,
MethodIdentifier method);
// Maybe sample the cord identified by `cord` for method `method`.
// Uses `cordz_should_profile` to randomly pick cords to be sampled, and if
// so, invokes `TrackCord` to start sampling `cord`.
static void MaybeTrackCord(InlineData& cord, MethodIdentifier method);
// Maybe sample the cord identified by `cord` for method `method`.
// `src` identifies a 'parent' cord which is assigned to `cord`, typically the
// input cord for a copy constructor, or an assign method such as `operator=`.
// `cord` will be sampled if (and only if) `src` is sampled.
// If `cord` is currently being sampled and `src` is not being sampled, then
// this function will stop sampling the cord and reset the cord's cordz_info.
//
// Previously this function defined that `cord` will be sampled if either
// `src` is sampled, or if `cord` is randomly picked for sampling. However,
// this can cause issues, as there may be paths where some cord is assigned an
// indirect copy of its own value. As such a 'string of copies' would then
// remain sampled (`src.is_profiled`), then assigning such a cord back to
// 'itself' creates a cycle where the cord will converge to 'always sampled'.
//
// For example:
//
// Cord x;
// for (...) {
// // Copy ctor --> y.is_profiled := x.is_profiled | random(...)
// Cord y = x;
// ...
// // Assign x = y --> x.is_profiled = y.is_profiled | random(...)
// // ==> x.is_profiled |= random(...)
// // ==> x converges to 'always profiled'
// x = y;
// }
static void MaybeTrackCord(InlineData& cord, const InlineData& src,
MethodIdentifier method);
// Stops tracking changes for a sampled cord, and deletes the provided info.
// This function must be called before the sampled cord instance is deleted,
// and before the root cordrep of the sampled cord is unreffed.
// This function may extend the lifetime of the cordrep in cases where the
// CordzInfo instance is being held by a concurrent collection thread.
void Untrack();
// Invokes Untrack() on `info` if `info` is not null.
static void MaybeUntrackCord(CordzInfo* info);
CordzInfo() = delete;
CordzInfo(const CordzInfo&) = delete;
CordzInfo& operator=(const CordzInfo&) = delete;
// Retrieves the oldest existing CordzInfo.
static CordzInfo* Head(const CordzSnapshot& snapshot)
ABSL_NO_THREAD_SAFETY_ANALYSIS;
// Retrieves the next oldest existing CordzInfo older than 'this' instance.
CordzInfo* Next(const CordzSnapshot& snapshot) const
ABSL_NO_THREAD_SAFETY_ANALYSIS;
// Locks this instance for the update identified by `method`.
// Increases the count for `method` in `update_tracker`.
void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_);
// Unlocks this instance. If the contained `rep` has been set to null
// indicating the Cord has been cleared or is otherwise no longer sampled,
// then this method will untrack this CordzInfo instance (see `Untrack()`),
// which may delete it.
void Unlock() ABSL_UNLOCK_FUNCTION(mutex_);
// Asserts that this CordzInfo instance is locked.
void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_);
// Updates the `rep` property of this instance. This method is invoked by
// Cord logic each time the root node of a sampled Cord changes, and before
// the old root reference count is deleted. This guarantees that collection
// code can always safely take a reference on the tracked cord.
// Requires a lock to be held through the `Lock()` method.
// TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all
// Cord code is in a state where this can be proven true by the compiler.
void SetCordRep(CordRep* rep);
// Returns the current `rep` property of this instance with a reference
// added, or null if this instance represents a cord that has since been
// deleted or untracked.
CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_);
// Returns the current value of `rep_` for testing purposes only.
// Reads `rep_` without taking `mutex_`.
CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS {
return rep_;
}
// Sets the current value of `rep_` for testing purposes only.
// Writes `rep_` without taking `mutex_`.
void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS {
rep_ = rep;
}
// Returns the stack trace for where the cord was first sampled. Cords are
// potentially sampled when they promote from an inlined cord to a tree or
// ring representation, which is not necessarily the location where the cord
// was first created. Some cords are created as inlined cords, and only as
// data is added do they become a non-inlined cord. However, typically the
// location represents reasonably well where the cord is 'created'.
absl::Span<void* const> GetStack() const;
// Returns the stack trace for a sampled cord's 'parent stack trace'. This
// value may be set if the cord is sampled (promoted) after being created
// from, or being assigned the value of an existing (sampled) cord.
absl::Span<void* const> GetParentStack() const;
// Retrieves the CordzStatistics associated with this Cord. The statistics
// are only updated when a Cord goes through a mutation, such as an Append
// or RemovePrefix.
CordzStatistics GetCordzStatistics() const;
// Returns the sampling stride stored in this instance.
int64_t sampling_stride() const { return sampling_stride_; }
private:
using SpinLock = absl::base_internal::SpinLock;
using SpinLockHolder = ::absl::base_internal::SpinLockHolder;
// Global cordz info list. CordzInfo stores a pointer to the global list
// instance to harden against ODR violations.
struct List {
constexpr explicit List(absl::ConstInitType)
: mutex(absl::kConstInit,
absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {}
SpinLock mutex;
std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr};
};
// Capacity of the `stack_` and `parent_stack_` arrays.
static constexpr size_t kMaxStackDepth = 64;
explicit CordzInfo(CordRep* rep, const CordzInfo* src,
MethodIdentifier method, int64_t weight);
~CordzInfo() override;
// Sets `rep_` without holding a lock.
void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS;
void Track();
// Returns the parent method from `src`, which is either `parent_method_` or
// `method_` depending on `parent_method_` being kUnknown.
// Returns kUnknown if `src` is null.
static MethodIdentifier GetParentMethod(const CordzInfo* src);
// Fills the provided stack from `src`, copying either `parent_stack_` or
// `stack_` depending on `parent_stack_` being empty, returning the size of
// the parent stack.
// Returns 0 if `src` is null.
static size_t FillParentStack(const CordzInfo* src, void** stack);
// In builds without NDEBUG, checks that `list_` points at `global_list_`.
void ODRCheck() const {
#ifndef NDEBUG
ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord");
#endif
}
// Non-inlined implementation of `MaybeTrackCord`, which is executed if
// either `src` is sampled or `cord` is sampled, and either untracks or
// tracks `cord` as documented per `MaybeTrackCord`.
static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src,
MethodIdentifier method);
ABSL_CONST_INIT static List global_list_;
List* const list_ = &global_list_;
// ci_prev_ and ci_next_ require the global list mutex to be held.
// Unfortunately we can't use thread annotations such that the thread safety
// analysis understands that list_ and global_list_ are one and the same.
std::atomic<CordzInfo*> ci_prev_{nullptr};
std::atomic<CordzInfo*> ci_next_{nullptr};
// Guards `rep_`; acquired and released through `Lock()` and `Unlock()`.
mutable absl::Mutex mutex_;
CordRep* rep_ ABSL_GUARDED_BY(mutex_);
// Backing storage for the spans returned by `GetStack()` and
// `GetParentStack()`; the matching `*_depth_` members give the used lengths.
void* stack_[kMaxStackDepth];
void* parent_stack_[kMaxStackDepth];
const size_t stack_depth_;
const size_t parent_stack_depth_;
const MethodIdentifier method_;
const MethodIdentifier parent_method_;
CordzUpdateTracker update_tracker_;
const absl::Time create_time_;
const int64_t sampling_stride_;
};
// Starts tracking `cord` for `method` when `cordz_should_profile()` returns a
// positive stride; that stride is passed on to `TrackCord()`.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
    InlineData& cord, MethodIdentifier method) {
  const auto sampling_stride = cordz_should_profile();
  if (ABSL_PREDICT_FALSE(sampling_stride > 0)) {
    TrackCord(cord, method, sampling_stride);
  }
}
// Fast-path filter for the copy/assign case: the non-inlined implementation
// is only entered when `cord` or `src` is reported as profiled.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord(
    InlineData& cord, const InlineData& src, MethodIdentifier method) {
  if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) {
    MaybeTrackCordImpl(cord, src, method);
  }
}
// Null-tolerant wrapper around `Untrack()`; does nothing for a null `info`.
inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord(
    CordzInfo* info) {
  if (ABSL_PREDICT_FALSE(info != nullptr)) {
    info->Untrack();
  }
}
// Forwards to `mutex_.AssertHeld()` unless NDEBUG is defined, in which case
// this function has an empty body.
inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) {
#if !defined(NDEBUG)
  mutex_.AssertHeld();
#endif
}
// Replaces the tracked rep after checking (via `AssertHeld()`) that the
// instance is locked.
inline void CordzInfo::SetCordRep(CordRep* rep) {
  AssertHeld();
  rep_ = rep;
}
// Replaces the tracked rep without any lock check.
inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) {
  rep_ = rep;
}
// Takes `mutex_` and returns the tracked rep with one reference added, or
// null when no rep is set.
inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) {
  MutexLock lock(&mutex_);
  if (rep_ == nullptr) {
    return nullptr;
  }
  return CordRep::Ref(rep_);
}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_INFO_H_

View file

@ -0,0 +1,510 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <random>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/crc/internal/crc_cord_state.h"
#include "absl/strings/cord.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_scope.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Keeps gtest from dumping the statistics contents; the matcher reports the
// relevant fields itself. A null stream is ignored.
inline void PrintTo(const CordzStatistics& stats, std::ostream* s) {
  if (s == nullptr) return;
  *s << "CordzStatistics{...}";
}
namespace {
using ::testing::Ge;
// Creates a flat whose allocated size is `size` rounded up to a tag size.
//
// The rounding matters because the exact tag is poked back into the flat:
// a 'size returning allocator' could grant more than requested, but storing
// the requested size in the tag is fine (even with sized deletes) as long as
// that size maps perfectly onto a tag value.
CordRepFlat* Flat(size_t size) {
  assert(size >= kMinFlatSize);
  const size_t allocated = RoundUpForTag(size);
  const size_t payload = allocated - kFlatOverhead;
  CordRepFlat* flat = CordRepFlat::New(payload);
  flat->tag = AllocatedSizeToTag(allocated);
  flat->length = payload;
  return flat;
}
// Creates an external rep reporting `length` bytes, with a no-op releaser.
CordRepExternal* External(size_t length = 512) {
  auto* rep =
      NewExternalRep(absl::string_view("", length), [](absl::string_view) {});
  return static_cast<CordRepExternal*>(rep);
}
// Wraps `rep` in a newly allocated substring node that is one byte shorter
// than `rep`.
CordRepSubstring* Substring(CordRep* rep) {
  auto* node = new CordRepSubstring;
  node->child = rep;
  node->tag = SUBSTRING;
  node->length = rep->length - 1;
  return node;
}
// Reference count helper
struct RefHelper {
std::vector<CordRep*> refs;
~RefHelper() {
for (CordRep* rep : refs) {
CordRep::Unref(rep);
}
}
// Invokes CordRep::Unref() on `rep` when this instance is destroyed.
template <typename T>
T* NeedsUnref(T* rep) {
refs.push_back(rep);
return rep;
}
// Adds `n` reference counts to `rep` which will be unreffed when this
// instance is destroyed.
template <typename T>
T* Ref(T* rep, size_t n = 1) {
while (n--) {
NeedsUnref(CordRep::Ref(rep));
}
return rep;
}
};
// Sizeof helper. The generic form reports the static size of the node type
// itself and never looks at the pointed-to object, so child elements are not
// included.
template <typename T>
size_t SizeOf(const T* /*rep*/) {
  return sizeof(T);
}
// Flats report their allocated size rather than sizeof(CordRepFlat).
template <>
size_t SizeOf(const CordRepFlat* rep) {
  const size_t allocated = rep->AllocatedSize();
  return allocated;
}
// Externals are sized as the external node header plus the referenced data
// length. See cord.cc.
template <>
size_t SizeOf(const CordRepExternal* rep) {
  const size_t header = sizeof(CordRepExternalImpl<intptr_t>);
  return header + rep->length;
}
// Computes fair share memory in a naive, recursive way: every node's own size
// is divided by the product of the reference counts on the path leading to
// it, and the shares of its children are added on top.
double FairShareImpl(CordRep* rep, size_t ref) {
  ref *= rep->refcount.Get();

  double own = 0.0;
  double nested = 0.0;
  const auto tag = rep->tag;
  if (tag >= FLAT) {
    own = SizeOf(rep->flat());
  } else if (tag == EXTERNAL) {
    own = SizeOf(rep->external());
  } else if (tag == SUBSTRING) {
    own = SizeOf(rep->substring());
    nested = FairShareImpl(rep->substring()->child, ref);
  } else if (tag == BTREE) {
    own = SizeOf(rep->btree());
    for (CordRep* edge : rep->btree()->Edges()) {
      nested += FairShareImpl(edge, ref);
    }
  } else {
    // Any other tag is not expected by this helper.
    assert(false);
  }
  return own / ref + nested;
}
// Returns the fair share memory size from `FairShareImpl()` truncated to a
// size_t.
size_t FairShare(CordRep* rep, size_t ref = 1) {
  const double share = FairShareImpl(rep, ref);
  return static_cast<size_t>(share);
}
// Tracks a cord built on `rep`, captures CordzInfo::GetCordzStatistics() for
// it, untracks it again and returns the captured statistics.
CordzStatistics SampleCord(CordRep* rep) {
  InlineData cord(rep);
  CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1);
  CordzInfo* const info = cord.cordz_info();
  CordzStatistics stats = info->GetCordzStatistics();
  info->Untrack();
  return stats;
}
// Matches a CordzStatistics value (`arg`) against the expected `stats`.
// Every listed member is compared individually; each mismatch is reported
// through `result_listener`, so one failure lists all differing fields.
MATCHER_P(EqStatistics, stats, "Statistics equal expected values") {
  bool ok = true;

#define STATS_MATCHER_EXPECT_EQ(member)                              \
  if (stats.member != arg.member) {                                  \
    *result_listener << "\n stats." << #member                       \
                     << ": actual = " << arg.member << ", expected " \
                     << stats.member;                                \
    ok = false;                                                      \
  }

  STATS_MATCHER_EXPECT_EQ(size);
  STATS_MATCHER_EXPECT_EQ(node_count);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat_64);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat_128);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat_256);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat_512);
  STATS_MATCHER_EXPECT_EQ(node_counts.flat_1k);
  STATS_MATCHER_EXPECT_EQ(node_counts.external);
  STATS_MATCHER_EXPECT_EQ(node_counts.concat);
  STATS_MATCHER_EXPECT_EQ(node_counts.substring);
  STATS_MATCHER_EXPECT_EQ(node_counts.ring);
  STATS_MATCHER_EXPECT_EQ(node_counts.btree);
  // The Crc tests in this file set `node_counts.crc`; compare it as well so
  // that expectation is actually verified.
  STATS_MATCHER_EXPECT_EQ(node_counts.crc);
  STATS_MATCHER_EXPECT_EQ(estimated_memory_usage);
  STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage);

#undef STATS_MATCHER_EXPECT_EQ

  return ok;
}
// A single unshared flat: fair share equals total memory usage.
TEST(CordzInfoStatisticsTest, Flat) {
  RefHelper holder;
  auto* rep = holder.NeedsUnref(Flat(512));

  CordzStatistics want;
  want.size = rep->length;
  want.node_count = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_512 = 1;
  want.estimated_memory_usage = SizeOf(rep);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(rep), EqStatistics(want));
}
// A flat with one extra reference: fair share is half of the flat's size.
TEST(CordzInfoStatisticsTest, SharedFlat) {
  RefHelper holder;
  auto* rep = holder.NeedsUnref(Flat(64));
  holder.Ref(rep);

  CordzStatistics want;
  want.size = rep->length;
  want.node_count = 1;
  want.node_counts.flat = 1;
  want.node_counts.flat_64 = 1;
  want.estimated_memory_usage = SizeOf(rep);
  want.estimated_fair_share_memory_usage = SizeOf(rep) / 2;

  EXPECT_THAT(SampleCord(rep), EqStatistics(want));
}
// A single unshared external: fair share equals total memory usage.
TEST(CordzInfoStatisticsTest, External) {
  RefHelper holder;
  auto* rep = holder.NeedsUnref(External());

  CordzStatistics want;
  want.size = rep->length;
  want.node_count = 1;
  want.node_counts.external = 1;
  want.estimated_memory_usage = SizeOf(rep);
  want.estimated_fair_share_memory_usage = SizeOf(rep);

  EXPECT_THAT(SampleCord(rep), EqStatistics(want));
}
// An external with one extra reference: fair share is half of its size.
TEST(CordzInfoStatisticsTest, SharedExternal) {
  RefHelper holder;
  auto* rep = holder.NeedsUnref(External());
  holder.Ref(rep);

  CordzStatistics want;
  want.size = rep->length;
  want.node_count = 1;
  want.node_counts.external = 1;
  want.estimated_memory_usage = SizeOf(rep);
  want.estimated_fair_share_memory_usage = SizeOf(rep) / 2;

  EXPECT_THAT(SampleCord(rep), EqStatistics(want));
}
// An unshared substring over an unshared flat: both nodes are counted and
// fair share equals total memory usage.
TEST(CordzInfoStatisticsTest, Substring) {
  RefHelper holder;
  auto* child = Flat(1024);
  auto* node = holder.NeedsUnref(Substring(child));

  CordzStatistics want;
  want.size = node->length;
  want.node_count = 2;
  want.node_counts.flat = 1;
  want.node_counts.flat_1k = 1;
  want.node_counts.substring = 1;
  want.estimated_memory_usage = SizeOf(node) + SizeOf(child);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(node), EqStatistics(want));
}
// The flat gets two extra references and the substring one extra reference;
// the expected fair share divides the substring by 2 and the flat by 6.
TEST(CordzInfoStatisticsTest, SharedSubstring) {
  RefHelper holder;
  auto* child = holder.Ref(Flat(511), 2);
  auto* node = holder.NeedsUnref(Substring(child));
  holder.Ref(node);

  CordzStatistics want;
  want.size = node->length;
  want.node_count = 2;
  want.node_counts.flat = 1;
  want.node_counts.flat_512 = 1;
  want.node_counts.substring = 1;
  want.estimated_memory_usage = SizeOf(child) + SizeOf(node);
  want.estimated_fair_share_memory_usage =
      SizeOf(node) / 2 + SizeOf(child) / 6;

  EXPECT_THAT(SampleCord(node), EqStatistics(want));
}
// Builds one btree node holding a large flat, a substring over a flat, an
// external and enough small flats to reach kMaxCapacity edges, all unshared.
TEST(CordzInfoStatisticsTest, BtreeLeaf) {
  ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3u));
  RefHelper holder;

  auto* big_flat = Flat(2000);
  auto* sub_flat = Flat(200);
  auto* sub = Substring(sub_flat);
  auto* ext = External(3000);

  CordRepBtree* tree = CordRepBtree::Create(big_flat);
  tree = CordRepBtree::Append(tree, sub);
  tree = CordRepBtree::Append(tree, ext);

  // Fill the remaining edges with small flats, summing up their sizes.
  const size_t filler_count = CordRepBtree::kMaxCapacity - 3;
  size_t filler_bytes = 0;
  for (size_t n = 0; n < filler_count; ++n) {
    auto* filler = Flat(70);
    filler_bytes += SizeOf(filler);
    tree = CordRepBtree::Append(tree, filler);
  }
  holder.NeedsUnref(tree);

  CordzStatistics want;
  want.size = tree->length;
  want.estimated_memory_usage = SizeOf(tree) + SizeOf(big_flat) +
                                SizeOf(sub_flat) + SizeOf(sub) +
                                filler_bytes + SizeOf(ext);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;
  want.node_count = 1 + 3 + 1 + filler_count;
  want.node_counts.flat = 2 + filler_count;
  want.node_counts.flat_128 = filler_count;
  want.node_counts.flat_256 = 1;
  want.node_counts.external = 1;
  want.node_counts.substring = 1;
  want.node_counts.btree = 1;

  EXPECT_THAT(SampleCord(tree), EqStatistics(want));
}
// Builds a btree from `leaf_count` batches of edges (a heavily shared flat, a
// substring over a flat, an external and small filler flats), adds extra
// references to the top node and to its first edge, and checks the sampled
// statistics against sizes accumulated during construction and the naive
// `FairShare()` computation.
TEST(CordzInfoStatisticsTest, BtreeNodeShared) {
  // `flat3_count` below is unsigned: make sure the subtraction cannot wrap.
  ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3u));
  RefHelper ref;
  static constexpr int leaf_count = 3;
  const size_t flat3_count = CordRepBtree::kMaxCapacity - 3;
  CordRepBtree* tree = nullptr;
  size_t mem_size = 0;
  for (int i = 0; i < leaf_count; ++i) {
    auto* flat1 = ref.Ref(Flat(2000), 9);
    mem_size += SizeOf(flat1);
    if (i == 0) {
      tree = CordRepBtree::Create(flat1);
    } else {
      tree = CordRepBtree::Append(tree, flat1);
    }

    auto* flat2 = Flat(200);
    auto* substr = Substring(flat2);
    mem_size += SizeOf(flat2) + SizeOf(substr);
    tree = CordRepBtree::Append(tree, substr);

    auto* external = External(30);
    mem_size += SizeOf(external);
    tree = CordRepBtree::Append(tree, external);

    // Distinct index name: the outer loop variable `i` is still used below.
    for (size_t j = 0; j < flat3_count; ++j) {
      auto* flat3 = Flat(70);
      mem_size += SizeOf(flat3);
      tree = CordRepBtree::Append(tree, flat3);
    }

    // Account for the btree node holding this batch: after the first batch
    // that is `tree` itself, afterwards it is the last edge of `tree`.
    if (i == 0) {
      mem_size += SizeOf(tree);
    } else {
      mem_size += SizeOf(tree->Edges().back()->btree());
    }
  }
  ref.NeedsUnref(tree);

  // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4).
  ref.Ref(tree, 1);
  ref.Ref(tree->Edges().front(), 4);

  CordzStatistics expected;
  expected.size = tree->length;
  expected.estimated_memory_usage = SizeOf(tree) + mem_size;
  expected.estimated_fair_share_memory_usage = FairShare(tree);
  expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count);
  expected.node_counts.flat = leaf_count * (2 + flat3_count);
  expected.node_counts.flat_128 = leaf_count * flat3_count;
  expected.node_counts.flat_256 = leaf_count;
  expected.node_counts.external = leaf_count;
  expected.node_counts.substring = leaf_count;
  expected.node_counts.btree = 1 + leaf_count;

  EXPECT_THAT(SampleCord(tree), EqStatistics(expected));
}
// A CRC node wrapping an unshared flat: two nodes, fully attributed.
TEST(CordzInfoStatisticsTest, Crc) {
  RefHelper holder;
  auto* child = Flat(1000);
  auto* crc_rep = holder.NeedsUnref(CordRepCrc::New(child, {}));

  CordzStatistics want;
  want.size = child->length;
  want.node_count = 2;
  want.node_counts.flat = 1;
  want.node_counts.flat_1k = 1;
  want.node_counts.crc = 1;
  want.estimated_memory_usage = SizeOf(crc_rep) + SizeOf(child);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(crc_rep), EqStatistics(want));
}
// A CRC node without a child: a single node of size zero.
TEST(CordzInfoStatisticsTest, EmptyCrc) {
  RefHelper holder;
  auto* crc_rep = holder.NeedsUnref(CordRepCrc::New(nullptr, {}));

  CordzStatistics want;
  want.size = 0;
  want.node_count = 1;
  want.node_counts.crc = 1;
  want.estimated_memory_usage = SizeOf(crc_rep);
  want.estimated_fair_share_memory_usage = want.estimated_memory_usage;

  EXPECT_THAT(SampleCord(crc_rep), EqStatistics(want));
}
// Stress test: `kNumThreads` application tasks keep creating, tracking,
// editing and untracking cords while one analyzer task repeatedly snapshots
// all tracked cords and collects their statistics. There are no assertions on
// the outcome; the test exists so memory and thread safety analyzers can
// detect mishaps.
TEST(CordzInfoStatisticsTest, ThreadSafety) {
  Notification stop;
  static constexpr int kNumThreads = 8;
  int64_t sampled_node_count = 0;

  {
    // All tasks scheduled below run until `stop` is notified, so each one
    // needs its own worker: `kNumThreads` application tasks plus the analyzer
    // task. With fewer workers the last scheduled task would not start until
    // after `stop` has been notified.
    absl::synchronization_internal::ThreadPool pool(kNumThreads + 1);

    // Run analyzer thread emulating a CordzHandler collection.
    pool.Schedule([&]() {
      while (!stop.HasBeenNotified()) {
        // Run every 10us (about 100K total collections).
        absl::SleepFor(absl::Microseconds(10));
        CordzSampleToken token;
        for (const CordzInfo& cord_info : token) {
          CordzStatistics stats = cord_info.GetCordzStatistics();
          sampled_node_count += stats.node_count;
        }
      }
    });

    // Run 'application threads'
    for (int i = 0; i < kNumThreads; ++i) {
      pool.Schedule([&]() {
        // Track 0 - 2 cordz infos at a time, providing permutations of 0, 1
        // and 2 CordzHandle and CordzInfo queues being active, with plenty of
        // 'empty to non empty' transitions.
        InlineData cords[2];
        std::minstd_rand gen;
        std::uniform_int_distribution<int> coin_toss(0, 1);
        std::uniform_int_distribution<int> dice_roll(1, 6);

        while (!stop.HasBeenNotified()) {
          for (InlineData& cord : cords) {
            // 50/50 flip the state of the cord
            if (coin_toss(gen) != 0) {
              if (cord.is_tree()) {
                // 50/50 simulate delete (untrack) or 'edit to empty'
                if (coin_toss(gen) != 0) {
                  CordzInfo::MaybeUntrackCord(cord.cordz_info());
                } else {
                  CordzUpdateScope scope(cord.cordz_info(),
                                         CordzUpdateTracker::kUnknown);
                  scope.SetCordRep(nullptr);
                }
                CordRep::Unref(cord.as_tree());
                cord.set_inline_size(0);
              } else {
                // Coin toss to 50% btree, and 50% flat.
                CordRep* rep = Flat(256);
                if (coin_toss(gen) != 0) {
                  rep = CordRepBtree::Create(rep);
                }

                // Maybe CRC this cord
                if (dice_roll(gen) == 6) {
                  if (dice_roll(gen) == 6) {
                    // Empty CRC rep
                    CordRep::Unref(rep);
                    rep = CordRepCrc::New(nullptr, {});
                  } else {
                    // Regular CRC rep
                    rep = CordRepCrc::New(rep, {});
                  }
                }
                cord.make_tree(rep);

                // 50/50 sample
                if (coin_toss(gen) != 0) {
                  CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown, 1);
                }
              }
            }
          }
        }

        // Release whatever is still held once the test is told to stop.
        for (InlineData& cord : cords) {
          if (cord.is_tree()) {
            CordzInfo::MaybeUntrackCord(cord.cordz_info());
            CordRep::Unref(cord.as_tree());
          }
        }
      });
    }

    // Run for 1 second to give memory and thread safety analyzers plenty of
    // time to detect any mishaps or undefined behaviors.
    absl::SleepFor(absl::Seconds(1));
    stop.Notify();
  }

  std::cout << "Sampled " << sampled_node_count << " nodes\n";
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,342 @@
// Copyright 2019 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_info.h"
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/symbolize.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_statistics.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::Ne;
using ::testing::SizeIs;
// Method identifiers shared by the tests in this file.
constexpr auto kUnknownMethod = CordzUpdateTracker::kUnknown;
constexpr auto kTrackCordMethod = CordzUpdateTracker::kConstructorString;
constexpr auto kChildMethod = CordzUpdateTracker::kConstructorCord;
constexpr auto kUpdateMethod = CordzUpdateTracker::kAppendString;
// Short local alias for CordzHandle::DiagnosticsGetDeleteQueue().
std::vector<const CordzHandle*> DeleteQueue() {
  std::vector<const CordzHandle*> queue =
      CordzHandle::DiagnosticsGetDeleteQueue();
  return queue;
}
// Symbolizes every frame in `raw_stack` and returns the symbol names, one per
// line. Frames for which `absl::Symbolize()` reports failure are skipped.
std::string FormatStack(absl::Span<void* const> raw_stack) {
  static constexpr size_t buf_size = 1 << 14;
  // Scratch buffer reused for every frame; std::vector owns the storage so no
  // explicit new[] is needed.
  std::vector<char> buf(buf_size);
  std::string output;
  for (void* stackp : raw_stack) {
    if (absl::Symbolize(stackp, buf.data(), buf_size)) {
      absl::StrAppend(&output, " ", buf.data(), "\n");
    }
  }
  return output;
}
// Tracking a cord attaches a CordzInfo that is not a snapshot, is the list
// head, and refers to the cord's rep.
TEST(CordzInfoTest, TrackCord) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();
  ASSERT_THAT(tracked, Ne(nullptr));
  EXPECT_FALSE(tracked->is_snapshot());
  EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(tracked));
  EXPECT_THAT(tracked->GetCordRepForTesting(), Eq(cord.rep.rep));
  tracked->Untrack();
}
// Neither cord is sampled: the child stays untracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSampling) {
  CordzSamplingIntervalHelper sample_none(99999);
  TestCordData parent;
  TestCordData child;
  CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
  EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Even with a sampling interval of 1, the child stays untracked when the
// parent is not tracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) {
  CordzSamplingIntervalHelper sample_all(1);
  TestCordData parent;
  TestCordData child;
  CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
  EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// A tracked parent causes the child to be tracked as well, with the parent's
// stack recorded as the child's parent stack.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) {
  CordzSamplingIntervalHelper sample_none(99999);
  TestCordData parent;
  TestCordData child;
  CordzInfo::TrackCord(parent.data, kTrackCordMethod, 1);
  CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);

  CordzInfo* from_parent = parent.data.cordz_info();
  CordzInfo* from_child = child.data.cordz_info();
  ASSERT_THAT(from_child, Ne(nullptr));
  EXPECT_THAT(from_child->GetCordRepForTesting(), Eq(child.rep.rep));
  EXPECT_THAT(from_child->GetParentStack(), from_parent->GetStack());

  from_parent->Untrack();
  from_child->Untrack();
}
// A tracked child assigned from an untracked parent ends up untracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) {
  CordzSamplingIntervalHelper sample_none(99999);
  TestCordData parent;
  TestCordData child;
  CordzInfo::TrackCord(child.data, kTrackCordMethod, 1);
  CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
  EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Same as the previous case but with a sampling interval of 1: the child
// still ends up untracked.
TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) {
  CordzSamplingIntervalHelper sample_all(1);
  TestCordData parent;
  TestCordData child;
  CordzInfo::TrackCord(child.data, kTrackCordMethod, 1);
  CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod);
  EXPECT_THAT(child.data.cordz_info(), Eq(nullptr));
}
// Untracking without any live snapshot leaves the delete queue empty.
TEST(CordzInfoTest, UntrackCord) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();
  tracked->Untrack();
  EXPECT_THAT(DeleteQueue(), SizeIs(0u));
}
// Untracking while a snapshot is alive removes the info from the list but
// keeps it (and its rep) around on the delete queue.
TEST(CordzInfoTest, UntrackCordWithSnapshot) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();

  CordzSnapshot live_snapshot;
  tracked->Untrack();

  EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr));
  EXPECT_THAT(tracked->GetCordRepForTesting(), Eq(cord.rep.rep));
  EXPECT_THAT(DeleteQueue(), ElementsAre(tracked, &live_snapshot));
}
// SetCordRep() under Lock()/Unlock() replaces the tracked rep.
TEST(CordzInfoTest, SetCordRep) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();

  TestCordRep replacement;
  tracked->Lock(CordzUpdateTracker::kAppendCord);
  tracked->SetCordRep(replacement.rep);
  tracked->Unlock();

  EXPECT_THAT(tracked->GetCordRepForTesting(), Eq(replacement.rep));
  tracked->Untrack();
}
// Setting a null rep keeps the info listed until Unlock(), which removes it
// from the list.
TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();

  tracked->Lock(CordzUpdateTracker::kAppendString);
  tracked->SetCordRep(nullptr);
  EXPECT_THAT(tracked->GetCordRepForTesting(), Eq(nullptr));
  EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(tracked));

  tracked->Unlock();
  EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr));
}
// RefCordRep() returns the tracked rep and bumps its reference count by one.
TEST(CordzInfoTest, RefCordRep) {
  TestCordData cord;
  CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
  CordzInfo* tracked = cord.data.cordz_info();

  const size_t before = cord.rep.rep->refcount.Get();
  EXPECT_THAT(tracked->RefCordRep(), Eq(cord.rep.rep));
  EXPECT_THAT(cord.rep.rep->refcount.Get(), Eq(before + 1));

  // Give back the reference taken above.
  CordRep::Unref(cord.rep.rep);
  tracked->Untrack();
}
#if GTEST_HAS_DEATH_TEST
// Calling SetCordRep() without a preceding Lock() is expected to die in debug
// builds (EXPECT_DEBUG_DEATH); any death message is accepted.
TEST(CordzInfoTest, SetCordRepRequiresMutex) {
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod, 1);
CordzInfo* info = data.data.cordz_info();
TestCordRep rep;
// Deliberately no info->Lock(...) before this call.
EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*");
info->Untrack();
}
#endif // GTEST_HAS_DEATH_TEST
// Tracks two cords and then untracks the most recently tracked one (the list
// head) first, checking Head()/Next() through a single snapshot at each step.
TEST(CordzInfoTest, TrackUntrackHeadFirstV2) {
CordzSnapshot snapshot;
EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
// First tracked cord: it is the head and has no successor.
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod, 1);
CordzInfo* info1 = data.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Second tracked cord: it becomes the new head and links to the first.
TestCordData data2;
CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1);
CordzInfo* info2 = data2.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Remove the head first; the older entry becomes the head again.
info2->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
info1->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
}
// Tracks two cords and then untracks the first tracked one (the list tail)
// first, checking Head()/Next() through a single snapshot at each step.
TEST(CordzInfoTest, TrackUntrackTailFirstV2) {
CordzSnapshot snapshot;
EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
// First tracked cord: it is the head and has no successor.
TestCordData data;
CordzInfo::TrackCord(data.data, kTrackCordMethod, 1);
CordzInfo* info1 = data.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Second tracked cord: it becomes the new head and links to the first.
TestCordData data2;
CordzInfo::TrackCord(data2.data, kTrackCordMethod, 1);
CordzInfo* info2 = data2.data.cordz_info();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(info1));
EXPECT_THAT(info1->Next(snapshot), Eq(nullptr));
// Remove the tail first; the head stays and loses its successor.
info1->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2));
EXPECT_THAT(info2->Next(snapshot), Eq(nullptr));
info2->Untrack();
ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr));
}
// Checks that the stack recorded by TrackCord() contains the stack captured
// locally in the test body (compared as formatted strings).
TEST(CordzInfoTest, StackV2) {
TestCordData data;
// kMaxStackDepth is intentionally less than 64 (which is the max depth that
// Cordz will record) because if the actual stack depth is over 64
// (which it is on Apple platforms) then the expected_stack will end up
// catching a few frames at the end that the actual_stack didn't get and
// it will no longer be a subset. At the time of this writing 58 is the max
// that will allow this test to pass (with a minimum os version of iOS 9), so
// rounded down to 50 to hopefully not run into this in the future if Apple
// makes small modifications to its testing stack. 50 is sufficient to prove
// that we got a decent stack.
static constexpr int kMaxStackDepth = 50;
CordzInfo::TrackCord(data.data, kTrackCordMethod, 1);
CordzInfo* info = data.data.cordz_info();
std::vector<void*> local_stack;
local_stack.resize(kMaxStackDepth);
// In some environments we don't get stack traces. For example in Android
// absl::GetStackTrace will return 0 indicating it didn't find any stack. The
// resultant formatted stack will be "", but that still equals the stack
// recorded in CordzInfo, which is also empty. The skip_count is 1 so that the
// line number of the current stack isn't included in the HasSubstr check.
// The second resize() shrinks the buffer to the number of frames captured.
local_stack.resize(static_cast<size_t>(
absl::GetStackTrace(local_stack.data(), kMaxStackDepth,
/*skip_count=*/1)));
std::string got_stack = FormatStack(info->GetStack());
std::string expected_stack = FormatStack(local_stack);
// If TrackCord is inlined, got_stack should match expected_stack. If it isn't
// inlined, got_stack should include an additional frame not present in
// expected_stack. Either way, expected_stack should be a substring of
// got_stack.
EXPECT_THAT(got_stack, HasSubstr(expected_stack));
info->Untrack();
}
// Local helper functions to get different stacks for child and parent.
// Starts tracking `data` as a child of `parent` using kChildMethod and returns
// the CordzInfo now attached to `data`. Kept as a separate function so that
// the child is tracked from a different call site than the parent.
CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) {
CordzInfo::TrackCord(data, parent, kChildMethod);
return data.cordz_info();
}
// Starts tracking `data` using kTrackCordMethod and returns the CordzInfo now
// attached to `data`. Kept as a separate function so that the parent is
// tracked from a different call site than the child.
CordzInfo* TrackParentCord(InlineData& data) {
CordzInfo::TrackCord(data, kTrackCordMethod, 1);
return data.cordz_info();
}
// Statistics of a freshly tracked cord must report its size, the tracking
// method, no parent method, and exactly one recorded tracking call.
TEST(CordzInfoTest, GetStatistics) {
  TestCordData data;
  CordzInfo* const tracked = TrackParentCord(data.data);

  const CordzStatistics stats = tracked->GetCordzStatistics();
  EXPECT_THAT(stats.size, Eq(data.rep.rep->length));
  EXPECT_THAT(stats.method, Eq(kTrackCordMethod));
  EXPECT_THAT(stats.parent_method, Eq(kUnknownMethod));
  EXPECT_THAT(stats.update_tracker.Value(kTrackCordMethod), Eq(1));

  tracked->Untrack();
}
// Every Lock(method) call must be counted in the update tracker: two
// lock/unlock rounds with kUpdateMethod yield a count of two.
TEST(CordzInfoTest, LockCountsMethod) {
  TestCordData data;
  CordzInfo* const tracked = TrackParentCord(data.data);

  for (int round = 0; round < 2; ++round) {
    tracked->Lock(kUpdateMethod);
    tracked->Unlock();
  }

  const CordzStatistics stats = tracked->GetCordzStatistics();
  EXPECT_THAT(stats.update_tracker.Value(kUpdateMethod), Eq(2));
  tracked->Untrack();
}
// A child tracked from a sampled parent must report the parent's stack as its
// parent stack, and its statistics must name both its own method and the
// parent's method.
TEST(CordzInfoTest, FromParent) {
  TestCordData parent;
  TestCordData child;
  CordzInfo* const parent_info = TrackParentCord(parent.data);
  CordzInfo* const child_info = TrackChildCord(child.data, parent.data);

  const std::string own_stack_of_parent = FormatStack(parent_info->GetStack());
  const std::string parent_stack_of_child =
      FormatStack(child_info->GetParentStack());
  EXPECT_THAT(own_stack_of_parent, Eq(parent_stack_of_child));

  const CordzStatistics stats = child_info->GetCordzStatistics();
  EXPECT_THAT(stats.size, Eq(child.rep.rep->length));
  EXPECT_THAT(stats.method, Eq(kChildMethod));
  EXPECT_THAT(stats.parent_method, Eq(kTrackCordMethod));
  EXPECT_THAT(stats.update_tracker.Value(kChildMethod), Eq(1));

  parent_info->Untrack();
  child_info->Untrack();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,64 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_sample_token.h"
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// Pre-increment: advances to the entry following the current one, as seen
// through the owning token. An iterator without a current entry is left
// unchanged.
CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() {
  if (!current_) {
    return *this;
  }
  current_ = current_->Next(*token_);
  return *this;
}
// Post-increment: advances this iterator and returns a copy of its value from
// before the advance.
CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) {
  const Iterator before_advance = *this;
  ++*this;
  return before_advance;
}
// Two iterators are equal when they point at the same entry and either both
// are exhausted (null entry) or both belong to the same token.
bool operator==(const CordzSampleToken::Iterator& lhs,
                const CordzSampleToken::Iterator& rhs) {
  if (lhs.current_ != rhs.current_) {
    return false;
  }
  // Same entry: exhausted iterators compare equal regardless of their token.
  if (lhs.current_ == nullptr) {
    return true;
  }
  return lhs.token_ == rhs.token_;
}
// Inequality for iterators: the exact negation of operator==.
bool operator!=(const CordzSampleToken::Iterator& lhs,
const CordzSampleToken::Iterator& rhs) {
return !(lhs == rhs);
}
// Returns a reference to the current entry. `current_` is dereferenced without
// a null check, so this must not be called on an exhausted or
// default-constructed iterator.
CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*()
const {
return *current_;
}
// Returns a pointer to the current entry; this is null once the iterator is
// exhausted or if it was default-constructed.
CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->()
const {
return current_;
}
// Creates an iterator bound to `token`, positioned at CordzInfo::Head(*token).
// `token` is dereferenced unconditionally and therefore must not be null.
CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token)
: token_(token), current_(CordzInfo::Head(*token)) {}
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,97 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_SAMPLE_TOKEN_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_SAMPLE_TOKEN_H_
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// The existence of a CordzSampleToken guarantees that a reader can traverse the
// global_cordz_infos_head linked-list without needing to hold a mutex. When a
// CordzSampleToken exists, all CordzInfo objects that would be destroyed are
// instead appended to a deletion queue. When the CordzSampleToken is destroyed,
// it will also clean up any of these CordzInfo objects.
//
// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects.
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail
//
// This list tracks that CH1 and CH2 were created after ST1, so the thread
// holding ST1 might have a reference to CH1, CH2, ST2, and CH3. However, ST2
// was created later, so the thread holding the ST2 token cannot have a
// reference to ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will
// delete ST1, CH1, and CH2. If instead ST2 is cleaned up first, that thread
// will only delete ST2.
//
// If ST1 is cleaned up first, the new list will be:
// ST2 <- CH3 <- global_delete_queue_tail
//
// If ST2 is cleaned up first, the new list will be:
// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail
//
// All new CordzHandle objects are appended to the list, so if a new thread
// comes along before either ST1 or ST2 are cleaned up, the new list will be:
// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail
//
// A thread must hold the global_delete_queue_mu mutex whenever it's altering
// this list.
//
// It is safe for a thread that holds a CordzSampleToken to read
// global_cordz_infos at any time since the objects it is able to retrieve will
// not be deleted while the CordzSampleToken exists.
// A CordzSnapshot that can additionally be iterated with begin()/end(), which
// makes it usable in a range-based for loop over `const CordzInfo&` entries.
class CordzSampleToken : public CordzSnapshot {
public:
// Input iterator over the CordzInfo entries visible to one token.
class Iterator {
public:
using iterator_category = std::input_iterator_tag;
// NOTE: value_type is deliberately a reference type here; `reference` below
// is an alias of it.
using value_type = const CordzInfo&;
using difference_type = ptrdiff_t;
using pointer = const CordzInfo*;
using reference = value_type;
// A default-constructed iterator has no token and no current entry; this is
// what end() returns.
Iterator() = default;
Iterator& operator++();
Iterator operator++(int);
friend bool operator==(const Iterator& lhs, const Iterator& rhs);
friend bool operator!=(const Iterator& lhs, const Iterator& rhs);
reference operator*() const;
pointer operator->() const;
private:
friend class CordzSampleToken;
// Only CordzSampleToken::begin() creates iterators bound to a token.
explicit Iterator(const CordzSampleToken* token);
// Token this iterator walks through; null for a default-constructed iterator.
const CordzSampleToken* token_ = nullptr;
// Current entry; null for a default-constructed (end) iterator.
pointer current_ = nullptr;
};
CordzSampleToken() = default;
// Tokens are neither copy-constructible nor copy-assignable.
CordzSampleToken(const CordzSampleToken&) = delete;
CordzSampleToken& operator=(const CordzSampleToken&) = delete;
// Returns an iterator bound to this token.
Iterator begin() { return Iterator(this); }
// Returns the default-constructed iterator used as the end sentinel.
Iterator end() { return Iterator(); }
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_SAMPLE_TOKEN_H_

View file

@ -0,0 +1,208 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_sample_token.h"
#include <memory>
#include <type_traits>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/random/random.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_handle.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/notification.h"
#include "absl/time/clock.h"
#include "absl/time/time.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Ne;
// Used test values
// Method identifier passed to CordzInfo::TrackCord() by the tests in this file.
auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString;
// Compile-time checks: the iterator is copyable and movable, and
// std::iterator_traits exposes the member types declared on the iterator.
TEST(CordzSampleTokenTest, IteratorTraits) {
  using Iter = CordzSampleToken::Iterator;
  using Traits = std::iterator_traits<Iter>;

  // Copy/move construction and assignment.
  static_assert(std::is_copy_constructible<Iter>::value, "");
  static_assert(std::is_copy_assignable<Iter>::value, "");
  static_assert(std::is_move_constructible<Iter>::value, "");
  static_assert(std::is_move_assignable<Iter>::value, "");

  // Member types reported through std::iterator_traits.
  static_assert(std::is_same<Traits::iterator_category,
                             std::input_iterator_tag>::value,
                "");
  static_assert(std::is_same<Traits::value_type, const CordzInfo&>::value, "");
  static_assert(std::is_same<Traits::difference_type, ptrdiff_t>::value, "");
  static_assert(std::is_same<Traits::pointer, const CordzInfo*>::value, "");
  static_assert(std::is_same<Traits::reference, const CordzInfo&>::value, "");
}
// With no cords tracked by this test, begin() must already equal end().
TEST(CordzSampleTokenTest, IteratorEmpty) {
CordzSampleToken token;
EXPECT_THAT(token.begin(), Eq(token.end()));
}
// Iterating a token must visit the tracked cords starting with the most
// recently tracked one.
TEST(CordzSampleTokenTest, Iterator) {
  TestCordData cord1;
  TestCordData cord2;
  TestCordData cord3;
  CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1);
  CordzInfo* const info1 = cord1.data.cordz_info();
  CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1);
  CordzInfo* const info2 = cord2.data.cordz_info();
  CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1);
  CordzInfo* const info3 = cord3.data.cordz_info();

  CordzSampleToken token;
  std::vector<const CordzInfo*> visited;
  const CordzSampleToken::Iterator last = token.end();
  for (CordzSampleToken::Iterator it = token.begin(); it != last; ++it) {
    const CordzInfo& cord_info = *it;
    visited.push_back(&cord_info);
  }
  EXPECT_THAT(visited, ElementsAre(info3, info2, info1));

  info1->Untrack();
  info2->Untrack();
  info3->Untrack();
}
// Iterators compare equal only when they point at the same entry of the same
// token, or when both are exhausted. Tokens are created between TrackCord()
// calls so that each token starts at a different head.
TEST(CordzSampleTokenTest, IteratorEquality) {
TestCordData cord1;
TestCordData cord2;
TestCordData cord3;
CordzInfo::TrackCord(cord1.data, kTrackCordMethod, 1);
CordzInfo* info1 = cord1.data.cordz_info();
CordzSampleToken token1;
// lhs starts with the CordzInfo corresponding to cord1 at the head.
CordzSampleToken::Iterator lhs = token1.begin();
CordzInfo::TrackCord(cord2.data, kTrackCordMethod, 1);
CordzInfo* info2 = cord2.data.cordz_info();
CordzSampleToken token2;
// rhs starts with the CordzInfo corresponding to cord2 at the head.
CordzSampleToken::Iterator rhs = token2.begin();
CordzInfo::TrackCord(cord3.data, kTrackCordMethod, 1);
CordzInfo* info3 = cord3.data.cordz_info();
// lhs is on cord1 while rhs is on cord2.
EXPECT_THAT(lhs, Ne(rhs));
rhs++;
// lhs and rhs are both on cord1, but they didn't come from the same
// CordzSampleToken.
EXPECT_THAT(lhs, Ne(rhs));
lhs++;
rhs++;
// Both lhs and rhs are done, so they are on nullptr.
EXPECT_THAT(lhs, Eq(rhs));
info1->Untrack();
info2->Untrack();
info3->Untrack();
}
// Stress test: kNumThreads workers concurrently track/untrack cords and
// create/iterate/destroy sample tokens until `stop` is notified. The only
// assertion inside the loop is trivial; per the comment before the sleep, the
// test exists to give tsan a chance to spot errors.
TEST(CordzSampleTokenTest, MultiThreaded) {
Notification stop;
static constexpr int kNumThreads = 4;
static constexpr int kNumCords = 3;
static constexpr int kNumTokens = 3;
absl::synchronization_internal::ThreadPool pool(kNumThreads);
for (int i = 0; i < kNumThreads; ++i) {
pool.Schedule([&stop]() {
absl::BitGen gen;
TestCordData cords[kNumCords];
std::unique_ptr<CordzSampleToken> tokens[kNumTokens];
while (!stop.HasBeenNotified()) {
// Randomly perform one of five actions:
// 1) Untrack
// 2) Track
// 3) Iterate over Cords visible to a token.
// 4) Unsample
// 5) Sample
// NOTE(review): `index` is drawn with kNumCords as the bound but is also
// used to index `tokens`, which has kNumTokens elements. Both constants are
// 3 today; keep them equal (or draw a separate index) if either changes.
int index = absl::Uniform(gen, 0, kNumCords);
if (absl::Bernoulli(gen, 0.5)) {
TestCordData& cord = cords[index];
// Track/untrack.
if (cord.data.is_profiled()) {
// 1) Untrack
cord.data.cordz_info()->Untrack();
cord.data.clear_cordz_info();
} else {
// 2) Track
CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
}
} else {
std::unique_ptr<CordzSampleToken>& token = tokens[index];
if (token) {
if (absl::Bernoulli(gen, 0.5)) {
// 3) Iterate over Cords visible to a token.
for (const CordzInfo& info : *token) {
// This is trivial work to allow us to compile the loop.
EXPECT_THAT(info.Next(*token), Ne(&info));
}
} else {
// 4) Unsample
token = nullptr;
}
} else {
// 5) Sample
token = absl::make_unique<CordzSampleToken>();
}
}
}
// Release whatever this worker still has tracked before its cords go away.
for (TestCordData& cord : cords) {
CordzInfo::MaybeUntrackCord(cord.data.cordz_info());
}
});
}
// The threads will hammer away. Give it a little bit of time for tsan to
// spot errors.
absl::SleepFor(absl::Seconds(3));
stop.Notify();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,88 @@
// Copyright 2019 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzStatistics captures some meta information about a Cord's shape.
struct CordzStatistics {
  using MethodIdentifier = CordzUpdateTracker::MethodIdentifier;

  // Per-type node counters. Every counter starts at zero.
  struct NodeCounts {
    size_t flat{0};       // #flats
    size_t flat_64{0};    // #flats up to 64 bytes
    size_t flat_128{0};   // #flats up to 128 bytes
    size_t flat_256{0};   // #flats up to 256 bytes
    size_t flat_512{0};   // #flats up to 512 bytes
    size_t flat_1k{0};    // #flats up to 1K bytes
    size_t external{0};   // #external reps
    size_t substring{0};  // #substring reps
    size_t concat{0};     // #concat reps
    size_t ring{0};       // #ring buffer reps
    size_t btree{0};      // #btree reps
    size_t crc{0};        // #crc reps
  };

  // Size of the cord in bytes; matches the result of Cord::size().
  size_t size{0};

  // Estimated memory used by the sampled cord, matching the value reported by
  // Cord::EstimatedMemoryUsage().
  // A value of 0 implies the property has not been recorded.
  size_t estimated_memory_usage{0};

  // Effective memory used by the sampled cord, inversely weighted by the
  // effective indegree of each allocated node, i.e. the fair share of memory
  // that should be attributed to the sampled cord. This is the more useful
  // number when nodes are referenced by multiple Cord instances, or when a
  // Cord includes the same node multiple times (directly or indirectly).
  // A value of 0 implies the property has not been recorded.
  size_t estimated_fair_share_memory_usage{0};

  // Total number of nodes referenced by this cord.
  // For ring buffer Cords, this includes the 'ring buffer' node.
  // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all
  // the substring, flat and external nodes referenced by the tree.
  // A value of 0 implies the property has not been recorded.
  size_t node_count{0};

  // Detailed node counts per type.
  NodeCounts node_counts;

  // The cord method responsible for sampling the cord.
  MethodIdentifier method{MethodIdentifier::kUnknown};

  // The cord method responsible for sampling the parent cord, if applicable.
  MethodIdentifier parent_method{MethodIdentifier::kUnknown};

  // Invocation counts per cord method.
  CordzUpdateTracker update_tracker;
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_

View file

@ -0,0 +1,71 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#include "absl/base/thread_annotations.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzUpdateScope scopes an update to the provided CordzInfo.
// The class invokes `info->Lock(method)` and `info->Unlock()` to guard
// cordrep updates. This class does nothing if `info` is null.
// See also the `Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`.
class ABSL_SCOPED_LOCKABLE CordzUpdateScope {
 public:
  // Locks `info` for an update attributed to `method`. A null `info` is
  // accepted, in which case the scope does nothing.
  CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method)
      ABSL_EXCLUSIVE_LOCK_FUNCTION(info)
      : info_(info) {
    if (ABSL_PREDICT_FALSE(info_ != nullptr)) {
      info_->Lock(method);
    }
  }

  // A scope is bound to the block that created it: it can be neither copied
  // nor moved.
  CordzUpdateScope(const CordzUpdateScope&) = delete;
  CordzUpdateScope(CordzUpdateScope&& rhs) = delete;
  CordzUpdateScope& operator=(const CordzUpdateScope&) = delete;
  CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete;

  // Unlocks the info locked by the constructor, if there was one.
  ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() {
    if (ABSL_PREDICT_FALSE(info_ != nullptr)) {
      info_->Unlock();
    }
  }

  // Forwards `rep` to the scoped info; does nothing when the info is null.
  void SetCordRep(CordRep* rep) const {
    if (ABSL_PREDICT_FALSE(info_ != nullptr)) {
      info_->SetCordRep(rep);
    }
  }

  // Returns the info this scope was created with (possibly null).
  CordzInfo* info() const { return info_; }

 private:
  CordzInfo* info_;
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_

View file

@ -0,0 +1,49 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_update_scope.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/cordz_test_helpers.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_info.h"
#include "absl/strings/internal/cordz_update_tracker.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
// Used test values
// Method identifier passed to TrackCord() and CordzUpdateScope by these tests.
auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString;
// Constructing and destroying a scope around a null CordzInfo must be safe.
// There is no explicit expectation; the test passes if nothing crashes.
TEST(CordzUpdateScopeTest, ScopeNullptr) {
CordzUpdateScope scope(nullptr, kTrackCordMethod);
}
// Creates a scope around a tracked cord's CordzInfo and sets the rep to null
// while the scope holds the lock. There is no explicit expectation; the test
// passes if the locked SetCordRep() call and the scope's destructor complete.
TEST(CordzUpdateScopeTest, ScopeSampledCord) {
TestCordData cord;
CordzInfo::TrackCord(cord.data, kTrackCordMethod, 1);
CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod);
// NOTE(review): no explicit Untrack() here; presumably the null rep makes the
// scope's Unlock() untrack the info -- confirm against CordzInfo::Unlock().
cord.data.cordz_info()->SetCordRep(nullptr);
}
} // namespace
ABSL_NAMESPACE_END
} // namespace cord_internal
} // namespace absl

View file

@ -0,0 +1,123 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_
#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_
#include <atomic>
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
// CordzUpdateTracker tracks counters for Cord update methods.
//
// The purpose of CordzUpdateTracker is to track the number of calls to methods
// updating Cord data for sampled cords. The class internally uses 'lossy'
// atomic operations: Cord is thread-compatible, so there is no need to
// synchronize updates. However, Cordz collection threads may call 'Value()' at
// any point, so the class needs to provide thread safe access.
//
// This class is thread-safe. But as per above comments, all non-const methods
// should be used single-threaded only: updates are thread-safe but lossy.
class CordzUpdateTracker {
 public:
  // Identifiers of the Cord methods whose invocations are counted. The values
  // double as indexes into the counter array.
  enum MethodIdentifier {
    kUnknown,
    kAppendCord,
    kAppendCordBuffer,
    kAppendExternalMemory,
    kAppendString,
    kAssignCord,
    kAssignString,
    kClear,
    kConstructorCord,
    kConstructorString,
    kCordReader,
    kFlatten,
    kGetAppendBuffer,
    kGetAppendRegion,
    kMakeCordFromExternal,
    kMoveAppendCord,
    kMoveAssignCord,
    kMovePrependCord,
    kPrependCord,
    kPrependCordBuffer,
    kPrependString,
    kRemovePrefix,
    kRemoveSuffix,
    kSetExpectedChecksum,
    kSubCord,

    // kNumMethods defines the number of entries: must be the last entry.
    kNumMethods,
  };

  // Creates a tracker with every counter set to zero.
  constexpr CordzUpdateTracker() noexcept : values_{} {}

  // Creates a tracker holding a copy of the counters in `rhs`.
  CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; }

  // Overwrites every counter of this instance with the matching counter of
  // `rhs`, using one relaxed load and one relaxed store per counter.
  CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept {
    for (int index = 0; index != kNumMethods; ++index) {
      const int64_t copied = rhs.values_[index].load(std::memory_order_relaxed);
      values_[index].store(copied, std::memory_order_relaxed);
    }
    return *this;
  }

  // Returns the current count for `method` (relaxed load).
  int64_t Value(MethodIdentifier method) const {
    const Counter& counter = values_[method];
    return counter.load(std::memory_order_relaxed);
  }

  // Adds `n` to the count for `method`. The update is a separate relaxed load
  // followed by a relaxed store, not an atomic read-modify-write, which is
  // why concurrent updates can be lost.
  void LossyAdd(MethodIdentifier method, int64_t n = 1) {
    Counter& counter = values_[method];
    const int64_t updated = counter.load(std::memory_order_relaxed) + n;
    counter.store(updated, std::memory_order_relaxed);
  }

  // Adds every non-zero count of `src` to the matching count of this instance.
  void LossyAdd(const CordzUpdateTracker& src) {
    for (int index = 0; index != kNumMethods; ++index) {
      const MethodIdentifier method = static_cast<MethodIdentifier>(index);
      const int64_t delta = src.Value(method);
      if (delta != 0) {
        LossyAdd(method, delta);
      }
    }
  }

 private:
  // Until C++20 std::atomic is not constexpr default-constructible, so a
  // zero-initializing wrapper is needed for this class to be constexpr
  // constructible.
  class Counter : public std::atomic<int64_t> {
   public:
    constexpr Counter() noexcept : std::atomic<int64_t>(0) {}
  };

  // One counter per MethodIdentifier, indexed by the enumerator value.
  Counter values_[kNumMethods];
};
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_

View file

@ -0,0 +1,147 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/cordz_update_tracker.h"
#include <array>
#include <thread> // NOLINT
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/synchronization/notification.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {
namespace {
using ::testing::AnyOf;
using ::testing::Eq;
using Method = CordzUpdateTracker::MethodIdentifier;
using Methods = std::array<Method, Method::kNumMethods>;
// Returns an array of all methods defined in `MethodIdentifier`
// Returns every method identifier in declaration order, from kUnknown up to
// (but excluding) kNumMethods. The enumerators carry no explicit values, so
// they are the consecutive integers 0 .. kNumMethods - 1.
Methods AllMethods() {
  Methods methods{};
  for (int value = 0; value < Method::kNumMethods; ++value) {
    methods[static_cast<size_t>(value)] = static_cast<Method>(value);
  }
  return methods;
}
// The `constexpr` declaration only compiles if the default constructor is
// constexpr; every counter of the resulting tracker must read zero.
TEST(CordzUpdateTracker, IsConstExprAndInitializesToZero) {
constexpr CordzUpdateTracker tracker;
for (Method method : AllMethods()) {
ASSERT_THAT(tracker.Value(method), Eq(0));
}
}
// Adds a distinct odd amount (1, 3, 5, ...) to each method of a fresh tracker
// and checks that the amount is read back for that method.
TEST(CordzUpdateTracker, LossyAdd) {
  CordzUpdateTracker tracker;
  int64_t amount = 1;
  for (Method method : AllMethods()) {
    tracker.LossyAdd(method, amount);
    EXPECT_THAT(tracker.Value(method), Eq(amount));
    amount += 2;
  }
}
// Fills a source tracker with distinct odd amounts (1, 3, 5, ...) and checks
// that a copy-constructed tracker reports the same amount for every method.
TEST(CordzUpdateTracker, CopyConstructor) {
  CordzUpdateTracker src;
  int64_t amount = 1;
  for (Method method : AllMethods()) {
    src.LossyAdd(method, amount);
    amount += 2;
  }

  CordzUpdateTracker tracker(src);
  int64_t expected = 1;
  for (Method method : AllMethods()) {
    EXPECT_THAT(tracker.Value(method), Eq(expected));
    expected += 2;
  }
}
// Fills a source tracker with distinct odd amounts (1, 3, 5, ...) and checks
// that assigning it to an existing tracker transfers every amount.
TEST(CordzUpdateTracker, OperatorAssign) {
  CordzUpdateTracker src;
  CordzUpdateTracker tracker;
  int64_t amount = 1;
  for (Method method : AllMethods()) {
    src.LossyAdd(method, amount);
    amount += 2;
  }

  tracker = src;
  int64_t expected = 1;
  for (Method method : AllMethods()) {
    EXPECT_THAT(tracker.Value(method), Eq(expected));
    expected += 2;
  }
}
// Runs a reader thread that polls Value() while the main thread performs one
// LossyAdd() per method. While the writer is still running, each counter may
// read either 0 or its final value (1, 3, 5, ...); once `done` has been
// notified, every counter must read its final value.
TEST(CordzUpdateTracker, ThreadSanitizedValueCheck) {
absl::Notification done;
CordzUpdateTracker tracker;
std::thread reader([&done, &tracker] {
// Concurrent phase: each counter is either untouched or fully written.
while (!done.HasBeenNotified()) {
int n = 1;
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), AnyOf(Eq(n), Eq(0)));
n += 2;
}
}
// Final phase: the writer has finished, all values must be present.
int n = 1;
for (Method method : AllMethods()) {
EXPECT_THAT(tracker.Value(method), Eq(n));
n += 2;
}
});
// Writer: a single add per method, so no intermediate values exist.
int64_t n = 1;
for (Method method : AllMethods()) {
tracker.LossyAdd(method, n);
n += 2;
}
done.Notify();
reader.join();
}
} // namespace
} // namespace cord_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,93 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/damerau_levenshtein_distance.h"
#include <algorithm>
#include <array>
#include <numeric>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Calculate the Damerau-Levenshtein (adjacent transpositions) distance
// between two strings,
// https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance. The
// algorithm follows the condition that no substring is edited more than once.
// While this can result in a larger distance, it's a) a much simpler algorithm
// and b) more realistic for the case that typographic mistakes should be
// detected.
// When the distance is larger than cutoff, or one of the strings has more
// than MAX_SIZE=100 characters, the code returns min(MAX_SIZE, cutoff) + 1.
//
// Only a diagonal band of the edit matrix (cells that can still be within the
// cutoff) is evaluated; cells just outside the band are set to the sentinel
// value cutoff + 1 before they are read.
uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1,
absl::string_view s2, uint8_t cutoff) {
const uint8_t MAX_SIZE = 100;
// Effective cutoff: never larger than MAX_SIZE.
const uint8_t _cutoff = std::min(MAX_SIZE, cutoff);
// Value returned for "distance exceeds the cutoff"; also used as sentinel.
const uint8_t cutoff_plus_1 = static_cast<uint8_t>(_cutoff + 1);
// From here on s1 is the shorter (or equally long) string.
if (s1.size() > s2.size()) std::swap(s1, s2);
// The length difference alone already exceeds the cutoff, or the longer
// string does not fit into the fixed-size matrix below.
if (s1.size() + _cutoff < s2.size() || s2.size() > MAX_SIZE)
return cutoff_plus_1;
// Empty s1: the check above guarantees s2.size() <= _cutoff here.
if (s1.empty())
return static_cast<uint8_t>(s2.size());
// Lower diagonal bound: y = x - lower_diag
const uint8_t lower_diag =
_cutoff - static_cast<uint8_t>(s2.size() - s1.size());
// Upper diagonal bound: y = x + upper_diag
const uint8_t upper_diag = _cutoff;
// d[i][j] is the number of edits required to convert s1[0, i] to s2[0, j]
// The matrix is intentionally left uninitialized; only the band and its
// sentinel border are written before being read.
std::array<std::array<uint8_t, MAX_SIZE + 2>, MAX_SIZE + 2> d;
// Row 0: converting the empty prefix of s1 into s2[0, j] takes j edits.
std::iota(d[0].begin(), d[0].begin() + upper_diag + 1, 0);
d[0][cutoff_plus_1] = cutoff_plus_1;
for (size_t i = 1; i <= s1.size(); ++i) {
// Deduce begin of relevant window.
size_t j_begin = 1;
if (i > lower_diag) {
j_begin = i - lower_diag;
// Sentinel to the left of the band.
d[i][j_begin - 1] = cutoff_plus_1;
} else {
// Column 0: converting s1[0, i] into the empty prefix takes i edits.
d[i][0] = static_cast<uint8_t>(i);
}
// Deduce end of relevant window.
size_t j_end = i + upper_diag;
if (j_end > s2.size()) {
j_end = s2.size();
} else {
// Sentinel to the right of the band.
d[i][j_end + 1] = cutoff_plus_1;
}
for (size_t j = j_begin; j <= j_end; ++j) {
const uint8_t deletion_distance = d[i - 1][j] + 1;
const uint8_t insertion_distance = d[i][j - 1] + 1;
const uint8_t mismatched_tail_cost = s1[i - 1] == s2[j - 1] ? 0 : 1;
const uint8_t mismatch_distance = d[i - 1][j - 1] + mismatched_tail_cost;
// A transposition only applies when the last two characters are swapped.
uint8_t transposition_distance = _cutoff + 1;
if (i > 1 && j > 1 && s1[i - 1] == s2[j - 2] && s1[i - 2] == s2[j - 1])
transposition_distance = d[i - 2][j - 2] + 1;
// Clamp to cutoff + 1 so stored values never grow beyond the sentinel.
d[i][j] = std::min({cutoff_plus_1, deletion_distance, insertion_distance,
mismatch_distance, transposition_distance});
}
}
return d[s1.size()][s2.size()];
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,34 @@
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_
#define ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_
#include <cstdint>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Calculate the Damerau-Levenshtein distance (with adjacent transpositions)
// between two strings.
// The effective cutoff is min(cutoff, 100). When the distance is larger than
// the effective cutoff, or either string is longer than 100 characters, the
// code just returns the effective cutoff + 1.
uint8_t CappedDamerauLevenshteinDistance(absl::string_view s1,
absl::string_view s2, uint8_t cutoff);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_DAMERAU_LEVENSHTEIN_DISTANCE_H_

View file

@ -0,0 +1,99 @@
// Copyright 2022 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/damerau_levenshtein_distance.h"
#include <cstdint>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace {
using absl::strings_internal::CappedDamerauLevenshteinDistance;
// Checks exact distances for short strings with a cutoff (6) that is larger
// than any distance under test, so no result is capped.
TEST(Distance, TestDistances) {
EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 6), uint8_t{0});
EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "b", 6), uint8_t{1});
EXPECT_THAT(CappedDamerauLevenshteinDistance("ca", "abc", 6), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "ad", 6), uint8_t{2});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "cadb", 6), uint8_t{4});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "bdac", 6), uint8_t{4});
// Equal strings have distance 0 even with a cutoff of 0.
EXPECT_THAT(CappedDamerauLevenshteinDistance("ab", "ab", 0), uint8_t{0});
EXPECT_THAT(CappedDamerauLevenshteinDistance("", "", 0), uint8_t{0});
// combinations for 3-character strings:
// 1, 2, 3 removals, insertions or replacements and transpositions
// Every expectation is checked in both argument orders (symmetry).
EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", "abc", 6), uint8_t{0});
// Distance 3 from "abc".
for (auto res :
{"", "ca", "efg", "ea", "ce", "ceb", "eca", "cae", "cea", "bea"}) {
EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{3});
}
// Distance 2 from "abc".
// NOTE(review): "ace" appears twice in this list.
for (auto res :
{"a", "b", "c", "ba", "cb", "bca", "cab", "cba", "ace",
"efc", "ebf", "aef", "ae", "be", "eb", "ec", "ecb", "bec",
"bce", "cbe", "ace", "eac", "aeb", "bae", "eab", "eba"}) {
EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{2});
EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{2});
}
// Distance 1 from "abc".
for (auto res : {"ab", "ac", "bc", "acb", "bac", "ebc", "aec", "abe"}) {
EXPECT_THAT(CappedDamerauLevenshteinDistance("abc", res, 6), uint8_t{1});
EXPECT_THAT(CappedDamerauLevenshteinDistance(res, "abc", 6), uint8_t{1});
}
}
// Checks the capping behavior: results above the cutoff, cutoffs above 100
// and strings longer than 100 characters.
TEST(Distance, TestCutoff) {
// Returning cutoff + 1 if the value is larger than cutoff or string longer
// than MAX_SIZE.
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 3), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 2), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcd", "a", 1), uint8_t{2});
EXPECT_THAT(CappedDamerauLevenshteinDistance("abcdefg", "a", 2), uint8_t{3});
EXPECT_THAT(CappedDamerauLevenshteinDistance("a", "abcde", 2), uint8_t{3});
// Strings longer than 100 characters: capped result even though they are
// equal; the cutoff of 105 is reduced to 100, hence 101.
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(102, 'a'),
std::string(102, 'a'), 105),
uint8_t{101});
// Exactly 100 characters is still within the supported size.
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
std::string(100, 'a'), 100),
uint8_t{0});
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
std::string(100, 'b'), 100),
uint8_t{100});
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
std::string(99, 'a'), 2),
uint8_t{1});
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
std::string(101, 'a'), 2),
uint8_t{3});
// NOTE(review): this expectation is identical to the previous one.
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(100, 'a'),
std::string(101, 'a'), 2),
uint8_t{3});
// Sizes and cutoffs around UINT8_MAX: always the capped value 101.
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX + 1, 'a'),
std::string(UINT8_MAX + 1, 'b'),
UINT8_MAX),
uint8_t{101});
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'),
std::string(UINT8_MAX - 1, 'b'),
UINT8_MAX),
uint8_t{101});
EXPECT_THAT(
CappedDamerauLevenshteinDistance(std::string(UINT8_MAX, 'a'),
std::string(UINT8_MAX, 'b'), UINT8_MAX),
uint8_t{101});
EXPECT_THAT(CappedDamerauLevenshteinDistance(std::string(UINT8_MAX - 1, 'a'),
std::string(UINT8_MAX - 1, 'a'),
UINT8_MAX),
uint8_t{101});
}
} // namespace

View file

@ -0,0 +1,209 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/escaping.h"
#include <limits>
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The two strings below provide maps from normal 6-bit characters to their
// base64-escaped equivalent: the character at index v encodes the 6-bit
// value v. The two alphabets differ only in the last two characters
// ("+/" for standard, "-_" for web-safe).
// For the inverse case, see kUn(WebSafe)Base64 in the external
// escaping.cc.
ABSL_CONST_INIT const char kBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 turns every complete group of three input bytes into four output
  // characters. A trailing partial group is encoded into two or three
  // characters and, if requested, padded with "=" up to four.
  //
  // Guard the 4/3 expansion below against size_t overflow.
  constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3;
  ABSL_INTERNAL_CHECK(input_len <= kMaxSize,
                      "CalculateBase64EscapedLenInternal() overflow");

  const size_t full_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  // Since all base 64 input is an integral number of octets, only the three
  // cases of https://tools.ietf.org/html/rfc3548 can arise for the tail.
  size_t tail_len = 0;
  switch (leftover_bytes) {
    case 1:
      // (2) The final quantum is exactly 8 bits: two characters followed by
      //     two "=" padding characters.
      tail_len = do_padding ? 4 : 2;
      break;
    case 2:
      // (3) The final quantum is exactly 16 bits: three characters followed
      //     by one "=" padding character.
      tail_len = do_padding ? 4 : 3;
      break;
    default:
      // (1) The input is an integral multiple of 24 bits: the output is an
      //     integral multiple of 4 characters with no "=" padding.
      break;
  }
  return full_groups * 4 + tail_len;
}
// ----------------------------------------------------------------------
// Base64-encodes the `szsrc` bytes at `src` into `dest` (capacity `szdest`)
// using the 64-character alphabet `base64`, optionally appending '=' padding.
// Returns the number of characters written to `dest`; returns 0 when `dest`
// is too small (and also when there is nothing to encode).
//
// Background on the encoding (described from the decoder's point of view):
// Take the input in groups of 4 characters and turn each
// character into a code 0 to 63 thus:
// A-Z map to 0 to 25
// a-z map to 26 to 51
// 0-9 map to 52 to 61
// +(- for WebSafe) maps to 62
// /(_ for WebSafe) maps to 63
// There will be four numbers, all less than 64 which can be represented
// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
// Arrange the 6 digit binary numbers into three bytes as such:
// aaaaaabb bbbbcccc ccdddddd
// Equals signs (one or two) are used at the end of the encoded block to
// indicate that the text was not an integer multiple of three bytes long.
// ----------------------------------------------------------------------
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64,
bool do_padding) {
static const char kPad64 = '=';
// Quick rejection: the output needs at least 4 characters per 3 input bytes.
// NOTE(review): the products are not checked for size_t wrap-around --
// presumably callers never pass sizes that large; confirm.
if (szsrc * 4 > szdest * 3) return 0;
char* cur_dest = dest;
const unsigned char* cur_src = src;
char* const limit_dest = dest + szdest;
const unsigned char* const limit_src = src + szsrc;
// (from https://tools.ietf.org/html/rfc3548)
// Special processing is performed if fewer than 24 bits are available
// at the end of the data being encoded. A full encoding quantum is
// always completed at the end of a quantity. When fewer than 24 input
// bits are available in an input group, zero bits are added (on the
// right) to form an integral number of 6-bit groups.
//
// If do_padding is true, padding at the end of the data is performed. This
// output padding uses the '=' character.
// Three bytes of data encodes to four characters of cyphertext.
// So we can pump through three-byte chunks atomically.
if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
while (cur_src < limit_src - 3) { // While we have >= 32 bits.
// Load four bytes but keep only the first three (top 24 bits).
uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
cur_src += 3;
}
}
// To save time, we didn't update szdest or szsrc in the loop. So do it now.
szdest = static_cast<size_t>(limit_dest - cur_dest);
szsrc = static_cast<size_t>(limit_src - cur_src);
/* now deal with the tail (<=3 bytes) */
switch (szsrc) {
case 0:
// Nothing left; nothing more to do.
break;
case 1: {
// One byte left: this encodes to two characters, and (optionally)
// two pad characters to round out the four-character cypherblock.
if (szdest < 2) return 0;
uint32_t in = cur_src[0];
cur_dest[0] = base64[in >> 2];
in &= 0x3;
// The remaining 2 bits become the high bits of the second character.
cur_dest[1] = base64[in << 4];
cur_dest += 2;
szdest -= 2;
if (do_padding) {
if (szdest < 2) return 0;
cur_dest[0] = kPad64;
cur_dest[1] = kPad64;
cur_dest += 2;
szdest -= 2;
}
break;
}
case 2: {
// Two bytes left: this encodes to three characters, and (optionally)
// one pad character to round out the four-character cypherblock.
if (szdest < 3) return 0;
uint32_t in = absl::big_endian::Load16(cur_src);
cur_dest[0] = base64[in >> 10];
in &= 0x3FF;
cur_dest[1] = base64[in >> 4];
in &= 0x00F;
// The remaining 4 bits become the high bits of the third character.
cur_dest[2] = base64[in << 2];
cur_dest += 3;
szdest -= 3;
if (do_padding) {
if (szdest < 1) return 0;
cur_dest[0] = kPad64;
cur_dest += 1;
szdest -= 1;
}
break;
}
case 3: {
// Three bytes left: same as in the big loop above. We can't do this in
// the loop because the loop above always reads 4 bytes, and the fourth
// byte is past the end of the input.
if (szdest < 4) return 0;
uint32_t in =
(uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
cur_dest[0] = base64[in >> 18];
in &= 0x3FFFF;
cur_dest[1] = base64[in >> 12];
in &= 0xFFF;
cur_dest[2] = base64[in >> 6];
in &= 0x3F;
cur_dest[3] = base64[in];
cur_dest += 4;
szdest -= 4;
break;
}
default:
// Should not be reached: the while loop before this switch statement
// keeps running as long as at least 4 input bytes remain, so at most
// 3 bytes can be left here.
ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
break;
}
return static_cast<size_t>(cur_dest - dest);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,57 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
#include <cassert>
#include "absl/strings/internal/resize_uninitialized.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Encoding alphabets for standard and web-safe Base64. Each maps a 6-bit
// value (used as the index) to its output character.
ABSL_CONST_INIT extern const char kBase64Chars[];
ABSL_CONST_INIT extern const char kWebSafeBase64Chars[];
// Calculates the length of a Base64 encoding (RFC 4648) of a string of length
// `input_len`, with or without padding per `do_padding`. Note that 'web-safe'
// encoding (section 5 of the RFC) does not change this length.
// The implementation checks (via ABSL_INTERNAL_CHECK) that `input_len` is
// small enough for the result not to overflow `size_t`.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
// Base64-encodes the `szsrc` bytes at `src` using the alphabet provided in
// `base64` (which determines whether to do web-safe encoding or not) and
// writes the result to `dest`, which has room for `szdest` characters. If
// `do_padding` is true, the output is padded with '=' chars until its length
// is a multiple of 4. Returns the number of characters written to `dest`, or
// 0 if `dest` is too small.
size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
size_t szdest, const char* base64, bool do_padding);
// String-producing overload: sizes `*dest` to the exact escaped length,
// encodes `src` straight into its buffer and trims it to the number of
// characters actually written.
template <typename String>
void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
                          bool do_padding, const char* base64_chars) {
  const size_t expected_len =
      CalculateBase64EscapedLenInternal(szsrc, do_padding);
  // Grow without value-initializing; every byte is overwritten below.
  STLStringResizeUninitialized(dest, expected_len);

  char* const out = &(*dest)[0];
  const size_t written = Base64EscapeInternal(src, szsrc, out, dest->size(),
                                              base64_chars, do_padding);
  // The precomputed length and the encoder must agree.
  assert(expected_len == written);
  dest->erase(written);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_

View file

@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test contains common things needed by both escaping_test.cc and
// escaping_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_
#include <array>
#include "absl/strings/string_view.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// One Base64 test vector: an input text and its expected encoding.
struct base64_testcase {
// Unencoded input.
absl::string_view plaintext;
// Expected encoding of `plaintext` (the vectors in base64_strings() use the
// web-safe alphabet without '=' padding).
absl::string_view cyphertext;
};
// Returns the shared table of Base64 test vectors (plaintext and its
// web-safe, unpadded encoding). The table is built once and lives for the
// rest of the program.
inline const std::array<base64_testcase, 5>& base64_strings() {
  static const std::array<base64_testcase, 5> testcase{{
      // Some google quotes
      // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
      // (Note that we're testing the websafe encoding, though, so if
      // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
      { "I was always good at math and science, and I never realized "
        "that was unusual or somehow undesirable. So one of the things "
        "I care a lot about is helping to remove that stigma, "
        "to show girls that you can be feminine, you can like the things "
        "that girls like, but you can also be really good at technology. "
        "You can be really good at building things."
        " - Marissa Meyer, Newsweek, 2010-12-22" "\n",
        "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
        "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
        "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
        "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
        "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
        "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
        "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
        "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
        "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
      { "Typical first year for a new cluster: "
        "~0.5 overheating "
        "~1 PDU failure "
        "~1 rack-move "
        "~1 network rewiring "
        "~20 rack failures "
        "~5 racks go wonky "
        "~8 network maintenances "
        "~12 router reloads "
        "~3 router failures "
        "~dozens of minor 30-second blips for dns "
        "~1000 individual machine failures "
        "~thousands of hard drive failures "
        "slow disks, bad memory, misconfigured machines, flaky machines, etc."
        " - Jeff Dean, The Joys of Real Hardware" "\n",
        "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
        "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
        "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
        "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
        "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
        "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
        "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
        "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
        "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
        "ZWFsIEhhcmR3YXJlCg" },
      // The two spaces after "Google." are significant: the cyphertext
      // ("...R29vZ2xlLiAg") encodes both of them.
      { "I'm the head of the webspam team at Google.  "
        "That means that if you type your name into Google and get porn back, "
        "it's my fault. Unless you're a porn star, in which case porn is a "
        "completely reasonable response."
        " - Matt Cutts, Google Plus" "\n",
        "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
        "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
        "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
        "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
        "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
        "IEdvb2dsZSBQbHVzCg" },
      { "It will still be a long time before machines approach human "
        "intelligence. "
        "But luckily, machines don't actually have to be intelligent; "
        "they just have to fake it. Access to a wealth of information, "
        "combined with a rudimentary decision-making capacity, "
        "can often be almost as useful. Of course, the results are better yet "
        "when coupled with intelligence. A reference librarian with access to "
        "a good search engine is a formidable tool."
        " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004"
        "\n",
        "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
        "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
        "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
        "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
        "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
        "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
        "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
        "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
        "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
        "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
        "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
        "NAo" },
      // Degenerate edge case
      { "",
        "" },
  }};
  return testcase;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_

View file

@ -0,0 +1,48 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
#include "absl/strings/ascii.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Compares the first `len` bytes of `s1` and `s2`, treating ASCII letters
// case-insensitively. Returns 0 if they match, otherwise the difference of
// the first pair of differing bytes after lower-casing (as unsigned values).
int memcasecmp(const char* s1, const char* s2, size_t len) {
  const auto* lhs = reinterpret_cast<const unsigned char*>(s1);
  const auto* rhs = reinterpret_cast<const unsigned char*>(s2);

  // NOTE(b/308193381): We do not use `absl::ascii_tolower` here in order
  // to avoid its lookup table and improve performance.
  const auto to_lower = [](unsigned char ch) -> int {
    return (ch >= 'A' && ch <= 'Z') ? ch - 'A' + 'a' : ch;
  };

  for (size_t remaining = len; remaining != 0; --remaining, ++lhs, ++rhs) {
    // Identical bytes stay identical after lower-casing, so only differing
    // bytes need to be converted.
    if (*lhs == *rhs) continue;
    const int delta = to_lower(*lhs) - to_lower(*rhs);
    if (delta != 0) return delta;
  }
  return 0;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,40 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
#include <cstddef>
#include <cstring>
#include "absl/base/port.h" // disable some warnings on Windows
#include "absl/strings/ascii.h" // for absl::ascii_tolower
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Performs a byte-by-byte comparison of `len` bytes of the strings `s1` and
// `s2`, ignoring the case of the characters (only the ASCII letters 'A'-'Z'
// are folded to lower case). It returns an integer less than,
// equal to, or greater than zero if `s1` is found, respectively, to be less
// than, to match, or be greater than `s2`.
int memcasecmp(const char* s1, const char* s2, size_t len);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_

View file

@ -0,0 +1,128 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/memutil.h"
#include <algorithm>
#include <cstdlib>
#include "benchmark/benchmark.h"
#include "absl/strings/ascii.h"
// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
// That gives us:
// - an easy search: 'b'
// - a medium search: 'ab'. That means every letter is a possible match.
// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as the
//   haystack)
namespace {
// Number of bytes in the benchmark haystack, as int and as int64_t (the
// latter is used for the bytes-processed counters).
constexpr int kHaystackSize = 10000;
constexpr int64_t kHaystackSize64 = kHaystackSize;

// Allocates the haystack: kHaystackSize - 1 'a' characters followed by a
// single 'b'. The buffer is not NUL-terminated and is never freed.
const char* MakeHaystack() {
  char* const buffer = new char[kHaystackSize];
  std::fill_n(buffer, kHaystackSize - 1, 'a');
  buffer[kHaystackSize - 1] = 'b';
  return buffer;
}

// The haystack shared by all benchmarks in this file.
const char* const kHaystack = MakeHaystack();
// Equality predicate that ignores ASCII case; used with std::search below.
bool case_eq(const char a, const char b) {
  const auto folded_a = absl::ascii_tolower(a);
  const auto folded_b = absl::ascii_tolower(b);
  return folded_a == folded_b;
}
// Benchmarks std::search with the case-insensitive predicate for a
// one-character needle: the last byte of the haystack.
void BM_Searchcase(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 1,
kHaystack + kHaystackSize, case_eq));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Searchcase);
// Benchmarks std::search with the case-insensitive predicate for a
// two-character needle: the last two bytes of the haystack.
void BM_SearchcaseMedium(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize - 2,
kHaystack + kHaystackSize, case_eq));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcaseMedium);
// Benchmarks std::search with the case-insensitive predicate where the needle
// is the second half of the haystack itself.
void BM_SearchcasePathological(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystack + kHaystackSize, case_eq));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_SearchcasePathological);
// Returns a pointer to the first of the `slen` bytes at `s` that equals `c`
// when both are lower-cased with absl::ascii_tolower, or nullptr if there is
// none.
char* memcasechr(const char* s, int c, size_t slen) {
  const int wanted = absl::ascii_tolower(c);
  for (const char* const end = s + slen; s != end; ++s) {
    if (absl::ascii_tolower(*s) == wanted) return const_cast<char*>(s);
  }
  return nullptr;
}
// Case-insensitive substring search: returns a pointer to the first
// occurrence of the `neelen` bytes at `pneedle` within the `haylen` bytes at
// `phaystack`, or nullptr if there is none. An empty needle matches at the
// start of the haystack.
const char* memcasematch(const char* phaystack, size_t haylen,
                         const char* pneedle, size_t neelen) {
  if (neelen == 0) return phaystack;  // even if haylen is 0
  if (haylen < neelen) return nullptr;

  // One past the last position at which a full needle still fits.
  const char* const search_end = phaystack + haylen - neelen + 1;
  for (const char* cursor = phaystack;;) {
    // Jump to the next byte that matches the needle's first character.
    const char* const candidate = static_cast<char*>(memcasechr(
        cursor, pneedle[0], static_cast<size_t>(search_end - cursor)));
    if (candidate == nullptr) return nullptr;
    if (absl::strings_internal::memcasecmp(candidate, pneedle, neelen) == 0) {
      return candidate;
    }
    cursor = candidate + 1;
  }
}
// Benchmarks memcasematch for the one-character needle "b".
void BM_Memcasematch(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_Memcasematch);
// Benchmarks memcasematch for the two-character needle "ab".
void BM_MemcasematchMedium(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchMedium);
// Benchmarks memcasematch where the needle is the second half of the haystack
// itself.
void BM_MemcasematchPathological(benchmark::State& state) {
for (auto _ : state) {
// DoNotOptimize keeps the search result alive so the call is not elided.
benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
kHaystack + kHaystackSize / 2,
kHaystackSize - kHaystackSize / 2));
}
// Account one full haystack per iteration.
state.SetBytesProcessed(kHaystackSize64 * state.iterations());
}
BENCHMARK(BM_MemcasematchPathological);
} // namespace

View file

@ -0,0 +1,41 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Unit test for memutil.cc
#include "absl/strings/internal/memutil.h"
#include <cstdlib>
#include "gtest/gtest.h"
namespace {
// Covers equal-ignoring-case input, a difference in the last byte, a length
// that stops before the difference, and a zero length.
TEST(MemUtil, memcasecmp) {
// check memutil functions
const char a[] = "hello there";
// Same text, different case: equal.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
sizeof("hello there") - 1),
0);
// Last byte differs ('e' vs 'f'): the result is their difference, -1.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 1),
-1);
// Comparing one byte fewer excludes the differing byte.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
sizeof("hello there") - 2),
0);
// Zero length always compares equal.
EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
}
} // namespace

View file

@ -0,0 +1,184 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file contains common things needed by numbers_test.cc,
// numbers_legacy_test.cc and numbers_benchmark.cc.
#ifndef ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#define ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Reference integer-to-string conversion for tests. Writes `value` in the
// given `base` (2..36, digits 0-9 then A-Z, leading '-' for negative values)
// to `*destination`. Returns false and leaves `*destination` empty if the
// base is out of range.
template <typename IntType>
inline bool Itoa(IntType value, int base, std::string* destination) {
  destination->clear();
  const bool base_supported = base > 1 && base <= 36;
  if (!base_supported) return false;

  if (value == 0) {
    destination->assign(1, '0');
    return true;
  }

  const bool is_negative = value < 0;

  // Collect the digits least-significant first, then reverse at the end.
  std::string reversed;
  for (IntType rest = value; rest != 0;) {
    const IntType quotient = rest / base;
    const auto scaled = quotient * base;
    // Can't use std::abs here because of problems when IntType is unsigned.
    const int digit =
        static_cast<int>(rest > scaled ? rest - scaled : scaled - rest);
    reversed.push_back(
        static_cast<char>(digit < 10 ? '0' + digit : 'A' + digit - 10));
    rest = quotient;
  }
  if (is_negative) reversed.push_back('-');

  destination->assign(reversed.rbegin(), reversed.rend());
  return true;
}
// One test vector for string-to-uint32 conversion.
struct uint32_test_case {
// Input text; the table below ends with a nullptr entry.
const char* str;
// Whether the conversion is expected to succeed.
bool expect_ok;
int base; // base to pass to the conversion function
// Expected output value.
// NOTE(review): for failing cases this looks like the value the parser is
// expected to leave behind (e.g. a parsed prefix or the clamped max) --
// confirm against the tests that consume this table.
uint32_t expected;
};
// Returns the shared table of 27 uint32_t string-conversion test vectors.
// The table is a function-local static, so it is initialized once and every
// call returns a reference to the same array. The last entry has a null `str`.
inline const std::array<uint32_test_case, 27>& strtouint32_test_cases() {
static const std::array<uint32_test_case, 27> test_cases{{
{"0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{"0x34234324", true, 16, 0x34234324},
{"34234324", true, 16, 0x34234324},
{"0", true, 16, 0},
{" \t\n 0xffffffff", true, 16, (std::numeric_limits<uint32_t>::max)()},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
{" \t\n 72717222", true, 8, 072717222},
{" \t\n 072717222", true, 8, 072717222},
{" \t\n 072717228", false, 8, 07271722}, // trailing '8' is not an octal digit
{"0", true, 0, 0},
// Base-10 version.
{"34234324", true, 0, 34234324},
{"4294967295", true, 0, (std::numeric_limits<uint32_t>::max)()},
{"34234324 \n\t", true, 10, 34234324},
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0}, // would be valid hex, but prefix is missing
{"34234324a", false, 0, 34234324},
{"34234.3", false, 0, 34234},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"4294967296", false, 0, (std::numeric_limits<uint32_t>::max)()},
{"0x100000000", false, 0, (std::numeric_limits<uint32_t>::max)()},
// Null-string entry closing the table.
{nullptr, false, 0, 0},
}};
return test_cases;
}
// One table-driven test vector for converting a string to a uint64_t.
// Field order matters: the table in strtouint64_test_cases() fills these in
// positionally with brace initializers.
struct uint64_test_case {
const char* str; // input text; the table's final entry uses nullptr here
bool expect_ok; // presumably whether the conversion should succeed -- confirm against the tests that consume this table
int base; // base value stored for each case (0 appears alongside 3, 8, 10 and 16 in the table)
uint64_t expected; // presumably the value the test compares the result against -- confirm against the consuming tests
};
// Returns the shared table of 34 uint64_t string-conversion test vectors.
// The table is a function-local static, so it is initialized once and every
// call returns a reference to the same array. The last entry has a null `str`.
inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() {
static const std::array<uint64_test_case, 34> test_cases{{
{"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"3423432448783446", true, 16, int64_t{0x3423432448783446}},
{"0", true, 16, 0},
{"000", true, 0, 0},
{"0", true, 0, 0},
{" \t\n 0xffffffffffffffff", true, 16,
(std::numeric_limits<uint64_t>::max)()},
{"012345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12345670123456701234", true, 8, int64_t{012345670123456701234}},
{"12845670123456701234", false, 8, 0}, // contains '8', not an octal digit
// Base-10 version.
{"34234324487834466", true, 0, int64_t{34234324487834466}},
{" \t\n 18446744073709551615", true, 0,
(std::numeric_limits<uint64_t>::max)()},
{"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
{" \f\v 46", true, 10, 46}, // must accept weird whitespace
// Unusual base
{"0", true, 3, 0},
{"2", true, 3, 2},
{"11", true, 3, 4},
{"0", true, 0, 0},
// Invalid uints.
{"", false, 0, 0},
{" ", false, 0, 0},
{"abc", false, 0, 0},
{"34234324487834466a", false, 0, 0},
{"34234487834466.3", false, 0, 0},
{"-1", false, 0, 0},
{" -123", false, 0, 0},
{" \t\n -123", false, 0, 0},
// Out of bounds.
{"18446744073709551616", false, 10, 0},
{"18446744073709551616", false, 0, 0},
{"0x10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 16,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x10000000000000000", false, 0, (std::numeric_limits<uint64_t>::max)()},
{"0X10000000000000000", false, 0,
(std::numeric_limits<uint64_t>::max)()}, // 0X versus 0x.
{"0x1234", true, 16, 0x1234},
// Base-10 string version.
{"1234", true, 0, 1234},
// Null-string entry closing the table.
{nullptr, false, 0, 0},
}};
return test_cases;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_

View file

@ -0,0 +1,43 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <cassert>
#include <cstddef>
#include <ios>
#include <streambuf>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Appends the single character `c` to the target string, unless `c` is the
// end-of-file marker, in which case nothing is written. Always returns 1,
// a value distinct from eof. A target string must be attached (asserted).
OStringStream::Streambuf::int_type OStringStream::Streambuf::overflow(int c) {
  using Traits = std::streambuf::traits_type;
  assert(str_);
  const bool is_eof = Traits::eq_int_type(c, Traits::eof());
  if (!is_eof) {
    str_->push_back(static_cast<char>(c));
  }
  return 1;
}
// Appends the `n` characters starting at `s` to the target string in a single
// call and reports all `n` of them as written. A target string must be
// attached (asserted).
std::streamsize OStringStream::Streambuf::xsputn(const char* s,
                                                 std::streamsize n) {
  assert(str_);
  const size_t count = static_cast<size_t>(n);
  str_->append(s, count);
  return n;
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,114 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
#include <cassert>
#include <ios>
#include <ostream>
#include <streambuf>
#include <string>
#include <utility>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// The same as std::ostringstream but appends to a user-specified std::string,
// and is faster. It is ~70% faster to create, ~50% faster to write to, and
// completely free to extract the result std::string.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42 << ' ' << 3.14; // appends to `s`
//
// The stream object doesn't have to be named. Starting from C++11 operator<<
// works with rvalues of std::ostream.
//
// std::string s;
// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
//
// OStringStream is faster to create than std::ostringstream but it's still
// relatively slow. Avoid creating multiple streams where a single stream will
// do.
//
// Creates unnecessary instances of OStringStream: slow.
//
// std::string s;
// OStringStream(&s) << 42;
// OStringStream(&s) << ' ';
// OStringStream(&s) << 3.14;
//
// Creates a single instance of OStringStream and reuses it: fast.
//
// std::string s;
// OStringStream strm(&s);
// strm << 42;
// strm << ' ';
// strm << 3.14;
//
// Note: flush() has no effect. No reason to call it.
class OStringStream final : public std::ostream {
public:
// The argument can be null, in which case you'll need to call str(p) with a
// non-null argument before you can write to the stream.
//
// The destructor of OStringStream doesn't use the std::string. It's OK to
// destroy the std::string before the stream.
//
// Note that the base class is handed &buf_ before buf_ itself has been
// constructed (bases are initialized before members).
explicit OStringStream(std::string* str)
: std::ostream(&buf_), buf_(str) {}
// Move construction: moves the std::ostream state out of `that` and copies
// `that`'s buffer (i.e. its target string pointer) into this object's buf_.
// rdbuf() is then pointed at this object's own buf_, so the new stream never
// refers to the buffer object living inside `that`.
OStringStream(OStringStream&& that)
: std::ostream(std::move(static_cast<std::ostream&>(that))),
buf_(that.buf_) {
rdbuf(&buf_);
}
// Move assignment: same steps as move construction, applied to an existing
// stream. `that` keeps its own copy of the target string pointer.
OStringStream& operator=(OStringStream&& that) {
std::ostream::operator=(std::move(static_cast<std::ostream&>(that)));
buf_ = that.buf_;
rdbuf(&buf_);
return *this;
}
// Returns the string currently being appended to (may be null).
std::string* str() { return buf_.str(); }
const std::string* str() const { return buf_.str(); }
// Redirects subsequent writes to `str` (may be null).
void str(std::string* str) { buf_.str(str); }
private:
// Stream buffer that holds no storage of its own: it only keeps a pointer to
// the target string, and its overflow()/xsputn() overrides (defined out of
// line) are what receive the characters written to the stream.
class Streambuf final : public std::streambuf {
public:
explicit Streambuf(std::string* str) : str_(str) {}
// Copying duplicates the pointer only; both copies target the same string.
Streambuf(const Streambuf&) = default;
Streambuf& operator=(const Streambuf&) = default;
std::string* str() { return str_; }
const std::string* str() const { return str_; }
void str(std::string* str) { str_ = str; }
protected:
int_type overflow(int c) override;
std::streamsize xsputn(const char* s, std::streamsize n) override;
private:
std::string* str_; // not owned; may be null
} buf_;
};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_

View file

@ -0,0 +1,106 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <sstream>
#include <string>
#include "benchmark/benchmark.h"
namespace {
// Template argument for the benchmarks below: selects what is done with the
// written data once all writes have completed.
enum StringType {
kNone, // do nothing further with the written data
kStdString, // copy the written data into a separate std::string
};
// Benchmarks for std::ostringstream.
//
// state.range(0) is the number of writes per iteration and state.range(1) is
// the size in bytes of each write. Every iteration constructs a fresh
// std::ostringstream, streams a payload of 'x' characters into it that many
// times and, when kOutput is kStdString, copies the contents out via str().
template <StringType kOutput>
void BM_StdStream(benchmark::State& state) {
const int num_writes = state.range(0);
const int bytes_per_write = state.range(1);
// Built once, outside the timed loop.
const std::string payload(bytes_per_write, 'x');
for (auto _ : state) {
std::ostringstream strm;
benchmark::DoNotOptimize(strm);
for (int i = 0; i != num_writes; ++i) {
strm << payload;
}
switch (kOutput) {
case kNone: {
break;
}
case kStdString: {
std::string s = strm.str();
benchmark::DoNotOptimize(s);
break;
}
}
}
}
// Create the stream, optionally write to it, then destroy it.
// Each ArgPair is (number of writes, bytes per write).
BENCHMARK_TEMPLATE(BM_StdStream, kNone)
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// Each ArgPair is (number of writes, bytes per write).
BENCHMARK_TEMPLATE(BM_StdStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Benchmarks for OStringStream.
//
// state.range(0) is the number of writes per iteration and state.range(1) is
// the size in bytes of each write. Every iteration creates an empty
// std::string plus an OStringStream appending to it, streams a payload of 'x'
// characters that many times and, when kOutput is kStdString, copies the
// target string.
template <StringType kOutput>
void BM_CustomStream(benchmark::State& state) {
const int num_writes = state.range(0);
const int bytes_per_write = state.range(1);
// Built once, outside the timed loop.
const std::string payload(bytes_per_write, 'x');
for (auto _ : state) {
std::string out;
absl::strings_internal::OStringStream strm(&out);
benchmark::DoNotOptimize(strm);
for (int i = 0; i != num_writes; ++i) {
strm << payload;
}
switch (kOutput) {
case kNone: {
break;
}
case kStdString: {
// The data already lives in `out`; this copy mirrors the str() call made
// in BM_StdStream.
std::string s = out;
benchmark::DoNotOptimize(s);
break;
}
}
}
}
// Create the stream, optionally write to it, then destroy it.
// Each ArgPair is (number of writes, bytes per write).
BENCHMARK_TEMPLATE(BM_CustomStream, kNone)
->ArgPair(0, 0)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
// Create the stream, write to it, get std::string out, then destroy.
// It's not useful in practice to extract std::string from OStringStream; we
// measure it for completeness.
// Each ArgPair is (number of writes, bytes per write).
BENCHMARK_TEMPLATE(BM_CustomStream, kStdString)
->ArgPair(1, 16) // 16 bytes is small enough for SSO
->ArgPair(1, 256) // 256 bytes requires heap allocation
->ArgPair(1024, 256);
} // namespace

View file

@ -0,0 +1,131 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/ostringstream.h"
#include <ios>
#include <memory>
#include <ostream>
#include <string>
#include <type_traits>
#include <utility>
#include "gtest/gtest.h"
namespace {
// Compile-time check that OStringStream derives from std::ostream.
TEST(OStringStream, IsOStream) {
  using Stream = absl::strings_internal::OStringStream;
  static_assert(std::is_base_of<std::ostream, Stream>::value, "");
}
// A stream constructed without a target reports a null str().
TEST(OStringStream, ConstructNullptr) {
  using Stream = absl::strings_internal::OStringStream;
  Stream detached(nullptr);
  EXPECT_EQ(nullptr, detached.str());
}
// Constructing and destroying a stream over a string exposes that string via
// str() and leaves its existing contents alone.
TEST(OStringStream, ConstructStr) {
  using Stream = absl::strings_internal::OStringStream;
  std::string target = "abc";
  {
    Stream stream(&target);
    EXPECT_EQ(&target, stream.str());
  }
  EXPECT_EQ("abc", target);
}
// The target string may be destroyed before the stream that points at it.
TEST(OStringStream, Destroy) {
  using Stream = absl::strings_internal::OStringStream;
  std::unique_ptr<std::string> owned(new std::string);
  Stream stream(owned.get());
  owned.reset();  // The stream outlives its target from here on.
}
// A move-constructed stream keeps both the target string and the formatting
// state (std::hex here) of the stream it was built from.
TEST(OStringStream, MoveConstruct) {
  using Stream = absl::strings_internal::OStringStream;
  std::string target = "abc";
  {
    Stream source(&target);
    source << std::hex << 16;
    EXPECT_EQ(&target, source.str());
    Stream moved(std::move(source));
    moved << 16;  // Still hexadecimal, so this appends "10" again.
    EXPECT_EQ(&target, moved.str());
  }
  EXPECT_EQ("abc1010", target);
}
// A move-assigned stream takes over both the target string and the formatting
// state (std::hex here) of the stream assigned to it.
TEST(OStringStream, MoveAssign) {
  using Stream = absl::strings_internal::OStringStream;
  std::string target = "abc";
  {
    Stream source(&target);
    source << std::hex << 16;
    EXPECT_EQ(&target, source.str());
    Stream assigned(nullptr);
    assigned = std::move(source);
    assigned << 16;  // Still hexadecimal, so this appends "10" again.
    EXPECT_EQ(&target, assigned.str());
  }
  EXPECT_EQ("abc1010", target);
}
// str() returns the current target (const-qualified on a const stream) and
// str(p) retargets the stream, including to the same string or to null.
TEST(OStringStream, Str) {
  using Stream = absl::strings_internal::OStringStream;
  std::string first;
  Stream stream(&first);
  const Stream& const_stream(stream);
  static_assert(
      std::is_same<decltype(stream.str()), std::string*>::value, "");
  static_assert(
      std::is_same<decltype(const_stream.str()), const std::string*>::value,
      "");

  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  // Retargeting to the same string changes nothing.
  stream.str(&first);
  EXPECT_EQ(&first, stream.str());
  EXPECT_EQ(&first, const_stream.str());

  std::string second;
  stream.str(&second);
  EXPECT_EQ(&second, stream.str());
  EXPECT_EQ(&second, const_stream.str());

  stream.str(nullptr);
  EXPECT_EQ(nullptr, stream.str());
  EXPECT_EQ(nullptr, const_stream.str());
}
// Writes through a named stream append to the target string immediately, and
// the appended data is still there after the stream is destroyed.
//
// The suite name was previously misspelled "OStreamStream"; it now matches
// the other OStringStream tests in this file.
TEST(OStringStream, WriteToLValue) {
  std::string s = "abc";
  {
    absl::strings_internal::OStringStream strm(&s);
    EXPECT_EQ("abc", s);
    strm << "";  // Writing an empty string appends nothing.
    EXPECT_EQ("abc", s);
    strm << 42;
    EXPECT_EQ("abc42", s);
    strm << 'x' << 'y';
    EXPECT_EQ("abc42xy", s);
  }
  EXPECT_EQ("abc42xy", s);
}
// Writes through temporary (unnamed) streams append to the target string just
// like writes through a named stream.
//
// The suite name was previously misspelled "OStreamStream"; it now matches
// the other OStringStream tests in this file.
TEST(OStringStream, WriteToRValue) {
  std::string s = "abc";
  absl::strings_internal::OStringStream(&s) << "";
  EXPECT_EQ("abc", s);
  absl::strings_internal::OStringStream(&s) << 42;
  EXPECT_EQ("abc42", s);
  absl::strings_internal::OStringStream(&s) << 'x' << 'y';
  EXPECT_EQ("abc42xy", s);
}
} // namespace

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// The exact value of 1e23 falls precisely halfway between two representable
// doubles. Furthermore, the rounding rules we prefer (break ties by rounding
// to the nearest even) dictate in this case that the number should be rounded
// down, but this is not completely specified for floating-point literals in
// C++. (It just says to use the default rounding mode of the standard
// library.) We ensure the result we want by using a number that has an
// unambiguous correctly rounded answer.
constexpr double k1e23 = 9999999999999999e7;
// Table of powers of ten written as floating-point literals. Entry i holds
// 10^(i - 324): Pow10() indexes it with exp + 324. The first entry (10^-324)
// is 0.0 and the last is 1e+308. The 10^23 slot uses k1e23 instead of a plain
// 1e+23 literal.
constexpr double kPowersOfTen[] = {
0.0, 1e-323, 1e-322, 1e-321, 1e-320, 1e-319, 1e-318, 1e-317, 1e-316,
1e-315, 1e-314, 1e-313, 1e-312, 1e-311, 1e-310, 1e-309, 1e-308, 1e-307,
1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298,
1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289,
1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262,
1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253,
1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244,
1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235,
1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226,
1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217,
1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208,
1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199,
1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172,
1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163,
1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154,
1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145,
1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136,
1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127,
1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118,
1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109,
1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91,
1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82,
1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73,
1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64,
1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55,
1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37,
1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28,
1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19,
1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8,
1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17,
1e+18, 1e+19, 1e+20, 1e+21, 1e+22, k1e23, 1e+24, 1e+25, 1e+26,
1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35,
1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44,
1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53,
1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62,
1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71,
1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89,
1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98,
1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107,
1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116,
1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125,
1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134,
1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143,
1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152,
1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161,
1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170,
1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188,
1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197,
1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206,
1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215,
1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224,
1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233,
1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242,
1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251,
1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260,
1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278,
1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287,
1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296,
1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305,
1e+306, 1e+307, 1e+308,
};
} // namespace
// Returns 10^exp as a double, looked up in the kPowersOfTen table.
// Exponents below -324 yield 0.0 and exponents above 308 yield INFINITY.
double Pow10(int exp) {
  constexpr int kMinExp = -324;  // exponent stored at table index 0
  constexpr int kMaxExp = 308;   // exponent stored at the last table index
  if (exp > kMaxExp) {
    return INFINITY;
  }
  if (exp < kMinExp) {
    return 0.0;
  }
  return kPowersOfTen[exp - kMinExp];
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,40 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This test helper library contains a table of powers of 10, to guarantee
// precise values are computed across the full range of doubles. We can't rely
// on the pow() function, because not all standard libraries ship a version
// that is precise.
#ifndef ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#define ABSL_STRINGS_INTERNAL_POW10_HELPER_H_
#include <vector>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Computes the precise value of 10^exp. (I.e. the nearest representable
// double to the exact value, rounding to nearest-even in the (single) case of
// being exactly halfway between.)
double Pow10(int exp);
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_

View file

@ -0,0 +1,122 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/pow10_helper.h"
#include <cmath>
#include "gtest/gtest.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
namespace {
// One expectation for Pow10(): the test compares Pow10(power) against
// std::ldexp(significand, radix), i.e. significand * 2^radix.
struct TestCase {
int power; // Testing Pow10(power)
uint64_t significand; // Raw bits of the expected value
int radix; // significand is adjusted by 2^radix
};
// Checks Pow10() against expected values written as an integer significand
// and a power-of-two exponent, so the expectations do not themselves depend
// on how the compiler rounds decimal floating-point literals.
TEST(Pow10HelperTest, Works) {
// The logic in pow10_helper.cc is so simple that theoretically we don't even
// need a test. However, we're paranoid and believe that there may be
// compilers that don't round floating-point literals correctly, even though
// it is specified by the standard. We check various edge cases, just to be
// sure.
constexpr TestCase kTestCases[] = {
// Subnormals
{-323, 0x2, -1074},
{-322, 0x14, -1074},
{-321, 0xca, -1074},
{-320, 0x7e8, -1074},
{-319, 0x4f10, -1074},
{-318, 0x316a2, -1074},
{-317, 0x1ee257, -1074},
{-316, 0x134d761, -1074},
{-315, 0xc1069cd, -1074},
{-314, 0x78a42205, -1074},
{-313, 0x4b6695433, -1074},
{-312, 0x2f201d49fb, -1074},
{-311, 0x1d74124e3d1, -1074},
{-310, 0x12688b70e62b, -1074},
{-309, 0xb8157268fdaf, -1074},
{-308, 0x730d67819e8d2, -1074},
// Values that are very close to rounding the other way.
// Comment shows difference of significand from the true value.
{-307, 0x11fa182c40c60d, -1072}, // -.4588
{-290, 0x18f2b061aea072, -1016}, // .4854
{-276, 0x11BA03F5B21000, -969}, // .4709
{-259, 0x1899C2F6732210, -913}, // .4830
{-252, 0x1D53844EE47DD1, -890}, // -.4743
{-227, 0x1E5297287C2F45, -807}, // -.4708
{-198, 0x1322E220A5B17E, -710}, // -.4714
{-195, 0x12B010D3E1CF56, -700}, // .4928
{-192, 0x123FF06EEA847A, -690}, // .4968
{-163, 0x1708D0F84D3DE7, -594}, // -.4977
{-145, 0x13FAAC3E3FA1F3, -534}, // -.4785
{-111, 0x133D4032C2C7F5, -421}, // .4774
{-106, 0x1D5B561574765B, -405}, // -.4869
{-104, 0x16EF5B40C2FC77, -398}, // -.4741
{-88, 0x197683DF2F268D, -345}, // -.4738
{-86, 0x13E497065CD61F, -338}, // .4736
{-76, 0x17288E1271F513, -305}, // -.4761
{-63, 0x1A53FC9631D10D, -262}, // .4929
{-30, 0x14484BFEEBC2A0, -152}, // .4758
{-21, 0x12E3B40A0E9B4F, -122}, // -.4916
{-5, 0x14F8B588E368F1, -69}, // .4829
{23, 0x152D02C7E14AF6, 24}, // -.5000 (exactly, round-to-even)
{29, 0x1431E0FAE6D721, 44}, // -.4870
{34, 0x1ED09BEAD87C03, 60}, // -.4721
{70, 0x172EBAD6DDC73D, 180}, // .4733
{105, 0x1BE7ABD3781ECA, 296}, // -.4850
{126, 0x17A2ECC414A03F, 366}, // -.4999
{130, 0x1CDA62055B2D9E, 379}, // .4855
{165, 0x115D847AD00087, 496}, // -.4913
{172, 0x14B378469B6732, 519}, // .4818
{187, 0x1262DFEEBBB0F9, 569}, // -.4805
{210, 0x18557F31326BBB, 645}, // -.4992
{212, 0x1302CB5E6F642A, 652}, // -.4838
{215, 0x1290BA9A38C7D1, 662}, // -.4881
{236, 0x1F736F9B3494E9, 731}, // .4707
{244, 0x176EC98994F489, 758}, // .4924
{250, 0x1658E3AB795204, 778}, // -.4963
{252, 0x117571DDF6C814, 785}, // .4873
{254, 0x1B4781EAD1989E, 791}, // -.4887
{260, 0x1A03FDE214CAF1, 811}, // .4784
{284, 0x1585041B2C477F, 891}, // .4798
{304, 0x1D2A1BE4048F90, 957}, // -.4987
// Out-of-range values
// Too-small powers expect zero (significand 0). Too-large powers expect
// ldexp(1, 2000), which is beyond the range of a double and so is presumably
// meant to evaluate to +infinity.
{-324, 0x0, 0},
{-325, 0x0, 0},
{-326, 0x0, 0},
{309, 1, 2000},
{310, 1, 2000},
{311, 1, 2000},
};
for (const TestCase& test_case : kTestCases) {
EXPECT_EQ(Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix))
// On failure, print both values in hexadecimal floating-point (%a).
<< absl::StrFormat("Failure for Pow10(%d): %a vs %a", test_case.power,
Pow10(test_case.power),
std::ldexp(test_case.significand, test_case.radix));
}
}
} // namespace
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,119 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
#include <algorithm>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/base/port.h"
#include "absl/meta/type_traits.h" // for void_t
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// In this type trait, we look for a __resize_default_init member function, and
// we use it if available, otherwise, we use resize. We provide HasMember to
// indicate whether __resize_default_init is present.
//
// This is the primary template, i.e. the fallback chosen when no
// specialization matches: HasMember is false and Resize() forwards to the
// string's ordinary resize().
template <typename string_type, typename = void>
struct ResizeUninitializedTraits {
using HasMember = std::false_type;
static void Resize(string_type* s, size_t new_size) { s->resize(new_size); }
};
// __resize_default_init is provided by libc++ >= 8.0
//
// Specialization selected when `s.__resize_default_init(n)` is a well-formed
// expression for string_type: HasMember is true and Resize() calls that
// member. The 237 only appears inside decltype, which is never evaluated, so
// its value is irrelevant; it just supplies an integer argument.
template <typename string_type>
struct ResizeUninitializedTraits<
string_type, absl::void_t<decltype(std::declval<string_type&>()
.__resize_default_init(237))> > {
using HasMember = std::true_type;
static void Resize(string_type* s, size_t new_size) {
s->__resize_default_init(new_size);
}
};
// Returns true if the std::string implementation supports a resize where
// the new characters added to the std::string are left untouched.
//
// (A better name might be "STLStringSupportsUninitializedResize", alluding to
// the STLStringResizeUninitialized function below.)
//
// The pointer argument is only used to deduce string_type; it is never
// dereferenced. The result is ResizeUninitializedTraits<...>::HasMember.
template <typename string_type>
inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
return ResizeUninitializedTraits<string_type>::HasMember::value;
}
// Like str->resize(new_size), except any new characters added to "*str" as a
// result of resizing may be left uninitialized, rather than being filled with
// '\0' bytes. Typically used when code is then going to overwrite the backing
// store of the std::string with known data.
//
// Delegates to ResizeUninitializedTraits, so when the string type has no
// __resize_default_init member this is simply a regular resize().
template <typename string_type, typename = void>
inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
ResizeUninitializedTraits<string_type>::Resize(s, new_size);
}
// Makes sure `*s` can hold at least `new_size` characters, growing
// geometrically: whenever more room is needed, the request passed to reserve()
// is at least twice the current capacity. This keeps the amortized cost of
// repeatedly growing the string by small amounts at O(1), rather than the
// O(s->size()) per step that exact-fit growth would cost.
//
// Does nothing when the current capacity already suffices.
template <typename string_type>
void STLStringReserveAmortized(string_type* s, size_t new_size) {
  const size_t current_capacity = s->capacity();
  if (new_size <= current_capacity) {
    return;
  }
  // Never ask for less than double what we already have.
  const size_t doubled = 2 * current_capacity;
  s->reserve(doubled < new_size ? new_size : doubled);
}
// Type trait that grows a string by `n` characters. An __append_default_init
// member function is used when the string type offers one; this primary
// template is the fallback for types that do not, and appends `n`
// value-initialized characters through the regular append().
template <typename string_type, typename = void>
struct AppendUninitializedTraits {
  static void Append(string_type* s, size_t n) {
    using char_type = typename string_type::value_type;
    const char_type filler = char_type();
    s->append(n, filler);
  }
};
// Specialization selected when `s.__append_default_init(n)` is a well-formed
// expression for string_type; Append() then calls that member instead of
// append(). The 237 only appears inside decltype, which is never evaluated,
// so its value is irrelevant.
template <typename string_type>
struct AppendUninitializedTraits<
string_type, absl::void_t<decltype(std::declval<string_type&>()
.__append_default_init(237))> > {
static void Append(string_type* s, size_t n) {
s->__append_default_init(n);
}
};
// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use
// exponential growth so that the amortized complexity of increasing the string
// size by a small amount is O(1), in contrast to O(str->size()) in the case of
// precise growth.
//
// Growing goes through AppendUninitializedTraits (an append of the missing
// characters); shrinking, or keeping the same size, goes through erase().
template <typename string_type>
void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) {
  const size_t current_size = s->size();
  if (new_size <= current_size) {
    // Drop everything from position new_size onwards.
    s->erase(new_size);
    return;
  }
  const size_t missing = new_size - current_size;
  AppendUninitializedTraits<string_type>::Append(s, missing);
}
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_

View file

@ -0,0 +1,133 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/resize_uninitialized.h"
#include "gtest/gtest.h"
namespace {
// Global call counters bumped by the mock string classes below.
// resize_call_count is shared: both mocks' resize() increment it.
int resize_call_count = 0;
int append_call_count = 0;
// A mock string class whose only purpose is to track how many times its
// resize()/append() methods have been called.
//
// It stores no data: size() and capacity() always report 0, and reserve() and
// erase() do nothing. It has no __resize_default_init/__append_default_init
// members.
struct resizable_string {
using value_type = char;
size_t size() const { return 0; }
size_t capacity() const { return 0; }
// Every index refers to the same static dummy character.
char& operator[](size_t) {
static char c = '\0';
return c;
}
void resize(size_t) { resize_call_count += 1; }
void append(size_t, value_type) { append_call_count += 1; }
void reserve(size_t) {}
resizable_string& erase(size_t = 0, size_t = 0) { return *this; }
};
// Global call counters for the __resize_default_init()/__append_default_init()
// members of default_init_string.
int resize_default_init_call_count = 0;
int append_default_init_call_count = 0;
// A mock string class whose only purpose is to track how many times its
// resize()/__resize_default_init()/__append_default_init() methods have been
// called. Unlike resizable_string it has no append() member (and no
// value_type).
//
// It stores no data: size() and capacity() always report 0, and reserve() and
// erase() do nothing.
struct default_init_string {
size_t size() const { return 0; }
size_t capacity() const { return 0; }
// Every index refers to the same static dummy character.
char& operator[](size_t) {
static char c = '\0';
return c;
}
void resize(size_t) { resize_call_count += 1; }
void __resize_default_init(size_t) { resize_default_init_call_count += 1; }
void __append_default_init(size_t) { append_default_init_call_count += 1; }
void reserve(size_t) {}
default_init_string& erase(size_t = 0, size_t = 0) { return *this; }
};
// Runs the resize helpers against both mock string types and uses the global
// call counters to check which member function each helper ended up calling.
TEST(ResizeUninit, WithAndWithout) {
// Start from a clean slate so earlier tests cannot influence the counts.
resize_call_count = 0;
append_call_count = 0;
resize_default_init_call_count = 0;
append_default_init_call_count = 0;
{
// Case 1: a string type without the *_default_init members. The helpers are
// expected to fall back to resize() and append().
resizable_string rs;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
// Querying support must report false and must not call anything.
EXPECT_FALSE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
// Plain uninitialized resize -> exactly one resize() call.
absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
EXPECT_EQ(resize_call_count, 1);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
// Amortized resize (the mock always reports size 0, so this is a growth)
// -> exactly one append() call and no further resize().
absl::strings_internal::STLStringResizeUninitializedAmortized(&rs, 1000);
EXPECT_EQ(resize_call_count, 1);
EXPECT_EQ(append_call_count, 1);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
}
// Reset the counters before exercising the second mock.
resize_call_count = 0;
append_call_count = 0;
resize_default_init_call_count = 0;
append_default_init_call_count = 0;
{
// Case 2: a string type that provides __resize_default_init() and
// __append_default_init(). The helpers are expected to prefer those.
default_init_string rus;
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
// Querying support must report true and must not call anything.
EXPECT_TRUE(
absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 0);
EXPECT_EQ(append_default_init_call_count, 0);
// Plain uninitialized resize -> one __resize_default_init() call.
absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 1);
EXPECT_EQ(append_default_init_call_count, 0);
// Amortized resize -> one __append_default_init() call.
absl::strings_internal::STLStringResizeUninitializedAmortized(&rus, 1000);
EXPECT_EQ(resize_call_count, 0);
EXPECT_EQ(append_call_count, 0);
EXPECT_EQ(resize_default_init_call_count, 1);
EXPECT_EQ(append_default_init_call_count, 1);
}
}
// Grows a std::string one character at a time through the amortized helper
// and checks that its capacity increased fewer than 50 times over 1000 steps.
TEST(ResizeUninit, Amortized) {
  std::string str;
  int cap_increase_count = 0;
  size_t last_capacity = str.capacity();
  for (int target_size = 0; target_size < 1000; ++target_size) {
    absl::strings_internal::STLStringResizeUninitializedAmortized(&str,
                                                                  target_size);
    const size_t capacity_now = str.capacity();
    if (capacity_now > last_capacity) {
      ++cap_increase_count;
    }
    last_capacity = capacity_now;
  }
  EXPECT_LT(cap_increase_count, 50);
}
} // namespace

View file

@ -0,0 +1,248 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// The file provides the IsStrictlyBaseOfAndConvertibleToSTLContainer type
// trait metafunction to assist in working with the _GLIBCXX_DEBUG debug
// wrappers of STL containers.
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"
#ifndef ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#define ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_
#include <array>
#include <bitset>
#include <deque>
#include <forward_list>
#include <list>
#include <map>
#include <set>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "absl/meta/type_traits.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {
// Primary template: C is not an instantiation of the template T.
template <typename C, template <typename...> class T>
struct IsSpecializationImpl : std::false_type {};
// Partial specialization: the first argument is exactly T<Args...> for some
// argument list, i.e. it is an instantiation of T.
template <template <typename...> class T, typename... Args>
struct IsSpecializationImpl<T<Args...>, T> : std::true_type {};
// IsSpecialization<C, T> performs the check on absl::decay_t<C>, so the
// answer is computed for the decayed form of C.
template <typename C, template <typename...> class T>
using IsSpecialization = IsSpecializationImpl<absl::decay_t<C>, T>;
// Primary template: C does not have the shape A<T, N> (type, size_t).
template <typename C>
struct IsArrayImpl : std::false_type {};
// For any type of the shape A<T, N>, the result is whether that type is
// exactly std::array<T, N>. A dedicated trait is needed because std::array
// takes a non-type parameter and so cannot be matched by IsSpecialization's
// `template <typename...> class` parameter.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsArrayImpl<A<T, N>> : std::is_same<A<T, N>, std::array<T, N>> {};
// IsArray<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsArray = IsArrayImpl<absl::decay_t<C>>;
// Primary template: C does not have the shape B<N> (single size_t parameter).
template <typename C>
struct IsBitsetImpl : std::false_type {};
// For any type of the shape B<N>, the result is whether that type is exactly
// std::bitset<N>.
template <template <size_t> class B, size_t N>
struct IsBitsetImpl<B<N>> : std::is_same<B<N>, std::bitset<N>> {};
// IsBitset<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsBitset = IsBitsetImpl<absl::decay_t<C>>;
// IsSTLContainer<C> is true when (the decayed) C is itself one of the listed
// standard containers: std::array, std::bitset, or an instantiation of
// deque, forward_list, list, map, multimap, set, multiset, unordered_map,
// unordered_multimap, unordered_set, unordered_multiset or vector.
template <typename C>
struct IsSTLContainer
: absl::disjunction<
IsArray<C>, IsBitset<C>, IsSpecialization<C, std::deque>,
IsSpecialization<C, std::forward_list>,
IsSpecialization<C, std::list>, IsSpecialization<C, std::map>,
IsSpecialization<C, std::multimap>, IsSpecialization<C, std::set>,
IsSpecialization<C, std::multiset>,
IsSpecialization<C, std::unordered_map>,
IsSpecialization<C, std::unordered_multimap>,
IsSpecialization<C, std::unordered_set>,
IsSpecialization<C, std::unordered_multiset>,
IsSpecialization<C, std::vector>> {};
// Primary template: chosen when C lacks the member types required by every
// partial specialization below, in which case the answer is false.
template <typename C, template <typename...> class T, typename = void>
struct IsBaseOfSpecializationImpl : std::false_type {};
// IsBaseOfSpecializationImpl needs multiple partial specializations to SFINAE
// on the existence of container dependent types and plug them into the STL
// template.
//
// Each specialization rebuilds the standard container T<...> from C's own
// member types and asks whether C is a base class of that container.
//
// T<value_type, allocator_type>: the shape of std::vector, std::deque,
// std::list and std::forward_list.
template <typename C, template <typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::value_type, typename C::allocator_type>> {};
// T<key_type, key_compare, allocator_type>: the shape of std::set and
// std::multiset.
template <typename C, template <typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// T<key_type, mapped_type, key_compare, allocator_type>: the shape of
// std::map and std::multimap.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_base_of<C,
T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {
};
// T<key_type, hasher, key_equal, allocator_type>: the shape of
// std::unordered_set and std::unordered_multiset.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>> {
};
// T<key_type, mapped_type, hasher, key_equal, allocator_type>: the shape of
// std::unordered_map and std::unordered_multimap.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsBaseOfSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_base_of<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// IsBaseOfSpecialization<C, T> evaluates the check on absl::decay_t<C>.
template <typename C, template <typename...> class T>
using IsBaseOfSpecialization = IsBaseOfSpecializationImpl<absl::decay_t<C>, T>;
// Primary template: C does not have the shape A<T, N> (type, size_t).
template <typename C>
struct IsBaseOfArrayImpl : std::false_type {};
// For a type of the shape A<T, N>, the result is whether A<T, N> is a base
// class of std::array<T, N>.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsBaseOfArrayImpl<A<T, N>> : std::is_base_of<A<T, N>, std::array<T, N>> {
};
// IsBaseOfArray<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsBaseOfArray = IsBaseOfArrayImpl<absl::decay_t<C>>;
// Primary template: C does not have the shape B<N> (single size_t parameter).
template <typename C>
struct IsBaseOfBitsetImpl : std::false_type {};
// For a type of the shape B<N>, the result is whether B<N> is a base class of
// std::bitset<N>.
template <template <size_t> class B, size_t N>
struct IsBaseOfBitsetImpl<B<N>> : std::is_base_of<B<N>, std::bitset<N>> {};
// IsBaseOfBitset<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsBaseOfBitset = IsBaseOfBitsetImpl<absl::decay_t<C>>;
// IsBaseOfSTLContainer<C> is true when (the decayed) C is a base class of at
// least one of the listed standard containers, where the container is rebuilt
// from C's own member types (or template arguments for array/bitset).
template <typename C>
struct IsBaseOfSTLContainer
: absl::disjunction<IsBaseOfArray<C>, IsBaseOfBitset<C>,
IsBaseOfSpecialization<C, std::deque>,
IsBaseOfSpecialization<C, std::forward_list>,
IsBaseOfSpecialization<C, std::list>,
IsBaseOfSpecialization<C, std::map>,
IsBaseOfSpecialization<C, std::multimap>,
IsBaseOfSpecialization<C, std::set>,
IsBaseOfSpecialization<C, std::multiset>,
IsBaseOfSpecialization<C, std::unordered_map>,
IsBaseOfSpecialization<C, std::unordered_multimap>,
IsBaseOfSpecialization<C, std::unordered_set>,
IsBaseOfSpecialization<C, std::unordered_multiset>,
IsBaseOfSpecialization<C, std::vector>> {};
// Primary template: chosen when C lacks the member types required by every
// partial specialization below, in which case the answer is false.
template <typename C, template <typename...> class T, typename = void>
struct IsConvertibleToSpecializationImpl : std::false_type {};
// IsConvertibleToSpecializationImpl needs multiple partial specializations to
// SFINAE on the existence of container dependent types and plug them into the
// STL template.
//
// Each specialization rebuilds the standard container T<...> from C's own
// member types and asks whether C is implicitly convertible to it.
//
// T<value_type, allocator_type>: the shape of std::vector, std::deque,
// std::list and std::forward_list.
template <typename C, template <typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T, absl::void_t<typename C::value_type, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::value_type, typename C::allocator_type>> {};
// T<key_type, key_compare, allocator_type>: the shape of std::set and
// std::multiset.
template <typename C, template <typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::key_compare,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::key_compare,
typename C::allocator_type>> {};
// T<key_type, mapped_type, key_compare, allocator_type>: the shape of
// std::map and std::multimap.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::mapped_type,
typename C::key_compare, typename C::allocator_type>> {};
// T<key_type, hasher, key_equal, allocator_type>: the shape of
// std::unordered_set and std::unordered_multiset.
template <typename C, template <typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::hasher,
typename C::key_equal, typename C::allocator_type>>
: std::is_convertible<
C, T<typename C::key_type, typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// T<key_type, mapped_type, hasher, key_equal, allocator_type>: the shape of
// std::unordered_map and std::unordered_multimap.
template <typename C,
template <typename, typename, typename, typename, typename> class T>
struct IsConvertibleToSpecializationImpl<
C, T,
absl::void_t<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>>
: std::is_convertible<C, T<typename C::key_type, typename C::mapped_type,
typename C::hasher, typename C::key_equal,
typename C::allocator_type>> {};
// IsConvertibleToSpecialization<C, T> evaluates the check on
// absl::decay_t<C>.
template <typename C, template <typename...> class T>
using IsConvertibleToSpecialization =
IsConvertibleToSpecializationImpl<absl::decay_t<C>, T>;
// Primary template: C does not have the shape A<T, N> (type, size_t).
template <typename C>
struct IsConvertibleToArrayImpl : std::false_type {};
// For a type of the shape A<T, N>, the result is whether A<T, N> is
// implicitly convertible to std::array<T, N>.
template <template <typename, size_t> class A, typename T, size_t N>
struct IsConvertibleToArrayImpl<A<T, N>>
: std::is_convertible<A<T, N>, std::array<T, N>> {};
// IsConvertibleToArray<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsConvertibleToArray = IsConvertibleToArrayImpl<absl::decay_t<C>>;
// Primary template: C does not have the shape B<N> (single size_t parameter).
template <typename C>
struct IsConvertibleToBitsetImpl : std::false_type {};
// For a type of the shape B<N>, the result is whether B<N> is implicitly
// convertible to std::bitset<N>.
template <template <size_t> class B, size_t N>
struct IsConvertibleToBitsetImpl<B<N>>
: std::is_convertible<B<N>, std::bitset<N>> {};
// IsConvertibleToBitset<C> evaluates the check on absl::decay_t<C>.
template <typename C>
using IsConvertibleToBitset = IsConvertibleToBitsetImpl<absl::decay_t<C>>;
// IsConvertibleToSTLContainer<C> is true when (the decayed) C is implicitly
// convertible to at least one of the listed standard containers, where the
// container is rebuilt from C's own member types (or template arguments for
// array/bitset).
template <typename C>
struct IsConvertibleToSTLContainer
: absl::disjunction<
IsConvertibleToArray<C>, IsConvertibleToBitset<C>,
IsConvertibleToSpecialization<C, std::deque>,
IsConvertibleToSpecialization<C, std::forward_list>,
IsConvertibleToSpecialization<C, std::list>,
IsConvertibleToSpecialization<C, std::map>,
IsConvertibleToSpecialization<C, std::multimap>,
IsConvertibleToSpecialization<C, std::set>,
IsConvertibleToSpecialization<C, std::multiset>,
IsConvertibleToSpecialization<C, std::unordered_map>,
IsConvertibleToSpecialization<C, std::unordered_multimap>,
IsConvertibleToSpecialization<C, std::unordered_set>,
IsConvertibleToSpecialization<C, std::unordered_multiset>,
IsConvertibleToSpecialization<C, std::vector>> {};
// True only when all three conditions hold for C:
//   1. C is not itself one of the recognized standard containers,
//   2. C is a base class of one of them, and
//   3. C is implicitly convertible to one of them.
// The "strictly" in the name refers to condition 1, which excludes the
// container type itself (std::is_base_of is also true for identical types).
template <typename C>
struct IsStrictlyBaseOfAndConvertibleToSTLContainer
: absl::conjunction<absl::negation<IsSTLContainer<C>>,
IsBaseOfSTLContainer<C>,
IsConvertibleToSTLContainer<C>> {};
} // namespace strings_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_

View file

@ -0,0 +1,671 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// POSIX spec:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html
//
#include "absl/strings/internal/str_format/arg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <string>
#include <type_traits>
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#include "absl/container/fixed_array.h"
#include "absl/numeric/int128.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/internal/str_format/float_conversion.h"
#include "absl/strings/numbers.h"
#include "absl/strings/string_view.h"
#if defined(ABSL_HAVE_STD_STRING_VIEW)
#include <string_view>
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Shrinks *capacity by the length of `s`; the result is clipped to a minimum
// of 0.
void ReducePadding(string_view s, size_t *capacity) {
  const size_t consumed = s.size();
  *capacity = Excess(consumed, *capacity);
}

// Shrinks *capacity by `n`; the result is clipped to a minimum of 0.
void ReducePadding(size_t n, size_t *capacity) {
  const size_t available = *capacity;
  *capacity = Excess(n, available);
}
// MakeUnsigned<T>::type is the unsigned counterpart of T. The primary
// template defers to std::make_unsigned; the specializations below add
// support for the absl 128-bit integer types, both of which map to
// absl::uint128.
template <typename T>
struct MakeUnsigned : std::make_unsigned<T> {};
template <>
struct MakeUnsigned<absl::int128> {
using type = absl::uint128;
};
template <>
struct MakeUnsigned<absl::uint128> {
using type = absl::uint128;
};
// IsSigned<T>::value reports whether T is a signed type. The primary template
// defers to std::is_signed; the specializations below state the answer
// explicitly for the absl 128-bit integer types.
template <typename T>
struct IsSigned : std::is_signed<T> {};
template <>
struct IsSigned<absl::int128> : std::true_type {};
template <>
struct IsSigned<absl::uint128> : std::false_type {};
// Integral digit printer.
// Call one of the PrintAs* routines after construction once.
// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results.
//
// The digits are written into the fixed-size member buffer `storage_`;
// `start_`/`size_` describe the used part of that buffer and are only
// meaningful after a PrintAs* call (they are not initialized on construction).
class IntDigits {
public:
// Print the unsigned integer as octal.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsOct(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer backwards from its end, one octal digit (3 bits) at a
// time. The do/while guarantees at least one digit, so 0 prints as "0".
char *p = storage_ + sizeof(storage_);
do {
*--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7));
v >>= 3;
} while (v);
start_ = p;
size_ = static_cast<size_t>(storage_ + sizeof(storage_) - p);
}
// Print the signed or unsigned integer as decimal.
// Supports all integral types.
template <typename T>
void PrintAsDec(T v) {
static_assert(std::is_integral<T>::value, "");
// FastIntToBuffer writes forward from the start of the buffer; the distance
// between its return value and `storage_` is used as the printed length.
start_ = storage_;
size_ = static_cast<size_t>(numbers_internal::FastIntToBuffer(v, storage_) -
storage_);
}
// Decimal printing for int128: prints the magnitude through the uint128
// overload and asks it to prepend '-' for negative values.
void PrintAsDec(int128 v) {
auto u = static_cast<uint128>(v);
bool add_neg = false;
if (v < 0) {
add_neg = true;
// Negate in the unsigned domain to obtain the magnitude.
u = uint128{} - u;
}
PrintAsDec(u, add_neg);
}
// Decimal printing for uint128. When `add_neg` is true a '-' is placed in
// front of the digits.
void PrintAsDec(uint128 v, bool add_neg = false) {
// This function can be sped up if needed. We can call FastIntToBuffer
// twice, or fix FastIntToBuffer to support uint128.
// Fill the buffer backwards from its end, two decimal digits per step.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
numbers_internal::PutTwoDigits(static_cast<uint32_t>(v % 100), p);
v /= 100;
} while (v);
if (p[0] == '0') {
// We printed one too many digits.
++p;
}
if (add_neg) {
*--p = '-';
}
size_ = static_cast<size_t>(storage_ + sizeof(storage_) - p);
start_ = p;
}
// Print the unsigned integer as hex using lowercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexLower(T v) {
static_assert(!IsSigned<T>::value, "");
// Fill the buffer backwards from its end, one byte (two hex characters
// copied from kHexTable) per step.
char *p = storage_ + sizeof(storage_);
do {
p -= 2;
constexpr const char* table = numbers_internal::kHexTable;
std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2);
// A one-byte value is fully printed after a single step.
// NOTE(review): presumably this also avoids the `v >>= 8` below on a
// type only 8 bits wide — confirm the original motivation.
if (sizeof(T) == 1) break;
v >>= 8;
} while (v);
if (p[0] == '0') {
// We printed one too many digits.
++p;
}
start_ = p;
size_ = static_cast<size_t>(storage_ + sizeof(storage_) - p);
}
// Print the unsigned integer as hex using uppercase.
// Supports unsigned integral types and uint128.
template <typename T>
void PrintAsHexUpper(T v) {
static_assert(!IsSigned<T>::value, "");
char *p = storage_ + sizeof(storage_);
// kHexTable is only lowercase, so do it manually for uppercase.
// One hex digit (4 bits) per step, written backwards from the buffer end.
do {
*--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15];
v >>= 4;
} while (v);
start_ = p;
size_ = static_cast<size_t>(storage_ + sizeof(storage_) - p);
}
// The printed value including the '-' sign if available.
// For inputs of value `0`, this will return "0"
string_view with_neg_and_zero() const { return {start_, size_}; }
// The printed value not including the '-' sign.
// For inputs of value `0`, this will return "".
string_view without_neg_or_zero() const {
static_assert('-' < '0', "The check below verifies both.");
// A first character of '-' or '0' is skipped; any other leading digit is
// kept. The single `<=` comparison covers both because '-' sorts below '0'.
size_t advance = start_[0] <= '0' ? 1 : 0;
return {start_ + advance, size_ - advance};
}
// True when the printed text starts with a '-' sign.
bool is_negative() const { return start_[0] == '-'; }
private:
// First character of the printed text; points into `storage_`.
const char *start_;
// Number of characters in the printed text.
size_t size_;
// Max size: 128 bit value as octal -> 43 digits, plus sign char
char storage_[128 / 3 + 1 + 1];
};
// Returns the base prefix ("0x" or "0X") to print in front of the digits, or
// an empty view when no prefix applies.
//
// Note: 'o' conversions do not have a base indicator, it's just that
// the '#' flag is specified to modify the precision for 'o' conversions.
string_view BaseIndicator(const IntDigits &as_digits,
                          const FormatConversionSpecImpl conv) {
  const auto spec = conv.conversion_char();
  const bool is_pointer = spec == FormatConversionCharInternal::p;
  const bool is_upper_hex = spec == FormatConversionCharInternal::X;
  const bool is_hex =
      is_pointer || is_upper_hex || spec == FormatConversionCharInternal::x;
  if (!is_hex) return {};

  // %p always shows 0x; x/X only do so when the '#' flag is present.
  const bool wants_prefix = is_pointer || conv.has_alt_flag();
  if (!wants_prefix) return {};

  // From the POSIX description of '#' flag:
  //   "For x or X conversion specifiers, a non-zero result shall have
  //   0x (or 0X) prefixed to it."
  if (as_digits.without_neg_or_zero().empty()) return {};

  if (is_upper_hex) return "0X";
  return "0x";
}
// Returns the text for the sign column of a 'd' or 'i' conversion: "-" for a
// negative value, otherwise "+" or " " if the corresponding flag is set.
// Every other conversion, and a non-negative value without either flag,
// yields an empty view.
string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) {
  const auto spec = conv.conversion_char();
  const bool is_signed_decimal = spec == FormatConversionCharInternal::d ||
                                 spec == FormatConversionCharInternal::i;
  if (!is_signed_decimal) return {};

  if (neg) return "-";
  if (conv.has_show_pos_flag()) return "+";
  if (conv.has_sign_col_flag()) return " ";
  return {};
}
// Writes the single character `v` to `sink`, padded with spaces up to the
// requested width. Padding goes before the character unless the left flag is
// set, in which case it goes after. Always returns true.
bool ConvertCharImpl(char v,
                     const FormatConversionSpecImpl conv,
                     FormatSinkImpl* sink) {
  // A negative width means "no width requested".
  const auto width = conv.width();
  size_t fill = width >= 0 ? static_cast<size_t>(width) : size_t{0};
  // One column is taken by the character itself.
  ReducePadding(1, &fill);

  const bool left_aligned = conv.has_left_flag();
  if (left_aligned) {
    sink->Append(1, v);
    sink->Append(fill, ' ');
  } else {
    sink->Append(fill, ' ');
    sink->Append(1, v);
  }
  return true;
}
// Emits already-printed integer digits to `sink`, applying width, precision,
// sign, base prefix, zero padding and alignment from `conv`.
//
// `fill` starts as the requested width and is reduced by every component
// that is going to be printed; whatever is left becomes space (or zero)
// padding. Always returns true.
bool ConvertIntImplInnerSlow(const IntDigits &as_digits,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
// Print as a sequence of Substrings:
// [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces]
size_t fill = 0;
// A negative width means "no width requested".
if (conv.width() >= 0)
fill = static_cast<size_t>(conv.width());
// The digits without sign; empty when the value is 0.
string_view formatted = as_digits.without_neg_or_zero();
ReducePadding(formatted, &fill);
string_view sign = SignColumn(as_digits.is_negative(), conv);
ReducePadding(sign, &fill);
string_view base_indicator = BaseIndicator(as_digits, conv);
ReducePadding(base_indicator, &fill);
// Without an explicit precision the minimum digit count is 1, which is what
// makes the value 0 (empty `formatted`) print as a single '0'.
bool precision_specified = conv.precision() >= 0;
size_t precision =
precision_specified ? static_cast<size_t>(conv.precision()) : size_t{1};
if (conv.has_alt_flag() &&
conv.conversion_char() == FormatConversionCharInternal::o) {
// From POSIX description of the '#' (alt) flag:
// "For o conversion, it increases the precision (if necessary) to
// force the first digit of the result to be zero."
if (formatted.empty() || *formatted.begin() != '0') {
size_t needed = formatted.size() + 1;
precision = std::max(precision, needed);
}
}
// Leading zeroes required to reach the precision.
size_t num_zeroes = Excess(formatted.size(), precision);
ReducePadding(num_zeroes, &fill);
// Remaining width goes to exactly one side, depending on the left flag.
size_t num_left_spaces = !conv.has_left_flag() ? fill : 0;
size_t num_right_spaces = conv.has_left_flag() ? fill : 0;
// From POSIX description of the '0' (zero) flag:
// "For d, i, o, u, x, and X conversion specifiers, if a precision
// is specified, the '0' flag is ignored."
if (!precision_specified && conv.has_zero_flag()) {
// Convert the left padding into zeroes placed after sign/base prefix.
num_zeroes += num_left_spaces;
num_left_spaces = 0;
}
sink->Append(num_left_spaces, ' ');
sink->Append(sign);
sink->Append(base_indicator);
sink->Append(num_zeroes, '0');
sink->Append(formatted);
sink->Append(num_right_spaces, ' ');
return true;
}
// Formats the floating-point value `v` into `sink`. A 'v' conversion is
// rewritten to 'g' first. Returns false when the (possibly rewritten)
// conversion is not a floating-point conversion; otherwise returns the result
// of ConvertFloatImpl.
template <typename T>
bool ConvertFloatArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
  const bool is_generic = conv.conversion_char() == FormatConversionCharInternal::v;
  if (is_generic) {
    conv.set_conversion_char(FormatConversionCharInternal::g);
  }
  if (!FormatConversionCharIsFloat(conv.conversion_char())) {
    return false;
  }
  return ConvertFloatImpl(v, conv, sink);
}
// Writes the string `v` to `sink`. A basic conversion appends the text
// unchanged and reports success; anything else is delegated to
// PutPaddedString with the width, precision and left-alignment flag taken
// from `conv`, and its result is returned.
inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv,
                             FormatSinkImpl *sink) {
  if (!conv.is_basic()) {
    return sink->PutPaddedString(v, conv.width(), conv.precision(),
                                 conv.has_left_flag());
  }
  sink->Append(v);
  return true;
}
// State carried between two WideToUtf8() calls that together encode one
// UTF-16 surrogate pair.
struct ShiftState {
  // Set after a high surrogate was consumed; cleared by the matching low
  // surrogate.
  bool saw_high_surrogate = false;
  // The two lowest bits of the high surrogate, needed for the third UTF-8
  // byte, which is only emitted together with the low surrogate.
  uint8_t bits = 0;
};

// Converts `v` from UTF-16 or UTF-32 to UTF-8 and writes to `buf`. `buf` is
// assumed to have enough space for the output. `s` is used to carry state
// between successive calls with a UTF-16 surrogate pair. Returns the number of
// chars written, or `static_cast<size_t>(-1)` on failure.
//
// This is basically std::wcrtomb(), but always outputting UTF-8 instead of
// respecting the current locale.
inline size_t WideToUtf8(wchar_t wc, char *buf, ShiftState &s) {
  const auto code = static_cast<uint32_t>(wc);
  // Builds a continuation byte (10xxxxxx) from the low six bits of `x`.
  const auto trail = [](uint32_t x) {
    return static_cast<char>(0x80 | (x & 0x3f));
  };

  // One byte: U+0000..U+007F.
  if (code < 0x80) {
    buf[0] = static_cast<char>(code);
    return 1;
  }

  // Two bytes: U+0080..U+07FF.
  if (code < 0x800) {
    buf[0] = static_cast<char>(0xc0 | (code >> 6));
    buf[1] = trail(code);
    return 2;
  }

  // Three bytes: U+0800..U+FFFF, excluding the surrogate range
  // U+D800..U+DFFF.
  if (code < 0xd800 || (code - 0xe000) < 0x2000) {
    buf[0] = static_cast<char>(0xe0 | (code >> 12));
    buf[1] = trail(code >> 6);
    buf[2] = trail(code);
    return 3;
  }

  // Four bytes: U+10000..U+10FFFF given as a single UTF-32 unit.
  if ((code - 0x10000) < 0x100000) {
    buf[0] = static_cast<char>(0xf0 | (code >> 18));
    buf[1] = trail(code >> 12);
    buf[2] = trail(code >> 6);
    buf[3] = trail(code);
    return 4;
  }

  // High surrogate (0xD800..0xDBFF): emit the first two bytes of the four
  // byte sequence now and stash the two low bits for the next call.
  if (code < 0xdc00) {
    s.saw_high_surrogate = true;
    s.bits = static_cast<uint8_t>(code & 0x3);
    const uint8_t plane_bits = ((code >> 6) & 0xf) + 1;
    buf[0] = static_cast<char>(0xf0 | (plane_bits >> 2));
    buf[1] =
        static_cast<char>(0x80 | static_cast<uint8_t>((plane_bits & 0x3) << 4) |
                          static_cast<uint8_t>((code >> 2) & 0xf));
    return 2;
  }

  // Low surrogate (0xDC00..0xDFFF) following a high surrogate: emit the
  // remaining two bytes and reset the carried state.
  if (code < 0xe000 && s.saw_high_surrogate) {
    buf[0] = static_cast<char>(0x80 | static_cast<uint8_t>(s.bits << 4) |
                               static_cast<uint8_t>((code >> 6) & 0xf));
    buf[1] = trail(code);
    s.saw_high_surrogate = false;
    s.bits = 0;
    return 2;
  }

  // Everything else (a low surrogate without a preceding high surrogate, or
  // a value above U+10FFFF) cannot be encoded.
  return static_cast<size_t>(-1);
}
inline bool ConvertStringArg(const wchar_t *v,
size_t len,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
FixedArray<char> mb(len * 4);
ShiftState s;
size_t chars_written = 0;
for (size_t i = 0; i < len; ++i) {
const size_t chars = WideToUtf8(v[i], &mb[chars_written], s);
if (chars == static_cast<size_t>(-1)) { return false; }
chars_written += chars;
}
return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink);
}
bool ConvertWCharTImpl(wchar_t v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
char mb[4];
ShiftState s;
const size_t chars_written = WideToUtf8(v, mb, s);
return chars_written != static_cast<size_t>(-1) && !s.saw_high_surrogate &&
ConvertStringArg(string_view(mb, chars_written), conv, sink);
}
} // namespace
// Appends the text "true" or "false" to `sink` according to `v`. Always
// returns true.
bool ConvertBoolArg(bool v, FormatSinkImpl *sink) {
  sink->Append(v ? "true" : "false");
  return true;
}
// Formats the integer `v` into `sink` according to `conv`.
//
// Dispatch on the conversion character:
//   c                -> printed as a character (wide when T is wchar_t or the
//                       length modifier is 'l', narrow otherwise)
//   o, x, X, u       -> `v` is cast to its unsigned counterpart and printed
//                       as octal / lower hex / upper hex / decimal
//   d, i, v          -> printed as decimal, keeping the sign
//   a, e, f, g (and uppercase) -> `v` is cast to double and handled by
//                       ConvertFloatImpl
// Any other conversion character is declared unreachable via ABSL_ASSUME.
//
// For the digit-producing cases, a basic conversion appends the digits
// directly; otherwise ConvertIntImplInnerSlow applies flags, width and
// precision.
template <typename T>
bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
using U = typename MakeUnsigned<T>::type;
IntDigits as_digits;
// This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes
// it to complain about a switch/case type mismatch, even though both are
// FormatConversionChar. Likely this is because at this point
// FormatConversionChar is declared, but not defined.
switch (static_cast<uint8_t>(conv.conversion_char())) {
case static_cast<uint8_t>(FormatConversionCharInternal::c):
return (std::is_same<T, wchar_t>::value ||
(conv.length_mod() == LengthMod::l))
? ConvertWCharTImpl(static_cast<wchar_t>(v), conv, sink)
: ConvertCharImpl(static_cast<char>(v), conv, sink);
case static_cast<uint8_t>(FormatConversionCharInternal::o):
as_digits.PrintAsOct(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::x):
as_digits.PrintAsHexLower(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::X):
as_digits.PrintAsHexUpper(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::u):
as_digits.PrintAsDec(static_cast<U>(v));
break;
case static_cast<uint8_t>(FormatConversionCharInternal::d):
case static_cast<uint8_t>(FormatConversionCharInternal::i):
case static_cast<uint8_t>(FormatConversionCharInternal::v):
as_digits.PrintAsDec(v);
break;
case static_cast<uint8_t>(FormatConversionCharInternal::a):
case static_cast<uint8_t>(FormatConversionCharInternal::e):
case static_cast<uint8_t>(FormatConversionCharInternal::f):
case static_cast<uint8_t>(FormatConversionCharInternal::g):
case static_cast<uint8_t>(FormatConversionCharInternal::A):
case static_cast<uint8_t>(FormatConversionCharInternal::E):
case static_cast<uint8_t>(FormatConversionCharInternal::F):
case static_cast<uint8_t>(FormatConversionCharInternal::G):
return ConvertFloatImpl(static_cast<double>(v), conv, sink);
default:
ABSL_ASSUME(false);
}
// Fast path: no flags, width or precision to honor.
if (conv.is_basic()) {
sink->Append(as_digits.with_neg_and_zero());
return true;
}
return ConvertIntImplInnerSlow(as_digits, conv, sink);
}
// Explicit instantiations of ConvertIntArg for the built-in character and
// integer types, so the template's definition can stay in this source file.
template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<signed char>(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<unsigned char>(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<wchar_t>(wchar_t v, FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<short>(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<unsigned short>(unsigned short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<unsigned int>(unsigned int v,
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<long>(long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<long long>(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
template bool ConvertIntArg<unsigned long long>(unsigned long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl *sink);
// ==================== Strings ====================
// Each overload below forwards to ConvertStringArg and wraps the boolean
// result. Narrow strings are passed through as a string view; wide strings
// are passed as pointer + length to the overload that converts to UTF-8.

// std::string argument.
StringConvertResult FormatConvertImpl(const std::string &v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v, conv, sink)};
}
// std::wstring argument.
StringConvertResult FormatConvertImpl(const std::wstring &v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v.data(), v.size(), conv, sink)};
}
// string_view argument.
StringConvertResult FormatConvertImpl(string_view v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertStringArg(v, conv, sink)};
}
#if defined(ABSL_HAVE_STD_STRING_VIEW)
// std::wstring_view argument; only compiled when ABSL_HAVE_STD_STRING_VIEW is
// defined.
StringConvertResult FormatConvertImpl(std::wstring_view v,
const FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return {ConvertStringArg(v.data(), v.size(), conv, sink)};
}
#endif
// Formats a C string. With a 'p' conversion the pointer value itself is
// printed. Otherwise the text is printed: a null pointer is treated as an
// empty string, and when a precision is given the terminator is searched for
// only within the first `precision` characters, so the string does not have
// to be NUL-terminated beyond that range.
StringPtrConvertResult FormatConvertImpl(const char* v,
                                         const FormatConversionSpecImpl conv,
                                         FormatSinkImpl* sink) {
  if (conv.conversion_char() == FormatConversionCharInternal::p) {
    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
  }

  size_t len = 0;
  if (v != nullptr) {
    const auto precision = conv.precision();
    if (precision < 0) {
      // No precision: the whole NUL-terminated string.
      len = std::strlen(v);
    } else {
      // If precision is set, we look for the NUL-terminator on the valid
      // range.
      const char* const terminator = std::find(v, v + precision, '\0');
      len = static_cast<size_t>(terminator - v);
    }
  }
  return {ConvertStringArg(string_view(v, len), conv, sink)};
}
// Formats a wide C string. With a 'p' conversion the pointer value itself is
// printed. Otherwise the text is printed: a null pointer is treated as an
// empty string, and when a precision is given the terminator is searched for
// only within the first `precision` characters, so the string does not have
// to be NUL-terminated beyond that range.
StringPtrConvertResult FormatConvertImpl(const wchar_t* v,
                                         const FormatConversionSpecImpl conv,
                                         FormatSinkImpl* sink) {
  if (conv.conversion_char() == FormatConversionCharInternal::p) {
    return {FormatConvertImpl(VoidPtr(v), conv, sink).value};
  }

  size_t len = 0;
  if (v != nullptr) {
    const auto precision = conv.precision();
    if (precision < 0) {
      // No precision: the whole NUL-terminated string.
      len = std::wcslen(v);
    } else {
      // If precision is set, we look for the NUL-terminator on the valid
      // range.
      const wchar_t* const terminator = std::find(v, v + precision, L'\0');
      len = static_cast<size_t>(terminator - v);
    }
  }
  return {ConvertStringArg(v, len, conv, sink)};
}
// A literal nullptr argument is formatted exactly like a null `const char*`
// by forwarding to that overload.
StringPtrConvertResult FormatConvertImpl(std::nullptr_t,
const FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatConvertImpl(static_cast<const char*>(nullptr), conv, sink);
}
// ==================== Raw pointers ====================
// Formats a raw pointer value. A zero value prints the literal text "(nil)"
// with no further formatting; any other value is printed as lowercase hex
// through the integer formatting path.
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
    VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) {
  if (v.value) {
    IntDigits as_digits;
    as_digits.PrintAsHexLower(v.value);
    return {ConvertIntImplInnerSlow(as_digits, conv, sink)};
  }
  sink->Append("(nil)");
  return {true};
}
// ==================== Floats ====================
// One overload per floating-point type; each forwards to ConvertFloatArg and
// wraps the boolean result.
FloatingConvertResult FormatConvertImpl(float v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
FloatingConvertResult FormatConvertImpl(double v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
FloatingConvertResult FormatConvertImpl(long double v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertFloatArg(v, conv, sink)};
}
// ==================== Chars ====================
// char and wchar_t arguments go through the integer path (ConvertIntArg),
// which handles the 'c' conversion as well as the numeric ones.
CharConvertResult FormatConvertImpl(char v, const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
CharConvertResult FormatConvertImpl(wchar_t v,
const FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return {ConvertIntArg(v, conv, sink)};
}
// ==================== Ints ====================
// One overload per integer type, including the absl 128-bit types; each
// forwards to ConvertIntArg and wraps the boolean result.
IntegralConvertResult FormatConvertImpl(signed char v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned char v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(int v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(absl::int128 v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
IntegralConvertResult FormatConvertImpl(absl::uint128 v,
const FormatConversionSpecImpl conv,
FormatSinkImpl *sink) {
return {ConvertIntArg(v, conv, sink)};
}
// Explicit instantiation definitions of FormatArgImpl::Dispatch<T> for the
// built-in argument types. The macro is expanded with an empty prefix here;
// the header expands the same list with `extern` to declare them.
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_();
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,671 @@
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_
#include <string.h>
#include <wchar.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include "absl/base/config.h"
#include "absl/base/optimization.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/int128.h"
#include "absl/strings/has_absl_stringify.h"
#include "absl/strings/internal/str_format/extension.h"
#include "absl/strings/string_view.h"
#if defined(ABSL_HAVE_STD_STRING_VIEW)
#include <string_view>
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
// Forward declarations of types in namespace absl that the conversion
// machinery below refers to by name.
class Cord;
class FormatCountCapture;
class FormatSink;
template <absl::FormatConversionCharSet C>
struct FormatConvertResult;
class FormatConversionSpec;
namespace str_format_internal {
// Result type returned by the internal FormatConvertImpl overloads.
//
// The template argument `C` carries, in the type, the set of conversion
// characters associated with the overload; ExtractCharSet() recovers it at
// compile time. `value` holds the boolean outcome of the conversion.
template <FormatConversionCharSet C>
struct ArgConvertResult {
bool value;
};
// Result type for integer arguments: `c`, the numeric conversions, `*`
// (kStar) and `v`.
using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::c,
FormatConversionCharSetInternal::kNumeric,
FormatConversionCharSetInternal::kStar,
FormatConversionCharSetInternal::v)>;
// Result type for floating-point arguments: the floating conversions and `v`.
using FloatingConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::kFloating,
FormatConversionCharSetInternal::v)>;
// Result type for `char`/`wchar_t` arguments: same as the integer set but
// without `v`.
using CharConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::c,
FormatConversionCharSetInternal::kNumeric,
FormatConversionCharSetInternal::kStar)>;
// Type trait: true when the expression
//   AbslFormatConvert(const T&, const FormatConversionSpec&, FormatSink*)
// is well-formed, i.e. when `T` provides the AbslFormatConvert extension
// point. The primary template is the "not found" fallback.
template <typename T, typename = void>
struct HasUserDefinedConvert : std::false_type {};
// Chosen when the call expression above is well-formed for `T`.
template <typename T>
struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert(
std::declval<const T&>(),
std::declval<const FormatConversionSpec&>(),
std::declval<FormatSink*>()))>>
: std::true_type {};
// These declarations stop ordinary unqualified lookup of `AbslFormatConvert`
// and `AbslStringify` from continuing into the enclosing absl namespaces.
// Both names are used deliberately as ADL hooks, and only overloads found in
// the namespaces associated with the argument types should be considered.
// The functions are declared here only for their names.
void AbslFormatConvert();
void AbslStringify();
// Converts the integer-like value `v` according to `conv`, writing to `sink`.
// Returns a bool reporting the outcome. Only the declaration lives in this
// header; see the note below on where the instantiations are.
template <typename T>
bool ConvertIntArg(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Forward declarations of internal `ConvertIntArg` function template
// instantiations are here to avoid including the template body in the headers
// and instantiating it in large numbers of translation units. Explicit
// instantiations can be found in "absl/strings/internal/str_format/arg.cc"
extern template bool ConvertIntArg<char>(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<signed char>(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<unsigned char>(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<wchar_t>(wchar_t v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<short>(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<unsigned short>( // NOLINT
unsigned short v, FormatConversionSpecImpl conv, // NOLINT
FormatSinkImpl* sink);
extern template bool ConvertIntArg<int>(int v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<unsigned int>(unsigned int v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<long>( // NOLINT
long v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // NOLINT
extern template bool ConvertIntArg<unsigned long>(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<long long>(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
extern template bool ConvertIntArg<unsigned long long>( // NOLINT
unsigned long long v, FormatConversionSpecImpl conv, // NOLINT
FormatSinkImpl* sink);
// Adapter for types that provide the `AbslFormatConvert` extension point.
//
// Participates in overload resolution only when
// `AbslFormatConvert(v, const FormatConversionSpec&, FormatSink*)` is
// well-formed. It wraps the internal `conv` and `sink` objects in their
// public counterparts and forwards to the user's function, returning
// whatever that function returns.
template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink)
-> decltype(AbslFormatConvert(v,
std::declval<const FormatConversionSpec&>(),
std::declval<FormatSink*>())) {
// The always-true `sizeof` condition makes these aliases depend on T.
// NOTE(review): presumably this lets the types, which are only
// forward-declared in this header, stay incomplete until instantiation.
using FormatConversionSpecT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>;
using FormatSinkT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
auto fcs = conv.Wrap<FormatConversionSpecT>();
auto fs = sink->Wrap<FormatSinkT>();
return AbslFormatConvert(v, fcs, &fs);
}
template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink)
-> std::enable_if_t<std::is_enum<T>::value &&
std::is_void<decltype(AbslStringify(
std::declval<FormatSink&>(), v))>::value,
IntegralConvertResult> {
if (conv.conversion_char() == FormatConversionCharInternal::v) {
using FormatSinkT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
auto fs = sink->Wrap<FormatSinkT>();
AbslStringify(fs, v);
return {true};
} else {
return {ConvertIntArg(
static_cast<typename std::underlying_type<T>::type>(v), conv, sink)};
}
}
// Overload for non-enum types, other than absl::Cord, that provide a
// void-returning `AbslStringify(FormatSink&, v)`.
//
// The conversion spec is ignored: the value is always rendered through
// AbslStringify on a wrapped sink and the call reports success. The result
// type advertises only the `v` conversion.
template <typename T>
auto FormatConvertImpl(const T& v, FormatConversionSpecImpl,
FormatSinkImpl* sink)
-> std::enable_if_t<!std::is_enum<T>::value &&
!std::is_same<T, absl::Cord>::value &&
std::is_void<decltype(AbslStringify(
std::declval<FormatSink&>(), v))>::value,
ArgConvertResult<FormatConversionCharSetInternal::v>> {
// The alias is made dependent on T via an always-true condition.
using FormatSinkT =
absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>;
auto fs = sink->Wrap<FormatSinkT>();
AbslStringify(fs, v);
return {true};
}
// Forward declaration; a FormatConvertImpl overload further below formats
// `StreamedWrapper<T>` by streaming its `v_` member into an ostringstream.
template <typename T>
class StreamedWrapper;
// If 'v' can be converted (in the printf sense) according to 'conv',
// then convert it, appending to `sink` and return `true`.
// Otherwise fail and return `false`.
// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v'
// as an extension mechanism. These FormatConvertImpl functions are the default
// implementations.
// The ADL search is augmented via the 'Sink*' parameter, which also
// serves as a disambiguator to reject possible unintended 'AbslFormatConvert'
// functions in the namespaces associated with 'v'.
// Raw pointers.
// Holds a pointer value as an integer (`uintptr_t`) so that any pointer type
// accepted by the constructor below shares one representation.
struct VoidPtr {
// Leaves `value` uninitialized.
VoidPtr() = default;
// Implicit conversion from `T*`. The defaulted non-type template parameter
// removes this constructor from overload resolution when
// `reinterpret_cast<uintptr_t>(T*)` is not a valid expression.
template <typename T,
decltype(reinterpret_cast<uintptr_t>(std::declval<T*>())) = 0>
VoidPtr(T* ptr) // NOLINT
: value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {}
// The pointer's integer value; 0 for a null pointer.
uintptr_t value;
};
// Returns the conversion character set `C` encoded in the type of a
// FormatConvertResult. The argument's value is not used.
template <FormatConversionCharSet C>
constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) {
return C;
}
// Same as above, for the internal ArgConvertResult type.
template <FormatConversionCharSet C>
constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) {
return C;
}
// Formats a raw pointer value; the result type advertises only the `p`
// conversion. Declared here without a body.
ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl(
VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Strings.
// Result type for string-like arguments: the `s` and `v` conversions.
using StringConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::s,
FormatConversionCharSetInternal::v)>;
// Narrow std::string argument.
StringConvertResult FormatConvertImpl(const std::string& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Wide std::wstring argument.
StringConvertResult FormatConvertImpl(const std::wstring& v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// absl::string_view argument.
StringConvertResult FormatConvertImpl(string_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
#if defined(ABSL_HAVE_STD_STRING_VIEW)
// std::wstring_view argument; declared only when ABSL_HAVE_STD_STRING_VIEW
// is defined.
StringConvertResult FormatConvertImpl(std::wstring_view v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
#if !defined(ABSL_USES_STD_STRING_VIEW)
// std::string_view argument. This overload is compiled only when
// ABSL_USES_STD_STRING_VIEW is not defined. It re-wraps the same bytes as an
// absl::string_view and forwards to that overload.
inline StringConvertResult FormatConvertImpl(std::string_view v,
                                             FormatConversionSpecImpl conv,
                                             FormatSinkImpl* sink) {
  const absl::string_view same_bytes(v.data(), v.size());
  return FormatConvertImpl(same_bytes, conv, sink);
}
#endif // !ABSL_USES_STD_STRING_VIEW
#endif // ABSL_HAVE_STD_STRING_VIEW
// Result type for C-string pointer arguments: the `s` and `p` conversions.
using StringPtrConvertResult = ArgConvertResult<FormatConversionCharSetUnion(
FormatConversionCharSetInternal::s,
FormatConversionCharSetInternal::p)>;
// Narrow C-string pointer argument.
StringPtrConvertResult FormatConvertImpl(const char* v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Wide C-string pointer argument.
StringPtrConvertResult FormatConvertImpl(const wchar_t* v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// This overload is needed to disambiguate, since `nullptr` could match either
// of the other overloads equally well.
StringPtrConvertResult FormatConvertImpl(std::nullptr_t,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Formats an absl::Cord. This is a template constrained to
// `AbslCord == absl::Cord`, which is only forward-declared in this header.
//
// At most `precision` bytes are written when a non-negative precision is
// given; otherwise the whole cord is written. When a non-negative width
// exceeds the number of bytes to write, the difference is emitted as spaces,
// after the text if the left flag is set and before it otherwise. Always
// reports success.
template <class AbslCord, typename std::enable_if<std::is_same<
                              AbslCord, absl::Cord>::value>::type* = nullptr>
StringConvertResult FormatConvertImpl(const AbslCord& value,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* sink) {
  const bool pad_after = conv.has_left_flag();
  const int width = conv.width();
  const size_t field_width = width >= 0 ? static_cast<size_t>(width) : 0;

  size_t bytes_left = value.size();
  const int precision = conv.precision();
  if (precision >= 0) {
    bytes_left = (std::min)(bytes_left, static_cast<size_t>(precision));
  }

  const size_t padding = Excess(bytes_left, field_width);
  if (!pad_after && padding > 0) {
    sink->Append(padding, ' ');
  }

  for (string_view chunk : value.Chunks()) {
    // Trim the chunk so that no more than `bytes_left` bytes are emitted.
    const size_t take = (std::min)(chunk.size(), bytes_left);
    chunk.remove_suffix(chunk.size() - take);
    bytes_left -= take;
    sink->Append(chunk);
    if (bytes_left == 0) {
      break;
    }
  }

  if (pad_after && padding > 0) {
    sink->Append(padding, ' ');
  }
  return {true};
}
// Writes `v` to `sink` for the `v` conversion of a bool and returns the
// outcome; it is called from the bool overload of FormatConvertImpl in this
// header. Declared here without a body.
bool ConvertBoolArg(bool v, FormatSinkImpl* sink);
// Floats.
FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
FloatingConvertResult FormatConvertImpl(long double v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Chars.
CharConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
CharConvertResult FormatConvertImpl(wchar_t v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// Ints.
// One overload per built-in integer type, plus absl::int128 and
// absl::uint128.
IntegralConvertResult FormatConvertImpl(signed char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned char v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
IntegralConvertResult FormatConvertImpl(uint128 v,
FormatConversionSpecImpl conv,
FormatSinkImpl* sink);
// `bool` argument. This is written as a template that accepts only
// `T == bool` because a plain `bool` parameter would be ambiguous with other
// overloads under implicit type conversions.
//
// The `v` conversion is handled by ConvertBoolArg; every other conversion
// formats the value as an `int`.
template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0>
IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv,
                                        FormatSinkImpl* sink) {
  const bool is_v_conversion =
      conv.conversion_char() == FormatConversionCharInternal::v;
  if (!is_v_conversion) {
    return FormatConvertImpl(static_cast<int>(v), conv, sink);
  }
  return {ConvertBoolArg(v, sink)};
}
// We provide this function to help the checker, but it is never defined.
// FormatArgImpl will use the underlying Convert functions instead.
// It is declared only for enum types that have neither a user-defined
// AbslFormatConvert nor an AbslStringify, and it reports the integral
// conversion set for them.
template <typename T>
typename std::enable_if<std::is_enum<T>::value &&
!HasUserDefinedConvert<T>::value &&
!HasAbslStringify<T>::value,
IntegralConvertResult>::type
FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink);
// Formats a StreamedWrapper<T>. The wrapped value (`v.v_`) is inserted into
// a std::ostringstream with `operator<<`. If the stream is in a failed state
// afterwards the conversion reports failure; otherwise the accumulated text
// is formatted by the std::string overload.
template <typename T>
StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v,
                                      FormatConversionSpecImpl conv,
                                      FormatSinkImpl* out) {
  std::ostringstream stream;
  stream << v.v_;
  if (stream.fail()) {
    return {false};
  }
  const std::string rendered = stream.str();
  return str_format_internal::FormatConvertImpl(rendered, conv, out);
}
// Implements the `n` conversion for FormatCountCapture. The member function
// is a template, and accesses the capture through a T-dependent type, so
// that its body is not evaluated until FormatCountCapture is fully defined.
struct FormatCountCaptureHelper {
  // For the `n` conversion, stores the sink's current size, converted to
  // `int`, through the capture's `p_` pointer and reports success. Any other
  // conversion reports failure and stores nothing.
  template <class T = int>
  static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper(
      const FormatCountCapture& v, FormatConversionSpecImpl conv,
      FormatSinkImpl* sink) {
    const bool is_count_conversion =
        conv.conversion_char() ==
        str_format_internal::FormatConversionCharInternal::n;
    if (is_count_conversion) {
      const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& capture = v;
      *capture.p_ = static_cast<int>(sink->size());
      return {true};
    }
    return {false};
  }
};
// FormatCountCapture argument (the `n` conversion). A template with a
// defaulted parameter that forwards to FormatCountCaptureHelper, whose
// comment explains that templates are used to delay evaluation until
// FormatCountCapture is fully defined.
template <class T = int>
ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl(
const FormatCountCapture& v, FormatConversionSpecImpl conv,
FormatSinkImpl* sink) {
return FormatCountCaptureHelper::ConvertHelper(v, conv, sink);
}
// Helper friend struct to hide implementation details from the public API of
// FormatArgImpl.
// Every member goes through the argument's stored `dispatcher_` function
// pointer and `data_` payload.
struct FormatArgImplFriend {
// Asks the argument for its value as an `int`, written to `*out`. Returns
// whatever the dispatcher returns.
template <typename Arg>
static bool ToInt(Arg arg, int* out) {
// A value initialized FormatConversionSpecImpl has a `none` conv, which
// tells the dispatcher to run the `int` conversion.
return arg.dispatcher_(arg.data_, {}, out);
}
// Runs the conversion described by `conv` on the argument, writing to `out`.
// Returns whatever the dispatcher returns.
template <typename Arg>
static bool Convert(Arg arg, FormatConversionSpecImpl conv,
FormatSinkImpl* out) {
return arg.dispatcher_(arg.data_, conv, out);
}
// Exposes the stored dispatcher pointer; the tests compare these pointers to
// check that two argument types decay to the same stored type.
template <typename Arg>
static typename Arg::Dispatcher GetVTablePtrForTest(Arg arg) {
return arg.dispatcher_;
}
};
// Computes, at compile time, the set of conversion characters associated
// with argument type `Arg`. It takes the return type of the FormatConvertImpl
// overload selected for a `const Arg&` and extracts the character set
// encoded in that type.
template <typename Arg>
constexpr FormatConversionCharSet ArgumentToConv() {
using ConvResult = decltype(str_format_internal::FormatConvertImpl(
std::declval<const Arg&>(),
std::declval<const FormatConversionSpecImpl&>(),
std::declval<FormatSinkImpl*>()));
return absl::str_format_internal::ExtractCharSet(ConvResult{});
}
// A type-erased handle to a format argument.
//
// The handle consists of a small `Data` payload (either the value itself or a
// pointer to it) and a `Dispatcher` function pointer instantiated for the
// decayed argument type. All conversions go through that function pointer.
class FormatArgImpl {
private:
// Number of bytes available for storing a value inline in `Data::buf`.
enum { kInlinedSpace = 8 };
using VoidPtr = str_format_internal::VoidPtr;
// Payload: a pointer to the argument, a pointer to a volatile argument, or
// the argument's bytes stored inline.
union Data {
const void* ptr;
const volatile void* volatile_ptr;
char buf[kInlinedSpace];
};
// Type-erased entry point. `out` is an `int*` when the spec has the `none`
// conversion, and a `FormatSinkImpl*` otherwise (see Dispatch below).
using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out);
// True for integral, floating-point, pointer and VoidPtr types that fit in
// kInlinedSpace bytes.
template <typename T>
struct store_by_value
: std::integral_constant<bool, (sizeof(T) <= kInlinedSpace) &&
(std::is_integral<T>::value ||
std::is_floating_point<T>::value ||
std::is_pointer<T>::value ||
std::is_same<VoidPtr, T>::value)> {};
enum StoragePolicy { ByPointer, ByVolatilePointer, ByValue };
// Chooses how a `T` is stored: volatile types by volatile pointer, types
// satisfying store_by_value inline, and everything else by pointer.
template <typename T>
struct storage_policy
: std::integral_constant<StoragePolicy,
(std::is_volatile<T>::value
? ByVolatilePointer
: (store_by_value<T>::value ? ByValue
: ByPointer))> {
};
// To reduce the number of vtables we will decay values before hand.
// Anything with a user-defined Convert will get its own vtable.
// For everything else:
// - Decay char* and char arrays into `const char*`
// - Decay wchar_t* and wchar_t arrays into `const wchar_t*`
// - Decay any other pointer to `const void*`
// - Decay all enums to the integral promotion of their underlying type.
// - Decay function pointers to void*.
template <typename T, typename = void>
struct DecayType {
// True when T has its own AbslFormatConvert or AbslStringify; such types
// are never decayed.
static constexpr bool kHasUserDefined =
str_format_internal::HasUserDefinedConvert<T>::value ||
HasAbslStringify<T>::value;
// The first matching conversion wins: `const char*`, then `const wchar_t*`,
// then VoidPtr; otherwise the argument is kept as `const T&`.
using type = typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const char*>::value,
const char*,
typename std::conditional<
!kHasUserDefined && std::is_convertible<T, const wchar_t*>::value,
const wchar_t*,
typename std::conditional<
!kHasUserDefined && std::is_convertible<T, VoidPtr>::value,
VoidPtr,
const T&>::type>::type>::type;
};
// Enums without AbslFormatConvert or AbslStringify decay to the type of
// unary `+` applied to their underlying type, i.e. its integral promotion.
template <typename T>
struct DecayType<
T, typename std::enable_if<
!str_format_internal::HasUserDefinedConvert<T>::value &&
!HasAbslStringify<T>::value && std::is_enum<T>::value>::type> {
using type = decltype(+typename std::underlying_type<T>::type());
};
public:
// Captures `value` as a format argument. The value is first converted to
// its decayed type `D`. When `D` is `const T&` the storage policy may be
// by-pointer, in which case the handle stores the address of `value`, so
// `value` must outlive the handle.
template <typename T>
explicit FormatArgImpl(const T& value) {
using D = typename DecayType<T>::type;
static_assert(
std::is_same<D, const T&>::value || storage_policy<D>::value == ByValue,
"Decayed types must be stored by value");
Init(static_cast<D>(value));
}
private:
friend struct str_format_internal::FormatArgImplFriend;
// Manager<T> knows how to pack a `T` into `Data` and read it back. There is
// one specialization per storage policy.
template <typename T, StoragePolicy = storage_policy<T>::value>
struct Manager;
// Stores the address of the argument.
template <typename T>
struct Manager<T, ByPointer> {
static Data SetValue(const T& value) {
Data data;
data.ptr = std::addressof(value);
return data;
}
static const T& Value(Data arg) { return *static_cast<const T*>(arg.ptr); }
};
// Stores the address of a volatile argument.
template <typename T>
struct Manager<T, ByVolatilePointer> {
static Data SetValue(const T& value) {
Data data;
data.volatile_ptr = &value;
return data;
}
static const T& Value(Data arg) {
return *static_cast<const T*>(arg.volatile_ptr);
}
};
// Copies the argument's bytes into the inline buffer and back with memcpy.
template <typename T>
struct Manager<T, ByValue> {
static Data SetValue(const T& value) {
Data data;
memcpy(data.buf, &value, sizeof(value));
return data;
}
static T Value(Data arg) {
T value;
memcpy(&value, arg.buf, sizeof(T));
return value;
}
};
// Records the payload and the dispatcher instantiated for `T`.
template <typename T>
void Init(const T& value) {
data_ = Manager<T>::SetValue(value);
dispatcher_ = &Dispatch<T>;
}
// Converts `val` to `int`, clamping to the range of `int`. The comparisons
// are performed in a 64-bit type whose signedness matches T.
template <typename T>
static int ToIntVal(const T& val) {
using CommonType = typename std::conditional<std::is_signed<T>::value,
int64_t, uint64_t>::type;
if (static_cast<CommonType>(val) >
static_cast<CommonType>((std::numeric_limits<int>::max)())) {
return (std::numeric_limits<int>::max)();
} else if (std::is_signed<T>::value &&
static_cast<CommonType>(val) <
static_cast<CommonType>((std::numeric_limits<int>::min)())) {
return (std::numeric_limits<int>::min)();
}
return static_cast<int>(val);
}
// ToInt is selected by two tag arguments: is_integral<T> and is_enum<T>.
// Integral types: clamp the stored value.
template <typename T>
static bool ToInt(Data arg, int* out, std::true_type /* is_integral */,
std::false_type) {
*out = ToIntVal(Manager<T>::Value(arg));
return true;
}
// Enum types: clamp the value of the underlying type.
template <typename T>
static bool ToInt(Data arg, int* out, std::false_type,
std::true_type /* is_enum */) {
*out = ToIntVal(static_cast<typename std::underlying_type<T>::type>(
Manager<T>::Value(arg)));
return true;
}
// Everything else cannot be read as an int; `*out` is left untouched.
template <typename T>
static bool ToInt(Data, int*, std::false_type, std::false_type) {
return false;
}
// The function stored in `dispatcher_`. Returns false when the requested
// conversion character is not in the set supported for `T`; otherwise
// returns the result of the FormatConvertImpl overload selected for `T`.
template <typename T>
static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) {
// A `none` conv indicates that we want the `int` conversion.
if (ABSL_PREDICT_FALSE(spec.conversion_char() ==
FormatConversionCharInternal::kNone)) {
return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(),
std::is_enum<T>());
}
if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(),
spec.conversion_char()))) {
return false;
}
return str_format_internal::FormatConvertImpl(
Manager<T>::Value(arg), spec,
static_cast<FormatSinkImpl*>(out))
.value;
}
Data data_;
Dispatcher dispatcher_;
};
// Expands to an explicit instantiation of FormatArgImpl::Dispatch<T>,
// prefixed by `E`. With `E` = `extern` this is an explicit instantiation
// declaration; with `E` empty it is an explicit instantiation definition.
#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \
E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \
void*)
// Applies the macro above to every built-in argument type except
// std::wstring_view, forwarding the prefix through __VA_ARGS__.
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(bool, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(signed char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned char, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(short, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned short, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned int, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long, __VA_ARGS__); /* NOLINT */ \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(long double, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const char*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::string, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(string_view, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(const wchar_t*, __VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring, __VA_ARGS__)
// The full list adds std::wstring_view only when ABSL_HAVE_STD_STRING_VIEW is
// defined.
#if defined(ABSL_HAVE_STD_STRING_VIEW)
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_( \
__VA_ARGS__); \
ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(std::wstring_view, __VA_ARGS__)
#else
#define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_NO_WSTRING_VIEW_(__VA_ARGS__)
#endif
// In this header the instantiations are only declared (`extern`).
ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern);
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_

View file

@ -0,0 +1,162 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/strings/internal/str_format/arg.h"
#include <limits>
#include <string>
#include "gtest/gtest.h"
#include "absl/base/config.h"
#include "absl/strings/str_format.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace str_format_internal {
namespace {
// Test fixture providing a few argument kinds used by the tests below.
class FormatArgImplTest : public ::testing::Test {
public:
// Plain enum with no custom formatting hooks.
enum Color { kRed, kGreen, kBlue };
// Returns a `const char*` (as opposed to a string literal array).
static const char *hi() { return "hi"; }
// Empty type; an AbslFormatConvert overload is defined for it just below
// the fixture.
struct X {};
X x_;
};
// AbslFormatConvert extension for FormatArgImplTest::X. It advertises an
// empty conversion set and always reports failure.
inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert(
const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) {
return {false};
}
// Checks FormatArgImplFriend::ToInt for integral, out-of-range, bool,
// non-integral and enum arguments.
TEST_F(FormatArgImplTest, ToInt) {
int out = 0;
// In-range integral values are returned unchanged.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out));
EXPECT_EQ(1, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(-1), &out));
EXPECT_EQ(-1, out);
EXPECT_TRUE(
FormatArgImplFriend::ToInt(FormatArgImpl(static_cast<char>(64)), &out));
EXPECT_EQ(64, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>(123456)), &out)); // NOLINT
EXPECT_EQ(123456, out);
// Values above INT_MAX are clamped to INT_MAX.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<unsigned long long>( // NOLINT
std::numeric_limits<int>::max()) +
1),
&out));
EXPECT_EQ(std::numeric_limits<int>::max(), out);
// Values below INT_MIN are clamped to INT_MIN.
EXPECT_TRUE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<long long>( // NOLINT
std::numeric_limits<int>::min()) -
10),
&out));
EXPECT_EQ(std::numeric_limits<int>::min(), out);
// bool converts to 0 / 1.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(false), &out));
EXPECT_EQ(0, out);
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(true), &out));
EXPECT_EQ(1, out);
// Floating-point values, pointers, strings and user-defined types are
// rejected.
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(2.2), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(3.2f), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(
FormatArgImpl(static_cast<int *>(nullptr)), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out));
EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out));
// Enums convert to their numeric value.
EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out));
EXPECT_EQ(2, out);
}
// Declared with an unknown bound here; the definition appears later in this
// file.
extern const char kMyArray[];
// char arrays of any size, including one of unknown bound, must use the same
// dispatcher as a plain `const char*` argument.
TEST_F(FormatArgImplTest, CharArraysDecayToCharPtr) {
const char* a = "";
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("A")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl("ABC")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyArray)));
}
// Declared with an unknown bound here; the definition appears later in this
// file.
extern const wchar_t kMyWCharTArray[];
// wchar_t arrays of any size, including one of unknown bound, must use the
// same dispatcher as a plain `const wchar_t*` argument.
TEST_F(FormatArgImplTest, WCharTArraysDecayToWCharTPtr) {
const wchar_t* a = L"";
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"A")));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(L"ABC")));
EXPECT_EQ(
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(a)),
FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(kMyWCharTArray)));
}
// Object pointers, pointers to volatile and function pointers must all use
// the same dispatcher as a `void*` argument.
TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) {
auto expected = FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<void *>(nullptr)));
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<int *>(nullptr))),
expected);
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(
FormatArgImpl(static_cast<volatile int *>(nullptr))),
expected);
// A captureless lambda converted to a plain function pointer.
auto p = static_cast<void (*)()>([] {});
EXPECT_EQ(FormatArgImplFriend::GetVTablePtrForTest(FormatArgImpl(p)),
expected);
}
// Formats `kMyArray` with a plain `%s`-style spec (no flags, width or
// precision) and expects its full contents. At this point in the file the
// array is visible only through its earlier `extern` declaration with an
// unknown bound.
TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) {
std::string s;
FormatSinkImpl sink(&s);
FormatConversionSpecImpl conv;
FormatConversionSpecImplFriend::SetConversionChar(
FormatConversionCharInternal::s, &conv);
FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
FormatConversionSpecImplFriend::SetWidth(-1, &conv);
FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
EXPECT_TRUE(
FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink));
sink.Flush();
EXPECT_EQ("ABCDE", s);
}
// Definition placed after the test that uses the array.
const char kMyArray[] = "ABCDE";
// Wide-character counterpart of the test above: formats `kMyWCharTArray`
// with a plain `%s`-style spec and expects the narrow string "ABCDE". At
// this point in the file the array is visible only through its earlier
// `extern` declaration with an unknown bound.
TEST_F(FormatArgImplTest, WorksWithWCharTArraysOfUnknownSize) {
std::string s;
FormatSinkImpl sink(&s);
FormatConversionSpecImpl conv;
FormatConversionSpecImplFriend::SetConversionChar(
FormatConversionCharInternal::s, &conv);
FormatConversionSpecImplFriend::SetFlags(Flags(), &conv);
FormatConversionSpecImplFriend::SetWidth(-1, &conv);
FormatConversionSpecImplFriend::SetPrecision(-1, &conv);
EXPECT_TRUE(
FormatArgImplFriend::Convert(FormatArgImpl(kMyWCharTArray), conv, &sink));
sink.Flush();
EXPECT_EQ("ABCDE", s);
}
// Definition placed after the test that uses the array.
const wchar_t kMyWCharTArray[] = L"ABCDE";
} // namespace
} // namespace str_format_internal
ABSL_NAMESPACE_END
} // namespace absl

Some files were not shown because too many files have changed in this diff Show more