Repo created

Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

absl/debugging/internal/address_is_readable.cc
@@ -0,0 +1,98 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// base::AddressIsReadable() probes an address to see whether it is readable,
// without faulting.
#include "absl/debugging/internal/address_is_readable.h"
#if !defined(__linux__) || defined(__ANDROID__)
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// On non-Linux platforms (and on Android), just return true.
bool AddressIsReadable(const void* /* addr */) { return true; }
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#else // __linux__ && !__ANDROID__
#include <stdint.h>
#include <syscall.h>
#include <unistd.h>
#include "absl/base/internal/errno_saver.h"
#include "absl/base/internal/raw_logging.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// NOTE: be extra careful about adding any interposable function calls here
// (such as open(), read(), etc.). These symbols may be interposed and will get
// invoked in contexts they don't expect.
//
// NOTE: any new system calls here may also require sandbox reconfiguration.
//
bool AddressIsReadable(const void *addr) {
// rt_sigprocmask below checks 8 contiguous bytes. If addr resides in the
// last 7 bytes of a page (unaligned), rt_sigprocmask would additionally
// check the readability of the next page, which is not desired. Align
// address on 8-byte boundary to check only the current page.
const uintptr_t u_addr = reinterpret_cast<uintptr_t>(addr) & ~uintptr_t{7};
addr = reinterpret_cast<const void *>(u_addr);
// rt_sigprocmask below will succeed for this input.
if (addr == nullptr) return false;
absl::base_internal::ErrnoSaver errno_saver;
// Here we probe with some syscall which
// - accepts an 8-byte region of user memory as input
// - tests for EFAULT before other validation
// - has no problematic side-effects
//
// rt_sigprocmask(2) works for this. It copies sizeof(kernel_sigset_t)==8
// bytes from the address into the kernel memory before any validation.
//
// The call can never succeed, since the `how` parameter is not one of
// SIG_BLOCK, SIG_UNBLOCK, SIG_SETMASK.
//
// This strategy depends on Linux implementation details,
// so we rely on the test to alert us if it stops working.
//
// Some discarded past approaches:
// - msync() doesn't reject PROT_NONE regions
// - write() on /dev/null doesn't return EFAULT
// - write() on a pipe requires creating it and draining the writes
// - connect() works but is problematic for sandboxes and needs a valid
// file descriptor
//
// This can never succeed (invalid first argument to sigprocmask).
ABSL_RAW_CHECK(syscall(SYS_rt_sigprocmask, ~0, addr, nullptr,
/*sizeof(kernel_sigset_t)*/ 8) == -1,
"unexpected success");
ABSL_RAW_CHECK(errno == EFAULT || errno == EINVAL, "unexpected errno");
return errno != EFAULT;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // __linux__ && !__ANDROID__
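A minimal usage sketch (editorial addition, not part of this commit); the function and variable names below are hypothetical, and the expected results follow from the Linux implementation above (the non-Linux stub simply returns true for every address):

#include "absl/debugging/internal/address_is_readable.h"

// Probe one mapped address and the null page; illustrative only.
void AddressIsReadableSketch() {
  int local = 0;
  // A local variable lives on a mapped, readable stack page.
  bool stack_ok = absl::debugging_internal::AddressIsReadable(&local);
  // nullptr is rejected up front, before the rt_sigprocmask probe.
  bool null_ok = absl::debugging_internal::AddressIsReadable(nullptr);
  (void)stack_ok;  // true
  (void)null_ok;   // false on Linux; the non-Linux stub returns true
}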

absl/debugging/internal/address_is_readable.h
@@ -0,0 +1,32 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_
#define ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Return whether the byte at *addr is readable, without faulting.
// Saves and restores errno.
bool AddressIsReadable(const void *addr);
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_

absl/debugging/internal/bounded_utf8_length_sequence.h
@@ -0,0 +1,126 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
#define ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
#include <cstdint>
#include "absl/base/config.h"
#include "absl/numeric/bits.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// A sequence of up to max_elements integers between 1 and 4 inclusive, whose
// insertion operation computes the sum of all the elements before the insertion
// point. This is useful in decoding Punycode, where one needs to know where in
// a UTF-8 byte stream the n-th code point begins.
//
// BoundedUtf8LengthSequence is async-signal-safe and suitable for use in
// symbolizing stack traces in a signal handler, provided max_elements is not
// improvidently large. For inputs of lengths accepted by the Rust demangler,
// up to a couple hundred code points, InsertAndReturnSumOfPredecessors should
// run in a few dozen clock cycles, on par with the other arithmetic required
// for Punycode decoding.
template <uint32_t max_elements>
class BoundedUtf8LengthSequence {
public:
// Constructs an empty sequence.
BoundedUtf8LengthSequence() = default;
// Inserts `utf8_length` at position `index`, shifting any existing elements at
// or beyond `index` one position to the right. If the sequence is already
// full, the rightmost element is discarded.
//
// Returns the sum of the elements at positions 0 to `index - 1` inclusive.
// If `index` is greater than the number of elements already inserted, the
// excess positions in the range count 1 apiece.
//
// REQUIRES: index < max_elements and 1 <= utf8_length <= 4.
uint32_t InsertAndReturnSumOfPredecessors(
uint32_t index, uint32_t utf8_length) {
// The caller shouldn't pass out-of-bounds inputs, but if it does happen,
// clamp the values and try to continue. If we're being called from a
// signal handler, the last thing we want to do is crash. Emitting
// malformed UTF-8 is a lesser evil.
if (index >= max_elements) index = max_elements - 1;
if (utf8_length == 0 || utf8_length > 4) utf8_length = 1;
const uint32_t word_index = index/32;
const uint32_t bit_index = 2 * (index % 32);
const uint64_t ones_bit = uint64_t{1} << bit_index;
// Compute the sum of predecessors.
// - Each value from 1 to 4 is represented by a bit field with value from
// 0 to 3, so the desired sum is index plus the sum of the
// representations actually stored.
// - For each bit field, a set low bit should contribute 1 to the sum, and
// a set high bit should contribute 2.
// - Another way to say the same thing is that each set bit contributes 1,
// and each set high bit contributes an additional 1.
// - So the sum we want is index + popcount(everything) + popcount(bits in
// odd positions).
const uint64_t odd_bits_mask = 0xaaaaaaaaaaaaaaaa;
const uint64_t lower_seminibbles_mask = ones_bit - 1;
const uint64_t higher_seminibbles_mask = ~lower_seminibbles_mask;
const uint64_t same_word_bits_below_insertion =
rep_[word_index] & lower_seminibbles_mask;
int full_popcount = absl::popcount(same_word_bits_below_insertion);
int odd_popcount =
absl::popcount(same_word_bits_below_insertion & odd_bits_mask);
for (uint32_t j = word_index; j > 0; --j) {
const uint64_t word_below_insertion = rep_[j - 1];
full_popcount += absl::popcount(word_below_insertion);
odd_popcount += absl::popcount(word_below_insertion & odd_bits_mask);
}
const uint32_t sum_of_predecessors =
index + static_cast<uint32_t>(full_popcount + odd_popcount);
// Now insert utf8_length's representation, shifting successors up one
// place.
for (uint32_t j = max_elements/32 - 1; j > word_index; --j) {
rep_[j] = (rep_[j] << 2) | (rep_[j - 1] >> 62);
}
rep_[word_index] =
(rep_[word_index] & lower_seminibbles_mask) |
(uint64_t{utf8_length - 1} << bit_index) |
((rep_[word_index] & higher_seminibbles_mask) << 2);
return sum_of_predecessors;
}
private:
// If the (32 * i + j)-th element of the represented sequence has the value k
// (0 <= j < 32, 1 <= k <= 4), then bits 2 * j and 2 * j + 1 of rep_[i]
// contain the seminibble (k - 1).
//
// In particular, the zero-initialization of rep_ makes positions not holding
// any inserted element count as 1 in InsertAndReturnSumOfPredecessors.
//
// Example: rep_ = {0xb1, ... the rest zeroes ...} represents the sequence
// (2, 1, 4, 3, ... the rest 1's ...). Constructing the sequence of Unicode
// code points "Àa🂻中" = {U+00C0, U+0061, U+1F0BB, U+4E2D} (among many
// other examples) would yield this value of rep_.
static_assert(max_elements > 0 && max_elements % 32 == 0,
"max_elements must be a positive multiple of 32");
uint64_t rep_[max_elements/32] = {};
};
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_BOUNDED_UTF8_LENGTH_SEQUENCE_H_
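A short usage sketch (editorial addition, not part of this commit) of the contract documented above, inserting the UTF-8 lengths of the code points in "Àa🂻中" (2, 1, 4, 3) as in the rep_ example; the function name is hypothetical:

#include <cstdint>
#include "absl/debugging/internal/bounded_utf8_length_sequence.h"

// Each call returns how many UTF-8 bytes precede the insertion point.
void BoundedUtf8LengthSequenceSketch() {
  absl::debugging_internal::BoundedUtf8LengthSequence<32> seq;
  uint32_t s0 = seq.InsertAndReturnSumOfPredecessors(0, 2);  // 0 bytes before "À"
  uint32_t s1 = seq.InsertAndReturnSumOfPredecessors(1, 1);  // 2 bytes before "a"
  uint32_t s2 = seq.InsertAndReturnSumOfPredecessors(2, 4);  // 3 bytes before "🂻"
  uint32_t s3 = seq.InsertAndReturnSumOfPredecessors(3, 3);  // 7 bytes before "中"
  (void)s0; (void)s1; (void)s2; (void)s3;
}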

absl/debugging/internal/bounded_utf8_length_sequence_test.cc
@@ -0,0 +1,126 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/bounded_utf8_length_sequence.h"
#include <cstdint>
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfOneCorrectly) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1);
}
TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfTwoCorrectly) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 2);
}
TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfThreeCorrectly) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0);
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 3);
}
TEST(BoundedUtf8LengthSequenceTest, RemembersAValueOfFourCorrectly) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0);
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 4);
}
TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralAppendedValues) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 4), 1);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 2), 5);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 3), 7);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10);
}
TEST(BoundedUtf8LengthSequenceTest, RemembersSeveralPrependedValues) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 4), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 3), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(4, 1), 10);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(3, 1), 6);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(2, 1), 3);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(1, 1), 1);
}
TEST(BoundedUtf8LengthSequenceTest, RepeatedInsertsShiftValuesOutTheRightEnd) {
BoundedUtf8LengthSequence<32> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 2), 0);
for (uint32_t i = 1; i < 31; ++i) {
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0)
<< "while moving the 2 into position " << i;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 32)
<< "after moving the 2 into position " << i;
}
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0)
<< "while moving the 2 into position 31";
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 1), 31)
<< "after moving the 2 into position 31";
}
TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord1LeaveWord0Untouched) {
BoundedUtf8LengthSequence<64> seq;
for (uint32_t i = 0; i < 32; ++i) {
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(i, 2), 2 * i)
<< "at index " << i;
}
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64);
EXPECT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 64);
}
TEST(BoundedUtf8LengthSequenceTest, InsertsIntoWord0ShiftValuesIntoWord1) {
BoundedUtf8LengthSequence<64> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(29, 2), 29);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(30, 3), 31);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 4), 34);
// Pushing two 1's on the front moves the 3 and 4 into the high word.
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(34, 1), 31 + 2 + 3 + 4);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 31 + 2);
}
TEST(BoundedUtf8LengthSequenceTest, ValuesAreShiftedCorrectlyAmongThreeWords) {
BoundedUtf8LengthSequence<96> seq;
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(31, 3), 31);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(63, 4), 62 + 3);
// This insertion moves both the 3 and the 4 up a word.
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(0, 1), 0);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(65, 1), 63 + 3 + 4);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(64, 1), 63 + 3);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(33, 1), 32 + 3);
ASSERT_EQ(seq.InsertAndReturnSumOfPredecessors(32, 1), 32);
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

absl/debugging/internal/decode_rust_punycode.cc
@@ -0,0 +1,258 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/decode_rust_punycode.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "absl/base/config.h"
#include "absl/base/nullability.h"
#include "absl/debugging/internal/bounded_utf8_length_sequence.h"
#include "absl/debugging/internal/utf8_for_code_point.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
// Decoding Punycode requires repeated random-access insertion into a stream of
// variable-length UTF-8 code-point encodings. We need this to be tolerably
// fast (no N^2 slowdown for unfortunate inputs), and we can't allocate any data
// structures on the heap (async-signal-safety).
//
// It is pragmatic to impose a moderately low limit on the identifier length and
// bail out if we ever hit it. Then BoundedUtf8LengthSequence efficiently
// determines where to insert the next code point, and memmove efficiently makes
// room for it.
//
// The chosen limit is a round number several times larger than identifiers
// expected in practice, yet still small enough that a memmove of this many
// UTF-8 characters is not much more expensive than the division and modulus
// operations that Punycode decoding requires.
constexpr uint32_t kMaxChars = 256;
// Constants from RFC 3492 section 5.
constexpr uint32_t kBase = 36, kTMin = 1, kTMax = 26, kSkew = 38, kDamp = 700;
constexpr uint32_t kMaxCodePoint = 0x10ffff;
// Overflow threshold in DecodeRustPunycode's inner loop; see comments there.
constexpr uint32_t kMaxI = 1 << 30;
// If punycode_begin .. punycode_end begins with a prefix matching the regular
// expression [0-9a-zA-Z_]+_, removes that prefix, copies all but the final
// underscore into out_begin .. out_end, sets num_ascii_chars to the number of
// bytes copied, and returns true. (A prefix of this sort represents the
// nonempty subsequence of ASCII characters in the corresponding plaintext.)
//
// If punycode_begin .. punycode_end does not contain an underscore, sets
// num_ascii_chars to zero and returns true. (The encoding of a plaintext
// without any ASCII characters does not carry such a prefix.)
//
// Returns false and zeroes num_ascii_chars on failure (either parse error or
// not enough space in the output buffer).
bool ConsumeOptionalAsciiPrefix(const char*& punycode_begin,
const char* const punycode_end,
char* const out_begin,
char* const out_end,
uint32_t& num_ascii_chars) {
num_ascii_chars = 0;
// Remember the last underscore if any. Also use the same string scan to
// reject any ASCII bytes that do not belong in an identifier, including NUL,
// as well as non-ASCII bytes, which should have been delta-encoded instead.
int last_underscore = -1;
for (int i = 0; i < punycode_end - punycode_begin; ++i) {
const char c = punycode_begin[i];
if (c == '_') {
last_underscore = i;
continue;
}
// We write out the meaning of absl::ascii_isalnum rather than call that
// function because its documentation does not promise it will remain
// async-signal-safe under future development.
if ('a' <= c && c <= 'z') continue;
if ('A' <= c && c <= 'Z') continue;
if ('0' <= c && c <= '9') continue;
return false;
}
// If there was no underscore, that means there were no ASCII characters in
// the plaintext, so there is no prefix to consume. Our work is done.
if (last_underscore < 0) return true;
// Otherwise there will be an underscore delimiter somewhere. It can't be
// initial because then there would be no ASCII characters to its left, and no
// delimiter would have been added in that case.
if (last_underscore == 0) return false;
// Any other position is reasonable. Make sure there's room in the buffer.
if (last_underscore + 1 > out_end - out_begin) return false;
// Consume and write out the ASCII characters.
num_ascii_chars = static_cast<uint32_t>(last_underscore);
std::memcpy(out_begin, punycode_begin, num_ascii_chars);
out_begin[num_ascii_chars] = '\0';
punycode_begin += num_ascii_chars + 1;
return true;
}
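// Editorial illustration (not part of the upstream file): for the encoding
// "verre__vin_m4a", which appears in the accompanying tests, the last
// underscore sits at index 10, so the ASCII prefix "verre__vin" (10 bytes) is
// copied to the output and NUL-terminated, punycode_begin advances past the
// delimiter, and "m4a" remains as the delta sequence for the caller to decode.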
// Returns the value of `c` as a base-36 digit according to RFC 3492 section 5,
// or -1 if `c` is not such a digit.
int DigitValue(char c) {
if ('0' <= c && c <= '9') return c - '0' + 26;
if ('a' <= c && c <= 'z') return c - 'a';
if ('A' <= c && c <= 'Z') return c - 'A';
return -1;
}
// Consumes the next delta encoding from punycode_begin .. punycode_end,
// updating i accordingly. Returns true on success. Returns false on parse
// failure or arithmetic overflow.
bool ScanNextDelta(const char*& punycode_begin, const char* const punycode_end,
uint32_t bias, uint32_t& i) {
uint64_t w = 1; // 64 bits to prevent overflow in w *= kBase - t
// "for k = base to infinity in steps of base do begin ... end" in RFC 3492
// section 6.2. Each loop iteration scans one digit of the delta.
for (uint32_t k = kBase; punycode_begin != punycode_end; k += kBase) {
const int digit_value = DigitValue(*punycode_begin++);
if (digit_value < 0) return false;
// Compute this in 64-bit arithmetic so we can check for overflow afterward.
const uint64_t new_i = i + static_cast<uint64_t>(digit_value) * w;
// Valid deltas are bounded by (#chars already emitted) * kMaxCodePoint, but
// invalid input could encode an arbitrarily large delta. Nip that in the
// bud here.
static_assert(
kMaxI >= kMaxChars * kMaxCodePoint,
"kMaxI is too small to prevent spurious failures on good input");
if (new_i > kMaxI) return false;
static_assert(
kMaxI < (uint64_t{1} << 32),
"Make kMaxI smaller or i 64 bits wide to prevent silent wraparound");
i = static_cast<uint32_t>(new_i);
// Compute the threshold that determines whether this is the last digit and
// (if not) what the next digit's place value will be. This logic from RFC
// 3492 section 6.2 is explained in section 3.3.
uint32_t t;
if (k <= bias + kTMin) {
t = kTMin;
} else if (k >= bias + kTMax) {
t = kTMax;
} else {
t = k - bias;
}
if (static_cast<uint32_t>(digit_value) < t) return true;
// If this gets too large, the range check on new_i in the next iteration
// will catch it. We know this multiplication will not wrap around because w
// is 64 bits wide.
w *= kBase - t;
}
return false;
}
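// Editorial illustration (not part of the upstream file): scanning "ba" with
// the initial bias of 72 and i == 0. The digit 'b' has value 1; with k == 36
// the threshold is t == kTMin == 1, so the digit is not final and w becomes
// kBase - t == 35. The digit 'a' has value 0, below the next threshold
// (again 1), so scanning stops with i == 1. In DecodeRustPunycode this single
// delta yields code point 128 + 1 == U+0081, matching the "ba" case in the
// accompanying tests.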
} // namespace
absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options) {
const char* punycode_begin = options.punycode_begin;
const char* const punycode_end = options.punycode_end;
char* const out_begin = options.out_begin;
char* const out_end = options.out_end;
// Write a NUL terminator first. Later memcpy calls will keep bumping it
// along to its new right place.
const size_t out_size = static_cast<size_t>(out_end - out_begin);
if (out_size == 0) return nullptr;
*out_begin = '\0';
// RFC 3492 section 6.2 begins here. We retain the names of integer variables
// appearing in that text.
uint32_t n = 128, i = 0, bias = 72, num_chars = 0;
// If there are any ASCII characters, consume them and their trailing
// underscore delimiter.
if (!ConsumeOptionalAsciiPrefix(punycode_begin, punycode_end,
out_begin, out_end, num_chars)) {
return nullptr;
}
uint32_t total_utf8_bytes = num_chars;
BoundedUtf8LengthSequence<kMaxChars> utf8_lengths;
// "while the input is not exhausted do begin ... end"
while (punycode_begin != punycode_end) {
if (num_chars >= kMaxChars) return nullptr;
const uint32_t old_i = i;
if (!ScanNextDelta(punycode_begin, punycode_end, bias, i)) return nullptr;
// Update bias as in RFC 3492 section 6.1. (We have inlined adapt.)
uint32_t delta = i - old_i;
delta /= (old_i == 0 ? kDamp : 2);
delta += delta/(num_chars + 1);
bias = 0;
while (delta > ((kBase - kTMin) * kTMax)/2) {
delta /= kBase - kTMin;
bias += kBase;
}
bias += ((kBase - kTMin + 1) * delta)/(delta + kSkew);
// Back in section 6.2, compute the new code point and insertion index.
static_assert(
kMaxI + kMaxCodePoint < (uint64_t{1} << 32),
"Make kMaxI smaller or n 64 bits wide to prevent silent wraparound");
n += i/(num_chars + 1);
i %= num_chars + 1;
// To actually insert, we need to convert the code point n to UTF-8 and the
// character index i to an index into the byte stream emitted so far. First
// prepare the UTF-8 encoding for n, rejecting surrogates, overlarge values,
// and anything that won't fit into the remaining output storage.
Utf8ForCodePoint utf8_for_code_point(n);
if (!utf8_for_code_point.ok()) return nullptr;
if (total_utf8_bytes + utf8_for_code_point.length + 1 > out_size) {
return nullptr;
}
// Now insert the new character into both our length map and the output.
uint32_t n_index =
utf8_lengths.InsertAndReturnSumOfPredecessors(
i, utf8_for_code_point.length);
std::memmove(
out_begin + n_index + utf8_for_code_point.length, out_begin + n_index,
total_utf8_bytes + 1 - n_index);
std::memcpy(out_begin + n_index, utf8_for_code_point.bytes,
utf8_for_code_point.length);
total_utf8_bytes += utf8_for_code_point.length;
++num_chars;
// Finally, advance to the next state before continuing.
++i;
}
return out_begin + total_utf8_bytes;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

absl/debugging/internal/decode_rust_punycode.h
@@ -0,0 +1,55 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
#define ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
#include "absl/base/config.h"
#include "absl/base/nullability.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
struct DecodeRustPunycodeOptions {
const char* punycode_begin;
const char* punycode_end;
char* out_begin;
char* out_end;
};
// Given Rust Punycode in `punycode_begin .. punycode_end`, writes the
// corresponding UTF-8 plaintext into `out_begin .. out_end`, followed by a NUL
// character, and returns a pointer to that final NUL on success. On failure
// returns a null pointer, and the contents of `out_begin .. out_end` are
// unspecified.
//
// Failure occurs in precisely these cases:
// - Any input byte does not match [0-9a-zA-Z_].
// - The first input byte is an underscore, but no other underscore appears in
// the input.
// - The delta sequence does not represent a valid sequence of code-point
// insertions.
// - The plaintext would contain more than 256 code points.
//
// DecodeRustPunycode is async-signal-safe with bounded runtime and a small
// stack footprint, making it suitable for use in demangling Rust symbol names
// from a signal handler.
absl::Nullable<char*> DecodeRustPunycode(DecodeRustPunycodeOptions options);
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_DECODE_RUST_PUNYCODE_H_
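A hedged usage sketch (editorial addition, not part of this commit), decoding "_la_mode_yya" from the accompanying tests; the buffer size and names are arbitrary, and on success the returned pointer addresses the NUL written after "à_la_mode":

#include <cstring>
#include "absl/debugging/internal/decode_rust_punycode.h"

// Returns true if decoding succeeded and produced the expected plaintext.
bool DecodeRustPunycodeSketch() {
  const char punycode[] = "_la_mode_yya";  // taken from the tests in this commit
  char out[64];                            // arbitrary, ample output buffer
  absl::debugging_internal::DecodeRustPunycodeOptions options;
  options.punycode_begin = punycode;
  options.punycode_end = punycode + sizeof(punycode) - 1;  // exclude the NUL
  options.out_begin = out;
  options.out_end = out + sizeof(out);
  char* end = absl::debugging_internal::DecodeRustPunycode(options);
  // "\xc3\xa0" is the UTF-8 encoding of "à".
  return end != nullptr && std::strcmp(out, "\xc3\xa0_la_mode") == 0;
}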

absl/debugging/internal/decode_rust_punycode_test.cc
@@ -0,0 +1,606 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/decode_rust_punycode.h"
#include <cstddef>
#include <cstring>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
using ::testing::AllOf;
using ::testing::Eq;
using ::testing::IsNull;
using ::testing::Pointee;
using ::testing::ResultOf;
using ::testing::StrEq;
class DecodeRustPunycodeTest : public ::testing::Test {
protected:
void FillBufferWithNonzeroBytes() {
// The choice of nonzero value to fill with is arbitrary. The point is just
// to fail tests if DecodeRustPunycode forgets to write the final NUL
// character.
std::memset(buffer_storage_, 0xab, sizeof(buffer_storage_));
}
DecodeRustPunycodeOptions WithAmpleSpace() {
FillBufferWithNonzeroBytes();
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
DecodeRustPunycodeOptions WithJustEnoughSpace() {
FillBufferWithNonzeroBytes();
const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size() - 1;
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_ + begin_offset;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
DecodeRustPunycodeOptions WithOneByteTooFew() {
FillBufferWithNonzeroBytes();
const size_t begin_offset = sizeof(buffer_storage_) - plaintext_.size();
DecodeRustPunycodeOptions options;
options.punycode_begin = punycode_.data();
options.punycode_end = punycode_.data() + punycode_.size();
options.out_begin = buffer_storage_ + begin_offset;
options.out_end = buffer_storage_ + sizeof(buffer_storage_);
return options;
}
// Matches a correct return value of DecodeRustPunycode when `golden` is the
// expected plaintext output.
auto PointsToTheNulAfter(const std::string& golden) {
const size_t golden_size = golden.size();
return AllOf(
Pointee(Eq('\0')),
ResultOf("preceding string body",
[golden_size](const char* p) { return p - golden_size; },
StrEq(golden)));
}
std::string punycode_;
std::string plaintext_;
char buffer_storage_[1024];
};
TEST_F(DecodeRustPunycodeTest, MapsEmptyToEmpty) {
punycode_ = "";
plaintext_ = "";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest,
StripsTheTrailingDelimiterFromAPureRunOfBasicChars) {
punycode_ = "foo_";
plaintext_ = "foo";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, TreatsTheLastUnderscoreAsTheDelimiter) {
punycode_ = "foo_bar_";
plaintext_ = "foo_bar";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsALeadingUnderscoreIfNotTheDelimiter) {
punycode_ = "_foo_";
plaintext_ = "_foo";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RejectsALeadingUnderscoreDelimiter) {
punycode_ = "_foo";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RejectsEmbeddedNul) {
punycode_ = std::string("foo\0bar_", 8);
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RejectsAsciiCharsOtherThanIdentifierChars) {
punycode_ = "foo\007_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "foo-_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "foo;_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "foo\177_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RejectsRawNonAsciiChars) {
punycode_ = "\x80";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "\x80_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "\xff";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "\xff_";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RecognizesU0080) {
// a encodes 0, so the output is the smallest non-ASCII code point standing
// alone. (U+0080 PAD is not an identifier character, but DecodeRustPunycode
// does not check whether non-ASCII characters could belong to an identifier.)
punycode_ = "a";
plaintext_ = "\xc2\x80";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, OneByteDeltaSequencesMustBeA) {
// Because bias = 72 for the first code point, any digit but a/A is nonfinal
// in one of the first two bytes of a delta sequence.
punycode_ = "b";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "z";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "0";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "9";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsDeltaSequenceBA) {
punycode_ = "ba";
plaintext_ = "\xc2\x81";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsOtherDeltaSequencesWithSecondByteA) {
punycode_ = "ca";
plaintext_ = "\xc2\x82";
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "za";
plaintext_ = "\xc2\x99";
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "0a";
plaintext_ = "\xc2\x9a";
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "1a";
plaintext_ = "\xc2\x9b";
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "9a";
plaintext_ = "£"; // Pound sign, U+00A3
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
}
TEST_F(DecodeRustPunycodeTest, RejectsDeltaWhereTheSecondAndLastDigitIsNotA) {
punycode_ = "bb";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "zz";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "00";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
punycode_ = "99";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsDeltasWithSecondByteBFollowedByA) {
punycode_ = "bba";
plaintext_ = "¤"; // U+00A4
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "cba";
plaintext_ = "¥"; // U+00A5
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "zba";
plaintext_ = "¼"; // U+00BC
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "0ba";
plaintext_ = "½"; // U+00BD
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "1ba";
plaintext_ = "¾"; // U+00BE
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
punycode_ = "9ba";
plaintext_ = "Æ"; // U+00C6
EXPECT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
}
// Tests beyond this point use characters allowed in identifiers, so you can
// prepend _RNvC1cu<decimal length><underscore if [0-9_] follows> to a test
// input and run it through another Rust demangler to verify that the
// corresponding golden output is correct.
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAlone) {
punycode_ = "0ca";
plaintext_ = "à";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharBeforeBasicChars) {
punycode_ = "_la_mode_yya";
plaintext_ = "à_la_mode";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAmidBasicChars) {
punycode_ = "verre__vin_m4a";
plaintext_ = "verre_à_vin";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsTwoByteCharAfterBasicChars) {
punycode_ = "belt_3na";
plaintext_ = "beltà";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedTwoByteChar) {
punycode_ = "0caaaa";
plaintext_ = "àààà";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsInOrder) {
punycode_ = "3camsuz";
plaintext_ = "ãéïôù";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyTwoByteCharsOutOfOrder) {
punycode_ = "3caltsx";
plaintext_ = "ùéôãï";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharAlone) {
punycode_ = "fiq";
plaintext_ = "中";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedThreeByteChar) {
punycode_ = "fiqaaaa";
plaintext_ = "中中中中中";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsThreeByteCharsInOrder) {
punycode_ = "fiq228c";
plaintext_ = "中文";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyThreeByteCharsOutOfOrder) {
punycode_ = "fiq128c";
plaintext_ = "文中";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAlone) {
punycode_ = "uy7h";
plaintext_ = "🂻";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharBeforeBasicChars) {
punycode_ = "jack__uh63d";
plaintext_ = "jack_🂻";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAmidBasicChars) {
punycode_ = "jack__of_hearts_ki37n";
plaintext_ = "jack_🂻_of_hearts";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsFourByteCharAfterBasicChars) {
punycode_ = "_of_hearts_kz45i";
plaintext_ = "🂻_of_hearts";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsRepeatedFourByteChar) {
punycode_ = "uy7haaaa";
plaintext_ = "🂻🂻🂻🂻🂻";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsInOrder) {
punycode_ = "8x7hcjmf";
plaintext_ = "🂦🂧🂪🂭🂮";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsNearbyFourByteCharsOutOfOrder) {
punycode_ = "8x7hcild";
plaintext_ = "🂮🂦🂭🂪🂧";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, AcceptsAMixtureOfByteLengths) {
punycode_ = "3caltsx2079ivf8aiuy7cja3a6ak";
plaintext_ = "ùéôãï中文🂮🂦🂭🂪🂧";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
TEST_F(DecodeRustPunycodeTest, RejectsOverlargeDeltas) {
punycode_ = "123456789a";
EXPECT_THAT(DecodeRustPunycode(WithAmpleSpace()), IsNull());
}
// Finally, we test on a few prose and poetry snippets as a defense in depth.
// If our artificial short test inputs did not exercise a bug that is tickled by
// patterns typical of real human writing, maybe real human writing will catch
// that.
//
// These test inputs are extracted from texts old enough to be out of copyright
// that probe a variety of ranges of code-point space. All are longer than 32
// code points, so they exercise the carrying of seminibbles from one uint64_t
// to the next higher one in BoundedUtf8LengthSequence.
// The first three lines of the Old English epic _Beowulf_, mostly ASCII with a
// few archaic two-byte letters interspersed.
TEST_F(DecodeRustPunycodeTest, Beowulf) {
punycode_ = "hwt_we_gardena_in_geardagum_"
"eodcyninga_rym_gefrunon_"
"hu_a_elingas_ellen_fremedon_hxg9c70do9alau";
plaintext_ = "hwæt_we_gardena_in_geardagum_"
"þeodcyninga_þrym_gefrunon_"
"hu_ða_æþelingas_ellen_fremedon";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The whole of 過故人莊 by the 8th-century Chinese poet 孟浩然
// (Meng Haoran), exercising three-byte-character processing.
TEST_F(DecodeRustPunycodeTest, MengHaoran) {
punycode_ = "gmq4ss0cfvao1e2wg8mcw8b0wkl9a7tt90a8riuvbk7t8kbv9a66ogofvzlf6"
"3d01ybn1u28dyqi5q2cxyyxnk5d2gx1ks9ddvfm17bk6gbsd6wftrav60u4ta";
plaintext_ = "故人具雞黍" "邀我至田家"
"綠樹村邊合" "青山郭外斜"
"開軒面場圃" "把酒話桑麻"
"待到重陽日" "還來就菊花";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// A poem of the 8th-century Japanese poet 山上憶良 (Yamanoue no Okura).
// Japanese mixes two-byte and three-byte characters: a good workout for codecs.
TEST_F(DecodeRustPunycodeTest, YamanoueNoOkura) {
punycode_ = "48jdaa3a6ccpepjrsmlb0q4bwcdtid8fg6c0cai9822utqeruk3om0u4f2wbp0"
"em23do0op23cc2ff70mb6tae8aq759gja";
plaintext_ = "瓜食めば"
"子ども思ほゆ"
"栗食めば"
"まして偲はゆ"
"何処より"
"来りしものそ"
"眼交に"
"もとな懸りて"
"安眠し寝さぬ";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
// The first two lines of the Phoenician-language inscription on the sarcophagus
// of Eshmunazar II of Sidon, 6th century BCE. Phoenician and many other
// archaic scripts are allocated in the Supplemental Multilingual Plane (U+10000
// through U+1FFFF) and thus exercise four-byte-character processing.
TEST_F(DecodeRustPunycodeTest, EshmunazarSarcophagus) {
punycode_ = "wj9caaabaabbaaohcacxvhdc7bgxbccbdcjeacddcedcdlddbdbddcdbdcknfcee"
"ifel8del2a7inq9fhcpxikms7a4a9ac9ataaa0g";
plaintext_ = "𐤁𐤉𐤓𐤇𐤁𐤋𐤁𐤔𐤍𐤕𐤏𐤎𐤓"
"𐤅𐤀𐤓𐤁𐤏𐤗𐤖𐤖𐤖𐤖𐤋𐤌𐤋𐤊𐤉𐤌𐤋𐤊"
"𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
"𐤁𐤍𐤌𐤋𐤊𐤕𐤁𐤍𐤕𐤌𐤋𐤊𐤑𐤃𐤍𐤌"
"𐤃𐤁𐤓𐤌𐤋𐤊𐤀𐤔𐤌𐤍𐤏𐤆𐤓𐤌𐤋𐤊"
"𐤑𐤃𐤍𐤌𐤋𐤀𐤌𐤓𐤍𐤂𐤆𐤋𐤕";
ASSERT_THAT(DecodeRustPunycode(WithAmpleSpace()),
PointsToTheNulAfter(plaintext_));
ASSERT_THAT(DecodeRustPunycode(WithJustEnoughSpace()),
PointsToTheNulAfter(plaintext_));
EXPECT_THAT(DecodeRustPunycode(WithOneByteTooFew()), IsNull());
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large

absl/debugging/internal/demangle.h
@@ -0,0 +1,76 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_
#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_
#include <string>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Demangle `mangled`. On success, return true and write the
// demangled symbol name to `out`. Otherwise, return false.
// `out` is modified even if demangling is unsuccessful.
//
// This function provides an alternative to libstdc++'s abi::__cxa_demangle,
// which is not async signal safe (it uses malloc internally). It's intended to
// be used in async signal handlers to symbolize stack traces.
//
// Note that this demangler doesn't support full demangling. More
// specifically, it doesn't print types of function parameters and
// types of template arguments. It just skips them. However, it's
// still very useful to extract basic information such as class,
// function, constructor, destructor, and operator names.
//
// See the implementation note in demangle.cc if you are interested.
//
// Example:
//
// | Mangled Name | Demangle | DemangleString
// |---------------|-------------|-----------------------
// | _Z1fv | f() | f()
// | _Z1fi | f() | f(int)
// | _Z3foo3bar | foo() | foo(bar)
// | _Z1fIiEvi | f<>() | void f<int>(int)
// | _ZN1N1fE | N::f | N::f
// | _ZN3Foo3BarEv | Foo::Bar() | Foo::Bar()
// | _Zrm1XS_ | operator%() | operator%(X, X)
// | _ZN3FooC1Ev | Foo::Foo() | Foo::Foo()
// | _Z1fSs | f() | f(std::basic_string<char,
// | | | std::char_traits<char>,
// | | | std::allocator<char> >)
//
// See the unit test for more examples.
//
// Demangle also recognizes Rust mangled names by delegating the parsing of
// anything that starts with _R to DemangleRustSymbolEncoding (demangle_rust.h).
//
// Note: we might want to write demanglers for ABIs other than Itanium
// C++ ABI in the future.
bool Demangle(const char* mangled, char* out, size_t out_size);
// A wrapper around `abi::__cxa_demangle()`. On success, returns the demangled
// name. On failure, returns the input mangled name.
//
// This function is not async-signal-safe.
std::string DemangleString(const char* mangled);
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_
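A brief usage sketch (editorial addition, not part of this commit) of Demangle(), using a mangled name from the example table above; the buffer size and function name are illustrative:

#include <cstring>
#include "absl/debugging/internal/demangle.h"

// Demangles a constructor name from the example table; illustrative only.
bool DemangleSketch() {
  char out[128];  // arbitrary buffer size
  bool ok = absl::debugging_internal::Demangle("_ZN3FooC1Ev", out, sizeof(out));
  return ok && std::strcmp(out, "Foo::Foo()") == 0;
}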

absl/debugging/internal/demangle_rust.cc
@@ -0,0 +1,925 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/demangle_rust.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/debugging/internal/decode_rust_punycode.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
// Same step limit as the C++ demangler in demangle.cc uses.
constexpr int kMaxReturns = 1 << 17;
bool IsDigit(char c) { return '0' <= c && c <= '9'; }
bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); }
const char* BasicTypeName(char c) {
switch (c) {
case 'a': return "i8";
case 'b': return "bool";
case 'c': return "char";
case 'd': return "f64";
case 'e': return "str";
case 'f': return "f32";
case 'h': return "u8";
case 'i': return "isize";
case 'j': return "usize";
case 'l': return "i32";
case 'm': return "u32";
case 'n': return "i128";
case 'o': return "u128";
case 'p': return "_";
case 's': return "i16";
case 't': return "u16";
case 'u': return "()";
case 'v': return "...";
case 'x': return "i64";
case 'y': return "u64";
case 'z': return "!";
}
return nullptr;
}
// Parser for Rust symbol mangling v0, whose grammar is defined here:
//
// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
class RustSymbolParser {
public:
// Prepares to demangle the given encoding, a Rust symbol name starting with
// _R, into the output buffer [out, out_end). The caller is expected to
// continue by calling the new object's Parse function.
RustSymbolParser(const char* encoding, char* out, char* const out_end)
: encoding_(encoding), out_(out), out_end_(out_end) {
if (out_ != out_end_) *out_ = '\0';
}
// Parses the constructor's encoding argument, writing output into the range
// [out, out_end). Returns true on success and false for input whose
// structure was not recognized or exceeded implementation limits, such as by
// nesting structures too deep. In either case *this should not be used
// again.
ABSL_MUST_USE_RESULT bool Parse() && {
// Recursively parses the grammar production named by callee, then resumes
// execution at the next statement.
//
// Recursive-descent parsing is a beautifully readable translation of a
// grammar, but it risks stack overflow if implemented by naive recursion on
// the C++ call stack. So we simulate recursion by goto and switch instead,
// keeping a bounded stack of "return addresses" in the recursion_stack_
// member.
//
// The callee argument is a statement label. We goto that label after
// saving the "return address" on recursion_stack_. The next continue
// statement in the for loop below "returns" from this "call".
//
// The caller argument names the return point. Each value of caller must
// appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
// definition of enum ReturnAddress. The switch implements the control
// transfer from the end of a "called" subroutine back to the statement
// after the "call".
//
// Note that not all the grammar productions have to be packed into the
// switch, but only those which appear in a cycle in the grammar. Anything
// acyclic can be written as ordinary functions and function calls, e.g.,
// ParseIdentifier.
#define ABSL_DEMANGLER_RECURSE(callee, caller) \
do { \
if (recursion_depth_ == kStackSize) return false; \
/* The next continue will switch on this saved value ... */ \
recursion_stack_[recursion_depth_++] = caller; \
goto callee; \
/* ... and will land here, resuming the suspended code. */ \
case caller: {} \
} while (0)
// Parse the encoding, counting completed recursive calls to guard against
// excessively complex input and infinite-loop bugs.
int iter = 0;
goto whole_encoding;
for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) {
// This switch resumes the code path most recently suspended by
// ABSL_DEMANGLER_RECURSE.
switch (recursion_stack_[--recursion_depth_]) {
//
// symbol-name ->
// _R decimal-number? path instantiating-crate? vendor-specific-suffix?
whole_encoding:
if (!Eat('_') || !Eat('R')) return false;
// decimal-number? is always empty today, so proceed to path, which
// can't start with a decimal digit.
ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
if (IsAlpha(Peek())) {
++silence_depth_; // Print nothing more from here on.
ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
}
switch (Take()) {
case '.': case '$': case '\0': return true;
}
return false; // unexpected trailing content
// path -> crate-root | inherent-impl | trait-impl | trait-definition |
// nested-path | generic-args | backref
//
// Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch
// (which would hide the generated case label). Thus we jump out of the
// inner switch with gotos before performing any fake recursion.
path:
switch (Take()) {
case 'C': goto crate_root;
case 'M': goto inherent_impl;
case 'X': goto trait_impl;
case 'Y': goto trait_definition;
case 'N': goto nested_path;
case 'I': goto generic_args;
case 'B': goto path_backref;
default: return false;
}
// crate-root -> C identifier (C consumed above)
crate_root:
if (!ParseIdentifier()) return false;
continue;
// inherent-impl -> M impl-path type (M already consumed)
inherent_impl:
if (!Emit("<")) return false;
ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType);
ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding);
if (!Emit(">")) return false;
continue;
// trait-impl -> X impl-path type path (X already consumed)
trait_impl:
if (!Emit("<")) return false;
ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType);
ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix);
if (!Emit(" as ")) return false;
ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding);
if (!Emit(">")) return false;
continue;
// impl-path -> disambiguator? path (but never print it!)
impl_path:
++silence_depth_;
{
int ignored_disambiguator;
if (!ParseDisambiguator(ignored_disambiguator)) return false;
}
ABSL_DEMANGLER_RECURSE(path, kImplPathEnding);
--silence_depth_;
continue;
// trait-definition -> Y type path (Y already consumed)
trait_definition:
if (!Emit("<")) return false;
ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix);
if (!Emit(" as ")) return false;
ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding);
if (!Emit(">")) return false;
continue;
// nested-path -> N namespace path identifier (N already consumed)
// namespace -> lower | upper
nested_path:
// Uppercase namespaces must be saved on a stack so we can print
// ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
if (IsUpper(Peek())) {
if (!PushNamespace(Take())) return false;
ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
if (!Emit("::")) return false;
if (!ParseIdentifier(PopNamespace())) return false;
continue;
}
// Lowercase namespaces, however, are never represented in the output;
// they all emit just ::name.
if (IsLower(Take())) {
ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
if (!Emit("::")) return false;
if (!ParseIdentifier()) return false;
continue;
}
// Neither upper nor lower.
return false;
// type -> basic-type | array-type | slice-type | tuple-type |
// ref-type | mut-ref-type | const-ptr-type | mut-ptr-type |
// fn-type | dyn-trait-type | path | backref
//
// We use ifs instead of switch (Take()) because the default case jumps
// to path, which will need to see the first character not yet Taken
// from the input. Because we do not use a nested switch here,
// ABSL_DEMANGLER_RECURSE works fine in the 'S' case.
type:
if (IsLower(Peek())) {
const char* type_name = BasicTypeName(Take());
if (type_name == nullptr || !Emit(type_name)) return false;
continue;
}
if (Eat('A')) {
// array-type = A type const
if (!Emit("[")) return false;
ABSL_DEMANGLER_RECURSE(type, kArraySize);
if (!Emit("; ")) return false;
ABSL_DEMANGLER_RECURSE(constant, kFinishArray);
if (!Emit("]")) return false;
continue;
}
if (Eat('S')) {
if (!Emit("[")) return false;
ABSL_DEMANGLER_RECURSE(type, kSliceEnding);
if (!Emit("]")) return false;
continue;
}
if (Eat('T')) goto tuple_type;
if (Eat('R')) {
if (!Emit("&")) return false;
if (!ParseOptionalLifetime()) return false;
goto type;
}
if (Eat('Q')) {
if (!Emit("&mut ")) return false;
if (!ParseOptionalLifetime()) return false;
goto type;
}
if (Eat('P')) {
if (!Emit("*const ")) return false;
goto type;
}
if (Eat('O')) {
if (!Emit("*mut ")) return false;
goto type;
}
if (Eat('F')) goto fn_type;
if (Eat('D')) goto dyn_trait_type;
if (Eat('B')) goto type_backref;
goto path;
// tuple-type -> T type* E (T already consumed)
tuple_type:
if (!Emit("(")) return false;
// The toolchain should encode the unit type as u rather than as an empty
// tuple TE, but the grammar and other demanglers also recognize TE, so we
// do too.
if (Eat('E')) {
if (!Emit(")")) return false;
continue;
}
// A tuple with one element is rendered (type,) instead of (type).
ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement);
if (Eat('E')) {
if (!Emit(",)")) return false;
continue;
}
// A tuple with two elements is of course (x, y).
if (!Emit(", ")) return false;
ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement);
if (Eat('E')) {
if (!Emit(")")) return false;
continue;
}
// And (x, y, z) for three elements.
if (!Emit(", ")) return false;
ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement);
if (Eat('E')) {
if (!Emit(")")) return false;
continue;
}
// For longer tuples we write (x, y, z, ...), printing none of the
// content of the fourth and later types. Thus we avoid exhausting
// output buffers and human readers' patience when some library has a
// long tuple as an implementation detail, without having to
// completely obfuscate all tuples.
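// For example, in the tests TE renders as "()", TlE as "(i32,)", TlmE as
// "(i32, u32)", and TlmnoE as "(i32, u32, i128, ...)".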
if (!Emit(", ...)")) return false;
++silence_depth_;
while (!Eat('E')) {
ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement);
}
--silence_depth_;
continue;
// fn-type -> F fn-sig (F already consumed)
// fn-sig -> binder? U? (K abi)? type* E type
// abi -> C | undisambiguated-identifier
//
// We follow the C++ demangler in suppressing details of function
// signatures. Every function type is rendered "fn...".
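// For example, the thunk-type test demangles "_RNvYFEuNtC1c1t1f"
// (<fn() as c::t>::f) to "<fn... as c::t>::f".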
fn_type:
if (!Emit("fn...")) return false;
++silence_depth_;
if (!ParseOptionalBinder()) return false;
(void)Eat('U');
if (Eat('K')) {
if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false;
}
while (!Eat('E')) {
ABSL_DEMANGLER_RECURSE(type, kContinueParameterList);
}
ABSL_DEMANGLER_RECURSE(type, kFinishFn);
--silence_depth_;
continue;
// dyn-trait-type -> D dyn-bounds lifetime (D already consumed)
// dyn-bounds -> binder? dyn-trait* E
//
// The grammar strangely allows an empty trait list, even though the
// compiler should never output one. We follow existing demanglers in
// rendering DEL_ as "dyn ".
//
// Because auto traits lengthen a type name considerably without
// providing much value to a search for related source code, it would be
// desirable to abbreviate
// dyn main::Trait + std::marker::Copy + std::marker::Send
// to
// dyn main::Trait + ...,
// eliding the auto traits. But it is difficult to do so correctly, in
// part because there is no guarantee that the mangling will list the
// main trait first. So we just print all the traits in their order of
// appearance in the mangled name.
dyn_trait_type:
if (!Emit("dyn ")) return false;
if (!ParseOptionalBinder()) return false;
if (!Eat('E')) {
ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits);
while (!Eat('E')) {
if (!Emit(" + ")) return false;
ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits);
}
}
if (!ParseRequiredLifetime()) return false;
continue;
// dyn-trait -> path dyn-trait-assoc-binding*
// dyn-trait-assoc-binding -> p undisambiguated-identifier type
//
// We render nonempty binding lists as <>, omitting their contents as
// for generic-args.
dyn_trait:
ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait);
if (Peek() == 'p') {
if (!Emit("<>")) return false;
++silence_depth_;
while (Eat('p')) {
if (!ParseUndisambiguatedIdentifier()) return false;
ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding);
}
--silence_depth_;
}
continue;
// const -> type const-data | p | backref
//
// const is a C++ keyword, so we use the label `constant` instead.
constant:
if (Eat('B')) goto const_backref;
if (Eat('p')) {
if (!Emit("_")) return false;
continue;
}
// Scan the type without printing it.
//
// The Rust language restricts the type of a const generic argument
// much more than the mangling grammar does. We do not enforce this.
//
// We also do not bother printing false, true, 'A', and '\u{abcd}' for
// the types bool and char. Because we do not print generic-args
// contents, we expect to print constants only in array sizes, and
// those should not be bool or char.
++silence_depth_;
ABSL_DEMANGLER_RECURSE(type, kConstData);
--silence_depth_;
// const-data -> n? hex-digit* _
//
// Although the grammar doesn't say this, existing demanglers expect
// that zero is 0, not an empty digit sequence, and no nonzero value
// may have leading zero digits. Also n0_ is accepted and printed as
// -0, though a toolchain will probably never write that encoding.
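// For example, in the array-size tests the const "j1f_" (a usize) prints
// as 0x1f, "j0_" as 0x0, and "jn42_" as -0x42.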
if (Eat('n') && !EmitChar('-')) return false;
if (!Emit("0x")) return false;
if (Eat('0')) {
if (!EmitChar('0')) return false;
if (!Eat('_')) return false;
continue;
}
while (IsLowerHexDigit(Peek())) {
if (!EmitChar(Take())) return false;
}
if (!Eat('_')) return false;
continue;
// generic-args -> I path generic-arg* E (I already consumed)
//
// We follow the C++ demangler in omitting all the arguments from the
// output, printing only the list opening and closing tokens.
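// For example, "_RINvC1c1flE" (c::f::<i32>) demangles to "c::f::<>".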
generic_args:
ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList);
if (!Emit("::<>")) return false;
++silence_depth_;
while (!Eat('E')) {
ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList);
}
--silence_depth_;
continue;
// generic-arg -> lifetime | type | K const
generic_arg:
if (Peek() == 'L') {
if (!ParseOptionalLifetime()) return false;
continue;
}
if (Eat('K')) goto constant;
goto type;
// backref -> B base-62-number (B already consumed)
//
// The BeginBackref call parses and range-checks the base-62-number. We
// always do that much.
//
// The recursive call parses and prints what the backref points at. We
// save CPU and stack by skipping this work if the output would be
// suppressed anyway.
path_backref:
if (!BeginBackref()) return false;
if (silence_depth_ == 0) {
ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
}
EndBackref();
continue;
// This represents the same backref production as in path_backref but
// parses the target as a type instead of a path.
type_backref:
if (!BeginBackref()) return false;
if (silence_depth_ == 0) {
ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
}
EndBackref();
continue;
const_backref:
if (!BeginBackref()) return false;
if (silence_depth_ == 0) {
ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding);
}
EndBackref();
continue;
}
}
return false; // hit iteration limit or a bug in our stack handling
}
private:
// Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
enum ReturnAddress : uint8_t {
kInstantiatingCrate,
kVendorSpecificSuffix,
kIdentifierInUppercaseNamespace,
kIdentifierInLowercaseNamespace,
kInherentImplType,
kInherentImplEnding,
kTraitImplType,
kTraitImplInfix,
kTraitImplEnding,
kImplPathEnding,
kTraitDefinitionInfix,
kTraitDefinitionEnding,
kArraySize,
kFinishArray,
kSliceEnding,
kAfterFirstTupleElement,
kAfterSecondTupleElement,
kAfterThirdTupleElement,
kAfterSubsequentTupleElement,
kContinueParameterList,
kFinishFn,
kBeginAutoTraits,
kContinueAutoTraits,
kContinueDynTrait,
kContinueAssocBinding,
kConstData,
kBeginGenericArgList,
kContinueGenericArgList,
kPathBackrefEnding,
kTypeBackrefEnding,
kConstantBackrefEnding,
};
// Element counts for the stack arrays. Larger stack sizes accommodate more
// deeply nested names at the cost of a larger footprint on the C++ call
// stack.
enum {
// Maximum recursive calls outstanding at one time.
kStackSize = 256,
// Maximum N<uppercase> nested-paths open at once. We do not expect
// closures inside closures inside closures as much as functions inside
// modules inside other modules, so we can use a smaller array here.
kNamespaceStackSize = 64,
// Maximum number of nested backrefs. We can keep this stack pretty small
// because we do not follow backrefs inside generic-args or other contexts
// that suppress printing, so deep stacking is unlikely in practice.
kPositionStackSize = 16,
};
// Returns the next input character without consuming it.
char Peek() const { return encoding_[pos_]; }
// Consumes and returns the next input character.
char Take() { return encoding_[pos_++]; }
// If the next input character is the given character, consumes it and returns
// true; otherwise returns false without consuming a character.
ABSL_MUST_USE_RESULT bool Eat(char want) {
if (encoding_[pos_] != want) return false;
++pos_;
return true;
}
// Provided there is enough remaining output space, appends c to the output,
// writing a fresh NUL terminator afterward, and returns true. Returns false
// if the output buffer had less than two bytes free.
ABSL_MUST_USE_RESULT bool EmitChar(char c) {
if (silence_depth_ > 0) return true;
if (out_end_ - out_ < 2) return false;
*out_++ = c;
*out_ = '\0';
return true;
}
// Provided there is enough remaining output space, appends the C string token
// to the output, followed by a NUL character, and returns true. Returns
// false if not everything fit into the output buffer.
ABSL_MUST_USE_RESULT bool Emit(const char* token) {
if (silence_depth_ > 0) return true;
const size_t token_length = std::strlen(token);
const size_t bytes_to_copy = token_length + 1; // token and final NUL
if (static_cast<size_t>(out_end_ - out_) < bytes_to_copy) return false;
std::memcpy(out_, token, bytes_to_copy);
out_ += token_length;
return true;
}
// Provided there is enough remaining output space, appends the decimal form
// of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
// output, followed by a NUL character, and returns true. Returns false if
// not everything fit into the output buffer.
ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
if (disambiguator < 0) return EmitChar('?'); // parsed but too large
if (disambiguator == 0) return EmitChar('0');
// Convert disambiguator to decimal text. Three digits per byte is enough
// because 999 > 256. The bound will remain correct even if future
// maintenance changes the type of the disambiguator variable.
char digits[3 * sizeof(disambiguator)] = {};
size_t leading_digit_index = sizeof(digits) - 1;
for (; disambiguator > 0; disambiguator /= 10) {
digits[--leading_digit_index] =
static_cast<char>('0' + disambiguator % 10);
}
return Emit(digits + leading_digit_index);
}
// Consumes an optional disambiguator (s123_) from the input.
//
// On success returns true and fills value with the encoded value if it was
// not too big, otherwise with -1. If the optional disambiguator was omitted,
// value is 0. On parse failure returns false and sets value to -1.
ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
value = -1;
// disambiguator = s base-62-number
//
// Disambiguators are optional. An omitted disambiguator is zero.
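//
// Worked examples, consistent with the closure-numbering tests: an omitted
// disambiguator yields 0, "s_" yields 1, "s0_" yields 2, and "s9_" yields
// 11.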
if (!Eat('s')) {
value = 0;
return true;
}
int base_62_value = 0;
if (!ParseBase62Number(base_62_value)) return false;
value = base_62_value < 0 ? -1 : base_62_value + 1;
return true;
}
// Consumes a base-62 number like _ or 123_ from the input.
//
// On success returns true and fills value with the encoded value if it was
// not too big, otherwise with -1. On parse failure returns false and sets
// value to -1.
ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
value = -1;
// base-62-number = (digit | lower | upper)* _
//
// An empty base-62 digit sequence means 0.
if (Eat('_')) {
value = 0;
return true;
}
// A nonempty digit sequence denotes its base-62 value plus 1.
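// For example, "0_" encodes 1, "9_" encodes 10, "a_" encodes 11, "Z_"
// encodes 62, and "10_" encodes 63.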
int encoded_number = 0;
bool overflowed = false;
while (IsAlpha(Peek()) || IsDigit(Peek())) {
const char c = Take();
if (encoded_number >= std::numeric_limits<int>::max()/62) {
// If we are close to overflowing an int, keep parsing but stop updating
// encoded_number and remember to return -1 at the end. The point is to
// avoid undefined behavior while parsing crate-root disambiguators,
// which are large in practice but not shown in demangling, while
// successfully computing closure and shim disambiguators, which are
// typically small and are printed out.
overflowed = true;
} else {
int digit;
if (IsDigit(c)) {
digit = c - '0';
} else if (IsLower(c)) {
digit = c - 'a' + 10;
} else {
digit = c - 'A' + 36;
}
encoded_number = 62 * encoded_number + digit;
}
}
if (!Eat('_')) return false;
if (!overflowed) value = encoded_number + 1;
return true;
}
// Consumes an identifier from the input, returning true on success.
//
// A nonzero uppercase_namespace specifies the character after the N in a
// nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
// write out the name with the conventional decoration for that namespace.
ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
// identifier -> disambiguator? undisambiguated-identifier
int disambiguator = 0;
if (!ParseDisambiguator(disambiguator)) return false;
return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator);
}
// Consumes from the input an identifier with no preceding disambiguator,
// returning true on success.
//
// When ParseIdentifier calls this, it passes the N<namespace> character and
// disambiguator value so that "{closure#42}" and similar forms can be
// rendered correctly.
//
// At other appearances of undisambiguated-identifier in the grammar, this
// treatment is not applicable, and the call site omits both arguments.
ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier(
char uppercase_namespace = '\0', int disambiguator = 0) {
// undisambiguated-identifier -> u? decimal-number _? bytes
const bool is_punycoded = Eat('u');
if (!IsDigit(Peek())) return false;
int num_bytes = 0;
if (!ParseDecimalNumber(num_bytes)) return false;
(void)Eat('_'); // optional separator, needed if a digit follows
if (is_punycoded) {
DecodeRustPunycodeOptions options;
options.punycode_begin = &encoding_[pos_];
options.punycode_end = &encoding_[pos_] + num_bytes;
options.out_begin = out_;
options.out_end = out_end_;
out_ = DecodeRustPunycode(options);
if (out_ == nullptr) return false;
pos_ += static_cast<size_t>(num_bytes);
}
// Emit the beginnings of braced forms like {shim:vtable#0}.
if (uppercase_namespace != '\0') {
switch (uppercase_namespace) {
case 'C':
if (!Emit("{closure")) return false;
break;
case 'S':
if (!Emit("{shim")) return false;
break;
default:
if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
break;
}
if (num_bytes > 0 && !Emit(":")) return false;
}
// Emit the name itself.
if (!is_punycoded) {
for (int i = 0; i < num_bytes; ++i) {
const char c = Take();
if (!IsIdentifierChar(c) &&
// The spec gives toolchains the choice of Punycode or raw UTF-8 for
// identifiers containing code points above 0x7f, so accept bytes
// with the high bit set.
(c & 0x80) == 0) {
return false;
}
if (!EmitChar(c)) return false;
}
}
// Emit the endings of braced forms, e.g., "#42}".
if (uppercase_namespace != '\0') {
if (!EmitChar('#')) return false;
if (!EmitDisambiguator(disambiguator)) return false;
if (!EmitChar('}')) return false;
}
return true;
}
// Consumes a decimal number like 0 or 123 from the input. On success returns
// true and fills value with the encoded value. If the encoded value is too
// large or otherwise unparsable, returns false and sets value to -1.
ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
value = -1;
if (!IsDigit(Peek())) return false;
int encoded_number = Take() - '0';
if (encoded_number == 0) {
// Decimal numbers are never encoded with extra leading zeroes.
value = 0;
return true;
}
while (IsDigit(Peek()) &&
// avoid overflow
encoded_number < std::numeric_limits<int>::max()/10) {
encoded_number = 10 * encoded_number + (Take() - '0');
}
if (IsDigit(Peek())) return false; // too big
value = encoded_number;
return true;
}
// Consumes a binder of higher-ranked lifetimes if one is present. On success
// returns true and discards the encoded lifetime count. On parse failure
// returns false.
ABSL_MUST_USE_RESULT bool ParseOptionalBinder() {
// binder -> G base-62-number
if (!Eat('G')) return true;
int ignored_binding_count;
return ParseBase62Number(ignored_binding_count);
}
// Consumes a lifetime if one is present.
//
// On success returns true and discards the lifetime index. We do not print
// or even range-check lifetimes because they are a finer detail than other
// things we omit from output, such as the entire contents of generic-args.
//
// On parse failure returns false.
ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() {
// lifetime -> L base-62-number
if (!Eat('L')) return true;
int ignored_de_bruijn_index;
return ParseBase62Number(ignored_de_bruijn_index);
}
// Consumes a lifetime just like ParseOptionalLifetime, but returns false if
// there is no lifetime here.
ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() {
if (Peek() != 'L') return false;
return ParseOptionalLifetime();
}
// Pushes ns onto the namespace stack and returns true if the stack is not
// full, else returns false.
ABSL_MUST_USE_RESULT bool PushNamespace(char ns) {
if (namespace_depth_ == kNamespaceStackSize) return false;
namespace_stack_[namespace_depth_++] = ns;
return true;
}
// Pops the last pushed namespace. Requires that the namespace stack is not
// empty (namespace_depth_ > 0).
char PopNamespace() { return namespace_stack_[--namespace_depth_]; }
// Pushes position onto the position stack and returns true if the stack is
// not full, else returns false.
ABSL_MUST_USE_RESULT bool PushPosition(int position) {
if (position_depth_ == kPositionStackSize) return false;
position_stack_[position_depth_++] = position;
return true;
}
// Pops the last pushed input position. Requires that the position stack is
// not empty (position_depth_ > 0).
int PopPosition() { return position_stack_[--position_depth_]; }
// Consumes a base-62-number denoting a backref target, pushes the current
// input position on the data stack, and sets the input position to the
// beginning of the backref target. Returns true on success. Returns false
// if parsing failed, the stack is exhausted, or the backref target position
// is out of range.
ABSL_MUST_USE_RESULT bool BeginBackref() {
// backref = B base-62-number (B already consumed)
//
// Reject backrefs that don't parse, overflow int, or don't point backward.
// If the offset looks fine, adjust it to account for the _R prefix.
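//
// For example, in the test mangling
// "_RNvYNtC8my_crate9my_structNtB4_8my_trait1f", the backref B4_ parses to
// offset 5; after the +2 adjustment below it points at the 'C' of
// "C8my_crate", so the second "my_crate" in the output comes from
// re-parsing the crate root.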
int offset = 0;
const int offset_of_this_backref =
pos_ - 2 /* _R */ - 1 /* B already consumed */;
if (!ParseBase62Number(offset) || offset < 0 ||
offset >= offset_of_this_backref) {
return false;
}
offset += 2;
// Save the old position to restore later.
if (!PushPosition(pos_)) return false;
// Move the input position to the backref target.
//
// Note that we do not check whether the new position points to the
// beginning of a construct matching the context in which the backref
// appeared. We just jump to it and see whether nested parsing succeeds.
// We therefore accept various wrong manglings, e.g., a type backref
// pointing to an 'l' character inside an identifier, which happens to mean
// i32 when parsed as a type mangling. This saves the complexity and RAM
// footprint of remembering which offsets began which kinds of
// substructures. Existing demanglers take similar shortcuts.
pos_ = offset;
return true;
}
// Cleans up after a backref production by restoring the previous input
// position from the data stack.
void EndBackref() { pos_ = PopPosition(); }
// The leftmost recursion_depth_ elements of recursion_stack_ contain the
// ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed.
ReturnAddress recursion_stack_[kStackSize] = {};
int recursion_depth_ = 0;
// The leftmost namespace_depth_ elements of namespace_stack_ contain the
// uppercase namespace identifiers for open nested-paths, e.g., 'C' for a
// closure.
char namespace_stack_[kNamespaceStackSize] = {};
int namespace_depth_ = 0;
// The leftmost position_depth_ elements of position_stack_ contain the input
// positions to return to after fully printing the targets of backrefs.
int position_stack_[kPositionStackSize] = {};
int position_depth_ = 0;
// Anything parsed while silence_depth_ > 0 contributes nothing to the
// demangled output. For constructs omitted from the demangling, such as
// impl-path and the contents of generic-args, we will increment
// silence_depth_ on the way in and decrement silence_depth_ on the way out.
int silence_depth_ = 0;
// Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
// the next input character to be scanned.
int pos_ = 0;
const char* encoding_ = nullptr;
// Output: *out_ is where the next output character should be written, and
// out_end_ points past the last byte of available space.
char* out_ = nullptr;
char* out_end_ = nullptr;
};
} // namespace
bool DemangleRustSymbolEncoding(const char* mangled, char* out,
size_t out_size) {
return RustSymbolParser(mangled, out, out + out_size).Parse();
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,42 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
#include <cstddef>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Demangle the Rust encoding `mangled`. On success, return true and write the
// demangled symbol name to `out`. Otherwise, return false, leaving unspecified
// contents in `out`. For example, calling DemangleRustSymbolEncoding with
// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`,
// provided `out_size` is large enough for that value and its trailing NUL.
//
// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++
// call-stack space. It is suitable for symbolizing stack traces in a signal
// handler.
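//
// A minimal usage sketch (the buffer size is arbitrary, chosen only for
// illustration):
//
//   char demangled[128];
//   if (DemangleRustSymbolEncoding("_RNvC8my_crate7my_func", demangled,
//                                  sizeof(demangled))) {
//     // demangled now contains "my_crate::my_func".
//   }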
bool DemangleRustSymbolEncoding(const char* mangled, char* out,
size_t out_size);
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_

View file

@ -0,0 +1,584 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/demangle_rust.h"
#include <cstddef>
#include <string>
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
// If DemangleRustSymbolEncoding(mangled, <buffer with room for buffer_size
// chars>, buffer_size) returns true and seems not to have overrun its output
// buffer, returns the string written by DemangleRustSymbolEncoding; otherwise
// returns an error message.
std::string ResultOfDemangling(const char* mangled, size_t buffer_size) {
// Fill the buffer with something other than NUL so we test whether Demangle
// appends trailing NUL as expected.
std::string buffer(buffer_size + 1, '~');
constexpr char kCanaryCharacter = 0x7f; // arbitrary unlikely value
buffer[buffer_size] = kCanaryCharacter;
if (!DemangleRustSymbolEncoding(mangled, &buffer[0], buffer_size)) {
return "Failed parse";
}
if (buffer[buffer_size] != kCanaryCharacter) {
return "Buffer overrun by output: " + buffer.substr(0, buffer_size + 1)
+ "...";
}
return buffer.data(); // Not buffer itself: this trims trailing padding.
}
// Tests that DemangleRustSymbolEncoding converts mangled into plaintext given
// enough output buffer space but returns false and avoids overrunning a buffer
// that is one byte too short.
//
// The lambda wrapping allows ASSERT_EQ to branch out the first time an
// expectation is not satisfied, preventing redundant errors for the same bug.
//
// We test first with excess space so that if the algorithm just computes the
// wrong answer, it will be clear from the error log that the bounds checks are
// unlikely to be the code at fault.
#define EXPECT_DEMANGLING(mangled, plaintext) \
do { \
[] { \
constexpr size_t plenty_of_space = sizeof(plaintext) + 128; \
constexpr size_t just_enough_space = sizeof(plaintext); \
constexpr size_t one_byte_too_few = sizeof(plaintext) - 1; \
const char* expected_plaintext = plaintext; \
const char* expected_error = "Failed parse"; \
ASSERT_EQ(ResultOfDemangling(mangled, plenty_of_space), \
expected_plaintext); \
ASSERT_EQ(ResultOfDemangling(mangled, just_enough_space), \
expected_plaintext); \
ASSERT_EQ(ResultOfDemangling(mangled, one_byte_too_few), \
expected_error); \
}(); \
} while (0)
// Tests that DemangleRustSymbolEncoding rejects the given input (typically, a
// truncation of a real Rust symbol name).
#define EXPECT_DEMANGLING_FAILS(mangled) \
do { \
constexpr size_t plenty_of_space = 1024; \
const char* expected_error = "Failed parse"; \
EXPECT_EQ(ResultOfDemangling(mangled, plenty_of_space), expected_error); \
} while (0)
// Piping grep -C 1 _R demangle_rust_test.cc into your favorite c++filt
// implementation allows you to verify that the goldens below are reasonable.
TEST(DemangleRust, EmptyDemangling) {
EXPECT_TRUE(DemangleRustSymbolEncoding("_RC0", nullptr, 0));
}
TEST(DemangleRust, FunctionAtCrateLevel) {
EXPECT_DEMANGLING("_RNvC10crate_name9func_name", "crate_name::func_name");
EXPECT_DEMANGLING(
"_RNvCs09azAZ_10crate_name9func_name", "crate_name::func_name");
}
TEST(DemangleRust, TruncationsOfFunctionAtCrateLevel) {
EXPECT_DEMANGLING_FAILS("_R");
EXPECT_DEMANGLING_FAILS("_RN");
EXPECT_DEMANGLING_FAILS("_RNvC");
EXPECT_DEMANGLING_FAILS("_RNvC10");
EXPECT_DEMANGLING_FAILS("_RNvC10crate_nam");
EXPECT_DEMANGLING_FAILS("_RNvC10crate_name");
EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9");
EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9func_nam");
EXPECT_DEMANGLING_FAILS("_RNvCs");
EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ");
EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ_");
}
TEST(DemangleRust, VendorSuffixes) {
EXPECT_DEMANGLING("_RNvC10crate_name9func_name.!@#", "crate_name::func_name");
EXPECT_DEMANGLING("_RNvC10crate_name9func_name$!@#", "crate_name::func_name");
}
TEST(DemangleRust, UnicodeIdentifiers) {
EXPECT_DEMANGLING("_RNvC7ice_cap17Eyjafjallajökull",
"ice_cap::Eyjafjallajökull");
EXPECT_DEMANGLING("_RNvC7ice_caps_u19Eyjafjallajkull_jtb",
"ice_cap::Eyjafjallajökull");
}
TEST(DemangleRust, FunctionInModule) {
EXPECT_DEMANGLING("_RNvNtCs09azAZ_10crate_name11module_name9func_name",
"crate_name::module_name::func_name");
}
TEST(DemangleRust, FunctionInFunction) {
EXPECT_DEMANGLING(
"_RNvNvCs09azAZ_10crate_name15outer_func_name15inner_func_name",
"crate_name::outer_func_name::inner_func_name");
}
TEST(DemangleRust, ClosureInFunction) {
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_name0",
"crate_name::func_name::{closure#0}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_name0Cs123_12client_crate",
"crate_name::func_name::{closure#0}");
}
TEST(DemangleRust, ClosureNumbering) {
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_names_0Cs123_12client_crate",
"crate_name::func_name::{closure#1}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_names0_0Cs123_12client_crate",
"crate_name::func_name::{closure#2}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_names9_0Cs123_12client_crate",
"crate_name::func_name::{closure#11}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_namesa_0Cs123_12client_crate",
"crate_name::func_name::{closure#12}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_namesz_0Cs123_12client_crate",
"crate_name::func_name::{closure#37}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_namesA_0Cs123_12client_crate",
"crate_name::func_name::{closure#38}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_namesZ_0Cs123_12client_crate",
"crate_name::func_name::{closure#63}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_names10_0Cs123_12client_crate",
"crate_name::func_name::{closure#64}");
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_namesg6_0Cs123_12client_crate",
"crate_name::func_name::{closure#1000}");
}
TEST(DemangleRust, ClosureNumberOverflowingInt) {
EXPECT_DEMANGLING(
"_RNCNvCs09azAZ_10crate_name9func_names1234567_0Cs123_12client_crate",
"crate_name::func_name::{closure#?}");
}
TEST(DemangleRust, UnexpectedlyNamedClosure) {
EXPECT_DEMANGLING(
"_RNCNvCs123_10crate_name9func_name12closure_nameCs456_12client_crate",
"crate_name::func_name::{closure:closure_name#0}");
EXPECT_DEMANGLING(
"_RNCNvCs123_10crate_name9func_names2_12closure_nameCs456_12client_crate",
"crate_name::func_name::{closure:closure_name#4}");
}
TEST(DemangleRust, ItemNestedInsideClosure) {
EXPECT_DEMANGLING(
"_RNvNCNvCs123_10crate_name9func_name015inner_func_nameCs_12client_crate",
"crate_name::func_name::{closure#0}::inner_func_name");
}
TEST(DemangleRust, Shim) {
EXPECT_DEMANGLING(
"_RNSNvCs123_10crate_name9func_name6vtableCs456_12client_crate",
"crate_name::func_name::{shim:vtable#0}");
}
TEST(DemangleRust, UnknownUppercaseNamespace) {
EXPECT_DEMANGLING(
"_RNXNvCs123_10crate_name9func_name14mystery_objectCs456_12client_crate",
"crate_name::func_name::{X:mystery_object#0}");
}
TEST(DemangleRust, NestedUppercaseNamespaces) {
EXPECT_DEMANGLING(
"_RNCNXNYCs123_10crate_names0_1ys1_1xs2_0Cs456_12client_crate",
"crate_name::{Y:y#2}::{X:x#3}::{closure#4}");
}
TEST(DemangleRust, TraitDefinition) {
EXPECT_DEMANGLING(
"_RNvYNtC7crate_a9my_structNtC7crate_b8my_trait1f",
"<crate_a::my_struct as crate_b::my_trait>::f");
}
TEST(DemangleRust, BasicTypeNames) {
EXPECT_DEMANGLING("_RNvYaNtC1c1t1f", "<i8 as c::t>::f");
EXPECT_DEMANGLING("_RNvYbNtC1c1t1f", "<bool as c::t>::f");
EXPECT_DEMANGLING("_RNvYcNtC1c1t1f", "<char as c::t>::f");
EXPECT_DEMANGLING("_RNvYdNtC1c1t1f", "<f64 as c::t>::f");
EXPECT_DEMANGLING("_RNvYeNtC1c1t1f", "<str as c::t>::f");
EXPECT_DEMANGLING("_RNvYfNtC1c1t1f", "<f32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYhNtC1c1t1f", "<u8 as c::t>::f");
EXPECT_DEMANGLING("_RNvYiNtC1c1t1f", "<isize as c::t>::f");
EXPECT_DEMANGLING("_RNvYjNtC1c1t1f", "<usize as c::t>::f");
EXPECT_DEMANGLING("_RNvYlNtC1c1t1f", "<i32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYmNtC1c1t1f", "<u32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYnNtC1c1t1f", "<i128 as c::t>::f");
EXPECT_DEMANGLING("_RNvYoNtC1c1t1f", "<u128 as c::t>::f");
EXPECT_DEMANGLING("_RNvYpNtC1c1t1f", "<_ as c::t>::f");
EXPECT_DEMANGLING("_RNvYsNtC1c1t1f", "<i16 as c::t>::f");
EXPECT_DEMANGLING("_RNvYtNtC1c1t1f", "<u16 as c::t>::f");
EXPECT_DEMANGLING("_RNvYuNtC1c1t1f", "<() as c::t>::f");
EXPECT_DEMANGLING("_RNvYvNtC1c1t1f", "<... as c::t>::f");
EXPECT_DEMANGLING("_RNvYxNtC1c1t1f", "<i64 as c::t>::f");
EXPECT_DEMANGLING("_RNvYyNtC1c1t1f", "<u64 as c::t>::f");
EXPECT_DEMANGLING("_RNvYzNtC1c1t1f", "<! as c::t>::f");
EXPECT_DEMANGLING_FAILS("_RNvYkNtC1c1t1f");
}
TEST(DemangleRust, SliceTypes) {
EXPECT_DEMANGLING("_RNvYSlNtC1c1t1f", "<[i32] as c::t>::f");
EXPECT_DEMANGLING("_RNvYSNtC1d1sNtC1c1t1f", "<[d::s] as c::t>::f");
}
TEST(DemangleRust, ImmutableReferenceTypes) {
EXPECT_DEMANGLING("_RNvYRlNtC1c1t1f", "<&i32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYRNtC1d1sNtC1c1t1f", "<&d::s as c::t>::f");
}
TEST(DemangleRust, MutableReferenceTypes) {
EXPECT_DEMANGLING("_RNvYQlNtC1c1t1f", "<&mut i32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYQNtC1d1sNtC1c1t1f", "<&mut d::s as c::t>::f");
}
TEST(DemangleRust, ConstantRawPointerTypes) {
EXPECT_DEMANGLING("_RNvYPlNtC1c1t1f", "<*const i32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYPNtC1d1sNtC1c1t1f", "<*const d::s as c::t>::f");
}
TEST(DemangleRust, MutableRawPointerTypes) {
EXPECT_DEMANGLING("_RNvYOlNtC1c1t1f", "<*mut i32 as c::t>::f");
EXPECT_DEMANGLING("_RNvYONtC1d1sNtC1c1t1f", "<*mut d::s as c::t>::f");
}
TEST(DemangleRust, TupleLength0) {
EXPECT_DEMANGLING("_RNvYTENtC1c1t1f", "<() as c::t>::f");
}
TEST(DemangleRust, TupleLength1) {
EXPECT_DEMANGLING("_RNvYTlENtC1c1t1f", "<(i32,) as c::t>::f");
EXPECT_DEMANGLING("_RNvYTNtC1d1sENtC1c1t1f", "<(d::s,) as c::t>::f");
}
TEST(DemangleRust, TupleLength2) {
EXPECT_DEMANGLING("_RNvYTlmENtC1c1t1f", "<(i32, u32) as c::t>::f");
EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yENtC1c1t1f",
"<(d::x, e::y) as c::t>::f");
}
TEST(DemangleRust, TupleLength3) {
EXPECT_DEMANGLING("_RNvYTlmnENtC1c1t1f", "<(i32, u32, i128) as c::t>::f");
EXPECT_DEMANGLING("_RNvYTNtC1d1xNtC1e1yNtC1f1zENtC1c1t1f",
"<(d::x, e::y, f::z) as c::t>::f");
}
TEST(DemangleRust, LongerTuplesAbbreviated) {
EXPECT_DEMANGLING("_RNvYTlmnoENtC1c1t1f",
"<(i32, u32, i128, ...) as c::t>::f");
EXPECT_DEMANGLING("_RNvYTlmnNtC1d1xNtC1e1yENtC1c1t1f",
"<(i32, u32, i128, ...) as c::t>::f");
}
TEST(DemangleRust, PathBackrefToCrate) {
EXPECT_DEMANGLING("_RNvYNtC8my_crate9my_structNtB4_8my_trait1f",
"<my_crate::my_struct as my_crate::my_trait>::f");
}
TEST(DemangleRust, PathBackrefToNestedPath) {
EXPECT_DEMANGLING("_RNvYNtNtC1c1m1sNtB4_1t1f", "<c::m::s as c::m::t>::f");
}
TEST(DemangleRust, PathBackrefAsInstantiatingCrate) {
EXPECT_DEMANGLING("_RNCNvC8my_crate7my_func0B3_",
"my_crate::my_func::{closure#0}");
}
TEST(DemangleRust, TypeBackrefsNestedInTuple) {
EXPECT_DEMANGLING("_RNvYTTRlB4_ERB3_ENtC1c1t1f",
"<((&i32, &i32), &(&i32, &i32)) as c::t>::f");
}
TEST(DemangleRust, NoInfiniteLoopOnBackrefToTheWhole) {
EXPECT_DEMANGLING_FAILS("_RB_");
EXPECT_DEMANGLING_FAILS("_RNvB_1sNtC1c1t1f");
}
TEST(DemangleRust, NoCrashOnForwardBackref) {
EXPECT_DEMANGLING_FAILS("_RB0_");
EXPECT_DEMANGLING_FAILS("_RB1_");
EXPECT_DEMANGLING_FAILS("_RB2_");
EXPECT_DEMANGLING_FAILS("_RB3_");
EXPECT_DEMANGLING_FAILS("_RB4_");
}
TEST(DemangleRust, PathBackrefsDoNotRecurseDuringSilence) {
// B_ points at the value f (the whole mangling), so the cycle would lead to
// parse failure if the parser tried to parse what was pointed to.
EXPECT_DEMANGLING("_RNvYTlmnNtB_1sENtC1c1t1f",
"<(i32, u32, i128, ...) as c::t>::f");
}
TEST(DemangleRust, TypeBackrefsDoNotRecurseDuringSilence) {
// B2_ points at the tuple type, likewise making a cycle that the parser
// avoids following.
EXPECT_DEMANGLING("_RNvYTlmnB2_ENtC1c1t1f",
"<(i32, u32, i128, ...) as c::t>::f");
}
TEST(DemangleRust, ConstBackrefsDoNotRecurseDuringSilence) {
// B_ points at the whole I...E mangling, which does not parse as a const.
EXPECT_DEMANGLING("_RINvC1c1fAlB_E", "c::f::<>");
}
TEST(DemangleRust, ReturnFromBackrefToInputPosition256) {
// Show that we can resume at input positions that don't fit into a byte.
EXPECT_DEMANGLING("_RNvYNtC1c238very_long_type_"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABC"
"NtB4_1t1f",
"<c::very_long_type_"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
"ABCDEFGHIJabcdefghijABC"
" as c::t>::f");
}
TEST(DemangleRust, EmptyGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fE", "c::f::<>");
}
TEST(DemangleRust, OneSimpleTypeInGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1flE", // c::f::<i32>
"c::f::<>");
}
TEST(DemangleRust, OneTupleInGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fTlmEE", // c::f::<(i32, u32)>
"c::f::<>");
}
TEST(DemangleRust, OnePathInGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fNtC1d1sE", // c::f::<d::s>
"c::f::<>");
}
TEST(DemangleRust, LongerGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1flmRNtC1d1sE", // c::f::<i32, u32, &d::s>
"c::f::<>");
}
TEST(DemangleRust, BackrefInGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fRlB7_NtB2_1sE", // c::f::<&i32, &i32, c::s>
"c::f::<>");
}
TEST(DemangleRust, NestedGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fINtB2_1slEmE", // c::f::<c::s::<i32>, u32>
"c::f::<>");
}
TEST(DemangleRust, MonomorphicEntityNestedInsideGeneric) {
EXPECT_DEMANGLING("_RNvINvC1c1fppE1g", // c::f::<_, _>::g
"c::f::<>::g");
}
TEST(DemangleRust, ArrayTypeWithSimpleElementType) {
EXPECT_DEMANGLING("_RNvYAlj1f_NtC1c1t1f", "<[i32; 0x1f] as c::t>::f");
}
TEST(DemangleRust, ArrayTypeWithComplexElementType) {
EXPECT_DEMANGLING("_RNvYAINtC1c1slEj1f_NtB6_1t1f",
"<[c::s::<>; 0x1f] as c::t>::f");
}
TEST(DemangleRust, NestedArrayType) {
EXPECT_DEMANGLING("_RNvYAAlj1f_j2e_NtC1c1t1f",
"<[[i32; 0x1f]; 0x2e] as c::t>::f");
}
TEST(DemangleRust, BackrefArraySize) {
EXPECT_DEMANGLING("_RNvYAAlj1f_B5_NtC1c1t1f",
"<[[i32; 0x1f]; 0x1f] as c::t>::f");
}
TEST(DemangleRust, ZeroArraySize) {
EXPECT_DEMANGLING("_RNvYAlj0_NtC1c1t1f", "<[i32; 0x0] as c::t>::f");
}
TEST(DemangleRust, SurprisingMinusesInArraySize) {
// Compilers shouldn't do this stuff, but existing demanglers accept it.
EXPECT_DEMANGLING("_RNvYAljn0_NtC1c1t1f", "<[i32; -0x0] as c::t>::f");
EXPECT_DEMANGLING("_RNvYAljn42_NtC1c1t1f", "<[i32; -0x42] as c::t>::f");
}
TEST(DemangleRust, NumberAsGenericArg) {
EXPECT_DEMANGLING("_RINvC1c1fKl8_E", // c::f::<0x8>
"c::f::<>");
}
TEST(DemangleRust, NumberAsFirstOfTwoGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fKl8_mE", // c::f::<0x8, u32>
"c::f::<>");
}
TEST(DemangleRust, NumberAsSecondOfTwoGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fmKl8_E", // c::f::<u32, 0x8>
"c::f::<>");
}
TEST(DemangleRust, NumberPlaceholder) {
EXPECT_DEMANGLING("_RNvINvC1c1fKpE1g", // c::f::<_>::g
"c::f::<>::g");
}
TEST(DemangleRust, InherentImplWithoutDisambiguator) {
EXPECT_DEMANGLING("_RNvMNtC8my_crate6my_modNtB2_9my_struct7my_func",
"<my_crate::my_mod::my_struct>::my_func");
}
TEST(DemangleRust, InherentImplWithDisambiguator) {
EXPECT_DEMANGLING("_RNvMs_NtC8my_crate6my_modNtB4_9my_struct7my_func",
"<my_crate::my_mod::my_struct>::my_func");
}
TEST(DemangleRust, TraitImplWithoutDisambiguator) {
EXPECT_DEMANGLING("_RNvXC8my_crateNtB2_9my_structNtB2_8my_trait7my_func",
"<my_crate::my_struct as my_crate::my_trait>::my_func");
}
TEST(DemangleRust, TraitImplWithDisambiguator) {
EXPECT_DEMANGLING("_RNvXs_C8my_crateNtB4_9my_structNtB4_8my_trait7my_func",
"<my_crate::my_struct as my_crate::my_trait>::my_func");
}
TEST(DemangleRust, TraitImplWithNonpathSelfType) {
EXPECT_DEMANGLING("_RNvXC8my_crateRlNtB2_8my_trait7my_func",
"<&i32 as my_crate::my_trait>::my_func");
}
TEST(DemangleRust, ThunkType) {
EXPECT_DEMANGLING("_RNvYFEuNtC1c1t1f", // <fn() as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, NontrivialFunctionReturnType) {
EXPECT_DEMANGLING(
"_RNvYFERTlmENtC1c1t1f", // <fn() -> &(i32, u32) as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, OneParameterType) {
EXPECT_DEMANGLING("_RNvYFlEuNtC1c1t1f", // <fn(i32) as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, TwoParameterTypes) {
EXPECT_DEMANGLING("_RNvYFlmEuNtC1c1t1f", // <fn(i32, u32) as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, ExternC) {
EXPECT_DEMANGLING("_RNvYFKCEuNtC1c1t1f", // <extern "C" fn() as c::t>>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, ExternOther) {
EXPECT_DEMANGLING(
"_RNvYFK5not_CEuNtC1c1t1f", // <extern "not-C" fn() as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, Unsafe) {
EXPECT_DEMANGLING("_RNvYFUEuNtC1c1t1f", // <unsafe fn() as c::t>::f
"<fn... as c::t>::f");
}
TEST(DemangleRust, Binder) {
EXPECT_DEMANGLING(
// <for<'a> fn(&'a i32) -> &'a i32 as c::t>::f
"_RNvYFG_RL0_lEB5_NtC1c1t1f",
"<fn... as c::t>::f");
}
TEST(DemangleRust, AllFnSigFeaturesInOrder) {
EXPECT_DEMANGLING(
// <for<'a> unsafe extern "C" fn(&'a i32) -> &'a i32 as c::t>::f
"_RNvYFG_UKCRL0_lEB8_NtC1c1t1f",
"<fn... as c::t>::f");
}
TEST(DemangleRust, LifetimeInGenericArgs) {
EXPECT_DEMANGLING("_RINvC1c1fINtB2_1sL_EE", // c::f::<c::s::<'_>>
"c::f::<>");
}
TEST(DemangleRust, EmptyDynTrait) {
// This shouldn't happen, but the grammar allows it and existing demanglers
// accept it.
EXPECT_DEMANGLING("_RNvYDEL_NtC1c1t1f",
"<dyn as c::t>::f");
}
TEST(DemangleRust, SimpleDynTrait) {
EXPECT_DEMANGLING("_RNvYDNtC1c1tEL_NtC1d1u1f",
"<dyn c::t as d::u>::f");
}
TEST(DemangleRust, DynTraitWithOneAssociatedType) {
EXPECT_DEMANGLING(
"_RNvYDNtC1c1tp1xlEL_NtC1d1u1f", // <dyn c::t<x = i32> as d::u>::f
"<dyn c::t<> as d::u>::f");
}
TEST(DemangleRust, DynTraitWithTwoAssociatedTypes) {
EXPECT_DEMANGLING(
// <dyn c::t<x = i32, y = u32> as d::u>::f
"_RNvYDNtC1c1tp1xlp1ymEL_NtC1d1u1f",
"<dyn c::t<> as d::u>::f");
}
TEST(DemangleRust, DynTraitPlusAutoTrait) {
EXPECT_DEMANGLING(
"_RNvYDNtC1c1tNtNtC3std6marker4SendEL_NtC1d1u1f",
"<dyn c::t + std::marker::Send as d::u>::f");
}
TEST(DemangleRust, DynTraitPlusTwoAutoTraits) {
EXPECT_DEMANGLING(
"_RNvYDNtC1c1tNtNtC3std6marker4CopyNtBc_4SyncEL_NtC1d1u1f",
"<dyn c::t + std::marker::Copy + std::marker::Sync as d::u>::f");
}
TEST(DemangleRust, HigherRankedDynTrait) {
EXPECT_DEMANGLING(
// <dyn for<'a> c::t::<&'a i32> as d::u>::f
"_RNvYDG_INtC1c1tRL0_lEEL_NtC1d1u1f",
"<dyn c::t::<> as d::u>::f");
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

File diff suppressed because it is too large

View file

@ -0,0 +1,413 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Allow dynamic symbol lookup in an in-memory Elf image.
//
#include "absl/debugging/internal/elf_mem_image.h"
#ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
#include <string.h>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
// From binutils/include/elf/common.h (this doesn't appear to be documented
// anywhere else).
//
// /* This flag appears in a Versym structure. It means that the symbol
// is hidden, and is only visible with an explicit version number.
// This is a GNU extension. */
// #define VERSYM_HIDDEN 0x8000
//
// /* This is the mask for the rest of the Versym information. */
// #define VERSYM_VERSION 0x7fff
#define VERSYM_VERSION 0x7fff
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
#if __SIZEOF_POINTER__ == 4
const int kElfClass = ELFCLASS32;
int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
#elif __SIZEOF_POINTER__ == 8
const int kElfClass = ELFCLASS64;
int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
#else
const int kElfClass = -1;
int ElfBind(const ElfW(Sym) *) {
ABSL_RAW_LOG(FATAL, "Unexpected word size");
return 0;
}
int ElfType(const ElfW(Sym) *) {
ABSL_RAW_LOG(FATAL, "Unexpected word size");
return 0;
}
#endif
// Extract an element from one of the ELF tables, cast it to desired type.
// This is just simple arithmetic and a glorified cast.
// Callers are responsible for bounds checking.
template <typename T>
const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
ElfW(Word) element_size, size_t index) {
return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
+ table_offset
+ index * element_size);
}
} // namespace
// The value of this variable doesn't matter; it's used only for its
// unique address.
const int ElfMemImage::kInvalidBaseSentinel = 0;
ElfMemImage::ElfMemImage(const void *base) {
ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
Init(base);
}
uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return dynsym_ + index;
}
const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return versym_ + index;
}
const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
static_cast<size_t>(index));
}
const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
return dynstr_ + offset;
}
const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
// Symbol corresponds to "special" (e.g. SHN_ABS) section.
return reinterpret_cast<const void *>(sym->st_value);
}
ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
}
const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
"index out of range");
const ElfW(Verdef) *version_definition = verdef_;
while (version_definition->vd_ndx < index && version_definition->vd_next) {
const char *const version_definition_as_char =
reinterpret_cast<const char *>(version_definition);
version_definition =
reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
version_definition->vd_next);
}
return version_definition->vd_ndx == index ? version_definition : nullptr;
}
const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
const ElfW(Verdef) *verdef) const {
return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
}
const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
return dynstr_ + offset;
}
void ElfMemImage::Init(const void *base) {
ehdr_ = nullptr;
dynsym_ = nullptr;
dynstr_ = nullptr;
versym_ = nullptr;
verdef_ = nullptr;
num_syms_ = 0;
strsize_ = 0;
verdefnum_ = 0;
// Sentinel: PT_LOAD .p_vaddr can't possibly be this.
link_base_ = ~ElfW(Addr){0}; // NOLINT(readability/braces)
if (!base) {
return;
}
const char *const base_as_char = reinterpret_cast<const char *>(base);
if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
assert(false);
return;
}
int elf_class = base_as_char[EI_CLASS];
if (elf_class != kElfClass) {
assert(false);
return;
}
switch (base_as_char[EI_DATA]) {
case ELFDATA2LSB: {
#ifndef ABSL_IS_LITTLE_ENDIAN
assert(false);
return;
#endif
break;
}
case ELFDATA2MSB: {
#ifndef ABSL_IS_BIG_ENDIAN
assert(false);
return;
#endif
break;
}
default: {
assert(false);
return;
}
}
ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
const ElfW(Phdr) *dynamic_program_header = nullptr;
for (int i = 0; i < ehdr_->e_phnum; ++i) {
const ElfW(Phdr) *const program_header = GetPhdr(i);
switch (program_header->p_type) {
case PT_LOAD:
if (!~link_base_) {
link_base_ = program_header->p_vaddr;
}
break;
case PT_DYNAMIC:
dynamic_program_header = program_header;
break;
}
}
if (!~link_base_ || !dynamic_program_header) {
assert(false);
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
ptrdiff_t relocation =
base_as_char - reinterpret_cast<const char *>(link_base_);
ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
uint32_t *sysv_hash = nullptr;
uint32_t *gnu_hash = nullptr;
for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
const auto value =
static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
switch (dynamic_entry->d_tag) {
case DT_HASH:
sysv_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_GNU_HASH:
gnu_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_SYMTAB:
dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
break;
case DT_STRTAB:
dynstr_ = reinterpret_cast<const char *>(value);
break;
case DT_VERSYM:
versym_ = reinterpret_cast<ElfW(Versym) *>(value);
break;
case DT_VERDEF:
verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
break;
case DT_VERDEFNUM:
verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
break;
case DT_STRSZ:
strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
break;
default:
// Unrecognized entries explicitly ignored.
break;
}
}
if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
!verdef_ || !verdefnum_ || !strsize_) {
assert(false); // invalid VDSO
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
if (sysv_hash) {
num_syms_ = sysv_hash[1];
} else {
assert(gnu_hash);
// Compute the number of symbols for DT_GNU_HASH, which is specified by
// https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
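// Assumed layout, per that document: gnu_hash[0] = nbuckets, gnu_hash[1] =
// index of the first dynsym covered by the table, gnu_hash[2] = number of
// bloom-filter words, gnu_hash[3] = bloom shift; the bucket array and the
// chain values follow the bloom filter.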
uint32_t nbuckets = gnu_hash[0];
// The buckets array is located after the header (4 uint32) and the bloom
// filter (size_t array of gnu_hash[2] elements).
uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
// Find the chain of the last non-empty bucket.
uint32_t idx = 0;
for (uint32_t i = nbuckets; i > 0;) {
idx = buckets[--i];
if (idx != 0) break;
}
if (idx != 0) {
// Find the last element of the chain, which has an odd value.
// Add one to get the number of symbols.
uint32_t *chain = buckets + nbuckets - gnu_hash[1];
while (chain[idx++] % 2 == 0) {
}
}
num_syms_ = idx;
}
}
bool ElfMemImage::LookupSymbol(const char *name,
const char *version,
int type,
SymbolInfo *info_out) const {
for (const SymbolInfo& info : *this) {
if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
ElfType(info.symbol) == type) {
if (info_out) {
*info_out = info;
}
return true;
}
}
return false;
}
bool ElfMemImage::LookupSymbolByAddress(const void *address,
SymbolInfo *info_out) const {
for (const SymbolInfo& info : *this) {
const char *const symbol_start =
reinterpret_cast<const char *>(info.address);
const char *const symbol_end = symbol_start + info.symbol->st_size;
if (symbol_start <= address && address < symbol_end) {
if (info_out) {
// Client wants to know details for that symbol (the usual case).
if (ElfBind(info.symbol) == STB_GLOBAL) {
// Strong symbol; just return it.
*info_out = info;
return true;
} else {
// Weak or local. Record it, but keep looking for a strong one.
*info_out = info;
}
} else {
// Client only cares if there is an overlapping symbol.
return true;
}
}
}
return false;
}
ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
uint32_t index)
: index_(index), image_(image) {}
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
return &info_;
}
const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
return info_;
}
bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
return this->image_ == rhs.image_ && this->index_ == rhs.index_;
}
bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
return !(*this == rhs);
}
ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
this->Update(1);
return *this;
}
ElfMemImage::SymbolIterator ElfMemImage::begin() const {
SymbolIterator it(this, 0);
it.Update(0);
return it;
}
ElfMemImage::SymbolIterator ElfMemImage::end() const {
return SymbolIterator(this, GetNumSymbols());
}
void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
if (!image->IsPresent()) {
return;
}
index_ += increment;
if (index_ >= image->GetNumSymbols()) {
index_ = image->GetNumSymbols();
return;
}
const ElfW(Sym) *symbol = image->GetDynsym(index_);
const ElfW(Versym) *version_symbol = image->GetVersym(index_);
ABSL_RAW_CHECK(symbol && version_symbol, "");
const char *const symbol_name = image->GetDynstr(symbol->st_name);
#if defined(__NetBSD__)
const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
#else
const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
#endif
const ElfW(Verdef) *version_definition = nullptr;
const char *version_name = "";
if (symbol->st_shndx == SHN_UNDEF) {
// Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
// version_index could well be greater than verdefnum_, so calling
// GetVerdef(version_index) may trigger an assertion.
} else {
version_definition = image->GetVerdef(version_index);
}
if (version_definition) {
// Expect 1 or 2 auxiliary entries: one for the version itself and an
// optional second if the version has a parent.
ABSL_RAW_CHECK(
version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
"wrong number of entries");
const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
version_name = image->GetVerstr(version_aux->vda_name);
}
info_.name = symbol_name;
info_.version = version_name;
info_.address = image->GetSymAddr(symbol);
info_.symbol = symbol;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HAVE_ELF_MEM_IMAGE

View file

@ -0,0 +1,141 @@
/*
* Copyright 2017 The Abseil Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Allow dynamic symbol lookup for in-memory Elf images.
#ifndef ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_
#define ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_
// Including this will define the __GLIBC__ macro if glibc is being
// used.
#include <climits>
#include <cstdint>
#include "absl/base/config.h"
// Maybe one day we can rewrite this file not to require the elf
// symbol extensions in glibc, but for right now we need them.
#ifdef ABSL_HAVE_ELF_MEM_IMAGE
#error ABSL_HAVE_ELF_MEM_IMAGE cannot be directly set
#endif
#if defined(__ELF__) && !defined(__OpenBSD__) && !defined(__QNX__) && \
!defined(__native_client__) && !defined(__asmjs__) && \
!defined(__wasm__) && !defined(__HAIKU__) && !defined(__sun) && \
!defined(__VXWORKS__) && !defined(__hexagon__) && !defined(__XTENSA__)
#define ABSL_HAVE_ELF_MEM_IMAGE 1
#endif
#ifdef ABSL_HAVE_ELF_MEM_IMAGE
#include <link.h> // for ElfW
#if defined(__FreeBSD__) && !defined(ElfW)
#define ElfW(x) __ElfN(x)
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// An in-memory ELF image (may not exist on disk).
class ElfMemImage {
private:
// Sentinel: there could never be an elf image at &kInvalidBaseSentinel.
static const int kInvalidBaseSentinel;
public:
// Sentinel: there could never be an elf image at this address.
static constexpr const void *const kInvalidBase =
static_cast<const void*>(&kInvalidBaseSentinel);
// Information about a single vdso symbol.
// All pointers are into .dynsym, .dynstr, or .text of the VDSO.
// Do not free() them or modify through them.
struct SymbolInfo {
const char *name; // E.g. "__vdso_getcpu"
const char *version; // E.g. "LINUX_2.6", could be ""
// for unversioned symbol.
const void *address; // Relocated symbol address.
const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table.
};
// Supports iteration over all dynamic symbols.
class SymbolIterator {
public:
friend class ElfMemImage;
const SymbolInfo *operator->() const;
const SymbolInfo &operator*() const;
SymbolIterator& operator++();
bool operator!=(const SymbolIterator &rhs) const;
bool operator==(const SymbolIterator &rhs) const;
private:
SymbolIterator(const void *const image, uint32_t index);
void Update(uint32_t incr);
SymbolInfo info_;
uint32_t index_;
const void *const image_;
};
explicit ElfMemImage(const void *base);
void Init(const void *base);
bool IsPresent() const { return ehdr_ != nullptr; }
const ElfW(Phdr)* GetPhdr(int index) const;
const ElfW(Sym) * GetDynsym(uint32_t index) const;
const ElfW(Versym)* GetVersym(uint32_t index) const;
const ElfW(Verdef)* GetVerdef(int index) const;
const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const;
const char* GetDynstr(ElfW(Word) offset) const;
const void* GetSymAddr(const ElfW(Sym) *sym) const;
const char* GetVerstr(ElfW(Word) offset) const;
uint32_t GetNumSymbols() const;
SymbolIterator begin() const;
SymbolIterator end() const;
// Look up versioned dynamic symbol in the image.
// Returns false if image is not present, or doesn't contain given
// symbol/version/type combination.
// If info_out is non-null, additional details are filled in.
bool LookupSymbol(const char *name, const char *version,
int symbol_type, SymbolInfo *info_out) const;
// Find info about symbol (if any) which overlaps given address.
// Returns true if symbol was found; false if image isn't present
// or doesn't have a symbol overlapping given address.
// If info_out is non-null, additional details are filled in.
bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
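  //
  // A minimal usage sketch (hypothetical `vdso_base` pointer; the symbol name
  // and version below are just the examples from SymbolInfo above):
  //
  //   ElfMemImage image(vdso_base);
  //   if (image.IsPresent()) {
  //     ElfMemImage::SymbolInfo info;
  //     if (image.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
  //       // info.address now points at the relocated symbol.
  //     }
  //   }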
private:
const ElfW(Ehdr) *ehdr_;
const ElfW(Sym) *dynsym_;
const ElfW(Versym) *versym_;
const ElfW(Verdef) *verdef_;
const char *dynstr_;
uint32_t num_syms_;
size_t strsize_;
size_t verdefnum_;
ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD).
};
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HAVE_ELF_MEM_IMAGE
#endif // ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_

View file

@@ -0,0 +1,320 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "absl/debugging/internal/examine_stack.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "absl/base/config.h"
#ifdef ABSL_HAVE_MMAP
#include <sys/mman.h>
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif
#if defined(__linux__) || defined(__APPLE__)
#include <sys/ucontext.h>
#endif
#include <csignal>
#include <cstdio>
#include "absl/base/attributes.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/symbolize.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
constexpr int kDefaultDumpStackFramesLimit = 64;
// The %p field width for printf() functions is two characters per byte,
// and two extra for the leading "0x".
constexpr int kPrintfPointerFieldWidth = 2 + 2 * sizeof(void*);
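// For example, with 8-byte pointers this evaluates to 2 + 2 * 8 = 18 columns.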
ABSL_CONST_INIT SymbolizeUrlEmitter debug_stack_trace_hook = nullptr;
// Async-signal safe mmap allocator.
void* Allocate(size_t num_bytes) {
#ifdef ABSL_HAVE_MMAP
void* p = ::mmap(nullptr, num_bytes, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
return p == MAP_FAILED ? nullptr : p;
#else
(void)num_bytes;
return nullptr;
#endif // ABSL_HAVE_MMAP
}
void Deallocate(void* p, size_t size) {
#ifdef ABSL_HAVE_MMAP
::munmap(p, size);
#else
(void)p;
(void)size;
#endif // ABSL_HAVE_MMAP
}
// Print a program counter only.
void DumpPC(OutputWriter* writer, void* writer_arg, void* const pc,
const char* const prefix) {
char buf[100];
snprintf(buf, sizeof(buf), "%s@ %*p\n", prefix, kPrintfPointerFieldWidth, pc);
writer(buf, writer_arg);
}
// Print a program counter and the corresponding stack frame size.
void DumpPCAndFrameSize(OutputWriter* writer, void* writer_arg, void* const pc,
int framesize, const char* const prefix) {
char buf[100];
if (framesize <= 0) {
snprintf(buf, sizeof(buf), "%s@ %*p (unknown)\n", prefix,
kPrintfPointerFieldWidth, pc);
} else {
snprintf(buf, sizeof(buf), "%s@ %*p %9d\n", prefix,
kPrintfPointerFieldWidth, pc, framesize);
}
writer(buf, writer_arg);
}
// Print a program counter and the corresponding symbol.
void DumpPCAndSymbol(OutputWriter* writer, void* writer_arg, void* const pc,
const char* const prefix) {
char tmp[1024];
const char* symbol = "(unknown)";
// Symbolizes the previous address of pc because pc may be in the
// next function. The overrun happens when the function ends with
// a call to a function annotated noreturn (e.g. CHECK).
// If symbolization of pc-1 fails, also try pc on the off-chance
// that we crashed on the first instruction of a function (that
// actually happens very often for e.g. __restore_rt).
const uintptr_t prev_pc = reinterpret_cast<uintptr_t>(pc) - 1;
if (absl::Symbolize(reinterpret_cast<const char*>(prev_pc), tmp,
sizeof(tmp)) ||
absl::Symbolize(pc, tmp, sizeof(tmp))) {
symbol = tmp;
}
char buf[1024];
snprintf(buf, sizeof(buf), "%s@ %*p %s\n", prefix, kPrintfPointerFieldWidth,
pc, symbol);
writer(buf, writer_arg);
}
// Print a program counter, its stack frame size, and its symbol name.
// Note that there is a separate symbolize_pc argument. Return addresses may be
// at the end of the function, and this allows the caller to back up from pc if
// appropriate.
void DumpPCAndFrameSizeAndSymbol(OutputWriter* writer, void* writer_arg,
void* const pc, void* const symbolize_pc,
int framesize, const char* const prefix) {
char tmp[1024];
const char* symbol = "(unknown)";
if (absl::Symbolize(symbolize_pc, tmp, sizeof(tmp))) {
symbol = tmp;
}
char buf[1024];
if (framesize <= 0) {
snprintf(buf, sizeof(buf), "%s@ %*p (unknown) %s\n", prefix,
kPrintfPointerFieldWidth, pc, symbol);
} else {
snprintf(buf, sizeof(buf), "%s@ %*p %9d %s\n", prefix,
kPrintfPointerFieldWidth, pc, framesize, symbol);
}
writer(buf, writer_arg);
}
} // namespace
void RegisterDebugStackTraceHook(SymbolizeUrlEmitter hook) {
debug_stack_trace_hook = hook;
}
SymbolizeUrlEmitter GetDebugStackTraceHook() { return debug_stack_trace_hook; }
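// A hypothetical hook, for illustration only, that an application could
// register. It may run from a signal handler, so it must be
// async-signal-safe:
//
//   void EmitTraceUrl(void* const stack[], int depth,
//                     absl::debugging_internal::OutputWriter* writer,
//                     void* writer_arg) {
//     // Format stack[0..depth) into a symbolization URL and hand the
//     // resulting text to writer(text, writer_arg).
//   }
//   ...
//   absl::debugging_internal::RegisterDebugStackTraceHook(&EmitTraceUrl);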
// Returns the program counter from signal context, nullptr if
// unknown. vuc is a ucontext_t*. We use void* to avoid the use of
// ucontext_t on non-POSIX systems.
void* GetProgramCounter(void* const vuc) {
#ifdef __linux__
if (vuc != nullptr) {
ucontext_t* context = reinterpret_cast<ucontext_t*>(vuc);
#if defined(__aarch64__)
return reinterpret_cast<void*>(context->uc_mcontext.pc);
#elif defined(__alpha__)
return reinterpret_cast<void*>(context->uc_mcontext.sc_pc);
#elif defined(__arm__)
return reinterpret_cast<void*>(context->uc_mcontext.arm_pc);
#elif defined(__hppa__)
return reinterpret_cast<void*>(context->uc_mcontext.sc_iaoq[0]);
#elif defined(__i386__)
if (14 < ABSL_ARRAYSIZE(context->uc_mcontext.gregs))
return reinterpret_cast<void*>(context->uc_mcontext.gregs[14]);
#elif defined(__ia64__)
return reinterpret_cast<void*>(context->uc_mcontext.sc_ip);
#elif defined(__m68k__)
return reinterpret_cast<void*>(context->uc_mcontext.gregs[16]);
#elif defined(__mips__)
return reinterpret_cast<void*>(context->uc_mcontext.pc);
#elif defined(__powerpc64__)
return reinterpret_cast<void*>(context->uc_mcontext.gp_regs[32]);
#elif defined(__powerpc__)
return reinterpret_cast<void*>(context->uc_mcontext.uc_regs->gregs[32]);
#elif defined(__riscv)
return reinterpret_cast<void*>(context->uc_mcontext.__gregs[REG_PC]);
#elif defined(__s390__) && !defined(__s390x__)
return reinterpret_cast<void*>(context->uc_mcontext.psw.addr & 0x7fffffff);
#elif defined(__s390__) && defined(__s390x__)
return reinterpret_cast<void*>(context->uc_mcontext.psw.addr);
#elif defined(__sh__)
return reinterpret_cast<void*>(context->uc_mcontext.pc);
#elif defined(__sparc__) && !defined(__arch64__)
return reinterpret_cast<void*>(context->uc_mcontext.gregs[19]);
#elif defined(__sparc__) && defined(__arch64__)
return reinterpret_cast<void*>(context->uc_mcontext.mc_gregs[19]);
#elif defined(__x86_64__)
if (16 < ABSL_ARRAYSIZE(context->uc_mcontext.gregs))
return reinterpret_cast<void*>(context->uc_mcontext.gregs[16]);
#elif defined(__e2k__)
return reinterpret_cast<void*>(context->uc_mcontext.cr0_hi);
#elif defined(__loongarch__)
return reinterpret_cast<void*>(context->uc_mcontext.__pc);
#else
#error "Undefined Architecture."
#endif
}
#elif defined(__APPLE__)
if (vuc != nullptr) {
ucontext_t* signal_ucontext = reinterpret_cast<ucontext_t*>(vuc);
#if defined(__aarch64__)
return reinterpret_cast<void*>(
__darwin_arm_thread_state64_get_pc(signal_ucontext->uc_mcontext->__ss));
#elif defined(__arm__)
#if __DARWIN_UNIX03
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__pc);
#else
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.pc);
#endif
#elif defined(__i386__)
#if __DARWIN_UNIX03
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__eip);
#else
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.eip);
#endif
#elif defined(__x86_64__)
#if __DARWIN_UNIX03
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__rip);
#else
return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.rip);
#endif
#endif
}
#elif defined(__akaros__)
auto* ctx = reinterpret_cast<struct user_context*>(vuc);
return reinterpret_cast<void*>(get_user_ctx_pc(ctx));
#endif
static_cast<void>(vuc);
return nullptr;
}
void DumpPCAndFrameSizesAndStackTrace(void* const pc, void* const stack[],
int frame_sizes[], int depth,
int min_dropped_frames,
bool symbolize_stacktrace,
OutputWriter* writer, void* writer_arg) {
if (pc != nullptr) {
// We don't know the stack frame size for PC, use 0.
if (symbolize_stacktrace) {
DumpPCAndFrameSizeAndSymbol(writer, writer_arg, pc, pc, 0, "PC: ");
} else {
DumpPCAndFrameSize(writer, writer_arg, pc, 0, "PC: ");
}
}
for (int i = 0; i < depth; i++) {
if (symbolize_stacktrace) {
// Pass the previous address of pc as the symbol address because pc is a
// return address, and an overrun may occur when the function ends with a
// call to a function annotated noreturn (e.g. CHECK). Note that we don't
// do this for pc above, as the adjustment is only correct for return
// addresses.
DumpPCAndFrameSizeAndSymbol(writer, writer_arg, stack[i],
reinterpret_cast<char*>(stack[i]) - 1,
frame_sizes[i], " ");
} else {
DumpPCAndFrameSize(writer, writer_arg, stack[i], frame_sizes[i], " ");
}
}
if (min_dropped_frames > 0) {
char buf[100];
snprintf(buf, sizeof(buf), " @ ... and at least %d more frames\n",
min_dropped_frames);
writer(buf, writer_arg);
}
}
// Dump current stack trace as directed by writer.
// Make sure this function is not inlined to avoid skipping too many top frames.
ABSL_ATTRIBUTE_NOINLINE
void DumpStackTrace(int min_dropped_frames, int max_num_frames,
bool symbolize_stacktrace, OutputWriter* writer,
void* writer_arg) {
// Print stack trace
void* stack_buf[kDefaultDumpStackFramesLimit];
void** stack = stack_buf;
int num_stack = kDefaultDumpStackFramesLimit;
size_t allocated_bytes = 0;
if (num_stack >= max_num_frames) {
// User requested fewer frames than we already have space for.
num_stack = max_num_frames;
} else {
const size_t needed_bytes =
static_cast<size_t>(max_num_frames) * sizeof(stack[0]);
void* p = Allocate(needed_bytes);
if (p != nullptr) { // We got the space.
num_stack = max_num_frames;
stack = reinterpret_cast<void**>(p);
allocated_bytes = needed_bytes;
}
}
int depth = absl::GetStackTrace(stack, num_stack, min_dropped_frames + 1);
for (int i = 0; i < depth; i++) {
if (symbolize_stacktrace) {
DumpPCAndSymbol(writer, writer_arg, stack[static_cast<size_t>(i)],
" ");
} else {
DumpPC(writer, writer_arg, stack[static_cast<size_t>(i)], " ");
}
}
auto hook = GetDebugStackTraceHook();
if (hook != nullptr) {
(*hook)(stack, depth, writer, writer_arg);
}
if (allocated_bytes != 0) Deallocate(stack, allocated_bytes);
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@@ -0,0 +1,64 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_
#define ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Type of function used for printing in stack trace dumping, etc.
// We avoid closures to keep things simple.
typedef void OutputWriter(const char*, void*);
// RegisterDebugStackTraceHook() allows registering a single routine `hook`
// that is called each time DumpStackTrace() is called.
// `hook` may be called from a signal handler.
typedef void (*SymbolizeUrlEmitter)(void* const stack[], int depth,
OutputWriter* writer, void* writer_arg);
// Registers a SymbolizeUrlEmitter for use inside of a signal handler.
// Because the hook may run from a signal handler, it must be
// async-signal-safe code.
void RegisterDebugStackTraceHook(SymbolizeUrlEmitter hook);
SymbolizeUrlEmitter GetDebugStackTraceHook();
// Returns the program counter from signal context, or nullptr if
// unknown. `vuc` is a ucontext_t*. We use void* to avoid the use of
// ucontext_t on non-POSIX systems.
void* GetProgramCounter(void* const vuc);
// Uses `writer` to dump the program counter, stack trace, and stack
// frame sizes.
void DumpPCAndFrameSizesAndStackTrace(void* const pc, void* const stack[],
int frame_sizes[], int depth,
int min_dropped_frames,
bool symbolize_stacktrace,
OutputWriter* writer, void* writer_arg);
// Dump current stack trace omitting the topmost `min_dropped_frames` stack
// frames.
void DumpStackTrace(int min_dropped_frames, int max_num_frames,
bool symbolize_stacktrace, OutputWriter* writer,
void* writer_arg);
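//
// A minimal usage sketch (hypothetical caller; `WriteToStderr` is not part of
// this header, and a writer used from a signal handler must itself be
// async-signal-safe):
//
//   void WriteToStderr(const char* data, void* /*unused*/) {
//     fputs(data, stderr);
//   }
//   ...
//   absl::debugging_internal::DumpStackTrace(
//       /*min_dropped_frames=*/0, /*max_num_frames=*/64,
//       /*symbolize_stacktrace=*/true, &WriteToStderr, nullptr);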
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_

View file

@@ -0,0 +1,206 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/stack_consumption.h"
#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION
#include <signal.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "absl/base/attributes.h"
#include "absl/base/internal/raw_logging.h"
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
// This code requires that we know the direction in which the stack
// grows. It is commonly believed that this can be detected by putting
// a variable on the stack and then passing its address to a function
// that compares the address of this variable to the address of a
// variable on the function's own stack. However, this is unspecified
// behavior in C++: If two pointers p and q of the same type point to
// different objects that are not members of the same object or
// elements of the same array or to different functions, or if only
// one of them is null, the results of p<q, p>q, p<=q, and p>=q are
// unspecified. Therefore, instead we hardcode the direction of the
// stack on platforms we know about.
#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || \
defined(__aarch64__) || defined(__riscv)
constexpr bool kStackGrowsDown = true;
#else
#error Need to define kStackGrowsDown
#endif
// To measure the stack footprint of some code, we create a signal handler
// (for SIGUSR2 say) that exercises this code on an alternate stack. This
// alternate stack is initialized to some known pattern (0x55, 0x55, 0x55,
// ...). We then self-send this signal, and after the signal handler returns,
// look at the alternate stack buffer to see what portion has been touched.
//
// This trick gives us the stack footprint of the signal handler. But the
// signal handler, even before the code for it is exercised, consumes some
// stack already. However, we only want the stack usage of the code inside
// the signal handler. To measure this accurately, we install two signal
// handlers: one that does nothing and just returns, and the user-provided
// signal handler. The difference between the stack consumption of these two
// signal handlers should give us the stack footprint of interest.
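//
// In outline, GetSignalHandlerStackConsumption() below does roughly this:
//   1. mmap() an alternate stack and install it with sigaltstack().
//   2. Install both handlers with SA_ONSTACK and raise SIGUSR1 once to get
//      one-time signal-delivery setup costs out of the way.
//   3. memset() the alternate stack to kAlternateStackFillValue, raise
//      SIGUSR1 again, and record the baseline consumption.
//   4. Raise SIGUSR2 (the user-provided handler) and subtract the baseline
//      from the newly measured consumption.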
void EmptySignalHandler(int) {}
// This is an arbitrary value, and could be increased further, at the cost of
// memset()ting it all to a known sentinel value.
constexpr int kAlternateStackSize = 64 << 10; // 64KiB
constexpr int kSafetyMargin = 32;
constexpr char kAlternateStackFillValue = 0x55;
// These helper functions look at the alternate stack buffer, and figure
// out what portion of this buffer has been touched - this is the stack
// consumption of the signal handler running on this alternate stack.
// This function will return -1 if the alternate stack buffer has not been
// touched. It will abort the program if the buffer has overflowed or is about
// to overflow.
int GetStackConsumption(const void* const altstack) {
const char* begin;
int increment;
if (kStackGrowsDown) {
begin = reinterpret_cast<const char*>(altstack);
increment = 1;
} else {
begin = reinterpret_cast<const char*>(altstack) + kAlternateStackSize - 1;
increment = -1;
}
for (int usage_count = kAlternateStackSize; usage_count > 0; --usage_count) {
if (*begin != kAlternateStackFillValue) {
ABSL_RAW_CHECK(usage_count <= kAlternateStackSize - kSafetyMargin,
"Buffer has overflowed or is about to overflow");
return usage_count;
}
begin += increment;
}
ABSL_RAW_LOG(FATAL, "Unreachable code");
return -1;
}
} // namespace
int GetSignalHandlerStackConsumption(void (*signal_handler)(int)) {
// The alt-signal-stack cannot be heap allocated because there is a
// bug in glibc-2.2 where some signal handler setup code looks at the
// current stack pointer to figure out what thread is currently running.
// Therefore, the alternate stack must be allocated from the main stack
// itself.
void* altstack = mmap(nullptr, kAlternateStackSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ABSL_RAW_CHECK(altstack != MAP_FAILED, "mmap() failed");
// Set up the alt-signal-stack (and save the older one).
stack_t sigstk;
memset(&sigstk, 0, sizeof(sigstk));
sigstk.ss_sp = altstack;
sigstk.ss_size = kAlternateStackSize;
sigstk.ss_flags = 0;
stack_t old_sigstk;
memset(&old_sigstk, 0, sizeof(old_sigstk));
ABSL_RAW_CHECK(sigaltstack(&sigstk, &old_sigstk) == 0,
"sigaltstack() failed");
// Set up SIGUSR1 and SIGUSR2 signal handlers (and save the older ones).
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
struct sigaction old_sa1, old_sa2;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_ONSTACK;
// SIGUSR1 maps to EmptySignalHandler.
sa.sa_handler = EmptySignalHandler;
ABSL_RAW_CHECK(sigaction(SIGUSR1, &sa, &old_sa1) == 0, "sigaction() failed");
// SIGUSR2 maps to signal_handler.
sa.sa_handler = signal_handler;
ABSL_RAW_CHECK(sigaction(SIGUSR2, &sa, &old_sa2) == 0, "sigaction() failed");
// Send SIGUSR1 signal and measure the stack consumption of the empty
// signal handler.
// The first signal might use more stack space. Run once and ignore the
// results to get that out of the way.
ABSL_RAW_CHECK(kill(getpid(), SIGUSR1) == 0, "kill() failed");
memset(altstack, kAlternateStackFillValue, kAlternateStackSize);
ABSL_RAW_CHECK(kill(getpid(), SIGUSR1) == 0, "kill() failed");
int base_stack_consumption = GetStackConsumption(altstack);
// Send SIGUSR2 signal and measure the stack consumption of signal_handler.
ABSL_RAW_CHECK(kill(getpid(), SIGUSR2) == 0, "kill() failed");
int signal_handler_stack_consumption = GetStackConsumption(altstack);
// Now restore the old alt-signal-stack and signal handlers.
if (old_sigstk.ss_sp == nullptr && old_sigstk.ss_size == 0 &&
(old_sigstk.ss_flags & SS_DISABLE)) {
// https://git.musl-libc.org/cgit/musl/commit/src/signal/sigaltstack.c?id=7829f42a2c8944555439380498ab8b924d0f2070
// The original stack has ss_size==0 and ss_flags==SS_DISABLE, but some
// versions of musl have a bug that rejects ss_size==0. Work around this by
// setting ss_size to MINSIGSTKSZ, which should be ignored by the kernel
// when SS_DISABLE is set.
old_sigstk.ss_size = static_cast<size_t>(MINSIGSTKSZ);
}
ABSL_RAW_CHECK(sigaltstack(&old_sigstk, nullptr) == 0,
"sigaltstack() failed");
ABSL_RAW_CHECK(sigaction(SIGUSR1, &old_sa1, nullptr) == 0,
"sigaction() failed");
ABSL_RAW_CHECK(sigaction(SIGUSR2, &old_sa2, nullptr) == 0,
"sigaction() failed");
ABSL_RAW_CHECK(munmap(altstack, kAlternateStackSize) == 0, "munmap() failed");
if (signal_handler_stack_consumption != -1 && base_stack_consumption != -1) {
return signal_handler_stack_consumption - base_stack_consumption;
}
return -1;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#else
// https://github.com/abseil/abseil-cpp/issues/1465
// CMake builds on Apple platforms error when libraries are empty.
// Our CMake configuration can avoid this error on header-only libraries,
// but since this library is conditionally empty, including a single
// variable is an easy workaround.
#ifdef __APPLE__
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
extern const char kAvoidEmptyStackConsumptionLibraryWarning;
const char kAvoidEmptyStackConsumptionLibraryWarning = 0;
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // __APPLE__
#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION

View file

@@ -0,0 +1,50 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Helper function for measuring stack consumption of signal handlers.
#ifndef ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_
#define ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_
#include "absl/base/config.h"
// The code in this module is not portable.
// Use this feature test macro to detect its availability.
#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION
#error ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION cannot be set directly
#elif !defined(__APPLE__) && !defined(_WIN32) && !defined(__Fuchsia__) && \
(defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || \
defined(__aarch64__) || defined(__riscv))
#define ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION 1
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Returns the stack consumption in bytes for the code exercised by
// signal_handler. To measure stack consumption, signal_handler is registered
// as a signal handler, so the code that it exercises must be async-signal
// safe. The argument of signal_handler is an implementation detail of signal
// handlers and should be ignored by the code for signal_handler. Use global
// variables to pass information between your test code and signal_handler.
int GetSignalHandlerStackConsumption(void (*signal_handler)(int));
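//
// A minimal usage sketch (hypothetical handler; see stack_consumption_test.cc
// for a real one):
//
//   void MyHandler(int) {
//     char buf[1024];
//     // ... touch buf so the compiler cannot optimize it away ...
//   }
//   ...
//   int bytes = absl::debugging_internal::GetSignalHandlerStackConsumption(
//       &MyHandler);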
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION
#endif // ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_

View file

@@ -0,0 +1,50 @@
//
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/stack_consumption.h"
#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION
#include <string.h>
#include "gtest/gtest.h"
#include "absl/log/log.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
static void SimpleSignalHandler(int signo) {
char buf[100];
memset(buf, 'a', sizeof(buf));
// Never true, but prevents compiler from optimizing buf out.
if (signo == 0) {
LOG(INFO) << static_cast<void*>(buf);
}
}
TEST(SignalHandlerStackConsumptionTest, MeasuresStackConsumption) {
  // Our handler should consume a reasonable number of bytes.
EXPECT_GE(GetSignalHandlerStackConsumption(SimpleSignalHandler), 100);
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION

View file

@@ -0,0 +1,285 @@
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_
// Generate stack tracer for aarch64
#if defined(__linux__)
#include <signal.h>
#include <sys/mman.h>
#include <ucontext.h>
#include <unistd.h>
#endif
#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <limits>
#include "absl/base/attributes.h"
#include "absl/debugging/internal/address_is_readable.h"
#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems
#include "absl/debugging/stacktrace.h"
static const size_t kUnknownFrameSize = 0;
// Stack end to use when we don't know the actual stack end
// (effectively just the end of address space).
constexpr uintptr_t kUnknownStackEnd =
std::numeric_limits<size_t>::max() - sizeof(void *);
#if defined(__linux__)
// Returns the address of the VDSO __kernel_rt_sigreturn function, if present.
static const unsigned char* GetKernelRtSigreturnAddress() {
constexpr uintptr_t kImpossibleAddress = 1;
ABSL_CONST_INIT static std::atomic<uintptr_t> memoized{kImpossibleAddress};
uintptr_t address = memoized.load(std::memory_order_relaxed);
if (address != kImpossibleAddress) {
return reinterpret_cast<const unsigned char*>(address);
}
address = reinterpret_cast<uintptr_t>(nullptr);
#ifdef ABSL_HAVE_VDSO_SUPPORT
absl::debugging_internal::VDSOSupport vdso;
if (vdso.IsPresent()) {
absl::debugging_internal::VDSOSupport::SymbolInfo symbol_info;
auto lookup = [&](int type) {
return vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.6.39", type,
&symbol_info);
};
if ((!lookup(STT_FUNC) && !lookup(STT_NOTYPE)) ||
symbol_info.address == nullptr) {
// Unexpected: VDSO is present, yet the expected symbol is missing
// or null.
assert(false && "VDSO is present, but doesn't have expected symbol");
} else {
if (reinterpret_cast<uintptr_t>(symbol_info.address) !=
kImpossibleAddress) {
address = reinterpret_cast<uintptr_t>(symbol_info.address);
} else {
assert(false && "VDSO returned invalid address");
}
}
}
#endif
memoized.store(address, std::memory_order_relaxed);
return reinterpret_cast<const unsigned char*>(address);
}
#endif // __linux__
// Compute the size of a stack frame in [low..high). We assume that
// low < high; otherwise return kUnknownFrameSize.
template<typename T>
static size_t ComputeStackFrameSize(const T* low,
const T* high) {
const char* low_char_ptr = reinterpret_cast<const char *>(low);
const char* high_char_ptr = reinterpret_cast<const char *>(high);
return low < high ? static_cast<size_t>(high_char_ptr - low_char_ptr)
: kUnknownFrameSize;
}
// Saves stack info that is expensive to calculate to avoid recalculating per frame.
struct StackInfo {
uintptr_t stack_low;
uintptr_t stack_high;
uintptr_t sig_stack_low;
uintptr_t sig_stack_high;
};
static bool InsideSignalStack(void** ptr, const StackInfo* stack_info) {
uintptr_t comparable_ptr = reinterpret_cast<uintptr_t>(ptr);
if (stack_info->sig_stack_high == kUnknownStackEnd)
return false;
return (comparable_ptr >= stack_info->sig_stack_low &&
comparable_ptr < stack_info->sig_stack_high);
}
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return null if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template<bool STRICT_UNWINDING, bool WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static void **NextStackFrame(void **old_frame_pointer, const void *uc,
const StackInfo *stack_info) {
void **new_frame_pointer = reinterpret_cast<void**>(*old_frame_pointer);
#if defined(__linux__)
if (WITH_CONTEXT && uc != nullptr) {
// Check to see if next frame's return address is __kernel_rt_sigreturn.
if (old_frame_pointer[1] == GetKernelRtSigreturnAddress()) {
const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
// old_frame_pointer[0] is not suitable for unwinding, look at
// ucontext to discover frame pointer before signal.
void **const pre_signal_frame_pointer =
reinterpret_cast<void **>(ucv->uc_mcontext.regs[29]);
// The most recent signal always needs special handling to find the frame
// pointer, but a nested signal does not. If pre_signal_frame_pointer is
// earlier in the stack than the old_frame_pointer, then use it. If it is
// later, then we have already unwound through it and it needs no special
// handling.
if (pre_signal_frame_pointer >= old_frame_pointer) {
new_frame_pointer = pre_signal_frame_pointer;
}
    }
  }
#endif
  // The frame pointer should be 8-byte aligned.
  if ((reinterpret_cast<uintptr_t>(new_frame_pointer) & 7) != 0)
    return nullptr;
  // Check that alleged frame pointer is actually readable. This is to
  // prevent "double fault" in case we hit the first fault due to e.g.
  // stack corruption.
  if (!absl::debugging_internal::AddressIsReadable(
          new_frame_pointer))
    return nullptr;
// Only check the size if both frames are in the same stack.
if (InsideSignalStack(new_frame_pointer, stack_info) ==
InsideSignalStack(old_frame_pointer, stack_info)) {
// Check frame size. In strict mode, we assume frames to be under
// 100,000 bytes. In non-strict mode, we relax the limit to 1MB.
const size_t max_size = STRICT_UNWINDING ? 100000 : 1000000;
const size_t frame_size =
ComputeStackFrameSize(old_frame_pointer, new_frame_pointer);
if (frame_size == kUnknownFrameSize)
return nullptr;
// A very large frame may mean corrupt memory or an erroneous frame
// pointer. But also maybe just a plain-old large frame. Assume that if the
// frame is within a known stack, then it is valid.
if (frame_size > max_size) {
size_t stack_low = stack_info->stack_low;
size_t stack_high = stack_info->stack_high;
if (InsideSignalStack(new_frame_pointer, stack_info)) {
stack_low = stack_info->sig_stack_low;
stack_high = stack_info->sig_stack_high;
}
if (stack_high < kUnknownStackEnd &&
static_cast<size_t>(getpagesize()) < stack_low) {
const uintptr_t new_fp_u =
reinterpret_cast<uintptr_t>(new_frame_pointer);
// Stack bounds are known.
if (!(stack_low < new_fp_u && new_fp_u <= stack_high)) {
// new_frame_pointer is not within a known stack.
return nullptr;
}
} else {
// Stack bounds are unknown, prefer truncated stack to possible crash.
return nullptr;
}
}
}
return new_frame_pointer;
}
// When PAC-RET (-mbranch-protection=pac-ret) is enabled, return addresses
// stored on the stack will be signed, which means that pointer bits outside of
// the VA range are potentially set. Since the stacktrace code is expected to
// return normal code pointers, this function clears those bits.
inline void* ClearPacBits(void* ptr) {
register void* x30 __asm__("x30") = ptr;
// The normal instruction for clearing PAC bits is XPACI, but for
// compatibility with ARM platforms that do not support pointer
// authentication, we use the hint space instruction XPACLRI instead. Hint
// space instructions behave as NOPs on unsupported platforms.
#define ABSL_XPACLRI_HINT "hint #0x7;"
asm(ABSL_XPACLRI_HINT : "+r"(x30)); // asm("xpaclri" : "+r"(x30));
#undef ABSL_XPACLRI_HINT
return x30;
}
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
// We count on the bottom frame being this one. See the comment
// at prev_return_address
ABSL_ATTRIBUTE_NOINLINE
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
#ifdef __GNUC__
void **frame_pointer = reinterpret_cast<void**>(__builtin_frame_address(0));
#else
# error reading stack pointer not yet supported on this platform.
#endif
skip_count++; // Skip the frame for this function.
int n = 0;
// Assume that the first page is not stack.
StackInfo stack_info;
stack_info.stack_low = static_cast<uintptr_t>(getpagesize());
stack_info.stack_high = kUnknownStackEnd;
stack_info.sig_stack_low = stack_info.stack_low;
stack_info.sig_stack_high = kUnknownStackEnd;
  // The frame pointer points to the low address of a frame. The first 64-bit
  // word of a frame points to the next frame up the call chain, which normally
  // is just after the high address of the current frame. The second word of
  // a frame contains the return address to the caller. To find a pc value
  // associated with the current frame, we need to go down a level in the call
  // chain. So we remember the return address of the last frame seen. This
  // does not work for the first stack frame, which belongs to UnwindImpl(),
  // but we skip the frame for UnwindImpl() anyway.
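  // Frame record layout assumed here (AAPCS64 with frame pointers enabled):
  //   frame_pointer[0] holds the caller's frame pointer (the next record up),
  //   frame_pointer[1] holds the return address into the caller.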
void* prev_return_address = nullptr;
  // The nth frame size is the difference between the nth frame pointer and
  // the frame pointer below it in the call chain. There is no frame below the
// leaf frame, but this function is the leaf anyway, and we skip it.
void** prev_frame_pointer = nullptr;
while (frame_pointer && n < max_depth) {
if (skip_count > 0) {
skip_count--;
} else {
result[n] = ClearPacBits(prev_return_address);
if (IS_STACK_FRAMES) {
sizes[n] = static_cast<int>(
ComputeStackFrameSize(prev_frame_pointer, frame_pointer));
}
n++;
}
prev_return_address = frame_pointer[1];
prev_frame_pointer = frame_pointer;
// The absl::GetStackFrames routine is called when we are in some
// informational context (the failure signal handler for example).
// Use the non-strict unwinding rules to produce a stack trace
// that is as complete as possible (even if it contains a few bogus
// entries in some rare cases).
frame_pointer = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(
frame_pointer, ucp, &stack_info);
}
if (min_dropped_frames != nullptr) {
    // Implementation detail: we clamp the maximum number of frames we are
    // willing to count, so as not to spend too much time in the loop below.
const int kMaxUnwind = 200;
int num_dropped_frames = 0;
for (int j = 0; frame_pointer != nullptr && j < kMaxUnwind; j++) {
if (skip_count > 0) {
skip_count--;
} else {
num_dropped_frames++;
}
frame_pointer = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(
frame_pointer, ucp, &stack_info);
}
*min_dropped_frames = num_dropped_frames;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return true;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_

View file

@@ -0,0 +1,139 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This is inspired by Craig Silverstein's PowerPC stacktrace code.
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_
#include <cstdint>
#include "absl/debugging/stacktrace.h"
// WARNING:
// This only works if all your code is in either ARM or THUMB mode. With
// interworking, the frame pointer of the caller can either be in r11 (ARM
// mode) or r7 (THUMB mode). A callee only saves the frame pointer of its
// mode in a fixed location on its stack frame. If the caller is a different
// mode, there is no easy way to find the frame pointer. It can either be
// still in the designated register or saved on stack along with other callee
// saved registers.
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return nullptr if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template<bool STRICT_UNWINDING>
static void **NextStackFrame(void **old_sp) {
void **new_sp = (void**) old_sp[-1];
// Check that the transition from frame pointer old_sp to frame
// pointer new_sp isn't clearly bogus
if (STRICT_UNWINDING) {
    // With the stack growing downwards, an older stack frame must be
    // at a greater address than the current one.
if (new_sp <= old_sp) return nullptr;
// Assume stack frames larger than 100,000 bytes are bogus.
if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return nullptr;
} else {
// In the non-strict mode, allow discontiguous stack frames.
// (alternate-signal-stacks for example).
if (new_sp == old_sp) return nullptr;
    // And allow frames up to about 1MB.
if ((new_sp > old_sp)
&& ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return nullptr;
}
if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return nullptr;
return new_sp;
}
// This ensures that absl::GetStackTrace sets up the Link Register properly.
#ifdef __GNUC__
void StacktraceArmDummyFunction() __attribute__((noinline));
void StacktraceArmDummyFunction() { __asm__ volatile(""); }
#else
# error StacktraceArmDummyFunction() needs to be ported to this platform.
#endif
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count,
const void * /* ucp */, int *min_dropped_frames) {
#ifdef __GNUC__
void **sp = reinterpret_cast<void**>(__builtin_frame_address(0));
#else
# error reading stack pointer not yet supported on this platform.
#endif
// On ARM, the return address is stored in the link register (r14).
// This is not saved on the stack frame of a leaf function. To
// simplify code that reads return addresses, we call a dummy
// function so that the return address of this function is also
// stored in the stack frame. This works at least for gcc.
StacktraceArmDummyFunction();
int n = 0;
while (sp && n < max_depth) {
// The absl::GetStackFrames routine is called when we are in some
// informational context (the failure signal handler for example).
// Use the non-strict unwinding rules to produce a stack trace
// that is as complete as possible (even if it contains a few bogus
// entries in some rare cases).
void **next_sp = NextStackFrame<!IS_STACK_FRAMES>(sp);
if (skip_count > 0) {
skip_count--;
} else {
result[n] = *sp;
if (IS_STACK_FRAMES) {
if (next_sp > sp) {
sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
// A frame-size of 0 is used to indicate unknown frame size.
sizes[n] = 0;
}
}
n++;
}
sp = next_sp;
}
if (min_dropped_frames != nullptr) {
    // Implementation detail: we clamp the maximum number of frames we are
    // willing to count, so as not to spend too much time in the loop below.
const int kMaxUnwind = 200;
int num_dropped_frames = 0;
for (int j = 0; sp != nullptr && j < kMaxUnwind; j++) {
if (skip_count > 0) {
skip_count--;
} else {
num_dropped_frames++;
}
sp = NextStackFrame<!IS_STACK_FRAMES>(sp);
}
*min_dropped_frames = num_dropped_frames;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return false;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_

View file

@@ -0,0 +1,95 @@
/*
* Copyright 2017 The Abseil Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Defines ABSL_STACKTRACE_INL_HEADER to the *-inl.h containing
* actual unwinder implementation.
* This header is "private" to stacktrace.cc.
* DO NOT include it into any other files.
*/
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_
#include "absl/base/config.h"
#if defined(ABSL_STACKTRACE_INL_HEADER)
#error ABSL_STACKTRACE_INL_HEADER cannot be directly set
#elif defined(_WIN32)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_win32-inl.inc"
#elif defined(__APPLE__)
#ifdef ABSL_HAVE_THREAD_LOCAL
// Thread local support required for UnwindImpl.
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_generic-inl.inc"
#endif // defined(ABSL_HAVE_THREAD_LOCAL)
// Emscripten stacktraces rely on JS. Do not use them in standalone mode.
#elif defined(__EMSCRIPTEN__) && !defined(STANDALONE_WASM)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_emscripten-inl.inc"
#elif defined(__ANDROID__) && __ANDROID_API__ >= 33
// Use the generic implementation for Android 33+ (Android T+). This is the
// first version of Android for which <execinfo.h> implements backtrace().
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_generic-inl.inc"
#elif defined(__linux__) && !defined(__ANDROID__)
#if defined(NO_FRAME_POINTER) && \
(defined(__i386__) || defined(__x86_64__) || defined(__aarch64__))
// Note: The libunwind-based implementation is not available to open-source
// users.
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_libunwind-inl.inc"
#define STACKTRACE_USES_LIBUNWIND 1
#elif defined(NO_FRAME_POINTER) && defined(__has_include)
#if __has_include(<execinfo.h>)
// Note: When using glibc this may require -funwind-tables to function properly.
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_generic-inl.inc"
#endif // __has_include(<execinfo.h>)
#elif defined(__i386__) || defined(__x86_64__)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_x86-inl.inc"
#elif defined(__ppc__) || defined(__PPC__)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_powerpc-inl.inc"
#elif defined(__aarch64__)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_aarch64-inl.inc"
#elif defined(__riscv)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_riscv-inl.inc"
#elif defined(__has_include)
#if __has_include(<execinfo.h>)
// Note: When using glibc this may require -funwind-tables to function properly.
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_generic-inl.inc"
#endif // __has_include(<execinfo.h>)
#endif // defined(__has_include)
#endif // defined(__linux__) && !defined(__ANDROID__)
// Fallback to the empty implementation.
#if !defined(ABSL_STACKTRACE_INL_HEADER)
#define ABSL_STACKTRACE_INL_HEADER \
"absl/debugging/internal/stacktrace_unimplemented-inl.inc"
#endif
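// Usage sketch: the selected implementation is expected to be pulled into
// stacktrace.cc via the macro defined above, roughly:
//
//   #include "absl/debugging/internal/stacktrace_config.h"
//   #if defined(ABSL_STACKTRACE_INL_HEADER)
//   #include ABSL_STACKTRACE_INL_HEADER
//   #endif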
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_

View file

@@ -0,0 +1,110 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Portable implementation - just use glibc
//
// Note: The glibc implementation may cause a call to malloc.
// This can cause a deadlock in HeapProfiler.
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_
#include <emscripten.h>
#include <atomic>
#include <cstring>
#include "absl/base/attributes.h"
#include "absl/debugging/stacktrace.h"
extern "C" {
uintptr_t emscripten_stack_snapshot();
uint32_t emscripten_stack_unwind_buffer(uintptr_t pc, void *buffer,
uint32_t depth);
}
// Sometimes, we can try to get a stack trace from within a stack
// trace, which can cause a self-deadlock.
// Protect against such reentrant call by failing to get a stack trace.
//
// We use __thread here because the code here is extremely low level -- it is
// called while collecting stack traces from within malloc and mmap, and thus
// can not call anything which might call malloc or mmap itself.
static __thread int recursive = 0;
// The stack trace function might be invoked very early in the program's
// execution (e.g. from the very first malloc).
// As such, we suppress usage of backtrace during this early stage of execution.
static std::atomic<bool> disable_stacktraces(true); // Disabled until healthy.
// Waiting until static initializers run seems to be late enough.
// This file is included into stacktrace.cc so this will only run once.
ABSL_ATTRIBUTE_UNUSED static int stacktraces_enabler = []() {
// Check if we can even create stacktraces. If not, bail early and leave
// disable_stacktraces set as-is.
// clang-format off
if (!EM_ASM_INT({ return (typeof wasmOffsetConverter !== 'undefined'); })) {
return 0;
}
// clang-format on
disable_stacktraces.store(false, std::memory_order_relaxed);
return 0;
}();
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
static int UnwindImpl(void **result, int *sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
if (recursive || disable_stacktraces.load(std::memory_order_relaxed)) {
return 0;
}
++recursive;
static_cast<void>(ucp); // Unused.
constexpr int kStackLength = 64;
void *stack[kStackLength];
int size;
uintptr_t pc = emscripten_stack_snapshot();
size = emscripten_stack_unwind_buffer(pc, stack, kStackLength);
int result_count = size - skip_count;
if (result_count < 0) result_count = 0;
if (result_count > max_depth) result_count = max_depth;
for (int i = 0; i < result_count; i++) result[i] = stack[i + skip_count];
if (IS_STACK_FRAMES) {
// No implementation for finding out the stack frame sizes yet.
memset(sizes, 0, sizeof(*sizes) * result_count);
}
if (min_dropped_frames != nullptr) {
if (size - skip_count - max_depth > 0) {
*min_dropped_frames = size - skip_count - max_depth;
} else {
*min_dropped_frames = 0;
}
}
--recursive;
return result_count;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() { return true; }
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_EMSCRIPTEN_INL_H_

View file

@@ -0,0 +1,108 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Portable implementation - just use glibc
//
// Note: The glibc implementation may cause a call to malloc.
// This can cause a deadlock in HeapProfiler.
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_
#include <execinfo.h>
#include <atomic>
#include <cstring>
#include "absl/debugging/stacktrace.h"
#include "absl/base/attributes.h"
// Sometimes, we can try to get a stack trace from within a stack
// trace, because we don't block signals inside this code (which would be too
// expensive: the two extra system calls per stack trace do matter here).
// That can cause a self-deadlock.
// Protect against such reentrant call by failing to get a stack trace.
//
// We use __thread here because the code here is extremely low level -- it is
// called while collecting stack traces from within malloc and mmap, and thus
// can not call anything which might call malloc or mmap itself.
static __thread int recursive = 0;
// The stack trace function might be invoked very early in the program's
// execution (e.g. from the very first malloc if using tcmalloc). Also, the
// glibc implementation itself will trigger malloc the first time it is called.
// As such, we suppress usage of backtrace during this early stage of execution.
static std::atomic<bool> disable_stacktraces(true); // Disabled until healthy.
// Waiting until static initializers run seems to be late enough.
// This file is included into stacktrace.cc so this will only run once.
ABSL_ATTRIBUTE_UNUSED static int stacktraces_enabler = []() {
void* unused_stack[1];
// Force the first backtrace to happen early to get the one-time shared lib
// loading (allocation) out of the way. After the first call it is much safer
// to use backtrace from a signal handler if we crash somewhere later.
backtrace(unused_stack, 1);
disable_stacktraces.store(false, std::memory_order_relaxed);
return 0;
}();
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
if (recursive || disable_stacktraces.load(std::memory_order_relaxed)) {
return 0;
}
++recursive;
static_cast<void>(ucp); // Unused.
static const int kStackLength = 64;
void * stack[kStackLength];
int size;
size = backtrace(stack, kStackLength);
skip_count++; // we want to skip the current frame as well
int result_count = size - skip_count;
if (result_count < 0)
result_count = 0;
if (result_count > max_depth)
result_count = max_depth;
for (int i = 0; i < result_count; i++)
result[i] = stack[i + skip_count];
if (IS_STACK_FRAMES) {
// No implementation for finding out the stack frame sizes yet.
memset(sizes, 0, sizeof(*sizes) * static_cast<size_t>(result_count));
}
if (min_dropped_frames != nullptr) {
if (size - skip_count - max_depth > 0) {
*min_dropped_frames = size - skip_count - max_depth;
} else {
*min_dropped_frames = 0;
}
}
--recursive;
return result_count;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return true;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_

View file

@@ -0,0 +1,258 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Produce stack trace. I'm guessing (hoping!) the code is much like
// for x86. For apple machines, at least, it seems to be; see
// https://developer.apple.com/documentation/mac/runtimehtml/RTArch-59.html
// https://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_
#if defined(__linux__)
#include <asm/ptrace.h> // for PT_NIP.
#include <ucontext.h> // for ucontext_t
#endif
#include <unistd.h>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/base/port.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/internal/address_is_readable.h"
#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems
// Given a stack pointer, return the saved link register value.
// Note that this is the link register for a callee.
static inline void *StacktracePowerPCGetLR(void **sp) {
// PowerPC has 3 main ABIs, which say where in the stack the
// Link Register is. For DARWIN and AIX (used by apple and
// linux ppc64), it's in sp[2]. For SYSV (used by linux ppc),
// it's in sp[1].
#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
return *(sp+2);
#elif defined(_CALL_SYSV)
return *(sp+1);
#elif defined(__APPLE__) || defined(__FreeBSD__) || \
(defined(__linux__) && defined(__PPC64__))
// This check is in case the compiler doesn't define _CALL_AIX/etc.
return *(sp+2);
#elif defined(__linux)
// This check is in case the compiler doesn't define _CALL_SYSV.
return *(sp+1);
#else
#error Need to specify the PPC ABI for your architecture.
#endif
}
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return null if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template<bool STRICT_UNWINDING, bool IS_WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static void **NextStackFrame(void **old_sp, const void *uc) {
void **new_sp = (void **) *old_sp;
enum { kStackAlignment = 16 };
// Check that the transition from frame pointer old_sp to frame
// pointer new_sp isn't clearly bogus
if (STRICT_UNWINDING) {
    // With the stack growing downwards, an older stack frame must be
    // at a greater address than the current one.
if (new_sp <= old_sp) return nullptr;
// Assume stack frames larger than 100,000 bytes are bogus.
if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return nullptr;
} else {
// In the non-strict mode, allow discontiguous stack frames.
// (alternate-signal-stacks for example).
if (new_sp == old_sp) return nullptr;
// And allow frames up to about 1MB.
if ((new_sp > old_sp)
&& ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return nullptr;
}
if ((uintptr_t)new_sp % kStackAlignment != 0) return nullptr;
#if defined(__linux__)
enum StackTraceKernelSymbolStatus {
kNotInitialized = 0, kAddressValid, kAddressInvalid };
if (IS_WITH_CONTEXT && uc != nullptr) {
static StackTraceKernelSymbolStatus kernel_symbol_status =
kNotInitialized; // Sentinel: not computed yet.
// Initialize with a sentinel value: __kernel_sigtramp_rt64 cannot
// possibly be there.
static const unsigned char *kernel_sigtramp_rt64_address = nullptr;
if (kernel_symbol_status == kNotInitialized) {
absl::debugging_internal::VDSOSupport vdso;
if (vdso.IsPresent()) {
absl::debugging_internal::VDSOSupport::SymbolInfo
sigtramp_rt64_symbol_info;
if (!vdso.LookupSymbol(
"__kernel_sigtramp_rt64", "LINUX_2.6.15",
absl::debugging_internal::VDSOSupport::kVDSOSymbolType,
&sigtramp_rt64_symbol_info) ||
sigtramp_rt64_symbol_info.address == nullptr) {
// Unexpected: VDSO is present, yet the expected symbol is missing
// or null.
assert(false && "VDSO is present, but doesn't have expected symbol");
kernel_symbol_status = kAddressInvalid;
} else {
kernel_sigtramp_rt64_address =
reinterpret_cast<const unsigned char *>(
sigtramp_rt64_symbol_info.address);
kernel_symbol_status = kAddressValid;
}
} else {
kernel_symbol_status = kAddressInvalid;
}
}
if (new_sp != nullptr &&
kernel_symbol_status == kAddressValid &&
StacktracePowerPCGetLR(new_sp) == kernel_sigtramp_rt64_address) {
const ucontext_t* signal_context =
reinterpret_cast<const ucontext_t*>(uc);
void **const sp_before_signal =
#if defined(__PPC64__)
reinterpret_cast<void **>(signal_context->uc_mcontext.gp_regs[PT_R1]);
#else
reinterpret_cast<void **>(
signal_context->uc_mcontext.uc_regs->gregs[PT_R1]);
#endif
// Check that alleged sp before signal is nonnull and is reasonably
// aligned.
if (sp_before_signal != nullptr &&
((uintptr_t)sp_before_signal % kStackAlignment) == 0) {
// Check that alleged stack pointer is actually readable. This is to
// prevent a "double fault" in case we hit the first fault due to e.g.
// a stack corruption.
if (absl::debugging_internal::AddressIsReadable(sp_before_signal)) {
// Alleged stack pointer is readable, use it for further unwinding.
new_sp = sp_before_signal;
}
}
}
}
#endif
return new_sp;
}
// This ensures that absl::GetStackTrace sets up the Link Register properly.
ABSL_ATTRIBUTE_NOINLINE static void AbslStacktracePowerPCDummyFunction() {
ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
}
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
void **sp;
// Apple macOS uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
// and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
// different asm syntax. I don't know quite the best way to discriminate
// systems using the old as from the new one; I've gone with __APPLE__.
#ifdef __APPLE__
__asm__ volatile ("mr %0,r1" : "=r" (sp));
#else
__asm__ volatile ("mr %0,1" : "=r" (sp));
#endif
// On PowerPC, the "Link Register" or "Link Record" (LR), is a stack
// entry that holds the return address of the subroutine call (what
// instruction we run after our function finishes). This is the
// same as the stack-pointer of our parent routine, which is what we
// want here. While the compiler will always(?) set up LR for
// subroutine calls, it may not for leaf functions (such as this one).
// This routine forces the compiler (at least gcc) to push it anyway.
AbslStacktracePowerPCDummyFunction();
// The LR save area is used by the callee, so the top entry is bogus.
skip_count++;
int n = 0;
// Unlike ABIs of X86 and ARM, PowerPC ABIs say that return address (in
// the link register) of a function call is stored in the caller's stack
// frame instead of the callee's. When we look for the return address
// associated with a stack frame, we need to make sure that there is a
// caller frame before it. So we call NextStackFrame before entering the
// loop below and check next_sp instead of sp for loop termination.
// The outermost frame is set up by runtimes and it does not have a
// caller frame, so it is skipped.
// The absl::GetStackFrames routine is called when we are in some
// informational context (the failure signal handler for example).
// Use the non-strict unwinding rules to produce a stack trace
// that is as complete as possible (even if it contains a few
// bogus entries in some rare cases).
void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
while (next_sp && n < max_depth) {
if (skip_count > 0) {
skip_count--;
} else {
result[n] = StacktracePowerPCGetLR(sp);
if (IS_STACK_FRAMES) {
if (next_sp > sp) {
sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
// A frame-size of 0 is used to indicate unknown frame size.
sizes[n] = 0;
}
}
n++;
}
sp = next_sp;
next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
}
if (min_dropped_frames != nullptr) {
// Implementation detail: we clamp the maximum number of frames we are
// willing to count, so as not to spend too much time in the loop below.
const int kMaxUnwind = 1000;
int num_dropped_frames = 0;
for (int j = 0; next_sp != nullptr && j < kMaxUnwind; j++) {
if (skip_count > 0) {
skip_count--;
} else {
num_dropped_frames++;
}
next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(next_sp, ucp);
}
*min_dropped_frames = num_dropped_frames;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return true;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_

View file

@ -0,0 +1,194 @@
// Copyright 2021 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_RISCV_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_RISCV_INL_H_
// Generate stack trace for riscv
#include <sys/ucontext.h>
#include "absl/base/config.h"
#if defined(__linux__)
#include <sys/mman.h>
#include <ucontext.h>
#include <unistd.h>
#endif
#include <atomic>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <limits>
#include <utility>
#include "absl/base/attributes.h"
#include "absl/debugging/stacktrace.h"
static constexpr ptrdiff_t kUnknownFrameSize = 0;
// Compute the size of a stack frame in [low..high). We assume that low < high;
// if not, kUnknownFrameSize is returned.
template <typename T>
static inline ptrdiff_t ComputeStackFrameSize(const T *low, const T *high) {
const char *low_char_ptr = reinterpret_cast<const char *>(low);
const char *high_char_ptr = reinterpret_cast<const char *>(high);
return low < high ? static_cast<ptrdiff_t>(high_char_ptr - low_char_ptr)
: kUnknownFrameSize;
}
// Given a pointer to a stack frame, locate and return the calling stackframe,
// or return null if no stackframe can be found. Perform sanity checks (the
// strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template <bool STRICT_UNWINDING, bool WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static void ** NextStackFrame(void **old_frame_pointer, const void *uc,
const std::pair<size_t, size_t> range) {
// .
// .
// .
// +-> +----------------+
// | | return address |
// | | previous fp |
// | | ... |
// | +----------------+ <-+
// | | return address | |
// +---|- previous fp | |
// | ... | |
// $fp ->|----------------+ |
// | return address | |
// | previous fp -|---+
// $sp ->| ... |
// +----------------+
void **new_frame_pointer = reinterpret_cast<void **>(old_frame_pointer[-2]);
uintptr_t frame_pointer = reinterpret_cast<uintptr_t>(new_frame_pointer);
// The RISCV ELF psABI mandates that the stack pointer is always 16-byte
// aligned.
// TODO(#1236) this doesn't hold for ILP32E which only mandates a 4-byte
// alignment.
if (frame_pointer & 15)
return nullptr;
// If the new frame pointer matches the signal context, avoid terminating
// early to deal with alternate signal stacks.
if (WITH_CONTEXT)
if (const ucontext_t *ucv = static_cast<const ucontext_t *>(uc))
// RISCV ELF psABI has the frame pointer at x8/fp/s0.
// -- RISCV psABI Table 18.2
if (ucv->uc_mcontext.__gregs[8] == frame_pointer)
return new_frame_pointer;
// Check frame size. In strict mode, we assume frames to be under 100,000
// bytes. In non-strict mode, we relax the limit to 1MB.
const ptrdiff_t max_size = STRICT_UNWINDING ? 100000 : 1000000;
const ptrdiff_t frame_size =
ComputeStackFrameSize(old_frame_pointer, new_frame_pointer);
if (frame_size == kUnknownFrameSize) {
if (STRICT_UNWINDING)
return nullptr;
// In non-strict mode permit non-contiguous stacks (e.g. alternate signal
// frame handling).
if (reinterpret_cast<uintptr_t>(new_frame_pointer) < range.first ||
reinterpret_cast<uintptr_t>(new_frame_pointer) > range.second)
return nullptr;
}
if (frame_size > max_size)
return nullptr;
return new_frame_pointer;
}
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static int UnwindImpl(void **result, int *sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
// The `frame_pointer` that is computed here points to the top of the frame.
// The two words preceding the address are the return address and the previous
// frame pointer.
#if defined(__GNUC__)
void **frame_pointer = reinterpret_cast<void **>(__builtin_frame_address(0));
#else
#error reading the frame pointer is not yet supported on this platform
#endif
std::pair<size_t, size_t> stack = {
// assume that the first page is not the stack.
static_cast<size_t>(sysconf(_SC_PAGESIZE)),
std::numeric_limits<size_t>::max() - sizeof(void *)
};
int n = 0;
void *return_address = nullptr;
while (frame_pointer && n < max_depth) {
return_address = frame_pointer[-1];
// The absl::GetStackFrames routine is called when we are in some
// informational context (the failure signal handler for example). Use the
// non-strict unwinding rules to produce a stack trace that is as complete
// as possible (even if it contains a few bogus entries in some rare cases).
void **next_frame_pointer =
NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(frame_pointer, ucp,
stack);
if (skip_count > 0) {
skip_count--;
} else {
result[n] = return_address;
if (IS_STACK_FRAMES) {
// NextStackFrame() has already checked that the frame size fits in an int.
sizes[n] = static_cast<int>(ComputeStackFrameSize(frame_pointer,
next_frame_pointer));
}
n++;
}
frame_pointer = next_frame_pointer;
}
if (min_dropped_frames != nullptr) {
// Implementation detail: we clamp the maximum number of frames we are
// willing to count, so as not to spend too much time in the loop below.
const int kMaxUnwind = 200;
int num_dropped_frames = 0;
for (int j = 0; frame_pointer != nullptr && j < kMaxUnwind; j++) {
if (skip_count > 0) {
skip_count--;
} else {
num_dropped_frames++;
}
frame_pointer =
NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(frame_pointer, ucp,
stack);
}
*min_dropped_frames = num_dropped_frames;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() { return true; }
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif

View file

@ -0,0 +1,24 @@
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
static int UnwindImpl(void** /* result */, int* /* sizes */,
int /* max_depth */, int /* skip_count */,
const void* /* ucp */, int *min_dropped_frames) {
if (min_dropped_frames != nullptr) {
*min_dropped_frames = 0;
}
return 0;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return false;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_

View file

@ -0,0 +1,94 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Produces a stack trace for Windows. Normally, one could use
// stacktrace_x86-inl.h or stacktrace_x86_64-inl.h -- and indeed, that
// should work for binaries compiled using MSVC in "debug" mode.
// However, in "release" mode, Windows uses frame-pointer
// optimization, which makes getting a stack trace very difficult.
//
// There are several approaches one can take. One is to use Windows
// intrinsics like StackWalk64. These can work, but have restrictions
// on how successful they can be. Another attempt is to write a
// version of stacktrace_x86-inl.h that has heuristic support for
// dealing with FPO, similar to what WinDbg does (see
// http://www.nynaeve.net/?p=97). There are (non-working) examples of
// these approaches, complete with TODOs, in stacktrace_win32-inl.h#1
//
// The solution we've ended up doing is to call the undocumented
// windows function RtlCaptureStackBackTrace, which probably doesn't
// work with FPO but at least is fast, and doesn't require a symbol
// server.
//
// This code is inspired by a patch from David Vitek:
// https://code.google.com/p/google-perftools/issues/detail?id=83
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_
#include <windows.h> // for GetProcAddress and GetModuleHandle
#include <cassert>
#include <cstring>  // for memset
typedef USHORT NTAPI RtlCaptureStackBackTrace_Function(
IN ULONG frames_to_skip,
IN ULONG frames_to_capture,
OUT PVOID *backtrace,
OUT PULONG backtrace_hash);
// It is not possible to load RtlCaptureStackBackTrace at static init time in
// UWP. CaptureStackBackTrace is the public version of RtlCaptureStackBackTrace.
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn =
&::CaptureStackBackTrace;
#else
// Load the function we need at static init time, where we don't have
// to worry about someone else holding the loader's lock.
static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn =
(RtlCaptureStackBackTrace_Function*)GetProcAddress(
GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace");
#endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count,
const void*, int* min_dropped_frames) {
USHORT n = 0;
if (!RtlCaptureStackBackTrace_fn || skip_count < 0 || max_depth < 0) {
// can't get a stacktrace with no function/invalid args
} else {
n = RtlCaptureStackBackTrace_fn(static_cast<ULONG>(skip_count) + 2,
static_cast<ULONG>(max_depth), result, 0);
}
if (IS_STACK_FRAMES) {
// No implementation for finding out the stack frame sizes yet.
memset(sizes, 0, sizeof(*sizes) * n);
}
if (min_dropped_frames != nullptr) {
// Not implemented.
*min_dropped_frames = 0;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return false;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_

View file

@ -0,0 +1,394 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Produce stack trace
#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_
#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
#include <ucontext.h> // for ucontext_t
#endif
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <cassert>
#include <cstdint>
#include <limits>
#include "absl/base/attributes.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/debugging/internal/address_is_readable.h"
#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems
#include "absl/debugging/stacktrace.h"
using absl::debugging_internal::AddressIsReadable;
#if defined(__linux__) && defined(__i386__)
// Count "push %reg" instructions in VDSO __kernel_vsyscall(),
// preceding "syscall" or "sysenter".
// If __kernel_vsyscall uses frame pointer, answer 0.
//
// kMaxBytes tells how many instruction bytes of __kernel_vsyscall
// to analyze before giving up. Up to kMaxBytes+1 bytes of
// instructions could be accessed.
//
// Here are known __kernel_vsyscall instruction sequences:
//
// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S).
// Used on Intel.
// 0xffffe400 <__kernel_vsyscall+0>: push %ecx
// 0xffffe401 <__kernel_vsyscall+1>: push %edx
// 0xffffe402 <__kernel_vsyscall+2>: push %ebp
// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp
// 0xffffe405 <__kernel_vsyscall+5>: sysenter
//
// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S).
// Used on AMD.
// 0xffffe400 <__kernel_vsyscall+0>: push %ebp
// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp
// 0xffffe403 <__kernel_vsyscall+3>: syscall
//
// The sequence below isn't actually expected in practice and is listed
// here only for completeness.
// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S)
// 0xffffe400 <__kernel_vsyscall+0>: int $0x80
// 0xffffe401 <__kernel_vsyscall+1>: ret
//
static const int kMaxBytes = 10;
// We use assert()s instead of DCHECK()s -- this is too low level
// for DCHECK().
static int CountPushInstructions(const unsigned char *const addr) {
int result = 0;
for (int i = 0; i < kMaxBytes; ++i) {
if (addr[i] == 0x89) {
// "mov reg,reg"
if (addr[i + 1] == 0xE5) {
// Found "mov %esp,%ebp".
return 0;
}
++i; // Skip register encoding byte.
} else if (addr[i] == 0x0F &&
(addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) {
// Found "sysenter" or "syscall".
return result;
} else if ((addr[i] & 0xF0) == 0x50) {
// Found "push %reg".
++result;
} else if (addr[i] == 0xCD && addr[i + 1] == 0x80) {
// Found "int $0x80"
assert(result == 0);
return 0;
} else {
// Unexpected instruction.
assert(false && "unexpected instruction in __kernel_vsyscall");
return 0;
}
}
// Unexpected: didn't find SYSENTER or SYSCALL in
// [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval.
assert(false && "did not find SYSENTER or SYSCALL in __kernel_vsyscall");
return 0;
}
#endif
// Assume stack frames larger than 100,000 bytes are bogus.
static const int kMaxFrameBytes = 100000;
// Stack end to use when we don't know the actual stack end
// (effectively just the end of address space).
constexpr uintptr_t kUnknownStackEnd =
std::numeric_limits<size_t>::max() - sizeof(void *);
// Returns the stack frame pointer from signal context, 0 if unknown.
// vuc is a ucontext_t *. We use void* to avoid the use
// of ucontext_t on non-POSIX systems.
static uintptr_t GetFP(const void *vuc) {
#if !defined(__linux__)
static_cast<void>(vuc); // Avoid an unused argument compiler warning.
#else
if (vuc != nullptr) {
auto *uc = reinterpret_cast<const ucontext_t *>(vuc);
#if defined(__i386__)
const auto bp = uc->uc_mcontext.gregs[REG_EBP];
const auto sp = uc->uc_mcontext.gregs[REG_ESP];
#elif defined(__x86_64__)
const auto bp = uc->uc_mcontext.gregs[REG_RBP];
const auto sp = uc->uc_mcontext.gregs[REG_RSP];
#else
const uintptr_t bp = 0;
const uintptr_t sp = 0;
#endif
// Sanity-check that the base pointer is valid. It's possible that some
// code in the process is compiled with --copt=-fomit-frame-pointer or
// --copt=-momit-leaf-frame-pointer.
//
// TODO(bcmills): -momit-leaf-frame-pointer is currently the default
// behavior when building with clang. Talk to the C++ toolchain team about
// fixing that.
if (bp >= sp && bp - sp <= kMaxFrameBytes)
return static_cast<uintptr_t>(bp);
// If bp isn't a plausible frame pointer, return the stack pointer instead.
// If we're lucky, it points to the start of a stack frame; otherwise, we'll
// get one frame of garbage in the stack trace and fail the sanity check on
// the next iteration.
return static_cast<uintptr_t>(sp);
}
#endif
return 0;
}
// Given a pointer to a stack frame, locate and return the calling
// stackframe, or return null if no stackframe can be found. Perform sanity
// checks (the strictness of which is controlled by the boolean parameter
// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
template <bool STRICT_UNWINDING, bool WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
static void **NextStackFrame(void **old_fp, const void *uc,
size_t stack_low, size_t stack_high) {
void **new_fp = (void **)*old_fp;
#if defined(__linux__) && defined(__i386__)
if (WITH_CONTEXT && uc != nullptr) {
// How many "push %reg" instructions are there at __kernel_vsyscall?
// This is constant for a given kernel and processor, so compute
// it only once.
static int num_push_instructions = -1; // Sentinel: not computed yet.
// Initialize with a sentinel value: __kernel_rt_sigreturn cannot possibly
// be there.
static const unsigned char *kernel_rt_sigreturn_address = nullptr;
static const unsigned char *kernel_vsyscall_address = nullptr;
if (num_push_instructions == -1) {
#ifdef ABSL_HAVE_VDSO_SUPPORT
absl::debugging_internal::VDSOSupport vdso;
if (vdso.IsPresent()) {
absl::debugging_internal::VDSOSupport::SymbolInfo
rt_sigreturn_symbol_info;
absl::debugging_internal::VDSOSupport::SymbolInfo vsyscall_symbol_info;
if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5", STT_FUNC,
&rt_sigreturn_symbol_info) ||
!vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5", STT_FUNC,
&vsyscall_symbol_info) ||
rt_sigreturn_symbol_info.address == nullptr ||
vsyscall_symbol_info.address == nullptr) {
// Unexpected: 32-bit VDSO is present, yet one of the expected
// symbols is missing or null.
assert(false && "VDSO is present, but doesn't have expected symbols");
num_push_instructions = 0;
} else {
kernel_rt_sigreturn_address =
reinterpret_cast<const unsigned char *>(
rt_sigreturn_symbol_info.address);
kernel_vsyscall_address =
reinterpret_cast<const unsigned char *>(
vsyscall_symbol_info.address);
num_push_instructions =
CountPushInstructions(kernel_vsyscall_address);
}
} else {
num_push_instructions = 0;
}
#else // ABSL_HAVE_VDSO_SUPPORT
num_push_instructions = 0;
#endif // ABSL_HAVE_VDSO_SUPPORT
}
if (num_push_instructions != 0 && kernel_rt_sigreturn_address != nullptr &&
old_fp[1] == kernel_rt_sigreturn_address) {
const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
// This kernel does not use frame pointer in its VDSO code,
// and so %ebp is not suitable for unwinding.
void **const reg_ebp =
reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
const unsigned char *const reg_eip =
reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
if (new_fp == reg_ebp && kernel_vsyscall_address <= reg_eip &&
reg_eip - kernel_vsyscall_address < kMaxBytes) {
// We "stepped up" to __kernel_vsyscall, but %ebp is not usable.
// Restore from 'ucv' instead.
void **const reg_esp =
reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]);
// Check that alleged %esp is not null and is reasonably aligned.
if (reg_esp &&
((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) {
// Check that alleged %esp is actually readable. This is to prevent
// "double fault" in case we hit the first fault due to e.g. stack
// corruption.
void *const reg_esp2 = reg_esp[num_push_instructions - 1];
if (AddressIsReadable(reg_esp2)) {
// Alleged %esp is readable, use it for further unwinding.
new_fp = reinterpret_cast<void **>(reg_esp2);
}
}
}
}
}
#endif
const uintptr_t old_fp_u = reinterpret_cast<uintptr_t>(old_fp);
const uintptr_t new_fp_u = reinterpret_cast<uintptr_t>(new_fp);
// Check that the transition from frame pointer old_fp to frame
// pointer new_fp isn't clearly bogus. Skip the checks if new_fp
// matches the signal context, so that we don't skip out early when
// using an alternate signal stack.
//
// TODO(bcmills): The GetFP call should be completely unnecessary when
// ENABLE_COMBINED_UNWINDER is set (because we should be back in the thread's
// stack by this point), but it is empirically still needed (e.g. when the
// stack includes a call to abort). unw_get_reg returns UNW_EBADREG for some
// frames. Figure out why GetValidFrameAddr and/or libunwind isn't doing what
// it's supposed to.
if (STRICT_UNWINDING &&
(!WITH_CONTEXT || uc == nullptr || new_fp_u != GetFP(uc))) {
// With the stack growing downwards, an older stack frame must be
// at a greater address than the current one.
if (new_fp_u <= old_fp_u) return nullptr;
// A very large frame size may indicate that we guessed the frame pointer
// incorrectly and now risk a paging fault from dereferencing a bogus
// pointer; then again, legitimately large frames do exist. The main stack
// is assumed to be readable, so we treat a large frame as legitimate only
// when the real stack bounds are known and new_fp lies within them.
if (new_fp_u - old_fp_u > kMaxFrameBytes) {
if (stack_high < kUnknownStackEnd &&
static_cast<size_t>(getpagesize()) < stack_low) {
// Stack bounds are known.
if (!(stack_low < new_fp_u && new_fp_u <= stack_high)) {
// new_fp_u is not within the known stack.
return nullptr;
}
} else {
// Stack bounds are unknown, prefer truncated stack to possible crash.
return nullptr;
}
}
if (stack_low < old_fp_u && old_fp_u <= stack_high) {
// Old BP was in the expected stack region...
if (!(stack_low < new_fp_u && new_fp_u <= stack_high)) {
// ... but new BP is outside of expected stack region.
// It is most likely bogus.
return nullptr;
}
} else {
// We may be here if we are executing in a co-routine with a
// separate stack. We can't do safety checks in this case.
}
} else {
if (new_fp == nullptr) return nullptr; // skip AddressIsReadable() below
// In the non-strict mode, allow discontiguous stack frames.
// (alternate-signal-stacks for example).
if (new_fp == old_fp) return nullptr;
}
if (new_fp_u & (sizeof(void *) - 1)) return nullptr;
#ifdef __i386__
// On 32-bit machines, the stack pointer can be very close to
// 0xffffffff, so we explicitly check for a pointer into the
// last two pages in the address space
if (new_fp_u >= 0xffffe000) return nullptr;
#endif
#if !defined(_WIN32)
if (!STRICT_UNWINDING) {
// Lax sanity checks cause a crash in 32-bit tcmalloc/crash_reason_test
// on AMD-based machines with VDSO-enabled kernels.
// Make an extra sanity check to ensure new_fp is readable.
// Note: NextStackFrame<false>() is only called while the program
// is already on its last leg, so it's ok to be slow here.
if (!AddressIsReadable(new_fp)) {
return nullptr;
}
}
#endif
return new_fp;
}
template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT>
ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack.
ABSL_ATTRIBUTE_NOINLINE
static int UnwindImpl(void **result, int *sizes, int max_depth, int skip_count,
const void *ucp, int *min_dropped_frames) {
int n = 0;
void **fp = reinterpret_cast<void **>(__builtin_frame_address(0));
// Assume that the first page is not stack.
size_t stack_low = static_cast<size_t>(getpagesize());
size_t stack_high = kUnknownStackEnd;
while (fp && n < max_depth) {
if (*(fp + 1) == reinterpret_cast<void *>(0)) {
// In 64-bit code, we often see a frame that
// points to itself and has a return address of 0.
break;
}
void **next_fp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(
fp, ucp, stack_low, stack_high);
if (skip_count > 0) {
skip_count--;
} else {
result[n] = *(fp + 1);
if (IS_STACK_FRAMES) {
if (next_fp > fp) {
sizes[n] = static_cast<int>(
reinterpret_cast<uintptr_t>(next_fp) -
reinterpret_cast<uintptr_t>(fp));
} else {
// A frame-size of 0 is used to indicate unknown frame size.
sizes[n] = 0;
}
}
n++;
}
fp = next_fp;
}
if (min_dropped_frames != nullptr) {
// Implementation detail: we clamp the maximum number of frames we are
// willing to count, so as not to spend too much time in the loop below.
const int kMaxUnwind = 1000;
int num_dropped_frames = 0;
for (int j = 0; fp != nullptr && j < kMaxUnwind; j++) {
if (skip_count > 0) {
skip_count--;
} else {
num_dropped_frames++;
}
fp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(fp, ucp, stack_low,
stack_high);
}
*min_dropped_frames = num_dropped_frames;
}
return n;
}
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
bool StackTraceWorksForTest() {
return true;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_

View file

@ -0,0 +1,153 @@
// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file contains internal parts of the Abseil symbolizer.
// Do not depend on anything in this file; it may change at any time.
#ifndef ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_
#define ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_
#ifdef __cplusplus
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/strings/string_view.h"
#ifdef ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE
#error ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE cannot be directly set
#elif defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__) \
&& !defined(__asmjs__) && !defined(__wasm__)
#define ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE 1
#include <elf.h>
#include <link.h> // For ElfW() macro.
#include <functional>
#include <string>
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// Iterates over all sections, invoking callback on each with the section name
// and the section header.
//
// Returns true on success; otherwise returns false in case of errors.
//
// This is not async-signal-safe.
bool ForEachSection(int fd,
const std::function<bool(absl::string_view name,
const ElfW(Shdr) &)>& callback);
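//
// A hedged usage sketch (the names `fd` and `text_sections` are illustrative;
// `fd` is assumed to be an open, readable descriptor for an ELF file):
//
//   size_t text_sections = 0;
//   const bool ok = absl::debugging_internal::ForEachSection(
//       fd, [&](absl::string_view name, const ElfW(Shdr)&) {
//         if (name == ".text") ++text_sections;
//         return true;  // return false to stop the iteration early
//       });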
// Gets the section header for the given name, if it exists. Returns true on
// success. Otherwise, returns false.
bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
ElfW(Shdr) *out);
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE
#ifdef ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE
#error ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE cannot be directly set
#elif defined(__APPLE__)
#define ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE 1
#endif
#ifdef ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE
#error ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE cannot be directly set
#elif defined(__EMSCRIPTEN__)
#define ABSL_INTERNAL_HAVE_EMSCRIPTEN_SYMBOLIZE 1
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
struct SymbolDecoratorArgs {
// The program counter we are getting symbolic name for.
const void *pc;
// 0 for main executable, load address for shared libraries.
ptrdiff_t relocation;
// Read-only file descriptor for ELF image covering "pc",
// or -1 if no such ELF image exists in /proc/self/maps.
int fd;
// Output buffer, size.
// Note: the buffer may not be empty -- default symbolizer may have already
// produced some output, and earlier decorators may have adorned it in
// some way. You are free to replace or augment the contents (within the
// symbol_buf_size limit).
char *const symbol_buf;
size_t symbol_buf_size;
// Temporary scratch space, size.
// Use that space in preference to allocating your own stack buffer to
// conserve stack.
char *const tmp_buf;
size_t tmp_buf_size;
// User-provided argument
void* arg;
};
using SymbolDecorator = void (*)(const SymbolDecoratorArgs *);
// Installs a function-pointer as a decorator. Returns a value less than zero
// if the system cannot install the decorator. Otherwise, returns a unique
// identifier corresponding to the decorator. This identifier can be used to
// uninstall the decorator - See RemoveSymbolDecorator() below.
int InstallSymbolDecorator(SymbolDecorator decorator, void* arg);
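//
// A hedged usage sketch (the decorator name, the appended tag, and the use of
// <cstring> helpers are illustrative, not part of this API): a decorator may
// append extra text to whatever is already in symbol_buf, provided it stays
// within symbol_buf_size.
//
//   void MyDecorator(const SymbolDecoratorArgs* args) {
//     static constexpr char kTag[] = " [decorated]";
//     const size_t len = strlen(args->symbol_buf);
//     if (len + sizeof(kTag) <= args->symbol_buf_size) {
//       memcpy(args->symbol_buf + len, kTag, sizeof(kTag));  // copies the '\0'
//     }
//   }
//
//   const int ticket = InstallSymbolDecorator(&MyDecorator, nullptr);
//   if (ticket >= 0) {
//     // ... later, once the decorator is no longer wanted:
//     RemoveSymbolDecorator(ticket);
//   }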
// Removes a previously installed function-pointer decorator. Parameter "ticket"
// is the return-value from calling InstallSymbolDecorator().
bool RemoveSymbolDecorator(int ticket);
// Remove all installed decorators. Returns true if successful, false if
// symbolization is currently in progress.
bool RemoveAllSymbolDecorators();
// Registers an address range to a file mapping.
//
// Preconditions:
// start <= end
// filename != nullptr
//
// Returns true if the file was successfully registered.
bool RegisterFileMappingHint(const void* start, const void* end,
uint64_t offset, const char* filename);
// Looks up the file mapping registered by RegisterFileMappingHint for an
// address range. If there is one, the file name is stored in *filename and
// *start and *end are modified to reflect the registered mapping. Returns
// whether any hint was found.
bool GetFileMappingHint(const void** start, const void** end, uint64_t* offset,
const char** filename);
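//
// A hedged sketch of how the two hint calls pair up (the addresses and the
// file name below are purely illustrative):
//
//   // At startup, after mapping a region that corresponds to a stripped
//   // library whose debug-info copy lives elsewhere:
//   RegisterFileMappingHint(region_start, region_end, /*offset=*/0,
//                           "/opt/debug/libfoo.so");
//
//   // Later, when symbolizing an address inside that region:
//   const void* start = addr;
//   const void* end = addr;
//   uint64_t offset = 0;
//   const char* filename = nullptr;
//   if (GetFileMappingHint(&start, &end, &offset, &filename)) {
//     // filename now points at the registered path; start, end and offset
//     // describe the registered mapping.
//   }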
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // __cplusplus
#include <stdbool.h>
#ifdef __cplusplus
extern "C"
#endif // __cplusplus
bool
AbslInternalGetFileMappingHint(const void** start, const void** end,
uint64_t* offset, const char** filename);
#endif // ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_

View file

@ -0,0 +1,70 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/utf8_for_code_point.h"
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
// UTF-8 encoding bounds.
constexpr uint32_t kMinSurrogate = 0xd800, kMaxSurrogate = 0xdfff;
constexpr uint32_t kMax1ByteCodePoint = 0x7f;
constexpr uint32_t kMax2ByteCodePoint = 0x7ff;
constexpr uint32_t kMax3ByteCodePoint = 0xffff;
constexpr uint32_t kMaxCodePoint = 0x10ffff;
} // namespace
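// Worked example for the two-byte branch below: U+00F1 ("ñ") has the bit
// pattern 1111 0001, so the leading byte is 0xc0 | (0xf1 >> 6) == 0xc3 and the
// continuation byte is 0x80 | (0xf1 & 0x3f) == 0xb1, i.e. the UTF-8 sequence
// 0xc3 0xb1.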
Utf8ForCodePoint::Utf8ForCodePoint(uint64_t code_point) {
if (code_point <= kMax1ByteCodePoint) {
length = 1;
bytes[0] = static_cast<char>(code_point);
return;
}
if (code_point <= kMax2ByteCodePoint) {
length = 2;
bytes[0] = static_cast<char>(0xc0 | (code_point >> 6));
bytes[1] = static_cast<char>(0x80 | (code_point & 0x3f));
return;
}
if (kMinSurrogate <= code_point && code_point <= kMaxSurrogate) return;
if (code_point <= kMax3ByteCodePoint) {
length = 3;
bytes[0] = static_cast<char>(0xe0 | (code_point >> 12));
bytes[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
bytes[2] = static_cast<char>(0x80 | (code_point & 0x3f));
return;
}
if (code_point > kMaxCodePoint) return;
length = 4;
bytes[0] = static_cast<char>(0xf0 | (code_point >> 18));
bytes[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
bytes[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
bytes[3] = static_cast<char>(0x80 | (code_point & 0x3f));
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,47 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
#define ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_
#include <cstdint>
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
struct Utf8ForCodePoint {
// Converts a Unicode code point to the corresponding UTF-8 byte sequence.
// Async-signal-safe to support use in symbolizing stack traces from a signal
// handler.
explicit Utf8ForCodePoint(uint64_t code_point);
// Returns true if the constructor's code_point argument was valid.
bool ok() const { return length != 0; }
// If code_point was in range, then 1 <= length <= 4, and the UTF-8 encoding
// is found in bytes[0 .. (length - 1)]. If code_point was invalid, then
// length == 0. In either case, the contents of bytes[length .. 3] are
// unspecified.
char bytes[4] = {};
uint32_t length = 0;
};
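// A minimal usage sketch (`out` is illustrative and assumed to be a
// std::string, or anything with a compatible append(const char*, size_t)):
//
//   const Utf8ForCodePoint utf8(uint64_t{0x4e2d});  // U+4E2D, 3 bytes in UTF-8
//   if (utf8.ok()) out.append(utf8.bytes, utf8.length);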
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_DEBUGGING_INTERNAL_UTF8_FOR_CODE_POINT_H_

View file

@ -0,0 +1,175 @@
// Copyright 2024 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/debugging/internal/utf8_for_code_point.h"
#include <cstdint>
#include "gtest/gtest.h"
#include "absl/base/config.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
TEST(Utf8ForCodePointTest, RecognizesTheSmallestCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0});
ASSERT_EQ(utf8.length, 1);
EXPECT_EQ(utf8.bytes[0], '\0');
}
TEST(Utf8ForCodePointTest, RecognizesAsciiSmallA) {
Utf8ForCodePoint utf8(uint64_t{'a'});
ASSERT_EQ(utf8.length, 1);
EXPECT_EQ(utf8.bytes[0], 'a');
}
TEST(Utf8ForCodePointTest, RecognizesTheLargestOneByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x7f});
ASSERT_EQ(utf8.length, 1);
EXPECT_EQ(utf8.bytes[0], '\x7f');
}
TEST(Utf8ForCodePointTest, RecognizesTheSmallestTwoByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x80});
ASSERT_EQ(utf8.length, 2);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xc2));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80));
}
TEST(Utf8ForCodePointTest, RecognizesSmallNWithTilde) {
Utf8ForCodePoint utf8(uint64_t{0xf1});
ASSERT_EQ(utf8.length, 2);
const char* want = "ñ";
EXPECT_EQ(utf8.bytes[0], want[0]);
EXPECT_EQ(utf8.bytes[1], want[1]);
}
TEST(Utf8ForCodePointTest, RecognizesCapitalPi) {
Utf8ForCodePoint utf8(uint64_t{0x3a0});
ASSERT_EQ(utf8.length, 2);
const char* want = "Π";
EXPECT_EQ(utf8.bytes[0], want[0]);
EXPECT_EQ(utf8.bytes[1], want[1]);
}
TEST(Utf8ForCodePointTest, RecognizesTheLargestTwoByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x7ff});
ASSERT_EQ(utf8.length, 2);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xdf));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf));
}
TEST(Utf8ForCodePointTest, RecognizesTheSmallestThreeByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x800});
ASSERT_EQ(utf8.length, 3);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xe0));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xa0));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80));
}
TEST(Utf8ForCodePointTest, RecognizesTheChineseCharacterZhong1AsInZhong1Wen2) {
Utf8ForCodePoint utf8(uint64_t{0x4e2d});
ASSERT_EQ(utf8.length, 3);
const char* want = "中";
EXPECT_EQ(utf8.bytes[0], want[0]);
EXPECT_EQ(utf8.bytes[1], want[1]);
EXPECT_EQ(utf8.bytes[2], want[2]);
}
TEST(Utf8ForCodePointTest, RecognizesOneBeforeTheSmallestSurrogate) {
Utf8ForCodePoint utf8(uint64_t{0xd7ff});
ASSERT_EQ(utf8.length, 3);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xed));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x9f));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf));
}
TEST(Utf8ForCodePointTest, RejectsTheSmallestSurrogate) {
Utf8ForCodePoint utf8(uint64_t{0xd800});
EXPECT_EQ(utf8.length, 0);
}
TEST(Utf8ForCodePointTest, RejectsTheLargestSurrogate) {
Utf8ForCodePoint utf8(uint64_t{0xdfff});
EXPECT_EQ(utf8.length, 0);
}
TEST(Utf8ForCodePointTest, RecognizesOnePastTheLargestSurrogate) {
Utf8ForCodePoint utf8(uint64_t{0xe000});
ASSERT_EQ(utf8.length, 3);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xee));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x80));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80));
}
TEST(Utf8ForCodePointTest, RecognizesTheLargestThreeByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0xffff});
ASSERT_EQ(utf8.length, 3);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xef));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0xbf));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf));
}
TEST(Utf8ForCodePointTest, RecognizesTheSmallestFourByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x10000});
ASSERT_EQ(utf8.length, 4);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf0));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x90));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0x80));
EXPECT_EQ(utf8.bytes[3], static_cast<char>(0x80));
}
TEST(Utf8ForCodePointTest, RecognizesTheJackOfHearts) {
Utf8ForCodePoint utf8(uint64_t{0x1f0bb});
ASSERT_EQ(utf8.length, 4);
const char* want = "🂻";
EXPECT_EQ(utf8.bytes[0], want[0]);
EXPECT_EQ(utf8.bytes[1], want[1]);
EXPECT_EQ(utf8.bytes[2], want[2]);
EXPECT_EQ(utf8.bytes[3], want[3]);
}
TEST(Utf8ForCodePointTest, RecognizesTheLargestFourByteCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x10ffff});
ASSERT_EQ(utf8.length, 4);
EXPECT_EQ(utf8.bytes[0], static_cast<char>(0xf4));
EXPECT_EQ(utf8.bytes[1], static_cast<char>(0x8f));
EXPECT_EQ(utf8.bytes[2], static_cast<char>(0xbf));
EXPECT_EQ(utf8.bytes[3], static_cast<char>(0xbf));
}
TEST(Utf8ForCodePointTest, RejectsTheSmallestOverlargeCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0x110000});
EXPECT_EQ(utf8.length, 0);
}
TEST(Utf8ForCodePointTest, RejectsAThoroughlyOverlargeCodePoint) {
Utf8ForCodePoint utf8(uint64_t{0xffffffff00000000});
EXPECT_EQ(utf8.length, 0);
}
TEST(Utf8ForCodePointTest, OkReturnsTrueForAValidCodePoint) {
EXPECT_TRUE(Utf8ForCodePoint(uint64_t{0}).ok());
}
TEST(Utf8ForCodePointTest, OkReturnsFalseForAnInvalidCodePoint) {
EXPECT_FALSE(Utf8ForCodePoint(uint64_t{0xffffffff00000000}).ok());
}
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl

View file

@ -0,0 +1,205 @@
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Allow dynamic symbol lookup in the kernel VDSO page.
//
// VDSOSupport -- a class representing kernel VDSO (if present).
#include "absl/debugging/internal/vdso_support.h"
#ifdef ABSL_HAVE_VDSO_SUPPORT // defined in vdso_support.h
#if !defined(__has_include)
#define __has_include(header) 0
#endif
#include <errno.h>
#include <fcntl.h>
#if __has_include(<syscall.h>)
#include <syscall.h>
#elif __has_include(<sys/syscall.h>)
#include <sys/syscall.h>
#endif
#include <unistd.h>
#if !defined(__UCLIBC__) && defined(__GLIBC__) && \
(__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16))
#define ABSL_HAVE_GETAUXVAL
#endif
#ifdef ABSL_HAVE_GETAUXVAL
#include <sys/auxv.h>
#endif
#include "absl/base/dynamic_annotations.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/port.h"
#ifndef AT_SYSINFO_EHDR
#define AT_SYSINFO_EHDR 33 // for crosstoolv10
#endif
#if defined(__NetBSD__)
using Elf32_auxv_t = Aux32Info;
using Elf64_auxv_t = Aux64Info;
#endif
#if defined(__FreeBSD__)
#if defined(__ELF_WORD_SIZE) && __ELF_WORD_SIZE == 64
using Elf64_auxv_t = Elf64_Auxinfo;
#endif
using Elf32_auxv_t = Elf32_Auxinfo;
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
ABSL_CONST_INIT
std::atomic<const void *> VDSOSupport::vdso_base_(
debugging_internal::ElfMemImage::kInvalidBase);
ABSL_CONST_INIT std::atomic<VDSOSupport::GetCpuFn> VDSOSupport::getcpu_fn_(
&InitAndGetCPU);
VDSOSupport::VDSOSupport()
// If vdso_base_ is still set to kInvalidBase, we got here
// before VDSOSupport::Init has been called. Call it now.
: image_(vdso_base_.load(std::memory_order_relaxed) ==
debugging_internal::ElfMemImage::kInvalidBase
? Init()
: vdso_base_.load(std::memory_order_relaxed)) {}
// NOTE: we can't use GoogleOnceInit() below, because we can be
// called by tcmalloc, and none of the *once* stuff may be functional yet.
//
// In addition, we hope that the VDSOSupportHelper constructor
// causes this code to run before there are any threads, and before
// InitGoogle() has executed any chroot or setuid calls.
//
// Finally, even if there is a race here, it is harmless, because
// the operation should be idempotent.
const void *VDSOSupport::Init() {
const auto kInvalidBase = debugging_internal::ElfMemImage::kInvalidBase;
#ifdef ABSL_HAVE_GETAUXVAL
if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) {
errno = 0;
const void *const sysinfo_ehdr =
reinterpret_cast<const void *>(getauxval(AT_SYSINFO_EHDR));
if (errno == 0) {
vdso_base_.store(sysinfo_ehdr, std::memory_order_relaxed);
}
}
#endif // ABSL_HAVE_GETAUXVAL
if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) {
int fd = open("/proc/self/auxv", O_RDONLY);
if (fd == -1) {
// Kernel too old to have a VDSO.
vdso_base_.store(nullptr, std::memory_order_relaxed);
getcpu_fn_.store(&GetCPUViaSyscall, std::memory_order_relaxed);
return nullptr;
}
ElfW(auxv_t) aux;
while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) {
if (aux.a_type == AT_SYSINFO_EHDR) {
#if defined(__NetBSD__)
vdso_base_.store(reinterpret_cast<void *>(aux.a_v),
std::memory_order_relaxed);
#else
vdso_base_.store(reinterpret_cast<void *>(aux.a_un.a_val),
std::memory_order_relaxed);
#endif
break;
}
}
close(fd);
if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) {
// Didn't find AT_SYSINFO_EHDR in auxv[].
vdso_base_.store(nullptr, std::memory_order_relaxed);
}
}
GetCpuFn fn = &GetCPUViaSyscall; // default if VDSO not present.
if (vdso_base_.load(std::memory_order_relaxed)) {
VDSOSupport vdso;
SymbolInfo info;
if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
fn = reinterpret_cast<GetCpuFn>(const_cast<void *>(info.address));
}
}
// Subtle: this code runs outside of any locks; prevent compiler
// from assigning to getcpu_fn_ more than once.
getcpu_fn_.store(fn, std::memory_order_relaxed);
return vdso_base_.load(std::memory_order_relaxed);
}
const void *VDSOSupport::SetBase(const void *base) {
ABSL_RAW_CHECK(base != debugging_internal::ElfMemImage::kInvalidBase,
"internal error");
const void *old_base = vdso_base_.load(std::memory_order_relaxed);
vdso_base_.store(base, std::memory_order_relaxed);
image_.Init(base);
// Also reset getcpu_fn_, so GetCPU could be tested with simulated VDSO.
getcpu_fn_.store(&InitAndGetCPU, std::memory_order_relaxed);
return old_base;
}
bool VDSOSupport::LookupSymbol(const char *name,
const char *version,
int type,
SymbolInfo *info) const {
return image_.LookupSymbol(name, version, type, info);
}
bool VDSOSupport::LookupSymbolByAddress(const void *address,
SymbolInfo *info_out) const {
return image_.LookupSymbolByAddress(address, info_out);
}
// NOLINT on 'long' because this routine mimics kernel api.
long VDSOSupport::GetCPUViaSyscall(unsigned *cpu, // NOLINT(runtime/int)
void *, void *) {
#ifdef SYS_getcpu
return syscall(SYS_getcpu, cpu, nullptr, nullptr);
#else
// x86_64 never implemented sys_getcpu(), except as a VDSO call.
static_cast<void>(cpu); // Avoid an unused argument compiler warning.
errno = ENOSYS;
return -1;
#endif
}
// Use fast __vdso_getcpu if available.
long VDSOSupport::InitAndGetCPU(unsigned *cpu, // NOLINT(runtime/int)
void *x, void *y) {
Init();
GetCpuFn fn = getcpu_fn_.load(std::memory_order_relaxed);
ABSL_RAW_CHECK(fn != &InitAndGetCPU, "Init() did not set getcpu_fn_");
return (*fn)(cpu, x, y);
}
// This function must be very fast, and may be called from very
// low level (e.g. tcmalloc). Hence I avoid things like
// GoogleOnceInit() and ::operator new.
ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
int GetCPU() {
unsigned cpu;
long ret_code = // NOLINT(runtime/int)
(*VDSOSupport::getcpu_fn_)(&cpu, nullptr, nullptr);
return ret_code == 0 ? static_cast<int>(cpu) : static_cast<int>(ret_code);
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HAVE_VDSO_SUPPORT

View file

@ -0,0 +1,158 @@
//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Allow dynamic symbol lookup in the kernel VDSO page.
//
// VDSO stands for "Virtual Dynamic Shared Object" -- a page of
// executable code, which looks like a shared library, but doesn't
// necessarily exist anywhere on disk, and which gets mmap()ed into
// every process by kernels which support VDSO, such as 2.6.x for 32-bit
// executables, and 2.6.24 and above for 64-bit executables.
//
// More details could be found here:
// http://www.trilithium.com/johan/2005/08/linux-gate/
//
// VDSOSupport -- a class representing kernel VDSO (if present).
//
// Example usage:
// VDSOSupport vdso;
// VDSOSupport::SymbolInfo info;
// typedef long (*FN)(unsigned *, void *, void *);
// FN fn = nullptr;
// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) {
// fn = reinterpret_cast<FN>(info.address);
// }
#ifndef ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_
#define ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_
#include <atomic>
#include "absl/base/attributes.h"
#include "absl/debugging/internal/elf_mem_image.h"
#ifdef ABSL_HAVE_ELF_MEM_IMAGE
#ifdef ABSL_HAVE_VDSO_SUPPORT
#error ABSL_HAVE_VDSO_SUPPORT cannot be directly set
#else
#define ABSL_HAVE_VDSO_SUPPORT 1
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
// NOTE: this class may be used from within tcmalloc, and can not
// use any memory allocation routines.
class VDSOSupport {
public:
VDSOSupport();
typedef ElfMemImage::SymbolInfo SymbolInfo;
typedef ElfMemImage::SymbolIterator SymbolIterator;
// On PowerPC64 VDSO symbols can either be of type STT_FUNC or STT_NOTYPE
// depending on how the kernel is built. The kernel is normally built with
// STT_NOTYPE type VDSO symbols. Let's make things simpler first by using a
// compile-time constant.
#ifdef __powerpc64__
enum { kVDSOSymbolType = STT_NOTYPE };
#else
enum { kVDSOSymbolType = STT_FUNC };
#endif
// Answers whether we have a vdso at all.
bool IsPresent() const { return image_.IsPresent(); }
// Allows iteration over all VDSO symbols.
SymbolIterator begin() const { return image_.begin(); }
SymbolIterator end() const { return image_.end(); }
// Look up versioned dynamic symbol in the kernel VDSO.
// Returns false if VDSO is not present, or doesn't contain given
// symbol/version/type combination.
// If info_out != nullptr, additional details are filled in.
bool LookupSymbol(const char *name, const char *version,
int symbol_type, SymbolInfo *info_out) const;
// Find info about symbol (if any) which overlaps given address.
// Returns true if symbol was found; false if VDSO isn't present
// or doesn't have a symbol overlapping given address.
// If info_out != nullptr, additional details are filled in.
bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const;
// Used only for testing. Replace real VDSO base with a mock.
// Returns previous value of vdso_base_. After you are done testing,
// you are expected to call SetBase() with previous value, in order to
// reset state to the way it was.
const void *SetBase(const void *s);
// Computes vdso_base_ and returns it. Should be called as early as
// possible; before any thread creation, chroot or setuid.
static const void *Init();
private:
// image_ represents VDSO ELF image in memory.
// image_.ehdr_ == nullptr implies there is no VDSO.
ElfMemImage image_;
// Cached value of auxv AT_SYSINFO_EHDR, computed once.
// This is a tri-state:
// kInvalidBase => value hasn't been determined yet.
// 0 => there is no VDSO.
// else => vma of VDSO Elf{32,64}_Ehdr.
//
// When testing with mock VDSO, low bit is set.
// The low bit is always available because vdso_base_ is
// page-aligned.
static std::atomic<const void *> vdso_base_;
// NOLINT on 'long' because these routines mimic kernel api.
// The 'cache' parameter may be used by some versions of the kernel,
// and should be nullptr or point to a static buffer containing at
// least two 'long's.
static long InitAndGetCPU(unsigned *cpu, void *cache, // NOLINT 'long'.
void *unused);
static long GetCPUViaSyscall(unsigned *cpu, void *cache, // NOLINT 'long'.
void *unused);
typedef long (*GetCpuFn)(unsigned *cpu, void *cache, // NOLINT 'long'.
void *unused);
// This function pointer may point to InitAndGetCPU,
// GetCPUViaSyscall, or __vdso_getcpu at different stages of initialization.
ABSL_CONST_INIT static std::atomic<GetCpuFn> getcpu_fn_;
friend int GetCPU(void); // Needs access to getcpu_fn_.
VDSOSupport(const VDSOSupport&) = delete;
VDSOSupport& operator=(const VDSOSupport&) = delete;
};
// Same as sched_getcpu() on later glibc versions.
// Return current CPU, using (fast) __vdso_getcpu@LINUX_2.6 if present,
// otherwise use syscall(SYS_getcpu,...).
// May return -1 with errno == ENOSYS if the kernel doesn't
// support SYS_getcpu.
int GetCPU();
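//
// A hedged usage sketch (the variable name is illustrative): like
// sched_getcpu(), a negative return value means the CPU could not be
// determined.
//
//   const int cpu = absl::debugging_internal::GetCPU();
//   if (cpu >= 0) {
//     // The calling thread was running on CPU `cpu` at the time of the call.
//   }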
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HAVE_ELF_MEM_IMAGE
#endif // ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_