Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

View file

@@ -0,0 +1,2 @@
wittman@chromium.org
charliea@chromium.org

View file

@@ -0,0 +1,160 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/arm_cfi_table.h"
#include <algorithm>
namespace base {
namespace {
// The value of index when the function does not have unwind information.
constexpr uint32_t kNoUnwindInformation = 0xFFFF;
// The mask on the CFI row data that is used to get the high 14 bits and
// multiply it by 4 to get CFA offset. Since the last 2 bits are masked out, a
// shift is not necessary.
constexpr uint16_t kCFAMask = 0xfffc;
// The mask on the CFI row data that is used to get the low 2 bits and multiply
// it by 4 to get the return address offset.
constexpr uint16_t kReturnAddressMask = 0x3;
constexpr uint16_t kReturnAddressShift = 2;
// The CFI data in the UNW_DATA table starts with the number of rows (N)
// encoded as a uint16_t, followed by N 4-byte rows. CFIDataRow represents a
// single row of CFI data for a function in the table. Since we cast the
// memory immediately following the row count into an array of CFIDataRow,
// the struct must be exactly 4 bytes and the member order must match the
// given format. The first 2 bytes hold the instruction offset within the
// function and the last 2 bytes hold the CFI data for that offset.
struct CFIDataRow {
// The address of the instruction as an offset from the start of the
// function.
uint16_t addr_offset;
// Represents the CFA and RA offsets to get information about next stack
// frame. This is the CFI data at the point before executing the instruction
// at |addr_offset| from the start of the function.
uint16_t cfi_data;
// Helper functions to convert the CFI data to ArmCFITable::FrameEntry
// offsets.
size_t ra_offset() const {
return (cfi_data & kReturnAddressMask) << kReturnAddressShift;
}
size_t cfa_offset() const { return cfi_data & kCFAMask; }
};
static_assert(sizeof(CFIDataRow) == 4,
"The CFIDataRow struct must be exactly 4 bytes to ensure "
"correct parsing of input data");
} // namespace
// static
std::unique_ptr<ArmCFITable> ArmCFITable::Parse(span<const uint8_t> cfi_data) {
BufferIterator<const uint8_t> cfi_iterator(cfi_data);
const uint32_t* unw_index_count = cfi_iterator.Object<uint32_t>();
if (unw_index_count == nullptr || *unw_index_count == 0U)
return nullptr;
auto function_addresses = cfi_iterator.Span<uint32_t>(*unw_index_count);
auto entry_data_indices = cfi_iterator.Span<uint16_t>(*unw_index_count);
if (function_addresses.size() != *unw_index_count ||
entry_data_indices.size() != *unw_index_count)
return nullptr;
// The UNW_DATA table data is right after the end of UNW_INDEX table.
auto entry_data = cfi_iterator.Span<uint8_t>(
(cfi_iterator.total_size() - cfi_iterator.position()) / sizeof(uint8_t));
return std::make_unique<ArmCFITable>(function_addresses, entry_data_indices,
entry_data);
}
ArmCFITable::ArmCFITable(span<const uint32_t> function_addresses,
span<const uint16_t> entry_data_indices,
span<const uint8_t> entry_data)
: function_addresses_(function_addresses),
entry_data_indices_(entry_data_indices),
entry_data_(entry_data) {
DCHECK_EQ(function_addresses.size(), entry_data_indices.size());
}
ArmCFITable::~ArmCFITable() = default;
Optional<ArmCFITable::FrameEntry> ArmCFITable::FindEntryForAddress(
uintptr_t address) const {
DCHECK(!function_addresses_.empty());
// Find the function in UNW_INDEX whose start address is the last one lower
// than or equal to |address| (the entry right before the result of
// upper_bound(), if any).
auto func_it = std::upper_bound(function_addresses_.begin(),
function_addresses_.end(), address);
// If no function comes before |address|, no CFI entry is returned.
if (func_it == function_addresses_.begin())
return nullopt;
--func_it;
uint32_t func_start_addr = *func_it;
size_t row_num = func_it - function_addresses_.begin();
uint16_t index = entry_data_indices_[row_num];
DCHECK_LE(func_start_addr, address);
if (index == kNoUnwindInformation)
return nullopt;
// The unwind data for the current function starts at an offset of
// |index| * 2 bytes into the UNW_DATA table.
if (entry_data_.size() <= index * sizeof(uint16_t))
return nullopt;
BufferIterator<const uint8_t> entry_iterator(entry_data_);
entry_iterator.Seek(index * sizeof(uint16_t));
// The first 2 bytes hold the CFI data row count for the function.
const uint16_t* row_count = entry_iterator.Object<uint16_t>();
if (row_count == nullptr)
return nullopt;
// The actual CFI rows start 2 bytes later, after the row count. Cast the
// data into an array of CFIDataRow since the struct is designed to
// represent each row. We should be careful to read only |row_count|
// elements from the array.
auto function_cfi = entry_iterator.Span<CFIDataRow>(*row_count);
if (function_cfi.size() != *row_count)
return nullopt;
FrameEntry last_frame_entry = {0, 0};
// Iterate through all function entries to find a range covering |address|.
// In practice, the majority of functions contain very few entries.
for (const auto& entry : function_cfi) {
// The return address of the function is the instruction that has not yet
// been executed. The CFI row specifies the unwind info before executing the
// given instruction. If the given address is equal to the instruction
// offset, use the current row; otherwise use the row with the highest
// address less than the given address.
if (func_start_addr + entry.addr_offset > address)
break;
uint32_t cfa_offset = entry.cfa_offset();
if (cfa_offset == 0)
return nullopt;
last_frame_entry.cfa_offset = cfa_offset;
uint32_t ra_offset = entry.ra_offset();
// If this row does not specify an RA offset, the RA offset of the last
// specified row should be used, so update |last_frame_entry.ra_offset| only
// when this row provides a valid value; otherwise the last valid value is
// kept. TODO(ssid): This should be fixed in the format and we should always
// output the ra offset.
if (ra_offset)
last_frame_entry.ra_offset = ra_offset;
if (last_frame_entry.ra_offset == 0)
return nullopt;
}
return last_frame_entry;
}
} // namespace base
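
To make the row encoding described in the comments above concrete, here is a minimal standalone sketch that decodes one cfi_data word with the same masks and shift as CFIDataRow; the word value 0x0049 is made up for illustration and is not taken from a real table.

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical row word: high 14 bits encode a CFA offset of 0x48 bytes
  // (already a multiple of 4), low 2 bits encode an RA offset of 1 * 4 bytes.
  const uint16_t cfi_data = 0x0048 | 0x1;                // 0x0049
  const size_t cfa_offset = cfi_data & 0xfffc;           // kCFAMask -> 0x48 (72)
  const size_t ra_offset = (cfi_data & 0x3) << 2;        // kReturnAddressMask/Shift -> 4
  std::printf("cfa_offset=%zu ra_offset=%zu\n", cfa_offset, ra_offset);
  return 0;
}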

View file

@@ -0,0 +1,75 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_ARM_CFI_TABLE_H_
#define BASE_PROFILER_ARM_CFI_TABLE_H_
#include <memory>
#include "base/containers/buffer_iterator.h"
#include "base/containers/span.h"
#include "base/macros.h"
#include "base/optional.h"
namespace base {
// This class implements methods to read and parse the ARM Call Frame
// Information (CFI) for Chrome, which contains tables for unwinding Chrome
// functions. For a detailed description of the format, see
// extract_unwind_tables.py.
class BASE_EXPORT ArmCFITable {
public:
// The CFI information that corresponds to an instruction. {0, 0} is a valid
// entry and should be interpreted as the default rule:
// .cfa: sp; .ra: lr (link register).
struct FrameEntry {
// The offset of the call frame address (CFA) of the previous function,
// relative to the current stack pointer. Rule for unwinding CFA:
// .cfa: sp + cfa_offset.
uint16_t cfa_offset = 0;
// The offset of the location of the return address (RA), relative to the
// previous call frame address. Rule for unwinding RA:
// .ra = *(cfa - ra_offset).
uint16_t ra_offset = 0;
};
// Parses |cfi_data| and creates an ArmCFITable that reads from it.
// |cfi_data| is required to remain valid for the lifetime of the object.
static std::unique_ptr<ArmCFITable> Parse(span<const uint8_t> cfi_data);
ArmCFITable(span<const uint32_t> function_addresses,
span<const uint16_t> entry_data_indices,
span<const uint8_t> entry_data);
~ArmCFITable();
// Finds the CFI row for the given |address|, where |address| is an offset
// from the start of the current binary. Concurrent calls are thread safe.
Optional<FrameEntry> FindEntryForAddress(uintptr_t address) const;
size_t GetTableSizeForTesting() const { return function_addresses_.size(); }
private:
// The UNW_INDEX table allows readers to map a function's start address to
// that function's entry in the UNW_DATA table. For example, if:
// - a function's start address is 0x123, and
// - function_addresses_[2] == 0x123, and
// - entry_data_indices_[2] == 42, then
// - entry_data_[42] is the corresponding entry in the UNW_DATA table for
//   the function with the start address of 0x123.
//
// Note that function_addresses_ is sorted to facilitate binary search.
const span<const uint32_t> function_addresses_;
const span<const uint16_t> entry_data_indices_;
// A reference to the UNW_DATA table. Each entry in the UNW_DATA table
// corresponds to a function, which in turn corresponds to an array of
// CFIDataRows (see arm_cfi_table.cc).
const span<const uint8_t> entry_data_;
DISALLOW_COPY_AND_ASSIGN(ArmCFITable);
};
} // namespace base
#endif // BASE_PROFILER_ARM_CFI_TABLE_H_
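
A sketch of how a caller might use this interface, assuming a |cfi_data| buffer containing the embedded UNW_INDEX/UNW_DATA blob and a |pc_offset| expressed as an offset from the start of the binary; only Parse() and FindEntryForAddress() come from the header above, the rest is hypothetical.

#include <cstdint>
#include <memory>

#include "base/containers/span.h"
#include "base/optional.h"
#include "base/profiler/arm_cfi_table.h"

void LookUpUnwindInfo(base::span<const uint8_t> cfi_data, uintptr_t pc_offset) {
  std::unique_ptr<base::ArmCFITable> table = base::ArmCFITable::Parse(cfi_data);
  if (!table)
    return;  // Malformed or empty table.
  base::Optional<base::ArmCFITable::FrameEntry> entry =
      table->FindEntryForAddress(pc_offset);
  if (entry) {
    // entry->cfa_offset and entry->ra_offset drive the unwind step:
    //   SP_prev = SP_cur + cfa_offset;  PC_prev = *(SP_prev - ra_offset).
  }
}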

View file

@@ -0,0 +1,89 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/chrome_unwinder_android.h"
#include "base/numerics/checked_math.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/native_unwinder.h"
#include "build/build_config.h"
namespace base {
ChromeUnwinderAndroid::ChromeUnwinderAndroid(
const ArmCFITable* cfi_table,
const ModuleCache::Module* chrome_module)
: cfi_table_(cfi_table), chrome_module_(chrome_module) {
DCHECK(cfi_table_);
DCHECK(chrome_module_);
}
ChromeUnwinderAndroid::~ChromeUnwinderAndroid() = default;
bool ChromeUnwinderAndroid::CanUnwindFrom(const Frame& current_frame) const {
return current_frame.module == chrome_module_;
}
UnwindResult ChromeUnwinderAndroid::TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const {
DCHECK(CanUnwindFrom(stack->back()));
do {
const ModuleCache::Module* module = stack->back().module;
uintptr_t pc = RegisterContextInstructionPointer(thread_context);
DCHECK_GE(pc, module->GetBaseAddress());
uintptr_t func_addr = pc - module->GetBaseAddress();
auto entry = cfi_table_->FindEntryForAddress(func_addr);
if (!entry)
return UnwindResult::ABORTED;
if (!Step(thread_context, stack_top, *entry))
return UnwindResult::ABORTED;
stack->emplace_back(RegisterContextInstructionPointer(thread_context),
module_cache->GetModuleForAddress(
RegisterContextInstructionPointer(thread_context)));
} while (CanUnwindFrom(stack->back()));
return UnwindResult::UNRECOGNIZED_FRAME;
}
// static
bool ChromeUnwinderAndroid::Step(RegisterContext* thread_context,
uintptr_t stack_top,
const ArmCFITable::FrameEntry& entry) {
CHECK_NE(RegisterContextStackPointer(thread_context), 0U);
CHECK_LE(RegisterContextStackPointer(thread_context), stack_top);
if (entry.cfa_offset == 0) {
uintptr_t pc = RegisterContextInstructionPointer(thread_context);
uintptr_t return_address = static_cast<uintptr_t>(thread_context->arm_lr);
if (pc == return_address)
return false;
RegisterContextInstructionPointer(thread_context) = return_address;
} else {
// The rules for unwinding using the CFI information are:
// SP_prev = SP_cur + cfa_offset and
// PC_prev = * (SP_prev - ra_offset).
auto new_sp =
CheckedNumeric<uintptr_t>(RegisterContextStackPointer(thread_context)) +
CheckedNumeric<uint16_t>(entry.cfa_offset);
if (!new_sp.AssignIfValid(&RegisterContextStackPointer(thread_context)) ||
RegisterContextStackPointer(thread_context) >= stack_top) {
return false;
}
if (entry.ra_offset > entry.cfa_offset)
return false;
// Underflow is prevented because |ra_offset| <= |cfa_offset|.
uintptr_t ip_address = (new_sp - CheckedNumeric<uint16_t>(entry.ra_offset))
.ValueOrDie<uintptr_t>();
RegisterContextInstructionPointer(thread_context) =
*reinterpret_cast<uintptr_t*>(ip_address);
}
return true;
}
} // namespace base
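
A minimal numeric sketch of the arithmetic performed by Step() in the non-zero-CFA branch, using made-up register values and a made-up {cfa_offset, ra_offset} pair.

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  // Made-up values: a frame whose CFI entry is {cfa_offset = 16, ra_offset = 4}.
  const uintptr_t sp_cur = 0xbeff0000;
  const uint16_t cfa_offset = 16;
  const uint16_t ra_offset = 4;
  const uintptr_t sp_prev = sp_cur + cfa_offset;  // SP_prev = SP_cur + cfa_offset
  const uintptr_t ra_slot = sp_prev - ra_offset;  // address that holds PC_prev
  // The real unwinder reads PC_prev = *reinterpret_cast<uintptr_t*>(ra_slot),
  // after the CheckedNumeric and stack_top bound checks shown above.
  std::printf("sp_prev=0x%zx ra_slot=0x%zx\n", static_cast<size_t>(sp_prev),
              static_cast<size_t>(ra_slot));
  return 0;
}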

View file

@@ -0,0 +1,51 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_CHROME_UNWINDER_ANDROID_H_
#define BASE_PROFILER_CHROME_UNWINDER_ANDROID_H_
#include "base/profiler/unwinder.h"
#include "base/base_export.h"
#include "base/optional.h"
#include "base/profiler/arm_cfi_table.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/register_context.h"
namespace base {
// Chrome unwinder implementation for Android, using ArmCFITable.
class BASE_EXPORT ChromeUnwinderAndroid : public Unwinder {
public:
ChromeUnwinderAndroid(const ArmCFITable* cfi_table,
const ModuleCache::Module* chrome_module);
~ChromeUnwinderAndroid() override;
ChromeUnwinderAndroid(const ChromeUnwinderAndroid&) = delete;
ChromeUnwinderAndroid& operator=(const ChromeUnwinderAndroid&) = delete;
// Unwinder:
bool CanUnwindFrom(const Frame& current_frame) const override;
UnwindResult TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const override;
static bool StepForTesting(RegisterContext* thread_context,
uintptr_t stack_top,
const ArmCFITable::FrameEntry& entry) {
return Step(thread_context, stack_top, entry);
}
private:
static bool Step(RegisterContext* thread_context,
uintptr_t stack_top,
const ArmCFITable::FrameEntry& entry);
const ArmCFITable* cfi_table_;
const ModuleCache::Module* const chrome_module_;
};
} // namespace base
#endif // BASE_PROFILER_CHROME_UNWINDER_ANDROID_H_

View file

@@ -0,0 +1,14 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/frame.h"
namespace base {
Frame::Frame(uintptr_t instruction_pointer, const ModuleCache::Module* module)
: instruction_pointer(instruction_pointer), module(module) {}
Frame::~Frame() = default;
} // namespace base

View file

@@ -0,0 +1,29 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_FRAME_H_
#define BASE_PROFILER_FRAME_H_
#include <memory>
#include "base/profiler/module_cache.h"
namespace base {
// Frame represents an individual sampled stack frame with full module
// information.
struct BASE_EXPORT Frame {
Frame(uintptr_t instruction_pointer, const ModuleCache::Module* module);
~Frame();
// The sampled instruction pointer within the function.
uintptr_t instruction_pointer;
// The module information.
const ModuleCache::Module* module;
};
} // namespace base
#endif // BASE_PROFILER_FRAME_H_

View file

@@ -0,0 +1,208 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/metadata_recorder.h"
#include "base/metrics/histogram_macros.h"
namespace base {
MetadataRecorder::ItemInternal::ItemInternal() = default;
MetadataRecorder::ItemInternal::~ItemInternal() = default;
MetadataRecorder::MetadataRecorder() {
// Ensure that we have necessary atomic support.
DCHECK(items_[0].is_active.is_lock_free());
DCHECK(items_[0].value.is_lock_free());
}
MetadataRecorder::~MetadataRecorder() = default;
void MetadataRecorder::Set(uint64_t name_hash,
Optional<int64_t> key,
int64_t value) {
AutoLock lock(write_lock_);
// Acquiring the |write_lock_| ensures that:
//
// - We don't try to write into the same new slot at the same time as
// another thread
// - We see all writes by other threads (acquiring a mutex implies acquire
// semantics)
size_t item_slots_used = item_slots_used_.load(std::memory_order_relaxed);
for (size_t i = 0; i < item_slots_used; ++i) {
auto& item = items_[i];
if (item.name_hash == name_hash && item.key == key) {
item.value.store(value, std::memory_order_relaxed);
const bool was_active =
item.is_active.exchange(true, std::memory_order_release);
if (!was_active)
inactive_item_count_--;
UMA_HISTOGRAM_COUNTS_10000("StackSamplingProfiler.MetadataSlotsUsed",
item_slots_used);
return;
}
}
item_slots_used = TryReclaimInactiveSlots(item_slots_used);
UMA_HISTOGRAM_COUNTS_10000("StackSamplingProfiler.MetadataSlotsUsed",
item_slots_used + 1);
if (item_slots_used == items_.size()) {
// The metadata recorder is full, forcing us to drop this metadata. The
// above UMA histogram counting occupied metadata slots should help us set a
// max size that avoids this condition during normal Chrome use.
return;
}
// Wait until the item is fully created before setting |is_active| to true and
// incrementing |item_slots_used_|, which will signal to readers that the item
// is ready.
auto& item = items_[item_slots_used];
item.name_hash = name_hash;
item.key = key;
item.value.store(value, std::memory_order_relaxed);
item.is_active.store(true, std::memory_order_release);
item_slots_used_.fetch_add(1, std::memory_order_release);
}
void MetadataRecorder::Remove(uint64_t name_hash, Optional<int64_t> key) {
AutoLock lock(write_lock_);
size_t item_slots_used = item_slots_used_.load(std::memory_order_relaxed);
for (size_t i = 0; i < item_slots_used; ++i) {
auto& item = items_[i];
if (item.name_hash == name_hash && item.key == key) {
// A removed item will occupy its slot until that slot is reclaimed.
const bool was_active =
item.is_active.exchange(false, std::memory_order_relaxed);
if (was_active)
inactive_item_count_++;
return;
}
}
}
MetadataRecorder::ScopedGetItems::ScopedGetItems(
MetadataRecorder* metadata_recorder)
: metadata_recorder_(metadata_recorder),
auto_lock_(&metadata_recorder->read_lock_) {}
MetadataRecorder::ScopedGetItems::~ScopedGetItems() {}
// This function is marked as NO_THREAD_SAFETY_ANALYSIS because the analyzer
// doesn't understand that the lock is acquired in the constructor initializer
// list and can therefore be safely released here.
size_t MetadataRecorder::ScopedGetItems::GetItems(
ProfileBuilder::MetadataItemArray* const items) NO_THREAD_SAFETY_ANALYSIS {
size_t item_count = metadata_recorder_->GetItems(items);
auto_lock_.Release();
return item_count;
}
std::unique_ptr<ProfileBuilder::MetadataProvider>
MetadataRecorder::CreateMetadataProvider() {
return std::make_unique<MetadataRecorder::ScopedGetItems>(this);
}
size_t MetadataRecorder::GetItems(
ProfileBuilder::MetadataItemArray* const items) const {
read_lock_.AssertAcquired();
// If a writer adds a new item after this load, it will be ignored. We do
// this instead of calling item_slots_used_.load() explicitly in the for loop
// bounds checking, which would be expensive.
//
// Also note that items are snapshotted sequentially and that items can be
// modified mid-snapshot by non-suspended threads. This means that there's a
// small chance that some items, especially those that occur later in the
// array, may have values slightly "in the future" from when the sample was
// actually collected. It also means that the array as returned may never have
// existed in its entirety, although each name/value pair represents a
// consistent item that existed very shortly after the thread was suspended.
size_t item_slots_used = item_slots_used_.load(std::memory_order_acquire);
size_t write_index = 0;
for (size_t read_index = 0; read_index < item_slots_used; ++read_index) {
const auto& item = items_[read_index];
// Because we wait until |is_active| is set to consider an item active and
// that field is always set last, we ignore half-created items.
if (item.is_active.load(std::memory_order_acquire)) {
(*items)[write_index++] = ProfileBuilder::MetadataItem{
item.name_hash, item.key, item.value.load(std::memory_order_relaxed)};
}
}
return write_index;
}
size_t MetadataRecorder::TryReclaimInactiveSlots(size_t item_slots_used) {
const size_t remaining_slots =
ProfileBuilder::MAX_METADATA_COUNT - item_slots_used;
if (inactive_item_count_ == 0 || inactive_item_count_ < remaining_slots) {
// This reclaiming threshold has a few nice properties:
//
// - It avoids reclaiming when no items have been removed
// - It makes doing so more likely as free slots become more scarce
// - It makes doing so less likely when the benefits are lower
return item_slots_used;
}
if (read_lock_.Try()) {
// The lock isn't already held by a reader or another thread reclaiming
// slots.
item_slots_used = ReclaimInactiveSlots(item_slots_used);
read_lock_.Release();
}
return item_slots_used;
}
size_t MetadataRecorder::ReclaimInactiveSlots(size_t item_slots_used) {
// From here until the end of the reclamation, we can safely use
// memory_order_relaxed for all reads and writes. We don't need
// memory_order_acquire because acquiring the write mutex gives acquire
// semantics and no other threads can write after we hold that mutex. We don't
// need memory_order_release because no readers can read until we release the
// read mutex, which itself has release semantics.
size_t first_inactive_item_idx = 0;
size_t last_active_item_idx = item_slots_used - 1;
while (first_inactive_item_idx < last_active_item_idx) {
ItemInternal& inactive_item = items_[first_inactive_item_idx];
ItemInternal& active_item = items_[last_active_item_idx];
if (inactive_item.is_active.load(std::memory_order_relaxed)) {
// Keep seeking forward to an inactive item.
++first_inactive_item_idx;
continue;
}
if (!active_item.is_active.load(std::memory_order_relaxed)) {
// Keep seeking backward to an active item. Skipping over this item
// indicates that we're freeing the slot at this index.
--last_active_item_idx;
item_slots_used--;
continue;
}
inactive_item.name_hash = active_item.name_hash;
// Copy the key along with the name hash so the relocated item keeps its full
// identity for later Set()/Remove() matching.
inactive_item.key = active_item.key;
inactive_item.value.store(active_item.value.load(std::memory_order_relaxed),
std::memory_order_relaxed);
inactive_item.is_active.store(true, std::memory_order_relaxed);
++first_inactive_item_idx;
--last_active_item_idx;
item_slots_used--;
}
item_slots_used_.store(item_slots_used, std::memory_order_relaxed);
return item_slots_used;
}
} // namespace base
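
A small standalone sketch (illustrative numbers only) of the reclamation threshold applied by TryReclaimInactiveSlots(): reclamation is attempted only once the number of inactive slots is at least the number of remaining free slots.

#include <cstddef>

// Mirrors the early-return condition in TryReclaimInactiveSlots().
bool ShouldTryReclaim(size_t max_slots,
                      size_t item_slots_used,
                      size_t inactive_item_count) {
  const size_t remaining_slots = max_slots - item_slots_used;
  return inactive_item_count != 0 && inactive_item_count >= remaining_slots;
}

// With a 50-slot array, 40 slots used and 12 of them inactive, remaining_slots
// is 10, so ShouldTryReclaim(50, 40, 12) is true; with only 3 inactive items it
// would be false and the write completes without touching the read lock.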

View file

@@ -0,0 +1,280 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_METADATA_RECORDER_H_
#define BASE_PROFILER_METADATA_RECORDER_H_
#include <array>
#include <atomic>
#include <utility>
#include "base/optional.h"
#include "base/profiler/profile_builder.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
namespace base {
// MetadataRecorder provides a data structure to store metadata key/value pairs
// to be associated with samples taken by the sampling profiler. Whatever
// metadata is present in this map when a sample is recorded is then associated
// with the sample.
//
// Methods on this class are safe to call unsynchronized from arbitrary threads.
//
// This class was designed to read metadata from a single sampling thread and
// write metadata from many Chrome threads within the same process. These other
// threads might be suspended by the sampling thread at any time in order to
// collect a sample.
//
// This class has a few notable constraints:
//
// A) If a lock that's required to read the metadata might be held while writing
// the metadata, that lock must be acquirable *before* the thread is
// suspended. Otherwise, the sampling thread might suspend the target thread
// while it is holding the required lock, causing deadlock.
//
// Ramifications:
//
// - When retrieving items, lock acquisition (through
// CreateMetadataProvider()) and actual item retrieval (through
// MetadataProvider::GetItems()) are separate.
//
// B) We can't allocate data on the heap while reading the metadata items. This
// is because, on many operating systems, there's a process-wide heap lock
// that is held while allocating on the heap. If a thread is suspended while
// holding this lock and the sampling thread then tries to allocate on the
// heap to read the metadata, it will deadlock trying to acquire the heap
// lock.
//
// Ramifications:
//
// - We hold and retrieve the metadata using a fixed-size array, which
// allows readers to preallocate the data structure that we pass back
// the metadata in.
//
// C) We shouldn't guard writes with a lock that also guards reads. It can take
// ~30us between the time the sampling thread requests that a thread be
// suspended and the time the suspension actually happens. If all metadata writes
// block their thread during that time, we're very likely to block all Chrome
// threads for an additional 30us per sample.
//
// Ramifications:
//
// - We use two locks to guard the metadata: a read lock and a write
// lock. Only the write lock is required to write into the metadata, and
// only the read lock is required to read the metadata.
//
// - Because we can't guard reads and writes with the same lock, we have to
// face the possibility of writes occurring during a read. This is
// especially problematic because there's no way to read both the key and
// value for an item atomically without using mutexes, which violates
// constraint A). If the sampling thread were to see the following
// interleaving of reads and writes:
//
// * Reader thread reads key for slot 0
// * Writer thread removes item at slot 0
// * Writer thread creates new item with different key in slot 0
// * Reader thread reads value for slot 0
//
// then the reader would see an invalid value for the given key. Because
// of this possibility, we keep slots reserved for a specific key even
// after that item has been removed. We reclaim these slots on a
// best-effort basis during writes when the metadata recorder has become
// sufficiently full and we can acquire the read lock.
//
// - We use state stored in atomic data types to ensure that readers and
// writers are synchronized about where data should be written to and
// read from. We must use atomic data types to guarantee that there's no
// instruction during a write after which the recorder is in an
// inconsistent state that might yield garbage data for a reader.
//
// Here are a few of the many states the recorder can be in:
//
// - No thread is using the recorder.
//
// - A single writer is writing into the recorder without a simultaneous
// read. The write will succeed.
//
// - A reader is reading from the recorder without a simultaneous write. The
// read will succeed.
//
// - Multiple writers attempt to write into the recorder simultaneously. All
// writers but one will block because only one can hold the write lock.
//
// - A writer is writing into the recorder, which hasn't reached the threshold
// at which it will try to reclaim inactive slots. The writer won't try to
// acquire the read lock to reclaim inactive slots. The reader will therefore
// be able to immediately acquire the read lock, suspend the target thread,
// and read the metadata.
//
// - A writer is writing into the recorder, the recorder has reached the
// threshold at which it needs to reclaim inactive slots, and the writer
// thread is now in the middle of reclaiming those slots when a reader
// arrives. The reader will try to acquire the read lock before suspending the
// thread but will block until the writer thread finishes reclamation and
// releases the read lock. The reader will then be able to acquire the read
// lock and suspend the target thread.
//
// - A reader is reading the recorder when a writer attempts to write. The write
// will be successful. However, if the writer deems it necessary to reclaim
// inactive slots, it will skip doing so because it won't be able to acquire
// the read lock.
class BASE_EXPORT MetadataRecorder {
public:
MetadataRecorder();
virtual ~MetadataRecorder();
MetadataRecorder(const MetadataRecorder&) = delete;
MetadataRecorder& operator=(const MetadataRecorder&) = delete;
// Sets a value for a (|name_hash|, |key|) pair, overwriting any value
// previously set for the pair. Nullopt keys are treated as just another key
// state for the purpose of associating values.
void Set(uint64_t name_hash, Optional<int64_t> key, int64_t value);
// Removes the item with the specified name hash and optional key. Has no
// effect if such an item does not exist.
void Remove(uint64_t name_hash, Optional<int64_t> key);
// Creates a MetadataProvider object for the recorder, which acquires the
// necessary exclusive read lock and provides access to the recorder's items
// via its GetItems() function. Reclaiming of inactive slots in the recorder
// can't occur while this object lives, so it should be created as late as
// possible, just before it's needed. Calling GetItems() releases the lock held by the
// object and can therefore only be called once during the object's lifetime.
//
// This object should be created *before* suspending the target
// thread. Otherwise, that thread might be suspended while reclaiming inactive
// slots and holding the read lock, which would cause the sampling thread to
// deadlock.
//
// Example usage:
//
// MetadataRecorder r;
// base::ProfileBuilder::MetadataItemArray arr;
// size_t item_count;
// ...
// {
// auto get_items = r.CreateMetadataProvider();
// item_count = get_items->GetItems(&arr);
// }
std::unique_ptr<ProfileBuilder::MetadataProvider> CreateMetadataProvider();
private:
// An object that provides access to a MetadataRecorder's items and holds the
// necessary exclusive read lock until either GetItems() is called or the
// object is destroyed.
//
// For usage and more details, see CreateMetadataProvider().
class SCOPED_LOCKABLE ScopedGetItems
: public ProfileBuilder::MetadataProvider {
public:
// Acquires an exclusive read lock on the metadata recorder which is held
// until either GetItems() is called or the object is destroyed.
ScopedGetItems(MetadataRecorder* metadata_recorder)
EXCLUSIVE_LOCK_FUNCTION(metadata_recorder->read_lock_);
~ScopedGetItems() override UNLOCK_FUNCTION(metadata_recorder_->read_lock_);
ScopedGetItems(const ScopedGetItems&) = delete;
ScopedGetItems& operator=(const ScopedGetItems&) = delete;
// Retrieves the first |available_slots| items in the metadata recorder and
// copies them into |items|, returning the number of metadata items that
// were copied. To ensure that all items can be copied, |available_slots|
// should be greater than or equal to |MAX_METADATA_COUNT|.
//
// This function releases the lock held by the object and can therefore only
// be called once during the object's lifetime.
size_t GetItems(ProfileBuilder::MetadataItemArray* const items) override
EXCLUSIVE_LOCKS_REQUIRED(metadata_recorder_->read_lock_);
private:
const MetadataRecorder* const metadata_recorder_;
base::ReleasableAutoLock auto_lock_;
};
// TODO(charliea): Support large quantities of metadata efficiently.
struct ItemInternal {
ItemInternal();
~ItemInternal();
// Indicates whether the metadata item is still active (i.e. not removed).
//
// Requires atomic reads and writes to avoid word tearing when reading and
// writing unsynchronized. Requires acquire/release semantics to ensure that
// the other state in this struct is visible to the reading thread before it
// is marked as active.
std::atomic<bool> is_active{false};
// Neither name_hash nor key requires atomicity or memory order constraints
// because no reader will attempt to read them mid-write. Specifically,
// readers wait until |is_active| is true to read them. Because |is_active|
// is always stored with a memory_order_release fence, we're guaranteed that
// |name_hash| and |key| will be finished writing before |is_active| is set
// to true.
uint64_t name_hash;
Optional<int64_t> key;
// Requires atomic reads and writes to avoid word tearing when updating an
// existing item unsynchronized. Does not require acquire/release semantics
// because we rely on the |is_active| acquire/release semantics to ensure
// that an item is fully created before we attempt to read it.
std::atomic<int64_t> value;
};
// Attempts to free slots in the metadata map that are currently allocated to
// inactive items. May fail silently if the read lock is already held, in
// which case no slots will be freed. Returns the number of item slots used
// after the reclamation.
size_t TryReclaimInactiveSlots(size_t item_slots_used)
EXCLUSIVE_LOCKS_REQUIRED(write_lock_) LOCKS_EXCLUDED(read_lock_);
// Also protected by read_lock_, but current thread annotation limitations
// prevent us from using thread annotations with locks acquired through
// Lock::Try(). Updates item_slots_used_ to reflect the new item count and
// returns the number of item slots used after the reclamation.
size_t ReclaimInactiveSlots(size_t item_slots_used)
EXCLUSIVE_LOCKS_REQUIRED(write_lock_);
// Protected by read_lock_, but current thread annotation limitations
// prevent us from using thread annotations with locks acquired through
// Lock::Try().
size_t GetItems(ProfileBuilder::MetadataItemArray* const items) const;
// Metadata items that the recorder has seen. Rather than implementing the
// metadata recorder as a dense array, we implement it as a sparse array where
// removed metadata items keep their slot with their |is_active| bit set to
// false. This avoids race conditions caused by reusing slots that might
// otherwise cause mismatches between metadata name hashes and values.
//
// For the rationale behind this design (along with others considered), see
// https://docs.google.com/document/d/18shLhVwuFbLl_jKZxCmOfRB98FmNHdKl0yZZZ3aEO4U/edit#.
std::array<ItemInternal, ProfileBuilder::MAX_METADATA_COUNT> items_;
// The number of item slots used in the metadata map.
//
// Requires atomic reads and writes to avoid word tearing when reading and
// writing unsynchronized. Requires acquire/release semantics to ensure that a
// newly-allocated slot is fully initialized before the reader becomes aware
// of its existence.
std::atomic<size_t> item_slots_used_{0};
// The number of item slots occupied by inactive items.
size_t inactive_item_count_ GUARDED_BY(write_lock_) = 0;
// A lock that guards against multiple threads trying to manipulate items_,
// item_slots_used_, or inactive_item_count_ at the same time.
base::Lock write_lock_;
// A lock that guards against a reader trying to read items_ while inactive
// slots are being reclaimed.
//
// Note that we can't enforce that this lock is properly acquired through
// thread annotations because the thread annotation analysis doesn't understand that
// ScopedGetItems::GetItems() can only be called between ScopedGetItems's
// constructor and destructor.
base::Lock read_lock_;
};
} // namespace base
#endif // BASE_PROFILER_METADATA_RECORDER_H_
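
A sketch of the intended call pattern, assuming hypothetical writer and sampling-thread call sites and an arbitrary name hash; only the MetadataRecorder and MetadataProvider APIs come from the header above.

#include <cstddef>
#include <cstdint>

#include "base/optional.h"
#include "base/profiler/metadata_recorder.h"
#include "base/profiler/profile_builder.h"

void WriterThread(base::MetadataRecorder* recorder) {
  constexpr uint64_t kNameHash = 0x1234567890abcdefULL;  // Arbitrary stand-in.
  recorder->Set(kNameHash, base::nullopt, /*value=*/42);
  // ... later, when the metadata no longer applies ...
  recorder->Remove(kNameHash, base::nullopt);
}

void SamplingThread(base::MetadataRecorder* recorder) {
  base::ProfileBuilder::MetadataItemArray items;
  size_t item_count;
  {
    // Acquire the read lock *before* suspending the target thread.
    auto provider = recorder->CreateMetadataProvider();
    item_count = provider->GetItems(&items);
  }
  // items[0..item_count) now holds a snapshot of the active metadata.
  (void)item_count;
}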

View file

@@ -0,0 +1,118 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/module_cache.h"
#include <algorithm>
#include <iterator>
#include <utility>
namespace base {
namespace {
// Supports heterogeneous comparisons on modules and addresses, for use in
// binary searching modules sorted by range for a contained address.
struct ModuleAddressCompare {
bool operator()(const std::unique_ptr<const ModuleCache::Module>& module,
uintptr_t address) const {
return module->GetBaseAddress() + module->GetSize() <= address;
}
bool operator()(
uintptr_t address,
const std::unique_ptr<const ModuleCache::Module>& module) const {
return address < module->GetBaseAddress();
}
};
} // namespace
ModuleCache::ModuleCache() = default;
ModuleCache::~ModuleCache() = default;
const ModuleCache::Module* ModuleCache::GetModuleForAddress(uintptr_t address) {
const auto non_native_module_loc = non_native_modules_.find(address);
if (non_native_module_loc != non_native_modules_.end())
return non_native_module_loc->get();
const auto native_module_loc = native_modules_.find(address);
if (native_module_loc != native_modules_.end())
return native_module_loc->get();
std::unique_ptr<const Module> new_module = CreateModuleForAddress(address);
if (!new_module)
return nullptr;
const auto loc = native_modules_.insert(std::move(new_module));
return loc.first->get();
}
std::vector<const ModuleCache::Module*> ModuleCache::GetModules() const {
std::vector<const Module*> result;
result.reserve(native_modules_.size());
for (const std::unique_ptr<const Module>& module : native_modules_)
result.push_back(module.get());
for (const std::unique_ptr<const Module>& module : non_native_modules_)
result.push_back(module.get());
return result;
}
void ModuleCache::UpdateNonNativeModules(
const std::vector<const Module*>& to_remove,
std::vector<std::unique_ptr<const Module>> to_add) {
// Insert the modules to remove into a set to support O(log(n)) lookup below.
flat_set<const Module*> to_remove_set(to_remove.begin(), to_remove.end());
// Reorder the modules to be removed to the last slots in the set, then move
// them to the inactive modules, then erase the moved-from modules from the
// set. The flat_set docs endorse using base::EraseIf() which performs the
// same operations -- exclusive of the moves -- so this is OK even though it
// might seem like we're messing with the internal set representation.
//
// remove_if is O(m*log(r)) where m is the number of current modules and r is
// the number of modules to remove. insert and erase are both O(r).
auto first_module_to_remove = std::remove_if(
non_native_modules_.begin(), non_native_modules_.end(),
[&to_remove_set](const std::unique_ptr<const Module>& module) {
return to_remove_set.find(module.get()) != to_remove_set.end();
});
// All modules requested to be removed should have been found.
DCHECK_EQ(static_cast<ptrdiff_t>(to_remove.size()),
std::distance(first_module_to_remove, non_native_modules_.end()));
inactive_non_native_modules_.insert(
inactive_non_native_modules_.end(),
std::make_move_iterator(first_module_to_remove),
std::make_move_iterator(non_native_modules_.end()));
non_native_modules_.erase(first_module_to_remove, non_native_modules_.end());
// Insert the modules to be added. This operation is O((m + a) + a*log(a))
// where m is the number of current modules and a is the number of modules to
// be added.
non_native_modules_.insert(std::make_move_iterator(to_add.begin()),
std::make_move_iterator(to_add.end()));
}
void ModuleCache::AddCustomNativeModule(std::unique_ptr<const Module> module) {
native_modules_.insert(std::move(module));
}
bool ModuleCache::ModuleAndAddressCompare::operator()(
const std::unique_ptr<const Module>& m1,
const std::unique_ptr<const Module>& m2) const {
return m1->GetBaseAddress() < m2->GetBaseAddress();
}
bool ModuleCache::ModuleAndAddressCompare::operator()(
const std::unique_ptr<const Module>& m1,
uintptr_t address) const {
return m1->GetBaseAddress() + m1->GetSize() <= address;
}
bool ModuleCache::ModuleAndAddressCompare::operator()(
uintptr_t address,
const std::unique_ptr<const Module>& m2) const {
return address < m2->GetBaseAddress();
}
} // namespace base
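
The heterogeneous comparators above are what allow a set keyed by modules to be searched with a plain address. A simplified standalone sketch with a fake module type (everything here is hypothetical except the comparator pattern):

#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>

struct FakeModule {
  uintptr_t base;
  size_t size;
};

// Mirrors ModuleAndAddressCompare: modules order by base address, and an
// address compares "equal" to a module when it lies in [base, base + size).
struct RangeCompare {
  using is_transparent = void;
  bool operator()(const std::unique_ptr<FakeModule>& a,
                  const std::unique_ptr<FakeModule>& b) const {
    return a->base < b->base;
  }
  bool operator()(const std::unique_ptr<FakeModule>& m, uintptr_t addr) const {
    return m->base + m->size <= addr;
  }
  bool operator()(uintptr_t addr, const std::unique_ptr<FakeModule>& m) const {
    return addr < m->base;
  }
};

int main() {
  std::set<std::unique_ptr<FakeModule>, RangeCompare> modules;
  modules.insert(std::make_unique<FakeModule>(FakeModule{0x1000, 0x2000}));
  // is_transparent enables find() with a bare address: 0x1800 falls inside
  // [0x1000, 0x3000), so the lookup succeeds.
  const bool found = modules.find(uintptr_t{0x1800}) != modules.end();
  return found ? 0 : 1;
}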

View file

@@ -0,0 +1,150 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_MODULE_CACHE_H_
#define BASE_PROFILER_MODULE_CACHE_H_
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "base/base_export.h"
#include "base/containers/flat_set.h"
#include "base/files/file_path.h"
#include "build/build_config.h"
#if defined(OS_WIN)
#include <windows.h>
#endif
namespace base {
// Supports cached lookup of modules by address, with caching based on module
// address ranges.
//
// Cached lookup is necessary on Mac for performance, due to an inefficient
// dladdr implementation. See https://crrev.com/487092.
//
// Cached lookup is beneficial on Windows to minimize use of the loader
// lock. Note however that the cache retains a handle to looked-up modules for
// its lifetime, which may result in pinning modules in memory that were
// transiently loaded by the OS.
class BASE_EXPORT ModuleCache {
public:
// Module represents a binary module (executable or library) and its
// associated state.
class BASE_EXPORT Module {
public:
Module() = default;
virtual ~Module() = default;
Module(const Module&) = delete;
Module& operator=(const Module&) = delete;
// Gets the base address of the module.
virtual uintptr_t GetBaseAddress() const = 0;
// Gets the opaque binary string that uniquely identifies a particular
// program version with high probability. This is parsed from headers of the
// loaded module.
// For binaries generated by GNU tools:
// Contents of the .note.gnu.build-id field.
// On Windows:
// GUID + AGE in the debug image headers of a module.
virtual std::string GetId() const = 0;
// Gets the debug basename of the module. This is the basename of the PDB
// file on Windows and the basename of the binary on other platforms.
virtual FilePath GetDebugBasename() const = 0;
// Gets the size of the module.
virtual size_t GetSize() const = 0;
// True if this is a native module.
virtual bool IsNative() const = 0;
};
ModuleCache();
~ModuleCache();
// Gets the module containing |address| or nullptr if |address| is not within
// a module. The returned module remains owned by and has the same lifetime as
// the ModuleCache object.
const Module* GetModuleForAddress(uintptr_t address);
std::vector<const Module*> GetModules() const;
// Updates the set of non-native modules maintained by the
// ModuleCache. Non-native modules represent regions of non-native executable
// code such as V8 generated code.
//
// Note that non-native modules may be embedded within native modules, as in
// the case of V8 builtin code compiled within Chrome. In that case
// GetModuleForAddress() will return the non-native module rather than the
// native module for the memory region it occupies.
//
// Modules in |to_remove| are removed from the set of active modules;
// specifically they no longer participate in the GetModuleForAddress()
// lookup. They continue to exist for the lifetime of the ModuleCache,
// however, so that existing references to them remain valid. Modules in
// |to_add| are added to the set of active non-native modules.
void UpdateNonNativeModules(
const std::vector<const Module*>& to_remove,
std::vector<std::unique_ptr<const Module>> to_add);
// Adds a custom native module to the cache. This is intended to support
// native modules that require custom handling. In general, native modules
// will be found and added automatically when invoking GetModuleForAddress().
void AddCustomNativeModule(std::unique_ptr<const Module> module);
private:
// Heterogeneously compares modules by base address, and modules against
// addresses. The module/address comparison considers an address equivalent
// to a module if the address falls within the extent of the module. Combined
// with is_transparent, this allows modules to be looked up by address in the
// containers that use this comparator.
struct ModuleAndAddressCompare {
using is_transparent = void;
bool operator()(const std::unique_ptr<const Module>& m1,
const std::unique_ptr<const Module>& m2) const;
bool operator()(const std::unique_ptr<const Module>& m1,
uintptr_t address) const;
bool operator()(uintptr_t address,
const std::unique_ptr<const Module>& m2) const;
};
// Creates a Module object for the specified memory address. Returns null if
// the address does not belong to a module.
static std::unique_ptr<const Module> CreateModuleForAddress(
uintptr_t address);
// Set of native modules sorted by base address. We use set rather than
// flat_set because the latter type has O(n^2) runtime for adding modules
// one-at-a-time, which is how modules are added on Windows and Mac.
std::set<std::unique_ptr<const Module>, ModuleAndAddressCompare>
native_modules_;
// Set of non-native modules currently mapped into the address space, sorted
// by base address. Represented as flat_set because std::set does not support
// extracting move-only element types prior to C++17's
// std::set<>::extract(). The non-native module insertion/removal patterns --
// initial bulk insertion, then infrequent inserts/removals -- should work
// reasonably well with the flat_set complexity guarantees. Separate from
// native_modules_ to support preferential lookup of non-native modules
// embedded in native modules; see comment on UpdateNonNativeModules().
base::flat_set<std::unique_ptr<const Module>, ModuleAndAddressCompare>
non_native_modules_;
// Unsorted vector of inactive non-native modules. Inactive modules are no
// longer mapped in the address space and don't participate in address lookup,
// but are retained by the cache so that existing references to them
// remain valid. Note that this cannot be represented as a set/flat_set
// because it can contain multiple modules that were loaded (then subsequently
// unloaded) at the same base address.
std::vector<std::unique_ptr<const Module>> inactive_non_native_modules_;
};
} // namespace base
#endif // BASE_PROFILER_MODULE_CACHE_H_
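
A hypothetical call site for the cache; GetModuleForAddress() and the Module accessors come from the header above, while the function name and the symbolization remark are assumptions.

#include <cstdint>

#include "base/profiler/module_cache.h"

void ResolveSample(base::ModuleCache* cache, uintptr_t instruction_pointer) {
  const base::ModuleCache::Module* module =
      cache->GetModuleForAddress(instruction_pointer);
  if (!module)
    return;  // Address is not within any known module.
  // The module-relative offset is what a symbolizer would consume, together
  // with module->GetId() and module->GetDebugBasename().
  const uintptr_t offset = instruction_pointer - module->GetBaseAddress();
  (void)offset;
}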

View file

@@ -0,0 +1,110 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/module_cache.h"
#include <dlfcn.h>
#include <mach-o/getsect.h>
#include <uuid/uuid.h>
#include "base/strings/string_number_conversions.h"
namespace base {
namespace {
// Returns the unique build ID for a module loaded at |module_addr|. Returns the
// empty string if the function fails to get the build ID.
//
// Build IDs are created by the concatenation of the module's GUID (Windows) /
// UUID (Mac) and an "age" field that indicates how many times that GUID/UUID
// has been reused. In Windows binaries, the "age" field is present in the
// module header, but on the Mac, UUIDs are never reused and so the "age" value
// appended to the UUID is always 0.
std::string GetUniqueId(const void* module_addr) {
const mach_header_64* mach_header =
reinterpret_cast<const mach_header_64*>(module_addr);
DCHECK_EQ(MH_MAGIC_64, mach_header->magic);
size_t offset = sizeof(mach_header_64);
size_t offset_limit = sizeof(mach_header_64) + mach_header->sizeofcmds;
for (uint32_t i = 0; i < mach_header->ncmds; ++i) {
if (offset + sizeof(load_command) >= offset_limit)
return std::string();
const load_command* current_cmd = reinterpret_cast<const load_command*>(
reinterpret_cast<const uint8_t*>(mach_header) + offset);
if (offset + current_cmd->cmdsize > offset_limit) {
// This command runs off the end of the command list. This is malformed.
return std::string();
}
if (current_cmd->cmd == LC_UUID) {
if (current_cmd->cmdsize < sizeof(uuid_command)) {
// This "UUID command" is too small. This is malformed.
return std::string();
}
const uuid_command* uuid_cmd =
reinterpret_cast<const uuid_command*>(current_cmd);
static_assert(sizeof(uuid_cmd->uuid) == sizeof(uuid_t),
"UUID field of UUID command should be 16 bytes.");
// The ID consists of the UUID concatenated with the Mac's "age" value,
// which is always 0.
return HexEncode(&uuid_cmd->uuid, sizeof(uuid_cmd->uuid)) + "0";
}
offset += current_cmd->cmdsize;
}
return std::string();
}
// Returns the size of the __TEXT segment of the module loaded at |module_addr|.
size_t GetModuleTextSize(const void* module_addr) {
const mach_header_64* mach_header =
reinterpret_cast<const mach_header_64*>(module_addr);
DCHECK_EQ(MH_MAGIC_64, mach_header->magic);
unsigned long module_size;
getsegmentdata(mach_header, SEG_TEXT, &module_size);
return module_size;
}
} // namespace
class MacModule : public ModuleCache::Module {
public:
MacModule(const Dl_info& dl_info)
: base_address_(reinterpret_cast<uintptr_t>(dl_info.dli_fbase)),
id_(GetUniqueId(dl_info.dli_fbase)),
debug_basename_(FilePath(dl_info.dli_fname).BaseName()),
size_(GetModuleTextSize(dl_info.dli_fbase)) {}
MacModule(const MacModule&) = delete;
MacModule& operator=(const MacModule&) = delete;
// ModuleCache::Module
uintptr_t GetBaseAddress() const override { return base_address_; }
std::string GetId() const override { return id_; }
FilePath GetDebugBasename() const override { return debug_basename_; }
size_t GetSize() const override { return size_; }
bool IsNative() const override { return true; }
private:
uintptr_t base_address_;
std::string id_;
FilePath debug_basename_;
size_t size_;
};
// static
std::unique_ptr<const ModuleCache::Module> ModuleCache::CreateModuleForAddress(
uintptr_t address) {
Dl_info info;
if (!dladdr(reinterpret_cast<const void*>(address), &info))
return nullptr;
return std::make_unique<MacModule>(info);
}
} // namespace base
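
For reference, a small sketch (made-up UUID bytes) of the ID format produced by GetUniqueId(): the hex-encoded 16-byte UUID with a literal "0" age suffix appended.

#include <cstdint>
#include <string>

#include "base/strings/string_number_conversions.h"

std::string ExampleBuildId() {
  const uint8_t uuid[16] = {0x16, 0xB2, 0xA4, 0x28, 0x1D, 0xED, 0x44, 0x2E,
                            0x9A, 0x36, 0xFC, 0xE8, 0xCB, 0xD2, 0x97, 0x26};
  // Yields "16B2A4281DED442E9A36FCE8CBD29726" followed by the age "0".
  return base::HexEncode(uuid, sizeof(uuid)) + "0";
}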

View file

@@ -0,0 +1,103 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/module_cache.h"
#include <dlfcn.h>
#include <elf.h>
#include "base/debug/elf_reader.h"
#include "build/build_config.h"
namespace base {
namespace {
// Returns the unique build ID for a module loaded at |module_addr|. Returns the
// empty string if the function fails to get the build ID.
//
// Build IDs follow a cross-platform format consisting of several fields
// concatenated together:
// - the module's unique ID, and
// - the age suffix for incremental builds.
//
// On POSIX, the unique ID is read from the ELF binary located at |module_addr|.
// The age field is always 0.
std::string GetUniqueBuildId(const void* module_addr) {
base::debug::ElfBuildIdBuffer build_id;
size_t build_id_length =
base::debug::ReadElfBuildId(module_addr, true, build_id);
if (!build_id_length)
return std::string();
// Append 0 for the age value.
return std::string(build_id, build_id_length) + "0";
}
// Returns the offset from |module_addr| to the first byte following the last
// executable segment from the ELF file mapped at |module_addr|.
// It's defined this way so that any executable address from this module is in
// range [addr, addr + GetLastExecutableOffset(addr)).
// If no executable segment is found, returns 0.
size_t GetLastExecutableOffset(const void* module_addr) {
size_t max_offset = 0;
for (const Phdr& header : base::debug::GetElfProgramHeaders(module_addr)) {
if (header.p_type != PT_LOAD || !(header.p_flags & PF_X))
continue;
max_offset = std::max(max_offset,
static_cast<size_t>(header.p_vaddr + header.p_memsz));
}
return max_offset;
}
class PosixModule : public ModuleCache::Module {
public:
PosixModule(const Dl_info& dl_info);
PosixModule(const PosixModule&) = delete;
PosixModule& operator=(const PosixModule&) = delete;
// ModuleCache::Module
uintptr_t GetBaseAddress() const override { return base_address_; }
std::string GetId() const override { return id_; }
FilePath GetDebugBasename() const override { return debug_basename_; }
size_t GetSize() const override { return size_; }
bool IsNative() const override { return true; }
private:
uintptr_t base_address_;
std::string id_;
FilePath debug_basename_;
size_t size_;
};
PosixModule::PosixModule(const Dl_info& dl_info)
: base_address_(reinterpret_cast<uintptr_t>(dl_info.dli_fbase)),
id_(GetUniqueBuildId(dl_info.dli_fbase)),
debug_basename_(FilePath(dl_info.dli_fname).BaseName()),
size_(GetLastExecutableOffset(dl_info.dli_fbase)) {}
} // namespace
// static
std::unique_ptr<const ModuleCache::Module> ModuleCache::CreateModuleForAddress(
uintptr_t address) {
#if defined(ARCH_CPU_ARM64)
// arm64 has execute-only memory (XOM) protecting code pages from being read.
// PosixModule reads executable pages in order to extract module info. This
// may result in a crash if the module is mapped as XOM
// (https://crbug.com/957801).
return nullptr;
#else
Dl_info info;
if (!dladdr(reinterpret_cast<const void*>(address), &info))
return nullptr;
return std::make_unique<PosixModule>(info);
#endif
}
} // namespace base
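
A short sketch of the containment check implied by GetLastExecutableOffset(): an executable address is attributed to the module iff it falls in [base, base + last_executable_offset). The helper name is made up.

#include <cstddef>
#include <cstdint>

bool AddressInModule(uintptr_t address,
                     uintptr_t module_base,
                     size_t last_executable_offset) {
  return address >= module_base &&
         address < module_base + last_executable_offset;
}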

View file

@@ -0,0 +1,15 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/module_cache.h"
namespace base {
// static
std::unique_ptr<const ModuleCache::Module> ModuleCache::CreateModuleForAddress(
uintptr_t address) {
return nullptr;
}
} // namespace base

View file

@@ -0,0 +1,151 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/module_cache.h"
#include <objbase.h>
#include <psapi.h>
#include "base/process/process_handle.h"
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/win/pe_image.h"
#include "base/win/scoped_handle.h"
#include "base/win/win_util.h"
namespace base {
namespace {
// Gets the unique build ID and the corresponding debug path for a module.
// Windows build IDs are created by a concatenation of a GUID and AGE fields
// found in the headers of a module. The GUID is stored in the first 16 bytes
// and the AGE is stored in the last 4 bytes. Returns the empty string if the
// function fails to get the build ID. The debug path (pdb file) can be found
// in the PE file and is the build time path where the debug file was produced.
//
// Example:
// dumpbin chrome.exe /headers | find "Format:"
// ... Format: RSDS, {16B2A428-1DED-442E-9A36-FCE8CBD29726}, 10, ...
//
// The resulting buildID string of this instance of chrome.exe is
// "16B2A4281DED442E9A36FCE8CBD2972610".
//
// Note that the AGE field is encoded in decimal, not hex.
void GetDebugInfoForModule(HMODULE module_handle,
std::string* build_id,
FilePath* pdb_name) {
GUID guid;
DWORD age;
LPCSTR pdb_file = nullptr;
size_t pdb_file_length = 0;
if (!win::PEImage(module_handle)
.GetDebugId(&guid, &age, &pdb_file, &pdb_file_length)) {
return;
}
FilePath::StringType pdb_filename;
if (!UTF8ToWide(pdb_file, pdb_file_length, &pdb_filename))
return;
*pdb_name = FilePath(std::move(pdb_filename)).BaseName();
auto buffer = win::String16FromGUID(guid);
RemoveChars(buffer, STRING16_LITERAL("{}-"), &buffer);
buffer.append(NumberToString16(age));
*build_id = UTF16ToUTF8(buffer);
}
// Traits class to adapt GenericScopedHandle for HMODULES.
class ModuleHandleTraits : public win::HandleTraits {
public:
using Handle = HMODULE;
static bool CloseHandle(HMODULE handle) { return ::FreeLibrary(handle) != 0; }
static bool IsHandleValid(HMODULE handle) { return handle != nullptr; }
static HMODULE NullHandle() { return nullptr; }
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ModuleHandleTraits);
};
// HMODULE is not really a handle, and has reference count semantics, so the
// standard VerifierTraits does not apply.
using ScopedModuleHandle =
win::GenericScopedHandle<ModuleHandleTraits, win::DummyVerifierTraits>;
class WindowsModule : public ModuleCache::Module {
public:
WindowsModule(ScopedModuleHandle module_handle,
const MODULEINFO module_info,
const std::string& id,
const FilePath& debug_basename)
: module_handle_(std::move(module_handle)),
module_info_(module_info),
id_(id),
debug_basename_(debug_basename) {}
WindowsModule(const WindowsModule&) = delete;
WindowsModule& operator=(const WindowsModule&) = delete;
// ModuleCache::Module
uintptr_t GetBaseAddress() const override {
return reinterpret_cast<uintptr_t>(module_info_.lpBaseOfDll);
}
std::string GetId() const override { return id_; }
FilePath GetDebugBasename() const override { return debug_basename_; }
size_t GetSize() const override { return module_info_.SizeOfImage; }
bool IsNative() const override { return true; }
private:
ScopedModuleHandle module_handle_;
const MODULEINFO module_info_;
std::string id_;
FilePath debug_basename_;
};
ScopedModuleHandle GetModuleHandleForAddress(DWORD64 address) {
HMODULE module_handle = nullptr;
// GetModuleHandleEx() increments the module reference count, which is then
// managed and ultimately decremented by ScopedModuleHandle.
if (!::GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
reinterpret_cast<LPCTSTR>(address),
&module_handle)) {
const DWORD error = ::GetLastError();
DCHECK_EQ(ERROR_MOD_NOT_FOUND, static_cast<int>(error));
}
return ScopedModuleHandle(module_handle);
}
std::unique_ptr<ModuleCache::Module> CreateModuleForHandle(
ScopedModuleHandle module_handle) {
FilePath pdb_name;
std::string build_id;
GetDebugInfoForModule(module_handle.Get(), &build_id, &pdb_name);
MODULEINFO module_info;
if (!::GetModuleInformation(GetCurrentProcessHandle(), module_handle.Get(),
&module_info, sizeof(module_info))) {
return nullptr;
}
return std::make_unique<WindowsModule>(std::move(module_handle), module_info,
build_id, pdb_name);
}
} // namespace
// static
std::unique_ptr<const ModuleCache::Module> ModuleCache::CreateModuleForAddress(
uintptr_t address) {
ScopedModuleHandle module_handle = GetModuleHandleForAddress(address);
if (!module_handle.IsValid())
return nullptr;
return CreateModuleForHandle(std::move(module_handle));
}
} // namespace base

View file

@ -0,0 +1,20 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_NATIVE_UNWINDER_H_
#define BASE_PROFILER_NATIVE_UNWINDER_H_
#include <memory>
namespace base {
class ModuleCache;
class Unwinder;
// Creates the native unwinder for the platform.
std::unique_ptr<Unwinder> CreateNativeUnwinder(ModuleCache* module_cache);
} // namespace base
#endif // BASE_PROFILER_NATIVE_UNWINDER_H_

View file

@ -0,0 +1,28 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/native_unwinder_android.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/native_unwinder.h"
#include "base/profiler/profile_builder.h"
namespace base {
bool NativeUnwinderAndroid::CanUnwindFrom(const Frame& current_frame) const {
return false;
}
UnwindResult NativeUnwinderAndroid::TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const {
return UnwindResult::ABORTED;
}
std::unique_ptr<Unwinder> CreateNativeUnwinder(ModuleCache* module_cache) {
return std::make_unique<NativeUnwinderAndroid>();
}
} // namespace base

View file

@ -0,0 +1,33 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_NATIVE_UNWINDER_ANDROID_H_
#define BASE_PROFILER_NATIVE_UNWINDER_ANDROID_H_
#include "base/profiler/unwinder.h"
namespace base {
// Native unwinder implementation for Android, using libunwindstack.
//
// TODO(charliea): Implement this class.
// See: https://crbug.com/989102
class NativeUnwinderAndroid : public Unwinder {
public:
NativeUnwinderAndroid() = default;
NativeUnwinderAndroid(const NativeUnwinderAndroid&) = delete;
NativeUnwinderAndroid& operator=(const NativeUnwinderAndroid&) = delete;
// Unwinder
bool CanUnwindFrom(const Frame& current_frame) const override;
UnwindResult TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const override;
};
} // namespace base
#endif // BASE_PROFILER_NATIVE_UNWINDER_ANDROID_H_

View file

@ -0,0 +1,336 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/native_unwinder_mac.h"
#include <mach-o/compact_unwind_encoding.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <sys/ptrace.h>
#include "base/logging.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/native_unwinder.h"
#include "base/profiler/profile_builder.h"
extern "C" {
void _sigtramp(int, int, struct sigset*);
}
namespace base {
namespace {
// Extracts the "frame offset" for a given frame from the compact unwind info.
// A frame offset indicates the location of saved non-volatile registers in
// relation to the frame pointer. See |mach-o/compact_unwind_encoding.h| for
// details.
uint32_t GetFrameOffset(int compact_unwind_info) {
  // The frame offset lives in bits 16-23. This shifts it down by the number of
  // trailing zeroes in the mask, then masks with (1 << number of one bits in the
// mask) - 1, turning 0x00FF0000 into 0x000000FF. Adapted from |EXTRACT_BITS|
// in libunwind's CompactUnwinder.hpp.
return (
(compact_unwind_info >> __builtin_ctz(UNWIND_X86_64_RBP_FRAME_OFFSET)) &
(((1 << __builtin_popcount(UNWIND_X86_64_RBP_FRAME_OFFSET))) - 1));
}
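// Worked illustration of the extraction above: UNWIND_X86_64_RBP_FRAME_OFFSET
// is 0x00FF0000, so __builtin_ctz yields 16 and __builtin_popcount yields 8,
// reducing the expression to (compact_unwind_info >> 16) & 0xFF, i.e. the
// value of bits 16-23.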
// True if the unwind from |leaf_frame_module| may trigger a crash bug in
// unw_init_local. If so, the stack walk should be aborted at the leaf frame.
bool MayTriggerUnwInitLocalCrash(const ModuleCache::Module* leaf_frame_module) {
// The issue here is a bug in unw_init_local that, in some unwinds, results in
// attempts to access memory at the address immediately following the address
// range of the library. When the library is the last of the mapped libraries
// that address is in a different memory region. Starting with 10.13.4 beta
// releases it appears that this region is sometimes either unmapped or mapped
// without read access, resulting in crashes on the attempted access. It's not
// clear what circumstances result in this situation; attempts to reproduce on
// a 10.13.4 beta did not trigger the issue.
//
// The workaround is to check if the memory address that would be accessed is
// readable, and if not, abort the stack walk before calling unw_init_local.
// As of 2018/03/19 about 0.1% of non-idle stacks on the UI and GPU main
// threads have a leaf frame in the last library. Since the issue appears to
// only occur some of the time it's expected that the quantity of lost samples
// will be lower than 0.1%, possibly significantly lower.
//
// TODO(lgrey): Add references above to LLVM/Radar bugs on unw_init_local once
// filed.
uint64_t unused;
vm_size_t size = sizeof(unused);
return vm_read_overwrite(
current_task(),
leaf_frame_module->GetBaseAddress() + leaf_frame_module->GetSize(),
sizeof(unused), reinterpret_cast<vm_address_t>(&unused),
&size) != 0;
}
// Check if the cursor contains a valid-looking frame pointer for frame pointer
// unwinds. If the stack frame has a frame pointer, stepping the cursor will
// involve indexing memory access off of that pointer. In that case,
// sanity-check the frame pointer register to ensure it's within bounds.
//
// Additionally, the stack frame might be in a prologue or epilogue, which can
// cause a crash when the unwinder attempts to access non-volatile registers
// that have not yet been pushed, or have already been popped from the
// stack. libunwind will try to restore those registers using an offset from
// the frame pointer. However, since we copy the stack from RSP up, any
// locations below the stack pointer are before the beginning of the stack
// buffer. Account for this by checking that the expected location is above the
// stack pointer, and rejecting the sample if it isn't.
bool HasValidRbp(unw_cursor_t* unwind_cursor, uintptr_t stack_top) {
unw_proc_info_t proc_info;
unw_get_proc_info(unwind_cursor, &proc_info);
if ((proc_info.format & UNWIND_X86_64_MODE_MASK) ==
UNWIND_X86_64_MODE_RBP_FRAME) {
unw_word_t rsp, rbp;
unw_get_reg(unwind_cursor, UNW_X86_64_RSP, &rsp);
unw_get_reg(unwind_cursor, UNW_X86_64_RBP, &rbp);
uint32_t offset = GetFrameOffset(proc_info.format) * sizeof(unw_word_t);
if (rbp < offset || (rbp - offset) < rsp || rbp > stack_top)
return false;
}
return true;
}
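// Worked illustration with hypothetical values: a frame offset of 2 gives an
// |offset| of 16 bytes. With rsp == 0x7f00 and stack_top == 0x8000, an rbp of
// 0x7f40 passes all three checks, while an rbp of 0x7f08 is rejected because
// the register save area at rbp - 16 would lie below the stack pointer.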
const ModuleCache::Module* GetLibSystemKernelModule(ModuleCache* module_cache) {
const ModuleCache::Module* module =
module_cache->GetModuleForAddress(reinterpret_cast<uintptr_t>(ptrace));
DCHECK(module);
DCHECK_EQ(FilePath("libsystem_kernel.dylib"), module->GetDebugBasename());
return module;
}
void GetSigtrampRange(uintptr_t* start, uintptr_t* end) {
auto address = reinterpret_cast<uintptr_t>(&_sigtramp);
DCHECK(address != 0);
*start = address;
unw_context_t context;
unw_cursor_t cursor;
unw_proc_info_t info;
unw_getcontext(&context);
// Set the context's RIP to the beginning of sigtramp,
// +1 byte to work around a bug in 10.11 (crbug.com/764468).
context.data[16] = address + 1;
unw_init_local(&cursor, &context);
unw_get_proc_info(&cursor, &info);
DCHECK_EQ(info.start_ip, address);
*end = info.end_ip;
}
} // namespace
NativeUnwinderMac::NativeUnwinderMac(ModuleCache* module_cache)
: libsystem_kernel_module_(GetLibSystemKernelModule(module_cache)) {
GetSigtrampRange(&sigtramp_start_, &sigtramp_end_);
}
bool NativeUnwinderMac::CanUnwindFrom(const Frame& current_frame) const {
return current_frame.module && current_frame.module->IsNative();
}
UnwindResult NativeUnwinderMac::TryUnwind(x86_thread_state64_t* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const {
  // We expect the frame corresponding to the |thread_context| register state to
// exist within |stack|.
DCHECK_GT(stack->size(), 0u);
// There isn't an official way to create a unw_context other than to create it
// from the current state of the current thread's stack. Since we're walking a
// different thread's stack we must forge a context. The unw_context is just a
// copy of the 16 main registers followed by the instruction pointer, nothing
// more. Coincidentally, the first 17 items of the x86_thread_state64_t type
// are exactly those registers in exactly the same order, so just bulk copy
// them over.
unw_context_t unwind_context;
memcpy(&unwind_context, thread_context, sizeof(uintptr_t) * 17);
// Avoid an out-of-bounds read bug in libunwind that can crash us in some
// circumstances. If we're subject to that case, just record the first frame
// and bail. See MayTriggerUnwInitLocalCrash for details.
if (stack->back().module && MayTriggerUnwInitLocalCrash(stack->back().module))
return UnwindResult::ABORTED;
unw_cursor_t unwind_cursor;
unw_init_local(&unwind_cursor, &unwind_context);
for (;;) {
Optional<UnwindResult> result =
CheckPreconditions(&stack->back(), &unwind_cursor, stack_top);
if (result.has_value())
return *result;
unw_word_t prev_rsp;
unw_get_reg(&unwind_cursor, UNW_REG_SP, &prev_rsp);
int step_result = UnwindStep(&unwind_context, &unwind_cursor,
stack->size() == 1, module_cache);
unw_word_t rip;
unw_get_reg(&unwind_cursor, UNW_REG_IP, &rip);
unw_word_t rsp;
unw_get_reg(&unwind_cursor, UNW_REG_SP, &rsp);
bool successfully_unwound;
result = CheckPostconditions(step_result, prev_rsp, rsp, stack_top,
&successfully_unwound);
if (successfully_unwound) {
stack->emplace_back(rip, module_cache->GetModuleForAddress(rip));
// Save the relevant register state back into the thread context.
unw_word_t rbp;
unw_get_reg(&unwind_cursor, UNW_X86_64_RBP, &rbp);
thread_context->__rip = rip;
thread_context->__rsp = rsp;
thread_context->__rbp = rbp;
}
if (result.has_value())
return *result;
}
NOTREACHED();
return UnwindResult::COMPLETED;
}
// Checks preconditions for attempting an unwind. If any conditions fail,
// returns corresponding UnwindResult. Otherwise returns nullopt.
Optional<UnwindResult> NativeUnwinderMac::CheckPreconditions(
const Frame* current_frame,
unw_cursor_t* unwind_cursor,
uintptr_t stack_top) const {
if (!current_frame->module) {
// There's no loaded module containing the instruction pointer. This is
// due to either executing code that is not in a module (e.g. V8
// runtime-generated code), or to a previous bad unwind.
//
// The bad unwind scenario can occur in frameless (non-DWARF) unwinding,
// which works by fetching the function's stack size from the unwind
// encoding or stack, and adding it to the stack pointer to determine the
// function's return address.
//
// If we're in a function prologue or epilogue, the actual stack size may
// be smaller than it will be during the normal course of execution. When
// libunwind adds the expected stack size, it will look for the return
// address in the wrong place. This check ensures we don't continue trying
// to unwind using the resulting bad IP value.
return UnwindResult::ABORTED;
}
if (!current_frame->module->IsNative()) {
// This is a non-native module associated with the auxiliary unwinder
// (e.g. corresponding to a frame in V8 generated code). Report as
// UNRECOGNIZED_FRAME to allow that unwinder to unwind the frame.
return UnwindResult::UNRECOGNIZED_FRAME;
}
// Don't continue if we're in sigtramp. Unwinding this from another thread
// is very fragile. It's a complex DWARF unwind that needs to restore the
// entire thread context which was saved by the kernel when the interrupt
// occurred.
if (current_frame->instruction_pointer >= sigtramp_start_ &&
current_frame->instruction_pointer < sigtramp_end_) {
return UnwindResult::ABORTED;
}
// Don't continue if rbp appears to be invalid (due to a previous bad
// unwind).
if (!HasValidRbp(unwind_cursor, stack_top))
return UnwindResult::ABORTED;
return nullopt;
}
// Attempts to unwind the current frame using unw_step, and returns its return
// value.
int NativeUnwinderMac::UnwindStep(unw_context_t* unwind_context,
unw_cursor_t* unwind_cursor,
bool at_first_frame,
ModuleCache* module_cache) const {
int step_result = unw_step(unwind_cursor);
if (step_result == 0 && at_first_frame) {
    // libunwind is designed to be triggered by user code on its own thread; if
    // it hits a library that has no unwind info for the function that is
// being executed, it just stops. This isn't a problem in the normal case,
// but in the case where this is the first frame unwind, it's quite
// possible that the stack being walked is stopped in a function that
// bridges to the kernel and thus is missing the unwind info.
// For now, just unwind the single case where the thread is stopped in a
// function in libsystem_kernel.
uint64_t& rsp = unwind_context->data[7];
uint64_t& rip = unwind_context->data[16];
if (module_cache->GetModuleForAddress(rip) == libsystem_kernel_module_) {
rip = *reinterpret_cast<uint64_t*>(rsp);
rsp += 8;
// Reset the cursor.
unw_init_local(unwind_cursor, unwind_context);
// Mock a successful step_result.
return 1;
}
}
return step_result;
}
// Checks postconditions after attempting an unwind. If any conditions fail,
// returns corresponding UnwindResult. Otherwise returns nullopt. Sets
// *|successfully_unwound| if the unwind succeeded (and hence the frame should
// be recorded).
Optional<UnwindResult> NativeUnwinderMac::CheckPostconditions(
int step_result,
unw_word_t prev_rsp,
unw_word_t rsp,
uintptr_t stack_top,
bool* successfully_unwound) const {
const bool stack_pointer_was_moved_and_is_valid =
rsp > prev_rsp && rsp < stack_top;
*successfully_unwound =
step_result > 0 ||
// libunwind considers the unwind complete and returns 0 if no unwind
// info was found for the current instruction pointer. It performs this
// check both before *and* after stepping the cursor. In the former case
// no action is taken, but in the latter case an unwind was successfully
// performed prior to the check. Distinguish these cases by checking
// whether the stack pointer was moved by unw_step. If so, record the
// new frame to enable non-native unwinders to continue the unwinding.
(step_result == 0 && stack_pointer_was_moved_and_is_valid);
if (step_result < 0)
return UnwindResult::ABORTED;
// libunwind returns 0 if it can't continue because no unwind info was found
// for the current instruction pointer. This could be due to unwinding past
// the entry point, in which case the unwind would be complete. It could
// also be due to unwinding to a function that simply doesn't have unwind
// info, in which case the unwind should be aborted. Or it could be due to
// unwinding to code not in a module, in which case the unwind might be
// continuable by a non-native unwinder. We don't have a good way to
// distinguish these cases, so return UNRECOGNIZED_FRAME to at least
// signify that we couldn't unwind further.
if (step_result == 0)
return UnwindResult::UNRECOGNIZED_FRAME;
// If we succeeded but didn't advance the stack pointer, or got an invalid
// new stack pointer, abort.
if (!stack_pointer_was_moved_and_is_valid)
return UnwindResult::ABORTED;
return nullopt;
}
std::unique_ptr<Unwinder> CreateNativeUnwinder(ModuleCache* module_cache) {
return std::make_unique<NativeUnwinderMac>(module_cache);
}
} // namespace base

View file

@ -0,0 +1,58 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_NATIVE_UNWINDER_MAC_H_
#define BASE_PROFILER_NATIVE_UNWINDER_MAC_H_
#include <libunwind.h>
#include "base/macros.h"
#include "base/optional.h"
#include "base/profiler/unwinder.h"
namespace base {
// Native unwinder implementation for Mac, using libunwind.
class NativeUnwinderMac : public Unwinder {
public:
NativeUnwinderMac(ModuleCache* module_cache);
NativeUnwinderMac(const NativeUnwinderMac&) = delete;
NativeUnwinderMac& operator=(const NativeUnwinderMac&) = delete;
// Unwinder:
bool CanUnwindFrom(const Frame& current_frame) const override;
UnwindResult TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const override;
private:
Optional<UnwindResult> CheckPreconditions(const Frame* current_frame,
unw_cursor_t* unwind_cursor,
uintptr_t stack_top) const;
// Returns the result from unw_step.
int UnwindStep(unw_context_t* unwind_context,
unw_cursor_t* cursor,
bool at_first_frame,
ModuleCache* module_cache) const;
Optional<UnwindResult> CheckPostconditions(int step_result,
unw_word_t prev_rsp,
unw_word_t rsp,
uintptr_t stack_top,
bool* should_record_frame) const;
// Cached pointer to the libsystem_kernel module.
const ModuleCache::Module* const libsystem_kernel_module_;
// The address range of |_sigtramp|, the signal trampoline function.
uintptr_t sigtramp_start_;
uintptr_t sigtramp_end_;
};
} // namespace base
#endif // BASE_PROFILER_NATIVE_UNWINDER_MAC_H_

View file

@ -0,0 +1,98 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/native_unwinder_win.h"
#include <winnt.h>
#include "base/profiler/native_unwinder.h"
#include "base/profiler/win32_stack_frame_unwinder.h"
namespace base {
bool NativeUnwinderWin::CanUnwindFrom(const Frame& current_frame) const {
return current_frame.module && current_frame.module->IsNative();
}
// Attempts to unwind frames starting from the state in |thread_context|,
// appending each successfully unwound frame to |stack|. Returns an
// UnwindResult describing why the unwind stopped.
UnwindResult NativeUnwinderWin::TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const {
  // We expect the frame corresponding to the |thread_context| register state to
// exist within |stack|.
DCHECK_GT(stack->size(), 0u);
Win32StackFrameUnwinder frame_unwinder;
for (;;) {
if (!stack->back().module) {
// There's no loaded module corresponding to the current frame. This can
// be due to executing code not in a module (e.g. runtime-generated code
// associated with third-party injected DLLs) or the module having been
// unloaded since we recorded the stack. In the latter case the function
// unwind information was part of the unloaded module, so it's not
// possible to unwind further.
//
// NB: if a module was found it's still theoretically possible for the
      // detected module to be different from the one that was loaded
// when the stack was copied, if the module was unloaded and a different
// module loaded in overlapping memory. This likely would cause a crash
// but has not been observed in practice.
return UnwindResult::ABORTED;
}
if (!stack->back().module->IsNative()) {
// This is a non-native module associated with the auxiliary unwinder
// (e.g. corresponding to a frame in V8 generated code). Report as
// UNRECOGNIZED_FRAME to allow that unwinder to unwind the frame.
return UnwindResult::UNRECOGNIZED_FRAME;
}
uintptr_t prev_stack_pointer = RegisterContextStackPointer(thread_context);
if (!frame_unwinder.TryUnwind(stack->size() == 1u, thread_context,
stack->back().module)) {
return UnwindResult::ABORTED;
}
if (ContextPC(thread_context) == 0)
return UnwindResult::COMPLETED;
// Exclusive range of expected stack pointer values after the unwind.
struct {
uintptr_t start;
uintptr_t end;
} expected_stack_pointer_range = {prev_stack_pointer, stack_top};
// Abort if the unwind produced an invalid stack pointer.
#if defined(ARCH_CPU_ARM64)
// Leaf frames on Arm can re-use the stack pointer, so they can validly have
// the same stack pointer as the previous frame.
if (stack->size() == 1u) {
expected_stack_pointer_range.start--;
}
#endif
if (RegisterContextStackPointer(thread_context) <=
expected_stack_pointer_range.start ||
RegisterContextStackPointer(thread_context) >=
expected_stack_pointer_range.end) {
return UnwindResult::ABORTED;
}
// Record the frame to which we just unwound.
stack->emplace_back(
ContextPC(thread_context),
module_cache->GetModuleForAddress(ContextPC(thread_context)));
}
NOTREACHED();
return UnwindResult::COMPLETED;
}
std::unique_ptr<Unwinder> CreateNativeUnwinder(ModuleCache* module_cache) {
return std::make_unique<NativeUnwinderWin>();
}
} // namespace base

View file

@ -0,0 +1,31 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_NATIVE_UNWINDER_WIN_H_
#define BASE_PROFILER_NATIVE_UNWINDER_WIN_H_
#include "base/macros.h"
#include "base/profiler/unwinder.h"
namespace base {
// Native unwinder implementation for Windows, using RtlVirtualUnwind.
class NativeUnwinderWin : public Unwinder {
public:
NativeUnwinderWin() = default;
NativeUnwinderWin(const NativeUnwinderWin&) = delete;
NativeUnwinderWin& operator=(const NativeUnwinderWin&) = delete;
// Unwinder:
bool CanUnwindFrom(const Frame& current_frame) const override;
UnwindResult TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const override;
};
} // namespace base
#endif // BASE_PROFILER_NATIVE_UNWINDER_WIN_H_

View file

@ -0,0 +1,23 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/profile_builder.h"
namespace base {
const size_t ProfileBuilder::MAX_METADATA_COUNT;
ProfileBuilder::MetadataItem::MetadataItem(uint64_t name_hash,
Optional<int64_t> key,
int64_t value)
: name_hash(name_hash), key(key), value(value) {}
ProfileBuilder::MetadataItem::MetadataItem() : name_hash(0), value(0) {}
ProfileBuilder::MetadataItem::MetadataItem(const MetadataItem& other) = default;
ProfileBuilder::MetadataItem& ProfileBuilder::MetadataItem::operator=(
    const MetadataItem& other) = default;
} // namespace base

View file

@ -0,0 +1,89 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_PROFILE_BUILDER_H_
#define BASE_PROFILER_PROFILE_BUILDER_H_
#include <array>
#include <memory>
#include <vector>
#include "base/base_export.h"
#include "base/optional.h"
#include "base/profiler/frame.h"
#include "base/profiler/module_cache.h"
#include "base/time/time.h"
namespace base {
// The ProfileBuilder interface allows the user to record profile information on
// the fly in whatever format is desired. Functions are invoked by the profiler
// on its own thread so must not block or perform expensive operations.
class BASE_EXPORT ProfileBuilder {
public:
ProfileBuilder() = default;
virtual ~ProfileBuilder() = default;
// Gets the ModuleCache to be used by the StackSamplingProfiler when looking
// up modules from addresses.
virtual ModuleCache* GetModuleCache() = 0;
struct BASE_EXPORT MetadataItem {
MetadataItem(uint64_t name_hash, Optional<int64_t> key, int64_t value);
MetadataItem();
MetadataItem(const MetadataItem& other);
MetadataItem& operator=(const MetadataItem& other);
// The hash of the metadata name, as produced by HashMetricName().
uint64_t name_hash;
// The key if specified when setting the item.
Optional<int64_t> key;
// The value of the metadata item.
int64_t value;
};
static constexpr size_t MAX_METADATA_COUNT = 50;
typedef std::array<MetadataItem, MAX_METADATA_COUNT> MetadataItemArray;
class MetadataProvider {
public:
MetadataProvider() = default;
virtual ~MetadataProvider() = default;
virtual size_t GetItems(ProfileBuilder::MetadataItemArray* const items) = 0;
};
// Records metadata to be associated with the current sample. To avoid
// deadlock on locks taken by the suspended profiled thread, implementations
// of this method must not execute any code that could take a lock, including
// heap allocation or use of CHECK/DCHECK/LOG statements. Generally
// implementations should simply atomically copy metadata state to be
// associated with the sample.
virtual void RecordMetadata(MetadataProvider* metadata_provider) {}
// Applies the specified metadata |item| to samples collected in the range
// [period_start, period_end), iff the profile already captured execution that
// covers that range entirely. This restriction avoids bias in the results
// towards samples in the middle of the period, at the expense of excluding
// periods overlapping the start or end of the profile. |period_end| must be
// <= TimeTicks::Now().
virtual void ApplyMetadataRetrospectively(TimeTicks period_start,
TimeTicks period_end,
const MetadataItem& item) {}
  // Records a new set of frames. Invoked when collection of a sample completes.
virtual void OnSampleCompleted(std::vector<Frame> frames,
TimeTicks sample_timestamp) = 0;
// Finishes the profile construction with |profile_duration| and
// |sampling_period|. Invoked when sampling a profile completes.
virtual void OnProfileCompleted(TimeDelta profile_duration,
TimeDelta sampling_period) = 0;
private:
DISALLOW_COPY_AND_ASSIGN(ProfileBuilder);
};
} // namespace base
#endif // BASE_PROFILER_PROFILE_BUILDER_H_

View file

@ -0,0 +1,194 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file provides the RegisterContext cross-platform typedef that represents
// the native register context for the platform, plus functions that provide
// access to key registers in the context.
#ifndef BASE_PROFILER_REGISTER_CONTEXT_H_
#define BASE_PROFILER_REGISTER_CONTEXT_H_
#include <cstdint>
#include "build/build_config.h"
#if defined(OS_WIN)
#include <windows.h>
#elif defined(OS_MACOSX)
#include <mach/machine/thread_status.h>
#elif defined(OS_ANDROID) || defined(OS_LINUX)
#include <sys/ucontext.h>
#endif
namespace base {
// Helper function to account for the fact that platform-specific register state
// types may be of the same size as uintptr_t, but not of the same type or
// signedness -- e.g. unsigned int vs. unsigned long on 32-bit Windows, unsigned
// long vs. unsigned long long on Mac, long long vs. unsigned long long on
// Linux.
template <typename T>
uintptr_t& AsUintPtr(T* value) {
static_assert(sizeof(T) == sizeof(uintptr_t),
"register state type must be of equivalent size to uintptr_t");
return *reinterpret_cast<uintptr_t*>(value);
}
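// For example (illustrative only): on 32-bit Windows the CONTEXT::Esp member
// is declared as a DWORD (unsigned long), which matches uintptr_t in size but
// not in type; AsUintPtr(&context->Esp) lets the generic accessors below
// return a uintptr_t& that aliases the same storage.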
#if defined(OS_WIN)
using RegisterContext = ::CONTEXT;
inline uintptr_t& RegisterContextStackPointer(::CONTEXT* context) {
#if defined(ARCH_CPU_X86_64)
return context->Rsp;
#elif defined(ARCH_CPU_ARM64)
return context->Sp;
#else
return AsUintPtr(&context->Esp);
#endif
}
inline uintptr_t& RegisterContextFramePointer(::CONTEXT* context) {
#if defined(ARCH_CPU_X86_64)
return context->Rbp;
#elif defined(ARCH_CPU_ARM64)
return context->Fp;
#else
return AsUintPtr(&context->Ebp);
#endif
}
inline uintptr_t& RegisterContextInstructionPointer(::CONTEXT* context) {
#if defined(ARCH_CPU_X86_64)
return context->Rip;
#elif defined(ARCH_CPU_ARM64)
return context->Pc;
#else
return AsUintPtr(&context->Eip);
#endif
}
#elif defined(OS_MACOSX) && !defined(OS_IOS) // #if defined(OS_WIN)
using RegisterContext = x86_thread_state64_t;
inline uintptr_t& RegisterContextStackPointer(x86_thread_state64_t* context) {
return AsUintPtr(&context->__rsp);
}
inline uintptr_t& RegisterContextFramePointer(x86_thread_state64_t* context) {
return AsUintPtr(&context->__rbp);
}
inline uintptr_t& RegisterContextInstructionPointer(
x86_thread_state64_t* context) {
return AsUintPtr(&context->__rip);
}
#elif defined(OS_ANDROID) || defined(OS_LINUX) // #if defined(OS_WIN)
using RegisterContext = mcontext_t;
#if defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_32_BITS)
inline uintptr_t& RegisterContextStackPointer(mcontext_t* context) {
return AsUintPtr(&context->arm_sp);
}
inline uintptr_t& RegisterContextFramePointer(mcontext_t* context) {
return AsUintPtr(&context->arm_fp);
}
inline uintptr_t& RegisterContextInstructionPointer(mcontext_t* context) {
return AsUintPtr(&context->arm_ip);
}
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_64_BITS)
inline uintptr_t& RegisterContextStackPointer(mcontext_t* context) {
return AsUintPtr(&context->sp);
}
inline uintptr_t& RegisterContextFramePointer(mcontext_t* context) {
// r29 is the FP register on 64-bit ARM per the Procedure Call Standard,
// section 5.1.1.
return AsUintPtr(&context->regs[29]);
}
inline uintptr_t& RegisterContextInstructionPointer(mcontext_t* context) {
return AsUintPtr(&context->pc);
}
#elif defined(ARCH_CPU_X86_FAMILY) && defined(ARCH_CPU_32_BITS)
inline uintptr_t& RegisterContextStackPointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_ESP]);
}
inline uintptr_t& RegisterContextFramePointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_EBP]);
}
inline uintptr_t& RegisterContextInstructionPointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_EIP]);
}
#elif defined(ARCH_CPU_X86_FAMILY) && defined(ARCH_CPU_64_BITS)
inline uintptr_t& RegisterContextStackPointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_RSP]);
}
inline uintptr_t& RegisterContextFramePointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_RBP]);
}
inline uintptr_t& RegisterContextInstructionPointer(mcontext_t* context) {
return AsUintPtr(&context->gregs[REG_RIP]);
}
#else // #if defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_32_BITS)
// Placeholders for other POSIX platforms that just return the first
// three register slots in the context.
inline uintptr_t& RegisterContextStackPointer(mcontext_t* context) {
return *reinterpret_cast<uintptr_t*>(context);
}
inline uintptr_t& RegisterContextFramePointer(mcontext_t* context) {
return *(reinterpret_cast<uintptr_t*>(context) + 1);
}
inline uintptr_t& RegisterContextInstructionPointer(mcontext_t* context) {
return *(reinterpret_cast<uintptr_t*>(context) + 2);
}
#endif // #if defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_32_BITS)
#else // #if defined(OS_WIN)
// Placeholders for other platforms.
struct RegisterContext {
uintptr_t stack_pointer;
uintptr_t frame_pointer;
uintptr_t instruction_pointer;
};
inline uintptr_t& RegisterContextStackPointer(RegisterContext* context) {
return context->stack_pointer;
}
inline uintptr_t& RegisterContextFramePointer(RegisterContext* context) {
return context->frame_pointer;
}
inline uintptr_t& RegisterContextInstructionPointer(RegisterContext* context) {
return context->instruction_pointer;
}
#endif // #if defined(OS_WIN)
} // namespace base
#endif // BASE_PROFILER_REGISTER_CONTEXT_H_

View file

@ -0,0 +1,78 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/sample_metadata.h"
#include "base/metrics/metrics_hashes.h"
#include "base/no_destructor.h"
#include "base/profiler/stack_sampling_profiler.h"
namespace base {
ScopedSampleMetadata::ScopedSampleMetadata(StringPiece name, int64_t value)
: name_hash_(HashMetricName(name)) {
GetSampleMetadataRecorder()->Set(name_hash_, nullopt, value);
}
ScopedSampleMetadata::ScopedSampleMetadata(StringPiece name,
int64_t key,
int64_t value)
: name_hash_(HashMetricName(name)), key_(key) {
GetSampleMetadataRecorder()->Set(name_hash_, key, value);
}
ScopedSampleMetadata::~ScopedSampleMetadata() {
GetSampleMetadataRecorder()->Remove(name_hash_, key_);
}
void SetSampleMetadata(StringPiece name, int64_t value) {
GetSampleMetadataRecorder()->Set(HashMetricName(name), nullopt, value);
}
void SetSampleMetadata(StringPiece name, int64_t key, int64_t value) {
GetSampleMetadataRecorder()->Set(HashMetricName(name), key, value);
}
void RemoveSampleMetadata(StringPiece name) {
GetSampleMetadataRecorder()->Remove(HashMetricName(name), nullopt);
}
void RemoveSampleMetadata(StringPiece name, int64_t key) {
GetSampleMetadataRecorder()->Remove(HashMetricName(name), key);
}
// This function is friended by StackSamplingProfiler so must live directly in
// the base namespace.
void ApplyMetadataToPastSamplesImpl(TimeTicks period_start,
TimeTicks period_end,
                                    uint64_t name_hash,
Optional<int64_t> key,
int64_t value) {
StackSamplingProfiler::ApplyMetadataToPastSamples(period_start, period_end,
name_hash, key, value);
}
void ApplyMetadataToPastSamples(TimeTicks period_start,
TimeTicks period_end,
StringPiece name,
int64_t value) {
return ApplyMetadataToPastSamplesImpl(period_start, period_end,
HashMetricName(name), nullopt, value);
}
void ApplyMetadataToPastSamples(TimeTicks period_start,
TimeTicks period_end,
StringPiece name,
int64_t key,
int64_t value) {
return ApplyMetadataToPastSamplesImpl(period_start, period_end,
HashMetricName(name), key, value);
}
MetadataRecorder* GetSampleMetadataRecorder() {
static NoDestructor<MetadataRecorder> instance;
return instance.get();
}
} // namespace base

View file

@ -0,0 +1,135 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_SAMPLE_METADATA_H_
#define BASE_PROFILER_SAMPLE_METADATA_H_
#include "base/optional.h"
#include "base/profiler/metadata_recorder.h"
#include "base/strings/string_piece.h"
// -----------------------------------------------------------------------------
// Usage documentation
// -----------------------------------------------------------------------------
//
// Overview:
// These functions provide a means to control the metadata attached to samples
// collected by the stack sampling profiler. Metadata state is shared between
// all threads within a process.
//
// Any samples collected by the sampling profiler will include the active
// metadata. This enables us to later analyze targeted subsets of samples
// (e.g. those collected during paint or layout).
//
// For example:
//
// void DidStartLoad() {
// base::SetSampleMetadata("Renderer.IsLoading", 1);
// }
//
// void DidFinishLoad() {
// base::RemoveSampleMetadata("Renderer.IsLoading");
// }
//
// Alternatively, ScopedSampleMetadata can be used to ensure that the metadata
// is removed correctly.
//
// For example:
//
// void DoExpensiveWork() {
// base::ScopedSampleMetadata metadata("xyz", 1);
// if (...) {
// ...
// if (...) {
// ...
// return;
// }
// }
// ...
// }
namespace base {
class BASE_EXPORT ScopedSampleMetadata {
public:
// Set the metadata value associated with |name|.
ScopedSampleMetadata(StringPiece name, int64_t value);
// Set the metadata value associated with the pair (|name|, |key|). This
// constructor allows the metadata to be associated with an additional
// user-defined key. One might supply a key based on the frame id, for
// example, to distinguish execution in service of scrolling between different
  // frames. Prefer the previous constructor if no user-defined key is
  // required. Note: values specified for a name and key are stored separately
// from values specified with only a name.
ScopedSampleMetadata(StringPiece name, int64_t key, int64_t value);
ScopedSampleMetadata(const ScopedSampleMetadata&) = delete;
~ScopedSampleMetadata();
ScopedSampleMetadata& operator=(const ScopedSampleMetadata&) = delete;
private:
const uint64_t name_hash_;
Optional<int64_t> key_;
};
// Set the metadata value associated with |name| in the process-global stack
// sampling profiler metadata, overwriting any previous value set for that
// |name|.
BASE_EXPORT void SetSampleMetadata(StringPiece name, int64_t value);
// Set the metadata value associated with the pair (|name|, |key|) in the
// process-global stack sampling profiler metadata, overwriting any previous
// value set for that (|name|, |key|) pair. This overload allows the metadata
// to be associated with an additional user-defined key. One might supply a key
// based on the frame id, for example, to distinguish execution in service of
// scrolling between different frames. Prefer the previous function if no
// user-defined key is required. Note: values specified for a name and key
// are stored separately from values specified with only a name.
BASE_EXPORT void SetSampleMetadata(StringPiece name,
int64_t key,
int64_t value);
// Removes the metadata item with the specified name from the process-global
// stack sampling profiler metadata.
//
// If such an item doesn't exist, this has no effect.
BASE_EXPORT void RemoveSampleMetadata(StringPiece name);
// Removes the metadata item with the specified (|name|, |key|) pair from the
// process-global stack sampling profiler metadata. This function does not alter
// values set with the name |name| but no key.
//
// If such an item doesn't exist, this has no effect.
BASE_EXPORT void RemoveSampleMetadata(StringPiece name, int64_t key);
// Applies the specified metadata to samples already recorded between
// |period_start| and |period_end| in all threads' active profiles, subject to
// the condition that the profile fully encompasses the period and the profile
// has not already completed. The condition ensures that the metadata is applied
// only if all execution during its scope was seen in the profile. This avoids
// biasing the samples towards the 'middle' of the execution seen during the
// metadata scope (i.e. because the start or end of execution was missed), at
// the cost of missing execution that is longer than the profiling period, or
// that extends before or after it. |period_end| must be <= TimeTicks::Now().
BASE_EXPORT void ApplyMetadataToPastSamples(TimeTicks period_start,
TimeTicks period_end,
StringPiece name,
int64_t value);
BASE_EXPORT void ApplyMetadataToPastSamples(TimeTicks period_start,
TimeTicks period_end,
StringPiece name,
int64_t key,
int64_t value);
// Returns the process-global metadata recorder instance used for tracking
// sampling profiler metadata.
//
// This function should not be called by non-profiler related code.
BASE_EXPORT MetadataRecorder* GetSampleMetadataRecorder();
} // namespace base
#endif // BASE_PROFILER_SAMPLE_METADATA_H_

View file

@ -0,0 +1,17 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/sampling_profiler_thread_token.h"
namespace base {
SamplingProfilerThreadToken GetSamplingProfilerCurrentThreadToken() {
#if defined(OS_ANDROID) || defined(OS_LINUX)
return {PlatformThread::CurrentId(), pthread_self()};
#else
return {PlatformThread::CurrentId()};
#endif
}
} // namespace base

View file

@ -0,0 +1,33 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_SAMPLING_PROFILER_THREAD_TOKEN_H_
#define BASE_PROFILER_SAMPLING_PROFILER_THREAD_TOKEN_H_
#include "base/base_export.h"
#include "base/threading/platform_thread.h"
#include "build/build_config.h"
#if defined(OS_ANDROID) || defined(OS_LINUX)
#include <pthread.h>
#endif
namespace base {
// SamplingProfilerThreadToken represents the thread identifier(s) required by
// sampling profiler to operate on a thread. PlatformThreadId is needed for all
// platforms, while non-Mac POSIX also requires a pthread_t to pass to pthread
// functions used to obtain the stack base address.
struct SamplingProfilerThreadToken {
PlatformThreadId id;
#if defined(OS_ANDROID) || defined(OS_LINUX)
pthread_t pthread_id;
#endif
};
BASE_EXPORT SamplingProfilerThreadToken GetSamplingProfilerCurrentThreadToken();
} // namespace base
#endif // BASE_PROFILER_SAMPLING_PROFILER_THREAD_TOKEN_H_

View file

@ -0,0 +1,17 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_buffer.h"
namespace base {
constexpr size_t StackBuffer::kPlatformStackAlignment;
StackBuffer::StackBuffer(size_t buffer_size)
: buffer_(new uint8_t[buffer_size + kPlatformStackAlignment - 1]),
size_(buffer_size) {}
StackBuffer::~StackBuffer() = default;
} // namespace base

View file

@ -0,0 +1,59 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_BUFFER_H_
#define BASE_PROFILER_STACK_BUFFER_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "base/base_export.h"
#include "base/macros.h"
namespace base {
// This class contains a buffer for stack copies that can be shared across
// multiple instances of StackSampler.
class BASE_EXPORT StackBuffer {
public:
// The expected alignment of the stack on the current platform. Windows and
// System V AMD64 ABIs on x86, x64, and ARM require the stack to be aligned
// to twice the pointer size. Excepted from this requirement is code setting
// up the stack during function calls (between pushing the return address
// and the end of the function prologue). The profiler will sometimes
// encounter this exceptional case for leaf frames.
static constexpr size_t kPlatformStackAlignment = 2 * sizeof(uintptr_t);
StackBuffer(size_t buffer_size);
~StackBuffer();
// Returns a kPlatformStackAlignment-aligned pointer to the stack buffer.
uintptr_t* buffer() const {
// Return the first address in the buffer aligned to
// kPlatformStackAlignment. The buffer is guaranteed to have enough space
// for size() bytes beyond this value.
return reinterpret_cast<uintptr_t*>(
(reinterpret_cast<uintptr_t>(buffer_.get()) + kPlatformStackAlignment -
1) &
~(kPlatformStackAlignment - 1));
}
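  // Worked illustration with hypothetical values, assuming 8-byte pointers
  // (kPlatformStackAlignment == 16): if buffer_.get() returns 0x1009, adding
  // 15 gives 0x1018 and masking with ~15 yields 0x1010, the first
  // 16-byte-aligned address inside the allocation.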
// Size in bytes.
size_t size() const { return size_; }
private:
// The buffer to store the stack.
const std::unique_ptr<uint8_t[]> buffer_;
// The size in bytes of the requested buffer allocation. The actual allocation
// is larger to accommodate alignment requirements.
const size_t size_;
DISALLOW_COPY_AND_ASSIGN(StackBuffer);
};
} // namespace base
#endif // BASE_PROFILER_STACK_BUFFER_H_

View file

@ -0,0 +1,74 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_copier.h"
#include "base/compiler_specific.h"
namespace base {
StackCopier::~StackCopier() = default;
// static
uintptr_t StackCopier::RewritePointerIfInOriginalStack(
const uint8_t* original_stack_bottom,
const uintptr_t* original_stack_top,
const uint8_t* stack_copy_bottom,
uintptr_t pointer) {
auto original_stack_bottom_uint =
reinterpret_cast<uintptr_t>(original_stack_bottom);
auto original_stack_top_uint =
reinterpret_cast<uintptr_t>(original_stack_top);
auto stack_copy_bottom_uint = reinterpret_cast<uintptr_t>(stack_copy_bottom);
if (pointer < original_stack_bottom_uint ||
pointer >= original_stack_top_uint)
return pointer;
return stack_copy_bottom_uint + (pointer - original_stack_bottom_uint);
}
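// Worked illustration with hypothetical addresses: given an original stack
// spanning [0x7000, 0x8000) and a copy starting at 0x9000, a stacked value of
// 0x7420 falls inside the range and is rewritten to 0x9000 + 0x420 = 0x9420,
// while a value such as 0x12345678 lies outside the range and is returned
// unchanged.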
// static
NO_SANITIZE("address")
const uint8_t* StackCopier::CopyStackContentsAndRewritePointers(
const uint8_t* original_stack_bottom,
const uintptr_t* original_stack_top,
int platform_stack_alignment,
uintptr_t* stack_buffer_bottom) {
const uint8_t* byte_src = original_stack_bottom;
// The first address in the stack with pointer alignment. Pointer-aligned
// values from this point to the end of the stack are possibly rewritten using
// RewritePointerIfInOriginalStack(). Bytes before this cannot be a pointer
// because they occupy less space than a pointer would.
const uint8_t* first_aligned_address = reinterpret_cast<uint8_t*>(
(reinterpret_cast<uintptr_t>(byte_src) + sizeof(uintptr_t) - 1) &
~(sizeof(uintptr_t) - 1));
// The stack copy bottom, which is offset from |stack_buffer_bottom| by the
// same alignment as in the original stack. This guarantees identical
// alignment between values in the original stack and the copy. This uses the
// platform stack alignment rather than pointer alignment so that the stack
// copy is aligned to platform expectations.
uint8_t* stack_copy_bottom =
reinterpret_cast<uint8_t*>(stack_buffer_bottom) +
(reinterpret_cast<uintptr_t>(byte_src) & (platform_stack_alignment - 1));
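  // Illustration with hypothetical values (16-byte platform alignment): if
  // |byte_src| is 0x7009, its low bits give an offset of 9, so the copy begins
  // 9 bytes into |stack_buffer_bottom| and every copied value keeps the same
  // address modulo 16 as its original.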
uint8_t* byte_dst = stack_copy_bottom;
// Copy bytes verbatim up to the first aligned address.
for (; byte_src < first_aligned_address; ++byte_src, ++byte_dst)
*byte_dst = *byte_src;
// Copy the remaining stack by pointer-sized values, rewriting anything that
// looks like a pointer into the stack.
const uintptr_t* src = reinterpret_cast<const uintptr_t*>(byte_src);
uintptr_t* dst = reinterpret_cast<uintptr_t*>(byte_dst);
for (; src < original_stack_top; ++src, ++dst) {
*dst = RewritePointerIfInOriginalStack(
original_stack_bottom, original_stack_top, stack_copy_bottom, *src);
}
return stack_copy_bottom;
}
} // namespace base

View file

@ -0,0 +1,99 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_COPIER_H_
#define BASE_PROFILER_STACK_COPIER_H_
#include <stdint.h>
#include "base/base_export.h"
#include "base/profiler/register_context.h"
#include "base/time/time.h"
namespace base {
class StackBuffer;
// StackCopier causes a thread to be suspended, copies its stack, and resumes
// the thread's execution. It's intended to provide an abstraction over stack
// copying techniques where the thread suspension is performed directly by the
// profiler thread (Windows and Mac platforms) vs. where the thread suspension
// is performed by the OS through signals (Android).
class BASE_EXPORT StackCopier {
public:
// Interface that may be implemented by the caller of CopyStack() to receive a
// callback when the stack is copied, while the target thread is suspended.
class BASE_EXPORT Delegate {
public:
virtual ~Delegate() {}
// Invoked at the time the stack is copied.
// IMPORTANT NOTE: to avoid deadlock implementations of this interface must
// not invoke any non-reentrant code that is also invoked by the target
// thread. In particular, it may not perform any heap allocation or
// deallocation, including indirectly via use of DCHECK/CHECK or other
// logging statements.
virtual void OnStackCopy() = 0;
// Invoked after the stack has been copied and the target thread resumed.
virtual void OnThreadResume() = 0;
};
virtual ~StackCopier();
// Copies the thread's register context into |thread_context|, the stack into
// |stack_buffer|, and the top of stack address into |stack_top|. Records
// |timestamp| at the time the stack was copied. delegate->OnStackCopy() will
// be invoked while the thread is suspended. Returns true if successful.
virtual bool CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
TimeTicks* timestamp,
RegisterContext* thread_context,
Delegate* delegate) = 0;
protected:
// If the value at |pointer| points to the original stack, rewrite it to point
// to the corresponding location in the copied stack.
//
// NO HEAP ALLOCATIONS.
static uintptr_t RewritePointerIfInOriginalStack(
const uint8_t* original_stack_bottom,
const uintptr_t* original_stack_top,
const uint8_t* stack_copy_bottom,
uintptr_t pointer);
// Copies the stack to a buffer while rewriting possible pointers to locations
// within the stack to point to the corresponding locations in the copy. This
// is necessary to handle stack frames with dynamic stack allocation, where a
// pointer to the beginning of the dynamic allocation area is stored on the
// stack and/or in a non-volatile register.
//
// Eager rewriting of anything that looks like a pointer to the stack, as done
// in this function, does not adversely affect the stack unwinding. The only
// other values on the stack the unwinding depends on are return addresses,
// which should not point within the stack memory. The rewriting is guaranteed
// to catch all pointers because the stacks are guaranteed by the ABI to be
// sizeof(uintptr_t*) aligned.
//
// |original_stack_bottom| and |original_stack_top| are different pointer
  // types due to their differing guaranteed alignments -- the bottom may only
// be 1-byte aligned while the top is aligned to double the pointer width.
//
// Returns a pointer to the bottom address in the copied stack. This value
// matches the alignment of |original_stack_bottom| to ensure that the stack
// contents have the same alignment as in the original stack. As a result the
// value will be different than |stack_buffer_bottom| if
// |original_stack_bottom| is not aligned to double the pointer width.
//
// NO HEAP ALLOCATIONS.
static const uint8_t* CopyStackContentsAndRewritePointers(
const uint8_t* original_stack_bottom,
const uintptr_t* original_stack_top,
int platform_stack_alignment,
uintptr_t* stack_buffer_bottom);
};
} // namespace base
#endif // BASE_PROFILER_STACK_COPIER_H_

View file

@ -0,0 +1,249 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_copier_signal.h"
#include <linux/futex.h>
#include <signal.h>
#include <string.h>
#include <sys/ucontext.h>
#include <syscall.h>
#include <unistd.h>
#include <atomic>
#include "base/profiler/register_context.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
namespace base {
namespace {
// Waitable event implementation with futex and without DCHECK(s), since signal
// handlers cannot allocate memory or use pthread api.
class AsyncSafeWaitableEvent {
public:
AsyncSafeWaitableEvent() { futex_.store(0, std::memory_order_release); }
~AsyncSafeWaitableEvent() {}
bool Wait() {
// futex() can wake up spuriously if this memory address was previously used
// for a pthread mutex. So, also check the condition.
while (true) {
int res =
syscall(SYS_futex, futex_int_ptr(), FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
0, nullptr, nullptr, 0);
if (futex_.load(std::memory_order_acquire) != 0)
return true;
if (res != 0)
return false;
}
}
void Signal() {
futex_.store(1, std::memory_order_release);
syscall(SYS_futex, futex_int_ptr(), FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1,
nullptr, nullptr, 0);
}
private:
// Provides a pointer to the atomic's storage. std::atomic_int has standard
// layout so its address can be used for the pointer as long as it only
// contains the int.
int* futex_int_ptr() {
static_assert(sizeof(futex_) == sizeof(int),
"Expected std::atomic_int to be the same size as int");
return reinterpret_cast<int*>(&futex_);
}
std::atomic_int futex_{0};
};
// Scoped signal event that calls Signal on the AsyncSafeWaitableEvent at
// destructor.
class ScopedEventSignaller {
public:
ScopedEventSignaller(AsyncSafeWaitableEvent* event) : event_(event) {}
~ScopedEventSignaller() { event_->Signal(); }
private:
AsyncSafeWaitableEvent* event_;
};
// Struct to store the arguments to the signal handler.
struct HandlerParams {
uintptr_t stack_base_address;
// The event is signalled when signal handler is done executing.
AsyncSafeWaitableEvent* event;
// Return values:
// Successfully copied the stack segment.
bool* success;
// The thread context of the leaf function.
mcontext_t* context;
// Buffer to copy the stack segment.
StackBuffer* stack_buffer;
const uint8_t** stack_copy_bottom;
// The timestamp when the stack was copied.
TimeTicks* timestamp;
// The delegate provided to the StackCopier.
StackCopier::Delegate* stack_copier_delegate;
};
// Pointer to the parameters to be "passed" to the CopyStackSignalHandler() from
// the sampling thread to the sampled (stopped) thread. This value is set just
// before sending the signal to the thread and reset when the handler is done.
std::atomic<HandlerParams*> g_handler_params;
// CopyStackSignalHandler is invoked on the stopped thread and records the
// thread's stack and register context at the time the signal was received. This
// function may only call reentrant code.
void CopyStackSignalHandler(int n, siginfo_t* siginfo, void* sigcontext) {
HandlerParams* params = g_handler_params.load(std::memory_order_acquire);
// TimeTicks::Now() is implemented in terms of clock_gettime on Linux, which
// is signal safe per the signal-safety(7) man page.
*params->timestamp = TimeTicks::Now();
ScopedEventSignaller e(params->event);
*params->success = false;
const ucontext_t* ucontext = static_cast<ucontext_t*>(sigcontext);
memcpy(params->context, &ucontext->uc_mcontext, sizeof(mcontext_t));
const uintptr_t bottom = RegisterContextStackPointer(params->context);
const uintptr_t top = params->stack_base_address;
if ((top - bottom) > params->stack_buffer->size()) {
// The stack exceeds the size of the allocated buffer. The buffer is sized
// such that this shouldn't happen under typical execution so we can safely
// punt in this situation.
return;
}
params->stack_copier_delegate->OnStackCopy();
*params->stack_copy_bottom =
StackCopierSignal::CopyStackContentsAndRewritePointers(
reinterpret_cast<uint8_t*>(bottom), reinterpret_cast<uintptr_t*>(top),
StackBuffer::kPlatformStackAlignment, params->stack_buffer->buffer());
*params->success = true;
}
// Sets the global handler params for the signal handler function.
class ScopedSetSignalHandlerParams {
public:
ScopedSetSignalHandlerParams(HandlerParams* params) {
g_handler_params.store(params, std::memory_order_release);
}
~ScopedSetSignalHandlerParams() {
g_handler_params.store(nullptr, std::memory_order_release);
}
};
class ScopedSigaction {
public:
ScopedSigaction(int signal,
struct sigaction* action,
struct sigaction* original_action)
: signal_(signal),
action_(action),
original_action_(original_action),
succeeded_(sigaction(signal, action, original_action) == 0) {}
bool succeeded() const { return succeeded_; }
~ScopedSigaction() {
if (!succeeded_)
return;
bool reset_succeeded = sigaction(signal_, original_action_, action_) == 0;
DCHECK(reset_succeeded);
}
private:
const int signal_;
struct sigaction* const action_;
struct sigaction* const original_action_;
const bool succeeded_;
};
} // namespace
StackCopierSignal::StackCopierSignal(
std::unique_ptr<ThreadDelegate> thread_delegate)
: thread_delegate_(std::move(thread_delegate)) {}
StackCopierSignal::~StackCopierSignal() = default;
bool StackCopierSignal::CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
TimeTicks* timestamp,
RegisterContext* thread_context,
Delegate* delegate) {
AsyncSafeWaitableEvent wait_event;
bool copied = false;
const uint8_t* stack_copy_bottom = nullptr;
const uintptr_t stack_base_address = thread_delegate_->GetStackBaseAddress();
HandlerParams params = {stack_base_address, &wait_event, &copied,
thread_context, stack_buffer, &stack_copy_bottom,
timestamp, delegate};
{
ScopedSetSignalHandlerParams scoped_handler_params(&params);
// Set the signal handler for the thread to the stack copy function.
struct sigaction action;
struct sigaction original_action;
memset(&action, 0, sizeof(action));
action.sa_sigaction = CopyStackSignalHandler;
action.sa_flags = SA_RESTART | SA_SIGINFO;
sigemptyset(&action.sa_mask);
TRACE_EVENT_BEGIN0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler.debug"),
"StackCopierSignal copy stack");
// SIGURG is chosen here because we observe no crashes with this signal and
    // neither Chrome nor AOSP sets up a special handler for this signal.
ScopedSigaction scoped_sigaction(SIGURG, &action, &original_action);
if (!scoped_sigaction.succeeded())
return false;
if (syscall(SYS_tgkill, getpid(), thread_delegate_->GetThreadId(),
SIGURG) != 0) {
NOTREACHED();
return false;
}
bool finished_waiting = wait_event.Wait();
TRACE_EVENT_END0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler.debug"),
"StackCopierSignal copy stack");
if (!finished_waiting) {
NOTREACHED();
return false;
}
}
delegate->OnThreadResume();
const uintptr_t bottom = RegisterContextStackPointer(params.context);
for (uintptr_t* reg :
thread_delegate_->GetRegistersToRewrite(thread_context)) {
*reg = StackCopierSignal::RewritePointerIfInOriginalStack(
reinterpret_cast<uint8_t*>(bottom),
reinterpret_cast<uintptr_t*>(stack_base_address), stack_copy_bottom,
*reg);
}
*stack_top = reinterpret_cast<uintptr_t>(stack_copy_bottom) +
(stack_base_address - bottom);
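  // Worked illustration with hypothetical addresses: if the copy begins at
  // 0x9000, the sampled stack pointer was 0x7f00, and the stack base address
  // is 0x8000, then 0x100 bytes were copied and |*stack_top| becomes 0x9100.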
return copied;
}
} // namespace base

View file

@ -0,0 +1,39 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_COPIER_SIGNAL_H_
#define BASE_PROFILER_STACK_COPIER_SIGNAL_H_
#include <memory>
#include "base/base_export.h"
#include "base/profiler/stack_copier.h"
namespace base {
class ThreadDelegate;
// Supports stack copying on platforms where a signal must be delivered to the
// profiled thread and the stack is copied from the signal handler.
class BASE_EXPORT StackCopierSignal : public StackCopier {
public:
StackCopierSignal(std::unique_ptr<ThreadDelegate> thread_delegate);
~StackCopierSignal() override;
// StackCopier:
bool CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
TimeTicks* timestamp,
RegisterContext* thread_context,
Delegate* delegate) override;
using StackCopier::CopyStackContentsAndRewritePointers;
private:
std::unique_ptr<ThreadDelegate> thread_delegate_;
};
} // namespace base
#endif // BASE_PROFILER_STACK_COPIER_SIGNAL_H_

View file

@ -0,0 +1,81 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_copier_suspend.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/suspendable_thread_delegate.h"
namespace base {
StackCopierSuspend::StackCopierSuspend(
std::unique_ptr<SuspendableThreadDelegate> thread_delegate)
: thread_delegate_(std::move(thread_delegate)) {}
StackCopierSuspend::~StackCopierSuspend() = default;
// Suspends the thread, copies the stack state, and resumes the thread. The
// copied stack state includes the stack itself, the top address of the stack
// copy, and the register context. Returns true on success, and returns the
// copied state via the params.
//
// NO HEAP ALLOCATIONS within the ScopedSuspendThread scope.
bool StackCopierSuspend::CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
TimeTicks* timestamp,
RegisterContext* thread_context,
Delegate* delegate) {
const uintptr_t top = thread_delegate_->GetStackBaseAddress();
uintptr_t bottom = 0;
const uint8_t* stack_copy_bottom = nullptr;
{
// Allocation of the ScopedSuspendThread object itself is OK since it
// necessarily occurs before the thread is suspended by the object.
std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread>
suspend_thread = thread_delegate_->CreateScopedSuspendThread();
// TimeTicks::Now() is implemented in terms of reads to the timer tick
// counter or the TSC register on x86/x86_64, so it is reentrant.
*timestamp = TimeTicks::Now();
if (!suspend_thread->WasSuccessful())
return false;
if (!thread_delegate_->GetThreadContext(thread_context))
return false;
bottom = RegisterContextStackPointer(thread_context);
// The StackBuffer allocation is expected to be at least as large as the
// largest stack region allocation on the platform, but check just in case
// it isn't *and* the actual stack itself exceeds the buffer allocation
// size.
if ((top - bottom) > stack_buffer->size())
return false;
if (!thread_delegate_->CanCopyStack(bottom))
return false;
delegate->OnStackCopy();
stack_copy_bottom = CopyStackContentsAndRewritePointers(
reinterpret_cast<uint8_t*>(bottom), reinterpret_cast<uintptr_t*>(top),
StackBuffer::kPlatformStackAlignment, stack_buffer->buffer());
}
delegate->OnThreadResume();
*stack_top = reinterpret_cast<uintptr_t>(stack_copy_bottom) + (top - bottom);
for (uintptr_t* reg :
thread_delegate_->GetRegistersToRewrite(thread_context)) {
*reg = RewritePointerIfInOriginalStack(reinterpret_cast<uint8_t*>(bottom),
reinterpret_cast<uintptr_t*>(top),
stack_copy_bottom, *reg);
}
return true;
}
} // namespace base
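// A standalone, illustrative sketch of the pointer-rewriting arithmetic that
// RewritePointerIfInOriginalStack() performs above: a value that points into
// the original stack range [bottom, top) is redirected into the copy by
// preserving its offset from |bottom|. The names here (rewrite_sketch,
// RewriteIfInRange, Demo) are hypothetical and not the //base implementation.
#include <cstdint>
#include <cstring>
#include <vector>
namespace rewrite_sketch {
uintptr_t RewriteIfInRange(uintptr_t bottom,
uintptr_t top,
const uint8_t* copy_bottom,
uintptr_t value) {
if (value < bottom || value >= top)
return value;  // Not a pointer into the original stack; leave it alone.
return reinterpret_cast<uintptr_t>(copy_bottom) + (value - bottom);
}
inline void Demo() {
// Copy a fake 64-byte "stack" and rewrite a pointer that referenced it.
std::vector<uint8_t> original(64), copy(64);
const uintptr_t bottom = reinterpret_cast<uintptr_t>(original.data());
const uintptr_t top = bottom + original.size();
std::memcpy(copy.data(), original.data(), original.size());
const uintptr_t pointer_into_stack = bottom + 16;
const uintptr_t rewritten =
RewriteIfInRange(bottom, top, copy.data(), pointer_into_stack);
// |rewritten| now points 16 bytes into |copy| rather than into |original|.
(void)rewritten;
}
}  // namespace rewrite_sketch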

View file

@ -0,0 +1,39 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_COPIER_SUSPEND_H_
#define BASE_PROFILER_STACK_COPIER_SUSPEND_H_
#include <memory>
#include "base/base_export.h"
#include "base/profiler/stack_copier.h"
namespace base {
class SuspendableThreadDelegate;
// Supports stack copying on platforms where the profiled thread must be
// explicitly suspended from the profiler thread and the stack is copied from
// the profiler thread.
class BASE_EXPORT StackCopierSuspend : public StackCopier {
public:
StackCopierSuspend(
std::unique_ptr<SuspendableThreadDelegate> thread_delegate);
~StackCopierSuspend() override;
// StackCopier:
bool CopyStack(StackBuffer* stack_buffer,
uintptr_t* stack_top,
TimeTicks* timestamp,
RegisterContext* thread_context,
Delegate* delegate) override;
private:
std::unique_ptr<SuspendableThreadDelegate> thread_delegate_;
};
} // namespace base
#endif // BASE_PROFILER_STACK_COPIER_SUSPEND_H_

View file

@ -0,0 +1,27 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include "base/memory/ptr_util.h"
#include "base/profiler/stack_buffer.h"
namespace base {
StackSampler::StackSampler() = default;
StackSampler::~StackSampler() = default;
std::unique_ptr<StackBuffer> StackSampler::CreateStackBuffer() {
size_t size = GetStackBufferSize();
if (size == 0)
return nullptr;
return std::make_unique<StackBuffer>(size);
}
StackSamplerTestDelegate::~StackSamplerTestDelegate() = default;
StackSamplerTestDelegate::StackSamplerTestDelegate() = default;
} // namespace base

View file

@ -0,0 +1,83 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_SAMPLER_H_
#define BASE_PROFILER_STACK_SAMPLER_H_
#include <memory>
#include "base/base_export.h"
#include "base/macros.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/threading/platform_thread.h"
namespace base {
class Unwinder;
class ModuleCache;
class ProfileBuilder;
class StackBuffer;
class StackSamplerTestDelegate;
// StackSampler is an implementation detail of StackSamplingProfiler. It
// abstracts the native implementation required to record a set of stack frames
// for a given thread.
class BASE_EXPORT StackSampler {
public:
virtual ~StackSampler();
// Creates a stack sampler that records samples for the thread with
// |thread_token|. Returns null if this platform does not support stack
// sampling.
static std::unique_ptr<StackSampler> Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder = nullptr);
// Gets the required size of the stack buffer.
static size_t GetStackBufferSize();
// Creates an instance of a stack buffer that can be used for calls to
// any StackSampler object.
static std::unique_ptr<StackBuffer> CreateStackBuffer();
// The following functions are all called on the SamplingThread (not the
// thread being sampled).
// Adds an auxiliary unwinder to handle additional, non-native-code unwind
// scenarios.
virtual void AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) = 0;
// Records a set of frames and returns them.
virtual void RecordStackFrames(StackBuffer* stackbuffer,
ProfileBuilder* profile_builder) = 0;
protected:
StackSampler();
private:
DISALLOW_COPY_AND_ASSIGN(StackSampler);
};
// StackSamplerTestDelegate provides seams for test code to execute during stack
// collection.
class BASE_EXPORT StackSamplerTestDelegate {
public:
virtual ~StackSamplerTestDelegate();
// Called after copying the stack and resuming the target thread, but prior to
// walking the stack. Invoked on the SamplingThread.
virtual void OnPreStackWalk() = 0;
protected:
StackSamplerTestDelegate();
private:
DISALLOW_COPY_AND_ASSIGN(StackSamplerTestDelegate);
};
} // namespace base
#endif // BASE_PROFILER_STACK_SAMPLER_H_

View file

@ -0,0 +1,43 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include <pthread.h>
#include "base/profiler/stack_copier_signal.h"
#include "base/profiler/stack_sampler_impl.h"
#include "base/profiler/thread_delegate_posix.h"
#include "base/profiler/unwinder.h"
#include "base/threading/platform_thread.h"
namespace base {
std::unique_ptr<StackSampler> StackSampler::Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder) {
return std::make_unique<StackSamplerImpl>(
std::make_unique<StackCopierSignal>(
std::make_unique<ThreadDelegatePosix>(thread_token)),
std::move(native_unwinder), module_cache, test_delegate);
}
size_t StackSampler::GetStackBufferSize() {
size_t stack_size = PlatformThread::GetDefaultThreadStackSize();
pthread_attr_t attr;
if (stack_size == 0 && pthread_attr_init(&attr) == 0) {
if (pthread_attr_getstacksize(&attr, &stack_size) != 0)
stack_size = 0;
pthread_attr_destroy(&attr);
}
// 1 MB is the default thread stack limit set by Android in art/runtime/thread_pool.h.
constexpr size_t kDefaultStackLimit = 1 << 20;
return stack_size > 0 ? stack_size : kDefaultStackLimit;
}
} // namespace base

View file

@ -0,0 +1,181 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler_impl.h"
#include <utility>
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/profiler/profile_builder.h"
#include "base/profiler/sample_metadata.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/stack_copier.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/profiler/unwinder.h"
#include "build/build_config.h"
// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended, so they must not do any allocation from the
// heap, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with "NO
// HEAP ALLOCATIONS".
namespace base {
namespace {
// Notifies the unwinders about the stack capture, and records metadata, while
// the thread is suspended.
class StackCopierDelegate : public StackCopier::Delegate {
public:
StackCopierDelegate(ModuleCache* module_cache,
Unwinder* native_unwinder,
Unwinder* aux_unwinder,
ProfileBuilder* profile_builder)
: module_cache_(module_cache),
native_unwinder_(native_unwinder),
aux_unwinder_(aux_unwinder),
profile_builder_(profile_builder),
metadata_provider_(
GetSampleMetadataRecorder()->CreateMetadataProvider()) {}
StackCopierDelegate(const StackCopierDelegate&) = delete;
StackCopierDelegate& operator=(const StackCopierDelegate&) = delete;
// StackCopier::Delegate:
// IMPORTANT NOTE: to avoid deadlock this function must not invoke any
// non-reentrant code that is also invoked by the target thread. In
// particular, it may not perform any heap allocation or deallocation,
// including indirectly via use of DCHECK/CHECK or other logging statements.
void OnStackCopy() override {
native_unwinder_->OnStackCapture();
if (aux_unwinder_)
aux_unwinder_->OnStackCapture();
#if !defined(OS_POSIX) || defined(OS_MACOSX)
profile_builder_->RecordMetadata(metadata_provider_.get());
#else
// TODO(https://crbug.com/1056283): Support metadata recording on POSIX
// platforms.
ALLOW_UNUSED_LOCAL(profile_builder_);
#endif
}
void OnThreadResume() override {
// Reset this as soon as possible because it may hold a lock on the
// metadata.
metadata_provider_.reset();
native_unwinder_->UpdateModules(module_cache_);
if (aux_unwinder_)
aux_unwinder_->UpdateModules(module_cache_);
}
private:
ModuleCache* const module_cache_;
Unwinder* const native_unwinder_;
Unwinder* const aux_unwinder_;
ProfileBuilder* const profile_builder_;
std::unique_ptr<ProfileBuilder::MetadataProvider> metadata_provider_;
};
} // namespace
StackSamplerImpl::StackSamplerImpl(std::unique_ptr<StackCopier> stack_copier,
std::unique_ptr<Unwinder> native_unwinder,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate)
: stack_copier_(std::move(stack_copier)),
native_unwinder_(std::move(native_unwinder)),
module_cache_(module_cache),
test_delegate_(test_delegate) {}
StackSamplerImpl::~StackSamplerImpl() = default;
void StackSamplerImpl::AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) {
aux_unwinder_ = std::move(unwinder);
aux_unwinder_->AddInitialModules(module_cache_);
}
void StackSamplerImpl::RecordStackFrames(StackBuffer* stack_buffer,
ProfileBuilder* profile_builder) {
DCHECK(stack_buffer);
RegisterContext thread_context;
uintptr_t stack_top;
TimeTicks timestamp;
StackCopierDelegate delegate(module_cache_, native_unwinder_.get(),
aux_unwinder_.get(), profile_builder);
bool success = stack_copier_->CopyStack(stack_buffer, &stack_top, &timestamp,
&thread_context, &delegate);
if (!success)
return;
if (test_delegate_)
test_delegate_->OnPreStackWalk();
profile_builder->OnSampleCompleted(
WalkStack(module_cache_, &thread_context, stack_top,
native_unwinder_.get(), aux_unwinder_.get()),
timestamp);
}
// static
std::vector<Frame> StackSamplerImpl::WalkStackForTesting(
ModuleCache* module_cache,
RegisterContext* thread_context,
uintptr_t stack_top,
Unwinder* native_unwinder,
Unwinder* aux_unwinder) {
return WalkStack(module_cache, thread_context, stack_top, native_unwinder,
aux_unwinder);
}
// static
std::vector<Frame> StackSamplerImpl::WalkStack(ModuleCache* module_cache,
RegisterContext* thread_context,
uintptr_t stack_top,
Unwinder* native_unwinder,
Unwinder* aux_unwinder) {
std::vector<Frame> stack;
// Reserve enough memory for most stacks, to avoid repeated
// allocations. Approximately 99.9% of recorded stacks are 128 frames or
// fewer.
stack.reserve(128);
// Record the first frame from the context values.
stack.emplace_back(RegisterContextInstructionPointer(thread_context),
module_cache->GetModuleForAddress(
RegisterContextInstructionPointer(thread_context)));
size_t prior_stack_size;
UnwindResult result;
do {
// Choose an authoritative unwinder for the current module. Use the aux
// unwinder if it thinks it can unwind from the current frame, otherwise use
// the native unwinder.
Unwinder* unwinder =
aux_unwinder && aux_unwinder->CanUnwindFrom(stack.back())
? aux_unwinder
: native_unwinder;
prior_stack_size = stack.size();
result =
unwinder->TryUnwind(thread_context, stack_top, module_cache, &stack);
// The native unwinder should be the only one that returns COMPLETED
// since the stack starts in native code.
DCHECK(result != UnwindResult::COMPLETED || unwinder == native_unwinder);
} while (result != UnwindResult::ABORTED &&
result != UnwindResult::COMPLETED &&
// Give up if the authoritative unwinder for the module was unable to
// unwind.
stack.size() > prior_stack_size);
return stack;
}
} // namespace base
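// A standalone, illustrative sketch of the unwinder-selection loop implemented
// by WalkStack() above, using stand-in types so it is self-contained. Each
// iteration picks the aux unwinder when it claims the current frame, otherwise
// the native unwinder, and the walk stops when an unwinder aborts, completes,
// or fails to add a frame. All names below (walk_sketch, Result, Walk) are
// hypothetical, not the //base types.
#include <cstdint>
#include <vector>
namespace walk_sketch {
enum class Result { kCompleted, kAborted, kUnrecognizedFrame };
struct Frame {
uintptr_t instruction_pointer;
};
struct Unwinder {
virtual ~Unwinder() = default;
virtual bool CanUnwindFrom(const Frame& frame) const = 0;
// Appends zero or more callers of the last frame in |stack|.
virtual Result TryUnwind(std::vector<Frame>* stack) const = 0;
};
std::vector<Frame> Walk(uintptr_t initial_instruction_pointer,
const Unwinder* native_unwinder,
const Unwinder* aux_unwinder) {
std::vector<Frame> stack;
stack.reserve(128);
Frame initial_frame;
initial_frame.instruction_pointer = initial_instruction_pointer;
stack.push_back(initial_frame);
size_t prior_stack_size;
Result result;
do {
// Prefer the aux unwinder for frames it recognizes, e.g. managed code.
const Unwinder* unwinder =
aux_unwinder && aux_unwinder->CanUnwindFrom(stack.back())
? aux_unwinder
: native_unwinder;
prior_stack_size = stack.size();
result = unwinder->TryUnwind(&stack);
} while (result != Result::kAborted && result != Result::kCompleted &&
stack.size() > prior_stack_size);
return stack;
}
}  // namespace walk_sketch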

View file

@ -0,0 +1,61 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_SAMPLER_IMPL_H_
#define BASE_PROFILER_STACK_SAMPLER_IMPL_H_
#include <memory>
#include "base/base_export.h"
#include "base/profiler/frame.h"
#include "base/profiler/register_context.h"
#include "base/profiler/stack_copier.h"
#include "base/profiler/stack_sampler.h"
namespace base {
class Unwinder;
// Cross-platform stack sampler implementation. Delegates to StackCopier for the
// platform-specific stack copying implementation.
class BASE_EXPORT StackSamplerImpl : public StackSampler {
public:
StackSamplerImpl(std::unique_ptr<StackCopier> stack_copier,
std::unique_ptr<Unwinder> native_unwinder,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate = nullptr);
~StackSamplerImpl() override;
StackSamplerImpl(const StackSamplerImpl&) = delete;
StackSamplerImpl& operator=(const StackSamplerImpl&) = delete;
// StackSampler:
void AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) override;
void RecordStackFrames(StackBuffer* stack_buffer,
ProfileBuilder* profile_builder) override;
// Exposes the internal function for unit testing.
static std::vector<Frame> WalkStackForTesting(ModuleCache* module_cache,
RegisterContext* thread_context,
uintptr_t stack_top,
Unwinder* native_unwinder,
Unwinder* aux_unwinder);
private:
static std::vector<Frame> WalkStack(ModuleCache* module_cache,
RegisterContext* thread_context,
uintptr_t stack_top,
Unwinder* native_unwinder,
Unwinder* aux_unwinder);
const std::unique_ptr<StackCopier> stack_copier_;
const std::unique_ptr<Unwinder> native_unwinder_;
std::unique_ptr<Unwinder> aux_unwinder_;
ModuleCache* const module_cache_;
StackSamplerTestDelegate* const test_delegate_;
};
} // namespace base
#endif // BASE_PROFILER_STACK_SAMPLER_IMPL_H_

View file

@ -0,0 +1,26 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Placeholder functions for the StackSampler on iOS, which is not currently
// supported.
#include "base/profiler/stack_sampler.h"
namespace base {
// static
std::unique_ptr<StackSampler> StackSampler::Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder) {
return nullptr;
}
// static
size_t StackSampler::GetStackBufferSize() {
return 0;
}
} // namespace base

View file

@ -0,0 +1,37 @@
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include "base/profiler/native_unwinder_mac.h"
#include "base/profiler/stack_copier_suspend.h"
#include "base/profiler/stack_sampler_impl.h"
#include "base/profiler/suspendable_thread_delegate_mac.h"
namespace base {
// static
std::unique_ptr<StackSampler> StackSampler::Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder) {
DCHECK(!native_unwinder);
return std::make_unique<StackSamplerImpl>(
std::make_unique<StackCopierSuspend>(
std::make_unique<SuspendableThreadDelegateMac>(thread_token)),
std::make_unique<NativeUnwinderMac>(module_cache), module_cache,
test_delegate);
}
// static
size_t StackSampler::GetStackBufferSize() {
size_t stack_size = PlatformThread::GetDefaultThreadStackSize();
// If getrlimit somehow fails, return the default macOS main thread stack size
// of 8 MB (DFLSSIZ in <i386/vmparam.h>) with extra wiggle room.
return stack_size > 0 ? stack_size : 12 * 1024 * 1024;
}
} // namespace base

View file

@ -0,0 +1,37 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include <pthread.h>
#include "base/threading/platform_thread.h"
#include "build/build_config.h"
namespace base {
std::unique_ptr<StackSampler> StackSampler::Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder) {
return nullptr;
}
size_t StackSampler::GetStackBufferSize() {
size_t stack_size = PlatformThread::GetDefaultThreadStackSize();
pthread_attr_t attr;
if (stack_size == 0 && pthread_attr_init(&attr) == 0) {
if (pthread_attr_getstacksize(&attr, &stack_size) != 0)
stack_size = 0;
pthread_attr_destroy(&attr);
}
// Maximum stack size limit under the NPTL implementation.
constexpr size_t kDefaultStackLimit = 4 * (1 << 20);
return stack_size > 0 ? stack_size : kDefaultStackLimit;
}
} // namespace base

View file

@ -0,0 +1,42 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampler.h"
#include "base/profiler/native_unwinder_win.h"
#include "base/profiler/stack_copier_suspend.h"
#include "base/profiler/stack_sampler_impl.h"
#include "base/profiler/suspendable_thread_delegate_win.h"
#include "build/build_config.h"
namespace base {
// static
std::unique_ptr<StackSampler> StackSampler::Create(
SamplingProfilerThreadToken thread_token,
ModuleCache* module_cache,
StackSamplerTestDelegate* test_delegate,
std::unique_ptr<Unwinder> native_unwinder) {
DCHECK(!native_unwinder);
#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_ARM64)
return std::make_unique<StackSamplerImpl>(
std::make_unique<StackCopierSuspend>(
std::make_unique<SuspendableThreadDelegateWin>(thread_token)),
std::make_unique<NativeUnwinderWin>(), module_cache, test_delegate);
#else
return nullptr;
#endif
}
// static
size_t StackSampler::GetStackBufferSize() {
// The default Win32 reserved stack size is 1 MB and Chrome Windows threads
// currently always use the default, but this allows for expansion if it
// occurs. The size beyond the actual stack size consists of unallocated
// virtual memory pages so carries little cost (just a bit of wasted address
// space).
return 2 << 20; // 2 MiB
}
} // namespace base

View file

@ -0,0 +1,811 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampling_profiler.h"
#include <algorithm>
#include <map>
#include <utility>
#include "base/atomic_sequence_num.h"
#include "base/atomicops.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/callback.h"
#include "base/location.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/memory/singleton.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/stack_sampler.h"
#include "base/profiler/unwinder.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/threading/thread.h"
#include "base/threading/thread_restrictions.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
namespace base {
// Allows StackSamplingProfiler to recall a thread which should already pretty
// much be dead (thus it should be a fast Join()).
class ScopedAllowThreadRecallForStackSamplingProfiler
: public ScopedAllowBaseSyncPrimitivesOutsideBlockingScope {};
namespace {
// This value is used to initialize the WaitableEvent object. This MUST BE set
// to MANUAL for correct operation of the IsSignaled() call in Start(). See the
// comment there for why.
constexpr WaitableEvent::ResetPolicy kResetPolicy =
WaitableEvent::ResetPolicy::MANUAL;
// This value is used when there is no collection in progress and thus no ID
// for referencing the active collection to the SamplingThread.
const int kNullProfilerId = -1;
} // namespace
// StackSamplingProfiler::SamplingThread --------------------------------------
class StackSamplingProfiler::SamplingThread : public Thread {
public:
class TestPeer {
public:
// Reset the existing sampler. This will unfortunately create the object
// unnecessarily if it doesn't already exist but there's no way around that.
static void Reset();
// Disables inherent idle-shutdown behavior.
static void DisableIdleShutdown();
// Begins an idle shutdown as if the idle-timer had expired and wait for
// it to execute. Since the timer would have only been started at a time
// when the sampling thread actually was idle, this must be called only
// when it is known that there are no active sampling threads. If
// |simulate_intervening_add| is true then, when executed, the shutdown
// task will believe that a new collection has been added since it was
// posted.
static void ShutdownAssumingIdle(bool simulate_intervening_add);
private:
// Calls the sampling thread's ShutdownTask and then signals an event.
static void ShutdownTaskAndSignalEvent(SamplingThread* sampler,
int add_events,
WaitableEvent* event);
};
struct CollectionContext {
CollectionContext(const SamplingParams& params,
WaitableEvent* finished,
std::unique_ptr<StackSampler> sampler,
std::unique_ptr<ProfileBuilder> profile_builder)
: collection_id(next_collection_id.GetNext()),
params(params),
finished(finished),
sampler(std::move(sampler)),
profile_builder(std::move(profile_builder)) {}
~CollectionContext() = default;
// An identifier for this collection, used to uniquely identify the
// collection to outside interests.
const int collection_id;
const SamplingParams params; // Information about how to sample.
WaitableEvent* const finished; // Signaled when all sampling is complete.
// Platform-specific module that does the actual sampling.
std::unique_ptr<StackSampler> sampler;
// Receives the sampling data and builds a CallStackProfile.
std::unique_ptr<ProfileBuilder> profile_builder;
// The absolute time for the next sample.
TimeTicks next_sample_time;
// The time that a profile was started, for calculating the total duration.
TimeTicks profile_start_time;
// Counter that indicates the current sample position along the acquisition.
int sample_count = 0;
// Sequence number for generating new collection ids.
static AtomicSequenceNumber next_collection_id;
};
// Gets the single instance of this class.
static SamplingThread* GetInstance();
// Adds a new CollectionContext to the thread. This can be called externally
// from any thread. This returns a collection id that can later be used to
// stop the sampling.
int Add(std::unique_ptr<CollectionContext> collection);
// Adds an auxiliary unwinder to be used for the collection, to handle
// additional, non-native-code unwind scenarios.
void AddAuxUnwinder(int collection_id, std::unique_ptr<Unwinder> unwinder);
// Applies the metadata to already recorded samples in all collections.
void ApplyMetadataToPastSamples(base::TimeTicks period_start,
base::TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value);
// Removes an active collection based on its collection id, forcing it to run
// its callback if any data has been collected. This can be called externally
// from any thread.
void Remove(int collection_id);
private:
friend struct DefaultSingletonTraits<SamplingThread>;
// The different states in which the sampling-thread can be.
enum ThreadExecutionState {
// The thread is not running because it has never been started. It will be
// started when a sampling request is received.
NOT_STARTED,
// The thread is running and processing tasks. This is the state when any
// sampling requests are active and during the "idle" period afterward
// before the thread is stopped.
RUNNING,
// Once all sampling requests have finished and the "idle" period has
// expired, the thread will be set to this state and its shutdown
// initiated. A call to Stop() must be made to ensure the previous thread
// has completely exited before calling Start() and moving back to the
// RUNNING state.
EXITING,
};
SamplingThread();
~SamplingThread() override;
// Get task runner that is usable from the outside.
scoped_refptr<SingleThreadTaskRunner> GetOrCreateTaskRunnerForAdd();
scoped_refptr<SingleThreadTaskRunner> GetTaskRunner(
ThreadExecutionState* out_state);
// Get task runner that is usable from the sampling thread itself.
scoped_refptr<SingleThreadTaskRunner> GetTaskRunnerOnSamplingThread();
// Finishes a collection. The collection's |finished| waitable event will be
// signalled. The |collection| should already have been removed from
// |active_collections_| by the caller, as this is needed to avoid flakiness
// in unit tests.
void FinishCollection(CollectionContext* collection);
// Check if the sampling thread is idle and begin a shutdown if it is.
void ScheduleShutdownIfIdle();
// These methods are tasks that get posted to the internal message queue.
void AddCollectionTask(std::unique_ptr<CollectionContext> collection);
void AddAuxUnwinderTask(int collection_id,
std::unique_ptr<Unwinder> unwinder);
void ApplyMetadataToPastSamplesTask(base::TimeTicks period_start,
base::TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value);
void RemoveCollectionTask(int collection_id);
void RecordSampleTask(int collection_id);
void ShutdownTask(int add_events);
// Thread:
void CleanUp() override;
// A stack-buffer used by the sampler for its work. This buffer is re-used
// across multiple sampler objects since their execution is serialized on the
// sampling thread.
std::unique_ptr<StackBuffer> stack_buffer_;
// A map of collection ids to collection contexts. Because this class is a
// singleton that is never destroyed, context objects will never be destructed
// except by explicit action. Thus, it's acceptable to pass unretained
// pointers to these objects when posting tasks.
std::map<int, std::unique_ptr<CollectionContext>> active_collections_;
// State maintained about the current execution (or non-execution) of
// the thread. This state must always be accessed while holding the
// lock. A copy of the task-runner is maintained here for use by any
// calling thread; this is necessary because Thread's accessor for it is
// not itself thread-safe. The lock is also used to order calls to the
// Thread API (Start, Stop, StopSoon, & DetachFromSequence) so that
// multiple threads may make those calls.
Lock thread_execution_state_lock_; // Protects all thread_execution_state_*
ThreadExecutionState thread_execution_state_
GUARDED_BY(thread_execution_state_lock_) = NOT_STARTED;
scoped_refptr<SingleThreadTaskRunner> thread_execution_state_task_runner_
GUARDED_BY(thread_execution_state_lock_);
bool thread_execution_state_disable_idle_shutdown_for_testing_
GUARDED_BY(thread_execution_state_lock_) = false;
// A counter that notes adds of new collection requests. It is incremented
// when changes occur so that delayed shutdown tasks are able to detect if
// something new has happened while they were waiting. Like all "execution_state"
// vars, this must be accessed while holding |thread_execution_state_lock_|.
int thread_execution_state_add_events_
GUARDED_BY(thread_execution_state_lock_) = 0;
DISALLOW_COPY_AND_ASSIGN(SamplingThread);
};
// static
void StackSamplingProfiler::SamplingThread::TestPeer::Reset() {
SamplingThread* sampler = SamplingThread::GetInstance();
ThreadExecutionState state;
{
AutoLock lock(sampler->thread_execution_state_lock_);
state = sampler->thread_execution_state_;
DCHECK(sampler->active_collections_.empty());
}
// Stop the thread and wait for it to exit. This has to be done by
// the thread itself because it has taken ownership of its own lifetime.
if (state == RUNNING) {
ShutdownAssumingIdle(false);
state = EXITING;
}
// Make sure thread is cleaned up since state will be reset to NOT_STARTED.
if (state == EXITING)
sampler->Stop();
// Reset internal variables to the just-initialized state.
{
AutoLock lock(sampler->thread_execution_state_lock_);
sampler->thread_execution_state_ = NOT_STARTED;
sampler->thread_execution_state_task_runner_ = nullptr;
sampler->thread_execution_state_disable_idle_shutdown_for_testing_ = false;
sampler->thread_execution_state_add_events_ = 0;
}
}
// static
void StackSamplingProfiler::SamplingThread::TestPeer::DisableIdleShutdown() {
SamplingThread* sampler = SamplingThread::GetInstance();
{
AutoLock lock(sampler->thread_execution_state_lock_);
sampler->thread_execution_state_disable_idle_shutdown_for_testing_ = true;
}
}
// static
void StackSamplingProfiler::SamplingThread::TestPeer::ShutdownAssumingIdle(
bool simulate_intervening_add) {
SamplingThread* sampler = SamplingThread::GetInstance();
ThreadExecutionState state;
scoped_refptr<SingleThreadTaskRunner> task_runner =
sampler->GetTaskRunner(&state);
DCHECK_EQ(RUNNING, state);
DCHECK(task_runner);
int add_events;
{
AutoLock lock(sampler->thread_execution_state_lock_);
add_events = sampler->thread_execution_state_add_events_;
if (simulate_intervening_add)
++sampler->thread_execution_state_add_events_;
}
WaitableEvent executed(WaitableEvent::ResetPolicy::MANUAL,
WaitableEvent::InitialState::NOT_SIGNALED);
// PostTaskAndReply won't work because the thread and its associated message
// loop may be shut down.
task_runner->PostTask(
FROM_HERE, BindOnce(&ShutdownTaskAndSignalEvent, Unretained(sampler),
add_events, Unretained(&executed)));
executed.Wait();
}
// static
void StackSamplingProfiler::SamplingThread::TestPeer::
ShutdownTaskAndSignalEvent(SamplingThread* sampler,
int add_events,
WaitableEvent* event) {
sampler->ShutdownTask(add_events);
event->Signal();
}
AtomicSequenceNumber StackSamplingProfiler::SamplingThread::CollectionContext::
next_collection_id;
StackSamplingProfiler::SamplingThread::SamplingThread()
: Thread("StackSamplingProfiler") {}
StackSamplingProfiler::SamplingThread::~SamplingThread() = default;
StackSamplingProfiler::SamplingThread*
StackSamplingProfiler::SamplingThread::GetInstance() {
return Singleton<SamplingThread, LeakySingletonTraits<SamplingThread>>::get();
}
int StackSamplingProfiler::SamplingThread::Add(
std::unique_ptr<CollectionContext> collection) {
// This is not to be run on the sampling thread.
int collection_id = collection->collection_id;
scoped_refptr<SingleThreadTaskRunner> task_runner =
GetOrCreateTaskRunnerForAdd();
task_runner->PostTask(
FROM_HERE, BindOnce(&SamplingThread::AddCollectionTask, Unretained(this),
std::move(collection)));
return collection_id;
}
void StackSamplingProfiler::SamplingThread::AddAuxUnwinder(
int collection_id,
std::unique_ptr<Unwinder> unwinder) {
ThreadExecutionState state;
scoped_refptr<SingleThreadTaskRunner> task_runner = GetTaskRunner(&state);
if (state != RUNNING)
return;
DCHECK(task_runner);
task_runner->PostTask(
FROM_HERE, BindOnce(&SamplingThread::AddAuxUnwinderTask, Unretained(this),
collection_id, std::move(unwinder)));
}
void StackSamplingProfiler::SamplingThread::ApplyMetadataToPastSamples(
base::TimeTicks period_start,
base::TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value) {
ThreadExecutionState state;
scoped_refptr<SingleThreadTaskRunner> task_runner = GetTaskRunner(&state);
if (state != RUNNING)
return;
DCHECK(task_runner);
task_runner->PostTask(
FROM_HERE, BindOnce(&SamplingThread::ApplyMetadataToPastSamplesTask,
Unretained(this), period_start, period_end, name_hash,
key, value));
}
void StackSamplingProfiler::SamplingThread::Remove(int collection_id) {
// This is not to be run on the sampling thread.
ThreadExecutionState state;
scoped_refptr<SingleThreadTaskRunner> task_runner = GetTaskRunner(&state);
if (state != RUNNING)
return;
DCHECK(task_runner);
// This can fail if the thread were to exit between acquisition of the task
// runner above and the call below. In that case, however, everything has
// stopped so there's no need to try to stop it.
task_runner->PostTask(FROM_HERE,
BindOnce(&SamplingThread::RemoveCollectionTask,
Unretained(this), collection_id));
}
scoped_refptr<SingleThreadTaskRunner>
StackSamplingProfiler::SamplingThread::GetOrCreateTaskRunnerForAdd() {
AutoLock lock(thread_execution_state_lock_);
// The increment of the "add events" count is why this method is to be only
// called from "add".
++thread_execution_state_add_events_;
if (thread_execution_state_ == RUNNING) {
DCHECK(thread_execution_state_task_runner_);
// This shouldn't be called from the sampling thread as it's inefficient.
// Use GetTaskRunnerOnSamplingThread() instead.
DCHECK_NE(GetThreadId(), PlatformThread::CurrentId());
return thread_execution_state_task_runner_;
}
if (thread_execution_state_ == EXITING) {
// StopSoon() was previously called to shut down the thread
// asynchronously. Stop() must now be called before calling Start() again to
// reset the thread state.
//
// We must allow blocking here to satisfy the Thread implementation, but in
// practice the Stop() call is unlikely to actually block. For this to
// happen a new profiling request would have to be made within the narrow
// window between StopSoon() and thread exit following the end of the 60
// second idle period.
ScopedAllowThreadRecallForStackSamplingProfiler allow_thread_join;
Stop();
}
DCHECK(!stack_buffer_);
stack_buffer_ = StackSampler::CreateStackBuffer();
// The thread is not running. Start it and get the associated task runner. The
// task runner has to be saved for future use because, though it can be used
// from any thread, it can be acquired via task_runner() only on the created
// thread or the thread that created it (i.e. this thread). That restriction is
// alleviated in SamplingThread by gating access to the task runner with
// |thread_execution_state_lock_|.
Start();
thread_execution_state_ = RUNNING;
thread_execution_state_task_runner_ = Thread::task_runner();
// Detach the sampling thread from the "sequence" (i.e. thread) that
// started it so that it can be self-managed or stopped by another thread.
DetachFromSequence();
return thread_execution_state_task_runner_;
}
scoped_refptr<SingleThreadTaskRunner>
StackSamplingProfiler::SamplingThread::GetTaskRunner(
ThreadExecutionState* out_state) {
AutoLock lock(thread_execution_state_lock_);
if (out_state)
*out_state = thread_execution_state_;
if (thread_execution_state_ == RUNNING) {
// This shouldn't be called from the sampling thread as it's inefficient.
// Use GetTaskRunnerOnSamplingThread() instead.
DCHECK_NE(GetThreadId(), PlatformThread::CurrentId());
DCHECK(thread_execution_state_task_runner_);
} else {
DCHECK(!thread_execution_state_task_runner_);
}
return thread_execution_state_task_runner_;
}
scoped_refptr<SingleThreadTaskRunner>
StackSamplingProfiler::SamplingThread::GetTaskRunnerOnSamplingThread() {
// This should be called only from the sampling thread as it has limited
// accessibility.
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
return Thread::task_runner();
}
void StackSamplingProfiler::SamplingThread::FinishCollection(
CollectionContext* collection) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
DCHECK_EQ(0u, active_collections_.count(collection->collection_id));
TimeDelta profile_duration = TimeTicks::Now() -
collection->profile_start_time +
collection->params.sampling_interval;
collection->profile_builder->OnProfileCompleted(
profile_duration, collection->params.sampling_interval);
// Signal that this collection is finished.
collection->finished->Signal();
ScheduleShutdownIfIdle();
}
void StackSamplingProfiler::SamplingThread::ScheduleShutdownIfIdle() {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
if (!active_collections_.empty())
return;
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::SamplingThread::ScheduleShutdownIfIdle");
int add_events;
{
AutoLock lock(thread_execution_state_lock_);
if (thread_execution_state_disable_idle_shutdown_for_testing_)
return;
add_events = thread_execution_state_add_events_;
}
GetTaskRunnerOnSamplingThread()->PostDelayedTask(
FROM_HERE,
BindOnce(&SamplingThread::ShutdownTask, Unretained(this), add_events),
TimeDelta::FromSeconds(60));
}
void StackSamplingProfiler::SamplingThread::AddAuxUnwinderTask(
int collection_id,
std::unique_ptr<Unwinder> unwinder) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
auto loc = active_collections_.find(collection_id);
if (loc == active_collections_.end())
return;
loc->second->sampler->AddAuxUnwinder(std::move(unwinder));
}
void StackSamplingProfiler::SamplingThread::ApplyMetadataToPastSamplesTask(
base::TimeTicks period_start,
base::TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
ProfileBuilder::MetadataItem item(name_hash, key, value);
for (auto& id_collection_pair : active_collections_) {
id_collection_pair.second->profile_builder->ApplyMetadataRetrospectively(
period_start, period_end, item);
}
}
void StackSamplingProfiler::SamplingThread::AddCollectionTask(
std::unique_ptr<CollectionContext> collection) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
const int collection_id = collection->collection_id;
const TimeDelta initial_delay = collection->params.initial_delay;
active_collections_.insert(
std::make_pair(collection_id, std::move(collection)));
GetTaskRunnerOnSamplingThread()->PostDelayedTask(
FROM_HERE,
BindOnce(&SamplingThread::RecordSampleTask, Unretained(this),
collection_id),
initial_delay);
// Another increment of "add events" serves to invalidate any pending
// shutdown tasks that may have been initiated between the Add() and this
// task running.
{
AutoLock lock(thread_execution_state_lock_);
++thread_execution_state_add_events_;
}
}
void StackSamplingProfiler::SamplingThread::RemoveCollectionTask(
int collection_id) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
auto found = active_collections_.find(collection_id);
if (found == active_collections_.end())
return;
// Remove |collection| from |active_collections_|.
std::unique_ptr<CollectionContext> collection = std::move(found->second);
size_t count = active_collections_.erase(collection_id);
DCHECK_EQ(1U, count);
FinishCollection(collection.get());
}
void StackSamplingProfiler::SamplingThread::RecordSampleTask(
int collection_id) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
auto found = active_collections_.find(collection_id);
// The task won't be found if it has been stopped.
if (found == active_collections_.end())
return;
CollectionContext* collection = found->second.get();
// If this is the first sample, initialize the collection's timing state.
if (collection->sample_count == 0) {
collection->profile_start_time = TimeTicks::Now();
collection->next_sample_time = TimeTicks::Now();
}
// Record a single sample.
collection->sampler->RecordStackFrames(stack_buffer_.get(),
collection->profile_builder.get());
// Schedule the next sample recording if there is one.
if (++collection->sample_count < collection->params.samples_per_profile) {
if (!collection->params.keep_consistent_sampling_interval)
collection->next_sample_time = TimeTicks::Now();
collection->next_sample_time += collection->params.sampling_interval;
bool success = GetTaskRunnerOnSamplingThread()->PostDelayedTask(
FROM_HERE,
BindOnce(&SamplingThread::RecordSampleTask, Unretained(this),
collection_id),
std::max(collection->next_sample_time - TimeTicks::Now(), TimeDelta()));
DCHECK(success);
return;
}
// Take ownership of |collection| and remove it from the map.
std::unique_ptr<CollectionContext> owned_collection =
std::move(found->second);
size_t count = active_collections_.erase(collection_id);
DCHECK_EQ(1U, count);
// All capturing has completed so finish the collection.
FinishCollection(collection);
}
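// An illustrative helper (hypothetical; not called by the code above) that
// restates the scheduling rule in RecordSampleTask(): with
// keep_consistent_sampling_interval the next sample time advances from the
// previously scheduled time, so a slow sample does not stretch the profile;
// otherwise it advances from "now", guaranteeing the sampled thread at least
// one sampling interval of run time between samples.
TimeTicks ComputeNextSampleTimeForIllustration(
TimeTicks previous_scheduled_time,
TimeDelta sampling_interval,
bool keep_consistent_sampling_interval) {
const TimeTicks base_time = keep_consistent_sampling_interval
? previous_scheduled_time
: TimeTicks::Now();
return base_time + sampling_interval;
}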
void StackSamplingProfiler::SamplingThread::ShutdownTask(int add_events) {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
// Holding this lock ensures that any attempt to start another job will
// get postponed until |thread_execution_state_| is updated, thus eliminating
// the race in starting a new thread while the previous one is exiting.
AutoLock lock(thread_execution_state_lock_);
// If the current count of creation requests doesn't match the passed count
// then other tasks have been created since this was posted. Abort shutdown.
if (thread_execution_state_add_events_ != add_events)
return;
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::SamplingThread::ShutdownTask");
// There can be no new AddCollectionTasks at this point because creating
// those always increments "add events". There may be other requests, like
// Remove, but it's okay to schedule the thread to stop once they've been
// executed (i.e. "soon").
DCHECK(active_collections_.empty());
StopSoon();
// StopSoon will have set the owning sequence (again) so it must be detached
// (again) in order for Stop/Start to be called (again) should more work
// come in. Holding the |thread_execution_state_lock_| ensures the necessary
// happens-after with regard to this detach and future Thread API calls.
DetachFromSequence();
// Set the thread_state variable so the thread will be restarted when new
// work comes in. Remove the |thread_execution_state_task_runner_| to avoid
// confusion.
thread_execution_state_ = EXITING;
thread_execution_state_task_runner_ = nullptr;
stack_buffer_.reset();
}
void StackSamplingProfiler::SamplingThread::CleanUp() {
DCHECK_EQ(GetThreadId(), PlatformThread::CurrentId());
// There should be no collections remaining when the thread stops.
DCHECK(active_collections_.empty());
// Let the parent clean up.
Thread::CleanUp();
}
// StackSamplingProfiler ------------------------------------------------------
// static
void StackSamplingProfiler::TestPeer::Reset() {
SamplingThread::TestPeer::Reset();
}
// static
bool StackSamplingProfiler::TestPeer::IsSamplingThreadRunning() {
return SamplingThread::GetInstance()->IsRunning();
}
// static
void StackSamplingProfiler::TestPeer::DisableIdleShutdown() {
SamplingThread::TestPeer::DisableIdleShutdown();
}
// static
void StackSamplingProfiler::TestPeer::PerformSamplingThreadIdleShutdown(
bool simulate_intervening_start) {
SamplingThread::TestPeer::ShutdownAssumingIdle(simulate_intervening_start);
}
StackSamplingProfiler::StackSamplingProfiler(
SamplingProfilerThreadToken thread_token,
const SamplingParams& params,
std::unique_ptr<ProfileBuilder> profile_builder,
StackSamplerTestDelegate* test_delegate)
: StackSamplingProfiler(params, std::move(profile_builder), nullptr) {
sampler_ = StackSampler::Create(
thread_token, profile_builder_->GetModuleCache(), test_delegate);
}
StackSamplingProfiler::StackSamplingProfiler(
const SamplingParams& params,
std::unique_ptr<ProfileBuilder> profile_builder,
std::unique_ptr<StackSampler> sampler)
: params_(params),
profile_builder_(std::move(profile_builder)),
sampler_(std::move(sampler)),
// The event starts "signaled" so code knows it's safe to start thread
// and "manual" so that it can be waited in multiple places.
profiling_inactive_(kResetPolicy, WaitableEvent::InitialState::SIGNALED),
profiler_id_(kNullProfilerId) {
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::StackSamplingProfiler");
DCHECK(profile_builder_);
}
StackSamplingProfiler::~StackSamplingProfiler() {
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::~StackSamplingProfiler");
// Stop returns immediately but the shutdown runs asynchronously. There is a
// non-zero probability that one more sample will be taken after this call
// returns.
Stop();
// The behavior of sampling a thread that has exited is undefined and could
// cause Bad Things(tm) to occur. The safety model provided by this class is
// that an instance of this object is expected to live at least as long as
// the thread it is sampling. However, because the sampling is performed
// asynchronously by the SamplingThread, there is no way to guarantee this
// is true without waiting for it to signal that it has finished.
//
// The wait time should, at most, be only as long as it takes to collect one
// sample (~200us) or none at all if sampling has already completed.
ScopedAllowBaseSyncPrimitivesOutsideBlockingScope allow_wait;
profiling_inactive_.Wait();
}
void StackSamplingProfiler::Start() {
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::Start");
// Multiple calls to Start() for a single StackSamplingProfiler object are not
// allowed. If profile_builder_ is nullptr, then Start() has been called
// already.
DCHECK(profile_builder_);
// |sampler_| will be null if sampling isn't supported on the current
// platform.
if (!sampler_)
return;
if (pending_aux_unwinder_)
sampler_->AddAuxUnwinder(std::move(pending_aux_unwinder_));
// The IsSignaled() check below requires that the WaitableEvent be manually
// reset, to avoid resetting the event in the IsSignaled() call itself.
static_assert(kResetPolicy == WaitableEvent::ResetPolicy::MANUAL,
"The reset policy must be set to MANUAL");
// If a previous profiling phase is still winding down, wait for it to
// complete. We can't use task posting for this coordination because the
// thread owning the profiler may not have a message loop.
if (!profiling_inactive_.IsSignaled())
profiling_inactive_.Wait();
profiling_inactive_.Reset();
DCHECK_EQ(kNullProfilerId, profiler_id_);
profiler_id_ = SamplingThread::GetInstance()->Add(
std::make_unique<SamplingThread::CollectionContext>(
params_, &profiling_inactive_, std::move(sampler_),
std::move(profile_builder_)));
DCHECK_NE(kNullProfilerId, profiler_id_);
TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::Started", "profiler_id", profiler_id_);
}
void StackSamplingProfiler::Stop() {
TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("cpu_profiler"),
"StackSamplingProfiler::Stop", "profiler_id", profiler_id_);
SamplingThread::GetInstance()->Remove(profiler_id_);
profiler_id_ = kNullProfilerId;
}
void StackSamplingProfiler::AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) {
if (profiler_id_ == kNullProfilerId) {
// We haven't started sampling, and so don't have a sampler to which we can
// pass the unwinder yet. Save it on the instance until we do.
pending_aux_unwinder_ = std::move(unwinder);
return;
}
SamplingThread::GetInstance()->AddAuxUnwinder(profiler_id_,
std::move(unwinder));
}
// static
void StackSamplingProfiler::ApplyMetadataToPastSamples(
base::TimeTicks period_start,
base::TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value) {
SamplingThread::GetInstance()->ApplyMetadataToPastSamples(
period_start, period_end, name_hash, key, value);
}
} // namespace base
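// A standalone, illustrative sketch of the idle-shutdown handshake used by
// SamplingThread above: the delayed shutdown task captures the "add events"
// generation at posting time and gives up if the counter has moved on by the
// time it runs, because that means new work arrived in the interim. The names
// below (shutdown_sketch, IdleShutdownGate) are hypothetical, and std::mutex
// stands in for base::Lock to keep the sketch self-contained.
#include <mutex>
namespace shutdown_sketch {
class IdleShutdownGate {
 public:
// Called whenever new work is added; returns the new generation.
int NoteAdd() {
std::lock_guard<std::mutex> lock(lock_);
return ++add_events_;
}
// Called when scheduling a delayed shutdown; captures the current generation.
int CurrentGeneration() {
std::lock_guard<std::mutex> lock(lock_);
return add_events_;
}
// Called by the delayed shutdown task: proceed only if nothing was added
// since the task was posted.
bool ShouldShutdown(int generation_at_post_time) {
std::lock_guard<std::mutex> lock(lock_);
return add_events_ == generation_at_post_time;
}
 private:
std::mutex lock_;
int add_events_ = 0;
};
}  // namespace shutdown_sketch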

View file

@ -0,0 +1,208 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_SAMPLING_PROFILER_H_
#define BASE_PROFILER_STACK_SAMPLING_PROFILER_H_
#include <memory>
#include <vector>
#include "base/base_export.h"
#include "base/macros.h"
#include "base/optional.h"
#include "base/profiler/profile_builder.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
#include "base/time/time.h"
namespace base {
class Unwinder;
class StackSampler;
class StackSamplerTestDelegate;
// StackSamplingProfiler periodically stops a thread to sample its stack, for
// the purpose of collecting information about which code paths are
// executing. This information is used in aggregate by UMA to identify hot
// and/or janky code paths.
//
// Sample StackSamplingProfiler usage:
//
// // Create and customize params as desired.
// base::StackSamplingProfiler::SamplingParams params;
//
// // To process the profiles, use a custom ProfileBuilder subclass:
// class SubProfileBuilder : public base::ProfileBuilder {...}
//
// // Then create the profiler:
// base::StackSamplingProfiler profiler(base::PlatformThread::CurrentId(),
// params, std::make_unique<SubProfileBuilder>(...));
//
// // On Android the sampler is not implemented in base, so the client can pass
// // in a |sampler| to use while profiling.
// base::StackSamplingProfiler profiler(base::PlatformThread::CurrentId(),
// params, std::make_unique<SubProfileBuilder>(...), <optional> sampler);
//
// // Then start the profiling.
// profiler.Start();
// // ... work being done on the target thread here ...
// // Optionally stop collection before complete per params.
// profiler.Stop();
//
// The default SamplingParams causes stacks to be recorded in a single profile
// at a 10Hz interval for a total of 30 seconds. All of these parameters may be
// altered as desired.
//
// When a call stack profile is complete, or the profiler is stopped,
// ProfileBuilder's OnProfileCompleted function is called from a thread created
// by the profiler.
class BASE_EXPORT StackSamplingProfiler {
public:
// Represents parameters that configure the sampling.
struct BASE_EXPORT SamplingParams {
// Time to delay before first samples are taken.
TimeDelta initial_delay = TimeDelta::FromMilliseconds(0);
// Number of samples to record per profile.
int samples_per_profile = 300;
// Interval between samples during a sampling profile. This is the desired
// duration from the start of one sample to the start of the next sample.
TimeDelta sampling_interval = TimeDelta::FromMilliseconds(100);
// When true, keeps the average sampling interval equal to
// |sampling_interval|, irrespective of how long each sample takes. If a sample
// takes too long, keeping the interval constant can lock out the sampled
// thread. When false, the next sample is scheduled |sampling_interval| after
// the current sample completes, excluding the time taken by the sample itself.
// The collected metrics will be less accurate, since sampling can take an
// arbitrary amount of time, but this ensures that the sampled thread gets at
// least |sampling_interval| of time to run between samples.
bool keep_consistent_sampling_interval = true;
};
// Creates a profiler for the specified thread. An optional |test_delegate|
// can be supplied by tests.
//
// The caller must ensure that this object gets destroyed before the thread
// exits.
StackSamplingProfiler(SamplingProfilerThreadToken thread_token,
const SamplingParams& params,
std::unique_ptr<ProfileBuilder> profile_builder,
StackSamplerTestDelegate* test_delegate = nullptr);
// Same as the above constructor, but with a custom |sampler| implementation.
// The sampler on Android is not implemented in base.
StackSamplingProfiler(const SamplingParams& params,
std::unique_ptr<ProfileBuilder> profile_builder,
std::unique_ptr<StackSampler> sampler);
// Stops any profiling currently taking place before destroying the profiler.
// This will block until profile_builder_'s OnProfileCompleted function has
// executed if profiling has started but not already finished.
~StackSamplingProfiler();
// Initializes the profiler and starts sampling. Might block on a
// WaitableEvent if this StackSamplingProfiler was previously started and
// recently stopped, while the previous profiling phase winds down.
void Start();
// Stops the profiler and any ongoing sampling. This method will return
// immediately with the profile_builder_'s OnProfileCompleted function being
// run asynchronously. At most one more stack sample will be taken after this
// method returns. Calling this function is optional; if not invoked profiling
// terminates when all the profiling samples specified in the SamplingParams
// are completed or the profiler object is destroyed, whichever occurs first.
void Stop();
// Adds an auxiliary unwinder to handle additional, non-native-code unwind
// scenarios.
void AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder);
// Test peer class. These functions are purely for internal testing of
// StackSamplingProfiler; DO NOT USE within tests outside of this directory.
// The functions are static because they interact with the sampling thread, a
// singleton used by all StackSamplingProfiler objects. The functions can
// only be called by the same thread that started the sampling.
class BASE_EXPORT TestPeer {
public:
// Resets the internal state to that of a fresh start. This is necessary
// so that tests don't inherit state from previous tests.
static void Reset();
// Returns whether the sampling thread is currently running or not.
static bool IsSamplingThreadRunning();
// Disables inherent idle-shutdown behavior.
static void DisableIdleShutdown();
// Initiates an idle shutdown task, as though the idle timer had expired,
// causing the thread to exit. There is no "idle" check so this must be
// called only when all sampling tasks have completed. This blocks until
// the task has been executed, though the actual stopping of the thread
// still happens asynchronously. Watch IsSamplingThreadRunning() to know
// when the thread has exited. If |simulate_intervening_start| is true then
// this method will make it appear to the shutdown task that a new profiler
// was started between when the idle-shutdown was initiated and when it
// runs.
static void PerformSamplingThreadIdleShutdown(
bool simulate_intervening_start);
};
private:
// SamplingThread is a separate thread used to suspend and sample stacks from
// the target thread.
class SamplingThread;
// Friend the global function from sample_metadata.cc so that it can call into
// the function below.
friend void ApplyMetadataToPastSamplesImpl(TimeTicks period_start,
TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value);
// Apply metadata to already recorded samples. See the
// ApplyMetadataToPastSamples() docs in sample_metadata.h.
static void ApplyMetadataToPastSamples(TimeTicks period_start,
TimeTicks period_end,
int64_t name_hash,
Optional<int64_t> key,
int64_t value);
// The thread whose stack will be sampled.
SamplingProfilerThreadToken thread_token_;
const SamplingParams params_;
// Receives the sampling data and builds a profile. The ownership of this
// object will be transferred to the sampling thread when thread sampling
// starts.
std::unique_ptr<ProfileBuilder> profile_builder_;
// Stack sampler which stops the thread and collects stack frames. The
// ownership of this object will be transferred to the sampling thread when
// thread sampling starts.
std::unique_ptr<StackSampler> sampler_;
// If an AuxUnwinder is added before Start() it will be saved here until it
// can be passed to the sampling thread when thread sampling starts.
std::unique_ptr<Unwinder> pending_aux_unwinder_;
// This starts "signaled", is reset when sampling begins, and is signaled
// when that sampling is complete and the profile_builder_'s
// OnProfileCompleted function has executed.
WaitableEvent profiling_inactive_;
// An ID uniquely identifying this profiler to the sampling thread. This
// will be an internal "null" value when no collection has been started.
int profiler_id_;
DISALLOW_COPY_AND_ASSIGN(StackSamplingProfiler);
};
} // namespace base
#endif // BASE_PROFILER_STACK_SAMPLING_PROFILER_H_
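// A minimal usage sketch (not part of the header above): it assumes a
// hypothetical ProfileBuilder subclass named MyProfileBuilder and uses the
// thread-token constructor exercised by the test utilities later in this
// commit.
void ProfileThreadOnce(SamplingProfilerThreadToken thread_token) {
  StackSamplingProfiler::SamplingParams params;
  params.sampling_interval = TimeDelta::FromMilliseconds(0);
  params.samples_per_profile = 1;
  // MyProfileBuilder (hypothetical) receives the sampled frames.
  StackSamplingProfiler profiler(thread_token, params,
                                 std::make_unique<MyProfileBuilder>());
  profiler.Start();
  // Stop() is optional here; the single requested sample completes the
  // profile, and the destructor blocks until OnProfileCompleted has run.
}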

View file

@ -0,0 +1,249 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/stack_sampling_profiler_test_util.h"
#include <utility>
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/location.h"
#include "base/profiler/stack_sampling_profiler.h"
#include "base/profiler/unwinder.h"
#include "base/strings/stringprintf.h"
#include "base/test/bind_test_util.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace base {
namespace {
// A profile builder for test use that expects to receive exactly one sample.
class TestProfileBuilder : public ProfileBuilder {
public:
// The callback is passed the last sample recorded.
using CompletedCallback = OnceCallback<void(std::vector<Frame>)>;
TestProfileBuilder(ModuleCache* module_cache, CompletedCallback callback)
: module_cache_(module_cache), callback_(std::move(callback)) {}
~TestProfileBuilder() override = default;
TestProfileBuilder(const TestProfileBuilder&) = delete;
TestProfileBuilder& operator=(const TestProfileBuilder&) = delete;
// ProfileBuilder:
ModuleCache* GetModuleCache() override { return module_cache_; }
void RecordMetadata(MetadataProvider* metadata_provider) override {}
void OnSampleCompleted(std::vector<Frame> sample,
TimeTicks sample_timestamp) override {
EXPECT_TRUE(sample_.empty());
sample_ = std::move(sample);
}
void OnProfileCompleted(TimeDelta profile_duration,
TimeDelta sampling_period) override {
EXPECT_FALSE(sample_.empty());
std::move(callback_).Run(std::move(sample_));
}
private:
ModuleCache* const module_cache_;
CompletedCallback callback_;
std::vector<Frame> sample_;
};
} // namespace
TargetThread::TargetThread(OnceClosure to_run) : to_run_(std::move(to_run)) {}
TargetThread::~TargetThread() = default;
void TargetThread::ThreadMain() {
thread_token_ = GetSamplingProfilerCurrentThreadToken();
std::move(to_run_).Run();
}
UnwindScenario::UnwindScenario(const SetupFunction& setup_function)
: setup_function_(setup_function) {}
UnwindScenario::~UnwindScenario() = default;
FunctionAddressRange UnwindScenario::GetWaitForSampleAddressRange() const {
return WaitForSample(nullptr);
}
FunctionAddressRange UnwindScenario::GetSetupFunctionAddressRange() const {
return setup_function_.Run(OnceClosure());
}
FunctionAddressRange UnwindScenario::GetOuterFunctionAddressRange() const {
return InvokeSetupFunction(SetupFunction(), nullptr);
}
void UnwindScenario::Execute(SampleEvents* events) {
InvokeSetupFunction(setup_function_, events);
}
// static
// Disable inlining for this function so that it gets its own stack frame.
NOINLINE FunctionAddressRange
UnwindScenario::InvokeSetupFunction(const SetupFunction& setup_function,
SampleEvents* events) {
const void* start_program_counter = GetProgramCounter();
if (!setup_function.is_null()) {
const auto wait_for_sample_closure =
BindLambdaForTesting([&]() { UnwindScenario::WaitForSample(events); });
setup_function.Run(wait_for_sample_closure);
}
// Volatile to prevent a tail call to GetProgramCounter().
const void* volatile end_program_counter = GetProgramCounter();
return {start_program_counter, end_program_counter};
}
// static
// Disable inlining for this function so that it gets its own stack frame.
NOINLINE FunctionAddressRange
UnwindScenario::WaitForSample(SampleEvents* events) {
const void* start_program_counter = GetProgramCounter();
if (events) {
events->ready_for_sample.Signal();
events->sample_finished.Wait();
}
// Volatile to prevent a tail call to GetProgramCounter().
const void* volatile end_program_counter = GetProgramCounter();
return {start_program_counter, end_program_counter};
}
// Disable inlining for this function so that it gets its own stack frame.
NOINLINE FunctionAddressRange
CallWithPlainFunction(OnceClosure wait_for_sample) {
const void* start_program_counter = GetProgramCounter();
if (!wait_for_sample.is_null())
std::move(wait_for_sample).Run();
// Volatile to prevent a tail call to GetProgramCounter().
const void* volatile end_program_counter = GetProgramCounter();
return {start_program_counter, end_program_counter};
}
void WithTargetThread(UnwindScenario* scenario,
ProfileCallback profile_callback) {
UnwindScenario::SampleEvents events;
TargetThread target_thread(
BindLambdaForTesting([&]() { scenario->Execute(&events); }));
PlatformThreadHandle target_thread_handle;
EXPECT_TRUE(PlatformThread::Create(0, &target_thread, &target_thread_handle));
events.ready_for_sample.Wait();
std::move(profile_callback).Run(target_thread.thread_token());
events.sample_finished.Signal();
PlatformThread::Join(target_thread_handle);
}
std::vector<Frame> SampleScenario(UnwindScenario* scenario,
ModuleCache* module_cache,
UnwinderFactory aux_unwinder_factory) {
StackSamplingProfiler::SamplingParams params;
params.sampling_interval = TimeDelta::FromMilliseconds(0);
params.samples_per_profile = 1;
std::vector<Frame> sample;
WithTargetThread(
scenario,
BindLambdaForTesting(
[&](SamplingProfilerThreadToken target_thread_token) {
WaitableEvent sampling_thread_completed(
WaitableEvent::ResetPolicy::MANUAL,
WaitableEvent::InitialState::NOT_SIGNALED);
StackSamplingProfiler profiler(
target_thread_token, params,
std::make_unique<TestProfileBuilder>(
module_cache,
BindLambdaForTesting([&sample, &sampling_thread_completed](
std::vector<Frame> result_sample) {
sample = std::move(result_sample);
sampling_thread_completed.Signal();
})));
if (aux_unwinder_factory)
profiler.AddAuxUnwinder(std::move(aux_unwinder_factory).Run());
profiler.Start();
sampling_thread_completed.Wait();
}));
return sample;
}
std::string FormatSampleForDiagnosticOutput(const std::vector<Frame>& sample) {
std::string output;
for (const auto& frame : sample) {
output += StringPrintf(
"0x%p %s\n", reinterpret_cast<const void*>(frame.instruction_pointer),
frame.module ? frame.module->GetDebugBasename().AsUTF8Unsafe().c_str()
: "null module");
}
return output;
}
void ExpectStackContains(const std::vector<Frame>& stack,
const std::vector<FunctionAddressRange>& functions) {
auto frame_it = stack.begin();
auto function_it = functions.begin();
for (; frame_it != stack.end() && function_it != functions.end();
++frame_it) {
if (frame_it->instruction_pointer >=
reinterpret_cast<uintptr_t>(function_it->start) &&
frame_it->instruction_pointer <=
reinterpret_cast<uintptr_t>(function_it->end)) {
++function_it;
}
}
EXPECT_EQ(function_it, functions.end())
<< "Function in position " << function_it - functions.begin() << " at "
<< function_it->start << " was not found in stack "
<< "(or did not appear in the expected order):\n"
<< FormatSampleForDiagnosticOutput(stack);
}
void ExpectStackDoesNotContain(
const std::vector<Frame>& stack,
const std::vector<FunctionAddressRange>& functions) {
struct FunctionAddressRangeCompare {
bool operator()(const FunctionAddressRange& a,
const FunctionAddressRange& b) const {
return std::make_pair(a.start, a.end) < std::make_pair(b.start, b.end);
}
};
std::set<FunctionAddressRange, FunctionAddressRangeCompare> seen_functions;
for (const auto& frame : stack) {
for (const auto function : functions) {
if (frame.instruction_pointer >=
reinterpret_cast<uintptr_t>(function.start) &&
frame.instruction_pointer <=
reinterpret_cast<uintptr_t>(function.end)) {
seen_functions.insert(function);
}
}
}
for (const auto function : seen_functions) {
ADD_FAILURE() << "Function at " << function.start
<< " was unexpectedly found in stack:\n"
<< FormatSampleForDiagnosticOutput(stack);
}
}
} // namespace base

View file

@ -0,0 +1,127 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_STACK_SAMPLING_PROFILER_TEST_UTIL_H_
#define BASE_PROFILER_STACK_SAMPLING_PROFILER_TEST_UTIL_H_
#include <memory>
#include <vector>
#include "base/callback.h"
#include "base/profiler/frame.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/platform_thread.h"
namespace base {
class Unwinder;
// A thread to target for profiling that will run the supplied closure.
class TargetThread : public PlatformThread::Delegate {
public:
TargetThread(OnceClosure to_run);
~TargetThread() override;
// PlatformThread::Delegate:
void ThreadMain() override;
SamplingProfilerThreadToken thread_token() const { return thread_token_; }
private:
SamplingProfilerThreadToken thread_token_ = {0};
OnceClosure to_run_;
DISALLOW_COPY_AND_ASSIGN(TargetThread);
};
// Addresses near the start and end of a function.
struct FunctionAddressRange {
const void* start;
const void* end;
};
// Represents a stack unwind scenario to be sampled by the
// StackSamplingProfiler.
class UnwindScenario {
public:
// A callback provided by the caller that sets up the unwind scenario, then
// calls into the passed closure to wait for a sample to be taken. Returns the
// address range of the function that sets up the unwind scenario. The passed
// closure will be null when invoked solely to obtain the address range.
using SetupFunction = RepeatingCallback<FunctionAddressRange(OnceClosure)>;
// Events to coordinate the sampling.
struct SampleEvents {
WaitableEvent ready_for_sample;
WaitableEvent sample_finished;
};
explicit UnwindScenario(const SetupFunction& setup_function);
~UnwindScenario();
UnwindScenario(const UnwindScenario&) = delete;
UnwindScenario& operator=(const UnwindScenario&) = delete;
// The address range of the innermost function that waits for the sample.
FunctionAddressRange GetWaitForSampleAddressRange() const;
// The address range of the provided setup function.
FunctionAddressRange GetSetupFunctionAddressRange() const;
// The address range of the outer function that indirectly invokes the setup
// function.
FunctionAddressRange GetOuterFunctionAddressRange() const;
// Executes the scenario.
void Execute(SampleEvents* events);
private:
static FunctionAddressRange InvokeSetupFunction(
const SetupFunction& setup_function,
SampleEvents* events);
static FunctionAddressRange WaitForSample(SampleEvents* events);
const SetupFunction setup_function_;
};
// UnwindScenario setup function that calls into |wait_for_sample| without doing
// any special unwinding setup, to exercise the "normal" unwind scenario.
FunctionAddressRange CallWithPlainFunction(OnceClosure wait_for_sample);
// The callback to perform profiling on the provided thread.
using ProfileCallback = OnceCallback<void(SamplingProfilerThreadToken)>;
// Executes |profile_callback| while running |scenario| on the target
// thread. Performs all necessary target thread startup and shutdown work before
// and afterward.
void WithTargetThread(UnwindScenario* scenario,
ProfileCallback profile_callback);
using UnwinderFactory = OnceCallback<std::unique_ptr<Unwinder>()>;
// Returns the sample seen when taking one sample of |scenario|.
std::vector<Frame> SampleScenario(
UnwindScenario* scenario,
ModuleCache* module_cache,
UnwinderFactory aux_unwinder_factory = UnwinderFactory());
// Formats a sample into a string that can be output for test diagnostics.
std::string FormatSampleForDiagnosticOutput(const std::vector<Frame>& sample);
// Expects that the stack contains the functions with the specified address
// ranges, in the specified order.
void ExpectStackContains(const std::vector<Frame>& stack,
const std::vector<FunctionAddressRange>& functions);
// Expects that the stack does not contain the functions with the specified
// address ranges.
void ExpectStackDoesNotContain(
const std::vector<Frame>& stack,
const std::vector<FunctionAddressRange>& functions);
} // namespace base
#endif // BASE_PROFILER_STACK_SAMPLING_PROFILER_TEST_UTIL_H_
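// A hedged sketch of how a test might put these declarations together. The
// TEST macro comes from gtest and BindRepeating from base/bind.h, neither of
// which this header includes, and the test name is made up for illustration.
TEST(StackSamplingProfilerTest, PlainFunction) {
  UnwindScenario scenario(BindRepeating(&CallWithPlainFunction));
  ModuleCache module_cache;
  const std::vector<Frame> sample = SampleScenario(&scenario, &module_cache);
  ExpectStackContains(sample, {scenario.GetWaitForSampleAddressRange(),
                               scenario.GetSetupFunctionAddressRange(),
                               scenario.GetOuterFunctionAddressRange()});
}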

View file

@ -0,0 +1,59 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_H_
#define BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_H_
#include <vector>
#include "base/base_export.h"
#include "base/profiler/register_context.h"
#include "base/profiler/thread_delegate.h"
namespace base {
// Platform-specific thread and stack manipulation delegate, for use by the
// platform-independent stack copying/walking implementation in
// StackSamplerImpl for suspension-based stack copying.
//
// IMPORTANT NOTE: Most methods in this interface are invoked while the target
// thread is suspended so must not do any allocation from the heap, including
// indirectly via use of DCHECK/CHECK or other logging statements. Otherwise the
// implementation can deadlock on heap locks acquired by the target thread
// before it was suspended. These functions are commented with "NO HEAP
// ALLOCATIONS".
class BASE_EXPORT SuspendableThreadDelegate : public ThreadDelegate {
public:
// Implementations of this interface should suspend the thread for the
// object's lifetime. NO HEAP ALLOCATIONS between the time the thread is
// suspended and resumed.
class BASE_EXPORT ScopedSuspendThread {
public:
ScopedSuspendThread() = default;
virtual ~ScopedSuspendThread() = default;
ScopedSuspendThread(const ScopedSuspendThread&) = delete;
ScopedSuspendThread& operator=(const ScopedSuspendThread&) = delete;
virtual bool WasSuccessful() const = 0;
};
SuspendableThreadDelegate() = default;
// Creates an object that holds the thread suspended for its lifetime.
virtual std::unique_ptr<ScopedSuspendThread> CreateScopedSuspendThread() = 0;
// Gets the register context for the thread.
// NO HEAP ALLOCATIONS.
virtual bool GetThreadContext(RegisterContext* thread_context) = 0;
// Returns true if the thread's stack can be copied, where the bottom address
// of the thread is at |stack_pointer|.
// NO HEAP ALLOCATIONS.
virtual bool CanCopyStack(uintptr_t stack_pointer) = 0;
};
} // namespace base
#endif // BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_H_
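// A hedged sketch of how a suspension-based sampler (in the spirit of the
// StackSamplerImpl mentioned above, whose real code is not shown here) might
// drive this interface. The function name and error handling are assumptions;
// RegisterContextStackPointer is the accessor from register_context.h. Note
// that nothing between suspension and resumption heap-allocates.
bool CaptureThreadState(SuspendableThreadDelegate* delegate,
                        RegisterContext* thread_context,
                        uintptr_t* out_stack_pointer) {
  // The stack base can be queried outside the suspended region.
  const uintptr_t stack_top = delegate->GetStackBaseAddress();
  std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread> suspend =
      delegate->CreateScopedSuspendThread();
  if (!suspend->WasSuccessful())
    return false;
  // NO HEAP ALLOCATIONS until |suspend| goes out of scope.
  if (!delegate->GetThreadContext(thread_context))
    return false;
  const uintptr_t stack_pointer = RegisterContextStackPointer(thread_context);
  if (stack_pointer >= stack_top || !delegate->CanCopyStack(stack_pointer))
    return false;
  *out_stack_pointer = stack_pointer;
  // ...copy [stack_pointer, stack_top) into a preallocated buffer here...
  return true;
}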

View file

@ -0,0 +1,111 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/suspendable_thread_delegate_mac.h"
#include <mach/mach.h>
#include <mach/thread_act.h>
#include <pthread.h>
#include "base/logging.h"
#include "base/mac/mach_logging.h"
#include "base/profiler/profile_builder.h"
// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended so it must not do any allocation from the
// heap, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with "NO
// HEAP ALLOCATIONS".
namespace base {
namespace {
// Fills |state| with |target_thread|'s context. NO HEAP ALLOCATIONS.
bool GetThreadState(thread_act_t target_thread, x86_thread_state64_t* state) {
auto count = static_cast<mach_msg_type_number_t>(x86_THREAD_STATE64_COUNT);
return thread_get_state(target_thread, x86_THREAD_STATE64,
reinterpret_cast<thread_state_t>(state),
&count) == KERN_SUCCESS;
}
} // namespace
// ScopedSuspendThread --------------------------------------------------------
// NO HEAP ALLOCATIONS after thread_suspend.
SuspendableThreadDelegateMac::ScopedSuspendThread::ScopedSuspendThread(
mach_port_t thread_port)
: thread_port_(thread_suspend(thread_port) == KERN_SUCCESS
? thread_port
: MACH_PORT_NULL) {}
// NO HEAP ALLOCATIONS. The MACH_CHECK is OK because it provides a more noisy
// failure mode than deadlocking.
SuspendableThreadDelegateMac::ScopedSuspendThread::~ScopedSuspendThread() {
if (!WasSuccessful())
return;
kern_return_t kr = thread_resume(thread_port_);
MACH_CHECK(kr == KERN_SUCCESS, kr) << "thread_resume";
}
bool SuspendableThreadDelegateMac::ScopedSuspendThread::WasSuccessful() const {
return thread_port_ != MACH_PORT_NULL;
}
// SuspendableThreadDelegateMac -----------------------------------------------
SuspendableThreadDelegateMac::SuspendableThreadDelegateMac(
SamplingProfilerThreadToken thread_token)
: thread_port_(thread_token.id),
thread_stack_base_address_(
reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(
pthread_from_mach_thread_np(thread_token.id)))) {
// This class suspends threads, and those threads might be suspended in dyld.
// Therefore, for all the system functions that might be linked in dynamically
// that are used while threads are suspended, make calls to them to make sure
// that they are linked up.
x86_thread_state64_t thread_state;
GetThreadState(thread_port_, &thread_state);
}
SuspendableThreadDelegateMac::~SuspendableThreadDelegateMac() = default;
std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread>
SuspendableThreadDelegateMac::CreateScopedSuspendThread() {
return std::make_unique<ScopedSuspendThread>(thread_port_);
}
PlatformThreadId SuspendableThreadDelegateMac::GetThreadId() const {
return thread_port_;
}
// NO HEAP ALLOCATIONS.
bool SuspendableThreadDelegateMac::GetThreadContext(
x86_thread_state64_t* thread_context) {
return GetThreadState(thread_port_, thread_context);
}
// NO HEAP ALLOCATIONS.
uintptr_t SuspendableThreadDelegateMac::GetStackBaseAddress() const {
return thread_stack_base_address_;
}
// NO HEAP ALLOCATIONS.
bool SuspendableThreadDelegateMac::CanCopyStack(uintptr_t stack_pointer) {
return true;
}
std::vector<uintptr_t*> SuspendableThreadDelegateMac::GetRegistersToRewrite(
x86_thread_state64_t* thread_context) {
return {
&AsUintPtr(&thread_context->__rbx), &AsUintPtr(&thread_context->__rbp),
&AsUintPtr(&thread_context->__rsp), &AsUintPtr(&thread_context->__r12),
&AsUintPtr(&thread_context->__r13), &AsUintPtr(&thread_context->__r14),
&AsUintPtr(&thread_context->__r15)};
}
} // namespace base

View file

@ -0,0 +1,66 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_MAC_H_
#define BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_MAC_H_
#include <mach/mach.h>
#include "base/base_export.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/native_unwinder_mac.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/threading/platform_thread.h"
namespace base {
// Platform- and thread-specific implementation in support of stack sampling on
// Mac.
class BASE_EXPORT SuspendableThreadDelegateMac
: public SuspendableThreadDelegate {
public:
class ScopedSuspendThread
: public SuspendableThreadDelegate::ScopedSuspendThread {
public:
explicit ScopedSuspendThread(mach_port_t thread_port);
~ScopedSuspendThread() override;
ScopedSuspendThread(const ScopedSuspendThread&) = delete;
ScopedSuspendThread& operator=(const ScopedSuspendThread&) = delete;
bool WasSuccessful() const override;
private:
mach_port_t thread_port_;
};
SuspendableThreadDelegateMac(SamplingProfilerThreadToken thread_token);
~SuspendableThreadDelegateMac() override;
SuspendableThreadDelegateMac(const SuspendableThreadDelegateMac&) = delete;
SuspendableThreadDelegateMac& operator=(const SuspendableThreadDelegateMac&) =
delete;
// SuspendableThreadDelegate
std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread>
CreateScopedSuspendThread() override;
bool GetThreadContext(x86_thread_state64_t* thread_context) override;
PlatformThreadId GetThreadId() const override;
uintptr_t GetStackBaseAddress() const override;
bool CanCopyStack(uintptr_t stack_pointer) override;
std::vector<uintptr_t*> GetRegistersToRewrite(
x86_thread_state64_t* thread_context) override;
private:
// Weak reference: Mach port for thread being profiled.
const mach_port_t thread_port_;
// The stack base address corresponding to |thread_port_|.
const uintptr_t thread_stack_base_address_;
};
} // namespace base
#endif // BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_MAC_H_

View file

@ -0,0 +1,231 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/suspendable_thread_delegate_win.h"
#include <windows.h>
#include <winternl.h>
#include "base/debug/alias.h"
#include "base/logging.h"
#include "base/profiler/native_unwinder_win.h"
#include "build/build_config.h"
// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended so it must not do any allocation from the
// heap, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with "NO
// HEAP ALLOCATIONS".
namespace base {
namespace {
// The thread environment block internal type.
struct TEB {
NT_TIB Tib;
// Rest of struct is ignored.
};
win::ScopedHandle GetThreadHandle(PlatformThreadId thread_id) {
// TODO(http://crbug.com/947459): Remove the test_handle* CHECKs once we
// understand which flag is triggering the failure.
DWORD flags = 0;
base::debug::Alias(&flags);
flags |= THREAD_GET_CONTEXT;
win::ScopedHandle test_handle1(::OpenThread(flags, FALSE, thread_id));
CHECK(test_handle1.IsValid());
flags |= THREAD_QUERY_INFORMATION;
win::ScopedHandle test_handle2(::OpenThread(flags, FALSE, thread_id));
CHECK(test_handle2.IsValid());
flags |= THREAD_SUSPEND_RESUME;
win::ScopedHandle handle(::OpenThread(flags, FALSE, thread_id));
CHECK(handle.IsValid());
return handle;
}
// Returns the thread environment block pointer for |thread_handle|.
const TEB* GetThreadEnvironmentBlock(HANDLE thread_handle) {
// Define the internal types we need to invoke NtQueryInformationThread.
enum THREAD_INFORMATION_CLASS { ThreadBasicInformation };
struct CLIENT_ID {
HANDLE UniqueProcess;
HANDLE UniqueThread;
};
struct THREAD_BASIC_INFORMATION {
NTSTATUS ExitStatus;
TEB* Teb;
CLIENT_ID ClientId;
KAFFINITY AffinityMask;
LONG Priority;
LONG BasePriority;
};
using NtQueryInformationThreadFunction =
NTSTATUS(WINAPI*)(HANDLE, THREAD_INFORMATION_CLASS, PVOID, ULONG, PULONG);
static const auto nt_query_information_thread =
reinterpret_cast<NtQueryInformationThreadFunction>(::GetProcAddress(
::GetModuleHandle(L"ntdll.dll"), "NtQueryInformationThread"));
if (!nt_query_information_thread)
return nullptr;
THREAD_BASIC_INFORMATION basic_info = {0};
NTSTATUS status = nt_query_information_thread(
thread_handle, ThreadBasicInformation, &basic_info,
sizeof(THREAD_BASIC_INFORMATION), nullptr);
if (status != 0)
return nullptr;
return basic_info.Teb;
}
// Tests whether |stack_pointer| points to a location in the guard page. NO HEAP
// ALLOCATIONS.
bool PointsToGuardPage(uintptr_t stack_pointer) {
MEMORY_BASIC_INFORMATION memory_info;
SIZE_T result = ::VirtualQuery(reinterpret_cast<LPCVOID>(stack_pointer),
&memory_info, sizeof(memory_info));
return result != 0 && (memory_info.Protect & PAGE_GUARD);
}
// ScopedDisablePriorityBoost -------------------------------------------------
// Disables priority boost on a thread for the lifetime of the object.
class ScopedDisablePriorityBoost {
public:
ScopedDisablePriorityBoost(HANDLE thread_handle);
~ScopedDisablePriorityBoost();
private:
HANDLE thread_handle_;
BOOL got_previous_boost_state_;
BOOL boost_state_was_disabled_;
DISALLOW_COPY_AND_ASSIGN(ScopedDisablePriorityBoost);
};
// NO HEAP ALLOCATIONS.
ScopedDisablePriorityBoost::ScopedDisablePriorityBoost(HANDLE thread_handle)
: thread_handle_(thread_handle),
got_previous_boost_state_(false),
boost_state_was_disabled_(false) {
got_previous_boost_state_ =
::GetThreadPriorityBoost(thread_handle_, &boost_state_was_disabled_);
if (got_previous_boost_state_) {
// Confusingly, TRUE disables priority boost.
::SetThreadPriorityBoost(thread_handle_, TRUE);
}
}
ScopedDisablePriorityBoost::~ScopedDisablePriorityBoost() {
if (got_previous_boost_state_)
::SetThreadPriorityBoost(thread_handle_, boost_state_was_disabled_);
}
} // namespace
// ScopedSuspendThread --------------------------------------------------------
// NO HEAP ALLOCATIONS after ::SuspendThread.
SuspendableThreadDelegateWin::ScopedSuspendThread::ScopedSuspendThread(
HANDLE thread_handle)
: thread_handle_(thread_handle),
was_successful_(::SuspendThread(thread_handle) !=
static_cast<DWORD>(-1)) {}
// NO HEAP ALLOCATIONS. The CHECK is OK because it provides a more noisy failure
// mode than deadlocking.
SuspendableThreadDelegateWin::ScopedSuspendThread::~ScopedSuspendThread() {
if (!was_successful_)
return;
// Disable the priority boost that the thread would otherwise receive on
// resume. We do this to avoid artificially altering the dynamics of the
// executing application any more than we already are by suspending and
// resuming the thread.
//
// Note that this can racily disable a priority boost that otherwise would
// have been given to the thread, if the thread is waiting on other wait
// conditions at the time of SuspendThread and those conditions are satisfied
// before priority boost is reenabled. The measured length of this window is
// ~100us, so this should occur fairly rarely.
ScopedDisablePriorityBoost disable_priority_boost(thread_handle_);
bool resume_thread_succeeded =
::ResumeThread(thread_handle_) != static_cast<DWORD>(-1);
CHECK(resume_thread_succeeded) << "ResumeThread failed: " << GetLastError();
}
bool SuspendableThreadDelegateWin::ScopedSuspendThread::WasSuccessful() const {
return was_successful_;
}
// SuspendableThreadDelegateWin ------------------------------------------------
SuspendableThreadDelegateWin::SuspendableThreadDelegateWin(
SamplingProfilerThreadToken thread_token)
: thread_id_(thread_token.id),
thread_handle_(GetThreadHandle(thread_token.id)),
thread_stack_base_address_(reinterpret_cast<uintptr_t>(
GetThreadEnvironmentBlock(thread_handle_.Get())->Tib.StackBase)) {}
SuspendableThreadDelegateWin::~SuspendableThreadDelegateWin() = default;
std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread>
SuspendableThreadDelegateWin::CreateScopedSuspendThread() {
return std::make_unique<ScopedSuspendThread>(thread_handle_.Get());
}
PlatformThreadId SuspendableThreadDelegateWin::GetThreadId() const {
return thread_id_;
}
// NO HEAP ALLOCATIONS.
bool SuspendableThreadDelegateWin::GetThreadContext(CONTEXT* thread_context) {
*thread_context = {0};
thread_context->ContextFlags = CONTEXT_FULL;
return ::GetThreadContext(thread_handle_.Get(), thread_context) != 0;
}
// NO HEAP ALLOCATIONS.
uintptr_t SuspendableThreadDelegateWin::GetStackBaseAddress() const {
return thread_stack_base_address_;
}
// Tests whether |stack_pointer| points to a location in the guard page. NO HEAP
// ALLOCATIONS.
bool SuspendableThreadDelegateWin::CanCopyStack(uintptr_t stack_pointer) {
// Dereferencing a pointer in the guard page in a thread that doesn't own the
// stack results in a STATUS_GUARD_PAGE_VIOLATION exception and a crash. This
// occurs very rarely, but reliably over the population.
return !PointsToGuardPage(stack_pointer);
}
std::vector<uintptr_t*> SuspendableThreadDelegateWin::GetRegistersToRewrite(
CONTEXT* thread_context) {
// Return the set of non-volatile registers.
return {
#if defined(ARCH_CPU_X86_64)
&thread_context->R12, &thread_context->R13, &thread_context->R14,
&thread_context->R15, &thread_context->Rdi, &thread_context->Rsi,
&thread_context->Rbx, &thread_context->Rbp, &thread_context->Rsp
#elif defined(ARCH_CPU_ARM64)
&thread_context->X19, &thread_context->X20, &thread_context->X21,
&thread_context->X22, &thread_context->X23, &thread_context->X24,
&thread_context->X25, &thread_context->X26, &thread_context->X27,
&thread_context->X28, &thread_context->Fp, &thread_context->Lr,
&thread_context->Sp
#endif
};
}
} // namespace base

View file

@ -0,0 +1,64 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_WIN_H_
#define BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_WIN_H_
#include <windows.h>
#include "base/base_export.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/threading/platform_thread.h"
#include "base/win/scoped_handle.h"
namespace base {
// Platform- and thread-specific implementation in support of stack sampling on
// Windows.
class BASE_EXPORT SuspendableThreadDelegateWin
: public SuspendableThreadDelegate {
public:
class ScopedSuspendThread
: public SuspendableThreadDelegate::ScopedSuspendThread {
public:
explicit ScopedSuspendThread(HANDLE thread_handle);
~ScopedSuspendThread() override;
bool WasSuccessful() const override;
private:
HANDLE thread_handle_;
bool was_successful_;
DISALLOW_COPY_AND_ASSIGN(ScopedSuspendThread);
};
explicit SuspendableThreadDelegateWin(
SamplingProfilerThreadToken thread_token);
~SuspendableThreadDelegateWin() override;
SuspendableThreadDelegateWin(const SuspendableThreadDelegateWin&) = delete;
SuspendableThreadDelegateWin& operator=(const SuspendableThreadDelegateWin&) =
delete;
// SuspendableThreadDelegate
std::unique_ptr<SuspendableThreadDelegate::ScopedSuspendThread>
CreateScopedSuspendThread() override;
bool GetThreadContext(CONTEXT* thread_context) override;
PlatformThreadId GetThreadId() const override;
uintptr_t GetStackBaseAddress() const override;
bool CanCopyStack(uintptr_t stack_pointer) override;
std::vector<uintptr_t*> GetRegistersToRewrite(
CONTEXT* thread_context) override;
private:
const PlatformThreadId thread_id_;
win::ScopedHandle thread_handle_;
const uintptr_t thread_stack_base_address_;
};
} // namespace base
#endif // BASE_PROFILER_SUSPENDABLE_THREAD_DELEGATE_WIN_H_

View file

@ -0,0 +1,32 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Note: there is intentionally no header file associated with this library so
// we don't risk implicitly demand loading it by accessing a symbol.
#include "build/build_config.h"
#if defined(OS_WIN)
#define BASE_PROFILER_TEST_SUPPORT_LIBRARY_EXPORT __declspec(dllexport)
#else // defined(OS_WIN)
#define BASE_PROFILER_TEST_SUPPORT_LIBRARY_EXPORT __attribute__((visibility("default")))
#endif
namespace base {
// Must be defined in an extern "C" block so we can look up the unmangled name.
extern "C" {
BASE_PROFILER_TEST_SUPPORT_LIBRARY_EXPORT void InvokeCallbackFunction(
void (*function)(void*),
void* arg) {
function(arg);
// Prevent tail call.
volatile int i = 0;
i = 1;
}
} // extern "C"
} // namespace base
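// A hedged sketch of how a test might consume this library: load it at run
// time and resolve the unmangled symbol, so nothing ever links against it
// implicitly. The helpers come from base/native_library.h; the library name
// passed to LoadNativeLibrary below is hypothetical.
using InvokeCallbackFunctionType = void (*)(void (*)(void*), void*);
void CallThroughTestSupportLibrary(void (*callback)(void*), void* arg) {
  NativeLibraryLoadError load_error;
  NativeLibrary library = LoadNativeLibrary(
      FilePath(FILE_PATH_LITERAL("base_profiler_test_support_library")),
      &load_error);
  auto invoke_function = reinterpret_cast<InvokeCallbackFunctionType>(
      GetFunctionPointerFromNativeLibrary(library, "InvokeCallbackFunction"));
  invoke_function(callback, arg);
  UnloadNativeLibrary(library);
}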

View file

@ -0,0 +1,43 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_THREAD_DELEGATE_H_
#define BASE_PROFILER_THREAD_DELEGATE_H_
#include <vector>
#include "base/base_export.h"
#include "base/profiler/register_context.h"
#include "base/threading/platform_thread.h"
namespace base {
// Platform-specific thread and stack manipulation delegate, for use by the
// platform-independent stack copying/walking implementation in
// StackSamplerImpl. Provides the common interface across signal- and
// suspend-based stack copy implementations.
class BASE_EXPORT ThreadDelegate {
public:
ThreadDelegate() = default;
virtual ~ThreadDelegate() = default;
ThreadDelegate(const ThreadDelegate&) = delete;
ThreadDelegate& operator=(const ThreadDelegate&) = delete;
// Gets the platform-specific id for the thread.
virtual PlatformThreadId GetThreadId() const = 0;
// Gets the base address of the thread's stack.
virtual uintptr_t GetStackBaseAddress() const = 0;
// Returns a list of registers that should be rewritten to point into the
// stack copy, if they originally pointed into the original stack.
// May heap allocate.
virtual std::vector<uintptr_t*> GetRegistersToRewrite(
RegisterContext* thread_context) = 0;
};
} // namespace base
#endif // BASE_PROFILER_THREAD_DELEGATE_H_

View file

@ -0,0 +1,120 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <pthread.h>
#include "base/process/process_handle.h"
#include "base/profiler/thread_delegate_posix.h"
#include "base/stl_util.h"
#include "build/build_config.h"
namespace base {
namespace {
uintptr_t GetThreadStackBaseAddressImpl(
SamplingProfilerThreadToken thread_token) {
pthread_attr_t attr;
pthread_getattr_np(thread_token.pthread_id, &attr);
// See crbug.com/617730 for limitations of this approach on Linux.
void* address;
size_t size;
pthread_attr_getstack(&attr, &address, &size);
pthread_attr_destroy(&attr);
const uintptr_t base_address = reinterpret_cast<uintptr_t>(address) + size;
return base_address;
}
uintptr_t GetThreadStackBaseAddress(SamplingProfilerThreadToken thread_token) {
#if defined(OS_ANDROID)
// Caches the main thread base address on Android since Bionic has to read
// /proc/$PID/maps to obtain it. Other thread base addresses are sourced from
// pthread state so are cheap to get.
const bool is_main_thread = thread_token.id == GetCurrentProcId();
if (is_main_thread) {
static const uintptr_t main_thread_base_address =
GetThreadStackBaseAddressImpl(thread_token);
return main_thread_base_address;
}
#endif
return GetThreadStackBaseAddressImpl(thread_token);
}
} // namespace
ThreadDelegatePosix::ThreadDelegatePosix(
SamplingProfilerThreadToken thread_token)
: thread_id_(thread_token.id),
thread_stack_base_address_(GetThreadStackBaseAddress(thread_token)) {}
PlatformThreadId ThreadDelegatePosix::GetThreadId() const {
return thread_id_;
}
uintptr_t ThreadDelegatePosix::GetStackBaseAddress() const {
return thread_stack_base_address_;
}
std::vector<uintptr_t*> ThreadDelegatePosix::GetRegistersToRewrite(
RegisterContext* thread_context) {
#if defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_32_BITS)
return {
reinterpret_cast<uintptr_t*>(&thread_context->arm_r0),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r1),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r2),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r3),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r4),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r5),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r6),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r7),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r8),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r9),
reinterpret_cast<uintptr_t*>(&thread_context->arm_r10),
reinterpret_cast<uintptr_t*>(&thread_context->arm_fp),
reinterpret_cast<uintptr_t*>(&thread_context->arm_ip),
reinterpret_cast<uintptr_t*>(&thread_context->arm_sp),
// arm_lr and arm_pc do not require rewriting because they contain
// addresses of executable code, not addresses in the stack.
};
#elif defined(ARCH_CPU_ARM_FAMILY) && \
defined(ARCH_CPU_64_BITS) // #if defined(ARCH_CPU_ARM_FAMILY) &&
// defined(ARCH_CPU_32_BITS)
std::vector<uintptr_t*> registers;
registers.reserve(12);
// Return the set of callee-save registers per the ARM 64-bit Procedure Call
// Standard section 5.1.1, plus the stack pointer.
registers.push_back(reinterpret_cast<uintptr_t*>(&thread_context->sp));
for (size_t i = 19; i <= 29; ++i)
registers.push_back(reinterpret_cast<uintptr_t*>(&thread_context->regs[i]));
return registers;
#elif defined(ARCH_CPU_X86_FAMILY) && defined(ARCH_CPU_32_BITS)
return {
// Return the set of callee-save registers per the i386 System V ABI
// section 2.2.3, plus the stack pointer.
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_EBX]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_EBP]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_ESI]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_EDI]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_ESP]),
};
#elif defined(ARCH_CPU_X86_FAMILY) && defined(ARCH_CPU_64_BITS)
return {
// Return the set of callee-save registers per the x86-64 System V ABI
// section 3.2.1, plus the stack pointer.
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_RBP]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_RBX]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_R12]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_R13]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_R14]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_R15]),
reinterpret_cast<uintptr_t*>(&thread_context->gregs[REG_RSP]),
};
#else // #if defined(ARCH_CPU_ARM_FAMILY) && defined(ARCH_CPU_32_BITS)
// Unimplemented for other architectures.
return {};
#endif
}
} // namespace base

View file

@ -0,0 +1,37 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_THREAD_DELEGATE_POSIX_H_
#define BASE_PROFILER_THREAD_DELEGATE_POSIX_H_
#include "base/base_export.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/profiler/thread_delegate.h"
#include "base/threading/platform_thread.h"
namespace base {
// Platform- and thread-specific implementation in support of stack sampling on
// POSIX.
class BASE_EXPORT ThreadDelegatePosix : public ThreadDelegate {
public:
ThreadDelegatePosix(SamplingProfilerThreadToken thread_token);
ThreadDelegatePosix(const ThreadDelegatePosix&) = delete;
ThreadDelegatePosix& operator=(const ThreadDelegatePosix&) = delete;
// ThreadDelegate
PlatformThreadId GetThreadId() const override;
uintptr_t GetStackBaseAddress() const override;
std::vector<uintptr_t*> GetRegistersToRewrite(
RegisterContext* thread_context) override;
private:
const PlatformThreadId thread_id_;
const uintptr_t thread_stack_base_address_;
};
} // namespace base
#endif // BASE_PROFILER_THREAD_DELEGATE_POSIX_H_

View file

@ -0,0 +1,87 @@
// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_UNWINDER_H_
#define BASE_PROFILER_UNWINDER_H_
#include <vector>
#include "base/macros.h"
#include "base/profiler/frame.h"
#include "base/profiler/module_cache.h"
#include "base/profiler/register_context.h"
namespace base {
// The result of attempting to unwind stack frames.
enum class UnwindResult {
// The end of the stack was reached successfully.
COMPLETED,
// The walk reached a frame that it doesn't know how to unwind, but might be
// unwindable by the other native/aux unwinder.
UNRECOGNIZED_FRAME,
// The walk was aborted and is not resumable.
ABORTED,
};
// Unwinder provides an interface for stack frame unwinder implementations for
// use with the StackSamplingProfiler. The profiler is expected to call
// CanUnwind() to determine if the Unwinder thinks it can unwind from the frame
// represented by the context values, then TryUnwind() to attempt the
// unwind.
class Unwinder {
public:
virtual ~Unwinder() = default;
// Invoked to allow the unwinder to add any modules it recognizes to the
// ModuleCache.
virtual void AddInitialModules(ModuleCache* module_cache) {}
// Invoked at the time the stack is captured. IMPORTANT NOTE: this function is
// invoked while the target thread is suspended. To avoid deadlock it must not
// invoke any non-reentrant code that is also invoked by the target thread. In
// particular, it may not perform any heap allocation or deallocation,
// including indirectly via use of DCHECK/CHECK or other logging statements.
virtual void OnStackCapture() {}
// Allows the unwinder to update ModuleCache with any modules it's responsible
// for. Invoked for each sample between OnStackCapture() and the initial
// invocations of CanUnwindFrom()/TryUnwind().
virtual void UpdateModules(ModuleCache* module_cache) {}
// Returns true if the unwinder recognizes the code referenced by
// |current_frame| as code from which it should be able to unwind. When
// multiple unwinders are in use, each should return true for a disjoint set
// of frames. Note that if the unwinder returns true it may still legitimately
// fail to unwind; e.g. in the case of a native unwind for a function that
// doesn't have unwind information.
virtual bool CanUnwindFrom(const Frame& current_frame) const = 0;
// Attempts to unwind the frame represented by the context values.
// Walks the native frames on the stack pointed to by the stack pointer in
// |thread_context|, appending the frames to |stack|. When invoked
// stack->back() contains the frame corresponding to the state in
// |thread_context|.
// Precondition: RegisterContextStackPointer(thread_context) is less than
// |stack_top|.
// Postcondition: If the implementation returns UNRECOGNIZED_FRAME, indicating
// that it successfully unwound, RegisterContextStackPointer(thread_context)
// is greater than the previous value and less than |stack_top|.
virtual UnwindResult TryUnwind(RegisterContext* thread_context,
uintptr_t stack_top,
ModuleCache* module_cache,
std::vector<Frame>* stack) const = 0;
Unwinder(const Unwinder&) = delete;
Unwinder& operator=(const Unwinder&) = delete;
protected:
Unwinder() = default;
};
} // namespace base
#endif // BASE_PROFILER_UNWINDER_H_
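// A hedged, minimal sketch of an Unwinder that follows the contract above. It
// claims frames only from one module and walks a conventional frame-pointer
// chain, which is an illustrative assumption rather than how the real native
// unwinders work; the RegisterContext*() accessors are assumed to come from
// register_context.h.
class FramePointerUnwinder : public Unwinder {
 public:
  explicit FramePointerUnwinder(const ModuleCache::Module* module)
      : module_(module) {}

  bool CanUnwindFrom(const Frame& current_frame) const override {
    return current_frame.module == module_;
  }

  UnwindResult TryUnwind(RegisterContext* thread_context,
                         uintptr_t stack_top,
                         ModuleCache* module_cache,
                         std::vector<Frame>* stack) const override {
    const uintptr_t frame_pointer =
        RegisterContextFramePointer(thread_context);
    // Bail out unless the assumed [saved fp][return address] pair lies above
    // the current stack pointer and below the top of the copied stack, which
    // also keeps the documented postcondition on the stack pointer.
    if (frame_pointer < RegisterContextStackPointer(thread_context) ||
        frame_pointer + 2 * sizeof(uintptr_t) >= stack_top)
      return UnwindResult::ABORTED;
    const uintptr_t* frame = reinterpret_cast<const uintptr_t*>(frame_pointer);
    RegisterContextFramePointer(thread_context) = frame[0];
    RegisterContextInstructionPointer(thread_context) = frame[1];
    RegisterContextStackPointer(thread_context) =
        frame_pointer + 2 * sizeof(uintptr_t);
    stack->emplace_back(
        RegisterContextInstructionPointer(thread_context),
        module_cache->GetModuleForAddress(
            RegisterContextInstructionPointer(thread_context)));
    return UnwindResult::UNRECOGNIZED_FRAME;
  }

 private:
  const ModuleCache::Module* const module_;
};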

View file

@ -0,0 +1,144 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/profiler/win32_stack_frame_unwinder.h"
#include <windows.h>
#include <utility>
#include "base/macros.h"
#include "build/build_config.h"
namespace base {
// Win32UnwindFunctions -------------------------------------------------------
namespace {
// Implements the UnwindFunctions interface for the corresponding Win32
// functions.
class Win32UnwindFunctions : public Win32StackFrameUnwinder::UnwindFunctions {
public:
Win32UnwindFunctions();
~Win32UnwindFunctions() override;
PRUNTIME_FUNCTION LookupFunctionEntry(DWORD64 program_counter,
PDWORD64 image_base) override;
void VirtualUnwind(DWORD64 image_base,
DWORD64 program_counter,
PRUNTIME_FUNCTION runtime_function,
CONTEXT* context) override;
private:
DISALLOW_COPY_AND_ASSIGN(Win32UnwindFunctions);
};
Win32UnwindFunctions::Win32UnwindFunctions() {}
Win32UnwindFunctions::~Win32UnwindFunctions() {}
PRUNTIME_FUNCTION Win32UnwindFunctions::LookupFunctionEntry(
DWORD64 program_counter,
PDWORD64 image_base) {
#ifdef _WIN64
return ::RtlLookupFunctionEntry(program_counter, image_base, nullptr);
#else
NOTREACHED();
return nullptr;
#endif
}
void Win32UnwindFunctions::VirtualUnwind(DWORD64 image_base,
DWORD64 program_counter,
PRUNTIME_FUNCTION runtime_function,
CONTEXT* context) {
#ifdef _WIN64
void* handler_data = nullptr;
ULONG64 establisher_frame;
KNONVOLATILE_CONTEXT_POINTERS nvcontext = {};
::RtlVirtualUnwind(UNW_FLAG_NHANDLER, image_base, program_counter,
runtime_function, context, &handler_data,
&establisher_frame, &nvcontext);
#else
NOTREACHED();
#endif
}
} // namespace
// Win32StackFrameUnwinder ----------------------------------------------------
Win32StackFrameUnwinder::UnwindFunctions::~UnwindFunctions() = default;
Win32StackFrameUnwinder::UnwindFunctions::UnwindFunctions() = default;
Win32StackFrameUnwinder::Win32StackFrameUnwinder()
: Win32StackFrameUnwinder(std::make_unique<Win32UnwindFunctions>()) {}
Win32StackFrameUnwinder::~Win32StackFrameUnwinder() {}
bool Win32StackFrameUnwinder::TryUnwind(
bool at_top_frame,
CONTEXT* context,
// The module parameter, while not directly used, is still passed because it
// represents an implicit dependency for this function. Having the Module
// ensures that we have incremented the HMODULE reference count, which is
// critical to ensuring that the module is not unloaded during the
// unwinding. Otherwise the module could be unloaded between the
// LookupFunctionEntry and VirtualUnwind calls, resulting in crashes
// accessing unwind information from the unloaded module.
const ModuleCache::Module* module) {
#ifdef _WIN64
// Ensure we found a valid module for the program counter.
DCHECK(module);
ULONG64 image_base;
// Try to look up unwind metadata for the current function.
PRUNTIME_FUNCTION runtime_function =
unwind_functions_->LookupFunctionEntry(ContextPC(context), &image_base);
DCHECK_EQ(module->GetBaseAddress(), image_base);
if (runtime_function) {
unwind_functions_->VirtualUnwind(image_base, ContextPC(context),
runtime_function, context);
return true;
}
if (at_top_frame) {
// This is a leaf function (i.e. a function that neither calls a function,
// nor allocates any stack space itself).
#if defined(ARCH_CPU_X86_64)
// For X64, return address is at RSP.
context->Rip = *reinterpret_cast<DWORD64*>(context->Rsp);
context->Rsp += 8;
#elif defined(ARCH_CPU_ARM64)
// For leaf function on Windows ARM64, return address is at LR(X30). Add
// CONTEXT_UNWOUND_TO_CALL flag to avoid unwind ambiguity for tailcall on
// ARM64, because padding after tailcall is not guaranteed.
context->Pc = context->Lr;
context->ContextFlags |= CONTEXT_UNWOUND_TO_CALL;
#else
#error Unsupported Windows 64-bit Arch
#endif
return true;
}
// In theory we shouldn't get here, as it means we've encountered a function
// without unwind information below the top of the stack, which is forbidden
// by the Microsoft x64 calling convention.
//
// The one known case in Chrome code that executes this path occurs because
// of BoringSSL unwind information inconsistent with the actual function
// code. See https://crbug.com/542919.
return false;
#else
NOTREACHED();
return false;
#endif
}
Win32StackFrameUnwinder::Win32StackFrameUnwinder(
std::unique_ptr<UnwindFunctions> unwind_functions)
: unwind_functions_(std::move(unwind_functions)) {}
} // namespace base

View file

@ -0,0 +1,87 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_PROFILER_WIN32_STACK_FRAME_UNWINDER_H_
#define BASE_PROFILER_WIN32_STACK_FRAME_UNWINDER_H_
#include <windows.h>
#include <memory>
#include "base/base_export.h"
#include "base/macros.h"
#include "base/profiler/module_cache.h"
#include "build/build_config.h"
namespace base {
#if !defined(_WIN64)
// Allows code to compile for x86. Actual support for x86 will require either
// refactoring these interfaces or separate architecture-specific interfaces.
struct RUNTIME_FUNCTION {
DWORD BeginAddress;
DWORD EndAddress;
};
using PRUNTIME_FUNCTION = RUNTIME_FUNCTION*;
#endif // !defined(_WIN64)
inline ULONG64 ContextPC(CONTEXT* context) {
#if defined(ARCH_CPU_X86_64)
return context->Rip;
#elif defined(ARCH_CPU_X86)
return context->Eip;
#elif defined(ARCH_CPU_ARM64)
return context->Pc;
#else
#error Unsupported Windows Arch
#endif
}
// This class is not used while the target thread is suspended, so may allocate
// from the default heap.
class BASE_EXPORT Win32StackFrameUnwinder {
public:
// Interface for Win32 unwind-related functionality this class depends
// on. Provides a seam for testing.
class BASE_EXPORT UnwindFunctions {
public:
virtual ~UnwindFunctions();
virtual PRUNTIME_FUNCTION LookupFunctionEntry(DWORD64 program_counter,
PDWORD64 image_base) = 0;
virtual void VirtualUnwind(DWORD64 image_base,
DWORD64 program_counter,
PRUNTIME_FUNCTION runtime_function,
CONTEXT* context) = 0;
protected:
UnwindFunctions();
private:
DISALLOW_COPY_AND_ASSIGN(UnwindFunctions);
};
explicit Win32StackFrameUnwinder();
~Win32StackFrameUnwinder();
// Attempts to unwind the frame represented by |context|, where the
// instruction pointer is known to be in |module|. Updates |context| if
// successful.
bool TryUnwind(bool at_top_frame,
CONTEXT* context,
const ModuleCache::Module* module);
private:
// This function is for internal and test purposes only.
Win32StackFrameUnwinder(std::unique_ptr<UnwindFunctions> unwind_functions);
friend class Win32StackFrameUnwinderTest;
std::unique_ptr<UnwindFunctions> unwind_functions_;
DISALLOW_COPY_AND_ASSIGN(Win32StackFrameUnwinder);
};
} // namespace base
#endif // BASE_PROFILER_WIN32_STACK_FRAME_UNWINDER_H_
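// A hedged sketch (not part of this header) of how a caller might drive
// TryUnwind in a loop: resolve the module for the current PC, unwind one frame
// at a time, and stop when unwinding fails or the module is unknown.
// ModuleCache::GetModuleForAddress is assumed to perform the lookup, and frame
// recording is elided.
void WalkStack(CONTEXT* context, ModuleCache* module_cache) {
  Win32StackFrameUnwinder unwinder;
  bool at_top_frame = true;
  for (;;) {
    const ModuleCache::Module* module = module_cache->GetModuleForAddress(
        static_cast<uintptr_t>(ContextPC(context)));
    if (!module || !unwinder.TryUnwind(at_top_frame, context, module))
      return;
    at_top_frame = false;
    // ...record ContextPC(context) as the next frame here...
  }
}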