Repo created

This commit is contained in:
Fr4nz D13trich 2025-11-22 14:04:28 +01:00
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions

@@ -0,0 +1,3 @@
alph@chromium.org
erikchen@chromium.org
per-file module_cache*=wittman@chromium.org

@@ -0,0 +1,64 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include <limits>
#include "base/bits.h"
namespace base {
LockFreeAddressHashSet::LockFreeAddressHashSet(size_t buckets_count)
: buckets_(buckets_count), bucket_mask_(buckets_count - 1) {
DCHECK(bits::IsPowerOfTwo(buckets_count));
DCHECK_LE(bucket_mask_, std::numeric_limits<uint32_t>::max());
}
LockFreeAddressHashSet::~LockFreeAddressHashSet() {
for (std::atomic<Node*>& bucket : buckets_) {
Node* node = bucket.load(std::memory_order_relaxed);
while (node) {
Node* next = node->next;
delete node;
node = next;
}
}
}
void LockFreeAddressHashSet::Insert(void* key) {
DCHECK_NE(key, nullptr);
CHECK(!Contains(key));
++size_;
// Note: There's no need to use std::atomic_compare_exchange here,
// as we do not support concurrent inserts, so values cannot change midair.
std::atomic<Node*>& bucket = buckets_[Hash(key) & bucket_mask_];
Node* node = bucket.load(std::memory_order_relaxed);
// First iterate over the bucket nodes and try to reuse an empty one if found.
for (; node != nullptr; node = node->next) {
if (node->key.load(std::memory_order_relaxed) == nullptr) {
node->key.store(key, std::memory_order_relaxed);
return;
}
}
// There are no empty nodes to reuse left in the bucket.
// Create a new node first...
Node* new_node = new Node(key, bucket.load(std::memory_order_relaxed));
// ... and then publish the new chain.
bucket.store(new_node, std::memory_order_release);
}
void LockFreeAddressHashSet::Copy(const LockFreeAddressHashSet& other) {
DCHECK_EQ(0u, size());
for (const std::atomic<Node*>& bucket : other.buckets_) {
for (Node* node = bucket.load(std::memory_order_relaxed); node;
node = node->next) {
void* key = node->key.load(std::memory_order_relaxed);
if (key)
Insert(key);
}
}
}
} // namespace base

@@ -0,0 +1,136 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
#define BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
#include <atomic>
#include <cstdint>
#include <vector>
#include "base/compiler_specific.h"
#include "base/logging.h"
namespace base {
// A hash set container that provides a lock-free version of the |Contains|
// operation. It does not support concurrent write operations (|Insert| and
// |Remove|); if writes are performed from multiple threads, they must be
// properly guarded with a lock.
// The |Contains| method can be executed concurrently with |Insert|, |Remove|,
// or other |Contains| calls, even over the same key.
// However, note that the result of executing |Contains| concurrently with
// |Insert| or |Remove| over the same key is racy.
//
// The hash set never rehashes, so the number of buckets stays the same
// for the lifetime of the set.
//
// Internally the hash set is implemented as a vector of N buckets
// (N must be a power of 2). Each bucket holds a singly-linked list of
// nodes, each corresponding to a key.
// Nodes can never be truly deleted from a list, as there may be concurrent
// reads iterating over them. Instead, the |Remove| operation marks a node as
// empty by placing nullptr into its key field, and subsequent |Insert|
// operations may reuse empty nodes when possible.
//
// The structure of the hashset for N buckets is the following:
// 0: {*}--> {key1,*}--> {key2,*}--> NULL
// 1: {*}--> NULL
// 2: {*}--> {NULL,*}--> {key3,*}--> {key4,*}--> NULL
// ...
// N-1: {*}--> {keyM,*}--> NULL
class BASE_EXPORT LockFreeAddressHashSet {
public:
explicit LockFreeAddressHashSet(size_t buckets_count);
~LockFreeAddressHashSet();
// Checks if the |key| is in the set. Can be executed concurrently with
// |Insert|, |Remove|, and |Contains| operations.
ALWAYS_INLINE bool Contains(void* key) const;
// Removes the |key| from the set. The key must be present in the set before
// the invocation.
// Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
ALWAYS_INLINE void Remove(void* key);
// Inserts the |key| into the set. The key must not be present in the set
// before the invocation.
// Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
void Insert(void* key);
// Copies contents of |other| set into the current set. The current set
// must be empty before the call.
// Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
void Copy(const LockFreeAddressHashSet& other);
size_t buckets_count() const { return buckets_.size(); }
size_t size() const { return size_; }
// Returns the average bucket utilization.
float load_factor() const { return 1.f * size() / buckets_.size(); }
private:
friend class LockFreeAddressHashSetTest;
struct Node {
ALWAYS_INLINE Node(void* key, Node* next);
std::atomic<void*> key;
Node* next;
};
ALWAYS_INLINE static uint32_t Hash(void* key);
ALWAYS_INLINE Node* FindNode(void* key) const;
std::vector<std::atomic<Node*>> buckets_;
size_t size_ = 0;
const size_t bucket_mask_;
};
ALWAYS_INLINE LockFreeAddressHashSet::Node::Node(void* key, Node* next)
: next(next) {
this->key.store(key, std::memory_order_relaxed);
}
ALWAYS_INLINE bool LockFreeAddressHashSet::Contains(void* key) const {
return FindNode(key) != nullptr;
}
ALWAYS_INLINE void LockFreeAddressHashSet::Remove(void* key) {
Node* node = FindNode(key);
DCHECK_NE(node, nullptr);
// We can never delete the node, nor detach it from the current bucket
// as there may always be another thread currently iterating over it.
// Instead we just mark it as empty, so |Insert| can reuse it later.
node->key.store(nullptr, std::memory_order_relaxed);
--size_;
}
ALWAYS_INLINE LockFreeAddressHashSet::Node* LockFreeAddressHashSet::FindNode(
void* key) const {
DCHECK_NE(key, nullptr);
const std::atomic<Node*>& bucket = buckets_[Hash(key) & bucket_mask_];
// std::memory_order_consume ordering would be sufficient here, as the
// node->next->...->next loads form a dependency chain.
// However, std::memory_order_consume is temporarily deprecated in C++17
// (see https://isocpp.org/files/papers/p0636r0.html#removed), so the
// stronger std::memory_order_acquire is used for now.
for (Node* node = bucket.load(std::memory_order_acquire); node != nullptr;
node = node->next) {
if (node->key.load(std::memory_order_relaxed) == key)
return node;
}
return nullptr;
}
// static
ALWAYS_INLINE uint32_t LockFreeAddressHashSet::Hash(void* key) {
// A simple fast hash function for addresses.
constexpr uintptr_t random_bits = static_cast<uintptr_t>(0x4bfdb9df5a6f243b);
uint64_t k = reinterpret_cast<uintptr_t>(key);
return static_cast<uint32_t>((k * random_bits) >> 32);
}
} // namespace base
#endif // BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
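As a companion to the class comment above, here is a minimal usage sketch of LockFreeAddressHashSet under its stated single-writer constraint: all writes are serialized behind an external lock while Contains stays lock-free. The g_write_lock, RecordAddress, ForgetAddress, and IsRecorded names are illustrative and not part of the files in this commit.

#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/synchronization/lock.h"

namespace {

// Illustrative sketch only; not part of the files in this commit.
base::Lock g_write_lock;                 // Serializes Insert/Remove.
base::LockFreeAddressHashSet g_set(64);  // Bucket count must be a power of 2.

void RecordAddress(void* address) {
  base::AutoLock scoped_lock(g_write_lock);
  if (!g_set.Contains(address))  // Insert CHECKs that the key is absent.
    g_set.Insert(address);
}

void ForgetAddress(void* address) {
  base::AutoLock scoped_lock(g_write_lock);
  if (g_set.Contains(address))  // Remove expects the key to be present.
    g_set.Remove(address);
}

bool IsRecorded(void* address) {
  // Lock-free; safe to call concurrently with the writers above.
  return g_set.Contains(address);
}

}  // namespace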

@@ -0,0 +1,577 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
#include <algorithm>
#include <atomic>
#include <cmath>
#include <memory>
#include <utility>
#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "build/build_config.h"
#if defined(OS_MACOSX) || defined(OS_ANDROID)
#include <pthread.h>
#endif
namespace base {
using allocator::AllocatorDispatch;
namespace {
#if defined(OS_MACOSX) || defined(OS_ANDROID)
// The macOS implementation of libmalloc sometimes calls malloc recursively,
// delegating allocations between zones. That causes our hooks to be called
// twice. The scoped guard allows us to detect that.
//
// Besides that, the implementations of thread_local on macOS and Android
// seem to allocate memory lazily on the first access to a thread_local
// variable, so pthread TLS is used instead of C++ thread_local there.
class ReentryGuard {
public:
ReentryGuard() : allowed_(!pthread_getspecific(entered_key_)) {
pthread_setspecific(entered_key_, reinterpret_cast<void*>(true));
}
~ReentryGuard() {
if (LIKELY(allowed_))
pthread_setspecific(entered_key_, nullptr);
}
operator bool() { return allowed_; }
static void Init() {
int error = pthread_key_create(&entered_key_, nullptr);
CHECK(!error);
}
private:
bool allowed_;
static pthread_key_t entered_key_;
};
pthread_key_t ReentryGuard::entered_key_;
#else
class ReentryGuard {
public:
operator bool() { return true; }
static void Init() {}
};
#endif
const size_t kDefaultSamplingIntervalBytes = 128 * 1024;
// Notes on TLS usage:
//
// * There's no safe way to use TLS in malloc() as both C++ thread_local and
//   pthread make no guarantees on whether they allocate or not.
// * We think that we can safely use thread_local without a re-entrancy guard
//   because the compiler will use the "tls static access model" for static
//   builds of Chrome [https://www.uclibc.org/docs/tls.pdf].
//   But there's no guarantee that this will stay true, and in practice
//   it seems to have problems on macOS/Android. These platforms do allocate
//   on the very first access to a thread_local on each thread.
// * Directly using/warming-up platform TLS seems to work on all platforms,
//   but is also not guaranteed to stay true. We make use of it for the
//   reentrancy guards on macOS/Android.
// * We cannot use the Windows Tls[GS]etValue API as it modifies the result of
//   GetLastError.
//
// Android thread_local seems to be using __emutls_get_address from libgcc:
// https://github.com/gcc-mirror/gcc/blob/master/libgcc/emutls.c
// macOS version is based on _tlv_get_addr from dyld:
// https://opensource.apple.com/source/dyld/dyld-635.2/src/threadLocalHelpers.s.auto.html
// The guard protects against reentering on platforms other than macOS and
// Android.
thread_local bool g_internal_reentry_guard;
// Accumulated bytes towards the next sample on the current thread.
thread_local intptr_t g_accumulated_bytes_tls;
// Used as a workaround to avoid bias from muted samples. See
// ScopedMuteThreadSamples for more details.
thread_local intptr_t g_accumulated_bytes_tls_snapshot;
const intptr_t kAccumulatedBytesOffset = 1 << 29;
// A boolean used to distinguish the first allocation on a thread:
//   false - the first allocation on the thread has not been seen yet;
//   true  - otherwise.
// Since g_accumulated_bytes_tls is initialized to zero, the very first
// allocation on a thread would always trigger a sample, skewing the profile
// towards such allocations. To mitigate that, we use this flag to make sure
// the first allocation is properly accounted for.
thread_local bool g_sampling_interval_initialized_tls;
// Controls if sample intervals should not be randomized. Used for testing.
bool g_deterministic;
// True if profiling is currently running, false otherwise.
std::atomic_bool g_running;
// Pointer to the current |LockFreeAddressHashSet|.
std::atomic<LockFreeAddressHashSet*> g_sampled_addresses_set;
// Sampling interval parameter, the mean value for intervals between samples.
std::atomic_size_t g_sampling_interval{kDefaultSamplingIntervalBytes};
void (*g_hooks_install_callback)();
std::atomic_bool g_hooks_installed;
void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
ReentryGuard guard;
void* address = self->next->alloc_function(self->next, size, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
void* AllocZeroInitializedFn(const AllocatorDispatch* self,
size_t n,
size_t size,
void* context) {
ReentryGuard guard;
void* address =
self->next->alloc_zero_initialized_function(self->next, n, size, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, n * size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
void* AllocAlignedFn(const AllocatorDispatch* self,
size_t alignment,
size_t size,
void* context) {
ReentryGuard guard;
void* address =
self->next->alloc_aligned_function(self->next, alignment, size, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
void* ReallocFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
ReentryGuard guard;
// Note: size == 0 actually performs free.
PoissonAllocationSampler::RecordFree(address);
address = self->next->realloc_function(self->next, address, size, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
// Note: RecordFree should be called before free_function (here and in other
// places). We need to remove the recorded allocation sample before calling
// free_function, because once the latter has executed the address becomes
// available again and can be allocated by another thread; doing it the other
// way around would be racy.
PoissonAllocationSampler::RecordFree(address);
self->next->free_function(self->next, address, context);
}
size_t GetSizeEstimateFn(const AllocatorDispatch* self,
void* address,
void* context) {
return self->next->get_size_estimate_function(self->next, address, context);
}
unsigned BatchMallocFn(const AllocatorDispatch* self,
size_t size,
void** results,
unsigned num_requested,
void* context) {
ReentryGuard guard;
unsigned num_allocated = self->next->batch_malloc_function(
self->next, size, results, num_requested, context);
if (LIKELY(guard)) {
for (unsigned i = 0; i < num_allocated; ++i) {
PoissonAllocationSampler::RecordAlloc(
results[i], size, PoissonAllocationSampler::kMalloc, nullptr);
}
}
return num_allocated;
}
void BatchFreeFn(const AllocatorDispatch* self,
void** to_be_freed,
unsigned num_to_be_freed,
void* context) {
for (unsigned i = 0; i < num_to_be_freed; ++i)
PoissonAllocationSampler::RecordFree(to_be_freed[i]);
self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
context);
}
void FreeDefiniteSizeFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
PoissonAllocationSampler::RecordFree(address);
self->next->free_definite_size_function(self->next, address, size, context);
}
static void* AlignedMallocFn(const AllocatorDispatch* self,
size_t size,
size_t alignment,
void* context) {
ReentryGuard guard;
void* address =
self->next->aligned_malloc_function(self->next, size, alignment, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
static void* AlignedReallocFn(const AllocatorDispatch* self,
void* address,
size_t size,
size_t alignment,
void* context) {
ReentryGuard guard;
// Note: size == 0 actually performs free.
PoissonAllocationSampler::RecordFree(address);
address = self->next->aligned_realloc_function(self->next, address, size,
alignment, context);
if (LIKELY(guard)) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kMalloc, nullptr);
}
return address;
}
static void AlignedFreeFn(const AllocatorDispatch* self,
void* address,
void* context) {
PoissonAllocationSampler::RecordFree(address);
self->next->aligned_free_function(self->next, address, context);
}
AllocatorDispatch g_allocator_dispatch = {&AllocFn,
&AllocZeroInitializedFn,
&AllocAlignedFn,
&ReallocFn,
&FreeFn,
&GetSizeEstimateFn,
&BatchMallocFn,
&BatchFreeFn,
&FreeDefiniteSizeFn,
&AlignedMallocFn,
&AlignedReallocFn,
&AlignedFreeFn,
nullptr};
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
void PartitionAllocHook(void* address, size_t size, const char* type) {
PoissonAllocationSampler::RecordAlloc(
address, size, PoissonAllocationSampler::kPartitionAlloc, type);
}
void PartitionFreeHook(void* address) {
PoissonAllocationSampler::RecordFree(address);
}
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
} // namespace
PoissonAllocationSampler::ScopedMuteThreadSamples::ScopedMuteThreadSamples() {
DCHECK(!g_internal_reentry_guard);
g_internal_reentry_guard = true;
// We mute thread samples immediately after taking a sample, which is when we
// reset g_accumulated_bytes_tls. This breaks the random sampling requirement
// of the Poisson process, and causes us to systematically overcount all other
// allocations. That's because muted allocations rarely trigger a sample
// (which would cause them to be ignored), since they occur right after
// g_accumulated_bytes_tls is reset.
//
// To counteract this, we drop g_accumulated_bytes_tls by a large, fixed
// amount, lowering the probability that a muted allocation triggers a sample
// to nearly zero. Then we reset it after we're done muting thread samples.
g_accumulated_bytes_tls_snapshot = g_accumulated_bytes_tls;
g_accumulated_bytes_tls -= kAccumulatedBytesOffset;
}
PoissonAllocationSampler::ScopedMuteThreadSamples::~ScopedMuteThreadSamples() {
DCHECK(g_internal_reentry_guard);
g_internal_reentry_guard = false;
g_accumulated_bytes_tls = g_accumulated_bytes_tls_snapshot;
}
// static
bool PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted() {
return g_internal_reentry_guard;
}
PoissonAllocationSampler* PoissonAllocationSampler::instance_;
PoissonAllocationSampler::PoissonAllocationSampler() {
CHECK_EQ(nullptr, instance_);
instance_ = this;
Init();
auto* sampled_addresses = new LockFreeAddressHashSet(64);
g_sampled_addresses_set.store(sampled_addresses, std::memory_order_release);
}
// static
void PoissonAllocationSampler::Init() {
static bool init_once = []() {
ReentryGuard::Init();
return true;
}();
ignore_result(init_once);
}
// static
void PoissonAllocationSampler::InstallAllocatorHooksOnce() {
static bool hook_installed = InstallAllocatorHooks();
ignore_result(hook_installed);
}
// static
bool PoissonAllocationSampler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
// If the allocator shim isn't available, then we don't install any hooks.
// There's no point in printing an error message, since this can regularly
// happen for tests.
ignore_result(g_allocator_dispatch);
#endif // BUILDFLAG(USE_ALLOCATOR_SHIM)
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
PartitionAllocHooks::SetObserverHooks(&PartitionAllocHook,
&PartitionFreeHook);
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
bool expected = false;
if (!g_hooks_installed.compare_exchange_strong(expected, true))
g_hooks_install_callback();
return true;
}
// static
void PoissonAllocationSampler::SetHooksInstallCallback(
void (*hooks_install_callback)()) {
CHECK(!g_hooks_install_callback && hooks_install_callback);
g_hooks_install_callback = hooks_install_callback;
bool expected = false;
if (!g_hooks_installed.compare_exchange_strong(expected, true))
g_hooks_install_callback();
}
void PoissonAllocationSampler::SetSamplingInterval(size_t sampling_interval) {
// TODO(alph): Reset the sample being collected if running.
g_sampling_interval = sampling_interval;
}
// static
size_t PoissonAllocationSampler::GetNextSampleInterval(size_t interval) {
if (UNLIKELY(g_deterministic))
return interval;
// We sample with a Poisson process, with constant average sampling
// interval. This follows the exponential probability distribution with
// parameter λ = 1/interval where |interval| is the average number of bytes
// between samples.
// Let u be a uniformly distributed random number between 0 and 1, then
// next_sample = -ln(u) / λ
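// Illustrative worked example (numbers are not taken from this code): with
// interval = 128 KiB = 131072 bytes and u = 0.37,
// next_sample = -ln(0.37) * 131072 ≈ 0.994 * 131072 ≈ 130319 bytes,
// i.e. close to one mean interval.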
double uniform = RandDouble();
double value = -log(uniform) * interval;
size_t min_value = sizeof(intptr_t);
// We limit the upper bound of a sample interval to make sure we don't have
// huge gaps in the sampling stream. The probability of the upper bound being
// hit is exp(-20) ≈ 2e-9, so it should not skew the distribution.
size_t max_value = interval * 20;
if (UNLIKELY(value < min_value))
return min_value;
if (UNLIKELY(value > max_value))
return max_value;
return static_cast<size_t>(value);
}
// static
void PoissonAllocationSampler::RecordAlloc(void* address,
size_t size,
AllocatorType type,
const char* context) {
g_accumulated_bytes_tls += size;
intptr_t accumulated_bytes = g_accumulated_bytes_tls;
if (LIKELY(accumulated_bytes < 0))
return;
if (UNLIKELY(!g_running.load(std::memory_order_relaxed))) {
// Sampling is in fact disabled. Reset the state of the sampler.
// We do this check off the fast path, because having allocation hooks
// installed while the sampler is not running is quite a rare state.
g_sampling_interval_initialized_tls = false;
g_accumulated_bytes_tls = 0;
return;
}
instance_->DoRecordAlloc(accumulated_bytes, size, address, type, context);
}
void PoissonAllocationSampler::DoRecordAlloc(intptr_t accumulated_bytes,
size_t size,
void* address,
AllocatorType type,
const char* context) {
// Failed allocation? Skip the sample.
if (UNLIKELY(!address))
return;
size_t mean_interval = g_sampling_interval.load(std::memory_order_relaxed);
if (UNLIKELY(!g_sampling_interval_initialized_tls)) {
g_sampling_interval_initialized_tls = true;
// This is the very first allocation on the thread. It always passes the
// check in |RecordAlloc|, because g_accumulated_bytes_tls is zero-initialized
// due to TLS semantics.
// Generate a proper sampling interval and make sure the allocation has
// indeed crossed the threshold before counting it as a sample.
accumulated_bytes -= GetNextSampleInterval(mean_interval);
if (accumulated_bytes < 0) {
g_accumulated_bytes_tls = accumulated_bytes;
return;
}
}
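// Worked example with illustrative numbers (not taken from this code): with
// mean_interval = 128 KiB and accumulated_bytes = 300000 on entry, the
// division below yields samples = 2 with a remainder of 37856 bytes. If the
// first randomized interval drawn in the loop exceeds that remainder, the
// loop stops after one iteration with samples = 3, and the sample is then
// reported as representing samples * mean_interval bytes of allocations.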
size_t samples = accumulated_bytes / mean_interval;
accumulated_bytes %= mean_interval;
do {
accumulated_bytes -= GetNextSampleInterval(mean_interval);
++samples;
} while (accumulated_bytes >= 0);
g_accumulated_bytes_tls = accumulated_bytes;
if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
return;
ScopedMuteThreadSamples no_reentrancy_scope;
std::vector<SamplesObserver*> observers_copy;
{
AutoLock lock(mutex_);
// TODO(alph): Sometimes RecordAlloc is called twice in a row without
// a RecordFree in between. Investigate it.
if (sampled_addresses_set().Contains(address))
return;
sampled_addresses_set().Insert(address);
BalanceAddressesHashSet();
observers_copy = observers_;
}
size_t total_allocated = mean_interval * samples;
for (auto* observer : observers_copy)
observer->SampleAdded(address, size, total_allocated, type, context);
}
void PoissonAllocationSampler::DoRecordFree(void* address) {
if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
return;
// There is a rare case on macOS and Android where the very first thread_local
// access in the ScopedMuteThreadSamples constructor may allocate and thus
// reenter DoRecordAlloc. However, the call chain won't build up further, as
// RecordAlloc accesses are guarded with the pthread-TLS-based ReentryGuard.
ScopedMuteThreadSamples no_reentrancy_scope;
std::vector<SamplesObserver*> observers_copy;
{
AutoLock lock(mutex_);
observers_copy = observers_;
sampled_addresses_set().Remove(address);
}
for (auto* observer : observers_copy)
observer->SampleRemoved(address);
}
void PoissonAllocationSampler::BalanceAddressesHashSet() {
// If the load_factor of the current addresses hash set has grown above 1,
// allocate a new set twice as large, copy all the data into it, and switch
// to using it.
// No writes are made to either set during the copy, as it happens under the
// lock. All readers continue to use the old set until the atomic switch
// takes place.
LockFreeAddressHashSet& current_set = sampled_addresses_set();
if (current_set.load_factor() < 1)
return;
auto new_set =
std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
new_set->Copy(current_set);
// Atomically switch all the new readers to the new set.
g_sampled_addresses_set.store(new_set.release(), std::memory_order_release);
// We leak the old set because all previously used sets must be kept alive:
// there might be reader threads that have already obtained a pointer to one
// of them but haven't yet managed to access it.
}
// static
LockFreeAddressHashSet& PoissonAllocationSampler::sampled_addresses_set() {
return *g_sampled_addresses_set.load(std::memory_order_acquire);
}
// static
PoissonAllocationSampler* PoissonAllocationSampler::Get() {
static NoDestructor<PoissonAllocationSampler> instance;
return instance.get();
}
// static
void PoissonAllocationSampler::SuppressRandomnessForTest(bool suppress) {
g_deterministic = suppress;
}
void PoissonAllocationSampler::AddSamplesObserver(SamplesObserver* observer) {
ScopedMuteThreadSamples no_reentrancy_scope;
AutoLock lock(mutex_);
DCHECK(std::find(observers_.begin(), observers_.end(), observer) ==
observers_.end());
observers_.push_back(observer);
InstallAllocatorHooksOnce();
g_running = !observers_.empty();
}
void PoissonAllocationSampler::RemoveSamplesObserver(
SamplesObserver* observer) {
ScopedMuteThreadSamples no_reentrancy_scope;
AutoLock lock(mutex_);
auto it = std::find(observers_.begin(), observers_.end(), observer);
DCHECK(it != observers_.end());
observers_.erase(it);
g_running = !observers_.empty();
}
} // namespace base
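To illustrate the SetHooksInstallCallback path implemented above, here is a hedged sketch of how an external allocator could plug into the sampler. Only the PoissonAllocationSampler calls come from this commit; OnMyAllocatorAlloc, OnMyAllocatorFree, RegisterMyAllocatorWithSampler, and the commented-out registration call are hypothetical placeholders for a real allocator's hook API.

#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"

namespace {

// Illustrative sketch only; the "MyAllocator" names are hypothetical.
void OnMyAllocatorAlloc(void* address, size_t size) {
  base::PoissonAllocationSampler::RecordAlloc(
      address, size, base::PoissonAllocationSampler::kBlinkGC,
      /*context=*/nullptr);
}

void OnMyAllocatorFree(void* address) {
  base::PoissonAllocationSampler::RecordFree(address);
}

void InstallMyAllocatorHooks() {
  // Register the two hooks with the external allocator here, e.g.:
  // MyAllocatorSetHooks(&OnMyAllocatorAlloc, &OnMyAllocatorFree);
}

}  // namespace

void RegisterMyAllocatorWithSampler() {
  // The profiler invokes the callback on initialization, or right away if the
  // hooks have already been installed (see SetHooksInstallCallback above).
  base::PoissonAllocationSampler::SetHooksInstallCallback(
      &InstallMyAllocatorHooks);
}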

@@ -0,0 +1,142 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
#define BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
#include <vector>
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/macros.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
namespace base {
template <typename T>
class NoDestructor;
// This singleton class implements Poisson sampling of the incoming allocation
// stream. It hooks onto base::allocator and base::PartitionAlloc.
// An extra custom allocator can be hooked in via the SetHooksInstallCallback
// method.
// The only control parameter is the sampling interval, which sets the average
// number of bytes between samples. The actual intervals between samples are
// randomized using a Poisson process to mitigate patterns in the allocation
// stream.
// Once the accumulated allocation sizes fill up the current sample interval,
// a sample is generated and sent to the observers via a |SampleAdded| call.
// When the memory that triggered a sample is freed, observers are notified
// with a |SampleRemoved| call.
//
class BASE_EXPORT PoissonAllocationSampler {
public:
enum AllocatorType : uint32_t { kMalloc, kPartitionAlloc, kBlinkGC };
class SamplesObserver {
public:
virtual ~SamplesObserver() = default;
virtual void SampleAdded(void* address,
size_t size,
size_t total,
AllocatorType type,
const char* context) = 0;
virtual void SampleRemoved(void* address) = 0;
};
// An instance of this class makes the sampler not report samples generated
// within the object's scope on the current thread.
// It allows observers to allocate/deallocate memory while holding a lock
// without the risk of running into reentrancy problems.
// The current implementation doesn't support nesting of ScopedMuteThreadSamples.
class BASE_EXPORT ScopedMuteThreadSamples {
public:
ScopedMuteThreadSamples();
~ScopedMuteThreadSamples();
static bool IsMuted();
};
// Must be called early during the process initialization. It creates and
// reserves a TLS slot.
static void Init();
// This is an entry point for plugging in an external allocator.
// The profiler will invoke the provided callback upon initialization.
// The callback should install hooks onto the corresponding memory allocator
// and make them invoke PoissonAllocationSampler::RecordAlloc and
// PoissonAllocationSampler::RecordFree upon the corresponding allocation
// events.
//
// If this method is called after the profiler has been initialized, the
// callback is invoked right away.
static void SetHooksInstallCallback(void (*hooks_install_callback)());
void AddSamplesObserver(SamplesObserver*);
// Note: After an observer is removed it is still possible for that observer
// to receive a notification. This is not a problem currently, as the only
// client of this interface is base::SamplingHeapProfiler, which is a
// singleton.
// If there's a need for this functionality in the future, one might want to
// put the observer notification loop under a reader-writer lock.
void RemoveSamplesObserver(SamplesObserver*);
void SetSamplingInterval(size_t sampling_interval);
static void SuppressRandomnessForTest(bool suppress);
static void RecordAlloc(void* address,
size_t,
AllocatorType,
const char* context);
ALWAYS_INLINE static void RecordFree(void* address);
static PoissonAllocationSampler* Get();
private:
PoissonAllocationSampler();
~PoissonAllocationSampler() = delete;
static void InstallAllocatorHooksOnce();
static bool InstallAllocatorHooks();
static size_t GetNextSampleInterval(size_t base_interval);
static LockFreeAddressHashSet& sampled_addresses_set();
void DoRecordAlloc(intptr_t accumulated_bytes,
size_t size,
void* address,
AllocatorType type,
const char* context);
void DoRecordFree(void* address);
void BalanceAddressesHashSet();
Lock mutex_;
// The |observers_| list is guarded by |mutex_|; however, a copy of it is made
// before invoking the observers (to avoid performing expensive operations
// under the lock). As such, the SamplesObservers themselves need to be
// thread-safe and must support being invoked racily after
// RemoveSamplesObserver().
std::vector<SamplesObserver*> observers_ GUARDED_BY(mutex_);
static PoissonAllocationSampler* instance_;
friend class NoDestructor<PoissonAllocationSampler>;
friend class SamplingHeapProfilerTest;
friend class ScopedMuteThreadSamples;
DISALLOW_COPY_AND_ASSIGN(PoissonAllocationSampler);
};
// static
ALWAYS_INLINE void PoissonAllocationSampler::RecordFree(void* address) {
if (UNLIKELY(address == nullptr))
return;
if (UNLIKELY(sampled_addresses_set().Contains(address)))
instance_->DoRecordFree(address);
}
} // namespace base
#endif // BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
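A hedged sketch of a SamplesObserver implementation wired up through the API declared above; LoggingSamplesObserver and StartLoggingSamples are illustrative names and are not part of this commit.

#include "base/logging.h"
#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"

// Illustrative sketch only; not part of the files in this commit.
class LoggingSamplesObserver
    : public base::PoissonAllocationSampler::SamplesObserver {
 public:
  void SampleAdded(void* address,
                   size_t size,
                   size_t total,
                   base::PoissonAllocationSampler::AllocatorType type,
                   const char* context) override {
    // |total| is the number of bytes this sample statistically represents.
    DLOG(INFO) << "Sampled " << size << " bytes at " << address
               << " (attributed " << total << " bytes)";
  }
  void SampleRemoved(void* address) override {
    DLOG(INFO) << "Sampled allocation at " << address << " was freed";
  }
};

void StartLoggingSamples(LoggingSamplesObserver* observer) {
  auto* sampler = base::PoissonAllocationSampler::Get();
  sampler->SetSamplingInterval(128 * 1024);  // Mean of 128 KiB between samples.
  sampler->AddSamplesObserver(observer);     // Also installs allocator hooks.
}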

@@ -0,0 +1,298 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/sampling_heap_profiler/sampling_heap_profiler.h"
#include <algorithm>
#include <cmath>
#include <utility>
#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/bind.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/threading/thread_local_storage.h"
#include "base/trace_event/heap_profiler_allocation_context_tracker.h"
#include "build/build_config.h"
#if defined(OS_MACOSX)
#include <pthread.h>
#endif
#if defined(OS_LINUX) || defined(OS_ANDROID)
#include <sys/prctl.h>
#endif
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
defined(OFFICIAL_BUILD)
#include "base/trace_event/cfi_backtrace_android.h"
#endif
namespace base {
constexpr uint32_t kMaxStackEntries = 256;
namespace {
// If a thread name has been set from ThreadIdNameManager, use that. Otherwise,
// get the thread name from the kernel if available, or return a string with
// the thread id.
// This function intentionally leaks the allocated strings since they are used
// to tag allocations even after the thread dies.
const char* GetAndLeakThreadName() {
const char* thread_name =
base::ThreadIdNameManager::GetInstance()->GetNameForCurrentThread();
if (thread_name && *thread_name != '\0')
return thread_name;
// prctl requires 16 bytes, snprintf requires 19, pthread_getname_np requires
// 64 on macOS, see PlatformThread::SetName in platform_thread_mac.mm.
constexpr size_t kBufferLen = 64;
char name[kBufferLen];
#if defined(OS_LINUX) || defined(OS_ANDROID)
// If the thread name is not set, try to get it from prctl. Thread name might
// not be set in cases where the thread started before heap profiling was
// enabled.
int err = prctl(PR_GET_NAME, name);
if (!err)
return strdup(name);
#elif defined(OS_MACOSX)
int err = pthread_getname_np(pthread_self(), name, kBufferLen);
if (err == 0 && *name != '\0')
return strdup(name);
#endif // defined(OS_LINUX) || defined(OS_ANDROID)
// Use tid if we don't have a thread name.
snprintf(name, sizeof(name), "Thread %lu",
static_cast<unsigned long>(base::PlatformThread::CurrentId()));
return strdup(name);
}
const char* UpdateAndGetThreadName(const char* name) {
static thread_local const char* thread_name;
if (name)
thread_name = name;
if (!thread_name)
thread_name = GetAndLeakThreadName();
return thread_name;
}
} // namespace
SamplingHeapProfiler::Sample::Sample(size_t size,
size_t total,
uint32_t ordinal)
: size(size), total(total), ordinal(ordinal) {}
SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
SamplingHeapProfiler::Sample::~Sample() = default;
SamplingHeapProfiler::SamplingHeapProfiler() = default;
SamplingHeapProfiler::~SamplingHeapProfiler() {
if (record_thread_names_)
base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
}
uint32_t SamplingHeapProfiler::Start() {
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
defined(OFFICIAL_BUILD)
if (!trace_event::CFIBacktraceAndroid::GetInitializedInstance()
->can_unwind_stack_frames()) {
LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
return 0;
}
#endif
AutoLock lock(start_stop_mutex_);
if (!running_sessions_++)
PoissonAllocationSampler::Get()->AddSamplesObserver(this);
return last_sample_ordinal_;
}
void SamplingHeapProfiler::Stop() {
AutoLock lock(start_stop_mutex_);
DCHECK_GT(running_sessions_, 0);
if (!--running_sessions_)
PoissonAllocationSampler::Get()->RemoveSamplesObserver(this);
}
void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
PoissonAllocationSampler::Get()->SetSamplingInterval(sampling_interval);
}
void SamplingHeapProfiler::SetRecordThreadNames(bool value) {
if (record_thread_names_ == value)
return;
record_thread_names_ = value;
if (value) {
base::ThreadIdNameManager::GetInstance()->AddObserver(this);
} else {
base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
}
}
// static
const char* SamplingHeapProfiler::CachedThreadName() {
return UpdateAndGetThreadName(nullptr);
}
// static
void** SamplingHeapProfiler::CaptureStackTrace(void** frames,
size_t max_entries,
size_t* count) {
// Skip top frames as they correspond to the profiler itself.
size_t skip_frames = 3;
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
defined(OFFICIAL_BUILD)
size_t frame_count =
base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
const_cast<const void**>(frames), max_entries);
#elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
size_t frame_count = base::debug::TraceStackFramePointers(
const_cast<const void**>(frames), max_entries, skip_frames);
skip_frames = 0;
#else
// Fall-back to capturing the stack with base::debug::CollectStackTrace,
// which is likely slower, but more reliable.
size_t frame_count =
base::debug::CollectStackTrace(const_cast<void**>(frames), max_entries);
#endif
skip_frames = std::min(skip_frames, frame_count);
*count = frame_count - skip_frames;
return frames + skip_frames;
}
void SamplingHeapProfiler::SampleAdded(
void* address,
size_t size,
size_t total,
PoissonAllocationSampler::AllocatorType type,
const char* context) {
// CaptureStack and allocation context tracking may use TLS.
// Bail out if it has been destroyed.
if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
return;
DCHECK(PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
Sample sample(size, total, ++last_sample_ordinal_);
sample.allocator = type;
using CaptureMode = trace_event::AllocationContextTracker::CaptureMode;
CaptureMode capture_mode =
trace_event::AllocationContextTracker::capture_mode();
if (capture_mode == CaptureMode::PSEUDO_STACK ||
capture_mode == CaptureMode::MIXED_STACK) {
CaptureMixedStack(context, &sample);
} else {
CaptureNativeStack(context, &sample);
}
AutoLock lock(mutex_);
RecordString(sample.context);
samples_.emplace(address, std::move(sample));
}
void SamplingHeapProfiler::CaptureMixedStack(const char* context,
Sample* sample) {
auto* tracker =
trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
if (!tracker)
return;
trace_event::AllocationContext allocation_context;
if (!tracker->GetContextSnapshot(&allocation_context))
return;
const base::trace_event::Backtrace& backtrace = allocation_context.backtrace;
CHECK_LE(backtrace.frame_count, kMaxStackEntries);
std::vector<void*> stack;
stack.reserve(backtrace.frame_count);
AutoLock lock(mutex_); // Needed for RecordString call.
for (int i = base::checked_cast<int>(backtrace.frame_count) - 1; i >= 0;
--i) {
const base::trace_event::StackFrame& frame = backtrace.frames[i];
if (frame.type != base::trace_event::StackFrame::Type::PROGRAM_COUNTER)
RecordString(static_cast<const char*>(frame.value));
stack.push_back(const_cast<void*>(frame.value));
}
sample->stack = std::move(stack);
if (!context)
context = allocation_context.type_name;
sample->context = context;
}
void SamplingHeapProfiler::CaptureNativeStack(const char* context,
Sample* sample) {
void* stack[kMaxStackEntries];
size_t frame_count;
// One frame is reserved for the thread name.
void** first_frame =
CaptureStackTrace(stack, kMaxStackEntries - 1, &frame_count);
DCHECK_LT(frame_count, kMaxStackEntries);
sample->stack.assign(first_frame, first_frame + frame_count);
if (record_thread_names_)
sample->thread_name = CachedThreadName();
if (!context) {
const auto* tracker =
trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
if (tracker)
context = tracker->TaskContext();
}
sample->context = context;
}
const char* SamplingHeapProfiler::RecordString(const char* string) {
return string ? *strings_.insert(string).first : nullptr;
}
void SamplingHeapProfiler::SampleRemoved(void* address) {
DCHECK(base::PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
base::AutoLock lock(mutex_);
samples_.erase(address);
}
std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
uint32_t profile_id) {
// Make sure the sampler does not invoke |SampleAdded| or |SampleRemoved|
// on this thread. Otherwise it could end up in a deadlock.
// See crbug.com/882495
PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
AutoLock lock(mutex_);
std::vector<Sample> samples;
samples.reserve(samples_.size());
for (auto& it : samples_) {
Sample& sample = it.second;
if (sample.ordinal > profile_id)
samples.push_back(sample);
}
return samples;
}
std::vector<const char*> SamplingHeapProfiler::GetStrings() {
PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
AutoLock lock(mutex_);
return std::vector<const char*>(strings_.begin(), strings_.end());
}
// static
void SamplingHeapProfiler::Init() {
PoissonAllocationSampler::Init();
}
// static
SamplingHeapProfiler* SamplingHeapProfiler::Get() {
static NoDestructor<SamplingHeapProfiler> instance;
return instance.get();
}
void SamplingHeapProfiler::OnThreadNameChanged(const char* name) {
UpdateAndGetThreadName(name);
}
} // namespace base

@@ -0,0 +1,155 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_
#define BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_
#include <atomic>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "base/base_export.h"
#include "base/macros.h"
#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/threading/thread_id_name_manager.h"
namespace base {
template <typename T>
class NoDestructor;
// This class implements sampling profiling of the native memory heap.
// It uses PoissonAllocationSampler to aggregate the heap allocations and
// record samples.
// The recorded samples can then be retrieved using the GetSamples method.
class BASE_EXPORT SamplingHeapProfiler
: private PoissonAllocationSampler::SamplesObserver,
public base::ThreadIdNameManager::Observer {
public:
class BASE_EXPORT Sample {
public:
Sample(const Sample&);
~Sample();
// Allocation size.
size_t size;
// Total size attributed to the sample.
size_t total;
// Type of the allocator.
PoissonAllocationSampler::AllocatorType allocator;
// Context as provided by the allocation hook.
const char* context = nullptr;
// Name of the thread that made the sampled allocation.
const char* thread_name = nullptr;
// Call stack of PC addresses responsible for the allocation.
// If AllocationContextTracker::capture_mode() is PSEUDO_STACK or MIXED_STACK,
// the frame pointers may point to name strings instead of PCs. In that case
// all the string pointers are also reported via the |GetStrings| method of
// |SamplingHeapProfiler|, so that they can be distinguished from PC pointers.
std::vector<void*> stack;
private:
friend class SamplingHeapProfiler;
Sample(size_t size, size_t total, uint32_t ordinal);
uint32_t ordinal;
};
// Starts collecting allocation samples. Returns the current profile_id.
// This value can then be passed to |GetSamples| to retrieve only samples
// recorded since the corresponding |Start| invocation.
uint32_t Start();
// Stops recording allocation samples.
void Stop();
// Sets sampling interval in bytes.
void SetSamplingInterval(size_t sampling_interval);
// Enables recording the name of the thread that made the sampled allocation.
void SetRecordThreadNames(bool value);
// Returns the current thread name.
static const char* CachedThreadName();
// Returns the samples recorded for the profile session.
// If |profile_id| is set to the value returned by the |Start| method, only
// the samples recorded after the corresponding |Start| invocation are
// returned. To retrieve all the collected samples, |profile_id| must be set
// to 0.
std::vector<Sample> GetSamples(uint32_t profile_id);
// List of strings used in the profile call stacks.
std::vector<const char*> GetStrings();
// Captures up to |max_entries| stack frames using the buffer pointed to by
// |frames|. Puts the number of captured frames into the |count| output
// parameter. Returns a pointer to the topmost frame.
static void** CaptureStackTrace(void** frames,
size_t max_entries,
size_t* count);
static void Init();
static SamplingHeapProfiler* Get();
// ThreadIdNameManager::Observer implementation:
void OnThreadNameChanged(const char* name) override;
private:
SamplingHeapProfiler();
~SamplingHeapProfiler() override;
// PoissonAllocationSampler::SamplesObserver
void SampleAdded(void* address,
size_t size,
size_t total,
PoissonAllocationSampler::AllocatorType type,
const char* context) override;
void SampleRemoved(void* address) override;
void CaptureMixedStack(const char* context, Sample* sample);
void CaptureNativeStack(const char* context, Sample* sample);
const char* RecordString(const char* string) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Mutex to access |samples_| and |strings_|.
Lock mutex_;
// Samples of the currently live allocations.
std::unordered_map<void*, Sample> samples_ GUARDED_BY(mutex_);
// When CaptureMode::PSEUDO_STACK or CaptureMode::MIXED_STACK is enabled,
// the call stack contents of samples may contain strings besides
// PC addresses.
// In that case each string pointer is also added to the |strings_| set.
// The set only contains pointers to static strings that are never deleted.
std::unordered_set<const char*> strings_ GUARDED_BY(mutex_);
// Mutex that makes |running_sessions_| accesses and the AddSamplesObserver /
// RemoveSamplesObserver calls atomic.
Lock start_stop_mutex_;
// Number of running sessions.
int running_sessions_ = 0;
// Last sample ordinal, used to mark samples recorded during a single session.
std::atomic<uint32_t> last_sample_ordinal_{1};
// Whether it should record thread names.
std::atomic<bool> record_thread_names_{false};
friend class NoDestructor<SamplingHeapProfiler>;
friend class SamplingHeapProfilerTest;
DISALLOW_COPY_AND_ASSIGN(SamplingHeapProfiler);
};
} // namespace base
#endif // BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_
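Finally, a hedged end-to-end usage sketch of the profiler declared above; CollectProfile and the chosen sampling interval are illustrative and not part of this commit.

#include <cstdint>
#include <vector>

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

// Illustrative sketch only; not part of the files in this commit.
std::vector<base::SamplingHeapProfiler::Sample> CollectProfile() {
  // Init() must be called early during process startup; it reserves a TLS
  // slot via PoissonAllocationSampler::Init().
  base::SamplingHeapProfiler::Init();
  auto* profiler = base::SamplingHeapProfiler::Get();
  profiler->SetSamplingInterval(128 * 1024);  // Mean of 128 KiB between samples.
  profiler->SetRecordThreadNames(true);
  uint32_t profile_id = profiler->Start();  // Begins a profiling session.

  // ... run the workload to be profiled ...

  // Passing the id returned by Start() restricts the result to samples
  // recorded since then; passing 0 would return all live samples.
  std::vector<base::SamplingHeapProfiler::Sample> samples =
      profiler->GetSamples(profile_id);
  profiler->Stop();
  return samples;
}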