Repo created
This commit is contained in:
parent 81b91f4139
commit f8c34fa5ee
22732 changed files with 4815320 additions and 2 deletions
OWNERS
@@ -0,0 +1,3 @@
alph@chromium.org
erikchen@chromium.org
per-file module_cache*=wittman@chromium.org
base/sampling_heap_profiler/lock_free_address_hash_set.cc
@@ -0,0 +1,64 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"

#include <limits>

#include "base/bits.h"

namespace base {

LockFreeAddressHashSet::LockFreeAddressHashSet(size_t buckets_count)
    : buckets_(buckets_count), bucket_mask_(buckets_count - 1) {
  DCHECK(bits::IsPowerOfTwo(buckets_count));
  DCHECK_LE(bucket_mask_, std::numeric_limits<uint32_t>::max());
}

LockFreeAddressHashSet::~LockFreeAddressHashSet() {
  for (std::atomic<Node*>& bucket : buckets_) {
    Node* node = bucket.load(std::memory_order_relaxed);
    while (node) {
      Node* next = node->next;
      delete node;
      node = next;
    }
  }
}

void LockFreeAddressHashSet::Insert(void* key) {
  DCHECK_NE(key, nullptr);
  CHECK(!Contains(key));
  ++size_;
  // Note: There's no need to use std::atomic_compare_exchange here,
  // as we do not support concurrent inserts, so values cannot change midair.
  std::atomic<Node*>& bucket = buckets_[Hash(key) & bucket_mask_];
  Node* node = bucket.load(std::memory_order_relaxed);
  // First iterate over the bucket nodes and try to reuse an empty one if
  // found.
  for (; node != nullptr; node = node->next) {
    if (node->key.load(std::memory_order_relaxed) == nullptr) {
      node->key.store(key, std::memory_order_relaxed);
      return;
    }
  }
  // There are no empty nodes left to reuse in the bucket.
  // Create a new node first...
  Node* new_node = new Node(key, bucket.load(std::memory_order_relaxed));
  // ... and then publish the new chain.
  bucket.store(new_node, std::memory_order_release);
}
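
// Note: the release store above pairs with the acquire load in |FindNode|, so
// a reader that sees the new bucket head also sees the node's initialized
// |key| and |next| fields.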

void LockFreeAddressHashSet::Copy(const LockFreeAddressHashSet& other) {
  DCHECK_EQ(0u, size());
  for (const std::atomic<Node*>& bucket : other.buckets_) {
    for (Node* node = bucket.load(std::memory_order_relaxed); node;
         node = node->next) {
      void* key = node->key.load(std::memory_order_relaxed);
      if (key)
        Insert(key);
    }
  }
}

}  // namespace base
base/sampling_heap_profiler/lock_free_address_hash_set.h
@@ -0,0 +1,136 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
#define BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_

#include <atomic>
#include <cstdint>
#include <vector>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/logging.h"

namespace base {

// A hash set container that provides a lock-free version of the |Contains|
// operation. It does not support concurrent write operations |Insert| and
// |Remove|. All write operations, if performed from multiple threads, must be
// properly guarded with a lock.
// |Contains| can be executed concurrently with |Insert|, |Remove|, or other
// |Contains| calls, even over the same key. Note, however, that the result of
// running |Contains| concurrently with |Insert| or |Remove| over the same key
// is racy.
//
// The hash set never rehashes, so the number of buckets stays the same
// for the lifetime of the set.
//
// Internally the hash set is implemented as a vector of N buckets
// (N has to be a power of 2). Each bucket holds a singly-linked list of
// nodes, each corresponding to a key.
// It is not possible to really delete nodes from the list, as there might
// be concurrent reads being executed over the node. The |Remove| operation
// just marks the node as empty by placing nullptr into its key field.
// Subsequent |Insert| operations may reuse empty nodes when possible.
//
// The structure of the hash set for N buckets is the following:
// 0: {*}--> {key1,*}--> {key2,*}--> NULL
// 1: {*}--> NULL
// 2: {*}--> {NULL,*}--> {key3,*}--> {key4,*}--> NULL
// ...
// N-1: {*}--> {keyM,*}--> NULL
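//
// A minimal usage sketch (illustrative only, not part of this interface;
// assumes writers are serialized by an external lock as described above):
//
//   LockFreeAddressHashSet set(64);   // Bucket count must be a power of two.
//   set.Insert(key);                  // Writer side, under the lock.
//   bool found = set.Contains(key);   // Reader side, lock-free.
//   set.Remove(key);                  // Writer side, under the lock.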
class BASE_EXPORT LockFreeAddressHashSet {
 public:
  explicit LockFreeAddressHashSet(size_t buckets_count);
  ~LockFreeAddressHashSet();

  // Checks if the |key| is in the set. Can be executed concurrently with
  // |Insert|, |Remove|, and |Contains| operations.
  ALWAYS_INLINE bool Contains(void* key) const;

  // Removes the |key| from the set. The key must be present in the set before
  // the invocation.
  // Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
  ALWAYS_INLINE void Remove(void* key);

  // Inserts the |key| into the set. The key must not be present in the set
  // before the invocation.
  // Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
  void Insert(void* key);

  // Copies contents of the |other| set into the current set. The current set
  // must be empty before the call.
  // Concurrent execution of |Insert|, |Remove|, or |Copy| is not supported.
  void Copy(const LockFreeAddressHashSet& other);

  size_t buckets_count() const { return buckets_.size(); }
  size_t size() const { return size_; }

  // Returns the average bucket utilization.
  float load_factor() const { return 1.f * size() / buckets_.size(); }

 private:
  friend class LockFreeAddressHashSetTest;

  struct Node {
    ALWAYS_INLINE Node(void* key, Node* next);
    std::atomic<void*> key;
    Node* next;
  };

  ALWAYS_INLINE static uint32_t Hash(void* key);
  ALWAYS_INLINE Node* FindNode(void* key) const;

  std::vector<std::atomic<Node*>> buckets_;
  size_t size_ = 0;
  const size_t bucket_mask_;
};

ALWAYS_INLINE LockFreeAddressHashSet::Node::Node(void* key, Node* next)
    : next(next) {
  this->key.store(key, std::memory_order_relaxed);
}

ALWAYS_INLINE bool LockFreeAddressHashSet::Contains(void* key) const {
  return FindNode(key) != nullptr;
}

ALWAYS_INLINE void LockFreeAddressHashSet::Remove(void* key) {
  Node* node = FindNode(key);
  DCHECK_NE(node, nullptr);
  // We can never delete the node, nor detach it from the current bucket,
  // as there may always be another thread currently iterating over it.
  // Instead we just mark it as empty, so |Insert| can reuse it later.
  node->key.store(nullptr, std::memory_order_relaxed);
  --size_;
}

ALWAYS_INLINE LockFreeAddressHashSet::Node* LockFreeAddressHashSet::FindNode(
    void* key) const {
  DCHECK_NE(key, nullptr);
  const std::atomic<Node*>& bucket = buckets_[Hash(key) & bucket_mask_];
  // It's enough to use std::memory_order_consume ordering here, as the
  // node->next->...->next loads form a dependency chain.
  // However, std::memory_order_consume is temporarily deprecated in C++17.
  // See https://isocpp.org/files/papers/p0636r0.html#removed
  // Use the stronger std::memory_order_acquire for now.
  for (Node* node = bucket.load(std::memory_order_acquire); node != nullptr;
       node = node->next) {
    if (node->key.load(std::memory_order_relaxed) == key)
      return node;
  }
  return nullptr;
}

// static
ALWAYS_INLINE uint32_t LockFreeAddressHashSet::Hash(void* key) {
  // A simple fast hash function for addresses.
  constexpr uintptr_t random_bits = static_cast<uintptr_t>(0x4bfdb9df5a6f243b);
  uint64_t k = reinterpret_cast<uintptr_t>(key);
  return static_cast<uint32_t>((k * random_bits) >> 32);
}

}  // namespace base

#endif  // BASE_SAMPLING_HEAP_PROFILER_LOCK_FREE_ADDRESS_HASH_SET_H_
base/sampling_heap_profiler/poisson_allocation_sampler.cc
@@ -0,0 +1,577 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"

#include <algorithm>
#include <atomic>
#include <cmath>
#include <memory>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "build/build_config.h"

#if defined(OS_MACOSX) || defined(OS_ANDROID)
#include <pthread.h>
#endif

namespace base {

using allocator::AllocatorDispatch;

namespace {

#if defined(OS_MACOSX) || defined(OS_ANDROID)

// The macOS implementation of libmalloc sometimes calls malloc recursively,
// delegating allocations between zones. That causes our hooks to be called
// twice. The scoped guard allows us to detect that.
//
// Besides that, the implementations of thread_local on macOS and Android
// seem to allocate memory lazily on the first access to thread_local
// variables. Make use of pthread TLS instead of C++ thread_local there.
class ReentryGuard {
 public:
  ReentryGuard() : allowed_(!pthread_getspecific(entered_key_)) {
    pthread_setspecific(entered_key_, reinterpret_cast<void*>(true));
  }

  ~ReentryGuard() {
    if (LIKELY(allowed_))
      pthread_setspecific(entered_key_, nullptr);
  }

  operator bool() { return allowed_; }

  static void Init() {
    int error = pthread_key_create(&entered_key_, nullptr);
    CHECK(!error);
  }

 private:
  bool allowed_;
  static pthread_key_t entered_key_;
};

pthread_key_t ReentryGuard::entered_key_;

#else

class ReentryGuard {
 public:
  operator bool() { return true; }
  static void Init() {}
};

#endif

const size_t kDefaultSamplingIntervalBytes = 128 * 1024;

// Notes on TLS usage:
//
// * There's no safe way to use TLS in malloc() as both C++ thread_local and
//   pthread do not provide any guarantees on whether they allocate or not.
// * We think that we can safely use thread_local w/o re-entrancy guard because
//   the compiler will use "tls static access model" for static builds of
//   Chrome [https://www.uclibc.org/docs/tls.pdf].
//   But there's no guarantee that this will stay true, and in practice
//   it seems to have problems on macOS/Android. These platforms do allocate
//   on the very first access to a thread_local on each thread.
// * Directly using/warming-up platform TLS seems to work on all platforms,
//   but is also not guaranteed to stay true. We make use of it for reentrancy
//   guards on macOS/Android.
// * We cannot use the Windows Tls[GS]etValue API as it modifies the result of
//   GetLastError.
//
// Android thread_local seems to be using __emutls_get_address from libgcc:
// https://github.com/gcc-mirror/gcc/blob/master/libgcc/emutls.c
// The macOS version is based on _tlv_get_addr from dyld:
// https://opensource.apple.com/source/dyld/dyld-635.2/src/threadLocalHelpers.s.auto.html

// The guard protects against reentering on platforms other than macOS and
// Android.
thread_local bool g_internal_reentry_guard;

// Accumulated bytes towards the next sample, stored in a thread-local.
thread_local intptr_t g_accumulated_bytes_tls;

// Used as a workaround to avoid bias from muted samples. See
// ScopedMuteThreadSamples for more details.
thread_local intptr_t g_accumulated_bytes_tls_snapshot;
const intptr_t kAccumulatedBytesOffset = 1 << 29;

// A boolean used to distinguish the first allocation on a thread:
//   false - first allocation on the thread;
//   true  - otherwise.
// Since g_accumulated_bytes_tls is initialized with zero, the very first
// allocation on a thread would always trigger a sample, thus skewing the
// profile towards such allocations. To mitigate that we use this flag to
// ensure the first allocation is properly accounted for.
thread_local bool g_sampling_interval_initialized_tls;

// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;

// True if profiling is running, false otherwise.
std::atomic_bool g_running;

// Pointer to the current |LockFreeAddressHashSet|.
std::atomic<LockFreeAddressHashSet*> g_sampled_addresses_set;

// Sampling interval parameter, the mean value for intervals between samples.
std::atomic_size_t g_sampling_interval{kDefaultSamplingIntervalBytes};

void (*g_hooks_install_callback)();
std::atomic_bool g_hooks_installed;

void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
  ReentryGuard guard;
  void* address = self->next->alloc_function(self->next, size, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

void* AllocZeroInitializedFn(const AllocatorDispatch* self,
                             size_t n,
                             size_t size,
                             void* context) {
  ReentryGuard guard;
  void* address =
      self->next->alloc_zero_initialized_function(self->next, n, size, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, n * size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

void* AllocAlignedFn(const AllocatorDispatch* self,
                     size_t alignment,
                     size_t size,
                     void* context) {
  ReentryGuard guard;
  void* address =
      self->next->alloc_aligned_function(self->next, alignment, size, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

void* ReallocFn(const AllocatorDispatch* self,
                void* address,
                size_t size,
                void* context) {
  ReentryGuard guard;
  // Note: size == 0 actually performs free.
  PoissonAllocationSampler::RecordFree(address);
  address = self->next->realloc_function(self->next, address, size, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
  // Note: RecordFree should be called before free_function
  // (here and in other places).
  // That is because we need to remove the recorded allocation sample before
  // free_function runs; once it has executed, the address becomes available
  // and can be allocated by another thread. That would be racy otherwise.
  PoissonAllocationSampler::RecordFree(address);
  self->next->free_function(self->next, address, context);
}

size_t GetSizeEstimateFn(const AllocatorDispatch* self,
                         void* address,
                         void* context) {
  return self->next->get_size_estimate_function(self->next, address, context);
}

unsigned BatchMallocFn(const AllocatorDispatch* self,
                       size_t size,
                       void** results,
                       unsigned num_requested,
                       void* context) {
  ReentryGuard guard;
  unsigned num_allocated = self->next->batch_malloc_function(
      self->next, size, results, num_requested, context);
  if (LIKELY(guard)) {
    for (unsigned i = 0; i < num_allocated; ++i) {
      PoissonAllocationSampler::RecordAlloc(
          results[i], size, PoissonAllocationSampler::kMalloc, nullptr);
    }
  }
  return num_allocated;
}

void BatchFreeFn(const AllocatorDispatch* self,
                 void** to_be_freed,
                 unsigned num_to_be_freed,
                 void* context) {
  for (unsigned i = 0; i < num_to_be_freed; ++i)
    PoissonAllocationSampler::RecordFree(to_be_freed[i]);
  self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
                                  context);
}

void FreeDefiniteSizeFn(const AllocatorDispatch* self,
                        void* address,
                        size_t size,
                        void* context) {
  PoissonAllocationSampler::RecordFree(address);
  self->next->free_definite_size_function(self->next, address, size, context);
}

static void* AlignedMallocFn(const AllocatorDispatch* self,
                             size_t size,
                             size_t alignment,
                             void* context) {
  ReentryGuard guard;
  void* address =
      self->next->aligned_malloc_function(self->next, size, alignment, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

static void* AlignedReallocFn(const AllocatorDispatch* self,
                              void* address,
                              size_t size,
                              size_t alignment,
                              void* context) {
  ReentryGuard guard;
  // Note: size == 0 actually performs free.
  PoissonAllocationSampler::RecordFree(address);
  address = self->next->aligned_realloc_function(self->next, address, size,
                                                 alignment, context);
  if (LIKELY(guard)) {
    PoissonAllocationSampler::RecordAlloc(
        address, size, PoissonAllocationSampler::kMalloc, nullptr);
  }
  return address;
}

static void AlignedFreeFn(const AllocatorDispatch* self,
                          void* address,
                          void* context) {
  PoissonAllocationSampler::RecordFree(address);
  self->next->aligned_free_function(self->next, address, context);
}

AllocatorDispatch g_allocator_dispatch = {&AllocFn,
                                          &AllocZeroInitializedFn,
                                          &AllocAlignedFn,
                                          &ReallocFn,
                                          &FreeFn,
                                          &GetSizeEstimateFn,
                                          &BatchMallocFn,
                                          &BatchFreeFn,
                                          &FreeDefiniteSizeFn,
                                          &AlignedMallocFn,
                                          &AlignedReallocFn,
                                          &AlignedFreeFn,
                                          nullptr};

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

void PartitionAllocHook(void* address, size_t size, const char* type) {
  PoissonAllocationSampler::RecordAlloc(
      address, size, PoissonAllocationSampler::kPartitionAlloc, type);
}

void PartitionFreeHook(void* address) {
  PoissonAllocationSampler::RecordFree(address);
}

#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

}  // namespace

PoissonAllocationSampler::ScopedMuteThreadSamples::ScopedMuteThreadSamples() {
  DCHECK(!g_internal_reentry_guard);
  g_internal_reentry_guard = true;

  // We mute thread samples immediately after taking a sample, which is when we
  // reset g_accumulated_bytes_tls. This breaks the random sampling requirement
  // of the Poisson process, and causes us to systematically overcount all
  // other allocations. That's because muted allocations rarely trigger a
  // sample [which would cause them to be ignored] since they occur right after
  // g_accumulated_bytes_tls is reset.
  //
  // To counteract this, we drop g_accumulated_bytes_tls by a large, fixed
  // amount to lower the probability that a sample is taken to nearly zero.
  // Then we reset it after we're done muting thread samples.
  g_accumulated_bytes_tls_snapshot = g_accumulated_bytes_tls;
  g_accumulated_bytes_tls -= kAccumulatedBytesOffset;
}
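
// Note on the offset above: with kAccumulatedBytesOffset = 1 << 29, roughly
// half a gigabyte would have to be allocated inside a muted scope before the
// accumulated counter could become non-negative again and trigger a sample.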

PoissonAllocationSampler::ScopedMuteThreadSamples::~ScopedMuteThreadSamples() {
  DCHECK(g_internal_reentry_guard);
  g_internal_reentry_guard = false;
  g_accumulated_bytes_tls = g_accumulated_bytes_tls_snapshot;
}

// static
bool PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted() {
  return g_internal_reentry_guard;
}

PoissonAllocationSampler* PoissonAllocationSampler::instance_;

PoissonAllocationSampler::PoissonAllocationSampler() {
  CHECK_EQ(nullptr, instance_);
  instance_ = this;
  Init();
  auto* sampled_addresses = new LockFreeAddressHashSet(64);
  g_sampled_addresses_set.store(sampled_addresses, std::memory_order_release);
}

// static
void PoissonAllocationSampler::Init() {
  static bool init_once = []() {
    ReentryGuard::Init();
    return true;
  }();
  ignore_result(init_once);
}

// static
void PoissonAllocationSampler::InstallAllocatorHooksOnce() {
  static bool hook_installed = InstallAllocatorHooks();
  ignore_result(hook_installed);
}

// static
bool PoissonAllocationSampler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
  allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
  // If the allocator shim isn't available, then we don't install any hooks.
  // There's no point in printing an error message, since this can regularly
  // happen for tests.
  ignore_result(g_allocator_dispatch);
#endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
  PartitionAllocHooks::SetObserverHooks(&PartitionAllocHook,
                                        &PartitionFreeHook);
#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

  bool expected = false;
  if (!g_hooks_installed.compare_exchange_strong(expected, true))
    g_hooks_install_callback();

  return true;
}

// static
void PoissonAllocationSampler::SetHooksInstallCallback(
    void (*hooks_install_callback)()) {
  CHECK(!g_hooks_install_callback && hooks_install_callback);
  g_hooks_install_callback = hooks_install_callback;

  bool expected = false;
  if (!g_hooks_installed.compare_exchange_strong(expected, true))
    g_hooks_install_callback();
}

void PoissonAllocationSampler::SetSamplingInterval(size_t sampling_interval) {
  // TODO(alph): Reset the sample being collected if running.
  g_sampling_interval = sampling_interval;
}

// static
size_t PoissonAllocationSampler::GetNextSampleInterval(size_t interval) {
  if (UNLIKELY(g_deterministic))
    return interval;

  // We sample with a Poisson process, with a constant average sampling
  // interval. The intervals between samples follow the exponential
  // probability distribution with parameter λ = 1/interval, where |interval|
  // is the average number of bytes between samples.
  // Let u be a uniformly distributed random number between 0 and 1, then
  // next_sample = -ln(u) / λ
  double uniform = RandDouble();
  double value = -log(uniform) * interval;
  size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of the upper bound
  // being hit is exp(-20) ~ 2e-9, so it should not skew the distribution.
  size_t max_value = interval * 20;
  if (UNLIKELY(value < min_value))
    return min_value;
  if (UNLIKELY(value > max_value))
    return max_value;
  return static_cast<size_t>(value);
}
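
// A worked example of the formula above (illustrative numbers, not taken from
// any particular run): with the default interval of 128 * 1024 bytes and a
// uniform draw u = 0.5, the next sample is scheduled after
// -ln(0.5) * 131072 ≈ 90852 bytes; u = 0.1 gives ≈ 301804 bytes, still well
// below the 20 * interval cap.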

// static
void PoissonAllocationSampler::RecordAlloc(void* address,
                                           size_t size,
                                           AllocatorType type,
                                           const char* context) {
  g_accumulated_bytes_tls += size;
  intptr_t accumulated_bytes = g_accumulated_bytes_tls;
  if (LIKELY(accumulated_bytes < 0))
    return;

  if (UNLIKELY(!g_running.load(std::memory_order_relaxed))) {
    // Sampling is in fact disabled. Reset the state of the sampler.
    // We do this check off the fast-path, because it's quite a rare state when
    // allocation hooks are installed but the sampler is not running.
    g_sampling_interval_initialized_tls = false;
    g_accumulated_bytes_tls = 0;
    return;
  }

  instance_->DoRecordAlloc(accumulated_bytes, size, address, type, context);
}

void PoissonAllocationSampler::DoRecordAlloc(intptr_t accumulated_bytes,
                                             size_t size,
                                             void* address,
                                             AllocatorType type,
                                             const char* context) {
  // Failed allocation? Skip the sample.
  if (UNLIKELY(!address))
    return;

  size_t mean_interval = g_sampling_interval.load(std::memory_order_relaxed);
  if (UNLIKELY(!g_sampling_interval_initialized_tls)) {
    g_sampling_interval_initialized_tls = true;
    // This is the very first allocation on the thread. It always passes the
    // condition at |RecordAlloc|, because g_accumulated_bytes_tls is
    // initialized with zero due to TLS semantics.
    // Generate a proper sampling interval and make sure the allocation has
    // indeed crossed the threshold before counting it as a sample.
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    if (accumulated_bytes < 0) {
      g_accumulated_bytes_tls = accumulated_bytes;
      return;
    }
  }

  size_t samples = accumulated_bytes / mean_interval;
  accumulated_bytes %= mean_interval;

  do {
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    ++samples;
  } while (accumulated_bytes >= 0);

  g_accumulated_bytes_tls = accumulated_bytes;

  if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
    return;

  ScopedMuteThreadSamples no_reentrancy_scope;
  std::vector<SamplesObserver*> observers_copy;
  {
    AutoLock lock(mutex_);

    // TODO(alph): Sometimes RecordAlloc is called twice in a row without
    // a RecordFree in between. Investigate it.
    if (sampled_addresses_set().Contains(address))
      return;
    sampled_addresses_set().Insert(address);
    BalanceAddressesHashSet();
    observers_copy = observers_;
  }

  size_t total_allocated = mean_interval * samples;
  for (auto* observer : observers_copy)
    observer->SampleAdded(address, size, total_allocated, type, context);
}

void PoissonAllocationSampler::DoRecordFree(void* address) {
  if (UNLIKELY(ScopedMuteThreadSamples::IsMuted()))
    return;
  // There is a rare case on macOS and Android when the very first thread_local
  // access in the ScopedMuteThreadSamples constructor may allocate and
  // thus reenter DoRecordAlloc. However, the call chain won't build up further
  // as RecordAlloc accesses are guarded with the pthread TLS-based
  // ReentryGuard.
  ScopedMuteThreadSamples no_reentrancy_scope;
  std::vector<SamplesObserver*> observers_copy;
  {
    AutoLock lock(mutex_);
    observers_copy = observers_;
    sampled_addresses_set().Remove(address);
  }
  for (auto* observer : observers_copy)
    observer->SampleRemoved(address);
}

void PoissonAllocationSampler::BalanceAddressesHashSet() {
  // If the load_factor of the current addresses hash set is 1 or higher,
  // allocate a new set twice as large, copy all the data over,
  // and switch to using it.
  // During the copy no writes are made to either set, as the whole operation
  // is performed under the lock.
  // All the readers continue to use the old set until the atomic switch
  // takes place.
  LockFreeAddressHashSet& current_set = sampled_addresses_set();
  if (current_set.load_factor() < 1)
    return;
  auto new_set =
      std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
  new_set->Copy(current_set);
  // Atomically switch all the new readers to the new set.
  g_sampled_addresses_set.store(new_set.release(), std::memory_order_release);
  // We leak the old set because we still have to keep all the old sets alive,
  // as there might be reader threads that have already obtained a pointer to
  // one but haven't yet managed to access it.
}

// static
LockFreeAddressHashSet& PoissonAllocationSampler::sampled_addresses_set() {
  return *g_sampled_addresses_set.load(std::memory_order_acquire);
}

// static
PoissonAllocationSampler* PoissonAllocationSampler::Get() {
  static NoDestructor<PoissonAllocationSampler> instance;
  return instance.get();
}

void PoissonAllocationSampler::SuppressRandomnessForTest(bool suppress) {
  g_deterministic = suppress;
}

void PoissonAllocationSampler::AddSamplesObserver(SamplesObserver* observer) {
  ScopedMuteThreadSamples no_reentrancy_scope;
  AutoLock lock(mutex_);
  DCHECK(std::find(observers_.begin(), observers_.end(), observer) ==
         observers_.end());
  observers_.push_back(observer);
  InstallAllocatorHooksOnce();
  g_running = !observers_.empty();
}

void PoissonAllocationSampler::RemoveSamplesObserver(
    SamplesObserver* observer) {
  ScopedMuteThreadSamples no_reentrancy_scope;
  AutoLock lock(mutex_);
  auto it = std::find(observers_.begin(), observers_.end(), observer);
  DCHECK(it != observers_.end());
  observers_.erase(it);
  g_running = !observers_.empty();
}

}  // namespace base
base/sampling_heap_profiler/poisson_allocation_sampler.h
@@ -0,0 +1,142 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
#define BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_

#include <vector>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/macros.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"

namespace base {

template <typename T>
class NoDestructor;

// This singleton class implements Poisson sampling of the incoming allocation
// stream. It hooks onto base::allocator and base::PartitionAlloc.
// An extra custom allocator can be hooked via the SetHooksInstallCallback
// method.
// The only control parameter is the sampling interval, which sets the average
// number of bytes between samples. The actual intervals between samples are
// randomized using a Poisson process to mitigate patterns in the allocation
// stream.
// Once the accumulated allocation size fills up the current sample interval,
// a sample is generated and sent to the observers via a |SampleAdded| call.
// When the memory that triggered a sample is freed, observers are notified
// with a |SampleRemoved| call.
//
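// A minimal observer sketch (illustrative only; |MyObserver| is a hypothetical
// client, not part of this change):
//
//   class MyObserver : public PoissonAllocationSampler::SamplesObserver {
//    public:
//     void SampleAdded(void* address, size_t size, size_t total,
//                      PoissonAllocationSampler::AllocatorType type,
//                      const char* context) override { /* record sample */ }
//     void SampleRemoved(void* address) override { /* drop sample */ }
//   };
//
//   PoissonAllocationSampler::Get()->AddSamplesObserver(&observer);
//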
class BASE_EXPORT PoissonAllocationSampler {
 public:
  enum AllocatorType : uint32_t { kMalloc, kPartitionAlloc, kBlinkGC };

  class SamplesObserver {
   public:
    virtual ~SamplesObserver() = default;
    virtual void SampleAdded(void* address,
                             size_t size,
                             size_t total,
                             AllocatorType type,
                             const char* context) = 0;
    virtual void SampleRemoved(void* address) = 0;
  };

  // An instance of this class makes the sampler not report samples generated
  // within the object's scope for the current thread.
  // It allows observers to allocate/deallocate memory while holding a lock
  // without the risk of running into reentrancy problems.
  // The current implementation doesn't support ScopedMuteThreadSamples
  // nesting.
  class BASE_EXPORT ScopedMuteThreadSamples {
   public:
    ScopedMuteThreadSamples();
    ~ScopedMuteThreadSamples();

    static bool IsMuted();
  };

  // Must be called early during the process initialization. It creates and
  // reserves a TLS slot.
  static void Init();

  // This is an entry point for plugging in an external allocator.
  // The profiler will invoke the provided callback upon initialization.
  // The callback should install hooks onto the corresponding memory allocator
  // and make them invoke PoissonAllocationSampler::RecordAlloc and
  // PoissonAllocationSampler::RecordFree upon corresponding allocation events.
  //
  // If the method is called after the profiler is initialized, the callback
  // is invoked right away.
  static void SetHooksInstallCallback(void (*hooks_install_callback)());

  void AddSamplesObserver(SamplesObserver*);

  // Note: After an observer is removed it is still possible to receive
  // a notification to that observer. This is not a problem currently as
  // the only client of this interface is the base::SamplingHeapProfiler,
  // which is a singleton.
  // If there's a need for this functionality in the future, one might
  // want to put the observer notification loop under a reader-writer lock.
  void RemoveSamplesObserver(SamplesObserver*);

  void SetSamplingInterval(size_t sampling_interval);
  void SuppressRandomnessForTest(bool suppress);

  static void RecordAlloc(void* address,
                          size_t,
                          AllocatorType,
                          const char* context);
  ALWAYS_INLINE static void RecordFree(void* address);

  static PoissonAllocationSampler* Get();

 private:
  PoissonAllocationSampler();
  ~PoissonAllocationSampler() = delete;

  static void InstallAllocatorHooksOnce();
  static bool InstallAllocatorHooks();
  static size_t GetNextSampleInterval(size_t base_interval);
  static LockFreeAddressHashSet& sampled_addresses_set();

  void DoRecordAlloc(intptr_t accumulated_bytes,
                     size_t size,
                     void* address,
                     AllocatorType type,
                     const char* context);
  void DoRecordFree(void* address);

  void BalanceAddressesHashSet();

  Lock mutex_;
  // The |observers_| list is guarded by |mutex_|. However, a copy of it
  // is made before invoking the observers (to avoid performing expensive
  // operations under the lock), so the SamplesObservers themselves need
  // to be thread-safe and support being invoked racily after
  // RemoveSamplesObserver().
  std::vector<SamplesObserver*> observers_ GUARDED_BY(mutex_);

  static PoissonAllocationSampler* instance_;

  friend class NoDestructor<PoissonAllocationSampler>;
  friend class SamplingHeapProfilerTest;
  friend class ScopedMuteThreadSamples;

  DISALLOW_COPY_AND_ASSIGN(PoissonAllocationSampler);
};

// static
ALWAYS_INLINE void PoissonAllocationSampler::RecordFree(void* address) {
  if (UNLIKELY(address == nullptr))
    return;
  if (UNLIKELY(sampled_addresses_set().Contains(address)))
    instance_->DoRecordFree(address);
}

}  // namespace base

#endif  // BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
base/sampling_heap_profiler/sampling_heap_profiler.cc
@@ -0,0 +1,298 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/bind.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/threading/thread_local_storage.h"
#include "base/trace_event/heap_profiler_allocation_context_tracker.h"
#include "build/build_config.h"

#if defined(OS_MACOSX)
#include <pthread.h>
#endif

#if defined(OS_LINUX) || defined(OS_ANDROID)
#include <sys/prctl.h>
#endif

#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
#include "base/trace_event/cfi_backtrace_android.h"
#endif

namespace base {

constexpr uint32_t kMaxStackEntries = 256;

namespace {

// If a thread name has been set from ThreadIdNameManager, use that. Otherwise,
// get the thread name from the kernel if available, or return a string with
// the thread id. This function intentionally leaks the allocated strings since
// they are used to tag allocations even after the thread dies.
const char* GetAndLeakThreadName() {
  const char* thread_name =
      base::ThreadIdNameManager::GetInstance()->GetNameForCurrentThread();
  if (thread_name && *thread_name != '\0')
    return thread_name;

  // prctl requires 16 bytes, snprintf requires 19, pthread_getname_np requires
  // 64 on macOS, see PlatformThread::SetName in platform_thread_mac.mm.
  constexpr size_t kBufferLen = 64;
  char name[kBufferLen];
#if defined(OS_LINUX) || defined(OS_ANDROID)
  // If the thread name is not set, try to get it from prctl. The thread name
  // might not be set in cases where the thread started before heap profiling
  // was enabled.
  int err = prctl(PR_GET_NAME, name);
  if (!err)
    return strdup(name);
#elif defined(OS_MACOSX)
  int err = pthread_getname_np(pthread_self(), name, kBufferLen);
  if (err == 0 && *name != '\0')
    return strdup(name);
#endif  // defined(OS_LINUX) || defined(OS_ANDROID)

  // Use the tid if we don't have a thread name.
  snprintf(name, sizeof(name), "Thread %lu",
           static_cast<unsigned long>(base::PlatformThread::CurrentId()));
  return strdup(name);
}

const char* UpdateAndGetThreadName(const char* name) {
  static thread_local const char* thread_name;
  if (name)
    thread_name = name;
  if (!thread_name)
    thread_name = GetAndLeakThreadName();
  return thread_name;
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler::SamplingHeapProfiler() = default;
SamplingHeapProfiler::~SamplingHeapProfiler() {
  if (record_thread_names_)
    base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
}

uint32_t SamplingHeapProfiler::Start() {
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  if (!trace_event::CFIBacktraceAndroid::GetInitializedInstance()
           ->can_unwind_stack_frames()) {
    LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
    return 0;
  }
#endif

  AutoLock lock(start_stop_mutex_);
  if (!running_sessions_++)
    PoissonAllocationSampler::Get()->AddSamplesObserver(this);
  return last_sample_ordinal_;
}

void SamplingHeapProfiler::Stop() {
  AutoLock lock(start_stop_mutex_);
  DCHECK_GT(running_sessions_, 0);
  if (!--running_sessions_)
    PoissonAllocationSampler::Get()->RemoveSamplesObserver(this);
}

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
  PoissonAllocationSampler::Get()->SetSamplingInterval(sampling_interval);
}

void SamplingHeapProfiler::SetRecordThreadNames(bool value) {
  if (record_thread_names_ == value)
    return;
  record_thread_names_ = value;
  if (value) {
    base::ThreadIdNameManager::GetInstance()->AddObserver(this);
  } else {
    base::ThreadIdNameManager::GetInstance()->RemoveObserver(this);
  }
}

// static
const char* SamplingHeapProfiler::CachedThreadName() {
  return UpdateAndGetThreadName(nullptr);
}

// static
void** SamplingHeapProfiler::CaptureStackTrace(void** frames,
                                               size_t max_entries,
                                               size_t* count) {
  // Skip top frames as they correspond to the profiler itself.
  size_t skip_frames = 3;
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  size_t frame_count =
      base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
          const_cast<const void**>(frames), max_entries);
#elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
  size_t frame_count = base::debug::TraceStackFramePointers(
      const_cast<const void**>(frames), max_entries, skip_frames);
  skip_frames = 0;
#else
  // Fall back to capturing the stack with base::debug::CollectStackTrace,
  // which is likely slower, but more reliable.
  size_t frame_count =
      base::debug::CollectStackTrace(const_cast<void**>(frames), max_entries);
#endif

  skip_frames = std::min(skip_frames, frame_count);
  *count = frame_count - skip_frames;
  return frames + skip_frames;
}
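
// Illustrative call pattern for CaptureStackTrace (a hypothetical caller; the
// contract is documented in the header):
//
//   void* frames[kMaxStackEntries];
//   size_t count = 0;
//   void** first_frame =
//       SamplingHeapProfiler::CaptureStackTrace(frames, kMaxStackEntries,
//                                               &count);
//   // |first_frame| points at the first non-profiler frame; |count| frames
//   // follow it.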

void SamplingHeapProfiler::SampleAdded(
    void* address,
    size_t size,
    size_t total,
    PoissonAllocationSampler::AllocatorType type,
    const char* context) {
  // CaptureStack and allocation context tracking may use TLS.
  // Bail out if it has been destroyed.
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  DCHECK(PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
  Sample sample(size, total, ++last_sample_ordinal_);
  sample.allocator = type;
  using CaptureMode = trace_event::AllocationContextTracker::CaptureMode;
  CaptureMode capture_mode =
      trace_event::AllocationContextTracker::capture_mode();
  if (capture_mode == CaptureMode::PSEUDO_STACK ||
      capture_mode == CaptureMode::MIXED_STACK) {
    CaptureMixedStack(context, &sample);
  } else {
    CaptureNativeStack(context, &sample);
  }
  AutoLock lock(mutex_);
  RecordString(sample.context);
  samples_.emplace(address, std::move(sample));
}

void SamplingHeapProfiler::CaptureMixedStack(const char* context,
                                             Sample* sample) {
  auto* tracker =
      trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
  if (!tracker)
    return;

  trace_event::AllocationContext allocation_context;
  if (!tracker->GetContextSnapshot(&allocation_context))
    return;

  const base::trace_event::Backtrace& backtrace = allocation_context.backtrace;
  CHECK_LE(backtrace.frame_count, kMaxStackEntries);
  std::vector<void*> stack;
  stack.reserve(backtrace.frame_count);

  AutoLock lock(mutex_);  // Needed for the RecordString calls.
  for (int i = base::checked_cast<int>(backtrace.frame_count) - 1; i >= 0;
       --i) {
    const base::trace_event::StackFrame& frame = backtrace.frames[i];
    if (frame.type != base::trace_event::StackFrame::Type::PROGRAM_COUNTER)
      RecordString(static_cast<const char*>(frame.value));
    stack.push_back(const_cast<void*>(frame.value));
  }
  sample->stack = std::move(stack);
  if (!context)
    context = allocation_context.type_name;
  sample->context = context;
}

void SamplingHeapProfiler::CaptureNativeStack(const char* context,
                                              Sample* sample) {
  void* stack[kMaxStackEntries];
  size_t frame_count;
  // One frame is reserved for the thread name.
  void** first_frame =
      CaptureStackTrace(stack, kMaxStackEntries - 1, &frame_count);
  DCHECK_LT(frame_count, kMaxStackEntries);
  sample->stack.assign(first_frame, first_frame + frame_count);

  if (record_thread_names_)
    sample->thread_name = CachedThreadName();

  if (!context) {
    const auto* tracker =
        trace_event::AllocationContextTracker::GetInstanceForCurrentThread();
    if (tracker)
      context = tracker->TaskContext();
  }
  sample->context = context;
}

const char* SamplingHeapProfiler::RecordString(const char* string) {
  return string ? *strings_.insert(string).first : nullptr;
}

void SamplingHeapProfiler::SampleRemoved(void* address) {
  DCHECK(base::PoissonAllocationSampler::ScopedMuteThreadSamples::IsMuted());
  base::AutoLock lock(mutex_);
  samples_.erase(address);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  // Make sure the sampler does not invoke |SampleAdded| or |SampleRemoved|
  // on this thread. Otherwise it could end up in a deadlock.
  // See crbug.com/882495
  PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
  AutoLock lock(mutex_);
  std::vector<Sample> samples;
  samples.reserve(samples_.size());
  for (auto& it : samples_) {
    Sample& sample = it.second;
    if (sample.ordinal > profile_id)
      samples.push_back(sample);
  }
  return samples;
}

std::vector<const char*> SamplingHeapProfiler::GetStrings() {
  PoissonAllocationSampler::ScopedMuteThreadSamples no_samples_scope;
  AutoLock lock(mutex_);
  return std::vector<const char*>(strings_.begin(), strings_.end());
}

// static
void SamplingHeapProfiler::Init() {
  PoissonAllocationSampler::Init();
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::Get() {
  static NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

void SamplingHeapProfiler::OnThreadNameChanged(const char* name) {
  UpdateAndGetThreadName(name);
}

}  // namespace base
base/sampling_heap_profiler/sampling_heap_profiler.h
@@ -0,0 +1,155 @@
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_
#define BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_

#include <atomic>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "base/base_export.h"
#include "base/macros.h"
#include "base/sampling_heap_profiler/poisson_allocation_sampler.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/threading/thread_id_name_manager.h"

namespace base {

template <typename T>
class NoDestructor;

// This class implements sampling profiling of the native memory heap.
// It uses PoissonAllocationSampler to aggregate the heap allocations and
// record samples.
// The recorded samples can then be retrieved using the GetSamples method.
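//
// A minimal usage sketch (illustrative only; the surrounding setup code is
// hypothetical):
//
//   SamplingHeapProfiler::Init();                // early in process startup
//   auto* profiler = SamplingHeapProfiler::Get();
//   profiler->SetSamplingInterval(128 * 1024);   // mean bytes per sample
//   uint32_t profile_id = profiler->Start();
//   ...                                          // run the workload
//   std::vector<SamplingHeapProfiler::Sample> samples =
//       profiler->GetSamples(profile_id);
//   profiler->Stop();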
class BASE_EXPORT SamplingHeapProfiler
    : private PoissonAllocationSampler::SamplesObserver,
      public base::ThreadIdNameManager::Observer {
 public:
  class BASE_EXPORT Sample {
   public:
    Sample(const Sample&);
    ~Sample();

    // Allocation size.
    size_t size;
    // Total size attributed to the sample.
    size_t total;
    // Type of the allocator.
    PoissonAllocationSampler::AllocatorType allocator;
    // Context as provided by the allocation hook.
    const char* context = nullptr;
    // Name of the thread that made the sampled allocation.
    const char* thread_name = nullptr;
    // Call stack of PC addresses responsible for the allocation.
    // If AllocationContextTracker::capture_mode() is in PSEUDO or MIXED modes,
    // the frame pointers may point to name strings instead of PCs. In these
    // cases all the string pointers are also reported via the |GetStrings|
    // method of |SamplingHeapProfiler|. This way they can be distinguished
    // from the PC pointers.
    std::vector<void*> stack;

   private:
    friend class SamplingHeapProfiler;

    Sample(size_t size, size_t total, uint32_t ordinal);

    uint32_t ordinal;
  };

  // Starts collecting allocation samples. Returns the current profile_id.
  // This value can then be passed to |GetSamples| to retrieve only samples
  // recorded since the corresponding |Start| invocation.
  uint32_t Start();

  // Stops recording allocation samples.
  void Stop();

  // Sets the sampling interval in bytes.
  void SetSamplingInterval(size_t sampling_interval);

  // Enables recording the name of the thread that made the sampled allocation.
  void SetRecordThreadNames(bool value);

  // Returns the current thread name.
  static const char* CachedThreadName();

  // Returns the samples recorded so far for the profile session.
  // If |profile_id| is set to the value returned by the |Start| method,
  // it returns only the samples recorded after the corresponding |Start|
  // invocation. To retrieve all the collected samples, |profile_id| must be
  // set to 0.
  std::vector<Sample> GetSamples(uint32_t profile_id);

  // List of strings used in the profile call stacks.
  std::vector<const char*> GetStrings();

  // Captures up to |max_entries| stack frames using the buffer pointed to by
  // |frames|. Puts the number of captured frames into the |count| output
  // parameter. Returns the pointer to the topmost frame.
  static void** CaptureStackTrace(void** frames,
                                  size_t max_entries,
                                  size_t* count);

  static void Init();
  static SamplingHeapProfiler* Get();

  // ThreadIdNameManager::Observer implementation:
  void OnThreadNameChanged(const char* name) override;

 private:
  SamplingHeapProfiler();
  ~SamplingHeapProfiler() override;

  // PoissonAllocationSampler::SamplesObserver
  void SampleAdded(void* address,
                   size_t size,
                   size_t total,
                   PoissonAllocationSampler::AllocatorType type,
                   const char* context) override;
  void SampleRemoved(void* address) override;

  void CaptureMixedStack(const char* context, Sample* sample);
  void CaptureNativeStack(const char* context, Sample* sample);
  const char* RecordString(const char* string) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Mutex to access |samples_| and |strings_|.
  Lock mutex_;

  // Samples of the currently live allocations.
  std::unordered_map<void*, Sample> samples_ GUARDED_BY(mutex_);

  // When CaptureMode::PSEUDO_STACK or CaptureMode::MIXED_STACK is enabled,
  // the call stack contents of samples may contain strings besides
  // PC addresses.
  // In this case each string pointer is also added to the |strings_| set.
  // The set only contains pointers to static strings that are never deleted.
  std::unordered_set<const char*> strings_ GUARDED_BY(mutex_);

  // Mutex that makes access to |running_sessions_| and the add/remove
  // samples-observer calls atomic.
  Lock start_stop_mutex_;

  // Number of running sessions.
  int running_sessions_ = 0;

  // Last sample ordinal, used to mark samples recorded during a single
  // session.
  std::atomic<uint32_t> last_sample_ordinal_{1};

  // Whether to record thread names.
  std::atomic<bool> record_thread_names_{false};

  friend class NoDestructor<SamplingHeapProfiler>;
  friend class SamplingHeapProfilerTest;

  DISALLOW_COPY_AND_ASSIGN(SamplingHeapProfiler);
};

}  // namespace base

#endif  // BASE_SAMPLING_HEAP_PROFILER_SAMPLING_HEAP_PROFILER_H_