From 8b36b7e2a2e642b684a34fe63a3a1f5d67cb8d6c Mon Sep 17 00:00:00 2001 From: Attila Szegedi Date: Mon, 11 May 2026 14:00:09 +0200 Subject: [PATCH] =?UTF-8?q?use=20thread-local=20for=20the=20active=20profi?= =?UTF-8?q?ler=20instead=20of=20an=20isolate=E2=86=92profiler=20map?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The signal handler previously looked up the per-isolate WallProfiler via ProtectedProfilerMap — an unordered_map shielded by an atomic-exchange dance so the SIGPROF handler could read it without taking a mutex. In Node's threading model each V8 isolate is pinned to a single thread, so a thread_local pointer captures exactly the same fact with no map, no atomics and no copy-on-write churn on every profiler start/stop. The cross-thread "gather worker CPU on the main thread" use case still needs a registry (thread-locals are per-thread), but that path has no signal-safety requirement, so it collapses to a plain mutex around an unordered_set plus the terminated-worker CPU bucket. Dispose no longer needs the removeFromMap parameter — it always removes itself from the registry and clears the thread-local. --- bindings/profilers/wall.cc | 190 +++++++++++-------------------------- bindings/profilers/wall.hh | 2 +- 2 files changed, 57 insertions(+), 135 deletions(-) diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index c4630ad3..510990ba 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "map-get.hh" @@ -179,134 +180,46 @@ int detectV8Bug(const v8::CpuProfile* profile) { return 0; } -class ProtectedProfilerMap { - public: - WallProfiler* GetProfiler(const Isolate* isolate) const { - // Prevent updates to profiler map by atomically setting g_profilers to null - auto prof_map = profilers_.exchange(nullptr, std::memory_order_acq_rel); - if (!prof_map) { - return nullptr; - } - auto prof_it = prof_map->find(isolate); - WallProfiler* profiler = nullptr; - if (prof_it != prof_map->end()) { - profiler = prof_it->second; - } - // Allow updates - profilers_.store(prof_map, std::memory_order_release); - return profiler; - } - - WallProfiler* RemoveProfilerForIsolate(const v8::Isolate* isolate) { - return UpdateProfilers([isolate](auto map) { - auto it = map->find(isolate); - if (it != map->end()) { - auto profiler = it->second; - map->erase(it); - return profiler; - } - return static_cast(nullptr); - }); - } - - bool RemoveProfiler(const v8::Isolate* isolate, WallProfiler* profiler) { - return UpdateProfilers([isolate, profiler, this](auto map) { - terminatedWorkersCpu_ += profiler->GetAndResetThreadCpu(); +// Per-thread active profiler, used by the SIGPROF handler. In Node's threading +// model each V8 isolate is pinned to a single thread, so a thread-local pointer +// is equivalent to "the profiler for the isolate that received this signal". +thread_local WallProfiler* t_active_profiler = nullptr; - if (isolate != nullptr) { - auto it = map->find(isolate); - if (it != map->end() && it->second == profiler) { - map->erase(it); - return true; - } - } else { - auto it = std::find_if(map->begin(), map->end(), [profiler](auto& x) { - return x.second == profiler; - }); - if (it != map->end()) { - map->erase(it); - return true; - } - } - return false; - }); +// Registry of all live profilers across threads. Used only by the main thread +// to gather CPU consumed by JS worker threads while building a profile. +class ActiveProfilers { + public: + void Add(WallProfiler* profiler) { + std::lock_guard lock(mutex_); + profilers_.insert(profiler); } - bool AddProfiler(const v8::Isolate* isolate, WallProfiler* profiler) { - return UpdateProfilers([isolate, profiler](auto map) { - return map->emplace(isolate, profiler).second; - }); + bool Remove(WallProfiler* profiler) { + std::lock_guard lock(mutex_); + auto it = profilers_.find(profiler); + if (it == profilers_.end()) return false; + terminatedWorkersCpu_ += profiler->GetAndResetThreadCpu(); + profilers_.erase(it); + return true; } ThreadCpuClock::duration GatherTotalWorkerCpuAndReset() { - std::lock_guard lock(update_mutex_); - - // Retrieve CPU of workers that have terminated during the last period + std::lock_guard lock(mutex_); ThreadCpuClock::duration totalWorkerCpu = terminatedWorkersCpu_; - - // Reset terminated workers cpu to 0 terminatedWorkersCpu_ = ThreadCpuClock::duration::zero(); - - if (!init_) { - return totalWorkerCpu; - } - - auto currProfilers = profilers_.load(std::memory_order_acquire); - // Wait until sighandler is done using the map - while (!currProfilers) { - currProfilers = profilers_.load(std::memory_order_relaxed); - } - - // Gather CPU of workers that are still running - for (auto& profiler : *currProfilers) { - totalWorkerCpu += profiler.second->GetAndResetThreadCpu(); + for (auto* profiler : profilers_) { + totalWorkerCpu += profiler->GetAndResetThreadCpu(); } - return totalWorkerCpu; } private: - using ProfilerMap = std::unordered_map; - - template - std::invoke_result_t UpdateProfilers(F updateFn) { - // use mutex to prevent two isolates of updating profilers concurrently - std::lock_guard lock(update_mutex_); - - if (!init_) { - profilers_.store(new ProfilerMap(), std::memory_order_release); - init_ = true; - } - - auto currProfilers = profilers_.load(std::memory_order_acquire); - // Wait until sighandler is done using the map - while (!currProfilers) { - currProfilers = profilers_.load(std::memory_order_relaxed); - } - auto newProfilers = new ProfilerMap(*currProfilers); - auto res = updateFn(newProfilers); - // Wait until sighandler is done using the map before installing a new map. - // The value in profilers is either nullptr or currProfilers. - for (;;) { - ProfilerMap* currProfilers2 = currProfilers; - if (profilers_.compare_exchange_weak( - currProfilers2, newProfilers, std::memory_order_acq_rel)) { - break; - } - } - delete currProfilers; - return res; - } - - mutable std::atomic profilers_; - std::mutex update_mutex_; - bool init_ = false; - std::chrono::nanoseconds terminatedWorkersCpu_{}; + std::mutex mutex_; + std::unordered_set profilers_; + ThreadCpuClock::duration terminatedWorkersCpu_{}; }; -using ProfilerMap = std::unordered_map; - -static ProtectedProfilerMap g_profilers; +static ActiveProfilers g_profilers; namespace { @@ -380,15 +293,9 @@ void SignalHandler::HandleProfilerSignal(int sig, if (!old_handler) { return; } - auto isolate = Isolate::GetCurrent(); - if (!isolate) { - return; - } - WallProfiler* prof = g_profilers.GetProfiler(isolate); - + WallProfiler* prof = t_active_profiler; if (!prof) { - // no profiler found for current isolate, just pass the signal to old - // handler + // no profiler active on this thread, just pass the signal to old handler old_handler(sig, info, context); return; } @@ -408,6 +315,10 @@ void SignalHandler::HandleProfilerSignal(int sig, auto time_from = Now(); old_handler(sig, info, context); auto time_to = Now(); + auto isolate = Isolate::GetCurrent(); + if (!isolate) { + return; + } prof->PushContext(time_from, time_to, cpu_time, isolate); } #else @@ -456,9 +367,12 @@ static int64_t GetV8ToEpochOffset() { } void WallProfiler::CleanupHook(void* data) { - auto isolate = static_cast(data); - auto prof = g_profilers.RemoveProfilerForIsolate(isolate); + // Environment cleanup hooks run on the isolate's own thread, so the + // thread-local pointer for this thread (if any) refers to the profiler that + // belongs to this isolate. + auto prof = t_active_profiler; if (prof) { + auto isolate = static_cast(data); prof->Cleanup(isolate); delete prof; } @@ -472,7 +386,7 @@ void WallProfiler::Cleanup(Isolate* isolate) { if (interceptSignal()) { SignalHandler::DecreaseUseCount(); } - Dispose(isolate, false); + Dispose(isolate); } } @@ -669,6 +583,14 @@ WallProfiler::WallProfiler(std::chrono::microseconds samplingPeriod, } WallProfiler::~WallProfiler() { + // Defensive: under normal use Dispose() has already cleared both of these, + // but if a profiler is destroyed without going through Dispose we still must + // not leave dangling pointers. + if (t_active_profiler == this) { + t_active_profiler = nullptr; + } + g_profilers.Remove(this); + // Delete all live contexts for (auto ptr : liveContextPtrs_) { ptr->Detach(); // so it doesn't invalidate our iterator @@ -677,13 +599,14 @@ WallProfiler::~WallProfiler() { liveContextPtrs_.clear(); } -void WallProfiler::Dispose(Isolate* isolate, bool removeFromMap) { +void WallProfiler::Dispose(Isolate* isolate) { if (cpuProfiler_ != nullptr) { cpuProfiler_->Dispose(); cpuProfiler_ = nullptr; - if (removeFromMap) { - g_profilers.RemoveProfiler(isolate, this); + g_profilers.Remove(this); + if (t_active_profiler == this) { + t_active_profiler = nullptr; } if (collectAsyncId_ || useCPED()) { @@ -1065,7 +988,7 @@ Result WallProfiler::StopCore(bool restart, ProfileBuilder&& buildProfile) { v8_profile->Delete(); if (!restart) { - Dispose(v8::Isolate::GetCurrent(), true); + Dispose(v8::Isolate::GetCurrent()); } else if (workaroundV8Bug_) { waitForSignal(callCount + 1); std::atomic_signal_fence(std::memory_order_release); @@ -1151,14 +1074,13 @@ NAN_MODULE_INIT(WallProfiler::Init) { v8::CpuProfiler* WallProfiler::CreateV8CpuProfiler() { if (cpuProfiler_ == nullptr) { - v8::Isolate* isolate = v8::Isolate::GetCurrent(); - - bool inserted = g_profilers.AddProfiler(isolate, this); - - if (!inserted) { - // refuse to create a new profiler if one is already active + if (t_active_profiler != nullptr) { + // refuse to create a new profiler if one is already active on this thread return nullptr; } + v8::Isolate* isolate = v8::Isolate::GetCurrent(); + g_profilers.Add(this); + t_active_profiler = this; cpuProfiler_ = v8::CpuProfiler::New(isolate); cpuProfiler_->SetSamplingInterval( std::chrono::microseconds(samplingPeriod_).count()); diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 4604a974..11ed4cfd 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -99,7 +99,7 @@ class WallProfiler : public Nan::ObjectWrap { ContextBuffer contexts_; ~WallProfiler(); - void Dispose(v8::Isolate* isolate, bool removeFromMap); + void Dispose(v8::Isolate* isolate); // A new CPU profiler object will be created each time profiling is started // to work around https://bugs.chromium.org/p/v8/issues/detail?id=11051.