Skip to content

Commit 5f3f54e

Browse files
ericapisaniclaude
andcommitted
fix(profiler): Prevent buffer race condition during rapid start/stop cycles
Fix a race condition in ContinuousScheduler.run() where setting self.buffer = None after the sampling loop would destroy buffers created by concurrent ensure_running() calls during rapid stop/start cycles. This caused the profiler to silently drop all samples when the new thread's buffer was destroyed by the old thread's cleanup. The fix uses a local buffer reference for flushing and never sets self.buffer = None from run(). Also updated the profiler_id property to check not self.running first. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 963b4dd commit 5f3f54e

File tree

2 files changed

+70
-4
lines changed

2 files changed

+70
-4
lines changed

sentry_sdk/profiler/continuous_profiler.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ def reset_buffer(self) -> None:
308308

309309
@property
310310
def profiler_id(self) -> "Union[str, None]":
311-
if self.buffer is None:
311+
if not self.running or self.buffer is None:
312312
return None
313313
return self.buffer.profiler_id
314314

@@ -436,9 +436,10 @@ def run(self) -> None:
436436
# timestamp so we can use it next iteration
437437
last = time.perf_counter()
438438

439-
if self.buffer is not None:
440-
self.buffer.flush()
441-
self.buffer = None
439+
buffer = self.buffer
440+
if buffer is not None:
441+
buffer.flush()
442+
buffer = None
442443

443444

444445
class ThreadContinuousScheduler(ContinuousScheduler):

tests/profiler/test_continuous_profiler.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,3 +621,68 @@ def test_continuous_profiler_manual_start_and_stop_noop_when_using_trace_lifecyl
621621
) as mock_teardown:
622622
stop_profiler_func()
623623
mock_teardown.assert_not_called()
624+
625+
626+
@mock.patch("sentry_sdk.profiler.continuous_profiler.PROFILE_BUFFER_SECONDS", 0.01)
627+
def test_continuous_profiler_run_does_not_null_buffer(
628+
sentry_init,
629+
capture_envelopes,
630+
teardown_profiling,
631+
):
632+
"""
633+
Verifies that ContinuousScheduler.run() does not set self.buffer = None
634+
after exiting its sampling loop.
635+
636+
Previously, run() would execute `self.buffer = None` after the while
637+
loop exited. During rapid stop/start cycles, this could race with
638+
ensure_running() which creates a new buffer: the old thread's cleanup
639+
would destroy the newly-created buffer, causing the new profiler thread
640+
to silently drop all samples (self.buffer is None in the sampler).
641+
642+
The fix uses a local buffer reference for flushing and never sets
643+
self.buffer = None from run().
644+
"""
645+
from sentry_sdk.profiler import continuous_profiler as cp
646+
from sentry_sdk.profiler.continuous_profiler import ContinuousScheduler
647+
648+
options = get_client_options(True)(
649+
mode="thread", profile_session_sample_rate=1.0, lifecycle="manual"
650+
)
651+
sentry_init(traces_sample_rate=1.0, **options)
652+
envelopes = capture_envelopes()
653+
thread = threading.current_thread()
654+
655+
# Start and verify profiler works
656+
start_profiler()
657+
envelopes.clear()
658+
with sentry_sdk.start_transaction(name="profiling"):
659+
with sentry_sdk.start_span(op="op"):
660+
time.sleep(0.1)
661+
assert_single_transaction_with_profile_chunks(envelopes, thread)
662+
663+
# Get the scheduler and create a sentinel buffer.
664+
# We'll call run() directly to verify it doesn't null out self.buffer.
665+
scheduler = cp._scheduler
666+
assert scheduler is not None
667+
668+
# Stop the profiler so the thread exits cleanly
669+
stop_profiler()
670+
671+
# Now set up a fresh buffer and mark the scheduler as not running
672+
# (simulating the state right after ensure_running() created a new buffer
673+
# but the old thread hasn't done cleanup yet).
674+
scheduler.reset_buffer()
675+
buffer_before = scheduler.buffer
676+
assert buffer_before is not None
677+
678+
# Simulate what happens when run() exits its while loop:
679+
# self.running is already False, so the while loop exits immediately.
680+
scheduler.running = False
681+
scheduler.run()
682+
683+
# After the fix, run() should NOT have set self.buffer = None.
684+
# It should only flush using a local reference.
685+
assert scheduler.buffer is not None, (
686+
"run() must not set self.buffer = None; "
687+
"this would destroy buffers created by concurrent ensure_running() calls"
688+
)

0 commit comments

Comments
 (0)