Skip to content

Commit 2d05f5d

Browse files
authored
ref: Add sampling to span first (#5617)
Add sampling to span first. - In span first, we make a sampling decision BEFORE a span is actually created. Based on the sampling decision (and potentially other filtering mechanisms like `ignore_spans` -- coming soon) we either create a `StreamedSpan` or a `NoOpStreamedSpan`. - No-op spans remember why they were unsampled and emit a client report accordingly when finished. - The logic that makes the sampling decision has been ported from [`_set_initial_sampling_decision`](https://github.com/getsentry/sentry-python/blob/a04ba6e1e5d67a587962f06837952bea4546193f/sentry_sdk/tracing.py#L1145-L1244). - The effective sample rate, which might differ from the rate in the incoming baggage (e.g. because backpressure handling is active and lowers the effective sample rate), is then updated in the propagation context (ported from [here](https://github.com/getsentry/sentry-python/blob/a04ba6e1e5d67a587962f06837952bea4546193f/sentry_sdk/scope.py#L1086-L1092)).
1 parent adcd90c commit 2d05f5d

File tree

3 files changed

+151
-11
lines changed

3 files changed

+151
-11
lines changed

sentry_sdk/scope.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
Baggage,
3131
has_tracing_enabled,
3232
has_span_streaming_enabled,
33+
_make_sampling_decision,
3334
normalize_incoming_data,
3435
PropagationContext,
3536
)
@@ -1199,6 +1200,21 @@ def start_streamed_span(
11991200
if parent_span is None:
12001201
propagation_context = self.get_active_propagation_context()
12011202

1203+
sampled, sample_rate, sample_rand, outcome = _make_sampling_decision(
1204+
name,
1205+
attributes,
1206+
self,
1207+
)
1208+
1209+
if sample_rate is not None:
1210+
self._update_sample_rate(sample_rate)
1211+
1212+
if sampled is False:
1213+
return NoOpStreamedSpan(
1214+
scope=self,
1215+
unsampled_reason=outcome,
1216+
)
1217+
12021218
return StreamedSpan(
12031219
name=name,
12041220
attributes=attributes,
@@ -1209,12 +1225,14 @@ def start_streamed_span(
12091225
parent_span_id=propagation_context.parent_span_id,
12101226
parent_sampled=propagation_context.parent_sampled,
12111227
baggage=propagation_context.baggage,
1228+
sample_rand=sample_rand,
1229+
sample_rate=sample_rate,
12121230
)
12131231

12141232
# This is a child span; take propagation context from the parent span
12151233
with new_scope():
12161234
if isinstance(parent_span, NoOpStreamedSpan):
1217-
return NoOpStreamedSpan()
1235+
return NoOpStreamedSpan(unsampled_reason=parent_span._unsampled_reason)
12181236

12191237
return StreamedSpan(
12201238
name=name,
@@ -1227,6 +1245,15 @@ def start_streamed_span(
12271245
parent_sampled=parent_span.sampled,
12281246
)
12291247

1248+
def _update_sample_rate(self, sample_rate: float) -> None:
    """
    Write the effective sample rate into the active propagation context.

    If we had to adjust the sample rate when setting the sampling decision
    for a span, it needs to be updated in the propagation context too.

    :param sample_rate: The effective sample rate; it is stored as a string
        under the ``"sample_rate"`` key of the baggage's ``sentry_items``.
    """
    propagation_context = self.get_active_propagation_context()
    baggage = propagation_context.baggage

    # Without baggage there is nowhere to record the rate; nothing to do.
    if baggage is not None:
        baggage.sentry_items["sample_rate"] = str(sample_rate)
1256+
12301257
def continue_trace(
12311258
self,
12321259
environ_or_headers: "Dict[str, Any]",

sentry_sdk/traces.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ class StreamedSpan:
224224
"_scope",
225225
"_previous_span_on_scope",
226226
"_baggage",
227+
"_sample_rand",
228+
"_sample_rate",
227229
)
228230

229231
def __init__(
@@ -238,6 +240,8 @@ def __init__(
238240
parent_span_id: "Optional[str]" = None,
239241
parent_sampled: "Optional[bool]" = None,
240242
baggage: "Optional[Baggage]" = None,
243+
sample_rate: "Optional[float]" = None,
244+
sample_rand: "Optional[float]" = None,
241245
):
242246
self._name: str = name
243247
self._active: bool = active
@@ -254,6 +258,8 @@ def __init__(
254258
self._parent_span_id = parent_span_id
255259
self._parent_sampled = parent_sampled
256260
self._baggage = baggage
261+
self._sample_rand = sample_rand
262+
self._sample_rate = sample_rate
257263

258264
self._start_timestamp = datetime.now(timezone.utc)
259265
self._timestamp: "Optional[datetime]" = None
@@ -350,7 +356,9 @@ def _end(self, end_timestamp: "Optional[Union[float, datetime]]" = None) -> None
350356
if isinstance(end_timestamp, datetime):
351357
self._timestamp = end_timestamp
352358
else:
353-
logger.debug("Failed to set end_timestamp. Using current time instead.")
359+
logger.debug(
360+
"[Tracing] Failed to set end_timestamp. Using current time instead."
361+
)
354362

355363
if self._timestamp is None:
356364
try:
@@ -395,7 +403,7 @@ def status(self, status: "Union[SpanStatus, str]") -> None:
395403

396404
if status not in {e.value for e in SpanStatus}:
397405
logger.debug(
398-
f'Unsupported span status {status}. Expected one of: "ok", "error"'
406+
f'[Tracing] Unsupported span status {status}. Expected one of: "ok", "error"'
399407
)
400408
return
401409

@@ -441,13 +449,20 @@ def timestamp(self) -> "Optional[datetime]":
441449

442450

443451
class NoOpStreamedSpan(StreamedSpan):
444-
__slots__ = ()
452+
__slots__ = (
453+
"_finished",
454+
"_unsampled_reason",
455+
)
445456

446457
def __init__(
447458
self,
459+
unsampled_reason: "Optional[str]" = None,
448460
scope: "Optional[sentry_sdk.Scope]" = None,
449461
) -> None:
450462
self._scope = scope # type: ignore[assignment]
463+
self._unsampled_reason = unsampled_reason
464+
465+
self._finished = False
451466

452467
self._start()
453468

@@ -471,16 +486,28 @@ def _start(self) -> None:
471486
self._previous_span_on_scope = old_span
472487

473488
def _end(self, end_timestamp: "Optional[Union[float, datetime]]" = None) -> None:
474-
if self._scope is None:
489+
if self._finished:
475490
return
476491

477-
if not hasattr(self, "_previous_span_on_scope"):
478-
return
492+
if self._unsampled_reason is not None:
493+
client = sentry_sdk.get_client()
494+
if client.is_active() and client.transport:
495+
logger.debug(
496+
f"[Tracing] Discarding span because sampled=False (reason: {self._unsampled_reason})"
497+
)
498+
client.transport.record_lost_event(
499+
reason=self._unsampled_reason,
500+
data_category="span",
501+
quantity=1,
502+
)
503+
504+
if self._scope and hasattr(self, "_previous_span_on_scope"):
505+
with capture_internal_exceptions():
506+
old_span = self._previous_span_on_scope
507+
del self._previous_span_on_scope
508+
self._scope.span = old_span
479509

480-
with capture_internal_exceptions():
481-
old_span = self._previous_span_on_scope
482-
del self._previous_span_on_scope
483-
self._scope.span = old_span
510+
self._finished = True
484511

485512
def end(self, end_timestamp: "Optional[Union[float, datetime]]" = None) -> None:
486513
self._end()

sentry_sdk/tracing_utils.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
to_string,
2424
try_convert,
2525
is_sentry_url,
26+
is_valid_sample_rate,
2627
_is_external_source,
2728
_is_in_project_root,
2829
_module_in_list,
@@ -41,6 +42,8 @@
4142

4243
from types import FrameType
4344

45+
from sentry_sdk._types import Attributes
46+
4447

4548
SENTRY_TRACE_REGEX = re.compile(
4649
"^[ \t]*" # whitespace
@@ -1379,6 +1382,89 @@ def add_sentry_baggage_to_headers(
13791382
)
13801383

13811384

1385+
def _make_sampling_decision(
    name: str,
    attributes: "Optional[Attributes]",
    scope: "sentry_sdk.Scope",
) -> "tuple[bool, Optional[float], Optional[float], Optional[str]]":
    """
    Decide whether a span should be sampled.

    The decision is made before the span is created, using the client
    options and the active propagation context of ``scope``.

    :param name: Name of the span about to be started; used in log messages
        and passed to ``traces_sampler`` in the sampling context.
    :param attributes: Attributes of the span about to be started; a copy is
        passed to ``traces_sampler`` (an empty dict if none were given).
    :param scope: Scope whose active propagation context supplies the trace
        ID, parent span ID, parent sampling decision and baggage.

    Returns a tuple with:
    1. the sampling decision
    2. the effective sample rate (``None`` if tracing is disabled or the
       rate was invalid)
    3. the sample rand (``None`` on every early-exit path)
    4. the reason for not sampling the span, if unsampled
       (``"sample_rate"`` or ``"backpressure"``); ``None`` if the span is
       sampled or tracing is disabled
    """
    client = sentry_sdk.get_client()

    if not has_tracing_enabled(client.options):
        return False, None, None, None

    propagation_context = scope.get_active_propagation_context()

    # Prefer the sample rand carried in the baggage; otherwise generate one
    # from the trace ID.
    sample_rand = None
    if propagation_context.baggage is not None:
        sample_rand = propagation_context.baggage._sample_rand()
    if sample_rand is None:
        sample_rand = _generate_sample_rand(propagation_context.trace_id)

    # If there's a traces_sampler, use that; otherwise use traces_sample_rate
    traces_sampler_defined = callable(client.options.get("traces_sampler"))
    if traces_sampler_defined:
        sampling_context = {
            "name": name,
            "trace_id": propagation_context.trace_id,
            "parent_span_id": propagation_context.parent_span_id,
            "parent_sampled": propagation_context.parent_sampled,
            "attributes": dict(attributes) if attributes else {},
        }

        sample_rate = client.options["traces_sampler"](sampling_context)
    else:
        # Without a sampler, the parent's decision (a bool, converted to
        # 1.0/0.0 below) takes precedence over the configured rate.
        if propagation_context.parent_sampled is not None:
            sample_rate = propagation_context.parent_sampled
        else:
            sample_rate = client.options["traces_sample_rate"]

    # Validate whether the sample_rate we got is actually valid. Since
    # traces_sampler is user-provided, it could return anything.
    if not is_valid_sample_rate(sample_rate, source="Tracing"):
        logger.warning(f"[Tracing] Discarding {name} because of invalid sample rate.")
        return False, None, None, "sample_rate"

    sample_rate = float(sample_rate)
    if not sample_rate:
        if traces_sampler_defined:
            reason = "traces_sampler returned 0 or False"
        else:
            reason = "traces_sample_rate is set to 0"

        logger.debug(f"[Tracing] Discarding {name} because {reason}")
        return False, 0.0, None, "sample_rate"

    # Adjust sample rate if we're under backpressure
    if client.monitor:
        sample_rate /= 2**client.monitor.downsample_factor

    # Only reached with a zero rate if the backpressure adjustment above
    # reduced a non-zero rate to zero.
    if not sample_rate:
        logger.debug(f"[Tracing] Discarding {name} because backpressure")
        return False, 0.0, None, "backpressure"

    sampled = sample_rand < sample_rate

    if sampled:
        logger.debug(f"[Tracing] Starting {name}")
        outcome = None
    else:
        logger.debug(
            f"[Tracing] Discarding {name} because it's not included in the random sample (sampling rate = {sample_rate})"
        )
        outcome = "sample_rate"

    return sampled, sample_rate, sample_rand, outcome
1466+
1467+
13821468
# Circular imports
13831469
from sentry_sdk.tracing import (
13841470
BAGGAGE_HEADER_NAME,

0 commit comments

Comments
 (0)