Skip to content

Commit 0ad2bc9

Browse files
committed
feat(weave): add centralized trace_sample_rate setting with eval carve-out
Adds a centralized trace_sample_rate setting (WEAVE_TRACE_SAMPLE_RATE env var or weave.init) that samples whole traces at the root, composed multiplicatively with the existing per-op tracing_sample_rate. The decision is made once on the root call and children inherit it, so a trace is kept or dropped as a whole. Evaluation roots are never sampled out, so evals are always preserved. Off by default (1.0), so existing behavior is unchanged until a rate is set.
1 parent b6a899a commit 0ad2bc9

4 files changed

Lines changed: 217 additions & 1 deletion

File tree

tests/trace/test_client_trace.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
set_weave_client_global,
4747
)
4848
from weave.trace.refs import TableRef
49+
from weave.trace.settings import override_settings
4950
from weave.trace.vals import MissingSelfInstanceError
5051
from weave.trace.weave_client import sanitize_object_name
5152
from weave.trace_server import trace_server_interface as tsi
@@ -4187,6 +4188,145 @@ def parent_op(x: int) -> int:
41874188
assert child_traces == num_runs # Child was traced whenever parent was
41884189

41894190

4191+
def test_trace_sample_rate_off_by_default_and_drops_roots(client):
    """Check the centralized rate is a no-op by default and drops roots at 0.0.

    The op body must still execute when its trace is sampled out; only the
    recording of the call is skipped.
    """
    random.seed(0)
    calls_per_phase = 5
    run_count = 0

    @weave.op
    def my_op(x: int) -> int:
        nonlocal run_count
        run_count += 1
        return x + 1

    weave.publish(my_op)

    # Phase 1: no centralized rate configured, so every root call is recorded.
    for value in range(calls_per_phase):
        my_op(value)
    recorded_by_default = list(my_op.calls())
    assert len(recorded_by_default) == 5

    # Phase 2: a centralized rate of 0.0 samples out every root call, while
    # the wrapped function keeps running as usual.
    with override_settings(trace_sample_rate=0.0):
        for value in range(calls_per_phase):
            my_op(value)

    # The user's code ran in both phases ...
    assert run_count == 10
    # ... but only the calls from the first phase were traced.
    recorded_after_sampling = list(my_op.calls())
    assert len(recorded_after_sampling) == 5
4216+
4217+
4218+
def test_trace_sample_rate_env_var(client, monkeypatch):
    """Check that `WEAVE_TRACE_SAMPLE_RATE=0.0` alone samples out root calls."""

    @weave.op
    def my_op(x: int) -> int:
        return x + 1

    weave.publish(my_op)

    # Configure the rate through the environment only, with no settings override.
    monkeypatch.setenv("WEAVE_TRACE_SAMPLE_RATE", "0.0")
    for value in range(5):
        my_op(value)

    recorded = list(my_op.calls())
    assert len(recorded) == 0
4230+
4231+
4232+
def test_trace_sample_rate_composition_is_multiplicative(client, monkeypatch):
    """Check that the centralized and per-op rates multiply.

    With a per-op rate of 0.5 and a centralized rate of 0.5 the effective
    keep-rate is 0.25. The random draw is pinned to 0.3, which sits between
    the two thresholds: 0.5 alone keeps it, the composed 0.25 drops it. Seeing
    the call dropped therefore shows the rates were multiplied.
    """

    def fixed_draw() -> float:
        return 0.3

    monkeypatch.setattr(random, "random", fixed_draw)

    @weave.op(tracing_sample_rate=0.5)
    def half_op(x: int) -> int:
        return x + 1

    weave.publish(half_op)

    # Effective rate 0.5 * 0.5 = 0.25; the 0.3 draw exceeds it -> dropped.
    with override_settings(trace_sample_rate=0.5):
        half_op(1)
    recorded_when_composed = list(half_op.calls())
    assert len(recorded_when_composed) == 0

    # Effective rate 1.0 * 0.5 = 0.5; the 0.3 draw is within it -> kept.
    with override_settings(trace_sample_rate=1.0):
        half_op(2)
    recorded_per_op_only = list(half_op.calls())
    assert len(recorded_per_op_only) == 1
4252+
4253+
4254+
class SamplingCarveoutModel(weave.Model):
    # Minimal model used by the sampling carve-out evaluation test: it echoes
    # the question back as the generated text.

    @weave.op
    def predict(self, question: str) -> dict:
        prediction = {"generated_text": question}
        return prediction
4258+
4259+
4260+
@weave.op
def sampling_carveout_score(expected: str, output: dict) -> dict:
    # Scorer for the carve-out evaluation test: reports whether the model's
    # generated text equals the expected answer.
    is_match = expected == output["generated_text"]
    return {"match": is_match}
4263+
4264+
4265+
@pytest.mark.asyncio
async def test_trace_sample_rate_eval_carveout_declarative(client):
    """Check a declarative `Evaluation` survives a centralized rate of 0.0.

    A plain op is called under the same override as a control: it must be
    dropped, which shows sampling really was active while the evaluation ran.
    """
    random.seed(0)

    @weave.op
    def plain_op(x: int) -> int:
        return x + 1

    dataset_rows = [
        {"question": "a", "expected": "a"},
        {"question": "b", "expected": "x"},
    ]
    evaluation = weave.Evaluation(
        dataset=dataset_rows, scorers=[sampling_carveout_score]
    )

    # At 0.0 every ordinary root is dropped; evaluations are exempt.
    with override_settings(trace_sample_rate=0.0):
        for value in range(5):
            plain_op(value)  # control: non-eval root, expected to be dropped
        await evaluation.evaluate(SamplingCarveoutModel())

    client.flush()
    query = tsi.CallsQueryReq(project_id=client.project_id)
    response = client.server.calls_query(query)
    op_names = [call.op_name for call in response.calls]

    # The control op left no trace, so sampling was in effect ...
    assert all("plain_op" not in name for name in op_names)
    # ... while the evaluation root and its children were all recorded.
    surviving_fragments = (
        "Evaluation.evaluate",
        "Evaluation.predict_and_score",
        "SamplingCarveoutModel.predict",
    )
    for fragment in surviving_fragments:
        assert any(fragment in name for name in op_names)
4299+
4300+
4301+
def test_trace_sample_rate_eval_carveout_imperative(client):
    """Check an `EvaluationLogger` run survives a centralized rate of 0.0.

    A plain op called under the same override acts as the control and must be
    dropped.
    """
    random.seed(0)

    @weave.op
    def plain_op(x: int) -> int:
        return x + 1

    with override_settings(trace_sample_rate=0.0):
        for value in range(5):
            plain_op(value)  # control: non-eval root, expected to be dropped

        # Log one prediction with one score through the imperative API.
        eval_logger = weave.EvaluationLogger()
        prediction = eval_logger.log_prediction(inputs={"q": "hello"}, output="world")
        prediction.log_score(scorer="accuracy", score=True)
        prediction.finish()
        eval_logger.log_summary({"accuracy_mean": 1.0})

    client.flush()
    query = tsi.CallsQueryReq(project_id=client.project_id)
    response = client.server.calls_query(query)
    op_names = [call.op_name for call in response.calls]

    # Control dropped, evaluation root recorded.
    assert all("plain_op" not in name for name in op_names)
    assert any("Evaluation.evaluate" in name for name in op_names)
4328+
4329+
41904330
def test_calls_len(client):
41914331
@weave.op
41924332
def test():

tests/trace/test_trace_settings.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
should_disable_weave,
3232
should_print_call_link,
3333
should_redact_pii,
34+
trace_sample_rate,
3435
)
3536
from weave.trace.weave_client import get_parallelism_settings
3637
from weave.utils.retry import with_retry
@@ -619,6 +620,33 @@ def test_parse_and_apply_settings_is_alias_for_replace_settings(self):
619620
assert should_disable_weave() is False
620621

621622

623+
@pytest.mark.usefixtures("clean_settings_env")
class TestTraceSampleRate:
    """Unit tests for the `trace_sample_rate()` settings accessor."""

    def test_default_is_one(self):
        # With nothing configured the accessor reports "keep everything".
        observed = trace_sample_rate()
        assert observed == 1.0

    def test_reads_snapshot(self):
        # A value installed via replace_settings is returned as-is.
        configured = UserSettings(trace_sample_rate=0.25)
        replace_settings(configured)
        assert trace_sample_rate() == 0.25

    def test_clamps_above_one(self):
        # Rates greater than 1.0 are reported as 1.0.
        configured = UserSettings(trace_sample_rate=2.0)
        replace_settings(configured)
        assert trace_sample_rate() == 1.0

    def test_clamps_below_zero(self):
        # Negative rates are reported as 0.0.
        configured = UserSettings(trace_sample_rate=-1.0)
        replace_settings(configured)
        assert trace_sample_rate() == 0.0

    def test_env_coerces_to_float_and_wins(self, monkeypatch):
        # The env var string is parsed to a float and overrides the snapshot.
        configured = UserSettings(trace_sample_rate=1.0)
        replace_settings(configured)
        monkeypatch.setenv("WEAVE_TRACE_SAMPLE_RATE", "0.1")
        assert trace_sample_rate() == 0.1

    def test_env_is_clamped(self, monkeypatch):
        # Out-of-range env values are clamped as well.
        monkeypatch.setenv("WEAVE_TRACE_SAMPLE_RATE", "5")
        assert trace_sample_rate() == 1.0
648+
649+
622650
class TestUserSettingsValue:
623651
def test_is_frozen(self):
624652
settings = UserSettings()

weave/trace/op.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
ProcessedInputs,
6868
)
6969
from weave.trace.util import log_once
70+
from weave.trace_server import constants
7071

7172
if TYPE_CHECKING:
7273
from weave.trace.call import Call, CallsIter, NoOpCall
@@ -417,11 +418,32 @@ def should_skip_tracing_for_op(op: Op) -> bool:
417418
return not op._tracing_enabled
418419

419420

421+
def _is_sampling_exempt(op: Op) -> bool:
    """Report whether `op` is the evaluation-run op, which is never sampled out.

    The carve-out exists so that evaluations do not silently disappear when a
    sampling rate is configured; losing an evaluation would be much more
    surprising than losing an ad-hoc trace. The declarative
    `Evaluation.evaluate` op and the imperative `EvaluationLogger` op both
    resolve to the same op name, so comparing against that one name handles
    both entry points. `_should_sample_traces` only consults this for root
    calls, which is what keeps the entire evaluation subtree.

    Returns:
        True when the op's `name` equals `constants.EVALUATION_RUN_OP_NAME`;
        False otherwise, including when the op has no `name` attribute.
    """
    op_name = getattr(op, "name", None)
    return op_name == constants.EVALUATION_RUN_OP_NAME
432+
433+
420434
def _should_sample_traces(op: Op) -> bool:
    """Decide whether this call to `op` should be sampled out (not traced).

    The decision is only ever made for root calls: when another call is
    already active, this returns False so the call is not sampled out here.
    Evaluation roots are exempt as well.

    Args:
        op: The op about to be called.

    Returns:
        True when the call should be sampled out, False when it should be
        traced.
    """
    if call_context.get_current_call():
        return False  # Don't sample traces for child calls

    if _is_sampling_exempt(op):
        return False  # Never sample out evaluation roots

    # Compose the centralized rate with the per-op rate multiplicatively: both
    # express "fraction to keep", so the effective rate is their product and
    # is never higher than either one (e.g. global 0.5 and per-op 0.5 keep
    # ~25%). Defaults are 1.0 * 1.0 = 1.0 (keep all).
    effective_rate = settings.trace_sample_rate() * op.tracing_sample_rate

    # random.random() is in [0.0, 1.0). Using `>=` means a rate of 0.0 drops
    # every call (even a draw of exactly 0.0) while a rate of 1.0 still keeps
    # every call, since the draw can never reach 1.0.
    return random.random() >= effective_rate

weave/trace/settings.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,23 @@ class UserSettings:
317317
Can be overridden with the environment variable `WEAVE_USE_OTEL_V2`
318318
"""
319319

320+
trace_sample_rate: float = 1.0
321+
"""
322+
Centralized fraction of root traces to keep, from 0.0 to 1.0. Defaults to
323+
1.0 (keep everything), so sampling is off unless this is set.
324+
325+
The decision is made once on the root call and composed multiplicatively
326+
with the per-op `tracing_sample_rate` decorator argument: the effective rate
327+
is the product of the two (e.g. 0.5 here and 0.5 on the op keeps ~25%). Child
328+
calls inherit the root's decision, so a trace is always kept or dropped as a
329+
whole. Evaluation traces are never sampled out regardless of this value.
330+
331+
Unlike the per-op rate, this is meant to be set in one place (for example as
332+
a deployment-wide environment variable) so individual engineers do not each
333+
have to opt in per op.
334+
Can be overridden with the environment variable `WEAVE_TRACE_SAMPLE_RATE`
335+
"""
336+
320337

321338
class _SettingsOverrides(TypedDict, total=False):
322339
"""Typed kwargs accepted by :func:`override_settings`.
@@ -360,6 +377,7 @@ class _SettingsOverrides(TypedDict, total=False):
360377
enable_wal: bool
361378
disable_wal_sender: bool
362379
use_otel_v2: bool
380+
trace_sample_rate: float
363381

364382

365383
# Resolve string annotations once at import; used for env-var coercion.
@@ -634,3 +652,11 @@ def should_disable_wal_sender() -> bool:
634652
def should_use_otel_v2() -> bool:
    """Report whether OTel-capable integrations should use their OTel variant.

    Reads `use_otel_v2` from the current settings and resolves it through
    `_env_or_default` under the setting name "use_otel_v2".
    """
    configured = _current_settings.get().use_otel_v2
    return _env_or_default("use_otel_v2", configured)
655+
656+
657+
def trace_sample_rate() -> float:
    """Return the centralized fraction of root traces to keep, within [0.0, 1.0].

    The value comes from `_env_or_default` for the "trace_sample_rate"
    setting, falling back to the current settings, and is then clamped so
    callers never see a rate below 0.0 or above 1.0.
    """
    configured = _current_settings.get().trace_sample_rate
    resolved = _env_or_default("trace_sample_rate", configured)
    # Cap at 1.0 first, then floor at 0.0 (same nesting order as before).
    capped = min(1.0, resolved)
    return max(0.0, capped)

0 commit comments

Comments
 (0)