Skip to content

Commit 2cd639f

Browse files
authored
fix(batch-evaluation): pass trace ID for score creation if scope = observations (#1504)
1 parent 26a8589 commit 2cd639f

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

langfuse/batch_evaluation.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
Protocol,
2121
Tuple,
2222
Union,
23+
cast,
2324
)
2425

2526
from langfuse.api.resources.commons.types import (
@@ -1220,6 +1221,9 @@ async def _process_batch_evaluation_item(
12201221
self._create_score_for_scope(
12211222
scope=scope,
12221223
item_id=item_id,
1224+
trace_id=cast(ObservationsView, item).trace_id
1225+
if scope == "observations"
1226+
else None,
12231227
evaluation=evaluation,
12241228
additional_metadata=metadata,
12251229
)
@@ -1242,6 +1246,9 @@ async def _process_batch_evaluation_item(
12421246
self._create_score_for_scope(
12431247
scope=scope,
12441248
item_id=item_id,
1249+
trace_id=cast(ObservationsView, item).trace_id
1250+
if scope == "observations"
1251+
else None,
12451252
evaluation=composite_eval,
12461253
additional_metadata=metadata,
12471254
)
@@ -1361,8 +1368,10 @@ async def _run_composite_evaluator(
13611368

13621369
def _create_score_for_scope(
13631370
self,
1371+
*,
13641372
scope: str,
13651373
item_id: str,
1374+
trace_id: Optional[str] = None,
13661375
evaluation: Evaluation,
13671376
additional_metadata: Optional[Dict[str, Any]],
13681377
) -> None:
@@ -1371,6 +1380,7 @@ def _create_score_for_scope(
13711380
Args:
13721381
scope: The type of entity ("traces", "observations").
13731382
item_id: The ID of the entity.
1383+
trace_id: The ID of the trace the observation belongs to; required if scope is "observations", otherwise None.
13741384
evaluation: The evaluation result to create a score from.
13751385
additional_metadata: Additional metadata to merge with evaluation metadata.
13761386
"""
@@ -1393,6 +1403,7 @@ def _create_score_for_scope(
13931403
elif scope == "observations":
13941404
self.client.create_score(
13951405
observation_id=item_id,
1406+
trace_id=trace_id,
13961407
name=evaluation.name,
13971408
value=evaluation.value, # type: ignore
13981409
comment=evaluation.comment,

tests/test_batch_evaluation.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
# ============================================================================
2626

2727

28-
pytestmark = pytest.mark.skip(reason="Github CI runner overwhelmed by score volume")
28+
# pytestmark = pytest.mark.skip(reason="Github CI runner overwhelmed by score volume")
2929

3030

3131
@pytest.fixture
@@ -67,6 +67,32 @@ def simple_evaluator(*, input, output, expected_output=None, metadata=None, **kw
6767
# ============================================================================
6868

6969

70+
def test_run_batched_evaluation_on_observations_basic(langfuse_client):
71+
"""Test basic batch evaluation on observations."""
72+
result = langfuse_client.run_batched_evaluation(
73+
scope="observations",
74+
mapper=simple_trace_mapper,
75+
evaluators=[simple_evaluator],
76+
max_items=1,
77+
verbose=True,
78+
)
79+
80+
# Validate result structure
81+
assert isinstance(result, BatchEvaluationResult)
82+
assert result.total_items_fetched >= 0
83+
assert result.total_items_processed >= 0
84+
assert result.total_scores_created >= 0
85+
assert result.completed is True
86+
assert isinstance(result.duration_seconds, float)
87+
assert result.duration_seconds > 0
88+
89+
# Verify evaluator stats
90+
assert len(result.evaluator_stats) == 1
91+
stats = result.evaluator_stats[0]
92+
assert isinstance(stats, EvaluatorStats)
93+
assert stats.name == "simple_evaluator"
94+
95+
7096
def test_run_batched_evaluation_on_traces_basic(langfuse_client):
7197
"""Test basic batch evaluation on traces."""
7298
result = langfuse_client.run_batched_evaluation(

0 commit comments

Comments
 (0)