@@ -2384,6 +2384,7 @@ def task_with_child_spans(*, item, **kwargs):
23842384 experiment_id = first_root ["attributes" ][
23852385 LangfuseOtelSpanAttributes .EXPERIMENT_ID
23862386 ]
2387+ assert result .experiment_id == experiment_id
23872388 experiment_item_id = first_root ["attributes" ][
23882389 LangfuseOtelSpanAttributes .EXPERIMENT_ITEM_ID
23892390 ]
@@ -2478,25 +2479,55 @@ def task_with_child_spans(*, item, **kwargs):
24782479 LangfuseOtelSpanAttributes .EXPERIMENT_DATASET_ID ,
24792480 )
24802481
def test_experiment_id_is_stable_across_local_items(
    self, langfuse_client, memory_exporter
):
    """Test local experiments reuse one experiment ID across all items.

    Runs an experiment over two in-memory items, collects the
    ``experiment-item-run`` spans from the memory exporter, and checks that
    every span carries the same experiment ID as the one reported on the
    returned result.
    """
    local_data = [
        {"input": "test input 1", "expected_output": "expected result 1"},
        {"input": "test input 2", "expected_output": "expected result 2"},
    ]

    result = langfuse_client.run_experiment(
        name="Stable Local Experiment",
        data=local_data,
        task=lambda *, item, **kwargs: f"processed: {item['input']}",
    )

    langfuse_client.flush()
    time.sleep(0.1)

    root_spans = self.get_spans_by_name(memory_exporter, "experiment-item-run")

    # Guard against a vacuous pass: with a single exported span the ID set
    # below trivially has one element. Each item is expected to produce its
    # own root span, so require at least one span per input item.
    assert len(root_spans) >= len(local_data), (
        f"Expected at least {len(local_data)} root spans, got {len(root_spans)}"
    )

    experiment_ids = {
        span["attributes"][LangfuseOtelSpanAttributes.EXPERIMENT_ID]
        for span in root_spans
    }

    assert len(experiment_ids) == 1
    assert result.experiment_id == next(iter(experiment_ids))
2508+
24812509 def test_experiment_attributes_propagate_with_dataset (
24822510 self , langfuse_client , memory_exporter , monkeypatch
24832511 ):
24842512 """Test experiment attribute propagation with Langfuse dataset."""
24852513
# Mock the sync API used by run_experiment to create dataset run items
def mock_create_dataset_run_item(*args, **kwargs):
    """Return a canned ``DatasetRunItem`` in place of the real create call.

    Positional arguments are ignored. ``run_name``, ``dataset_item_id`` and
    ``observation_id`` are echoed from the keyword arguments when present;
    the run-item, dataset-run and trace IDs are fixed mock values.
    """
    from langfuse.api import DatasetRunItem

    # Assemble the fields first so the returned model is easy to scan.
    run_item_fields = {
        "id": "mock-run-item-id",
        "dataset_run_id": "mock-dataset-run-id-123",
        "dataset_run_name": kwargs.get("run_name", "Dataset Test"),
        "dataset_item_id": kwargs.get("dataset_item_id", "mock-item-id"),
        "trace_id": "mock-trace-id",
        "observation_id": kwargs.get("observation_id"),
        "created_at": datetime.now(),
        "updated_at": datetime.now(),
    }
    return DatasetRunItem(**run_item_fields)
24972528
24982529 monkeypatch .setattr (
2499- langfuse_client .async_api .dataset_run_items ,
2530+ langfuse_client .api .dataset_run_items ,
25002531 "create" ,
25012532 mock_create_dataset_run_item ,
25022533 )
@@ -2548,7 +2579,7 @@ def task_with_children(*, item, **kwargs):
25482579
25492580 # Run experiment
25502581 experiment_metadata = {"dataset_version" : "v2" , "test_run" : "true" }
2551- dataset .run_experiment (
2582+ result = dataset .run_experiment (
25522583 name = "Dataset Test" ,
25532584 description = "Dataset experiment description" ,
25542585 task = task_with_children ,
@@ -2562,6 +2593,7 @@ def task_with_children(*, item, **kwargs):
25622593 root_spans = self .get_spans_by_name (memory_exporter , "experiment-item-run" )
25632594 assert len (root_spans ) >= 1 , "Should have at least 1 root span"
25642595 first_root = root_spans [0 ]
2596+ assert result .experiment_id == "mock-dataset-run-id-123"
25652597
25662598 # Root-only attributes should be on root
25672599 self .verify_span_attribute (
@@ -2588,6 +2620,11 @@ def task_with_children(*, item, **kwargs):
25882620 LangfuseOtelSpanAttributes .EXPERIMENT_ITEM_ID ,
25892621 dataset_item_id ,
25902622 )
2623+ self .verify_span_attribute (
2624+ first_root ,
2625+ LangfuseOtelSpanAttributes .EXPERIMENT_ID ,
2626+ result .experiment_id ,
2627+ )
25912628
25922629 # Should have experiment metadata
25932630 self .verify_span_attribute (
0 commit comments