@@ -592,23 +592,38 @@ def test_run_experiment_with_versioned_dataset():
592592 # Fetch dataset to get the actual server-assigned timestamp of item1
593593 dataset_after_item1 = langfuse .get_dataset (name )
594594 assert len (dataset_after_item1 .items ) == 1
595+ item1_id = dataset_after_item1 .items [0 ].id
595596 item1_created_at = dataset_after_item1 .items [0 ].created_at
596597
597598 # Use a timestamp 1 second after item1's creation
598599 version_timestamp = item1_created_at + timedelta (seconds = 1 )
599600 time .sleep (3 )
600601
602+ # Update item1 after the version timestamp (this should not affect versioned query)
603+ langfuse .create_dataset_item (
604+ id = item1_id ,
605+ dataset_name = name ,
606+ input = {"question" : "What is 4+4?" },
607+ expected_output = "8" ,
608+ )
609+ langfuse .flush ()
610+ time .sleep (3 )
611+
601612 # Create second item (after version timestamp)
602613 langfuse .create_dataset_item (
603614 dataset_name = name , input = {"question" : "What is 3+3?" }, expected_output = "6"
604615 )
605616 langfuse .flush ()
606617 time .sleep (3 )
607618
608- # Get versioned dataset (should only have first item)
619+ # Get versioned dataset (should contain only the first item, in its ORIGINAL state)
609620 versioned_dataset = langfuse .get_dataset (name , version = version_timestamp )
610621 assert len (versioned_dataset .items ) == 1
611622 assert versioned_dataset .version == version_timestamp
623+ # Verify it returns the ORIGINAL version of item1 (before the update)
624+ assert versioned_dataset .items [0 ].input == {"question" : "What is 2+2?" }
625+ assert versioned_dataset .items [0 ].expected_output == "4"
626+ assert versioned_dataset .items [0 ].id == item1_id
612627
613628 # Run a simple experiment on the versioned dataset
614629 def simple_task (* , item , ** kwargs ):
0 commit comments