@@ -2456,13 +2456,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
24562456 )
24572457
24582458 def get_dataset (
2459- self , name : str , * , fetch_items_page_size : Optional [int ] = 50
2459+ self ,
2460+ name : str ,
2461+ * ,
2462+ fetch_items_page_size : Optional [int ] = 50 ,
2463+ version : Optional [datetime ] = None ,
24602464 ) -> "DatasetClient" :
24612465 """Fetch a dataset by its name.
24622466
24632467 Args:
24642468 name (str): The name of the dataset to fetch.
24652469 fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
2470+ version (Optional[datetime]): Point in time (UTC) at which to read the dataset items.
2471+ If provided, items are returned as they existed at that timestamp; the value must be a timezone-aware datetime object in UTC.
2472+ If not provided, the latest version of the items is returned.
24662473
24672474 Returns:
24682475 DatasetClient: The dataset with the given name.
@@ -2479,6 +2486,7 @@ def get_dataset(
24792486 dataset_name = self ._url_encode (name , is_url_param = True ),
24802487 page = page ,
24812488 limit = fetch_items_page_size ,
2489+ version = version ,
24822490 )
24832491 dataset_items .extend (new_items .data )
24842492
@@ -2489,7 +2497,7 @@ def get_dataset(
24892497
24902498 items = [DatasetItemClient (i , langfuse = self ) for i in dataset_items ]
24912499
2492- return DatasetClient (dataset , items = items )
2500+ return DatasetClient (dataset , items = items , version = version )
24932501
24942502 except Error as e :
24952503 handle_fern_exception (e )
@@ -2580,6 +2588,7 @@ def run_experiment(
25802588 run_evaluators : List [RunEvaluatorFunction ] = [],
25812589 max_concurrency : int = 50 ,
25822590 metadata : Optional [Dict [str , str ]] = None ,
2591+ _dataset_version : Optional [datetime ] = None ,
25832592 ) -> ExperimentResult :
25842593 """Run an experiment on a dataset with automatic tracing and evaluation.
25852594
@@ -2757,6 +2766,7 @@ def average_accuracy(*, item_results, **kwargs):
27572766 run_evaluators = run_evaluators or [],
27582767 max_concurrency = max_concurrency ,
27592768 metadata = metadata ,
2769+ dataset_version = _dataset_version ,
27602770 ),
27612771 ),
27622772 )
@@ -2774,6 +2784,7 @@ async def _run_experiment_async(
27742784 run_evaluators : List [RunEvaluatorFunction ],
27752785 max_concurrency : int ,
27762786 metadata : Optional [Dict [str , Any ]] = None ,
2787+ dataset_version : Optional [datetime ] = None ,
27772788 ) -> ExperimentResult :
27782789 langfuse_logger .debug (
27792790 f"Starting experiment '{ name } ' run '{ run_name } ' with { len (data )} items"
@@ -2794,6 +2805,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
27942805 run_name ,
27952806 description ,
27962807 metadata ,
2808+ dataset_version ,
27972809 )
27982810
27992811 # Run all items concurrently
@@ -2880,6 +2892,7 @@ async def _process_experiment_item(
28802892 experiment_run_name : str ,
28812893 experiment_description : Optional [str ],
28822894 experiment_metadata : Optional [Dict [str , Any ]] = None ,
2895+ dataset_version : Optional [datetime ] = None ,
28832896 ) -> ExperimentItemResult :
28842897 span_name = "experiment-item-run"
28852898
@@ -2931,6 +2944,7 @@ async def _process_experiment_item(
29312944 datasetItemId = item .id , # type: ignore
29322945 traceId = trace_id ,
29332946 observationId = span .id ,
2947+ datasetVersion = dataset_version ,
29342948 ),
29352949 )
29362950
0 commit comments