Skip to content

Commit 2156ee8

Browse files
committed
chore: support dataset versioning via SDK
1 parent c2dd867 commit 2156ee8

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

langfuse/_client/client.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2442,13 +2442,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
24422442
)
24432443

24442444
def get_dataset(
2445-
self, name: str, *, fetch_items_page_size: Optional[int] = 50
2445+
self,
2446+
name: str,
2447+
*,
2448+
fetch_items_page_size: Optional[int] = 50,
2449+
version: Optional[datetime] = None,
24462450
) -> "DatasetClient":
24472451
"""Fetch a dataset by its name.
24482452
24492453
Args:
24502454
name (str): The name of the dataset to fetch.
24512455
fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
2456+
version (Optional[datetime]): Retrieve dataset items as they existed at this specific point in time (UTC).
2457+
If provided, returns the state of items at the specified UTC timestamp.
2458+
If not provided, returns the latest version. Must be a timezone-aware datetime object in UTC.
24522459
24532460
Returns:
24542461
DatasetClient: The dataset with the given name.
@@ -2465,6 +2472,7 @@ def get_dataset(
24652472
dataset_name=self._url_encode(name, is_url_param=True),
24662473
page=page,
24672474
limit=fetch_items_page_size,
2475+
version=version,
24682476
)
24692477
dataset_items.extend(new_items.data)
24702478

tests/test_datasets.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,3 +527,41 @@ def test_delete_dataset_run_with_folder_names():
527527
# Verify the run is deleted
528528
runs_after = langfuse.get_dataset_runs(dataset_name=folder_name)
529529
assert len(runs_after.data) == 0
530+
531+
532+
def test_get_dataset_with_version():
    """Check that get_dataset honours the ``version`` timestamp filter.

    Two items are added to a fresh dataset, with a UTC timestamp captured
    between the two creations. Fetching at that timestamp is expected to
    yield only the first item; fetching without a version is expected to
    yield both.
    """
    import time
    from datetime import datetime, timezone

    client = Langfuse(debug=False)

    # Fresh dataset under a unique name
    dataset_name = create_uuid()
    client.create_dataset(name=dataset_name)

    # First item; pause so it is persisted clearly before the cutoff
    first_item = client.create_dataset_item(
        dataset_name=dataset_name, input={"version": "v1"}
    )
    client.flush()
    time.sleep(3)

    # The cutoff sits between the first and the second item creation
    cutoff = datetime.now(timezone.utc)
    time.sleep(3)  # keep the second item clearly after the cutoff

    # Second item is created after the cutoff
    client.create_dataset_item(dataset_name=dataset_name, input={"version": "v2"})
    client.flush()
    time.sleep(3)  # allow time for persistence before reading back

    # Versioned fetch: only the first item should be visible
    versioned = client.get_dataset(dataset_name, version=cutoff)
    versioned_items = versioned.items
    assert len(versioned_items) == 1
    only_item = versioned_items[0]
    assert only_item.input == {"version": "v1"}
    assert only_item.id == first_item.id

    # Unversioned fetch: latest state, both items present
    latest = client.get_dataset(dataset_name)
    assert len(latest.items) == 2

0 commit comments

Comments
 (0)