langfuse
diff --git a/‎langfuse/_client/client.py‎
Lines changed: 16 additions & 2 deletions b/‎langfuse/_client/client.py‎
Lines changed: 16 additions & 2 deletions
diff --git a/‎langfuse/_client/datasets.py‎
Lines changed: 10 additions & 2 deletions b/‎langfuse/_client/datasets.py‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎langfuse/api/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎langfuse/api/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎langfuse/api/reference.md‎
Lines changed: 14 additions & 1 deletion b/‎langfuse/api/reference.md‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎langfuse/api/resources/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎langfuse/api/resources/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎langfuse/api/resources/commons/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎langfuse/api/resources/commons/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎langfuse/api/resources/commons/types/__init__.py‎
Lines changed: 10 additions & 1 deletion b/‎langfuse/api/resources/commons/types/__init__.py‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎langfuse/api/resources/commons/types/correction_score.py‎
Lines changed: 53 additions & 0 deletions b/‎langfuse/api/resources/commons/types/correction_score.py‎
Lines changed: 53 additions & 0 deletions
@@ -2456,13 +2456,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
         )
 
     def get_dataset(
-        self, name: str, *, fetch_items_page_size: Optional[int] = 50
+        self,
+        name: str,
+        *,
+        fetch_items_page_size: Optional[int] = 50,
+        version: Optional[datetime] = None,
     ) -> "DatasetClient":
         """Fetch a dataset by its name.
 
         Args:
             name (str): The name of the dataset to fetch.
             fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
+            version (Optional[datetime]): Retrieve dataset items as they existed at this specific point in time (UTC).
+                If provided, returns the state of items at the specified UTC timestamp.
+                If not provided, returns the latest version. Must be a timezone-aware datetime object in UTC.
 
         Returns:
             DatasetClient: The dataset with the given name.
@@ -2479,6 +2486,7 @@ def get_dataset(
                     dataset_name=self._url_encode(name, is_url_param=True),
                     page=page,
                     limit=fetch_items_page_size,
+                    version=version,
                 )
                 dataset_items.extend(new_items.data)
 
@@ -2489,7 +2497,7 @@ def get_dataset(
 
             items = [DatasetItemClient(i, langfuse=self) for i in dataset_items]
 
-            return DatasetClient(dataset, items=items)
+            return DatasetClient(dataset, items=items, version=version)
 
         except Error as e:
             handle_fern_exception(e)
@@ -2580,6 +2588,7 @@ def run_experiment(
         run_evaluators: List[RunEvaluatorFunction] = [],
         max_concurrency: int = 50,
         metadata: Optional[Dict[str, str]] = None,
+        _dataset_version: Optional[datetime] = None,
     ) -> ExperimentResult:
         """Run an experiment on a dataset with automatic tracing and evaluation.
 
@@ -2757,6 +2766,7 @@ def average_accuracy(*, item_results, **kwargs):
                     run_evaluators=run_evaluators or [],
                     max_concurrency=max_concurrency,
                     metadata=metadata,
+                    dataset_version=_dataset_version,
                 ),
             ),
         )
@@ -2774,6 +2784,7 @@ async def _run_experiment_async(
         run_evaluators: List[RunEvaluatorFunction],
         max_concurrency: int,
         metadata: Optional[Dict[str, Any]] = None,
+        dataset_version: Optional[datetime] = None,
     ) -> ExperimentResult:
         langfuse_logger.debug(
             f"Starting experiment '{name}' run '{run_name}' with {len(data)} items"
@@ -2794,6 +2805,7 @@ async def process_item(item: ExperimentItem) -> ExperimentItemResult:
                     run_name,
                     description,
                     metadata,
+                    dataset_version,
                 )
 
         # Run all items concurrently
@@ -2880,6 +2892,7 @@ async def _process_experiment_item(
         experiment_run_name: str,
         experiment_description: Optional[str],
         experiment_metadata: Optional[Dict[str, Any]] = None,
+        dataset_version: Optional[datetime] = None,
     ) -> ExperimentItemResult:
         span_name = "experiment-item-run"
 
@@ -2931,6 +2944,7 @@ async def _process_experiment_item(
                                 datasetItemId=item.id,  # type: ignore
                                 traceId=trace_id,
                                 observationId=span.id,
+                                datasetVersion=dataset_version,
                             ),
                         )
 
 
@@ -155,7 +155,7 @@ class DatasetClient:
         created_at (datetime): Timestamp of dataset creation.
         updated_at (datetime): Timestamp of the last update to the dataset.
         items (List[DatasetItemClient]): List of dataset items associated with the dataset.
-
+        version (Optional[datetime]): Timestamp of the dataset version.
     Example:
         Print the input of each dataset item in a dataset.
         ```python
@@ -178,8 +178,14 @@ class DatasetClient:
     created_at: dt.datetime
     updated_at: dt.datetime
     items: List[DatasetItemClient]
+    version: Optional[dt.datetime]
 
-    def __init__(self, dataset: Dataset, items: List[DatasetItemClient]):
+    def __init__(
+        self,
+        dataset: Dataset,
+        items: List[DatasetItemClient],
+        version: Optional[dt.datetime] = None,
+    ):
         """Initialize the DatasetClient."""
         self.id = dataset.id
         self.name = dataset.name
@@ -189,6 +195,7 @@ def __init__(self, dataset: Dataset, items: List[DatasetItemClient]):
         self.created_at = dataset.created_at
         self.updated_at = dataset.updated_at
         self.items = items
+        self.version = version
         self._langfuse: Optional["Langfuse"] = None
 
     def _get_langfuse_client(self) -> Optional["Langfuse"]:
@@ -421,4 +428,5 @@ def content_diversity(*, item_results, **kwargs):
             run_evaluators=run_evaluators,
             max_concurrency=max_concurrency,
             metadata=metadata,
+            _dataset_version=self.version,
         )
@@ -36,6 +36,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateAnnotationQueueAssignmentResponse,
     CreateAnnotationQueueItemRequest,
     CreateAnnotationQueueRequest,
@@ -85,9 +86,11 @@
     GetScoresResponseData,
     GetScoresResponseDataBoolean,
     GetScoresResponseDataCategorical,
+    GetScoresResponseDataCorrection,
     GetScoresResponseDataNumeric,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
     GetScoresResponseTraceData,
     HealthResponse,
@@ -199,6 +202,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     SdkLogBody,
     SdkLogEvent,
@@ -293,6 +297,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateAnnotationQueueAssignmentResponse",
     "CreateAnnotationQueueItemRequest",
     "CreateAnnotationQueueRequest",
@@ -342,9 +347,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
     "HealthResponse",
@@ -456,6 +463,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "SdkLogBody",
     "SdkLogEvent",
 
@@ -1519,7 +1519,8 @@ client.dataset_items.get(
 <dl>
 <dd>
 
-Get dataset items
+Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+Note: If the `version` parameter is provided, `datasetName` must also be provided.
 </dd>
 </dl>
 </dd>
@@ -1584,6 +1585,18 @@ client.dataset_items.list()
 <dl>
 <dd>
 
+**version:** `typing.Optional[dt.datetime]` 
+
+ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+If provided, returns the state of the dataset at this timestamp.
+If not provided, returns the latest version. Requires `datasetName` to be specified.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **page:** `typing.Optional[int]` — page number, starts at 1
 
 </dd>
 
@@ -67,6 +67,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateScoreValue,
     Dataset,
     DatasetItem,
@@ -101,6 +102,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     Session,
     SessionWithTraces,
@@ -268,9 +270,11 @@
     GetScoresResponseData,
     GetScoresResponseDataBoolean,
     GetScoresResponseDataCategorical,
+    GetScoresResponseDataCorrection,
     GetScoresResponseDataNumeric,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
     GetScoresResponseTraceData,
 )
@@ -313,6 +317,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateAnnotationQueueAssignmentResponse",
     "CreateAnnotationQueueItemRequest",
     "CreateAnnotationQueueRequest",
@@ -362,9 +367,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
     "HealthResponse",
@@ -476,6 +483,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "SdkLogBody",
     "SdkLogEvent",
 
@@ -10,6 +10,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateScoreValue,
     Dataset,
     DatasetItem,
@@ -41,6 +42,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     Session,
     SessionWithTraces,
@@ -68,6 +70,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateScoreValue",
     "Dataset",
     "DatasetItem",
@@ -102,6 +105,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "Session",
     "SessionWithTraces",
 
@@ -9,6 +9,7 @@
 from .comment import Comment
 from .comment_object_type import CommentObjectType
 from .config_category import ConfigCategory
+from .correction_score import CorrectionScore
 from .create_score_value import CreateScoreValue
 from .dataset import Dataset
 from .dataset_item import DatasetItem
@@ -29,7 +30,13 @@
 from .pricing_tier_condition import PricingTierCondition
 from .pricing_tier_input import PricingTierInput
 from .pricing_tier_operator import PricingTierOperator
-from .score import Score, Score_Boolean, Score_Categorical, Score_Numeric
+from .score import (
+    Score,
+    Score_Boolean,
+    Score_Categorical,
+    Score_Correction,
+    Score_Numeric,
+)
 from .score_config import ScoreConfig
 from .score_config_data_type import ScoreConfigDataType
 from .score_data_type import ScoreDataType
@@ -52,6 +59,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateScoreValue",
     "Dataset",
     "DatasetItem",
@@ -83,6 +91,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "Session",
     "SessionWithTraces",
 
@@ -0,0 +1,53 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ....core.datetime_utils import serialize_datetime
+from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
+from .base_score import BaseScore
+
+
+class CorrectionScore(BaseScore):  # BaseScore subtype that adds the `value` and `string_value` fields below
+    value: float = pydantic_v1.Field()
+    """
+    The numeric value of the score. Always 0 for correction scores.
+    """
+
+    string_value: str = pydantic_v1.Field(alias="stringValue")  # wire/alias name is "stringValue"
+    """
+    The string representation of the correction content
+    """
+
+    def json(self, **kwargs: typing.Any) -> str:  # JSON dump using alias names, omitting unset fields by default
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,  # spread last, so caller-supplied options override the two defaults above
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:  # dict dump built from two passes (see return)
+        kwargs_with_defaults_exclude_unset: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,  # pass 1: leave out fields that were never set
+            **kwargs,
+        }
+        kwargs_with_defaults_exclude_none: typing.Any = {
+            "by_alias": True,
+            "exclude_none": True,  # pass 2: leave out fields whose value is None
+            **kwargs,
+        }
+
+        return deep_union_pydantic_dicts(  # NOTE(review): helper is defined elsewhere; presumably merges both dumps - confirm
+            super().dict(**kwargs_with_defaults_exclude_unset),
+            super().dict(**kwargs_with_defaults_exclude_none),
+        )
+
+    class Config:  # pydantic model configuration
+        frozen = True  # instances are immutable after construction
+        smart_union = True
+        allow_population_by_field_name = True  # pydantic v1 spelling: accept field names as well as aliases
+        populate_by_name = True  # pydantic v2 spelling of the same option
+        extra = pydantic_v1.Extra.allow  # unknown fields are accepted rather than rejected
+        json_encoders = {dt.datetime: serialize_datetime}  # datetimes are encoded through serialize_datetime