Skip to content

Commit 0701a86

Browse files
authored
feat(scores): add session and dataset run scores (#1201)
1 parent 23650a7 commit 0701a86

File tree

79 files changed

+7624
-952
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+7624
-952
lines changed

langfuse/_client/client.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,9 @@ def __init__(
166166
self._project_id = None
167167
sample_rate = sample_rate or float(os.environ.get(LANGFUSE_SAMPLE_RATE, 1.0))
168168
if not 0.0 <= sample_rate <= 1.0:
169-
raise ValueError(f"Sample rate must be between 0.0 and 1.0, got {sample_rate}")
169+
raise ValueError(
170+
f"Sample rate must be between 0.0 and 1.0, got {sample_rate}"
171+
)
170172

171173
self._tracing_enabled = (
172174
tracing_enabled
@@ -1214,12 +1216,15 @@ def create_score(
12141216
*,
12151217
name: str,
12161218
value: float,
1217-
trace_id: str,
1219+
session_id: Optional[str] = None,
1220+
dataset_run_id: Optional[str] = None,
1221+
trace_id: Optional[str] = None,
12181222
observation_id: Optional[str] = None,
12191223
score_id: Optional[str] = None,
12201224
data_type: Optional[Literal["NUMERIC", "BOOLEAN"]] = None,
12211225
comment: Optional[str] = None,
12221226
config_id: Optional[str] = None,
1227+
metadata: Optional[Any] = None,
12231228
) -> None: ...
12241229

12251230
@overload
@@ -1228,25 +1233,31 @@ def create_score(
12281233
*,
12291234
name: str,
12301235
value: str,
1231-
trace_id: str,
1236+
session_id: Optional[str] = None,
1237+
dataset_run_id: Optional[str] = None,
1238+
trace_id: Optional[str] = None,
12321239
score_id: Optional[str] = None,
12331240
observation_id: Optional[str] = None,
12341241
data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
12351242
comment: Optional[str] = None,
12361243
config_id: Optional[str] = None,
1244+
metadata: Optional[Any] = None,
12371245
) -> None: ...
12381246

12391247
def create_score(
12401248
self,
12411249
*,
12421250
name: str,
12431251
value: Union[float, str],
1244-
trace_id: str,
1252+
session_id: Optional[str] = None,
1253+
dataset_run_id: Optional[str] = None,
1254+
trace_id: Optional[str] = None,
12451255
observation_id: Optional[str] = None,
12461256
score_id: Optional[str] = None,
12471257
data_type: Optional[ScoreDataType] = None,
12481258
comment: Optional[str] = None,
12491259
config_id: Optional[str] = None,
1260+
metadata: Optional[Any] = None,
12501261
) -> None:
12511262
"""Create a score for a specific trace or observation.
12521263
@@ -1256,12 +1267,15 @@ def create_score(
12561267
Args:
12571268
name: Name of the score (e.g., "relevance", "accuracy")
12581269
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
1270+
session_id: ID of the Langfuse session to associate the score with
1271+
dataset_run_id: ID of the Langfuse dataset run to associate the score with
12591272
trace_id: ID of the Langfuse trace to associate the score with
1260-
observation_id: Optional ID of the specific observation to score
1273+
observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
12611274
score_id: Optional custom ID for the score (auto-generated if not provided)
12621275
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
12631276
comment: Optional comment or explanation for the score
12641277
config_id: Optional ID of a score config defined in Langfuse
1278+
metadata: Optional metadata to be attached to the score
12651279
12661280
Example:
12671281
```python
@@ -1292,6 +1306,8 @@ def create_score(
12921306
try:
12931307
score_event = {
12941308
"id": score_id,
1309+
"session_id": session_id,
1310+
"dataset_run_id": dataset_run_id,
12951311
"trace_id": trace_id,
12961312
"observation_id": observation_id,
12971313
"name": name,
@@ -1300,6 +1316,7 @@ def create_score(
13001316
"comment": comment,
13011317
"config_id": config_id,
13021318
"environment": self._environment,
1319+
"metadata": metadata,
13031320
}
13041321

13051322
new_body = ScoreBody(**score_event)

langfuse/_client/resource_manager.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -256,13 +256,18 @@ def add_score_task(self, event: dict):
256256
# Sample scores with the same sampler that is used for tracing
257257
tracer_provider = cast(TracerProvider, otel_trace_api.get_tracer_provider())
258258
should_sample = (
259-
tracer_provider.sampler.should_sample(
260-
parent_context=None,
261-
trace_id=int(event["body"].trace_id, 16),
262-
name="score",
263-
).decision
264-
== Decision.RECORD_AND_SAMPLE
265-
if hasattr(event["body"], "trace_id")
259+
(
260+
tracer_provider.sampler.should_sample(
261+
parent_context=None,
262+
trace_id=int(event["body"].trace_id, 16),
263+
name="score",
264+
).decision
265+
== Decision.RECORD_AND_SAMPLE
266+
if hasattr(event["body"], "trace_id")
267+
else True
268+
)
269+
if event["body"].trace_id
270+
is not None # do not sample out session / dataset run scores
266271
else True
267272
)
268273

langfuse/api/__init__.py

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,20 @@
66
AnnotationQueueItem,
77
AnnotationQueueObjectType,
88
AnnotationQueueStatus,
9+
ApiKeyDeletionResponse,
10+
ApiKeyList,
11+
ApiKeyResponse,
12+
ApiKeySummary,
13+
AuthenticationScheme,
914
BaseEvent,
1015
BasePrompt,
1116
BaseScore,
17+
BaseScoreV1,
1218
BooleanScore,
19+
BooleanScoreV1,
20+
BulkConfig,
1321
CategoricalScore,
22+
CategoricalScoreV1,
1423
ChatMessage,
1524
ChatPrompt,
1625
Comment,
@@ -39,8 +48,6 @@
3948
CreateSpanBody,
4049
CreateSpanEvent,
4150
CreateTextPromptRequest,
42-
DailyMetrics,
43-
DailyMetricsDetails,
4451
Dataset,
4552
DatasetItem,
4653
DatasetRun,
@@ -51,7 +58,9 @@
5158
DeleteDatasetItemResponse,
5259
DeleteDatasetRunResponse,
5360
DeleteTraceResponse,
61+
EmptyResponse,
5462
Error,
63+
FilterConfig,
5564
GetCommentsResponse,
5665
GetMediaResponse,
5766
GetMediaUploadUrlRequest,
@@ -83,11 +92,18 @@
8392
IngestionUsage,
8493
MapValue,
8594
MediaContentType,
95+
MembershipRequest,
96+
MembershipResponse,
97+
MembershipRole,
98+
MembershipsResponse,
8699
MethodNotAllowedError,
100+
MetricsResponse,
87101
Model,
102+
ModelPrice,
88103
ModelUsageUnit,
89104
NotFoundError,
90105
NumericScore,
106+
NumericScoreV1,
91107
Observation,
92108
ObservationBody,
93109
ObservationLevel,
@@ -99,33 +115,53 @@
99115
OpenAiResponseUsageSchema,
100116
OpenAiUsage,
101117
OptionalObservationBody,
118+
OrganizationProject,
119+
OrganizationProjectsResponse,
102120
PaginatedAnnotationQueueItems,
103121
PaginatedAnnotationQueues,
104122
PaginatedDatasetItems,
123+
PaginatedDatasetRunItems,
105124
PaginatedDatasetRuns,
106125
PaginatedDatasets,
107126
PaginatedModels,
108127
PaginatedSessions,
109128
PatchMediaBody,
110129
Project,
130+
ProjectDeletionResponse,
111131
Projects,
112132
Prompt,
113133
PromptMeta,
114134
PromptMetaListResponse,
115135
Prompt_Chat,
116136
Prompt_Text,
137+
ResourceMeta,
138+
ResourceType,
139+
ResourceTypesResponse,
140+
SchemaExtension,
141+
SchemaResource,
142+
SchemasResponse,
143+
ScimEmail,
144+
ScimFeatureSupport,
145+
ScimName,
146+
ScimUser,
147+
ScimUsersListResponse,
117148
Score,
118149
ScoreBody,
119150
ScoreConfig,
120151
ScoreConfigs,
121152
ScoreDataType,
122153
ScoreEvent,
123154
ScoreSource,
155+
ScoreV1,
156+
ScoreV1_Boolean,
157+
ScoreV1_Categorical,
158+
ScoreV1_Numeric,
124159
Score_Boolean,
125160
Score_Categorical,
126161
Score_Numeric,
127162
SdkLogBody,
128163
SdkLogEvent,
164+
ServiceProviderConfig,
129165
ServiceUnavailableError,
130166
Session,
131167
SessionWithTraces,
@@ -146,8 +182,8 @@
146182
UpdateSpanBody,
147183
UpdateSpanEvent,
148184
Usage,
149-
UsageByModel,
150185
UsageDetails,
186+
UserMeta,
151187
annotation_queues,
152188
comments,
153189
commons,
@@ -160,11 +196,14 @@
160196
metrics,
161197
models,
162198
observations,
199+
organizations,
163200
projects,
164201
prompt_version,
165202
prompts,
203+
scim,
166204
score,
167205
score_configs,
206+
score_v_2,
168207
sessions,
169208
trace,
170209
utils,
@@ -176,11 +215,20 @@
176215
"AnnotationQueueItem",
177216
"AnnotationQueueObjectType",
178217
"AnnotationQueueStatus",
218+
"ApiKeyDeletionResponse",
219+
"ApiKeyList",
220+
"ApiKeyResponse",
221+
"ApiKeySummary",
222+
"AuthenticationScheme",
179223
"BaseEvent",
180224
"BasePrompt",
181225
"BaseScore",
226+
"BaseScoreV1",
182227
"BooleanScore",
228+
"BooleanScoreV1",
229+
"BulkConfig",
183230
"CategoricalScore",
231+
"CategoricalScoreV1",
184232
"ChatMessage",
185233
"ChatPrompt",
186234
"Comment",
@@ -209,8 +257,6 @@
209257
"CreateSpanBody",
210258
"CreateSpanEvent",
211259
"CreateTextPromptRequest",
212-
"DailyMetrics",
213-
"DailyMetricsDetails",
214260
"Dataset",
215261
"DatasetItem",
216262
"DatasetRun",
@@ -221,7 +267,9 @@
221267
"DeleteDatasetItemResponse",
222268
"DeleteDatasetRunResponse",
223269
"DeleteTraceResponse",
270+
"EmptyResponse",
224271
"Error",
272+
"FilterConfig",
225273
"GetCommentsResponse",
226274
"GetMediaResponse",
227275
"GetMediaUploadUrlRequest",
@@ -253,11 +301,18 @@
253301
"IngestionUsage",
254302
"MapValue",
255303
"MediaContentType",
304+
"MembershipRequest",
305+
"MembershipResponse",
306+
"MembershipRole",
307+
"MembershipsResponse",
256308
"MethodNotAllowedError",
309+
"MetricsResponse",
257310
"Model",
311+
"ModelPrice",
258312
"ModelUsageUnit",
259313
"NotFoundError",
260314
"NumericScore",
315+
"NumericScoreV1",
261316
"Observation",
262317
"ObservationBody",
263318
"ObservationLevel",
@@ -269,33 +324,53 @@
269324
"OpenAiResponseUsageSchema",
270325
"OpenAiUsage",
271326
"OptionalObservationBody",
327+
"OrganizationProject",
328+
"OrganizationProjectsResponse",
272329
"PaginatedAnnotationQueueItems",
273330
"PaginatedAnnotationQueues",
274331
"PaginatedDatasetItems",
332+
"PaginatedDatasetRunItems",
275333
"PaginatedDatasetRuns",
276334
"PaginatedDatasets",
277335
"PaginatedModels",
278336
"PaginatedSessions",
279337
"PatchMediaBody",
280338
"Project",
339+
"ProjectDeletionResponse",
281340
"Projects",
282341
"Prompt",
283342
"PromptMeta",
284343
"PromptMetaListResponse",
285344
"Prompt_Chat",
286345
"Prompt_Text",
346+
"ResourceMeta",
347+
"ResourceType",
348+
"ResourceTypesResponse",
349+
"SchemaExtension",
350+
"SchemaResource",
351+
"SchemasResponse",
352+
"ScimEmail",
353+
"ScimFeatureSupport",
354+
"ScimName",
355+
"ScimUser",
356+
"ScimUsersListResponse",
287357
"Score",
288358
"ScoreBody",
289359
"ScoreConfig",
290360
"ScoreConfigs",
291361
"ScoreDataType",
292362
"ScoreEvent",
293363
"ScoreSource",
364+
"ScoreV1",
365+
"ScoreV1_Boolean",
366+
"ScoreV1_Categorical",
367+
"ScoreV1_Numeric",
294368
"Score_Boolean",
295369
"Score_Categorical",
296370
"Score_Numeric",
297371
"SdkLogBody",
298372
"SdkLogEvent",
373+
"ServiceProviderConfig",
299374
"ServiceUnavailableError",
300375
"Session",
301376
"SessionWithTraces",
@@ -316,8 +391,8 @@
316391
"UpdateSpanBody",
317392
"UpdateSpanEvent",
318393
"Usage",
319-
"UsageByModel",
320394
"UsageDetails",
395+
"UserMeta",
321396
"annotation_queues",
322397
"comments",
323398
"commons",
@@ -330,11 +405,14 @@
330405
"metrics",
331406
"models",
332407
"observations",
408+
"organizations",
333409
"projects",
334410
"prompt_version",
335411
"prompts",
412+
"scim",
336413
"score",
337414
"score_configs",
415+
"score_v_2",
338416
"sessions",
339417
"trace",
340418
"utils",

0 commit comments

Comments
 (0)