Skip to content

Commit a9d9697

Browse files
wochinge and claude committed
feat(scores): add TEXT type to score overloads and docstrings
Extend string-value overloads in create_score, score_current_span, score_current_trace, score, and score_trace to accept TEXT alongside CATEGORICAL. Update all related docstrings. Add ExperimentScoreType to exclude TEXT from experiments/evals. Add integration test for TEXT scores.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 77acd98 commit a9d9697

File tree

5 files changed

+80
-18
lines changed

5 files changed

+80
-18
lines changed

langfuse/_client/client.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,7 +1747,7 @@ def create_score(
17471747
trace_id: Optional[str] = None,
17481748
score_id: Optional[str] = None,
17491749
observation_id: Optional[str] = None,
1750-
data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
1750+
data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
17511751
comment: Optional[str] = None,
17521752
config_id: Optional[str] = None,
17531753
metadata: Optional[Any] = None,
@@ -1777,13 +1777,13 @@ def create_score(
17771777
17781778
Args:
17791779
name: Name of the score (e.g., "relevance", "accuracy")
1780-
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
1780+
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
17811781
session_id: ID of the Langfuse session to associate the score with
17821782
dataset_run_id: ID of the Langfuse dataset run to associate the score with
17831783
trace_id: ID of the Langfuse trace to associate the score with
17841784
observation_id: Optional ID of the specific observation to score. Trace ID must be provided too.
17851785
score_id: Optional custom ID for the score (auto-generated if not provided)
1786-
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
1786+
data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
17871787
comment: Optional comment or explanation for the score
17881788
config_id: Optional ID of a score config defined in Langfuse
17891789
metadata: Optional metadata to be attached to the score
@@ -1907,7 +1907,7 @@ def score_current_span(
19071907
name: str,
19081908
value: str,
19091909
score_id: Optional[str] = None,
1910-
data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
1910+
data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
19111911
comment: Optional[str] = None,
19121912
config_id: Optional[str] = None,
19131913
metadata: Optional[Any] = None,
@@ -1931,9 +1931,9 @@ def score_current_span(
19311931
19321932
Args:
19331933
name: Name of the score (e.g., "relevance", "accuracy")
1934-
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
1934+
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
19351935
score_id: Optional custom ID for the score (auto-generated if not provided)
1936-
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
1936+
data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
19371937
comment: Optional comment or explanation for the score
19381938
config_id: Optional ID of a score config defined in Langfuse
19391939
metadata: Optional metadata to be attached to the score
@@ -1997,7 +1997,7 @@ def score_current_trace(
19971997
name: str,
19981998
value: str,
19991999
score_id: Optional[str] = None,
2000-
data_type: Optional[Literal["CATEGORICAL"]] = "CATEGORICAL",
2000+
data_type: Optional[Literal["CATEGORICAL", "TEXT"]] = "CATEGORICAL",
20012001
comment: Optional[str] = None,
20022002
config_id: Optional[str] = None,
20032003
metadata: Optional[Any] = None,
@@ -2022,9 +2022,9 @@ def score_current_trace(
20222022
20232023
Args:
20242024
name: Name of the score (e.g., "user_satisfaction", "overall_quality")
2025-
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL)
2025+
value: Score value (can be numeric for NUMERIC/BOOLEAN types or string for CATEGORICAL/TEXT)
20262026
score_id: Optional custom ID for the score (auto-generated if not provided)
2027-
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
2027+
data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
20282028
comment: Optional comment or explanation for the score
20292029
config_id: Optional ID of a score config defined in Langfuse
20302030
metadata: Optional metadata to be attached to the score

langfuse/_client/span.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ def score(
308308
value: str,
309309
score_id: Optional[str] = None,
310310
data_type: Optional[
311-
Literal[ScoreDataType.CATEGORICAL]
311+
Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT]
312312
] = ScoreDataType.CATEGORICAL,
313313
comment: Optional[str] = None,
314314
config_id: Optional[str] = None,
@@ -335,9 +335,9 @@ def score(
335335
336336
Args:
337337
name: Name of the score (e.g., "relevance", "accuracy")
338-
value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL)
338+
value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT)
339339
score_id: Optional custom ID for the score (auto-generated if not provided)
340-
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
340+
data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
341341
comment: Optional comment or explanation for the score
342342
config_id: Optional ID of a score config defined in Langfuse
343343
timestamp: Optional timestamp for the score (defaults to current UTC time)
@@ -395,7 +395,7 @@ def score_trace(
395395
value: str,
396396
score_id: Optional[str] = None,
397397
data_type: Optional[
398-
Literal[ScoreDataType.CATEGORICAL]
398+
Literal[ScoreDataType.CATEGORICAL, ScoreDataType.TEXT]
399399
] = ScoreDataType.CATEGORICAL,
400400
comment: Optional[str] = None,
401401
config_id: Optional[str] = None,
@@ -423,9 +423,9 @@ def score_trace(
423423
424424
Args:
425425
name: Name of the score (e.g., "user_satisfaction", "overall_quality")
426-
value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL)
426+
value: Score value (numeric for NUMERIC/BOOLEAN, string for CATEGORICAL/TEXT)
427427
score_id: Optional custom ID for the score (auto-generated if not provided)
428-
data_type: Type of score (NUMERIC, BOOLEAN, or CATEGORICAL)
428+
data_type: Type of score (NUMERIC, BOOLEAN, CATEGORICAL, or TEXT)
429429
comment: Optional comment or explanation for the score
430430
config_id: Optional ID of a score config defined in Langfuse
431431
timestamp: Optional timestamp for the score (defaults to current UTC time)

langfuse/experiment.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
Union,
1818
)
1919

20-
from langfuse.api import DatasetItem, ScoreDataType
20+
from langfuse.api import DatasetItem
2121
from langfuse.logger import langfuse_logger as logger
22+
from langfuse.types import ExperimentScoreType
2223

2324

2425
class LocalExperimentItem(TypedDict, total=False):
@@ -184,7 +185,7 @@ def __init__(
184185
value: Union[int, float, str, bool],
185186
comment: Optional[str] = None,
186187
metadata: Optional[Dict[str, Any]] = None,
187-
data_type: Optional[ScoreDataType] = None,
188+
data_type: Optional[ExperimentScoreType] = None,
188189
config_id: Optional[str] = None,
189190
):
190191
"""Initialize an Evaluation with the provided data.

langfuse/types.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ def my_evaluator(*, output: str, **kwargs) -> Evaluation:
3535

3636
SpanLevel = Literal["DEBUG", "DEFAULT", "WARNING", "ERROR"]
3737

38-
ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"]
38+
ScoreDataType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN", "TEXT"]
39+
40+
# Text scores are not supported for evals and experiments
41+
ExperimentScoreType = Literal["NUMERIC", "CATEGORICAL", "BOOLEAN"]
3942

4043

4144
class MaskFunction(Protocol):

tests/test_core_sdk.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,64 @@ def test_create_categorical_score():
321321
assert created_score["stringValue"] == "high score"
322322

323323

324+
def test_create_text_score():
325+
langfuse = Langfuse()
326+
api_wrapper = LangfuseAPI()
327+
328+
# Create a span and set trace properties
329+
with langfuse.start_as_current_observation(name="test-span") as span:
330+
with propagate_attributes(
331+
trace_name="this-is-so-great-new",
332+
user_id="test",
333+
metadata={"test": "test"},
334+
):
335+
# Get trace ID for later use
336+
trace_id = span.trace_id
337+
338+
# Ensure data is sent
339+
langfuse.flush()
340+
sleep(2)
341+
342+
# Create a text score
343+
score_id = create_uuid()
344+
langfuse.create_score(
345+
score_id=score_id,
346+
trace_id=trace_id,
347+
name="this-is-a-score",
348+
value="This is a detailed text evaluation of the output quality.",
349+
data_type="TEXT",
350+
)
351+
352+
# Create a generation in the same trace
353+
generation = langfuse.start_observation(
354+
as_type="generation",
355+
name="yet another child",
356+
metadata="test",
357+
trace_context={"trace_id": trace_id},
358+
)
359+
generation.end()
360+
361+
# Ensure data is sent
362+
langfuse.flush()
363+
sleep(2)
364+
365+
# Retrieve and verify
366+
trace = api_wrapper.get_trace(trace_id)
367+
368+
# Find the score we created by name
369+
created_score = next(
370+
(s for s in trace["scores"] if s["name"] == "this-is-a-score"), None
371+
)
372+
assert created_score is not None, "Score not found in trace"
373+
assert created_score["id"] == score_id
374+
assert created_score["dataType"] == "TEXT"
375+
assert created_score["value"] is None
376+
assert (
377+
created_score["stringValue"]
378+
== "This is a detailed text evaluation of the output quality."
379+
)
380+
381+
324382
def test_create_score_with_custom_timestamp():
325383
langfuse = Langfuse()
326384
api_wrapper = LangfuseAPI()

0 commit comments

Comments
 (0)