Skip to content

Commit e020b82

Browse files
committed
Added Langfuse compatibility to lightspeed-eval
1 parent fd9ee3a commit e020b82

27 files changed

Lines changed: 1279 additions & 62 deletions

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,9 @@ export AZURE_API_BASE="https://your-resource.openai.azure.com/"
491491
export API_KEY="your-api-endpoint-key"
492492
```
493493

494+
#### Optional: Langfuse
495+
After a run, you can send a single trace with per-metric scores to [Langfuse](https://langfuse.com/). Install the optional extra (`pip install 'lightspeed-evaluation[langfuse]'`), set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` (plus `LANGFUSE_HOST` if you are not using the default Langfuse cloud), then either run `lightspeed-eval --langfuse` or set `LIGHTSPEED_USE_LANGFUSE=1`. From Python, pass `on_complete=build_langfuse_on_complete_callback()` (imported from `lightspeed_evaluation.integrations.langfuse_reporter`) to `evaluate()`.
496+
494497
## 📈 Output & Visualization
495498

496499
### Generated Reports

config/system.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,10 @@ storage:
291291
# database: "./eval_results.db"
292292
# table_name: "evaluation_results"
293293

294+
# Langfuse backend (optional) - stores results incrementally to Langfuse
295+
# - type: "langfuse"
296+
# host: "https://cloud.langfuse.com"  # or the URL of your self-hosted Langfuse instance
297+
294298
# Visualization settings
295299
visualization:
296300
figsize: [12, 8] # Graph size (width, height)

pyproject.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,18 @@ nlp-metrics = [
5252
"rapidfuzz>=3.0.0,<=3.14.3", # Required for semantic_similarity_distance
5353
]
5454

55+
# Optional Langfuse reporting. Uses the v2 SDK.
56+
# pip install 'lightspeed-evaluation[langfuse]'
57+
# or
58+
# uv sync --extra langfuse
59+
langfuse = [
60+
"langfuse>=2.0.0,<3.0.0",
61+
]
62+
5563
[dependency-groups]
5664
dev = [
65+
# Matches [project.optional-dependencies] langfuse — for typecheck/tests.
66+
"langfuse>=2.0.0,<3.0.0",
5767
"bandit>=1.7.0,<=1.9.2",
5868
"black==25.1.0",
5969
"mypy>=1.15.0,<=1.17.1",

requirements-all-extras.txt

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ annotated-types==0.7.0
2020
anyio==4.13.0
2121
# via
2222
# httpx
23+
# langfuse
2324
# openai
2425
appdirs==1.4.4
2526
# via ragas
@@ -29,7 +30,9 @@ attrs==26.1.0
2930
# jsonschema
3031
# referencing
3132
backoff==2.2.1
32-
# via posthog
33+
# via
34+
# langfuse
35+
# posthog
3336
certifi==2026.2.25
3437
# via
3538
# httpcore
@@ -134,6 +137,7 @@ httpcore==1.0.9
134137
httpx==0.28.1
135138
# via
136139
# huggingface-hub
140+
# langfuse
137141
# langgraph-sdk
138142
# langsmith
139143
# lightspeed-evaluation
@@ -152,6 +156,7 @@ idna==3.11
152156
# via
153157
# anyio
154158
# httpx
159+
# langfuse
155160
# requests
156161
# yarl
157162
importlib-metadata==8.7.1
@@ -215,6 +220,8 @@ langchain-openai==1.1.12
215220
# via ragas
216221
langchain-text-splitters==1.1.1
217222
# via langchain-classic
223+
langfuse==2.60.10
224+
# via lightspeed-evaluation
218225
langgraph==1.1.6
219226
# via langchain
220227
langgraph-checkpoint==4.0.1
@@ -305,11 +312,12 @@ orjson==3.11.8
305312
# langsmith
306313
ormsgpack==1.12.2
307314
# via langgraph-checkpoint
308-
packaging==26.0
315+
packaging==24.2
309316
# via
310317
# datasets
311318
# huggingface-hub
312319
# langchain-core
320+
# langfuse
313321
# langsmith
314322
# marshmallow
315323
# matplotlib
@@ -365,6 +373,7 @@ pydantic==2.11.7
365373
# langchain-classic
366374
# langchain-core
367375
# langchain-google-genai
376+
# langfuse
368377
# langgraph
369378
# langsmith
370379
# lightspeed-evaluation
@@ -448,6 +457,7 @@ requests==2.33.1
448457
# instructor
449458
# langchain-classic
450459
# langchain-community
460+
# langfuse
451461
# langsmith
452462
# posthog
453463
# requests-toolbelt
@@ -587,6 +597,8 @@ uuid-utils==0.14.1
587597
# langsmith
588598
wheel==0.46.3
589599
# via deepeval
600+
wrapt==1.17.3
601+
# via langfuse
590602
xxhash==3.6.0
591603
# via
592604
# datasets

requirements-local-embeddings.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ orjson==3.11.8
293293
# langsmith
294294
ormsgpack==1.12.2
295295
# via langgraph-checkpoint
296-
packaging==26.0
296+
packaging==24.2
297297
# via
298298
# datasets
299299
# huggingface-hub

requirements-nlp-metrics.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ orjson==3.11.8
291291
# langsmith
292292
ormsgpack==1.12.2
293293
# via langgraph-checkpoint
294-
packaging==26.0
294+
packaging==24.2
295295
# via
296296
# datasets
297297
# huggingface-hub

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ orjson==3.11.8
279279
# langsmith
280280
ormsgpack==1.12.2
281281
# via langgraph-checkpoint
282-
packaging==26.0
282+
packaging==24.2
283283
# via
284284
# datasets
285285
# huggingface-hub

src/lightspeed_evaluation/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
APIConfig,
2727
EvaluationData,
2828
EvaluationResult,
29+
EvaluationRunContext,
2930
LLMConfig,
3031
LoggingConfig,
3132
TurnData,
@@ -80,6 +81,10 @@
8081
"EvaluationData": ("lightspeed_evaluation.core.models", "EvaluationData"),
8182
"TurnData": ("lightspeed_evaluation.core.models", "TurnData"),
8283
"EvaluationResult": ("lightspeed_evaluation.core.models", "EvaluationResult"),
84+
"EvaluationRunContext": (
85+
"lightspeed_evaluation.core.models",
86+
"EvaluationRunContext",
87+
),
8388
"EvaluationSummary": (
8489
"lightspeed_evaluation.core.models.summary",
8590
"EvaluationSummary",

0 commit comments

Comments
 (0)