lightspeed-core
diff --git a/‎README.md‎
Lines changed: 3 additions & 0 deletions b/‎README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎config/system.yaml‎
Lines changed: 4 additions & 0 deletions b/‎config/system.yaml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 10 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎requirements-all-extras.txt‎
Lines changed: 14 additions & 2 deletions b/‎requirements-all-extras.txt‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎requirements-local-embeddings.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements-local-embeddings.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements-nlp-metrics.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements-nlp-metrics.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/lightspeed_evaluation/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎src/lightspeed_evaluation/__init__.py‎
Lines changed: 5 additions & 0 deletions
@@ -491,6 +491,9 @@ export AZURE_API_BASE="https://your-resource.openai.azure.com/"
 export API_KEY="your-api-endpoint-key"
 ```
 
+#### Optional: Langfuse
+After an evaluation run completes, you can send the results to [Langfuse](https://langfuse.com/) as a single trace with one score per metric. Install the extra (`pip install 'lightspeed-evaluation[langfuse]'`), set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` (plus `LANGFUSE_HOST` if you are not using the default cloud host), then either run `lightspeed-eval --langfuse` or set `LIGHTSPEED_USE_LANGFUSE=1`. From Python, pass `on_complete=build_langfuse_on_complete_callback()` (imported from `lightspeed_evaluation.integrations.langfuse_reporter`) to `evaluate()`.
+
 ## 📈 Output & Visualization
 
 ### Generated Reports
 
@@ -291,6 +291,10 @@ storage:
   #   database: "./eval_results.db"
   #   table_name: "evaluation_results"
 
+  # Langfuse backend (optional) - stores results incrementally to Langfuse
+  # - type: "langfuse"
+  #   host: "https://cloud.langfuse.com"  # replace with your self-hosted Langfuse URL if applicable
+
 # Visualization settings
 visualization:
   figsize: [12, 8]            # Graph size (width, height)
 
@@ -52,8 +52,18 @@ nlp-metrics = [
     "rapidfuzz>=3.0.0,<=3.14.3",     # Required for semantic_similarity_distance
 ]
 
+# Optional Langfuse reporting. Uses the v2 SDK.
+#   pip install 'lightspeed-evaluation[langfuse]'
+# or
+#   uv sync --extra langfuse
+langfuse = [
+    "langfuse>=2.0.0,<3.0.0",
+]
+
 [dependency-groups]
 dev = [
+    # Keep in sync with the `langfuse` extra in [project.optional-dependencies]; listed here so type checking and tests can import it.
+    "langfuse>=2.0.0,<3.0.0",
     "bandit>=1.7.0,<=1.9.2",
     "black==25.1.0",
     "mypy>=1.15.0,<=1.17.1",
 
@@ -20,6 +20,7 @@ annotated-types==0.7.0
 anyio==4.13.0
     # via
     #   httpx
+    #   langfuse
     #   openai
 appdirs==1.4.4
     # via ragas
@@ -29,7 +30,9 @@ attrs==26.1.0
     #   jsonschema
     #   referencing
 backoff==2.2.1
-    # via posthog
+    # via
+    #   langfuse
+    #   posthog
 certifi==2026.2.25
     # via
     #   httpcore
@@ -134,6 +137,7 @@ httpcore==1.0.9
 httpx==0.28.1
     # via
     #   huggingface-hub
+    #   langfuse
     #   langgraph-sdk
     #   langsmith
     #   lightspeed-evaluation
@@ -152,6 +156,7 @@ idna==3.11
     # via
     #   anyio
     #   httpx
+    #   langfuse
     #   requests
     #   yarl
 importlib-metadata==8.7.1
@@ -215,6 +220,8 @@ langchain-openai==1.1.12
     # via ragas
 langchain-text-splitters==1.1.1
     # via langchain-classic
+langfuse==2.60.10
+    # via lightspeed-evaluation
 langgraph==1.1.6
     # via langchain
 langgraph-checkpoint==4.0.1
@@ -305,11 +312,12 @@ orjson==3.11.8
     #   langsmith
 ormsgpack==1.12.2
     # via langgraph-checkpoint
-packaging==26.0
+packaging==24.2
     # via
     #   datasets
     #   huggingface-hub
     #   langchain-core
+    #   langfuse
     #   langsmith
     #   marshmallow
     #   matplotlib
@@ -365,6 +373,7 @@ pydantic==2.11.7
     #   langchain-classic
     #   langchain-core
     #   langchain-google-genai
+    #   langfuse
     #   langgraph
     #   langsmith
     #   lightspeed-evaluation
@@ -448,6 +457,7 @@ requests==2.33.1
     #   instructor
     #   langchain-classic
     #   langchain-community
+    #   langfuse
     #   langsmith
     #   posthog
     #   requests-toolbelt
@@ -587,6 +597,8 @@ uuid-utils==0.14.1
     #   langsmith
 wheel==0.46.3
     # via deepeval
+wrapt==1.17.3
+    # via langfuse
 xxhash==3.6.0
     # via
     #   datasets
 
@@ -293,7 +293,7 @@ orjson==3.11.8
     #   langsmith
 ormsgpack==1.12.2
     # via langgraph-checkpoint
-packaging==26.0
+packaging==24.2
     # via
     #   datasets
     #   huggingface-hub
 
@@ -291,7 +291,7 @@ orjson==3.11.8
     #   langsmith
 ormsgpack==1.12.2
     # via langgraph-checkpoint
-packaging==26.0
+packaging==24.2
     # via
     #   datasets
     #   huggingface-hub
 
@@ -279,7 +279,7 @@ orjson==3.11.8
     #   langsmith
 ormsgpack==1.12.2
     # via langgraph-checkpoint
-packaging==26.0
+packaging==24.2
     # via
     #   datasets
     #   huggingface-hub
 
@@ -26,6 +26,7 @@
         APIConfig,
         EvaluationData,
         EvaluationResult,
+        EvaluationRunContext,
         LLMConfig,
         LoggingConfig,
         TurnData,
@@ -80,6 +81,10 @@
     "EvaluationData": ("lightspeed_evaluation.core.models", "EvaluationData"),
     "TurnData": ("lightspeed_evaluation.core.models", "TurnData"),
     "EvaluationResult": ("lightspeed_evaluation.core.models", "EvaluationResult"),
+    "EvaluationRunContext": (
+        "lightspeed_evaluation.core.models",
+        "EvaluationRunContext",
+    ),
     "EvaluationSummary": (
         "lightspeed_evaluation.core.models.summary",
         "EvaluationSummary",