Anxhela21
diff --git a/‎docs/EVALUATION_GUIDE.md‎
Lines changed: 133 additions & 12 deletions b/‎docs/EVALUATION_GUIDE.md‎
Lines changed: 133 additions & 12 deletions
diff --git a/‎src/lightspeed_evaluation/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎src/lightspeed_evaluation/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/lightspeed_evaluation/api.py‎
Lines changed: 110 additions & 0 deletions b/‎src/lightspeed_evaluation/api.py‎
Lines changed: 110 additions & 0 deletions
diff --git a/‎src/lightspeed_evaluation/core/system/loader.py‎
Lines changed: 46 additions & 0 deletions b/‎src/lightspeed_evaluation/core/system/loader.py‎
Lines changed: 46 additions & 0 deletions
@@ -21,16 +21,17 @@
 7. [Step-by-Step Setup](#7-step-by-step-setup)
 8. [Configuration Guide](#8-configuration-guide)
 9. [Running Evaluations](#9-running-evaluations)
-10. [Understanding Results](#10-understanding-results)
+10. [Programmatic API](#10-programmatic-api)
+11. [Understanding Results](#11-understanding-results)
 
 ### Part 4: Real-World Application
-11. [Common Use Cases](#11-common-use-cases)
-12. [Best Practices](#12-best-practices)
-13. [Troubleshooting](#13-troubleshooting)
+12. [Common Use Cases](#12-common-use-cases)
+13. [Best Practices](#13-best-practices)
+14. [Troubleshooting](#14-troubleshooting)
 
 ### Part 5: Reference Materials
-14. [Quick Reference Tables](#14-quick-reference-tables)
-15. [Resources & Links](#15-resources--links)
+15. [Quick Reference Tables](#15-quick-reference-tables)
+16. [Resources & Links](#16-resources--links)
 
 ---
 
@@ -856,7 +857,127 @@ lightspeed-eval \
 
 ---
 
-## 10. Understanding Results
+## 10. Programmatic API
+
+In addition to the CLI, the framework can be used as a Python library. This is useful when you want to integrate evaluations into scripts, notebooks, CI pipelines, or custom tooling—without dealing with YAML files or command-line arguments.
+
+### Available Functions
+
+| Function | Purpose |
+|----------|---------|
+| `evaluate(config, data)` | Evaluate a list of conversations |
+| `evaluate_conversation(config, data)` | Evaluate a single conversation |
+| `evaluate_turn(config, turn)` | Evaluate a single turn |
+
+All three functions return `list[EvaluationResult]`.
+
+### Basic Example
+
+```python
+from lightspeed_evaluation import (
+    evaluate,
+    EvaluationData,
+    LLMConfig,
+    SystemConfig,
+    TurnData,
+)
+
+# 1. Build configuration
+config = SystemConfig(
+    llm=LLMConfig(provider="openai", model="gpt-4o-mini"),
+)
+
+# 2. Build evaluation data
+data = EvaluationData(
+    conversation_group_id="my_eval",
+    turns=[
+        TurnData(
+            turn_id="t1",
+            query="What is OpenShift?",
+            response="OpenShift is a Kubernetes-based container platform.",
+            expected_response="OpenShift is Red Hat's Kubernetes platform.",
+            turn_metrics=["ragas:response_relevancy"],
+        ),
+    ],
+)
+
+# 3. Run evaluation
+results = evaluate(config, [data])
+
+# 4. Inspect results
+for r in results:
+    print(f"{r.metric_identifier}: {r.result} (score={r.score})")
+```
+
+### Evaluating a Single Turn
+
+Use `evaluate_turn()` when you want to evaluate one question-answer pair. Pass the optional `metrics` argument to override the turn's metrics without modifying the original turn object:
+
+```python
+from lightspeed_evaluation import evaluate_turn, SystemConfig, TurnData
+
+config = SystemConfig()
+turn = TurnData(
+    turn_id="t1",
+    query="What is a pod?",
+    response="A pod is the smallest deployable unit in Kubernetes.",
+)
+
+results = evaluate_turn(
+    config,
+    turn,
+    metrics=["ragas:response_relevancy", "ragas:faithfulness"],
+)
+```
+
+### Evaluating a Single Conversation
+
+Use `evaluate_conversation()` when you have a single `EvaluationData` object:
+
+```python
+from lightspeed_evaluation import evaluate_conversation, EvaluationData, SystemConfig, TurnData
+
+config = SystemConfig()
+data = EvaluationData(
+    conversation_group_id="support_conv",
+    turns=[
+        TurnData(turn_id="t1", query="Hello", response="Hi! How can I help?"),
+        TurnData(turn_id="t2", query="What is OCP?", response="OCP is OpenShift."),
+    ],
+    conversation_metrics=["deepeval:knowledge_retention"],
+)
+
+results = evaluate_conversation(config, data)
+```
+
+### Working with Results
+
+Each of the three functions (`evaluate()`, `evaluate_conversation()`, `evaluate_turn()`) returns `list[EvaluationResult]`. Each result contains:
+
+| Field | Description |
+|-------|-------------|
+| `result` | Status: `PASS`, `FAIL`, `ERROR`, or `SKIPPED` |
+| `score` | Numeric score between 0.0 and 1.0 |
+| `threshold` | Pass/fail threshold used |
+| `reason` | Explanation from the judge LLM |
+| `metric_identifier` | Which metric produced this result |
+| `turn_id` | Turn ID (for turn-level metrics) |
+| `conversation_group_id` | Conversation group ID |
+
+No files are generated by default—file output is the caller's responsibility. If you need CSV/JSON reports, use the `OutputHandler` separately.
+
+### CLI vs Programmatic API
+
+| Aspect | CLI (`lightspeed-eval`) | Programmatic API |
+|--------|------------------------|------------------|
+| Configuration | YAML files | Python objects (`SystemConfig`) |
+| Input data | YAML files | Python objects (`EvaluationData`) |
+| Output | CSV, JSON, TXT files + graphs | `list[EvaluationResult]` in memory |
+| Use case | Standalone runs, CI jobs | Library integration, notebooks, scripts |
+
+---
+
+## 11. Understanding Results
 
 ### Output Files
 
@@ -956,7 +1077,7 @@ ragas:faithfulness:
 
 # Part 4: Real-World Application
 
-## 11. Common Use Cases
+## 12. Common Use Cases
 
 ### Use Case 1: Quality Assurance for Customer Support Bot
 
@@ -1132,7 +1253,7 @@ exit $?
 
 ---
 
-## 12. Best Practices
+## 13. Best Practices
 
 ### 1. Start Small, Scale Up
 
@@ -1257,7 +1378,7 @@ llm:
 
 ---
 
-## 13. Troubleshooting
+## 14. Troubleshooting
 
 ### Issue 1: "No API key found"
 
@@ -1468,7 +1589,7 @@ lightspeed-eval --eval-data config/eval_batch2.yaml
 
 # Part 5: Reference Materials
 
-## 14. Quick Reference Tables
+## 15. Quick Reference Tables
 
 ### All Metrics at a Glance
 
@@ -1564,7 +1685,7 @@ uv run python script/run_multi_provider_eval.py \
 ---
 
 
-## 15. Resources & Links
+## 16. Resources & Links
 
 ### Official Framework Documentation
 
 
@@ -12,6 +12,7 @@
 
 if TYPE_CHECKING:
     # ruff: noqa: F401
+    from lightspeed_evaluation.api import evaluate, evaluate_conversation, evaluate_turn
     from lightspeed_evaluation.core.api import APIClient
     from lightspeed_evaluation.core.llm import LLMManager
     from lightspeed_evaluation.core.models import (
@@ -42,6 +43,10 @@
 __version__ = "0.5.0"
 
 _LAZY_IMPORTS = {
+    # Programmatic API
+    "evaluate": ("lightspeed_evaluation.api", "evaluate"),
+    "evaluate_conversation": ("lightspeed_evaluation.api", "evaluate_conversation"),
+    "evaluate_turn": ("lightspeed_evaluation.api", "evaluate_turn"),
     # Main pipeline
     "EvaluationPipeline": (
         "lightspeed_evaluation.pipeline.evaluation",
 
@@ -0,0 +1,110 @@
+"""Programmatic API for the LightSpeed Evaluation Framework.
+
+Provides clean public functions for using the framework as a Python library,
+without requiring YAML files or CLI argument parsing.
+
+Example usage::
+
+    from lightspeed_evaluation import (
+        evaluate,
+        EvaluationData,
+        LLMConfig,
+        SystemConfig,
+        TurnData,
+    )
+
+    config = SystemConfig(llm=LLMConfig(provider="openai", model="gpt-4o-mini"))
+    data = EvaluationData(
+        conversation_group_id="my_eval",
+        turns=[TurnData(turn_id="t1", query="What is OCP?", response="...")],
+    )
+    results = evaluate(config, [data])
+"""
+
+from typing import Optional
+
+from lightspeed_evaluation.core.models import (
+    EvaluationData,
+    EvaluationResult,
+    SystemConfig,
+    TurnData,
+)
+from lightspeed_evaluation.core.system import ConfigLoader
+from lightspeed_evaluation.pipeline.evaluation import EvaluationPipeline
+
+
+def evaluate(
+    config: SystemConfig,
+    data: list[EvaluationData],
+    output_dir: Optional[str] = None,
+) -> list[EvaluationResult]:
+    """Evaluate a batch of conversations and return the collected results.
+
+    A ``ConfigLoader`` is derived from *config* and used to construct an
+    ``EvaluationPipeline``. The pipeline is always closed afterwards, even
+    when the run raises. This function writes no report files itself.
+
+    Args:
+        config: A pre-built SystemConfig instance.
+        data: Conversations to evaluate. A falsy value (e.g. an empty list)
+            short-circuits to an empty result without building a pipeline.
+        output_dir: Optional output directory forwarded to the pipeline.
+
+    Returns:
+        The list of EvaluationResult objects returned by the pipeline run.
+    """
+    results: list[EvaluationResult] = []
+    if data:
+        config_loader = ConfigLoader.from_config(config)
+        eval_pipeline = EvaluationPipeline(config_loader, output_dir)
+        try:
+            results = eval_pipeline.run_evaluation(data)
+        finally:
+            # Always close the pipeline, whether or not the run succeeded.
+            eval_pipeline.close()
+    return results
+
+
+def evaluate_conversation(
+    config: SystemConfig,
+    data: EvaluationData,
+    output_dir: Optional[str] = None,
+) -> list[EvaluationResult]:
+    """Run :func:`evaluate` on exactly one conversation group.
+
+    The single *data* object is placed in a one-element list and handed to
+    :func:`evaluate` together with *config* and *output_dir*.
+
+    Args:
+        config: A pre-built SystemConfig instance.
+        data: The one EvaluationData conversation to evaluate.
+        output_dir: Optional output directory forwarded to :func:`evaluate`.
+
+    Returns:
+        List of EvaluationResult objects, as returned by :func:`evaluate`.
+    """
+    single_item_batch = [data]
+    return evaluate(config, single_item_batch, output_dir=output_dir)
+
+
+def evaluate_turn(
+    config: SystemConfig,
+    turn: TurnData,
+    metrics: Optional[list[str]] = None,
+    conversation_group_id: str = "programmatic_eval",
+    output_dir: Optional[str] = None,
+) -> list[EvaluationResult]:
+    """Evaluate one turn by wrapping it in a single-turn conversation.
+
+    When *metrics* is given (anything other than ``None``), the turn is
+    dumped to a dict, its ``turn_metrics`` entry is replaced, and a new
+    ``TurnData`` is validated from that dict; the caller's *turn* object is
+    not assigned to. The resulting turn is wrapped in an
+    :class:`EvaluationData` and passed to :func:`evaluate`.
+
+    Args:
+        config: A pre-built SystemConfig instance.
+        turn: The TurnData to evaluate.
+        metrics: Optional metric identifiers that replace ``turn_metrics``.
+        conversation_group_id: Conversation group ID given to the wrapper.
+        output_dir: Optional output directory forwarded to :func:`evaluate`.
+
+    Returns:
+        List of EvaluationResult objects, as returned by :func:`evaluate`.
+    """
+    effective_turn = turn
+    if metrics is not None:
+        # Rebuild through validation rather than mutating the input turn.
+        turn_fields = turn.model_dump()
+        turn_fields["turn_metrics"] = metrics
+        effective_turn = TurnData.model_validate(turn_fields)
+
+    wrapper = EvaluationData(
+        conversation_group_id=conversation_group_id,
+        turns=[effective_turn],
+    )
+    return evaluate(config, [wrapper], output_dir=output_dir)
@@ -76,6 +76,52 @@ def __init__(self) -> None:
         self.evaluation_data: Optional[list[EvaluationData]] = None
         self.logger: Optional[logging.Logger] = None
 
+    @classmethod
+    def from_config(cls, system_config: SystemConfig) -> "ConfigLoader":
+        """Build a ConfigLoader around an already-constructed SystemConfig.
+
+        Intended for programmatic use, where no YAML file is read. Besides
+        storing *system_config* on the new loader, this runs the environment
+        variable setup, configures logging from ``system_config.logging``,
+        and populates the metric mappings, in that order.
+
+        Args:
+            system_config: A pre-built SystemConfig instance.
+
+        Returns:
+            The new ConfigLoader with ``system_config`` and ``logger`` set.
+        """
+        instance = cls()
+        instance.system_config = system_config
+
+        # Environment setup is driven by a minimal dict derived from the config.
+        setup_environment_variables(
+            cls._build_config_data_from_system_config(system_config)
+        )
+        instance.logger = setup_logging(system_config.logging)
+        populate_metric_mappings(system_config)
+
+        return instance
+
+    @staticmethod
+    def _build_config_data_from_system_config(
+        system_config: SystemConfig,
+    ) -> dict[str, Any]:
+        """Assemble the small config dict passed to setup_environment_variables.
+
+        Only the SSL settings of the LLM section are copied over.
+
+        Args:
+            system_config: The SystemConfig whose ``llm`` SSL fields are read.
+
+        Returns:
+            A dict of the form
+            ``{"llm": {"ssl_verify": ..., "ssl_cert_file": ...}}``.
+        """
+        llm_settings = system_config.llm
+        ssl_section = {
+            "ssl_verify": llm_settings.ssl_verify,
+            "ssl_cert_file": llm_settings.ssl_cert_file,
+        }
+        return {"llm": ssl_section}
+
     def load_system_config(self, config_path: str) -> SystemConfig:
         """Load system configuration from YAML file."""
         try: