deepset-ai
diff --git a/‎Makefile‎
Lines changed: 1 addition & 1 deletion b/‎Makefile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 5 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/deepset_mcp/benchmark/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎src/deepset_mcp/benchmark/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/deepset_mcp/benchmark/runner/__init__.py‎ b/‎src/deepset_mcp/benchmark/runner/__init__.py‎
diff --git a/‎src/deepset_mcp/benchmark/runner/cli.py‎
Lines changed: 206 additions & 0 deletions b/‎src/deepset_mcp/benchmark/runner/cli.py‎
Lines changed: 206 additions & 0 deletions
diff --git a/‎src/deepset_mcp/benchmark/runner/config_loader.py‎
Lines changed: 64 additions & 0 deletions b/‎src/deepset_mcp/benchmark/runner/config_loader.py‎
Lines changed: 64 additions & 0 deletions
@@ -17,7 +17,7 @@ help:
 
 # Install dependencies
 install:
-	uv sync --locked --all-extras --dev
+	uv sync --locked --all-extras --all-groups
 
 # Test commands
 test-unit:
 
@@ -16,6 +16,7 @@ dependencies = [
 
 [project.scripts]
 deepset-mcp = "deepset_mcp.main:main"
+mcp-benchmark = "deepset_mcp.benchmark.runner.cli:cli"
 
 [project.optional-dependencies]
 analysis = [
@@ -24,6 +25,9 @@ analysis = [
     "matplotlib",
     "seaborn"
 ]
+benchmark = [
+    "typer",
+]
 
 [build-system]
 requires = ["hatchling"]
@@ -83,6 +87,7 @@ isort = { combine-as-imports = true, known-first-party = ["deepset_mcp"] }
 
 [tool.ruff.lint.per-file-ignores]
 "test/*" = ["D"]
+"src/deepset_mcp/benchmark/runner/cli.py" = ["B904"] # Allow raising Typer exits inside `except` blocks without chaining the original exception.
 
 [tool.ruff.lint.pydocstyle]
 convention = "pep257"
 
@@ -0,0 +1 @@
+
@@ -0,0 +1,206 @@
+import typer
+
+from deepset_mcp.benchmark.runner.config_loader import (
+    find_all_test_case_paths,
+    load_test_case_by_name,
+    load_test_case_from_path,
+)
+from deepset_mcp.benchmark.runner.models import TestCaseConfig
+from deepset_mcp.benchmark.runner.setup_actions import (
+    setup_all,
+    setup_index,
+    setup_pipeline,
+    setup_test_case,
+)
+
+# Typer application object; each `@app.command(...)` function below is registered on it as a sub-command.
+app = typer.Typer(help="Short commands for listing/creating test cases, pipelines, and indexes.")
+
+
+@app.command("list-cases")
+def list_cases(
+    task_dir: str | None = typer.Option(
+        None,
+        help="Directory where all test-case YAMLs live (`benchmark/tasks/*.yml`).",
+    ),
+) -> None:
+    """
+    List all test-case files (base names) under `task_dir`.
+
+    Prints one bullet per file stem. Exits with status 1 when no test-case files are found.
+    """
+    paths = find_all_test_case_paths(task_dir)
+    if not paths:
+        # Avoid printing the literal "None" when the directory option was not given.
+        location = task_dir if task_dir is not None else "the default task directory"
+        typer.secho(f"No test-case files found in {location}", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    for p in paths:
+        typer.echo(f" • {p.stem}")
+
+
+@app.command("create-case")
+def create_case(
+    test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
+    workspace_name: str = typer.Option(
+        ..., "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
+    ),
+    api_key: str | None = typer.Option(
+        None,
+        "--api-key",
+        "-k",
+        help="Explicit DP_API_KEY to use (overrides environment).",
+    ),
+    task_dir: str | None = typer.Option(
+        None,
+        help="Directory where test-case YAMLs are stored.",
+    ),
+) -> None:
+    """
+    Load a single test-case by name and create its pipeline + index (if any) in `workspace_name`.
+
+    Exits with status 1 if the test-case cannot be found or loaded, or if setting up its resources fails.
+    """
+    try:
+        test_cfg = load_test_case_by_name(name=test_name, task_dir=task_dir)
+    except FileNotFoundError:
+        # Avoid printing the literal "None" when the directory option was not given.
+        location = task_dir if task_dir is not None else "the default task directory"
+        typer.secho(f"Test-case '{test_name}' not found under {location}.", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+    except Exception as e:
+        typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    typer.secho(f"→ Creating resources for '{test_name}' in '{workspace_name}'…", fg=typer.colors.GREEN)
+    try:
+        setup_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
+    except Exception as e:
+        typer.secho(f"✘ Failed to set up '{test_name}': {e}", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    typer.secho(f"✔ '{test_name}' ready.", fg=typer.colors.GREEN)
+
+
+@app.command("create-all")
+def create_all(
+    workspace_name: str = typer.Option(
+        "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
+    ),
+    api_key: str | None = typer.Option(
+        None,
+        "--api-key",
+        "-k",
+        help="Explicit DP_API_KEY to use (overrides environment).",
+    ),
+    concurrency: int = typer.Option(
+        5,
+        "--concurrency",
+        "-c",
+        # A parallelism below 1 is not meaningful, so reject it while parsing the option.
+        min=1,
+        help="Maximum number of test-cases to set up in parallel.",
+    ),
+    task_dir: str | None = typer.Option(
+        None,
+        help="Directory where test-case YAMLs are stored.",
+    ),
+) -> None:
+    """
+    Load every test-case under `task_dir` and create pipelines + indexes in `workspace_name` in parallel.
+
+    Test-case files that fail to load are reported and skipped. Exits with status 1 when no files are
+    found, when none of them load, or when `setup_all` raises.
+    """
+    paths = find_all_test_case_paths(task_dir)
+    if not paths:
+        # Avoid printing the literal "None" when the directory option was not given.
+        location = task_dir if task_dir is not None else "the default task directory"
+        typer.secho(f"No test-case files found in {location}", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    # 1) Load all configs, skipping (but reporting) the ones that cannot be loaded.
+    test_cfgs: list[TestCaseConfig] = []
+    for p in paths:
+        try:
+            cfg = load_test_case_from_path(path=p)
+            test_cfgs.append(cfg)
+        except Exception as e:
+            typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)
+
+    if not test_cfgs:
+        typer.secho("No valid test-case configs to create.", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    # 2) Create the resources for every loaded config.
+    typer.secho(
+        f"→ Creating {len(test_cfgs)} test-cases in '{workspace_name}' (concurrency={concurrency})…",
+        fg=typer.colors.GREEN,
+    )
+    try:
+        setup_all(
+            test_cfgs=test_cfgs,
+            workspace_name=workspace_name,
+            api_key=api_key,
+            concurrency=concurrency,
+        )
+    except Exception as e:
+        typer.secho(f"✘ Some test-cases failed during creation: {e}", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    typer.secho("✔ All test-cases attempted.", fg=typer.colors.GREEN)
+
+
+@app.command("create-pipe")
+def create_pipe(
+    yaml_path: str | None = typer.Option(
+        None,
+        "--path",
+        "-p",
+        help="Path to a pipeline YAML file.",
+    ),
+    yaml_content: str | None = typer.Option(
+        None,
+        "--content",
+        "-c",
+        help="Raw YAML string for the pipeline (instead of a file).",
+    ),
+    pipeline_name: str = typer.Option(
+        ...,
+        "--name",
+        "-n",
+        help="Name to assign to the new pipeline.",
+    ),
+    workspace_name: str = typer.Option(
+        ...,
+        "--workspace",
+        "-w",
+        help="Workspace in which to create the pipeline.",
+    ),
+    api_key: str | None = typer.Option(
+        None, "--api-key", "-k", help="Explicit DP_API_KEY to use (overrides environment)."
+    ),
+) -> None:
+    """
+    Create one pipeline in `workspace_name` from a YAML file or from an inline YAML string.
+
+    Exactly one of `--path` / `--content` has to be supplied. Exits with status 1 on invalid
+    option combinations or when `setup_pipeline` raises.
+    """
+    # The two sources are mutually exclusive and one is required, so their truthiness must differ.
+    has_path = bool(yaml_path)
+    has_content = bool(yaml_content)
+    if has_path == has_content:
+        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    try:
+        setup_pipeline(
+            yaml_path=yaml_path,
+            yaml_content=yaml_content,
+            pipeline_name=pipeline_name,
+            workspace_name=workspace_name,
+            api_key=api_key,
+        )
+        success_message = f"✔ Pipeline '{pipeline_name}' created in '{workspace_name}'."
+        typer.secho(success_message, fg=typer.colors.GREEN)
+    except Exception as err:
+        failure_message = f"✘ Failed to create pipeline '{pipeline_name}': {err}"
+        typer.secho(failure_message, fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+
+@app.command("create-index")
+def create_index(
+    yaml_path: str | None = typer.Option(
+        None,
+        "--path",
+        "-p",
+        help="Path to an index YAML file.",
+    ),
+    yaml_content: str | None = typer.Option(
+        None,
+        "--content",
+        "-c",
+        help="Raw YAML string for the index.",
+    ),
+    index_name: str = typer.Option(
+        ...,
+        "--name",
+        "-n",
+        help="Name to assign to the new index.",
+    ),
+    workspace_name: str = typer.Option(
+        ...,
+        "--workspace",
+        "-w",
+        help="Workspace in which to create the index.",
+    ),
+    api_key: str | None = typer.Option(
+        None, "--api-key", "-k", help="Explicit DP_API_KEY to use (overrides environment)."
+    ),
+    description: str | None = typer.Option(
+        None,
+        "--desc",
+        help="Optional description for the index.",
+    ),
+) -> None:
+    """
+    Create one index in `workspace_name` from a YAML file or from an inline YAML string.
+
+    Exactly one of `--path` / `--content` has to be supplied. Exits with status 1 on invalid
+    option combinations or when `setup_index` raises.
+    """
+    # The two sources are mutually exclusive and one is required, so their truthiness must differ.
+    has_path = bool(yaml_path)
+    has_content = bool(yaml_content)
+    if has_path == has_content:
+        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+    try:
+        setup_index(
+            yaml_path=yaml_path,
+            yaml_content=yaml_content,
+            index_name=index_name,
+            workspace_name=workspace_name,
+            api_key=api_key,
+            description=description,
+        )
+        success_message = f"✔ Index '{index_name}' created in '{workspace_name}'."
+        typer.secho(success_message, fg=typer.colors.GREEN)
+    except Exception as err:
+        failure_message = f"✘ Failed to create index '{index_name}': {err}"
+        typer.secho(failure_message, fg=typer.colors.RED)
+        raise typer.Exit(code=1)
+
+
+def cli() -> None:
+    """
+    Entrypoint for the benchmark CLI.
+
+    Invokes the module-level Typer `app`, which dispatches to the registered sub-commands.
+    """
+    app()
+
+
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    cli()
@@ -0,0 +1,64 @@
+import glob
+from pathlib import Path
+
+from pydantic import ValidationError
+
+from deepset_mcp.benchmark.runner.models import TestCaseConfig
+
+
+def _default_task_dir() -> Path:
+    """Build the default task directory: `tasks`, located next to the directory containing this file."""
+    module_dir = Path(__file__).parent
+    return module_dir.parent.joinpath("tasks")
+
+
+def find_all_test_case_paths(task_dir: str | Path | None = None) -> list[Path]:
+    """
+    Return a sorted list of all `.yml` or `.yaml` files directly under `task_dir`.
+
+    If `task_dir` is None, we resolve to `benchmark/tasks` (relative to this file).
+    Entries whose names start with a dot are not matched, and a directory that does
+    not exist yields an empty list.
+    """
+    base = _default_task_dir() if task_dir is None else Path(task_dir)
+
+    # Passing the directory as `root_dir` (instead of embedding it in the pattern) keeps glob
+    # metacharacters in the directory name, such as `[`, from being interpreted as a pattern.
+    names = [name for pattern in ("*.yml", "*.yaml") for name in glob.glob(pattern, root_dir=base)]
+
+    # Sort so the result does not depend on the filesystem's listing order.
+    return sorted(base / name for name in names)
+
+
+def load_test_case_from_path(path: Path) -> TestCaseConfig:
+    """
+    Read a single test-case YAML at `path` using TestCaseConfig.from_file().
+
+    Raises RuntimeError if validation fails or the file cannot be read. The original
+    exception is attached as the cause.
+    """
+    try:
+        return TestCaseConfig.from_file(path)
+    # OSError covers FileNotFoundError as well as PermissionError / IsADirectoryError, so every
+    # file-access failure is reported through the documented RuntimeError.
+    except (ValidationError, OSError) as e:
+        raise RuntimeError(f"Failed to load {path}: {e}") from e
+
+
+def load_test_case_by_name(name: str, task_dir: str | Path | None = None) -> TestCaseConfig:
+    """
+    Given a test-case "name" (without extension), locate the corresponding `.yml` or `.yaml` under `task_dir`.
+
+    If `task_dir` is None, defaults to `benchmark/tasks` relative to this file.
+    Returns a loaded TestCaseConfig or raises FileNotFoundError if not found.
+    When both `<name>.yml` and `<name>.yaml` exist, the `.yml` file is used.
+    """
+    base = _default_task_dir() if task_dir is None else Path(task_dir)
+
+    # Only regular files count as a match, so a directory that happens to be named
+    # `<name>.yml` is reported as "not found" instead of failing later while reading it.
+    for ext in (".yml", ".yaml"):
+        candidate = base / f"{name}{ext}"
+        if candidate.is_file():
+            return load_test_case_from_path(candidate)
+
+    raise FileNotFoundError(f"No test-case named '{name}' under {base}")