Skip to content

Commit 9b47c0f

Browse files
authored
Feat/benchmark runs (#80)
* feat: WIP * feat: add benchmark run scaffolding
1 parent 8db340c commit 9b47c0f

15 files changed

Lines changed: 1640 additions & 2 deletions

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ help:
1717

1818
# Install dependencies
1919
install:
20-
uv sync --locked --all-extras --dev
20+
uv sync --locked --all-extras --all-groups
2121

2222
# Test commands
2323
test-unit:

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies = [
1616

1717
[project.scripts]
1818
deepset-mcp = "deepset_mcp.main:main"
19+
mcp-benchmark = "deepset_mcp.benchmark.runner.cli:cli"
1920

2021
[project.optional-dependencies]
2122
analysis = [
@@ -24,6 +25,9 @@ analysis = [
2425
"matplotlib",
2526
"seaborn"
2627
]
28+
benchmark = [
29+
"typer",
30+
]
2731

2832
[build-system]
2933
requires = ["hatchling"]
@@ -83,6 +87,7 @@ isort = { combine-as-imports = true, known-first-party = ["deepset_mcp"] }
8387

8488
[tool.ruff.lint.per-file-ignores]
8589
"test/*" = ["D"]
90+
"src/deepset_mcp/benchmark/runner/cli.py" = ["B904"] # Allow to raise Typer errors without attaching full stack trace.
8691

8792
[tool.ruff.lint.pydocstyle]
8893
convention = "pep257"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

src/deepset_mcp/benchmark/runner/__init__.py

Whitespace-only changes.
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
import typer
2+
3+
from deepset_mcp.benchmark.runner.config_loader import (
4+
find_all_test_case_paths,
5+
load_test_case_by_name,
6+
load_test_case_from_path,
7+
)
8+
from deepset_mcp.benchmark.runner.models import TestCaseConfig
9+
from deepset_mcp.benchmark.runner.setup_actions import (
10+
setup_all,
11+
setup_index,
12+
setup_pipeline,
13+
setup_test_case,
14+
)
15+
16+
# Typer application object; every `@app.command(...)` function below registers itself on it.
app = typer.Typer(help="Short commands for listing/creating test cases, pipelines, and indexes.")
17+
18+
19+
@app.command("list-cases")
def list_cases(
    task_dir: str | None = typer.Option(
        None,
        help="Directory where all test-case YAMLs live (`benchmark/tasks/*.yml`).",
    ),
) -> None:
    """List all test-case files (base names) under `task_dir`.

    Prints one bullet per discovered file stem. When `task_dir` is omitted, the lookup is
    delegated to `find_all_test_case_paths` with `None`, which selects its own default directory.

    Exits with status code 1 (via `typer.Exit`) when no test-case files are found.
    """
    paths = find_all_test_case_paths(task_dir)
    if not paths:
        # Do not print the literal "None" when the option was not supplied.
        searched = task_dir if task_dir is not None else "the default task directory"
        typer.secho(f"No test-case files found in {searched}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    for p in paths:
        typer.echo(f" • {p.stem}")
34+
35+
36+
@app.command("create-case")
def create_case(
    test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
    workspace_name: str = typer.Option(
        ..., "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    task_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Load a single test-case by name and create its pipeline + index (if any) in `workspace_name`.

    The command runs in two phases: load the test-case config, then hand it to `setup_test_case`.
    A failure in either phase prints a red error message and exits with status code 1
    (via `typer.Exit`); on success a green confirmation is printed.
    """
    try:
        test_cfg = load_test_case_by_name(name=test_name, task_dir=task_dir)
    except FileNotFoundError:
        # Do not print the literal "None" when `--task-dir` was not supplied.
        searched = task_dir if task_dir is not None else "the default task directory"
        typer.secho(f"Test-case '{test_name}' not found under {searched}.", fg=typer.colors.RED)
        raise typer.Exit(code=1)
    except Exception as e:
        # Any other load problem (e.g. an invalid config) is reported with its message.
        typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    typer.secho(f"→ Creating resources for '{test_name}' in '{workspace_name}'…", fg=typer.colors.GREEN)
    try:
        setup_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
    except Exception as e:
        typer.secho(f"✘ Failed to set up '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    typer.secho(f"✔ '{test_name}' ready.", fg=typer.colors.GREEN)
71+
72+
73+
@app.command("create-all")
def create_all(
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    concurrency: int = typer.Option(
        5,
        "--concurrency",
        "-c",
        min=1,  # a parallelism limit below 1 is meaningless; reject it at parse time
        help="Maximum number of test-cases to set up in parallel.",
    ),
    task_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Load every test-case under `task_dir` and create pipelines + indexes in `workspace_name` in parallel.

    Test-case files that fail to load are reported in yellow and skipped; the remaining configs
    are passed to `setup_all` together with the `concurrency` limit.

    Exits with status code 1 (via `typer.Exit`) when no files are found, when none of them
    loads successfully, or when `setup_all` raises.
    """
    paths = find_all_test_case_paths(task_dir)
    if not paths:
        # Do not print the literal "None" when `--task-dir` was not supplied.
        searched = task_dir if task_dir is not None else "the default task directory"
        typer.secho(f"No test-case files found in {searched}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 1) Load all configs; a broken file must not prevent the others from being set up.
    test_cfgs: list[TestCaseConfig] = []
    for p in paths:
        try:
            cfg = load_test_case_from_path(path=p)
            test_cfgs.append(cfg)
        except Exception as e:
            typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)

    if not test_cfgs:
        typer.secho("No valid test-case configs to create.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 2) Create the resources for every successfully loaded config.
    typer.secho(
        f"→ Creating {len(test_cfgs)} test-cases in '{workspace_name}' (concurrency={concurrency})…",
        fg=typer.colors.GREEN,
    )
    try:
        setup_all(
            test_cfgs=test_cfgs,
            workspace_name=workspace_name,
            api_key=api_key,
            concurrency=concurrency,
        )
    except Exception as e:
        typer.secho(f"✘ Some test-cases failed during creation: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    typer.secho("✔ All test-cases attempted.", fg=typer.colors.GREEN)
130+
131+
132+
@app.command("create-pipe")
def create_pipe(
    yaml_path: str | None = typer.Option(None, "--path", "-p", help="Path to a pipeline YAML file."),
    yaml_content: str | None = typer.Option(
        None, "--content", "-c", help="Raw YAML string for the pipeline (instead of a file)."
    ),
    pipeline_name: str = typer.Option(..., "--name", "-n", help="Name to assign to the new pipeline."),
    workspace_name: str = typer.Option(..., "--workspace", "-w", help="Workspace in which to create the pipeline."),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
) -> None:
    """Create one pipeline in `workspace_name` from a YAML file or an inline YAML string.

    Exactly one of `--path` / `--content` must be supplied. A usage error or any exception
    raised by `setup_pipeline` prints a red message and exits with status code 1.
    """
    # Both supplied or both missing (an empty string counts as missing) is a usage error.
    if bool(yaml_path) == bool(yaml_content):
        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    try:
        setup_pipeline(
            yaml_path=yaml_path,
            yaml_content=yaml_content,
            pipeline_name=pipeline_name,
            workspace_name=workspace_name,
            api_key=api_key,
        )
        typer.secho(f"✔ Pipeline '{pipeline_name}' created in '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as exc:
        typer.secho(f"✘ Failed to create pipeline '{pipeline_name}': {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
164+
165+
166+
@app.command("create-index")
def create_index(
    yaml_path: str | None = typer.Option(None, "--path", "-p", help="Path to an index YAML file."),
    yaml_content: str | None = typer.Option(None, "--content", "-c", help="Raw YAML string for the index."),
    index_name: str = typer.Option(..., "--name", "-n", help="Name to assign to the new index."),
    workspace_name: str = typer.Option(..., "--workspace", "-w", help="Workspace in which to create the index."),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    description: str | None = typer.Option(None, "--desc", help="Optional description for the index."),
) -> None:
    """Create one index in `workspace_name` from a YAML file or an inline YAML string.

    Exactly one of `--path` / `--content` must be supplied. A usage error or any exception
    raised by `setup_index` prints a red message and exits with status code 1.
    """
    # Both supplied or both missing (an empty string counts as missing) is a usage error.
    if bool(yaml_path) == bool(yaml_content):
        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    try:
        setup_index(
            yaml_path=yaml_path,
            yaml_content=yaml_content,
            index_name=index_name,
            workspace_name=workspace_name,
            api_key=api_key,
            description=description,
        )
        typer.secho(f"✔ Index '{index_name}' created in '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as exc:
        typer.secho(f"✘ Failed to create index '{index_name}': {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
198+
199+
200+
def cli() -> None:
    """Entry point for the benchmark CLI: invoke the Typer `app`."""
    app()


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import glob
2+
from pathlib import Path
3+
4+
from pydantic import ValidationError
5+
6+
from deepset_mcp.benchmark.runner.models import TestCaseConfig
7+
8+
9+
def _default_task_dir() -> Path:
    """Return the default task directory: `tasks`, located one level above this file's directory."""
    runner_dir = Path(__file__).parent
    benchmark_dir = runner_dir.parent
    return benchmark_dir / "tasks"
12+
13+
14+
def find_all_test_case_paths(task_dir: str | Path | None = None) -> list[Path]:
15+
"""
16+
Return a list of all `.yml` or `.yaml` files under `task_dir`.
17+
18+
If `task_dir` is None, we resolve to `benchmark/tasks` (relative to this file).
19+
"""
20+
if task_dir is None:
21+
base = _default_task_dir()
22+
else:
23+
base = Path(task_dir)
24+
25+
pattern1 = base / "*.yml"
26+
pattern2 = base / "*.yaml"
27+
return [Path(p) for p in glob.glob(str(pattern1))] + [Path(p) for p in glob.glob(str(pattern2))]
28+
29+
30+
def load_test_case_from_path(path: Path) -> TestCaseConfig:
    """
    Load the test-case YAML at `path` via `TestCaseConfig.from_file()`.

    A `ValidationError` or `FileNotFoundError` raised while loading is re-raised as a
    `RuntimeError` that names the offending path and chains the original exception.
    Other exception types propagate unchanged.
    """
    try:
        config = TestCaseConfig.from_file(path)
    except (FileNotFoundError, ValidationError) as exc:
        raise RuntimeError(f"Failed to load {path}: {exc}") from exc
    return config
40+
41+
42+
def load_test_case_by_name(name: str, task_dir: str | Path | None = None) -> TestCaseConfig:
    """
    Locate the test-case file called `name` (given without extension) under `task_dir` and load it.

    If `task_dir` is None, defaults to `benchmark/tasks` relative to this file.
    `<name>.yml` is probed before `<name>.yaml`, so the `.yml` file wins when both exist.
    Returns a loaded TestCaseConfig or raises FileNotFoundError if neither file exists.
    """
    search_dir = _default_task_dir() if task_dir is None else Path(task_dir)

    for suffix in (".yml", ".yaml"):
        candidate = search_dir / f"{name}{suffix}"
        if candidate.exists():
            # First match wins.
            return load_test_case_from_path(candidate)

    raise FileNotFoundError(f"No test-case named '{name}' under {search_dir}")

0 commit comments

Comments
 (0)