From 21aa6c74db22e9ab1f184d905daac30a85ddec19 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Fri, 15 May 2026 12:23:33 -0400 Subject: [PATCH] [WIP] Ship BR schema as standalone llmdbenchmark-schema package The Benchmark Report (BR) schema types are useful to external workload generators that produce BR reports (e.g. inference-perf) and to external analysis pipelines that consume them. Today, taking a dependency on these types means depending on the full llmdbenchmark distribution, which pulls in kubernetes/transformers/huggingface_hub/etc. External tools typically have none of these in their dependency closure, so they either vendor the schema files or skip the typed API entirely. This change splits the schema into its own pip-installable distribution (llmdbenchmark-schema) sitting in the same repo, with the same maintainers, and intended to be published from the same CI workflow (the publish wiring is still an open item, see below). The full llmdbenchmark package depends on it and re-exports the types from llmdbenchmark.analysis.benchmark_report, so existing callers that import from that package do not need to change; the submodules themselves (base, schema_v0_1, schema_v0_2, schema_v0_2_components) now live only in llmdbenchmark_schema. Layout: llmdbenchmark-schema/ pyproject.toml (name = "llmdbenchmark-schema") llmdbenchmark_schema/ __init__.py base.py (moved from analysis/benchmark_report/) schema_v0_1.py (moved) schema_v0_2.py (moved) schema_v0_2_components.py (moved) br_v0_*_example.yaml (moved) br_v0_*_json_schema.json (moved) py.typed README.md llmdbenchmark/ analysis/ benchmark_report/ __init__.py (re-exports from llmdbenchmark_schema) cli.py, core.py, native_to_br0_*.py (import from llmdbenchmark_schema) pyproject.toml (adds llmdbenchmark-schema as a dep) Runtime deps for llmdbenchmark-schema: pydantic>=2.0, PyYAML. Verified locally: - pip install -e llmdbenchmark-schema/ succeeds - from llmdbenchmark_schema import BenchmarkReportV02 works - br_v0_2_example.yaml round-trips against the schema - All updated files under llmdbenchmark/analysis/benchmark_report/ compile Open questions / WIP items: - Wire the publish workflow to ship both distributions from the same CI run. 
- Version policy: llmdbenchmark-schema is set to 0.2.0 (tracks the latest BR schema version it ships). Should it follow llmdbenchmark's versioning instead? - Sweep for any other places in the repo still doing relative imports of the moved files. Motivation for inference-perf: https://github.com/kubernetes-sigs/inference-perf/pull/461 inference-perf currently vendors ~1300 lines of the BR0.2 schema; this change would let it depend on llmdbenchmark-schema instead. --- llmdbenchmark-schema/README.md | 42 +++++++++++++++++++ .../llmdbenchmark_schema/__init__.py | 22 ++++++++++ .../llmdbenchmark_schema}/base.py | 0 .../br_v0_1_example.yaml | 0 .../br_v0_1_json_schema.json | 0 .../br_v0_2_example.yaml | 0 .../br_v0_2_json_schema.json | 0 .../llmdbenchmark_schema/py.typed | 0 .../llmdbenchmark_schema}/schema_v0_1.py | 0 .../llmdbenchmark_schema}/schema_v0_2.py | 0 .../schema_v0_2_components.py | 0 llmdbenchmark-schema/pyproject.toml | 40 ++++++++++++++++++ .../analysis/benchmark_report/__init__.py | 16 +++++-- .../analysis/benchmark_report/cli.py | 2 +- .../analysis/benchmark_report/core.py | 6 +-- .../benchmark_report/native_to_br0_1.py | 4 +- .../benchmark_report/native_to_br0_2.py | 6 +-- pyproject.toml | 7 ++-- 18 files changed, 130 insertions(+), 15 deletions(-) create mode 100644 llmdbenchmark-schema/README.md create mode 100644 llmdbenchmark-schema/llmdbenchmark_schema/__init__.py rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/base.py (100%) rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/br_v0_1_example.yaml (100%) rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/br_v0_1_json_schema.json (100%) rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/br_v0_2_example.yaml (100%) rename {llmdbenchmark/analysis/benchmark_report => 
llmdbenchmark-schema/llmdbenchmark_schema}/br_v0_2_json_schema.json (100%) create mode 100644 llmdbenchmark-schema/llmdbenchmark_schema/py.typed rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/schema_v0_1.py (100%) rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/schema_v0_2.py (100%) rename {llmdbenchmark/analysis/benchmark_report => llmdbenchmark-schema/llmdbenchmark_schema}/schema_v0_2_components.py (100%) create mode 100644 llmdbenchmark-schema/pyproject.toml diff --git a/llmdbenchmark-schema/README.md b/llmdbenchmark-schema/README.md new file mode 100644 index 000000000..a02aecd7e --- /dev/null +++ b/llmdbenchmark-schema/README.md @@ -0,0 +1,42 @@ +# llmdbenchmark-schema + +Pydantic types for the **Benchmark Report (BR)** format used by +[llm-d-benchmark](https://github.com/llm-d/llm-d-benchmark). + +Published as a standalone distribution so that **external benchmark +tooling** (workload generators, dashboards, analysis pipelines) can +depend on just the schema without pulling in the rest of `llmdbenchmark` +(which carries Kubernetes/transformers/HF deps). + +## Install + +```bash +pip install llmdbenchmark-schema +``` + +Runtime deps: `pydantic>=2.0`, `PyYAML`. That's it. + +## Use + +```python +from llmdbenchmark_schema import BenchmarkReportV02 + +with open("report.yaml") as f: + import yaml + BenchmarkReportV02.model_validate(yaml.safe_load(f)) +``` + +The full `llmdbenchmark` package re-exports these types under +`llmdbenchmark.analysis.benchmark_report` for back-compat, so existing +code does not need to change. + +## Versioning + +The package version tracks the latest *schema* version it ships. Older +schema versions remain accessible (e.g. `BenchmarkReportV01`). + +## Source + +This package's source lives inside the +[llm-d-benchmark](https://github.com/llm-d/llm-d-benchmark) repository, +under `llmdbenchmark-schema/`. Changes are made there. 
diff --git a/llmdbenchmark-schema/llmdbenchmark_schema/__init__.py b/llmdbenchmark-schema/llmdbenchmark_schema/__init__.py new file mode 100644 index 000000000..4449b74fe --- /dev/null +++ b/llmdbenchmark-schema/llmdbenchmark_schema/__init__.py @@ -0,0 +1,22 @@ +"""llmdbenchmark-schema: BR (Benchmark Report) schema types. + +This is the schema-only subset of ``llmdbenchmark``, published as a +standalone distribution so external benchmark tooling can depend on just +the report types without pulling in the rest of ``llmdbenchmark`` +(kubernetes client, transformers, etc.). + +The full ``llmdbenchmark`` package re-exports these types under +``llmdbenchmark.analysis.benchmark_report`` for back-compat. +""" + +from .base import BenchmarkReport, Units, WorkloadGenerator +from .schema_v0_1 import BenchmarkReportV01 +from .schema_v0_2 import BenchmarkReportV02 + +__all__ = [ + "BenchmarkReport", + "BenchmarkReportV01", + "BenchmarkReportV02", + "Units", + "WorkloadGenerator", +] diff --git a/llmdbenchmark/analysis/benchmark_report/base.py b/llmdbenchmark-schema/llmdbenchmark_schema/base.py similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/base.py rename to llmdbenchmark-schema/llmdbenchmark_schema/base.py diff --git a/llmdbenchmark/analysis/benchmark_report/br_v0_1_example.yaml b/llmdbenchmark-schema/llmdbenchmark_schema/br_v0_1_example.yaml similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/br_v0_1_example.yaml rename to llmdbenchmark-schema/llmdbenchmark_schema/br_v0_1_example.yaml diff --git a/llmdbenchmark/analysis/benchmark_report/br_v0_1_json_schema.json b/llmdbenchmark-schema/llmdbenchmark_schema/br_v0_1_json_schema.json similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/br_v0_1_json_schema.json rename to llmdbenchmark-schema/llmdbenchmark_schema/br_v0_1_json_schema.json diff --git a/llmdbenchmark/analysis/benchmark_report/br_v0_2_example.yaml 
b/llmdbenchmark-schema/llmdbenchmark_schema/br_v0_2_example.yaml similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/br_v0_2_example.yaml rename to llmdbenchmark-schema/llmdbenchmark_schema/br_v0_2_example.yaml diff --git a/llmdbenchmark/analysis/benchmark_report/br_v0_2_json_schema.json b/llmdbenchmark-schema/llmdbenchmark_schema/br_v0_2_json_schema.json similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/br_v0_2_json_schema.json rename to llmdbenchmark-schema/llmdbenchmark_schema/br_v0_2_json_schema.json diff --git a/llmdbenchmark-schema/llmdbenchmark_schema/py.typed b/llmdbenchmark-schema/llmdbenchmark_schema/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/llmdbenchmark/analysis/benchmark_report/schema_v0_1.py b/llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_1.py similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/schema_v0_1.py rename to llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_1.py diff --git a/llmdbenchmark/analysis/benchmark_report/schema_v0_2.py b/llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_2.py similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/schema_v0_2.py rename to llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_2.py diff --git a/llmdbenchmark/analysis/benchmark_report/schema_v0_2_components.py b/llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_2_components.py similarity index 100% rename from llmdbenchmark/analysis/benchmark_report/schema_v0_2_components.py rename to llmdbenchmark-schema/llmdbenchmark_schema/schema_v0_2_components.py diff --git a/llmdbenchmark-schema/pyproject.toml b/llmdbenchmark-schema/pyproject.toml new file mode 100644 index 000000000..90f34fb04 --- /dev/null +++ b/llmdbenchmark-schema/pyproject.toml @@ -0,0 +1,40 @@ +[project] +name = "llmdbenchmark-schema" +version = "0.2.0" +description = "Benchmark Report (BR) schema types for llm-d-benchmark." 
+readme = "README.md" +requires-python = ">=3.11" +dependencies = [ + "pydantic>=2.0", + "PyYAML", +] +license = {text = "Apache-2.0"} +authors = [{name = "llm-d-benchmark contributors"}] +keywords = ["benchmarking", "llm", "schema", "llm-d"] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Typing :: Typed", +] + +[project.urls] +Homepage = "https://github.com/llm-d/llm-d-benchmark" +Source = "https://github.com/llm-d/llm-d-benchmark" + +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["."] +include = ["llmdbenchmark_schema*"] + +[tool.setuptools.package-data] +"llmdbenchmark_schema" = [ + "*.yaml", + "*.json", + "py.typed", +] diff --git a/llmdbenchmark/analysis/benchmark_report/__init__.py b/llmdbenchmark/analysis/benchmark_report/__init__.py index bf6f43d22..676222a47 100644 --- a/llmdbenchmark/analysis/benchmark_report/__init__.py +++ b/llmdbenchmark/analysis/benchmark_report/__init__.py @@ -1,8 +1,20 @@ """ Benchmark Report standardized reporting format. + +Schema types now live in the standalone ``llmdbenchmark-schema`` package +(distributed independently so external benchmark tooling can depend on +just the schema without pulling in the rest of ``llmdbenchmark``). + +This module re-exports the schema types and bundles them with the +analysis utilities (``core``, ``cli``, ``native_to_*``) so existing +``from llmdbenchmark.analysis.benchmark_report import ...`` callers keep +working. 
""" -from .base import BenchmarkReport +from llmdbenchmark_schema.base import BenchmarkReport +from llmdbenchmark_schema.schema_v0_1 import BenchmarkReportV01 +from llmdbenchmark_schema.schema_v0_2 import BenchmarkReportV02 + from .core import ( get_nested, import_benchmark_report, @@ -12,8 +24,6 @@ update_dict, yaml_str_to_benchmark_report, ) -from .schema_v0_1 import BenchmarkReportV01 -from .schema_v0_2 import BenchmarkReportV02 __all__ = [ "BenchmarkReport", diff --git a/llmdbenchmark/analysis/benchmark_report/cli.py b/llmdbenchmark/analysis/benchmark_report/cli.py index aa4c53f2c..c955ca5ee 100755 --- a/llmdbenchmark/analysis/benchmark_report/cli.py +++ b/llmdbenchmark/analysis/benchmark_report/cli.py @@ -15,7 +15,7 @@ import sys from . import make_json_schema -from .base import WorkloadGenerator +from llmdbenchmark_schema.base import WorkloadGenerator def main() -> None: diff --git a/llmdbenchmark/analysis/benchmark_report/core.py b/llmdbenchmark/analysis/benchmark_report/core.py index cbd21de27..72176f2b3 100755 --- a/llmdbenchmark/analysis/benchmark_report/core.py +++ b/llmdbenchmark/analysis/benchmark_report/core.py @@ -10,9 +10,9 @@ import yaml import numpy as np -from .base import BenchmarkReport -from .schema_v0_1 import BenchmarkReportV01 -from .schema_v0_2 import BenchmarkReportV02 +from llmdbenchmark_schema.base import BenchmarkReport +from llmdbenchmark_schema.schema_v0_1 import BenchmarkReportV01 +from llmdbenchmark_schema.schema_v0_2 import BenchmarkReportV02 def check_file(file_path: str) -> None: diff --git a/llmdbenchmark/analysis/benchmark_report/native_to_br0_1.py b/llmdbenchmark/analysis/benchmark_report/native_to_br0_1.py index 4c5c30ed6..fb32923c2 100644 --- a/llmdbenchmark/analysis/benchmark_report/native_to_br0_1.py +++ b/llmdbenchmark/analysis/benchmark_report/native_to_br0_1.py @@ -12,7 +12,7 @@ import numpy as np -from .base import Units +from llmdbenchmark_schema.base import Units from .core import ( check_file, get_nested, @@ -20,7 
+20,7 @@ load_benchmark_report, update_dict, ) -from .schema_v0_1 import BenchmarkReportV01, HostType, WorkloadGenerator +from llmdbenchmark_schema.schema_v0_1 import BenchmarkReportV01, HostType, WorkloadGenerator def _get_llmd_benchmark_envars() -> dict: diff --git a/llmdbenchmark/analysis/benchmark_report/native_to_br0_2.py b/llmdbenchmark/analysis/benchmark_report/native_to_br0_2.py index aa6abe6f0..d8d9366a2 100644 --- a/llmdbenchmark/analysis/benchmark_report/native_to_br0_2.py +++ b/llmdbenchmark/analysis/benchmark_report/native_to_br0_2.py @@ -18,7 +18,7 @@ import numpy as np import yaml -from .base import Units, WorkloadGenerator +from llmdbenchmark_schema.base import Units, WorkloadGenerator from .core import ( check_file, get_nested, @@ -26,8 +26,8 @@ load_benchmark_report, update_dict, ) -from .schema_v0_2 import BenchmarkReportV02, Component, Distribution, LoadSource -from .schema_v0_2_components import HostType +from llmdbenchmark_schema.schema_v0_2 import BenchmarkReportV02, Component, Distribution, LoadSource +from llmdbenchmark_schema.schema_v0_2_components import HostType def _load_run_metadata() -> dict: diff --git a/pyproject.toml b/pyproject.toml index 856e44315..25e3290aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,9 @@ dependencies = [ "pydantic>=2.0", "google-auth", "google-cloud-storage>=2.10.0", + # Schema types live in their own distribution so external benchmark + # tooling can depend on just the schema. See ``llmdbenchmark-schema/``. + "llmdbenchmark-schema", ] # @@ -29,14 +32,12 @@ dependencies = [ [tool.setuptools.packages.find] where = ["."] include = ["llmdbenchmark*"] -exclude = ["templates*", "specification*"] +exclude = ["templates*", "specification*", "llmdbenchmark_schema*"] [tool.setuptools.package-data] "llmdbenchmark.analysis" = [ "scripts/*.sh", "scripts/*.py", - "benchmark_report/*.yaml", - "benchmark_report/*.json", ] [project.urls]