diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 909b71b4..0bdbac1f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,22 +47,36 @@ jobs: uv --version python --version + - name: Verify contrib packaging contract + run: make contrib-verify + - name: Sync dependencies run: make sync - - name: Lint - run: make lint + - name: Run Python checks + run: make check - - name: Type check - run: make typecheck - - - name: Test with coverage - run: make test + - name: Collect coverage files + id: coverage_files + run: | + python - <<'PY' >> "$GITHUB_OUTPUT" + from pathlib import Path + + base_reports = [ + "coverage-models.xml", + "coverage-engine.xml", + "coverage-telemetry.xml", + "coverage-server.xml", + "coverage-sdk.xml", + ] + contrib_reports = sorted(path.name for path in Path(".").glob("coverage-evaluators-*.xml")) + print(f"files={','.join([*base_reports, *contrib_reports])}") + PY - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: - files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml + files: ${{ steps.coverage_files.outputs.files }} fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 8103788a..7d46b44e 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,6 +8,9 @@ jobs: permissions: contents: write id-token: write + outputs: + released: ${{ steps.semantic_release.outputs.released }} + tag: ${{ steps.semantic_release.outputs.tag }} steps: - name: Checkout @@ -25,8 +28,11 @@ jobs: - name: Setup UV uses: astral-sh/setup-uv@v4 + - name: Verify contrib wiring + run: python scripts/contrib_packages.py verify + - name: Python Semantic Release - id: release + id: semantic_release uses: python-semantic-release/python-semantic-release@v10.5.3 with: git_committer_name: galileo-automation @@ -37,54 +43,132 @@ jobs: root_options: 
"-vv" - name: Build Packages - if: steps.release.outputs.released == 'true' + if: steps.semantic_release.outputs.released == 'true' run: | uv sync uv run python scripts/build.py all - # Publish in dependency order: models -> evaluators -> sdk -> evaluator-galileo + - name: Stage built distributions + if: steps.semantic_release.outputs.released == 'true' + run: | + rm -rf release-dists + mkdir -p release-dists/models + mkdir -p release-dists/evaluators/builtin + mkdir -p release-dists/evaluators/contrib + mkdir -p release-dists/pypi/evaluators + mkdir -p release-dists/sdks/python + mkdir -p release-dists/server + + cp -R models/dist release-dists/models/ + cp -R evaluators/builtin/dist release-dists/evaluators/builtin/ + cp evaluators/builtin/dist/* release-dists/pypi/evaluators/ + cp -R sdks/python/dist release-dists/sdks/python/ + cp -R server/dist release-dists/server/ + + for contrib_dir in evaluators/contrib/*/dist; do + contrib_name="$(basename "$(dirname "$contrib_dir")")" + mkdir -p "release-dists/evaluators/contrib/$contrib_name" + cp -R "$contrib_dir" "release-dists/evaluators/contrib/$contrib_name/" + cp "$contrib_dir"/* release-dists/pypi/evaluators/ + done + + - name: Upload built distributions + if: steps.semantic_release.outputs.released == 'true' + uses: actions/upload-artifact@v4 + with: + name: release-dists + if-no-files-found: error + path: release-dists/ + + publish-models: + runs-on: ubuntu-latest + needs: release + if: needs.release.outputs.released == 'true' + permissions: + id-token: write + + steps: + - name: Download built distributions + uses: actions/download-artifact@v4 + with: + name: release-dists + path: . 
+ - name: Publish agent-control-models to PyPI - if: steps.release.outputs.released == 'true' uses: pypa/gh-action-pypi-publish@release/v1 with: - packages-dir: models/dist/ + packages-dir: release-dists/models/dist/ user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - - name: Publish agent-control-evaluators to PyPI - if: steps.release.outputs.released == 'true' + publish-evaluators: + runs-on: ubuntu-latest + needs: [release, publish-models] + if: needs.release.outputs.released == 'true' + permissions: + id-token: write + + steps: + - name: Download built distributions + uses: actions/download-artifact@v4 + with: + name: release-dists + path: . + + - name: Publish evaluator distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - packages-dir: evaluators/builtin/dist/ + packages-dir: release-dists/pypi/evaluators/ user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} + publish-sdk: + runs-on: ubuntu-latest + needs: [release, publish-evaluators] + if: >- + always() && + needs.release.outputs.released == 'true' && + needs.publish-evaluators.result == 'success' + permissions: + id-token: write + + steps: + - name: Download built distributions + uses: actions/download-artifact@v4 + with: + name: release-dists + path: . 
+ - name: Publish agent-control-sdk to PyPI - if: steps.release.outputs.released == 'true' uses: pypa/gh-action-pypi-publish@release/v1 with: - packages-dir: sdks/python/dist/ + packages-dir: release-dists/sdks/python/dist/ user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - - name: Publish agent-control-evaluator-galileo to PyPI - if: steps.release.outputs.released == 'true' - uses: pypa/gh-action-pypi-publish@release/v1 + upload-release-assets: + runs-on: ubuntu-latest + needs: [release, publish-sdk] + if: needs.release.outputs.released == 'true' + permissions: + contents: write + + steps: + - name: Download built distributions + uses: actions/download-artifact@v4 with: - packages-dir: evaluators/contrib/galileo/dist/ - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + name: release-dists + path: . - name: Upload to GitHub Release - if: steps.release.outputs.released == 'true' uses: python-semantic-release/upload-to-gh-release@main with: github_token: ${{ secrets.GALILEO_AUTOMATION_GITHUB_TOKEN || github.token }} - tag: ${{ steps.release.outputs.tag }} + tag: ${{ needs.release.outputs.tag }} root_options: "-vv" dist_glob: | - models/dist/* - evaluators/builtin/dist/* - sdks/python/dist/* - server/dist/* - evaluators/contrib/galileo/dist/* + release-dists/models/dist/* + release-dists/evaluators/builtin/dist/* + release-dists/sdks/python/dist/* + release-dists/server/dist/* + release-dists/evaluators/contrib/*/dist/* diff --git a/Makefile b/Makefile index e11ac6a0..d6b4786a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build galileo-test galileo-lint galileo-lint-fix galileo-typecheck 
galileo-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all contrib-verify scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -17,9 +17,17 @@ TS_SDK_DIR := sdks/typescript ENGINE_DIR := engine TELEMETRY_DIR := telemetry EVALUATORS_DIR := evaluators/builtin -GALILEO_DIR := evaluators/contrib/galileo +CONTRIB_DIR := evaluators/contrib UI_DIR := ui +define run-contrib-target + @set -e; \ + packages=$$(uv run python scripts/contrib_packages.py names); \ + for package in $$packages; do \ + $(MAKE) -C $(CONTRIB_DIR)/$$package $(1); \ + done +endef + help: @echo "Agent Control - Makefile commands" @echo "" @@ -33,10 +41,12 @@ help: @echo " make openapi-spec-check - verify OpenAPI generation succeeds" @echo "" @echo "Test:" - @echo " make test - run tests for core packages (models, telemetry, server, engine, sdk, evaluators)" + @echo " make test - run tests for core packages and all discovered contrib evaluators" + @echo " make contrib-verify - verify root contrib packaging contract wiring" + 
@echo " make scripts-test - run root contrib packaging contract tests" @echo " make models-test - run shared model tests with coverage" - @echo " make test-extras - run tests for contrib evaluators (galileo, etc.)" - @echo " make test-all - run all tests (core + extras)" + @echo " make test-extras - run tests for all discovered contrib evaluators" + @echo " make test-all - alias for make test" @echo " make sdk-ts-test - run TypeScript SDK tests" @echo "" @echo "Quality:" @@ -84,7 +94,13 @@ openapi-spec-check: openapi-spec # Test # --------------------------- -test: models-test telemetry-test server-test engine-test sdk-test evaluators-test +test: contrib-verify scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test + +contrib-verify: + uv run python scripts/contrib_packages.py verify + +scripts-test: + uv run --with pytest pytest scripts/tests -q models-test: cd $(MODELS_DIR) && uv run pytest --cov=src --cov-report=xml:../coverage-models.xml -q @@ -94,11 +110,11 @@ test-models: models-test telemetry-test: $(MAKE) -C $(TELEMETRY_DIR) test -# Run tests for contrib evaluators (not included in default test target) -test-extras: galileo-test +# Run tests for discovered contrib evaluators +test-extras: contrib-test -# Run all tests (core + extras) -test-all: test test-extras +# Run all tests (alias for test) +test-all: test # Run tests, lint, and typecheck check: test lint typecheck @@ -107,17 +123,17 @@ check: test lint typecheck # Quality # --------------------------- -lint: engine-lint telemetry-lint evaluators-lint +lint: engine-lint telemetry-lint evaluators-lint contrib-lint uv run --package $(PACK_MODELS) ruff check --config pyproject.toml models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml sdks/python/src -lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix +lint-fix: engine-lint-fix telemetry-lint-fix 
evaluators-lint-fix contrib-lint-fix uv run --package $(PACK_MODELS) ruff check --config pyproject.toml --fix models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml --fix server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml --fix sdks/python/src -typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck +typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck contrib-typecheck uv run --package $(PACK_MODELS) mypy --config-file pyproject.toml models/src uv run --package $(PACK_SERVER) mypy --config-file pyproject.toml server/src uv run --package $(PACK_SDK) mypy --config-file pyproject.toml sdks/python/src @@ -135,7 +151,7 @@ telemetry-typecheck: # Build / Publish # --------------------------- -build: build-models build-server build-sdk engine-build telemetry-build evaluators-build +build: build-models build-server build-sdk engine-build telemetry-build evaluators-build contrib-build build-models: cd $(MODELS_DIR) && uv build @@ -246,21 +262,17 @@ server-%: ui-%: $(MAKE) -C $(UI_DIR) $(patsubst ui-%,%,$@) -# --------------------------- -# Contrib Evaluators (Galileo) -# --------------------------- - -galileo-test: - $(MAKE) -C $(GALILEO_DIR) test +contrib-test: + $(call run-contrib-target,test) -galileo-lint: - $(MAKE) -C $(GALILEO_DIR) lint +contrib-lint: + $(call run-contrib-target,lint) -galileo-lint-fix: - $(MAKE) -C $(GALILEO_DIR) lint-fix +contrib-lint-fix: + $(call run-contrib-target,lint-fix) -galileo-typecheck: - $(MAKE) -C $(GALILEO_DIR) typecheck +contrib-typecheck: + $(call run-contrib-target,typecheck) -galileo-build: - $(MAKE) -C $(GALILEO_DIR) build +contrib-build: + $(call run-contrib-target,build) diff --git a/evaluators/builtin/pyproject.toml b/evaluators/builtin/pyproject.toml index 0c6dd587..d636a9f2 100644 --- a/evaluators/builtin/pyproject.toml +++ b/evaluators/builtin/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name 
= "Agent Control Team" }] dependencies = [ - "agent-control-models", + "agent-control-models>=7.5.0", "pydantic>=2.12.4", "google-re2>=1.1", "jsonschema>=4.0.0", @@ -16,8 +16,9 @@ dependencies = [ ] [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=3.0.0"] -cisco = ["agent-control-evaluator-cisco>=0.1.0"] +galileo = ["agent-control-evaluator-galileo>=7.5.0"] +budget = ["agent-control-evaluator-budget>=7.5.0"] +cisco = ["agent-control-evaluator-cisco>=7.5.0"] dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0"] [project.entry-points."agent_control.evaluators"] @@ -35,6 +36,7 @@ packages = ["src/agent_control_evaluators"] [tool.uv.sources] agent-control-models = { workspace = true } -# For local dev: use local galileo package instead of PyPI +# For local dev: use local contrib packages instead of PyPI agent-control-evaluator-galileo = { path = "../contrib/galileo", editable = true } +agent-control-evaluator-budget = { path = "../contrib/budget", editable = true } agent-control-evaluator-cisco = { path = "../contrib/cisco", editable = true } diff --git a/evaluators/builtin/tests/test_contrib_packages.py b/evaluators/builtin/tests/test_contrib_packages.py new file mode 100644 index 00000000..9f25186d --- /dev/null +++ b/evaluators/builtin/tests/test_contrib_packages.py @@ -0,0 +1,53 @@ +"""Tests for repo contrib package discovery wiring.""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + +REPO_ROOT = Path(__file__).resolve().parents[3] +SCRIPT_PATH = REPO_ROOT / "scripts" / "contrib_packages.py" +MODULE_NAME = "agent_control_repo_contrib_packages" + + +def load_contrib_packages_module() -> ModuleType: + """Load the repo contrib-packages script as a module for testing.""" + + module = sys.modules.get(MODULE_NAME) + if module is not None: + return module + + spec = importlib.util.spec_from_file_location(MODULE_NAME, SCRIPT_PATH) + assert spec is not None + assert 
spec.loader is not None + + module = importlib.util.module_from_spec(spec) + sys.modules[MODULE_NAME] = module + spec.loader.exec_module(module) + return module + + +def test_discover_contrib_packages_returns_expected_metadata() -> None: + """Test that real contrib packages are discovered with stable metadata.""" + + module = load_contrib_packages_module() + + packages = module.discover_contrib_packages() + + assert [(package.name, package.package, package.extra) for package in packages] == [ + ("budget", "agent-control-evaluator-budget", "budget"), + ("cisco", "agent-control-evaluator-cisco", "cisco"), + ("galileo", "agent-control-evaluator-galileo", "galileo"), + ] + + +def test_verify_contrib_packages_has_no_repo_wiring_drift() -> None: + """Test that contrib package wiring stays aligned with repo metadata.""" + + module = load_contrib_packages_module() + + packages = module.discover_contrib_packages() + + assert module.verify_contrib_packages(packages) == [] diff --git a/evaluators/contrib/budget/Makefile b/evaluators/contrib/budget/Makefile new file mode 100644 index 00000000..a6b0c609 --- /dev/null +++ b/evaluators/contrib/budget/Makefile @@ -0,0 +1,28 @@ +.PHONY: help test lint lint-fix typecheck check build + +help: + @echo "Agent Control Evaluator - Budget - Makefile commands" + @echo "" + @echo " make test - run pytest" + @echo " make lint - run ruff check" + @echo " make lint-fix - run ruff check --fix" + @echo " make typecheck - run mypy" + @echo " make check - run lint, typecheck, and test" + @echo " make build - build package" + +test: + uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-budget.xml -q + +lint: + uv run --with ruff ruff check --config ../../../pyproject.toml src/ + +lint-fix: + uv run --with ruff ruff check --config ../../../pyproject.toml --fix src/ + +typecheck: + uv run --with mypy mypy --config-file ../../../pyproject.toml src/ + +check: lint typecheck 
test + +build: + uv build diff --git a/evaluators/contrib/budget/README.md b/evaluators/contrib/budget/README.md index fa16f876..c83de1ab 100644 --- a/evaluators/contrib/budget/README.md +++ b/evaluators/contrib/budget/README.md @@ -4,10 +4,22 @@ Budget evaluator for agent-control that tracks cumulative LLM token and cost usa ## Install +```bash +pip install "agent-control-evaluators[budget]" +``` + +Fallback direct wheel install: + ```bash pip install agent-control-evaluator-budget ``` +For local development: + +```bash +uv pip install -e evaluators/contrib/budget +``` + ## Quickstart ```python @@ -87,7 +99,7 @@ ModelPricing(input_per_1k=0.04, output_per_1k=0.16) `input_per_1k` is applied to input tokens. `output_per_1k` is applied to output tokens. -Pricing is required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit pricing. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0. +Pricing and `model_path` are required when any rule uses `limit_unit="usd_cents"`. Token-only rules can omit both. If an event uses a model that is not in the pricing table and a cost rule exists, `unknown_model_behavior="block"` fails closed. Use `"warn"` to log a warning and treat the cost as 0. 
## Dual Ceiling Pattern diff --git a/evaluators/contrib/budget/pyproject.toml b/evaluators/contrib/budget/pyproject.toml index 6115e442..d46cdf34 100644 --- a/evaluators/contrib/budget/pyproject.toml +++ b/evaluators/contrib/budget/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "agent-control-evaluator-budget" -version = "0.1.0" +version = "7.5.0" description = "Budget evaluator for agent-control -- cumulative LLM cost and token tracking" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Agent Control Team" }] dependencies = [ - "agent-control-evaluators>=3.0.0", - "agent-control-models>=3.0.0", + "agent-control-evaluators>=7.5.0", + "agent-control-models>=7.5.0", ] [project.optional-dependencies] diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py index 30e658c3..795044be 100644 --- a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py +++ b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py @@ -79,11 +79,13 @@ class BudgetEvaluatorConfig(EvaluatorConfig): pricing table and a cost-based rule exists. block=fail closed, warn=log warning and treat cost as 0. pricing: Optional model pricing table. Maps model name to ModelPricing. - Used to derive cost in USD from token counts and model name. + Required when any rule uses limit_unit="usd_cents". Used to + derive cost in USD from token counts and model name. token_path: Dot-notation path to extract token usage from step data (e.g. "usage.total_tokens"). If None, looks for standard fields (input_tokens, output_tokens, total_tokens, usage). model_path: Dot-notation path to extract model name (for pricing lookup). + Required when any rule uses limit_unit="usd_cents". metadata_paths: Mapping of metadata field name to dot-notation path in step data. 
Used to extract scope dimensions (channel, user_id, etc). """ @@ -109,7 +111,7 @@ class BudgetEvaluatorConfig(EvaluatorConfig): metadata_paths: dict[str, str] = Field(default_factory=dict) @model_validator(mode="after") - def require_pricing_for_cost_rules(self) -> "BudgetEvaluatorConfig": + def require_pricing_for_cost_rules(self) -> BudgetEvaluatorConfig: has_cost_rule = any(rule.limit_unit == "usd_cents" for rule in self.limits) if has_cost_rule and self.pricing is None: raise ValueError('pricing is required when any rule uses limit_unit="usd_cents"') diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py index a45d3032..c4380903 100644 --- a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py +++ b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py @@ -15,6 +15,7 @@ import logging import math import threading +from importlib.metadata import PackageNotFoundError, version from typing import Any from agent_control_evaluators._base import Evaluator, EvaluatorMetadata @@ -27,6 +28,17 @@ logger = logging.getLogger(__name__) + +def _resolve_package_version() -> str: + """Return the installed package version, or a dev fallback during local imports.""" + try: + return version("agent-control-evaluator-budget") + except PackageNotFoundError: + return "0.0.0.dev" + + +_PACKAGE_VERSION = _resolve_package_version() + # --------------------------------------------------------------------------- # Module-level store registry # @@ -111,16 +123,23 @@ def _extract_tokens(data: Any, token_path: str | None) -> tuple[int, int]: out = usage.get("output_tokens") if out is None: out = usage.get("completion_tokens") - inp_ok = isinstance(inp, int) and not isinstance(inp, bool) - out_ok = isinstance(out, int) and not isinstance(out, bool) - if inp_ok and out_ok: - return max(0, inp), max(0, out) + input_tokens 
= _extract_non_negative_int(inp) + output_tokens = _extract_non_negative_int(out) + if input_tokens is not None and output_tokens is not None: + return input_tokens, output_tokens total = usage.get("total_tokens") if isinstance(total, int) and not isinstance(total, bool) and total > 0: return 0, max(0, total) return 0, 0 +def _extract_non_negative_int(value: Any) -> int | None: + """Return a non-negative integer or None for invalid token values.""" + if not isinstance(value, int) or isinstance(value, bool): + return None + return max(0, value) + + def _estimate_cost( model: str | None, input_tokens: int, @@ -173,7 +192,7 @@ class BudgetEvaluator(Evaluator[BudgetEvaluatorConfig]): metadata = EvaluatorMetadata( name="budget", - version="3.0.0", + version=_PACKAGE_VERSION, description="Cumulative LLM token and cost budget tracking", ) config_model = BudgetEvaluatorConfig @@ -190,9 +209,10 @@ async def evaluate(self, data: Any) -> EvaluatorResult: input_tokens, output_tokens = _extract_tokens(data, self.config.token_path) model: str | None = None - model_path_configured = bool(self.config.model_path) - if model_path_configured: - val = _extract_by_path(data, self.config.model_path) + model_path = self.config.model_path + model_path_configured = bool(model_path) + if model_path: + val = _extract_by_path(data, model_path) if val is not None: model = str(val) @@ -214,9 +234,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: ) if has_matching_cost_rule: if model is None: - block_reason = ( - f"Model not found at path '{self.config.model_path}'" - ) + block_reason = f"Model not found at path '{model_path}'" else: block_reason = f"Unknown model: {model}" if self.config.unknown_model_behavior == "block": diff --git a/evaluators/contrib/budget/tests/budget/test_budget.py b/evaluators/contrib/budget/tests/budget/test_budget.py index 6fdddabc..68e5cf24 100644 --- a/evaluators/contrib/budget/tests/budget/test_budget.py +++ 
b/evaluators/contrib/budget/tests/budget/test_budget.py @@ -5,12 +5,14 @@ from __future__ import annotations +from importlib.metadata import PackageNotFoundError, version import threading from typing import Any import pytest from pydantic import ValidationError +import agent_control_evaluator_budget.budget.evaluator as budget_evaluator_module from agent_control_evaluator_budget.budget.config import ( WINDOW_DAILY, WINDOW_MONTHLY, @@ -39,6 +41,21 @@ def _clean_store_registry() -> None: clear_budget_stores() +def test_metadata_version_matches_distribution_version() -> None: + assert BudgetEvaluator.metadata.version == version("agent-control-evaluator-budget") + + +def test_metadata_version_falls_back_without_distribution( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _raise_not_found(_: str) -> str: + raise PackageNotFoundError + + monkeypatch.setattr(budget_evaluator_module, "version", _raise_not_found) + + assert budget_evaluator_module._resolve_package_version() == "0.0.0.dev" + + # --------------------------------------------------------------------------- # InMemoryBudgetStore # --------------------------------------------------------------------------- @@ -379,6 +396,33 @@ def test_extract_tokens_openai(self) -> None: data = {"usage": {"prompt_tokens": 80, "completion_tokens": 40}} assert _extract_tokens(data, None) == (80, 40) + def test_extract_tokens_falls_back_when_normalized_fields_are_none(self) -> None: + # Given: normalized fields present but unset, plus legacy OpenAI fields + data = { + "usage": { + "input_tokens": None, + "output_tokens": None, + "prompt_tokens": 80, + "completion_tokens": 40, + } + } + + # When/Then: fallback still uses the legacy fields + assert _extract_tokens(data, None) == (80, 40) + + def test_extract_tokens_falls_back_per_field(self) -> None: + # Given: one normalized field missing, the other present + data = { + "usage": { + "input_tokens": 100, + "output_tokens": None, + "completion_tokens": 40, + } + } + + # 
When/Then: fallback applies independently per token side + assert _extract_tokens(data, None) == (100, 40) + def test_extract_tokens_none(self) -> None: # Given: None data / Then: (0, 0) assert _extract_tokens(None, None) == (0, 0) diff --git a/evaluators/contrib/cisco/Makefile b/evaluators/contrib/cisco/Makefile index 136f622b..0f64617a 100644 --- a/evaluators/contrib/cisco/Makefile +++ b/evaluators/contrib/cisco/Makefile @@ -1,4 +1,6 @@ -.PHONY: help test lint lint-fix typecheck build +.PHONY: help test lint lint-fix typecheck check build + +PACKAGE := agent-control-evaluator-cisco help: @echo "Agent Control Evaluator - Cisco AI Defense - Makefile commands" @@ -6,20 +8,22 @@ help: @echo " make lint - run ruff check" @echo " make lint-fix - run ruff check --fix" @echo " make typecheck - run mypy" + @echo " make check - run test, lint, and typecheck" @echo " make build - build package" test: - uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-cisco.xml -q + uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-cisco.xml -q lint: - uv run --with ruff ruff check --config ../../../pyproject.toml src/ + uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml src/ lint-fix: - uv run --with ruff ruff check --config ../../../pyproject.toml --fix src/ + uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml --fix src/ typecheck: - uv run --with mypy mypy --config-file ../../../pyproject.toml src/ + uv run --with mypy --package $(PACKAGE) mypy --config-file ../../../pyproject.toml src/ + +check: test lint typecheck build: uv build - diff --git a/evaluators/contrib/cisco/README.md b/evaluators/contrib/cisco/README.md index 6e5867bf..a43edb18 100644 --- a/evaluators/contrib/cisco/README.md +++ b/evaluators/contrib/cisco/README.md @@ -7,22 +7,22 
@@ External evaluator that calls Cisco AI Defense Chat Inspection via REST and maps ## Installation -Install from PyPI (if available): +Canonical install path: ```bash -pip install agent-control-evaluator-cisco +pip install "agent-control-evaluators[cisco]" ``` -Or install from the workspace for local development: +Fallback direct wheel install: ```bash -uv pip install -e evaluators/contrib/cisco +pip install agent-control-evaluator-cisco ``` -Alternatively, install via the builtin evaluators package extra (one-liner): +For local development: ```bash -pip install agent-control-evaluators[cisco] +uv pip install -e evaluators/contrib/cisco ``` - Build wheel from the repo root (contrib package only): diff --git a/evaluators/contrib/cisco/pyproject.toml b/evaluators/contrib/cisco/pyproject.toml index a9fc1091..ea27c250 100644 --- a/evaluators/contrib/cisco/pyproject.toml +++ b/evaluators/contrib/cisco/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "agent-control-evaluator-cisco" -version = "0.1.0" +version = "7.5.0" description = "Cisco AI Defense evaluator for agent-control" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Cisco AI Defense Team" }] dependencies = [ - "agent-control-evaluators>=3.0.0", - "agent-control-models>=3.0.0", + "agent-control-evaluators>=7.5.0", + "agent-control-models>=7.5.0", "httpx>=0.24.0", ] diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py b/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py index 2633c960..adbab9e5 100644 --- a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py +++ b/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py @@ -2,6 +2,7 @@ import json import os +from importlib.metadata import PackageNotFoundError, version from typing import Any from agent_control_evaluators import ( @@ -15,6 +16,17 @@ from .config import 
CiscoAIDefenseConfig +def _resolve_package_version() -> str: + """Return the installed package version, or a dev fallback during local imports.""" + try: + return version("agent-control-evaluator-cisco") + except PackageNotFoundError: + return "0.0.0.dev" + + +_PACKAGE_VERSION = _resolve_package_version() + + def _load_api_key(env_name: str) -> str: key = os.getenv(env_name) if not key: @@ -99,7 +111,7 @@ class CiscoAIDefenseEvaluator(Evaluator[CiscoAIDefenseConfig]): metadata = EvaluatorMetadata( name="cisco.ai_defense", - version="0.1.0", + version=_PACKAGE_VERSION, description="Cisco AI Defense Chat Inspection integration", requires_api_key=True, timeout_ms=15000, diff --git a/evaluators/contrib/cisco/tests/test_evaluator.py b/evaluators/contrib/cisco/tests/test_evaluator.py index 8bf98973..976b1941 100644 --- a/evaluators/contrib/cisco/tests/test_evaluator.py +++ b/evaluators/contrib/cisco/tests/test_evaluator.py @@ -1,4 +1,7 @@ +from importlib.metadata import PackageNotFoundError, version + import pytest +import agent_control_evaluator_cisco.ai_defense.evaluator as cisco_evaluator_module from pydantic import ValidationError from agent_control_evaluator_cisco.ai_defense import ( @@ -13,6 +16,21 @@ def _env_api_key(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv("AI_DEFENSE_API_KEY", "test-key") +def test_metadata_version_matches_distribution_version() -> None: + assert CiscoAIDefenseEvaluator.metadata.version == version("agent-control-evaluator-cisco") + + +def test_metadata_version_falls_back_without_distribution( + monkeypatch: pytest.MonkeyPatch, +) -> None: + def _raise_not_found(_: str) -> str: + raise PackageNotFoundError + + monkeypatch.setattr(cisco_evaluator_module, "version", _raise_not_found) + + assert cisco_evaluator_module._resolve_package_version() == "0.0.0.dev" + + @pytest.mark.asyncio async def test_none_input_returns_no_data() -> None: cfg = CiscoAIDefenseConfig() diff --git a/evaluators/contrib/galileo/Makefile 
b/evaluators/contrib/galileo/Makefile index 0deea340..89d61ac0 100644 --- a/evaluators/contrib/galileo/Makefile +++ b/evaluators/contrib/galileo/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync test lint lint-fix typecheck build publish +.PHONY: help sync test lint lint-fix typecheck check build publish PACKAGE := agent-control-evaluator-galileo @@ -9,22 +9,25 @@ help: @echo " make lint - run ruff check" @echo " make lint-fix - run ruff check --fix" @echo " make typecheck - run mypy" + @echo " make check - run test, lint, and typecheck" @echo " make build - build package" sync: uv sync test: - uv run pytest --cov=src --cov-report=xml:../../../coverage-evaluators-galileo.xml -q + uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-galileo.xml -q lint: - uv run ruff check --config ../../../pyproject.toml src/ + uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml src/ lint-fix: - uv run ruff check --config ../../../pyproject.toml --fix src/ + uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml --fix src/ typecheck: - uv run mypy --config-file ../../../pyproject.toml src/ + uv run --with mypy --package $(PACKAGE) mypy --config-file ../../../pyproject.toml src/ + +check: test lint typecheck build: uv build diff --git a/evaluators/contrib/galileo/README.md b/evaluators/contrib/galileo/README.md index e9b196a4..b794c4a7 100644 --- a/evaluators/contrib/galileo/README.md +++ b/evaluators/contrib/galileo/README.md @@ -2,6 +2,26 @@ Integration package for Galileo Luna-2 evaluator. 
+## Install + +Canonical install path: + +```bash +pip install "agent-control-evaluators[galileo]" +``` + +Grandfathered convenience aliases remain available: + +```bash +pip install "agent-control-sdk[galileo]" +``` + +Fallback direct wheel install: + +```bash +pip install agent-control-evaluator-galileo +``` + See full documentation in: https://docs.agentcontrol.dev/concepts/evaluators/contributing-evaluator Example with usage: https://docs.agentcontrol.dev/examples/galileo-luna2 diff --git a/evaluators/contrib/galileo/pyproject.toml b/evaluators/contrib/galileo/pyproject.toml index 0b87e4c6..ff254153 100644 --- a/evaluators/contrib/galileo/pyproject.toml +++ b/evaluators/contrib/galileo/pyproject.toml @@ -7,8 +7,8 @@ requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Agent Control Team" }] dependencies = [ - "agent-control-evaluators>=3.0.0", - "agent-control-models>=3.0.0", + "agent-control-evaluators>=7.5.0", + "agent-control-models>=7.5.0", "httpx>=0.24.0", "pydantic>=2.12.4", ] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py index 18137115..a6bb146c 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py +++ b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna2/evaluator.py @@ -6,6 +6,7 @@ import logging import os +from importlib.metadata import PackageNotFoundError, version from typing import Any from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator @@ -15,6 +16,17 @@ logger = logging.getLogger(__name__) + +def _resolve_package_version() -> str: + """Return the installed package version, or a dev fallback during local imports.""" + try: + return version("agent-control-evaluator-galileo") + except PackageNotFoundError: + return "0.0.0.dev" + + +_PACKAGE_VERSION = _resolve_package_version() + # Check if httpx is 
available try: import httpx @@ -84,7 +96,7 @@ class Luna2Evaluator(Evaluator[Luna2EvaluatorConfig]): metadata = EvaluatorMetadata( name="galileo.luna2", - version="3.0.0", + version=_PACKAGE_VERSION, description="Galileo Luna-2 enterprise runtime protection (direct API)", requires_api_key=True, timeout_ms=10000, diff --git a/evaluators/contrib/galileo/tests/test_luna2_evaluator.py b/evaluators/contrib/galileo/tests/test_luna2_evaluator.py index 3b34c620..0f6e45d7 100644 --- a/evaluators/contrib/galileo/tests/test_luna2_evaluator.py +++ b/evaluators/contrib/galileo/tests/test_luna2_evaluator.py @@ -7,10 +7,12 @@ The evaluator uses direct HTTP API calls instead of the galileo SDK. """ +from importlib.metadata import PackageNotFoundError, version import os from unittest.mock import AsyncMock, MagicMock, patch import pytest +import agent_control_evaluator_galileo.luna2.evaluator as galileo_evaluator_module from agent_control_evaluators import Evaluator from agent_control_models import EvaluatorResult from pydantic import ValidationError @@ -282,7 +284,19 @@ def test_luna2_evaluator_import_success(self): assert Luna2Evaluator is not None assert Luna2Evaluator.metadata.name == "galileo.luna2" - assert Luna2Evaluator.metadata.version == "3.0.0" + assert Luna2Evaluator.metadata.version == version("agent-control-evaluator-galileo") + + def test_luna2_evaluator_version_falls_back_without_distribution( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Test package version falls back when distribution metadata is unavailable.""" + + def _raise_not_found(_: str) -> str: + raise PackageNotFoundError + + monkeypatch.setattr(galileo_evaluator_module, "version", _raise_not_found) + + assert galileo_evaluator_module._resolve_package_version() == "0.0.0.dev" @patch("agent_control_evaluator_galileo.luna2.evaluator.LUNA2_AVAILABLE", False) def test_luna2_evaluator_is_available_false_without_httpx(self): diff --git a/evaluators/contrib/template/Makefile 
b/evaluators/contrib/template/Makefile new file mode 100644 index 00000000..d6944fc5 --- /dev/null +++ b/evaluators/contrib/template/Makefile @@ -0,0 +1,31 @@ +.PHONY: help test lint lint-fix typecheck check build + +PACKAGE_DIR := $(notdir $(abspath $(CURDIR))) +COVERAGE_XML := ../../../coverage-evaluators-$(PACKAGE_DIR).xml + +help: + @echo "Agent Control contrib evaluator template" + @echo "" + @echo " make test - run pytest" + @echo " make lint - run ruff check" + @echo " make lint-fix - run ruff check --fix" + @echo " make typecheck - run mypy" + @echo " make check - run lint, typecheck, and tests" + @echo " make build - build package" + +test: + uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:$(COVERAGE_XML) -q + +lint: + uv run --with ruff ruff check --config ../../../pyproject.toml src/ + +lint-fix: + uv run --with ruff ruff check --config ../../../pyproject.toml --fix src/ + +typecheck: + uv run --with mypy mypy --config-file ../../../pyproject.toml src/ + +check: lint typecheck test + +build: + uv build diff --git a/evaluators/contrib/template/README.md b/evaluators/contrib/template/README.md index f3090a02..e11b1777 100644 --- a/evaluators/contrib/template/README.md +++ b/evaluators/contrib/template/README.md @@ -1,5 +1,126 @@ -# Evaluator Template +# Contrib Evaluator Template -Starter template for creating a custom evaluator package. +This directory is scaffolding for a new contrib evaluator package. + +It is intentionally excluded from repo automation until you convert it into a real package. In +particular, `template/` does not participate in root `make check`, CI, semantic-release, or +publishing because it ships a `pyproject.toml.template` placeholder instead of a real +`pyproject.toml`. + +## Naming contract + +Pick `<name>` as a short lowercase single-word identifier such as `galileo`, `cisco`, or +`budget`.
That same value should appear in the steady-state package shape: + +- directory: `evaluators/contrib/<name>/` +- pip package: `agent-control-evaluator-<name>` +- Python module: `agent_control_evaluator_<name>` +- extra name: `agent-control-evaluators[<name>]` + +The template uses `{{NAME}}` for that package identifier. It does not use `{{ORG}}`. + +Keep the public evaluator reference separate from the package identifier: + +- `{{ENTRY_POINT}}` is the user-facing evaluator name and should match + `EvaluatorMetadata.name` in your package code. +- Single-evaluator packages can keep that public name flat, such as `budget`. +- Packages that expose a family of evaluator ids should namespace it, such as + `cisco.ai_defense` or `galileo.luna2`. + +## Scaffold a new contrib package + +1. Copy the template and rename the manifest: + + ```bash + cp -r evaluators/contrib/template evaluators/contrib/<name> + mv evaluators/contrib/<name>/pyproject.toml.template \ + evaluators/contrib/<name>/pyproject.toml + ``` + +2. Replace placeholders in `pyproject.toml`: + + - `{{NAME}}` -> contrib package identifier + - `{{ENTRY_POINT}}` -> public evaluator reference / `EvaluatorMetadata.name` + - `{{EVALUATOR}}` -> evaluator module path segment (for example `budget` or `ai_defense`) + - `{{CLASS}}` -> evaluator class name + - `{{AUTHOR}}` -> authoring team + + For a package with one primary evaluator, `{{ENTRY_POINT}}` is often just `<name>`. For a + package that groups provider-specific evaluators, use `<name>.<evaluator>`. + + The template starts new packages at `0.1.0`; change that if your release plan differs. + Also replace the copied `README.md` with package-specific install, configuration, and usage + docs before your first build or publish. Then confirm the package `version` reflects your + release plan and that the `agent-control-evaluators` / `agent-control-models` dependency + floors match the compatibility floor you intend to support.
Keep those dependency floors + aligned with the builtin extra you add below before you commit the new package. + +3. Add package code and tests: + + - `src/agent_control_evaluator_<name>/` + - `tests/` + +4. Validate the package locally: + + ```bash + make lint + make lint-fix + make typecheck + make test + make check + make build + ``` + +## Canonical install docs + +Contributor-facing and user-facing package docs should treat this as the canonical install path: + +```bash +pip install "agent-control-evaluators[<name>]" +``` + +Direct wheel installs such as `pip install agent-control-evaluator-<name>` can still be +documented, but they are secondary to the extra on `agent-control-evaluators`. + +In `pyproject.toml`, replace the `>=` dependency-floor version placeholders intentionally before the +first build. For an in-repo contrib package on the shared Agent Control release train, +use the current monorepo release version. For an independently maintained package, +choose and document the minimum supported Agent Control version explicitly. + +## Expected repo wiring + +After the new package exists as a real contrib package, wire it into the repo contract: + +1. Add the extra to `evaluators/builtin/pyproject.toml`: + + ```toml + [project.optional-dependencies] + <name> = ["agent-control-evaluator-<name>>=<version>"] + ``` + + Keep this extra on the current monorepo release line. The release build rewrites builtin + dependency floors to the active release version before publishing + `agent-control-evaluators`, so a lower source floor here would not survive into the + published extra metadata. + +2. Add the workspace source pin to `evaluators/builtin/pyproject.toml`: + + ```toml + [tool.uv.sources] + agent-control-evaluator-<name> = { path = "../contrib/<name>", editable = true } + ``` + +3.
Add the package to `tool.semantic_release.version_toml` in the root `pyproject.toml`: + + ```toml + "evaluators/contrib/<name>/pyproject.toml:project.version", + ``` + + The repo's release automation discovers real contrib packages automatically via + `scripts/contrib_packages.py`, so once the package has a real `pyproject.toml` and the + builtin extra / uv source wiring above is in place, `scripts/build.py` and + `.github/workflows/release.yaml` will pick it up without additional manual edits. + +Until those steps are done, the package is still scaffolding rather than a real contrib package. Docs: https://docs.agentcontrol.dev/concepts/evaluators/contributing-evaluator diff --git a/evaluators/contrib/template/pyproject.toml.template b/evaluators/contrib/template/pyproject.toml.template index 484864bb..2ca97af4 100644 --- a/evaluators/contrib/template/pyproject.toml.template +++ b/evaluators/contrib/template/pyproject.toml.template @@ -1,13 +1,14 @@ [project] -name = "agent-control-evaluator-{{ORG}}" -version = "3.0.0" -description = "{{ORG}} evaluators for agent-control" +name = "agent-control-evaluator-{{NAME}}" +version = "0.1.0" +description = "{{NAME}} evaluators for agent-control" +readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "{{AUTHOR}}" }] dependencies = [ - "agent-control-evaluators>=3.0.0", - "agent-control-models>=3.0.0", + "agent-control-evaluators>=", + "agent-control-models>=", # Add your package-specific dependencies here ] @@ -15,20 +16,22 @@ dependencies = [ dev = [ "pytest>=8.0.0", "pytest-asyncio>=0.23.0", + "pytest-cov>=4.0.0", "ruff>=0.1.0", "mypy>=1.8.0", ] [project.entry-points."agent_control.evaluators"] -# Format: "org.evaluator_name" = "package.module:Class" -"{{ORG}}.{{EVALUATOR}}" = "agent_control_evaluator_{{ORG}}.{{EVALUATOR}}:{{CLASS}}" +# Keep this aligned with EvaluatorMetadata.name (for example "budget" or +# "cisco.ai_defense").
+"{{ENTRY_POINT}}" = "agent_control_evaluator_{{NAME}}.{{EVALUATOR}}:{{CLASS}}" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluator_{{ORG}}"] +packages = ["src/agent_control_evaluator_{{NAME}}"] [tool.uv.sources] agent-control-evaluators = { path = "../../builtin", editable = true } diff --git a/pyproject.toml b/pyproject.toml index a3272db6..89dcbfd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,8 @@ version_toml = [ "telemetry/pyproject.toml:project.version", "server/pyproject.toml:project.version", "evaluators/builtin/pyproject.toml:project.version", + "evaluators/contrib/budget/pyproject.toml:project.version", + "evaluators/contrib/cisco/pyproject.toml:project.version", "evaluators/contrib/galileo/pyproject.toml:project.version", ] version_source = "tag" diff --git a/scripts/build.py b/scripts/build.py index 43095af2..1cdc0cd4 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -6,15 +6,20 @@ then cleans up afterward. This allows the published wheels to be self-contained. 
Usage: - python scripts/build.py [models|evaluators|sdk|server|galileo|all] + python scripts/build.py [models|evaluators|sdk|server|contrib|all|] """ +from __future__ import annotations + +import re import shutil import subprocess -import re +import sys from pathlib import Path -ROOT = Path(__file__).parent.parent +from contrib_packages import ContribPackage, discover_contrib_packages + +ROOT = Path(__file__).resolve().parent.parent def get_global_version() -> str: @@ -38,11 +43,25 @@ def set_package_version(pyproject_path: Path, version: str) -> None: pyproject_path.write_text(updated) +def sync_dependency_floors(pyproject_path: Path, dependency_names: list[str], version: str) -> None: + """Update internal dependency lower bounds to the release version.""" + content = pyproject_path.read_text() + updated = content + for dependency_name in dependency_names: + updated = re.sub( + rf'("{re.escape(dependency_name)}\s*>=\s*)([^",;\]\s]+)', + rf"\g<1>{version}", + updated, + ) + + if updated != content: + pyproject_path.write_text(updated) + + def inject_bundle_metadata(init_file: Path, package_name: str, version: str) -> None: """Add bundling metadata to __init__.py for conflict detection.""" content = init_file.read_text() - # Only add if not already present if "__bundled_by__" in content: return @@ -53,23 +72,44 @@ def inject_bundle_metadata(init_file: Path, package_name: str, version: str) -> init_file.write_text(metadata + content) -def build_models() -> None: - """Build agent-control-models (standalone, no vendoring needed).""" - version = get_global_version() - models_dir = ROOT / "models" - - print(f"Building agent-control-models v{version}") - - # Clean previous builds - dist_dir = models_dir / "dist" +def clean_dist_dir(package_dir: Path) -> Path: + """Remove any previous build output and return the dist directory path.""" + dist_dir = package_dir / "dist" if dist_dir.exists(): shutil.rmtree(dist_dir) + return dist_dir + + +def build_python_package( + 
distribution_name: str, + package_dir: Path, + version: str, + dependency_names: list[str] | None = None, +) -> None: + """Build a standalone Python package into its local dist directory.""" + print(f"Building {distribution_name} v{version}") + dist_dir = clean_dist_dir(package_dir) + pyproject_path = package_dir / "pyproject.toml" + set_package_version(pyproject_path, version) + if dependency_names: + sync_dependency_floors(pyproject_path, dependency_names, version) + subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=package_dir, check=True) + print(f" Built {distribution_name} v{version}") - # Set version - set_package_version(models_dir / "pyproject.toml", version) - subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=models_dir, check=True) - print(f" Built agent-control-models v{version}") +def discover_contrib_by_name() -> dict[str, ContribPackage]: + """Return discovered contrib packages keyed by contrib name.""" + return {package.name: package for package in discover_contrib_packages()} + + +def discover_contrib_distribution_names() -> list[str]: + """Return the distribution names for all discovered contrib packages.""" + return [package.package for package in discover_contrib_packages()] + + +def build_models() -> None: + """Build agent-control-models (standalone, no vendoring needed).""" + build_python_package("agent-control-models", ROOT / "models", get_global_version()) def build_sdk() -> None: @@ -80,17 +120,13 @@ def build_sdk() -> None: print(f"Building agent-control-sdk v{version}") - # Clean previous builds and vendored code for pkg in ["agent_control_models", "agent_control_engine", "agent_control_telemetry"]: target = sdk_src / pkg if target.exists(): shutil.rmtree(target) - dist_dir = sdk_dir / "dist" - if dist_dir.exists(): - shutil.rmtree(dist_dir) + dist_dir = clean_dist_dir(sdk_dir) - # Copy vendored packages shutil.copytree( ROOT / "models" / "src" / "agent_control_models", sdk_src / "agent_control_models", @@ -104,7 +140,6 @@ 
def build_sdk() -> None: sdk_src / "agent_control_telemetry", ) - # Inject bundle metadata for conflict detection inject_bundle_metadata( sdk_src / "agent_control_models" / "__init__.py", "agent-control-sdk", @@ -121,14 +156,18 @@ def build_sdk() -> None: version, ) - # Set version - set_package_version(sdk_dir / "pyproject.toml", version) + sdk_pyproject = sdk_dir / "pyproject.toml" + set_package_version(sdk_pyproject, version) + sync_dependency_floors( + sdk_pyproject, + ["agent-control-evaluators", *discover_contrib_distribution_names()], + version, + ) try: subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=sdk_dir, check=True) print(f" Built agent-control-sdk v{version}") finally: - # Clean up vendored code (don't commit it) for pkg in ["agent_control_models", "agent_control_engine", "agent_control_telemetry"]: target = sdk_src / pkg if target.exists(): @@ -139,7 +178,7 @@ def build_server() -> None: """Build agent-control-server with vendored packages. Note: evaluators are NOT vendored - server uses agent-control-evaluators as a - runtime dependency to avoid duplicate module conflicts with galileo extras. + runtime dependency to avoid duplicate module conflicts with contrib extras. 
""" version = get_global_version() server_dir = ROOT / "server" @@ -147,17 +186,13 @@ def build_server() -> None: print(f"Building agent-control-server v{version}") - # Clean previous builds and vendored code for pkg in ["agent_control_models", "agent_control_engine", "agent_control_telemetry"]: target = server_src / pkg if target.exists(): shutil.rmtree(target) - dist_dir = server_dir / "dist" - if dist_dir.exists(): - shutil.rmtree(dist_dir) + dist_dir = clean_dist_dir(server_dir) - # Copy vendored packages (models, engine, and telemetry only, NOT evaluators) shutil.copytree( ROOT / "models" / "src" / "agent_control_models", server_src / "agent_control_models", @@ -171,7 +206,6 @@ def build_server() -> None: server_src / "agent_control_telemetry", ) - # Inject bundle metadata for conflict detection inject_bundle_metadata( server_src / "agent_control_models" / "__init__.py", "agent-control-server", @@ -188,14 +222,18 @@ def build_server() -> None: version, ) - # Set version - set_package_version(server_dir / "pyproject.toml", version) + server_pyproject = server_dir / "pyproject.toml" + set_package_version(server_pyproject, version) + sync_dependency_floors( + server_pyproject, + ["agent-control-evaluators", *discover_contrib_distribution_names()], + version, + ) try: subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=server_dir, check=True) print(f" Built agent-control-server v{version}") finally: - # Clean up vendored code (don't commit it) for pkg in ["agent_control_models", "agent_control_engine", "agent_control_telemetry"]: target = server_src / pkg if target.exists(): @@ -204,40 +242,49 @@ def build_server() -> None: def build_evaluators() -> None: """Build agent-control-evaluators (standalone, no vendoring needed).""" - version = get_global_version() - evaluators_dir = ROOT / "evaluators" / "builtin" - - print(f"Building agent-control-evaluators v{version}") - - # Clean previous builds - dist_dir = evaluators_dir / "dist" - if dist_dir.exists(): - 
shutil.rmtree(dist_dir) + build_python_package( + "agent-control-evaluators", + ROOT / "evaluators" / "builtin", + get_global_version(), + ["agent-control-models", *discover_contrib_distribution_names()], + ) - # Set version - set_package_version(evaluators_dir / "pyproject.toml", version) - subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=evaluators_dir, check=True) - print(f" Built agent-control-evaluators v{version}") +def build_contrib_package(package: ContribPackage, version: str) -> None: + """Build a discovered contrib evaluator package.""" + build_python_package( + package.package, + ROOT / Path(package.directory), + version, + ["agent-control-evaluators", "agent-control-models"], + ) -def build_evaluator_galileo() -> None: - """Build agent-control-evaluator-galileo (standalone, no vendoring needed).""" +def build_contrib() -> None: + """Build all discovered contrib evaluator packages.""" version = get_global_version() - galileo_dir = ROOT / "evaluators" / "contrib" / "galileo" + packages = discover_contrib_packages() + if not packages: + print("No contrib evaluator packages discovered.") + return - print(f"Building agent-control-evaluator-galileo v{version}") + package_names = ", ".join(package.name for package in packages) + print(f"Building discovered contrib packages ({package_names})") + for package in packages: + build_contrib_package(package, version) - # Clean previous builds - dist_dir = galileo_dir / "dist" - if dist_dir.exists(): - shutil.rmtree(dist_dir) - # Set version - set_package_version(galileo_dir / "pyproject.toml", version) - - subprocess.run(["uv", "build", "-o", str(dist_dir)], cwd=galileo_dir, check=True) - print(f" Built agent-control-evaluator-galileo v{version}") +def build_named_contrib_package(target: str) -> None: + """Build one discovered contrib evaluator package by name.""" + packages = discover_contrib_by_name() + package = packages.get(target) + if package is None: + available_targets = ", ".join(sorted(packages)) + 
raise ValueError( + "Unknown build target " + f"{target!r}. Available contrib targets: {available_targets or '(none)'}" + ) + build_contrib_package(package, get_global_version()) def build_all() -> None: @@ -245,15 +292,21 @@ def build_all() -> None: print(f"Building all packages (version {get_global_version()})\n") build_models() build_evaluators() + build_contrib() build_sdk() build_server() - build_evaluator_galileo() print("\nAll packages built successfully!") -if __name__ == "__main__": - import sys +def usage() -> str: + """Return the CLI usage string.""" + return ( + "Usage: python scripts/build.py " + "[models|evaluators|sdk|server|contrib|all|]" + ) + +if __name__ == "__main__": target = sys.argv[1] if len(sys.argv) > 1 else "all" if target == "models": @@ -264,10 +317,14 @@ def build_all() -> None: build_sdk() elif target == "server": build_server() - elif target == "galileo": - build_evaluator_galileo() + elif target == "contrib": + build_contrib() elif target == "all": build_all() else: - print("Usage: python scripts/build.py [models|evaluators|sdk|server|galileo|all]") - sys.exit(1) + try: + build_named_contrib_package(target) + except ValueError as error: + print(error) + print(usage()) + sys.exit(1) diff --git a/scripts/contrib_packages.py b/scripts/contrib_packages.py new file mode 100644 index 00000000..2f5d5e9b --- /dev/null +++ b/scripts/contrib_packages.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 +"""Discover and verify real contrib evaluator packages.""" + +from __future__ import annotations + +import argparse +import json +import sys +import tomllib +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +EVALUATOR_ENTRY_GROUP = "agent_control.evaluators" +REPO_ROOT = Path(__file__).resolve().parent.parent +CONTRIB_ROOT = REPO_ROOT / "evaluators" / "contrib" + + +class ContribPackagesError(Exception): + """Raised when contrib package discovery or verification cannot proceed.""" + + +@dataclass(frozen=True) 
+class ContribPackage: + """Normalized metadata for a real contrib evaluator package.""" + + name: str + directory: str + package: str + extra: str + entry_group: str = EVALUATOR_ENTRY_GROUP + + @property + def version_toml_entry(self) -> str: + return f"{self.directory}/pyproject.toml:project.version" + + @property + def builtin_uv_source_path(self) -> str: + return f"../contrib/{self.name}" + + def to_matrix_entry(self) -> dict[str, str]: + return { + "name": self.name, + "dir": self.directory, + "package": self.package, + "extra": self.extra, + "entry_group": self.entry_group, + } + + +def load_toml(path: Path) -> dict[str, Any]: + """Load a TOML file with contextual parse errors.""" + + try: + with path.open("rb") as handle: + data = tomllib.load(handle) + except FileNotFoundError as exc: + raise ContribPackagesError( + f"Required file is missing: {display_path(path)}." + ) from exc + except tomllib.TOMLDecodeError as exc: + raise ContribPackagesError( + f"Failed to parse {display_path(path)}: {exc}." + ) from exc + + if not isinstance(data, dict): + raise ContribPackagesError( + f"{display_path(path)} did not parse to a TOML table." + ) + + return data + + +def display_path(path: Path) -> str: + """Render a path relative to the repo root when possible.""" + + try: + return path.relative_to(REPO_ROOT).as_posix() + except ValueError: + return path.as_posix() + + +def require_table( + data: dict[str, Any], key: str, *, path: Path, parent_description: str +) -> dict[str, Any]: + """Return a TOML table or raise a targeted error.""" + + value = data.get(key) + if not isinstance(value, dict): + table_name = f"{parent_description}.{key}" if parent_description else key + raise ContribPackagesError( + f"{display_path(path)} must define [{table_name}] as a TOML table." 
+ ) + return value + + +def require_string(value: Any, *, path: Path, description: str) -> str: + """Return a non-empty string or raise a targeted error.""" + + if not isinstance(value, str) or not value: + raise ContribPackagesError( + f"{display_path(path)} must define {description} as a non-empty string." + ) + return value + + +def dependency_name(requirement: str) -> str: + """Extract the distribution name from a PEP 508 requirement string.""" + + end = len(requirement) + for index, character in enumerate(requirement): + if character in " [<>=!~;": + end = index + break + return requirement[:end].strip().lower() + + +def discover_contrib_packages() -> list[ContribPackage]: + """Discover real contrib evaluator packages under evaluators/contrib.""" + + packages: list[ContribPackage] = [] + + if not CONTRIB_ROOT.is_dir(): + raise ContribPackagesError( + f"Expected contrib root at {display_path(CONTRIB_ROOT)}, but it does not exist." + ) + + for candidate in sorted(CONTRIB_ROOT.iterdir(), key=lambda path: path.name): + if not candidate.is_dir() or candidate.name == "template": + continue + + manifest_path = candidate / "pyproject.toml" + if not manifest_path.is_file(): + continue + + manifest = load_toml(manifest_path) + project = require_table(manifest, "project", path=manifest_path, parent_description="") + project_name = require_string( + project.get("name"), + path=manifest_path, + description='[project].name', + ) + + entry_points = require_table( + project, + "entry-points", + path=manifest_path, + parent_description="project", + ) + evaluator_entries = entry_points.get(EVALUATOR_ENTRY_GROUP) + if not isinstance(evaluator_entries, dict) or not evaluator_entries: + raise ContribPackagesError( + f"{display_path(manifest_path)} must define at least one " + f'[project.entry-points."{EVALUATOR_ENTRY_GROUP}"] entry.' 
+ ) + + expected_package_name = f"agent-control-evaluator-{candidate.name}" + if project_name != expected_package_name: + raise ContribPackagesError( + f"{display_path(manifest_path)} declares project.name = {project_name!r}, " + f"but contrib package {candidate.name!r} must use {expected_package_name!r}." + ) + + packages.append( + ContribPackage( + name=candidate.name, + directory=display_path(candidate), + package=expected_package_name, + extra=candidate.name, + ) + ) + + return packages + + +def verify_contrib_packages(packages: list[ContribPackage]) -> list[str]: + """Return human-readable verification errors for contrib wiring drift.""" + + root_pyproject_path = REPO_ROOT / "pyproject.toml" + builtin_pyproject_path = REPO_ROOT / "evaluators" / "builtin" / "pyproject.toml" + + root_pyproject = load_toml(root_pyproject_path) + builtin_pyproject = load_toml(builtin_pyproject_path) + + tool_table = require_table(root_pyproject, "tool", path=root_pyproject_path, parent_description="") + semantic_release = require_table( + tool_table, + "semantic_release", + path=root_pyproject_path, + parent_description="tool", + ) + version_toml = semantic_release.get("version_toml") + if not isinstance(version_toml, list) or not all(isinstance(item, str) for item in version_toml): + raise ContribPackagesError( + f"{display_path(root_pyproject_path)} must define [tool.semantic_release].version_toml " + "as a list of strings." 
+ ) + + builtin_project = require_table( + builtin_pyproject, + "project", + path=builtin_pyproject_path, + parent_description="", + ) + optional_dependencies = require_table( + builtin_project, + "optional-dependencies", + path=builtin_pyproject_path, + parent_description="project", + ) + + builtin_tool = require_table( + builtin_pyproject, + "tool", + path=builtin_pyproject_path, + parent_description="", + ) + builtin_uv = require_table( + builtin_tool, + "uv", + path=builtin_pyproject_path, + parent_description="tool", + ) + builtin_sources = require_table( + builtin_uv, + "sources", + path=builtin_pyproject_path, + parent_description="tool.uv", + ) + + errors: list[str] = [] + for package in packages: + if package.version_toml_entry not in version_toml: + errors.append( + f"Missing semantic-release version wiring for contrib package {package.name!r}: " + f"add {package.version_toml_entry!r} to [tool.semantic_release].version_toml " + f"in {display_path(root_pyproject_path)}." + ) + + extra_dependencies = optional_dependencies.get(package.extra) + if extra_dependencies is None: + errors.append( + f"Missing builtin extra for contrib package {package.name!r}: " + f"add [project.optional-dependencies].{package.extra} = " + f"[\"{package.package}>=\"] in {display_path(builtin_pyproject_path)}." + ) + elif not isinstance(extra_dependencies, list) or not all( + isinstance(item, str) for item in extra_dependencies + ): + errors.append( + f"Builtin extra {package.extra!r} in {display_path(builtin_pyproject_path)} must be " + "a list of dependency strings." + ) + else: + dependency_names = {dependency_name(item) for item in extra_dependencies} + if package.package not in dependency_names: + errors.append( + f"Builtin extra {package.extra!r} in {display_path(builtin_pyproject_path)} " + f"does not reference {package.package!r}: update it to include " + f"\"{package.package}>=\"." 
+ ) + + source_entry = builtin_sources.get(package.package) + if source_entry is None: + errors.append( + f"Missing uv source for contrib package {package.name!r}: " + f"add [tool.uv.sources].{package.package} = " + f'{{ path = "{package.builtin_uv_source_path}", editable = true }} ' + f"in {display_path(builtin_pyproject_path)}." + ) + elif not isinstance(source_entry, dict): + errors.append( + f"Builtin uv source {package.package!r} in {display_path(builtin_pyproject_path)} " + "must be a TOML table." + ) + + return errors + + +def run_list(packages: list[ContribPackage]) -> int: + """Print a human-readable contrib package summary.""" + + for package in packages: + print( + f"{package.name}: dir={package.directory} package={package.package} " + f"extra={package.extra} entry_group={package.entry_group}" + ) + return 0 + + +def run_names(packages: list[ContribPackage]) -> int: + """Print newline-separated contrib package names.""" + + for package in packages: + print(package.name) + return 0 + + +def run_matrix(packages: list[ContribPackage]) -> int: + """Print a JSON matrix for GitHub Actions or other automation.""" + + print(json.dumps([package.to_matrix_entry() for package in packages], separators=(",", ":"))) + return 0 + + +def run_verify(packages: list[ContribPackage]) -> int: + """Verify root contrib wiring and print actionable drift errors.""" + + errors = verify_contrib_packages(packages) + if errors: + print("Contrib package wiring verification failed:", file=sys.stderr) + for error in errors: + print(f"- {error}", file=sys.stderr) + return 1 + + discovered = ", ".join(package.name for package in packages) or "(none)" + print(f"Verified contrib package wiring for: {discovered}") + return 0 + + +def build_parser() -> argparse.ArgumentParser: + """Build the CLI parser.""" + + parser = argparse.ArgumentParser( + description="Discover and verify real contrib evaluator packages." 
+ ) + parser.add_argument( + "command", + choices=("list", "names", "matrix", "verify"), + help="Command to run.", + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + """Program entry point.""" + + parser = build_parser() + args = parser.parse_args(argv) + + try: + packages = discover_contrib_packages() + if args.command == "list": + return run_list(packages) + if args.command == "names": + return run_names(packages) + if args.command == "matrix": + return run_matrix(packages) + if args.command == "verify": + return run_verify(packages) + except ContribPackagesError as error: + print(f"Error: {error}", file=sys.stderr) + return 1 + + parser.error(f"Unsupported command: {args.command}") + return 2 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/tests/test_build.py b/scripts/tests/test_build.py new file mode 100644 index 00000000..eeb8087c --- /dev/null +++ b/scripts/tests/test_build.py @@ -0,0 +1,102 @@ +import sys +import tomllib +from pathlib import Path + +SCRIPTS_DIR = Path(__file__).resolve().parents[1] +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +import build + + +def test_sync_dependency_floors_updates_internal_minimums(tmp_path: Path) -> None: + # Given: a package manifest with mixed internal and external dependency constraints + pyproject_path = tmp_path / "pyproject.toml" + pyproject_path.write_text( + """ +[project] +dependencies = [ + "agent-control-evaluators>=7.5.0", + "agent-control-models>=7.5.0,<8.0.0", + "httpx>=0.28.0", +] + +[project.optional-dependencies] +galileo = ["agent-control-evaluator-galileo>=7.5.0"] +""".strip() + ) + + # When: syncing internal dependency floors for a new release + build.sync_dependency_floors( + pyproject_path, + [ + "agent-control-evaluators", + "agent-control-models", + "agent-control-evaluator-galileo", + ], + "7.6.0", + ) + + # Then: only the internal minimum versions move to the release version + assert pyproject_path.read_text() == 
( + """ +[project] +dependencies = [ + "agent-control-evaluators>=7.6.0", + "agent-control-models>=7.6.0,<8.0.0", + "httpx>=0.28.0", +] + +[project.optional-dependencies] +galileo = ["agent-control-evaluator-galileo>=7.6.0"] +""".strip() + ) + + +def test_sync_dependency_floors_tolerates_whitespace_around_lower_bounds(tmp_path: Path) -> None: + pyproject_path = tmp_path / "pyproject.toml" + pyproject_path.write_text( + """ +[project] +dependencies = [ + "agent-control-evaluators >= 7.5.0", + "agent-control-models >= 7.5.0,<8.0.0", +] + +[project.optional-dependencies] +galileo = ["agent-control-evaluator-galileo >= 7.5.0"] +""".strip() + ) + + build.sync_dependency_floors( + pyproject_path, + [ + "agent-control-evaluators", + "agent-control-models", + "agent-control-evaluator-galileo", + ], + "7.6.0", + ) + + assert pyproject_path.read_text() == ( + """ +[project] +dependencies = [ + "agent-control-evaluators >= 7.6.0", + "agent-control-models >= 7.6.0,<8.0.0", +] + +[project.optional-dependencies] +galileo = ["agent-control-evaluator-galileo >= 7.6.0"] +""".strip() + ) + + +def test_builtin_evaluators_manifest_keeps_models_floor_rewritable() -> None: + builtin_pyproject = SCRIPTS_DIR.parent / "evaluators" / "builtin" / "pyproject.toml" + with builtin_pyproject.open("rb") as handle: + manifest = tomllib.load(handle) + + dependencies = manifest["project"]["dependencies"] + + assert "agent-control-models>=7.5.0" in dependencies diff --git a/scripts/tests/test_contrib_packages.py b/scripts/tests/test_contrib_packages.py new file mode 100644 index 00000000..82ddb486 --- /dev/null +++ b/scripts/tests/test_contrib_packages.py @@ -0,0 +1,166 @@ +"""Tests for contrib package discovery and verification.""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from textwrap import dedent +from types import ModuleType + +import pytest + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "contrib_packages.py" + + +def 
_load_module() -> ModuleType: + """Load the contrib package script as a module for testing.""" + + module_name = "contrib_packages_under_test" + spec = importlib.util.spec_from_file_location(module_name, SCRIPT_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {SCRIPT_PATH}") + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _write_text(path: Path, contents: str) -> None: + """Write a text file, creating parent directories first.""" + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(dedent(contents).strip() + "\n") + + +def _write_fake_repo( + root: Path, + *, + include_version_entry: bool = True, + include_builtin_extra: bool = True, + include_builtin_source: bool = True, +) -> None: + """Create a minimal repo layout that exercises contrib package wiring.""" + + version_entry = ( + '"evaluators/contrib/example/pyproject.toml:project.version"' + if include_version_entry + else "" + ) + extra_entry = ( + 'example = ["agent-control-evaluator-example>=1.0.0"]' + if include_builtin_extra + else "" + ) + source_entry = ( + 'agent-control-evaluator-example = { path = "../contrib/example", editable = true }' + if include_builtin_source + else "" + ) + + _write_text( + root / "pyproject.toml", + f""" + [project] + name = "agent-control" + version = "1.0.0" + + [tool.semantic_release] + version_toml = [ + "pyproject.toml:project.version", + {version_entry} + ] + """, + ) + _write_text( + root / "evaluators" / "builtin" / "pyproject.toml", + f""" + [project] + name = "agent-control-evaluators" + version = "1.0.0" + + [project.optional-dependencies] + dev = [] + {extra_entry} + + [tool.uv.sources] + agent-control-models = {{ workspace = true }} + {source_entry} + """, + ) + _write_text( + root / "evaluators" / "contrib" / "example" / "pyproject.toml", + """ + [project] + name = "agent-control-evaluator-example" + 
version = "1.0.0" + + [project.entry-points."agent_control.evaluators"] + example = "agent_control_evaluator_example:ExampleEvaluator" + """, + ) + + +def test_discover_contrib_packages_skips_template_and_non_packages( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a fake repo with one real contrib package plus ignored directories + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo(repo_root) + (repo_root / "evaluators" / "contrib" / "template").mkdir(parents=True) + (repo_root / "evaluators" / "contrib" / "notes").mkdir(parents=True) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: discovering contrib packages + packages = module.discover_contrib_packages() + + # Then: only the real package is returned + assert [package.name for package in packages] == ["example"] + assert packages[0].directory == "evaluators/contrib/example" + assert packages[0].package == "agent-control-evaluator-example" + + +def test_verify_contrib_packages_reports_missing_root_and_builtin_wiring( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a contrib package that is missing version, extra, and source wiring + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo( + repo_root, + include_version_entry=False, + include_builtin_extra=False, + include_builtin_source=False, + ) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: verifying the contrib package wiring + packages = module.discover_contrib_packages() + errors = module.verify_contrib_packages(packages) + + # Then: the missing contract pieces are reported explicitly + assert any("Missing semantic-release version wiring" in error for error in errors) + assert any("Missing builtin extra" in error for error in errors) + assert any("Missing uv 
source" in error for error in errors) + + +def test_verify_contrib_packages_accepts_complete_wiring( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a contrib package with complete root and builtin wiring + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo(repo_root) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: verifying the contrib package wiring + packages = module.discover_contrib_packages() + errors = module.verify_contrib_packages(packages) + + # Then: the wiring is accepted without errors + assert errors == [] diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index 06953524..24998e15 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "docstring-parser>=0.15", # For @tool decorator schema inference "google-re2>=1.1", # For engine (bundled) "jsonschema>=4.0.0", # For models/engine (bundled) - "agent-control-evaluators>=3.0.0", # NOT vendored - avoid conflict with galileo + "agent-control-evaluators>=7.5.0", # NOT vendored - avoid conflict with galileo ] authors = [ {name = "Agent Control Team"} @@ -38,7 +38,7 @@ Repository = "https://github.com/yourusername/agent-control" [project.optional-dependencies] strands-agents = ["strands-agents>=1.26.0"] google-adk = ["google-adk>=1.0.0"] -galileo = ["agent-control-evaluator-galileo>=3.0.0"] +galileo = ["agent-control-evaluator-galileo>=7.5.0"] [dependency-groups] dev = [ diff --git a/server/pyproject.toml b/server/pyproject.toml index c2c9f8aa..1d9d6445 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "jsonschema-rs>=0.22.0", "PyJWT>=2.8.0", "google-re2>=1.1", # For engine (bundled) - "agent-control-evaluators>=3.0.0", # NOT vendored - avoid conflict with galileo + "agent-control-evaluators>=7.5.0", # NOT vendored - avoid conflict with galileo 
] authors = [ {name = "Agent Control Team"} @@ -31,7 +31,7 @@ readme = "README.md" license = {text = "Apache-2.0"} [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=3.0.0"] +galileo = ["agent-control-evaluator-galileo>=7.5.0"] [dependency-groups] dev = [