From f63b56ff12b8e4b8d4062eb69b415874afa76253 Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Mon, 20 Apr 2026 17:32:28 -0700 Subject: [PATCH 1/3] ci: make root checks contrib-aware --- .github/workflows/ci.yml | 13 ++++----- Makefile | 60 +++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 909b71b4..03aa1b1c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,17 +47,14 @@ jobs: uv --version python --version + - name: Verify contrib packaging contract + run: python3 scripts/contrib_packages.py verify + - name: Sync dependencies run: make sync - - name: Lint - run: make lint - - - name: Type check - run: make typecheck - - - name: Test with coverage - run: make test + - name: Run Python checks + run: make check - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 diff --git a/Makefile b/Makefile index e11ac6a0..16b6f4f6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build galileo-test galileo-lint galileo-lint-fix galileo-typecheck galileo-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -17,8 +17,16 @@ TS_SDK_DIR := sdks/typescript ENGINE_DIR := engine TELEMETRY_DIR := telemetry EVALUATORS_DIR := evaluators/builtin -GALILEO_DIR := evaluators/contrib/galileo +CONTRIB_DIR := evaluators/contrib UI_DIR := ui +CONTRIB_PACKAGE_NAMES := $(shell python3 scripts/contrib_packages.py names) + +define run-contrib-target + @set -e; \ + for package in $(CONTRIB_PACKAGE_NAMES); do \ + $(MAKE) -C $(CONTRIB_DIR)/$$package $(1); \ + done +endef help: @echo "Agent Control - Makefile commands" @@ -33,10 +41,10 @@ help: @echo " make openapi-spec-check - verify OpenAPI generation succeeds" @echo "" @echo "Test:" - @echo " make test - run tests for core packages (models, telemetry, server, engine, sdk, evaluators)" + @echo " make test - run tests for core packages and all discovered contrib evaluators" @echo " make models-test - run shared model tests with coverage" - @echo " make test-extras - run tests for contrib evaluators (galileo, etc.)" - @echo " make test-all - run all tests (core + extras)" + @echo " make test-extras - run tests for all discovered contrib evaluators" + @echo " make test-all - alias for make test" @echo " make sdk-ts-test - run TypeScript SDK tests" @echo "" @echo "Quality:" @@ -84,7 +92,7 @@ openapi-spec-check: openapi-spec # Test # --------------------------- -test: models-test telemetry-test server-test engine-test sdk-test evaluators-test +test: models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test models-test: cd $(MODELS_DIR) && uv run pytest --cov=src --cov-report=xml:../coverage-models.xml -q @@ -94,11 +102,11 @@ test-models: models-test telemetry-test: $(MAKE) -C $(TELEMETRY_DIR) test -# Run tests for contrib evaluators (not included in default test target) -test-extras: galileo-test +# Run tests for discovered contrib evaluators +test-extras: contrib-test -# Run all tests (core + extras) -test-all: test test-extras +# Run all tests (alias for test) +test-all: test # Run tests, lint, and typecheck check: test lint typecheck @@ -107,17 +115,17 @@ check: test lint typecheck # Quality # --------------------------- -lint: engine-lint telemetry-lint evaluators-lint +lint: engine-lint telemetry-lint evaluators-lint contrib-lint uv run --package $(PACK_MODELS) ruff check --config pyproject.toml models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml sdks/python/src -lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix +lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix contrib-lint-fix uv run --package $(PACK_MODELS) ruff check --config pyproject.toml --fix models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml --fix server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml --fix sdks/python/src -typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck +typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck contrib-typecheck uv run --package $(PACK_MODELS) mypy --config-file pyproject.toml models/src uv run --package $(PACK_SERVER) mypy --config-file pyproject.toml server/src uv run --package $(PACK_SDK) mypy --config-file pyproject.toml sdks/python/src @@ -135,7 +143,7 @@ telemetry-typecheck: # Build / Publish # --------------------------- -build: build-models build-server build-sdk engine-build telemetry-build evaluators-build +build: build-models build-server build-sdk engine-build telemetry-build evaluators-build contrib-build build-models: cd $(MODELS_DIR) && uv build @@ -246,21 +254,17 @@ server-%: ui-%: $(MAKE) -C $(UI_DIR) $(patsubst ui-%,%,$@) -# --------------------------- -# Contrib Evaluators (Galileo) -# --------------------------- - -galileo-test: - $(MAKE) -C $(GALILEO_DIR) test +contrib-test: + $(call run-contrib-target,test) -galileo-lint: - $(MAKE) -C $(GALILEO_DIR) lint +contrib-lint: + $(call run-contrib-target,lint) -galileo-lint-fix: - $(MAKE) -C $(GALILEO_DIR) lint-fix +contrib-lint-fix: + $(call run-contrib-target,lint-fix) -galileo-typecheck: - $(MAKE) -C $(GALILEO_DIR) typecheck +contrib-typecheck: + $(call run-contrib-target,typecheck) -galileo-build: - $(MAKE) -C $(GALILEO_DIR) build +contrib-build: + $(call run-contrib-target,build) From a9d60e15f9a221ce2b3bf5602a90c553af7ae79f Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Mon, 20 Apr 2026 18:07:02 -0700 Subject: [PATCH 2/3] fix: defer contrib discovery in root makefile --- Makefile | 12 +- scripts/tests/test_contrib_packages.py | 166 +++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 4 deletions(-) create mode 100644 scripts/tests/test_contrib_packages.py diff --git a/Makefile b/Makefile index 16b6f4f6..79af11ba 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -19,11 +19,11 @@ TELEMETRY_DIR := telemetry EVALUATORS_DIR := evaluators/builtin CONTRIB_DIR := evaluators/contrib UI_DIR := ui -CONTRIB_PACKAGE_NAMES := $(shell python3 scripts/contrib_packages.py names) define run-contrib-target @set -e; \ - for package in $(CONTRIB_PACKAGE_NAMES); do \ + packages=$$(python3 scripts/contrib_packages.py names); \ + for package in $$packages; do \ $(MAKE) -C $(CONTRIB_DIR)/$$package $(1); \ done endef @@ -42,6 +42,7 @@ help: @echo "" @echo "Test:" @echo " make test - run tests for core packages and all discovered contrib evaluators" + @echo " make scripts-test - run root contrib packaging contract tests" @echo " make models-test - run shared model tests with coverage" @echo " make test-extras - run tests for all discovered contrib evaluators" @echo " make test-all - alias for make test" @@ -92,7 +93,10 @@ openapi-spec-check: openapi-spec # Test # --------------------------- -test: models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test +test: scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test + +scripts-test: + uv run --with pytest pytest scripts/tests -q models-test: cd $(MODELS_DIR) && uv run pytest --cov=src --cov-report=xml:../coverage-models.xml -q diff --git a/scripts/tests/test_contrib_packages.py b/scripts/tests/test_contrib_packages.py new file mode 100644 index 00000000..82ddb486 --- /dev/null +++ b/scripts/tests/test_contrib_packages.py @@ -0,0 +1,166 @@ +"""Tests for contrib package discovery and verification.""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from textwrap import dedent +from types import ModuleType + +import pytest + +SCRIPT_PATH = Path(__file__).resolve().parents[1] / "contrib_packages.py" + + +def _load_module() -> ModuleType: + """Load the contrib package script as a module for testing.""" + + module_name = "contrib_packages_under_test" + spec = importlib.util.spec_from_file_location(module_name, SCRIPT_PATH) + if spec is None or spec.loader is None: + raise RuntimeError(f"Unable to load module from {SCRIPT_PATH}") + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + return module + + +def _write_text(path: Path, contents: str) -> None: + """Write a text file, creating parent directories first.""" + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(dedent(contents).strip() + "\n") + + +def _write_fake_repo( + root: Path, + *, + include_version_entry: bool = True, + include_builtin_extra: bool = True, + include_builtin_source: bool = True, +) -> None: + """Create a minimal repo layout that exercises contrib package wiring.""" + + version_entry = ( + '"evaluators/contrib/example/pyproject.toml:project.version"' + if include_version_entry + else "" + ) + extra_entry = ( + 'example = ["agent-control-evaluator-example>=1.0.0"]' + if include_builtin_extra + else "" + ) + source_entry = ( + 'agent-control-evaluator-example = { path = "../contrib/example", editable = true }' + if include_builtin_source + else "" + ) + + _write_text( + root / "pyproject.toml", + f""" + [project] + name = "agent-control" + version = "1.0.0" + + [tool.semantic_release] + version_toml = [ + "pyproject.toml:project.version", + {version_entry} + ] + """, + ) + _write_text( + root / "evaluators" / "builtin" / "pyproject.toml", + f""" + [project] + name = "agent-control-evaluators" + version = "1.0.0" + + [project.optional-dependencies] + dev = [] + {extra_entry} + + [tool.uv.sources] + agent-control-models = {{ workspace = true }} + {source_entry} + """, + ) + _write_text( + root / "evaluators" / "contrib" / "example" / "pyproject.toml", + """ + [project] + name = "agent-control-evaluator-example" + version = "1.0.0" + + [project.entry-points."agent_control.evaluators"] + example = "agent_control_evaluator_example:ExampleEvaluator" + """, + ) + + +def test_discover_contrib_packages_skips_template_and_non_packages( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a fake repo with one real contrib package plus ignored directories + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo(repo_root) + (repo_root / "evaluators" / "contrib" / "template").mkdir(parents=True) + (repo_root / "evaluators" / "contrib" / "notes").mkdir(parents=True) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: discovering contrib packages + packages = module.discover_contrib_packages() + + # Then: only the real package is returned + assert [package.name for package in packages] == ["example"] + assert packages[0].directory == "evaluators/contrib/example" + assert packages[0].package == "agent-control-evaluator-example" + + +def test_verify_contrib_packages_reports_missing_root_and_builtin_wiring( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a contrib package that is missing version, extra, and source wiring + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo( + repo_root, + include_version_entry=False, + include_builtin_extra=False, + include_builtin_source=False, + ) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: verifying the contrib package wiring + packages = module.discover_contrib_packages() + errors = module.verify_contrib_packages(packages) + + # Then: the missing contract pieces are reported explicitly + assert any("Missing semantic-release version wiring" in error for error in errors) + assert any("Missing builtin extra" in error for error in errors) + assert any("Missing uv source" in error for error in errors) + + +def test_verify_contrib_packages_accepts_complete_wiring( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + # Given: a contrib package with complete root and builtin wiring + module = _load_module() + repo_root = tmp_path / "repo" + _write_fake_repo(repo_root) + monkeypatch.setattr(module, "REPO_ROOT", repo_root) + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + + # When: verifying the contrib package wiring + packages = module.discover_contrib_packages() + errors = module.verify_contrib_packages(packages) + + # Then: the wiring is accepted without errors + assert errors == [] From b86e76dcc3090cdf740ff8bf5ebb9a792267932b Mon Sep 17 00:00:00 2001 From: Lev Neiman Date: Mon, 20 Apr 2026 18:32:30 -0700 Subject: [PATCH 3/3] fix(ci): align contrib verification with root checks --- .github/workflows/ci.yml | 4 ++-- Makefile | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03aa1b1c..24ce1e83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,7 +48,7 @@ jobs: python --version - name: Verify contrib packaging contract - run: python3 scripts/contrib_packages.py verify + run: make contrib-verify - name: Sync dependencies run: make sync @@ -59,7 +59,7 @@ jobs: - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: - files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml + files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml,coverage-evaluators-budget.xml,coverage-evaluators-cisco.xml,coverage-evaluators-galileo.xml fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} diff --git a/Makefile b/Makefile index 79af11ba..75901ef6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all contrib-verify scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -42,6 +42,7 @@ help: @echo "" @echo "Test:" @echo " make test - run tests for core packages and all discovered contrib evaluators" + @echo " make contrib-verify - verify root contrib packaging contract wiring" @echo " make scripts-test - run root contrib packaging contract tests" @echo " make models-test - run shared model tests with coverage" @echo " make test-extras - run tests for all discovered contrib evaluators" @@ -93,7 +94,10 @@ openapi-spec-check: openapi-spec # Test # --------------------------- -test: scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test +test: contrib-verify scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test + +contrib-verify: + python3 scripts/contrib_packages.py verify scripts-test: uv run --with pytest pytest scripts/tests -q