Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 6 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,19 @@ jobs:
uv --version
python --version

- name: Verify contrib packaging contract
run: make contrib-verify

- name: Sync dependencies
run: make sync

- name: Lint
run: make lint

- name: Type check
run: make typecheck

- name: Test with coverage
run: make test
- name: Run Python checks
run: make check

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml
files: coverage-models.xml,coverage-engine.xml,coverage-telemetry.xml,coverage-server.xml,coverage-sdk.xml,coverage-evaluators-budget.xml,coverage-evaluators-cisco.xml,coverage-evaluators-galileo.xml
fail_ci_if_error: false
token: ${{ secrets.CODECOV_TOKEN }}

Expand Down
68 changes: 40 additions & 28 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build galileo-test galileo-lint galileo-lint-fix galileo-typecheck galileo-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish
.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all contrib-verify scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish

# Workspace package names
PACK_MODELS := agent-control-models
Expand All @@ -17,9 +17,17 @@ TS_SDK_DIR := sdks/typescript
ENGINE_DIR := engine
TELEMETRY_DIR := telemetry
EVALUATORS_DIR := evaluators/builtin
GALILEO_DIR := evaluators/contrib/galileo
CONTRIB_DIR := evaluators/contrib
UI_DIR := ui

# run-contrib-target: recipe snippet used as $(call run-contrib-target,<target>).
# It takes the whitespace-separated output of
# `python3 scripts/contrib_packages.py names` as the list of contrib packages and
# runs `$(MAKE) -C $(CONTRIB_DIR)/<package> <target>` once per package.
# The body is a single backslash-continued shell command, so the `packages`
# variable and the loop share one shell; `set -e` stops at the first failure.
# `$$` passes a literal `$` to the shell, while $(1) is expanded by make.
define run-contrib-target
@set -e; \
packages=$$(python3 scripts/contrib_packages.py names); \
for package in $$packages; do \
$(MAKE) -C $(CONTRIB_DIR)/$$package $(1); \
done
endef

help:
@echo "Agent Control - Makefile commands"
@echo ""
Expand All @@ -33,10 +41,12 @@ help:
@echo " make openapi-spec-check - verify OpenAPI generation succeeds"
@echo ""
@echo "Test:"
@echo " make test - run tests for core packages (models, telemetry, server, engine, sdk, evaluators)"
@echo " make test - run tests for core packages and all discovered contrib evaluators"
@echo " make contrib-verify - verify root contrib packaging contract wiring"
@echo " make scripts-test - run root contrib packaging contract tests"
@echo " make models-test - run shared model tests with coverage"
@echo " make test-extras - run tests for contrib evaluators (galileo, etc.)"
@echo " make test-all - run all tests (core + extras)"
@echo " make test-extras - run tests for all discovered contrib evaluators"
@echo " make test-all - alias for make test"
@echo " make sdk-ts-test - run TypeScript SDK tests"
@echo ""
@echo "Quality:"
Expand Down Expand Up @@ -84,7 +94,13 @@ openapi-spec-check: openapi-spec
# Test
# ---------------------------

test: models-test telemetry-test server-test engine-test sdk-test evaluators-test
test: contrib-verify scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test

contrib-verify:
python3 scripts/contrib_packages.py verify

scripts-test:
uv run --with pytest pytest scripts/tests -q

models-test:
cd $(MODELS_DIR) && uv run pytest --cov=src --cov-report=xml:../coverage-models.xml -q
Expand All @@ -94,11 +110,11 @@ test-models: models-test
telemetry-test:
$(MAKE) -C $(TELEMETRY_DIR) test

# Run tests for contrib evaluators (not included in default test target)
test-extras: galileo-test
# Run tests for discovered contrib evaluators
test-extras: contrib-test

# Run all tests (core + extras)
test-all: test test-extras
# Run all tests (alias for test)
test-all: test

# Run tests, lint, and typecheck
check: test lint typecheck
Expand All @@ -107,17 +123,17 @@ check: test lint typecheck
# Quality
# ---------------------------

lint: engine-lint telemetry-lint evaluators-lint
lint: engine-lint telemetry-lint evaluators-lint contrib-lint
uv run --package $(PACK_MODELS) ruff check --config pyproject.toml models/src
uv run --package $(PACK_SERVER) ruff check --config pyproject.toml server/src
uv run --package $(PACK_SDK) ruff check --config pyproject.toml sdks/python/src

lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix
lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix contrib-lint-fix
uv run --package $(PACK_MODELS) ruff check --config pyproject.toml --fix models/src
uv run --package $(PACK_SERVER) ruff check --config pyproject.toml --fix server/src
uv run --package $(PACK_SDK) ruff check --config pyproject.toml --fix sdks/python/src

typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck
typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck contrib-typecheck
uv run --package $(PACK_MODELS) mypy --config-file pyproject.toml models/src
uv run --package $(PACK_SERVER) mypy --config-file pyproject.toml server/src
uv run --package $(PACK_SDK) mypy --config-file pyproject.toml sdks/python/src
Expand All @@ -135,7 +151,7 @@ telemetry-typecheck:
# Build / Publish
# ---------------------------

build: build-models build-server build-sdk engine-build telemetry-build evaluators-build
build: build-models build-server build-sdk engine-build telemetry-build evaluators-build contrib-build

build-models:
cd $(MODELS_DIR) && uv build
Expand Down Expand Up @@ -246,21 +262,17 @@ server-%:
ui-%:
$(MAKE) -C $(UI_DIR) $(patsubst ui-%,%,$@)

# ---------------------------
# Contrib Evaluators (Galileo)
# ---------------------------

galileo-test:
$(MAKE) -C $(GALILEO_DIR) test
contrib-test:
$(call run-contrib-target,test)

galileo-lint:
$(MAKE) -C $(GALILEO_DIR) lint
contrib-lint:
$(call run-contrib-target,lint)

galileo-lint-fix:
$(MAKE) -C $(GALILEO_DIR) lint-fix
contrib-lint-fix:
$(call run-contrib-target,lint-fix)

galileo-typecheck:
$(MAKE) -C $(GALILEO_DIR) typecheck
contrib-typecheck:
$(call run-contrib-target,typecheck)

galileo-build:
$(MAKE) -C $(GALILEO_DIR) build
contrib-build:
$(call run-contrib-target,build)
166 changes: 166 additions & 0 deletions scripts/tests/test_contrib_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Tests for contrib package discovery and verification."""

from __future__ import annotations

import importlib.util
import sys
from pathlib import Path
from textwrap import dedent
from types import ModuleType

import pytest

# Resolved path to contrib_packages.py, located in the parent of the directory
# that contains this test file.
SCRIPT_PATH = Path(__file__).resolve().parents[1] / "contrib_packages.py"


def _load_module() -> ModuleType:
    """Import the script at ``SCRIPT_PATH`` and return it as a module object.

    The module is registered in ``sys.modules`` under a test-only alias before
    its body is executed.

    Raises:
        RuntimeError: If no import spec (or no loader) can be created for
            ``SCRIPT_PATH``.
    """

    alias = "contrib_packages_under_test"
    module_spec = importlib.util.spec_from_file_location(alias, SCRIPT_PATH)
    if module_spec is None or module_spec.loader is None:
        raise RuntimeError(f"Unable to load module from {SCRIPT_PATH}")

    loaded = importlib.util.module_from_spec(module_spec)
    # Register first, then execute: same order as the original loader code.
    sys.modules[alias] = loaded
    module_spec.loader.exec_module(loaded)
    return loaded


def _write_text(path: Path, contents: str) -> None:
"""Write a text file, creating parent directories first."""

path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(dedent(contents).strip() + "\n")


def _write_fake_repo(
    root: Path,
    *,
    include_version_entry: bool = True,
    include_builtin_extra: bool = True,
    include_builtin_source: bool = True,
) -> None:
    """Lay out a tiny fake repository under ``root`` for contrib wiring tests.

    Three ``pyproject.toml`` files are written: one at ``root``, one under
    ``evaluators/builtin`` and one under ``evaluators/contrib/example``.

    Args:
        root: Directory that acts as the repository root.
        include_version_entry: Whether the root ``version_toml`` list mentions
            the example contrib package.
        include_builtin_extra: Whether the builtin package declares the
            ``example`` optional-dependency extra.
        include_builtin_source: Whether the builtin package declares a uv
            source for the example contrib package.
    """

    # Each optional line collapses to an empty string when its flag is off.
    version_line = ""
    if include_version_entry:
        version_line = '"evaluators/contrib/example/pyproject.toml:project.version"'

    extra_line = ""
    if include_builtin_extra:
        extra_line = 'example = ["agent-control-evaluator-example>=1.0.0"]'

    source_line = ""
    if include_builtin_source:
        source_line = (
            'agent-control-evaluator-example = { path = "../contrib/example", editable = true }'
        )

    evaluators_dir = root / "evaluators"

    # Root project: carries the semantic-release version wiring.
    _write_text(
        root / "pyproject.toml",
        f"""
        [project]
        name = "agent-control"
        version = "1.0.0"

        [tool.semantic_release]
        version_toml = [
        "pyproject.toml:project.version",
        {version_line}
        ]
        """,
    )

    # Builtin evaluators package: carries the extra and the uv source wiring.
    _write_text(
        evaluators_dir / "builtin" / "pyproject.toml",
        f"""
        [project]
        name = "agent-control-evaluators"
        version = "1.0.0"

        [project.optional-dependencies]
        dev = []
        {extra_line}

        [tool.uv.sources]
        agent-control-models = {{ workspace = true }}
        {source_line}
        """,
    )

    # The single contrib package, always written.
    _write_text(
        evaluators_dir / "contrib" / "example" / "pyproject.toml",
        """
        [project]
        name = "agent-control-evaluator-example"
        version = "1.0.0"

        [project.entry-points."agent_control.evaluators"]
        example = "agent_control_evaluator_example:ExampleEvaluator"
        """,
    )


def test_discover_contrib_packages_skips_template_and_non_packages(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Discovery yields only the ``example`` package from the fake repo."""

    # Given: a fake repo with one real contrib package plus ignored directories
    contrib_packages = _load_module()
    fake_root = tmp_path / "repo"
    contrib_root = fake_root / "evaluators" / "contrib"
    _write_fake_repo(fake_root)
    for ignored_name in ("template", "notes"):
        # Empty directories: neither contains a pyproject.toml.
        (contrib_root / ignored_name).mkdir(parents=True)
    monkeypatch.setattr(contrib_packages, "REPO_ROOT", fake_root)
    monkeypatch.setattr(contrib_packages, "CONTRIB_ROOT", contrib_root)

    # When: discovering contrib packages
    discovered = contrib_packages.discover_contrib_packages()

    # Then: only the real package is returned
    assert [entry.name for entry in discovered] == ["example"]
    first = discovered[0]
    assert first.directory == "evaluators/contrib/example"
    assert first.package == "agent-control-evaluator-example"


def test_verify_contrib_packages_reports_missing_root_and_builtin_wiring(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Verification reports each missing piece of wiring for the fake repo."""

    # Given: a contrib package that is missing version, extra, and source wiring
    contrib_packages = _load_module()
    fake_root = tmp_path / "repo"
    omitted_wiring = {
        "include_version_entry": False,
        "include_builtin_extra": False,
        "include_builtin_source": False,
    }
    _write_fake_repo(fake_root, **omitted_wiring)
    monkeypatch.setattr(contrib_packages, "REPO_ROOT", fake_root)
    monkeypatch.setattr(contrib_packages, "CONTRIB_ROOT", fake_root / "evaluators" / "contrib")

    # When: verifying the contrib package wiring
    discovered = contrib_packages.discover_contrib_packages()
    errors = contrib_packages.verify_contrib_packages(discovered)

    # Then: the missing contract pieces are reported explicitly
    expected_fragments = (
        "Missing semantic-release version wiring",
        "Missing builtin extra",
        "Missing uv source",
    )
    for fragment in expected_fragments:
        assert any(fragment in error for error in errors)


def test_verify_contrib_packages_accepts_complete_wiring(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Verification returns no errors when the fake repo is fully wired."""

    # Given: a contrib package with complete root and builtin wiring
    contrib_packages = _load_module()
    fake_root = tmp_path / "repo"
    _write_fake_repo(fake_root)
    monkeypatch.setattr(contrib_packages, "REPO_ROOT", fake_root)
    monkeypatch.setattr(contrib_packages, "CONTRIB_ROOT", fake_root / "evaluators" / "contrib")

    # When: verifying the contrib package wiring
    errors = contrib_packages.verify_contrib_packages(
        contrib_packages.discover_contrib_packages()
    )

    # Then: the wiring is accepted without errors
    assert errors == []
Loading