test: route-versioning audit + OTel semconv backstop tests (#130, #132, #147) (#75)

constk · web-flow · commit 4de2a0790cbb · 2026-05-01T23:58:41.000+10:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "harness-python-react"
-version = "0.2.4"
+version = "0.2.5"
 description = "Production-quality LLM-driven coding harness — Python (FastAPI) backend, Vite + React + TypeScript frontend."
 readme = "README.md"
 requires-python = ">=3.14"
diff --git a/tests/test_otel_semconv.py b/tests/test_otel_semconv.py
@@ -0,0 +1,225 @@
+"""Assertions that emitted spans use OTel semantic-convention attribute names.
+
+Test-side enforcement of the rule "use the official OTel semconv attribute
+names where one exists" (see `src/observability/spans.py` and
+`docs/INVARIANTS.md`).
+
+The scaffold ships only semconv-defined keys (no custom ones); the
+`ALLOWED_CUSTOM_KEYS` set below is intentionally empty. When a project
+that extends this template needs a custom attribute (e.g.
+`agent.tool_calls_count`), it MUST add a constant to
+`src/observability/spans.py` AND register it in `ALLOWED_CUSTOM_KEYS` —
+the registry-closure test fails otherwise. That's the forcing function
+that keeps custom keys to their documented set rather than letting them
+proliferate as one-off raw strings.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+
+from src.observability import spans as spans_module
+from src.observability.spans import (
+    DB_QUERY_TEXT,
+    DB_RESPONSE_RETURNED_ROWS,
+    GENAI_CONVERSATION_ID,
+    GENAI_OPERATION_NAME,
+    GENAI_PROVIDER_NAME,
+    GENAI_REQUEST_MODEL,
+    GENAI_TOOL_CALL_ARGUMENTS,
+    GENAI_TOOL_CALL_RESULT,
+    GENAI_TOOL_NAME,
+    GENAI_USAGE_INPUT_TOKENS,
+    GENAI_USAGE_OUTPUT_TOKENS,
+    agent_span,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from opentelemetry.sdk.trace import ReadableSpan
+
+
+SEMCONV_KEYS: frozenset[str] = frozenset(
+    {
+        GENAI_CONVERSATION_ID,
+        GENAI_REQUEST_MODEL,
+        GENAI_OPERATION_NAME,
+        GENAI_PROVIDER_NAME,
+        GENAI_USAGE_INPUT_TOKENS,
+        GENAI_USAGE_OUTPUT_TOKENS,
+        GENAI_TOOL_NAME,
+        GENAI_TOOL_CALL_ARGUMENTS,
+        GENAI_TOOL_CALL_RESULT,
+        DB_QUERY_TEXT,
+        DB_RESPONSE_RETURNED_ROWS,
+    }
+)
+
+# Custom keys allowed because no semconv equivalent exists. Empty in the
+# scaffold — projects that extend this template add entries here as they
+# add custom span attributes (and constants in `src/observability/spans.py`).
+ALLOWED_CUSTOM_KEYS: frozenset[str] = frozenset()
+
+ACCEPTED_KEYS: frozenset[str] = SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS
+
+
+@pytest.fixture
+def memory_exporter(monkeypatch: pytest.MonkeyPatch) -> InMemorySpanExporter:
+    """Install a TracerProvider that records spans into an in-memory exporter.
+
+    Uses `monkeypatch.setattr` to swap `trace.get_tracer_provider` for the
+    test's lifetime instead of `trace.set_tracer_provider` — the OTel SDK
+    refuses to override the global provider once any other test has called
+    `set_tracer_provider`, which would silently route spans to the wrong
+    exporter.
+
+    Uses `SimpleSpanProcessor` so spans are visible to `get_finished_spans()`
+    immediately after the span context exits — no flush race.
+    """
+    exporter = InMemorySpanExporter()
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(exporter))
+    monkeypatch.setattr(trace, "get_tracer_provider", lambda: provider)
+    return exporter
+
+
+def _span_by_name(spans: Sequence[ReadableSpan], name: str) -> ReadableSpan:
+    """Return the unique span with the given name (assert exactly one)."""
+    matches = [s for s in spans if s.name == name]
+    assert len(matches) == 1, (
+        f"expected exactly one span named {name!r}, got {len(matches)} "
+        f"(all names: {[s.name for s in spans]})"
+    )
+    return matches[0]
+
+
+def test_tool_call_span_carries_genai_tool_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """A tool-call span carries the documented `gen_ai.tool.*` keys."""
+    tool_attrs: dict[str, str | int] = {
+        GENAI_TOOL_NAME: "echo",
+        GENAI_TOOL_CALL_ARGUMENTS: '{"msg": "hi"}',
+        GENAI_TOOL_CALL_RESULT: '{"echoed": "hi"}',
+    }
+    with agent_span("tool.echo", tool_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    tool_span = _span_by_name(spans, "tool.echo")
+    assert tool_span.attributes is not None
+    keys = set(tool_span.attributes)
+
+    assert GENAI_TOOL_NAME in keys
+    assert GENAI_TOOL_CALL_ARGUMENTS in keys
+    assert GENAI_TOOL_CALL_RESULT in keys
+    assert tool_span.attributes[GENAI_TOOL_NAME] == "echo"
+
+
+def test_llm_call_span_carries_genai_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """An LLM-style span carries the documented `gen_ai.*` keys + token usage."""
+    llm_attrs: dict[str, str | int] = {
+        GENAI_CONVERSATION_ID: "session-test-1",
+        GENAI_REQUEST_MODEL: "gpt-4o-mini",
+        GENAI_OPERATION_NAME: "chat",
+        GENAI_PROVIDER_NAME: "openai",
+        GENAI_USAGE_INPUT_TOKENS: 42,
+        GENAI_USAGE_OUTPUT_TOKENS: 100,
+    }
+    with agent_span("agent.llm_call", llm_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    llm_span = _span_by_name(spans, "agent.llm_call")
+    assert llm_span.attributes is not None
+
+    for key, value in llm_attrs.items():
+        assert key in llm_span.attributes, f"missing {key}"
+        assert llm_span.attributes[key] == value
+
+
+def test_db_query_span_carries_db_attributes(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """A DB-query span carries `db.query.text` + `db.response.returned_rows`."""
+    db_attrs: dict[str, str | int] = {
+        DB_QUERY_TEXT: "SELECT 1",
+        DB_RESPONSE_RETURNED_ROWS: 1,
+    }
+    with agent_span("db.example_query", db_attrs):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+    db_span = _span_by_name(spans, "db.example_query")
+    assert db_span.attributes is not None
+    keys = set(db_span.attributes)
+
+    assert DB_QUERY_TEXT in keys
+    assert DB_RESPONSE_RETURNED_ROWS in keys
+
+
+def test_no_unregistered_attribute_keys_emitted(
+    memory_exporter: InMemorySpanExporter,
+) -> None:
+    """Every emitted attribute key is in `SEMCONV_KEYS` or `ALLOWED_CUSTOM_KEYS`."""
+    with agent_span(
+        "tool.echo",
+        {
+            GENAI_TOOL_NAME: "echo",
+            GENAI_TOOL_CALL_ARGUMENTS: "{}",
+            GENAI_TOOL_CALL_RESULT: "{}",
+        },
+    ):
+        pass
+
+    spans = memory_exporter.get_finished_spans()
+
+    rogue: list[tuple[str, str]] = []
+    for span in spans:
+        if span.attributes is None:
+            continue
+        for key in span.attributes:
+            if key not in ACCEPTED_KEYS:
+                rogue.append((span.name, key))
+
+    assert not rogue, (
+        "Span attributes emitted with keys outside the documented registry: "
+        f"{rogue}. Either use a semconv name from `src/observability/spans.py` "
+        "or add the new key to both `spans.py` (with a comment explaining why "
+        "no semconv equivalent applies) and `ALLOWED_CUSTOM_KEYS` in this test."
+    )
+
+
+def test_spans_module_constants_are_registered() -> None:
+    """Every uppercase `str` constant in `spans.py` belongs to the test allowlist.
+
+    The allowlist is `SEMCONV_KEYS | ALLOWED_CUSTOM_KEYS`. Catches the
+    "added a new constant in `spans.py` without updating the test" drift class.
+    If this fails, either:
+
+    - the new constant is a semconv name → add it to `SEMCONV_KEYS`.
+    - the new constant is a custom name → add it to `ALLOWED_CUSTOM_KEYS`
+      AND ensure the comment in `spans.py` explains why no semconv applies.
+    """
+    declared = {
+        getattr(spans_module, name)
+        for name in dir(spans_module)
+        if name.isupper() and isinstance(getattr(spans_module, name), str)
+    }
+    unregistered = declared - ACCEPTED_KEYS
+    assert not unregistered, (
+        f"`spans.py` defines constants not in the test allowlist: "
+        f"{sorted(unregistered)!r}. Update `SEMCONV_KEYS` or "
+        "`ALLOWED_CUSTOM_KEYS` in this test."
+    )
diff --git a/tests/test_route_versioning.py b/tests/test_route_versioning.py
@@ -0,0 +1,105 @@
+"""Audit every FastAPI route is `/api/v*/...` or in the documented allowlist.
+
+Backstops invariant 7 in `docs/INVARIANTS.md` (was *aspirational on test*).
+A future contributor adding `@app.get("/foo")` instead of registering the
+endpoint on the versioned `APIRouter(prefix="/api/v1")` would silently
+introduce an un-versioned route — breaking the API-versioning contract from
+`docs/DEVELOPMENT.md` without any gate firing. This test is the gate.
+"""
+
+from __future__ import annotations
+
+import re
+
+from src.api.main import app
+
+VERSIONED_PREFIX = re.compile(r"^/api/v\d+/")
+
+# Routes deliberately NOT under `/api/v*/`. Add an entry here only with a
+# comment naming the structural reason — never just "we needed an exception".
+UNVERSIONED_ALLOWLIST: frozenset[str] = frozenset(
+    {
+        # FastAPI auto-generated documentation surface. Disabled by passing
+        # docs_url=None / redoc_url=None to FastAPI() if needed; the
+        # scaffold keeps them enabled at the unversioned root.
+        "/openapi.json",
+        "/docs",
+        "/docs/oauth2-redirect",
+        "/redoc",
+    }
+)
+
+
+def test_every_route_is_versioned_or_allowlisted() -> None:
+    """Every routable path is `/api/v*/` or in the explicit allowlist."""
+    offenders: list[str] = []
+    for route in app.routes:
+        path = getattr(route, "path", None)
+        if path is None:
+            # Not an APIRoute (e.g. Mount). Skip cleanly — `path` is the
+            # discriminator we care about.
+            continue
+        if path in UNVERSIONED_ALLOWLIST:
+            continue
+        if not VERSIONED_PREFIX.match(path):
+            offenders.append(path)
+
+    assert not offenders, (
+        f"un-versioned route(s) detected in app.routes: {offenders!r}. Per "
+        "docs/DEVELOPMENT.md API Versioning, every routable path must match "
+        "`/api/v*/<…>` or be added to UNVERSIONED_ALLOWLIST in this test "
+        "with a comment naming the structural reason. Common entry points "
+        'for an un-versioned path: a router wired without `prefix="/api/vN"`, '
+        "an `@app.<verb>` decorator on the FastAPI app instead of a versioned "
+        "APIRouter, a manual `app.add_api_route(...)` call, or a Mount that "
+        "exposes paths at the unversioned root. Fix at the wiring site or, "
+        "if the path is genuinely unversioned by design (cf. `/api/health`), "
+        "add it to the allowlist."
+    )
+
+
+def test_allowlist_has_no_dead_entries() -> None:
+    """Every UNVERSIONED_ALLOWLIST entry must correspond to a real route.
+
+    Without this assertion, removing a docs surface (e.g. constructing
+    FastAPI with `docs_url=None`) leaves `/docs` in the allowlist as cruft;
+    over time the allowlist becomes a graveyard that masks what is
+    *actually* deliberately unversioned. Any drop-out fails fast and forces
+    a deletion in the same PR.
+    """
+    actual_paths = {getattr(r, "path", None) for r in app.routes}
+    actual_paths.discard(None)
+    dead = sorted(UNVERSIONED_ALLOWLIST - actual_paths)
+    assert not dead, (
+        f"UNVERSIONED_ALLOWLIST entries no longer present in app.routes: "
+        f"{dead}. Remove the dead entry — keeping it is allowlist cruft. "
+        "If the path was intentionally removed (e.g. FastAPI docs disabled "
+        "via `docs_url=None`), drop the corresponding allowlist line in "
+        "this same PR so the allowlist tracks reality."
+    )
+
+
+# ---------- Regex sanity checks ----------
+#
+# Belt-and-braces against the allowlist swallowing a typo that the route
+# walk wouldn't catch (e.g. someone mutating the regex to be too permissive).
+
+
+def test_regex_accepts_v1_routes() -> None:
+    assert VERSIONED_PREFIX.match("/api/v1/chat")
+    assert VERSIONED_PREFIX.match("/api/v1/sessions")
+    assert VERSIONED_PREFIX.match("/api/v1/sessions/{session_id}")
+
+
+def test_regex_accepts_future_major_versions() -> None:
+    """`/api/v2/...` and beyond are accepted; the rule isn't v1-only."""
+    assert VERSIONED_PREFIX.match("/api/v2/chat")
+    assert VERSIONED_PREFIX.match("/api/v10/some-endpoint")
+
+
+def test_regex_rejects_unversioned_paths() -> None:
+    assert not VERSIONED_PREFIX.match("/foo")
+    assert not VERSIONED_PREFIX.match("/api/foo")
+    assert not VERSIONED_PREFIX.match("/api/health")  # in allowlist, not regex
+    assert not VERSIONED_PREFIX.match("/api/v/chat")  # missing version digit
+    assert not VERSIONED_PREFIX.match("/api/version1/chat")  # wrong prefix shape
diff --git a/uv.lock b/uv.lock