From 2873454f13e4191f50035e3abdd2cd48beeaffe3 Mon Sep 17 00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 02:09:52 +0200 Subject: [PATCH 1/6] ci(wire-shape): block Java @JsonProperty drift from OpenAPI specs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a CI gate that fails any PR introducing drift between Java @JsonProperty annotations and the OpenAPI specs pinned at tests/fixtures/wire-shape-baseline.json::openapi_specs_sha. Four gates: 1. Cross-spec schema divergence — same schema name declared with different shapes across spec files. 2. Intra-file duplicates — same schema name declared twice in one spec file (PolicyMatch in orchestrator-api.yaml is the existing example, baselined for now). 3. Per-type SDK-vs-spec drift — wire field names diff between Java @JsonProperty and the spec, baseline-aware. 4. Registered-type rename-escape — types in the baseline that disappear from either side fail the gate. The pinned spec SHA is itself guarded by a `spec-pin-bump` PR label so a single PR can't both move the SHA and silence drift. Source discovery walks brace depth on string- and comment-stripped text so annotations are attributed to the innermost enclosing class/record/interface/enum rather than the file's outer class. Without this, types like WorkflowTypes.CreateWorkflowRequest and WorkflowTypes.RetryContext (10+ wire types nested inside the WorkflowTypes namespace class) would silently escape coverage. Initial baseline at SHA bf1ca22: - 70 registered Java<->OpenAPI type pairs - 39 per-type drift entries (burndown follow-ups) - 8 cross-spec divergences (platform-side reconciliation tracked separately) - 1 intra-file duplicate (PolicyMatch) Mirrors the Python, Go, and TypeScript wire-shape gates. 
Re-baseline: python3 scripts/wire_shape/refresh.py /path/to/community/docs/api --- .github/workflows/wire-shape-contract.yml | 120 ++++ CHANGELOG.md | 1 + scripts/wire_shape/lib.py | 348 ++++++++++++ scripts/wire_shape/refresh.py | 130 +++++ scripts/wire_shape/validate.py | 233 ++++++++ tests/fixtures/wire-shape-baseline.json | 656 ++++++++++++++++++++++ 6 files changed, 1488 insertions(+) create mode 100644 .github/workflows/wire-shape-contract.yml create mode 100755 scripts/wire_shape/lib.py create mode 100755 scripts/wire_shape/refresh.py create mode 100755 scripts/wire_shape/validate.py create mode 100644 tests/fixtures/wire-shape-baseline.json diff --git a/.github/workflows/wire-shape-contract.yml b/.github/workflows/wire-shape-contract.yml new file mode 100644 index 0000000..1fae03c --- /dev/null +++ b/.github/workflows/wire-shape-contract.yml @@ -0,0 +1,120 @@ +name: Wire-Shape Contract + +# QF-14 Java arm: blocks drift between Java @JsonProperty-annotated +# classes and the OpenAPI specs that are the authoritative wire +# contract. Runs on every PR and push to main. Drift NOT covered by +# the baseline fails the check. +# +# Specs are fetched from the getaxonflow/axonflow community mirror at +# the SHA recorded in tests/fixtures/wire-shape-baseline.json so the +# gate is deterministic. A 'spec-pin-bump' label is required on PRs +# that change the SHA, preserving review integrity (a PR that both +# moved the SHA and the Java classes could otherwise silence drift). 
+# +# To regenerate the baseline: +# python3 scripts/wire_shape/refresh.py + +on: + pull_request: + branches: [main] + paths: + - 'src/main/java/**/*.java' + - 'tests/fixtures/wire-shape-baseline.json' + - 'scripts/wire_shape/**' + - '.github/workflows/wire-shape-contract.yml' + push: + branches: [main] + paths: + - 'src/main/java/**/*.java' + - 'tests/fixtures/wire-shape-baseline.json' + - 'scripts/wire_shape/**' + - '.github/workflows/wire-shape-contract.yml' + +permissions: + contents: read + +jobs: + wire-shape: + name: Validate Wire Shape + runs-on: ubuntu-latest + env: + DO_NOT_TRACK: '1' + steps: + - name: Checkout SDK (full history for SHA-bump guard) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Read pinned OpenAPI specs SHA from baseline + id: specs_sha + run: | + python3 - <<'PY' >> "$GITHUB_OUTPUT" + import json + import sys + path = "tests/fixtures/wire-shape-baseline.json" + data = json.load(open(path)) + sha = (data.get("openapi_specs_sha", "") or "").strip() + if not sha: + print( + f"::error::{path} is missing openapi_specs_sha. " + "Regenerate via scripts/wire_shape/refresh.py.", + file=sys.stderr, + ) + sys.exit(1) + print(f"sha={sha}") + PY + + - name: Guard against unauthorized OpenAPI specs SHA bump + if: github.event_name == 'pull_request' + env: + PR_LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }} + BASE_REF: ${{ github.base_ref }} + PR_SHA: ${{ steps.specs_sha.outputs.sha }} + run: | + set -e + BASE_SHA=$( + git show "origin/${BASE_REF}:tests/fixtures/wire-shape-baseline.json" 2>/dev/null \ + | python3 -c "import json, sys; print(json.load(sys.stdin).get('openapi_specs_sha','') or '')" \ + || true + ) + if [ -z "$BASE_SHA" ]; then + echo "::notice::Base branch has no openapi_specs_sha yet; treating this PR as first pin introduction." + exit 0 + fi + if [ "$BASE_SHA" = "$PR_SHA" ]; then + echo "openapi_specs_sha unchanged (${PR_SHA})." 
+ exit 0 + fi + echo "SHA change detected: ${BASE_SHA} -> ${PR_SHA}" + HAS_LABEL=$(printf '%s' "$PR_LABELS" | python3 -c "import json, sys; print('spec-pin-bump' in json.load(sys.stdin))") + if [ "$HAS_LABEL" = "True" ]; then + echo "::notice::'spec-pin-bump' label present — SHA bump authorized." + exit 0 + fi + echo "::error::openapi_specs_sha changed from ${BASE_SHA} to ${PR_SHA}." + echo "::error::The wire-shape contract's spec revision is pinned to preserve" + echo "::error::review integrity: a SHA change in the same PR as SDK changes" + echo "::error::can silence drift by retargeting the contract to a friendlier" + echo "::error::revision. Either split into a dedicated SHA-bump PR, or" + echo "::error::apply the 'spec-pin-bump' label to this PR." + exit 1 + + - name: Checkout OpenAPI specs (pinned to baseline SHA) + uses: actions/checkout@v4 + with: + repository: getaxonflow/axonflow + ref: ${{ steps.specs_sha.outputs.sha }} + path: axonflow-community + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install PyYAML + run: pip install 'pyyaml>=6,<7' + + - name: Run wire-shape contract validator + env: + AXONFLOW_OPENAPI_SPECS_DIR: ${{ github.workspace }}/axonflow-community/docs/api + run: python3 scripts/wire_shape/validate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index fc1d7ff..5d5c225 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **Version alignment check** (`.github/workflows/validate-version-alignment.yml`). CI now fails any PR or push to `main` where `pom.xml`'s `` drifts from the first released `## [X.Y.Z]` section in `CHANGELOG.md`. Matches the pattern in the platform repo and the Go SDK. +- **Wire-shape contract gate** (`.github/workflows/wire-shape-contract.yml`). 
CI fails any PR that introduces drift between Java `@JsonProperty` annotations and the OpenAPI specs pinned at `tests/fixtures/wire-shape-baseline.json::openapi_specs_sha`. Four gates: cross-spec schema divergence, intra-file schema duplicates, per-type SDK-vs-spec drift, and registered-type rename-escape. The pinned spec SHA is itself guarded by a `spec-pin-bump` PR label so a single PR can't both move the SHA and silence drift. Source-discovery walks brace depth so nested classes (e.g. `WorkflowTypes.CreateWorkflowRequest`) and inner enums are attributed to the correct type rather than the file's outer class. Mirrors the Python, Go, and TypeScript gates. ## [5.7.0] - 2026-04-22 diff --git a/scripts/wire_shape/lib.py b/scripts/wire_shape/lib.py new file mode 100755 index 0000000..2806c60 --- /dev/null +++ b/scripts/wire_shape/lib.py @@ -0,0 +1,348 @@ +"""Shared helpers for the wire-shape contract CI (QF-14 Java arm). + +Used by: +- scripts/wire_shape/validate.py (the PR-blocking gate) +- scripts/wire_shape/refresh.py (the baseline regenerator) + +Mirrors axonflow-sdk-python's tests/test_wire_shape.py, +axonflow-sdk-go's internal/wireshape package, and axonflow-sdk- +typescript's scripts/wire-shape/lib.js so all four SDKs' gates stay +conceptually aligned. + +Design decisions specific to Java: +- Discovery scans Java source for `class Name { ... }` declarations + and `@JsonProperty("wire_name")` annotations. A naive regex that + only captures the outermost class per file under-covers the SDK: + WorkflowTypes.java alone holds 10+ wire types nested inside an + outer class. The parser below tracks brace depth (after stripping + strings + comments) so annotations are attributed to the innermost + class whose body contains them. +- Avoids the `mvn compile` round-trip in CI and stays dependency-free + beyond PyYAML. If nested-generic or annotation-inside-string edge + cases start causing false positives, switch to `javalang`. 
def load_all_schemas(spec_dir: Path) -> tuple[
    dict[str, list[str]],
    dict[str, dict[str, list[str]]],
    dict[str, dict[str, int]],
]:
    """Load every ``*.yaml`` file in *spec_dir* and index its component schemas.

    Returns ``(merged, cross_spec_duplicates, intra_file_duplicates)``:

    - ``merged[name]`` is the sorted list of property names of schema
      *name*. Files are visited in sorted order and the last declaration
      that has at least one property wins on a name collision.
    - ``cross_spec_duplicates[name]`` maps ``spec_file -> fields`` for
      schemas declared in more than one file with DIFFERENT field lists.
      Identical redundant declarations are filtered out.
    - ``intra_file_duplicates[file][name]`` is the number of times schema
      *name* is declared inside a single file, recorded only when that
      number is greater than one.

    The YAML is walked at node level (``yaml.compose``) instead of being
    loaded into dicts, because a dict cannot hold the same key twice and
    the duplicate count would be lost.

    Schema entries whose value is not a mapping, or whose ``properties``
    value is not a mapping (e.g. ``properties: null``), contribute no
    fields. They are still counted for the intra-file duplicate check.
    """
    merged: dict[str, list[str]] = {}
    all_decls: dict[str, dict[str, list[str]]] = {}
    intra_file_duplicates: dict[str, dict[str, int]] = {}

    for spec_file in sorted(spec_dir.glob("*.yaml")):
        # OpenAPI documents are UTF-8; do not depend on the runner's locale.
        text = spec_file.read_text(encoding="utf-8")
        # compose() yields the node graph without constructing Python
        # objects, so duplicate mapping keys stay visible as separate pairs.
        root = yaml.compose(text)
        schemas_node = _find_schemas_node(root)
        if schemas_node is None:
            continue

        intra_counts: dict[str, int] = {}
        for key_node, value_node in schemas_node.value:
            if not isinstance(key_node.value, str):
                continue
            schema_name = key_node.value
            intra_counts[schema_name] = intra_counts.get(schema_name, 0) + 1

            # Guard before descending: a scalar/sequence node's .value is
            # not a list of (key, value) pairs and cannot be unpacked.
            if not isinstance(value_node, yaml.MappingNode):
                continue
            props_node = _find_mapping_child(value_node, "properties")
            if not isinstance(props_node, yaml.MappingNode):
                continue

            fields = sorted(
                k.value
                for k, _ in props_node.value
                if isinstance(k.value, str)
            )
            if not fields:
                continue
            all_decls.setdefault(schema_name, {})[spec_file.name] = fields
            merged[schema_name] = fields

        for schema_name, count in intra_counts.items():
            if count > 1:
                intra_file_duplicates.setdefault(spec_file.name, {})[schema_name] = count

    cross_spec_duplicates: dict[str, dict[str, list[str]]] = {}
    for schema_name, decls in all_decls.items():
        if len(decls) < 2:
            continue
        # Only genuinely different shapes are a problem.
        if len({tuple(fields) for fields in decls.values()}) > 1:
            cross_spec_duplicates[schema_name] = decls

    return merged, cross_spec_duplicates, intra_file_duplicates
def _is_mapping_node(node) -> bool:
    """Return True when *node* is a YAML mapping node.

    The check is by class name so this module does not need to import
    PyYAML's node classes just to walk a composed document.
    """
    return node is not None and node.__class__.__name__ == "MappingNode"


def _find_schemas_node(root):
    """Return the ``components.schemas`` mapping node of a composed spec.

    *root* is whatever ``yaml.compose`` returned for the document: the
    top-level node, or ``None`` for an empty document. Returns ``None``
    when the document is empty, when any node along the path is not a
    mapping, or when either key is missing.
    """
    node = root
    for key in ("components", "schemas"):
        if not _is_mapping_node(node):
            return None
        node = _find_mapping_child(node, key)
    return node if _is_mapping_node(node) else None


def _find_mapping_child(mapping_node, key):
    """Return the value node stored under string *key* in *mapping_node*.

    Returns ``None`` when the key is absent or when *mapping_node* is not
    a mapping node at all (``None``, scalar, sequence). Callers pass
    arbitrary schema values here, and a scalar's ``.value`` is a plain
    string that cannot be unpacked into (key, value) pairs.

    If the key occurs more than once, the FIRST occurrence is returned.
    """
    if not _is_mapping_node(mapping_node):
        return None
    for k_node, v_node in mapping_node.value:
        if isinstance(k_node.value, str) and k_node.value == key:
            return v_node
    return None
def _strip_strings_and_comments(src: str) -> str:
    """Blank out everything in Java source that is not structural code.

    String literals, text blocks (``\"\"\"...\"\"\"``), char literals,
    ``//`` line comments and ``/* */`` block comments are replaced by
    spaces. Newlines are kept, and the result has exactly the same length
    as *src*, so offsets found in the cleaned text are valid offsets into
    the original.

    Unterminated comments and literals are blanked through to the end of
    the input.
    """
    out = list(src)
    n = len(src)

    def blank(start: int, stop: int) -> None:
        # Overwrite [start, stop) with spaces; keep newlines so line
        # structure (and therefore line numbers) survive.
        for k in range(start, min(stop, n)):
            if src[k] != "\n":
                out[k] = " "

    def literal_end(start: int, delimiter: str) -> int:
        # Index just past the closing delimiter of a literal whose content
        # starts at `start`; a backslash always protects the next char.
        j = start
        while j < n and not src.startswith(delimiter, j):
            j += 2 if src[j] == "\\" else 1
        return min(j + len(delimiter), n)

    i = 0
    while i < n:
        c = src[i]
        if c == "/" and src.startswith("//", i):
            end = src.find("\n", i)
            end = n if end == -1 else end
        elif c == "/" and src.startswith("/*", i):
            close = src.find("*/", i + 2)
            # No terminator: the comment swallows the rest of the file.
            end = n if close == -1 else close + 2
        elif src.startswith('"""', i):
            # Text block: may contain bare quotes and newlines, so it has
            # to be matched on the triple-quote delimiter.
            end = literal_end(i + 3, '"""')
        elif c == '"' or c == "'":
            end = literal_end(i + 1, c)
        else:
            i += 1
            continue
        blank(i, end)
        i = end
    return "".join(out)
def _extract_types_from_java(content: str) -> dict[str, list[str]]:
    """Map each Java type in *content* to its ``@JsonProperty`` wire names.

    Returns ``{TypeName: sorted_unique_wire_names}``; types without any
    annotation are omitted.

    Two views of the source are used:

    - *cleaned* (strings and comments blanked, same length) drives all
      structural decisions: declarations, braces, parentheses.
    - the raw *content* is used to read the wire name, because the name
      lives inside a string literal that the cleaned view has erased.

    Attribution rules:

    - An annotation inside a type body belongs to the innermost enclosing
      class/record/interface/enum.
    - An annotation inside the parenthesised header of a declaration whose
      body has not opened yet (record components) belongs to that
      declaration, not to the enclosing type.
    - An annotation whose ``@JsonProperty`` token does not survive
      stripping (it sits in a comment or string) is ignored.
    - An annotation outside any type is ignored.
    """
    cleaned = _strip_strings_and_comments(content)

    # (offset just past the type name, type name)
    decls = [(m.end(), m.group(1)) for m in _CLASS_DECL_RE.finditer(cleaned)]

    # (offset of '@', wire name). Matched on raw text for the name, then
    # filtered against `cleaned` so Javadoc examples do not count.
    props = [
        (m.start(), m.group(1))
        for m in _JSON_PROPERTY_RE.finditer(content)
        if cleaned.startswith("@JsonProperty", m.start())
    ]

    result: dict[str, list[str]] = {}
    # Open type bodies as (brace depth at which the body opened, name).
    stack: list[tuple[int, str]] = []
    depth = 0
    # Declaration whose name has been seen but whose body `{` has not.
    pending: str | None = None
    # Parenthesis nesting since `pending` was set (record header).
    header_parens = 0
    decl_idx = 0
    prop_idx = 0

    for i, c in enumerate(cleaned):
        # A declaration becomes pending once we are past its name.
        while decl_idx < len(decls) and decls[decl_idx][0] <= i:
            pending = decls[decl_idx][1]
            header_parens = 0
            decl_idx += 1

        while prop_idx < len(props) and props[prop_idx][0] <= i:
            wire = props[prop_idx][1]
            prop_idx += 1
            if pending is not None and header_parens > 0:
                owner = pending  # record component annotation
            elif stack:
                owner = stack[-1][1]
            else:
                continue
            result.setdefault(owner, []).append(wire)

        if c == "{":
            # Braces inside the header's parentheses are annotation array
            # values (`@JsonAlias({"a"})`), not the type body.
            if pending is not None and header_parens == 0:
                stack.append((depth, pending))
                pending = None
            depth += 1
        elif c == "}":
            depth -= 1
            if stack and stack[-1][0] == depth:
                stack.pop()
            if pending is not None and header_parens == 0:
                pending = None  # regex false positive: no body followed
        elif c == ";":
            # A real type header never contains `;` before its `{`.
            if pending is not None and header_parens == 0:
                pending = None
        elif pending is not None:
            if c == "(":
                header_parens += 1
            elif c == ")" and header_parens > 0:
                header_parens -= 1

    return {name: sorted(set(wires)) for name, wires in result.items() if wires}
def discover_sdk_types() -> dict[str, list[str]]:
    """Walk src/main/java and return ``{TypeName: sorted_wire_field_names}``.

    Files are visited in sorted path order. A file that does not contain
    the text ``@JsonProperty`` is skipped without parsing. Nested types
    are attributed by ``_extract_types_from_java``.

    If the same type name appears in more than one file, the entry from
    the LAST file visited replaces the earlier one; fields are not
    unioned.
    """
    result: dict[str, list[str]] = {}
    for java_file in sorted(SRC_MAIN_JAVA.rglob("*.java")):
        # Read as UTF-8 explicitly so discovery does not depend on the
        # locale of the machine running the gate.
        content = java_file.read_text(encoding="utf-8")
        if "@JsonProperty" not in content:
            continue
        result.update(_extract_types_from_java(content))
    return result


def empty_baseline() -> dict[str, Any]:
    """Return a fresh baseline with every known key set to its empty value."""
    return {
        "openapi_specs_sha": "",
        "cross_spec_duplicates": {},
        "intra_file_duplicates": {},
        "registered_types": [],
        "per_type_drift": {},
    }


def load_baseline() -> dict[str, Any]:
    """Load the baseline JSON, filling in defaults for missing keys.

    Returns ``empty_baseline()`` when the file does not exist. Keys that
    are not part of the default set (e.g. ``_comment``) are passed
    through unchanged. A known key whose JSON value is ``null`` keeps its
    empty default, so callers can index and iterate without None checks.
    """
    base = empty_baseline()
    if not BASELINE_PATH.exists():
        return base
    with BASELINE_PATH.open(encoding="utf-8") as f:
        parsed = json.load(f)
    for key, value in parsed.items():
        if value is None and key in base:
            continue
        base[key] = value
    return base


def write_baseline(baseline: dict[str, Any]) -> None:
    """Write *baseline* to BASELINE_PATH atomically.

    The JSON is written to a sibling temp file (suffixed with the PID)
    and then renamed over the target, so a crash mid-write cannot leave a
    truncated baseline. The temp file is removed if anything fails before
    the rename.
    """
    BASELINE_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp = BASELINE_PATH.with_suffix(f".json.tmp.{os.getpid()}")
    try:
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(baseline, f, indent=2, sort_keys=True)
            f.write("\n")
        tmp.replace(BASELINE_PATH)
    finally:
        # After a successful replace() the temp path no longer exists.
        tmp.unlink(missing_ok=True)


def difference(a: list[str], b: list[str]) -> list[str]:
    """Return the sorted items of *a* that do not occur in *b*."""
    excluded = set(b)
    return sorted(x for x in a if x not in excluded)
_USAGE = "usage: python3 scripts/wire_shape/refresh.py <specs_dir> [--sha <commit>]"


def _git_head_sha(directory: Path) -> str:
    """Return the HEAD commit of the git checkout containing *directory*.

    Returns an empty string when git is not installed or when
    ``git rev-parse HEAD`` exits non-zero.
    """
    try:
        out = subprocess.check_output(
            ["git", "-C", str(directory), "rev-parse", "HEAD"],
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return ""
    return out.decode().strip()


def main() -> int:
    """Regenerate the wire-shape baseline from a local specs directory.

    Command line (read from ``sys.argv``): ``<specs_dir> [--sha <commit>]``.
    The first positional argument is the specs directory; later
    positional arguments are ignored. ``--sha`` must be followed by a
    value.

    Returns 0 after writing the baseline. Returns 2 on a usage error,
    when ``<specs_dir>`` is not a directory, or when no commit SHA can be
    determined; nothing is written in those cases.
    """
    args = sys.argv[1:]
    specs_dir: Path | None = None
    explicit_sha: str | None = None
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "--sha":
            if i + 1 >= len(args):
                # Do not let a dangling flag fall through and be treated
                # as the specs directory.
                print("error: --sha requires a value", file=sys.stderr)
                print(_USAGE, file=sys.stderr)
                return 2
            explicit_sha = args[i + 1]
            i += 2
            continue
        if specs_dir is None:
            specs_dir = Path(arg)
        i += 1

    if specs_dir is None:
        print(_USAGE, file=sys.stderr)
        return 2
    if not specs_dir.is_dir():
        print(f"error: {specs_dir} is not a directory", file=sys.stderr)
        return 2

    sha = (explicit_sha if explicit_sha is not None else _git_head_sha(specs_dir)).strip()
    if not sha:
        print(
            "error: could not determine OpenAPI specs commit SHA.\n"
            " Either run this script against a specs_dir that sits inside a git\n"
            " checkout of the getaxonflow/axonflow community mirror, or pass\n"
            " --sha <commit> explicitly. An empty SHA would poison\n"
            " tests/fixtures/wire-shape-baseline.json and break the next CI\n"
            " wire-shape-contract run at bootstrap.",
            file=sys.stderr,
        )
        return 2

    merged, cross_spec, intra_file = load_all_schemas(specs_dir)
    sdk = discover_sdk_types()

    # A type is "registered" when its name exists on both sides; drift is
    # recorded only for registered types whose field sets differ.
    registered: list[str] = []
    per_type_drift: dict[str, dict[str, list[str]]] = {}
    for name, sdk_fields in sdk.items():
        spec_fields = merged.get(name)
        if spec_fields is None:
            continue
        registered.append(name)
        sdk_only = difference(sdk_fields, spec_fields)
        spec_only = difference(spec_fields, sdk_fields)
        if sdk_only or spec_only:
            per_type_drift[name] = {"sdk_only": sdk_only, "spec_only": spec_only}
    registered.sort()

    baseline = {
        "_comment": (
            "Baseline of KNOWN wire-shape drift between the Java SDK and the "
            "OpenAPI specs. Generated by scripts/wire_shape/refresh.py. The "
            "CI gate fails on drift OUTSIDE this baseline. Entries here "
            "should be burned down over time via targeted fix PRs. See "
            "axonflow-enterprise tracking issue (QF-14 follow-up)."
        ),
        "openapi_specs_sha": sha,
        "cross_spec_duplicates": {k: dict(sorted(v.items())) for k, v in sorted(cross_spec.items())},
        "intra_file_duplicates": {k: dict(sorted(v.items())) for k, v in sorted(intra_file.items())},
        "registered_types": registered,
        "per_type_drift": dict(sorted(per_type_drift.items())),
    }

    write_baseline(baseline)
    try:
        rel = BASELINE_PATH.relative_to(REPO_ROOT)
    except ValueError:
        rel = BASELINE_PATH
    print(f"Wrote baseline: {rel}")
    print(f" openapi_specs_sha: {sha}")
    print(f" cross_spec_duplicates: {len(cross_spec)}")
    print(f" intra_file_duplicates: {len(intra_file)} file(s)")
    print(f" registered_types: {len(registered)}")
    print(f" per_type_drift: {len(per_type_drift)}")
    return 0
def _specs_dir() -> Path | None:
    """Return the directory named by AXONFLOW_OPENAPI_SPECS_DIR.

    Returns ``None`` when the variable is unset, empty, or does not point
    at an existing directory.
    """
    env = os.environ.get("AXONFLOW_OPENAPI_SPECS_DIR")
    if not env:
        return None
    p = Path(env)
    return p if p.is_dir() else None


def _check_cross_spec(cross_spec, baselined_cross) -> int:
    """Gate 1: report cross-spec divergences that differ from the baseline.

    Prints the report to stderr and returns the number of problems.
    """
    problems: list[str] = []
    for name, observed in cross_spec.items():
        expected = baselined_cross.get(name)
        if expected is None:
            lines = [f" {name}: NEW cross-spec divergence (not in baseline)."]
            for spec in sorted(observed):
                lines.append(f" {spec}: {observed[spec]}")
            problems.append("\n".join(lines))
            continue
        if dict(expected) != {s: list(v) for s, v in observed.items()}:
            lines = [f" {name}: divergence drifted from baseline."]
            for spec in sorted(set(expected) | set(observed)):
                exp = expected.get(spec)
                obs = observed.get(spec)
                if exp != (list(obs) if obs is not None else None):
                    lines.append(f" {spec}:")
                    lines.append(f" baseline: {exp}")
                    lines.append(f" observed: {obs}")
            problems.append("\n".join(lines))
    if problems:
        print("Cross-spec schema divergence gate failed:\n", file=sys.stderr)
        for p in problems:
            print(p + "\n", file=sys.stderr)
        print(
            "Fix: reconcile in axonflow-enterprise specs (rename one, or "
            "merge into a shared supertype). If the divergence is "
            "intentional and must stand, regenerate "
            "tests/fixtures/wire-shape-baseline.json via "
            "scripts/wire_shape/refresh.py.\n",
            file=sys.stderr,
        )
    return len(problems)


def _check_intra_file(intra_file, baselined_intra) -> int:
    """Gate 2: report intra-file duplicate counts that differ from the baseline.

    Flags both new/changed duplicates and baselined duplicates that are
    no longer observed. Prints to stderr and returns the problem count.
    """
    problems: list[str] = []
    for file, schemas in intra_file.items():
        for schema_name, count in schemas.items():
            allowed = baselined_intra.get(file, {}).get(schema_name)
            if allowed == count:
                continue
            problems.append(
                f" {file}: schema '{schema_name}' declared {count} time(s) "
                f"(baseline says {allowed or 0})."
            )
    for file, schemas in baselined_intra.items():
        for schema_name in schemas:
            if schema_name not in intra_file.get(file, {}):
                problems.append(
                    f" {file}: baselined duplicate '{schema_name}' no longer "
                    f"observed — remove from baseline.intra_file_duplicates."
                )
    if problems:
        problems.sort()
        print("Intra-file schema duplicate gate failed:\n", file=sys.stderr)
        for p in problems:
            print(p, file=sys.stderr)
        print(
            "\nFix: remove the duplicate declaration in the OpenAPI spec. "
            "A schema declared twice in one file leaves the contract "
            "ambiguous. If the duplicate is intentional and must stand, "
            "regenerate the baseline.\n",
            file=sys.stderr,
        )
    return len(problems)


def _collect_type_drift(sdk, merged, baselined_drift) -> tuple[int, list[str]]:
    """Gate 3 (data only): compare every SDK type with its same-named schema.

    Returns ``(matched, problems)``: how many SDK types have a schema of
    the same name, and one formatted report per type that drifts beyond
    what the baseline allows.
    """
    problems: list[str] = []
    matched = 0
    for name, sdk_fields in sdk.items():
        spec_fields = merged.get(name)
        if spec_fields is None:
            continue
        matched += 1
        sdk_only = difference(sdk_fields, spec_fields)
        spec_only = difference(spec_fields, sdk_fields)
        allowed = baselined_drift.get(name, {"sdk_only": [], "spec_only": []})
        new_sdk = difference(sdk_only, allowed.get("sdk_only", []))
        new_spec = difference(spec_only, allowed.get("spec_only", []))
        if not new_sdk and not new_spec:
            continue
        lines = [f" {name}:"]
        if new_sdk:
            lines.append(f" NEW, only in SDK class: {new_sdk}")
        if new_spec:
            lines.append(f" NEW, only in OpenAPI: {new_spec}")
        # Show the already-baselined drift too, for context.
        residual_sdk = difference(sdk_only, new_sdk)
        residual_spec = difference(spec_only, new_spec)
        if residual_sdk:
            lines.append(f" (baseline, only in SDK): {residual_sdk}")
        if residual_spec:
            lines.append(f" (baseline, only in spec): {residual_spec}")
        problems.append("\n".join(lines))
    return matched, problems


def _check_registered_types(registered, sdk, merged) -> int:
    """Gate 4: every baselined type must still exist on both sides.

    Prints to stderr and returns the number of missing SDK types plus the
    number of missing schemas.
    """
    if not registered:
        return 0
    missing_sdk = [n for n in registered if n not in sdk]
    missing_spec = [n for n in registered if n not in merged]
    if not missing_sdk and not missing_spec:
        return 0
    print(
        "Registered-type mapping broken — rename-escape guard fired:\n",
        file=sys.stderr,
    )
    if missing_sdk:
        print(f" No matching Java class for: {missing_sdk}", file=sys.stderr)
    if missing_spec:
        print(f" No matching OpenAPI schema for: {missing_spec}", file=sys.stderr)
    print(
        "\nFix: revert the rename, do it on both sides, or update "
        "tests/fixtures/wire-shape-baseline.json::registered_types "
        "(and mirror the rename in baseline.per_type_drift entries).\n",
        file=sys.stderr,
    )
    return len(missing_sdk) + len(missing_spec)


def main() -> int:
    """Run the four wire-shape gates; return the process exit code.

    - 0 when AXONFLOW_OPENAPI_SPECS_DIR is unset (gate skipped) or when
      all gates pass.
    - 1 when the variable is set but is not a directory, when no schemas
      load, when no SDK type matches any schema, or when any gate finds a
      problem.
    """
    specs = _specs_dir()
    if specs is None:
        configured = os.environ.get("AXONFLOW_OPENAPI_SPECS_DIR")
        if configured:
            # Fail closed: a mistyped or moved specs path must not turn a
            # PR-blocking gate into a silent pass.
            print(
                f"❌ AXONFLOW_OPENAPI_SPECS_DIR={configured!r} is not a "
                "directory; refusing to skip the wire-shape gate.",
                file=sys.stderr,
            )
            return 1
        print(
            "⏭️ AXONFLOW_OPENAPI_SPECS_DIR not set to a directory; "
            "wire-shape gate skipped."
        )
        print(
            " The dedicated CI job clones getaxonflow/axonflow at the "
            "pinned SHA and exports this variable before running the "
            "validator."
        )
        return 0

    merged, cross_spec, intra_file = load_all_schemas(specs)
    if not merged:
        print(
            f"❌ Loaded 0 schemas with concrete properties from {specs}.",
            file=sys.stderr,
        )
        return 1
    print(f"📋 Loaded {len(merged)} schema(s) from {specs}\n")

    sdk = discover_sdk_types()
    baseline = load_baseline()
    errors = 0

    # Gate 1: cross-spec divergence.
    errors += _check_cross_spec(cross_spec, baseline["cross_spec_duplicates"])

    # Gate 2: intra-file duplicates.
    errors += _check_intra_file(intra_file, baseline["intra_file_duplicates"])

    # Gate 3: SDK-vs-spec drift, baseline-aware.
    matched, drift_problems = _collect_type_drift(
        sdk, merged, baseline["per_type_drift"]
    )
    if matched == 0:
        # Zero matches means discovery itself is broken; stop here.
        print(
            "❌ No Java class matched any OpenAPI schema by name — check discovery.",
            file=sys.stderr,
        )
        return 1
    if drift_problems:
        drift_problems.sort()
        print("NEW wire-shape drift detected (not covered by baseline):\n", file=sys.stderr)
        for p in drift_problems:
            print(p, file=sys.stderr)
        print(
            "\nFix: align the Java @JsonProperty name with the OpenAPI property "
            "name, OR update the spec if the SDK is the source of truth. Do "
            "not widen the baseline to hide drift without a tracking issue.\n",
            file=sys.stderr,
        )
        errors += len(drift_problems)

    # Gate 4: registered-type coverage (rename-escape guard).
    errors += _check_registered_types(baseline["registered_types"], sdk, merged)

    if errors > 0:
        print(f"❌ Found {errors} wire-shape issue(s).", file=sys.stderr)
        return 1

    print(f"✅ {matched} Java class/schema pair(s) validated against OpenAPI.")
    unmapped_sdk = sum(1 for k in sdk if k not in merged)
    unmapped_spec = sum(1 for k in merged if k not in sdk)
    print(
        f" {unmapped_sdk} SDK-only class(es) with no matching schema "
        "(internal / client-side)."
    )
    print(
        f" {unmapped_spec} OpenAPI schema(s) with no matching SDK class "
        "(coverage gap)."
    )
    return 0
+ ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/fixtures/wire-shape-baseline.json b/tests/fixtures/wire-shape-baseline.json new file mode 100644 index 0000000..8714d12 --- /dev/null +++ b/tests/fixtures/wire-shape-baseline.json @@ -0,0 +1,656 @@ +{ + "_comment": "Baseline of KNOWN wire-shape drift between the Java SDK and the OpenAPI specs. Generated by scripts/wire_shape/refresh.py. The CI gate fails on drift OUTSIDE this baseline. Entries here should be burned down over time via targeted fix PRs. See axonflow-enterprise tracking issue (QF-14 follow-up).", + "cross_spec_duplicates": { + "APIError": { + "orchestrator-api.yaml": [ + "code", + "details", + "message" + ], + "policy-api.yaml": [ + "error" + ] + }, + "BiasRecord": { + "agent-api.yaml": [ + "category", + "group_a", + "group_a_rate", + "group_b", + "group_b_rate", + "id", + "is_violation", + "model_id", + "org_id", + "sample_size", + "score", + "threshold", + "timestamp" + ], + "orchestrator-api.yaml": [ + "category", + "group_a", + "group_a_rate", + "group_b", + "group_b_rate", + "id", + "is_violation", + "metadata", + "model_id", + "org_id", + "sample_size", + "score", + "threshold", + "timestamp", + "window_end", + "window_start" + ] + }, + "CreatePolicyRequest": { + "orchestrator-api.yaml": [ + "action", + "actions", + "category", + "conditions", + "description", + "enabled", + "name", + "pattern", + "priority", + "severity", + "tier", + "type" + ], + "policy-api.yaml": [ + "actions", + "conditions", + "description", + "enabled", + "name", + "priority", + "type" + ] + }, + "EUAIActExportRequest": { + "agent-api.yaml": [ + "format", + "from_date", + "include_accuracy_metrics", + "include_assessments", + "include_bias_records", + "include_decision_chains", + "include_hitl_records", + "org_id", + "to_date" + ], + "orchestrator-api.yaml": [ + "date_from", + "date_to", + "export_type", + "format", + "model_ids" + ] + }, + "HealthResponse": { + "agent-api.yaml": [ + 
"capabilities", + "sdk_compatibility", + "service", + "status", + "timestamp", + "version" + ], + "orchestrator-api.yaml": [ + "capabilities", + "components", + "features", + "sdk_compatibility", + "service", + "status", + "timestamp", + "version" + ] + }, + "MetricsResponse": { + "agent-api.yaml": [ + "agent_metrics", + "connectors", + "health", + "request_types", + "timestamp" + ], + "orchestrator-api.yaml": [ + "health", + "orchestrator_metrics", + "providers", + "request_types", + "timestamp" + ] + }, + "PolicyOverride": { + "agent-api.yaml": [ + "action_override", + "created_at", + "created_by", + "enabled_override", + "expires_at", + "id", + "organization_id", + "override_reason", + "policy_id", + "policy_type", + "tenant_id", + "updated_at", + "updated_by" + ], + "policy-api.yaml": [ + "action_override", + "created_at", + "created_by", + "enabled_override", + "expires_at", + "id", + "override_reason", + "policy_id" + ] + }, + "UpdatePolicyRequest": { + "orchestrator-api.yaml": [ + "action", + "actions", + "conditions", + "description", + "enabled", + "name", + "pattern", + "priority", + "severity" + ], + "policy-api.yaml": [ + "actions", + "conditions", + "description", + "enabled", + "name", + "priority", + "type" + ] + } + }, + "intra_file_duplicates": { + "orchestrator-api.yaml": { + "PolicyMatch": 2 + } + }, + "openapi_specs_sha": "bf1ca22ae4bee37dbccc91aee7e03c805f73f853", + "per_type_drift": { + "AuditLogEntry": { + "sdk_only": [ + "metadata", + "model", + "policy_violations" + ], + "spec_only": [] + }, + "AuditSearchRequest": { + "sdk_only": [ + "decision_id", + "offset", + "override_id", + "policy_name" + ], + "spec_only": [] + }, + "Budget": { + "sdk_only": [], + "spec_only": [ + "id", + "name", + "org_id", + "period", + "scope", + "tenant_id" + ] + }, + "BudgetAlert": { + "sdk_only": [], + "spec_only": [ + "acknowledged", + "id", + "message", + "threshold" + ] + }, + "BudgetStatus": { + "sdk_only": [], + "spec_only": [ + "budget", + "percentage" + 
] + }, + "CancelPlanResponse": { + "sdk_only": [ + "message" + ], + "spec_only": [ + "success" + ] + }, + "ClientRequest": { + "sdk_only": [ + "llm_provider", + "media", + "model" + ], + "spec_only": [ + "skip_llm" + ] + }, + "ClientResponse": { + "sdk_only": [ + "budget_info", + "media_analysis" + ], + "spec_only": [ + "metadata" + ] + }, + "ConnectorInfo": { + "sdk_only": [ + "config_schema", + "enabled" + ], + "spec_only": [ + "healthy" + ] + }, + "CreateStaticPolicyRequest": { + "sdk_only": [ + "organization_id" + ], + "spec_only": [ + "action", + "category", + "description", + "enabled", + "name", + "pattern", + "priority", + "severity", + "tags", + "tier" + ] + }, + "CreateWorkflowResponse": { + "sdk_only": [ + "created_at", + "source" + ], + "spec_only": [ + "started_at" + ] + }, + "DynamicPolicy": { + "sdk_only": [ + "created_at", + "organization_id", + "updated_at" + ], + "spec_only": [ + "actions", + "conditions", + "description", + "enabled", + "id", + "name" + ] + }, + "DynamicPolicyInfo": { + "sdk_only": [ + "orchestrator_reachable", + "policies_evaluated", + "processing_time_ms" + ], + "spec_only": [ + "block_reason", + "blocked", + "enabled", + "error", + "evaluated", + "evaluation_time_ms", + "policies_checked", + "policies_matched" + ] + }, + "DynamicPolicyMatch": { + "sdk_only": [ + "reason" + ], + "spec_only": [ + "message" + ] + }, + "EffectivePoliciesResponse": { + "sdk_only": [ + "dynamic" + ], + "spec_only": [ + "computed_at", + "organization_id", + "tenant_id" + ] + }, + "ExecutionSnapshot": { + "sdk_only": [ + "approval_required", + "approved_at", + "approved_by" + ], + "spec_only": [ + "retry_count" + ] + }, + "ExecutionSummary": { + "sdk_only": [ + "input_summary", + "output_summary" + ], + "spec_only": [] + }, + "ExfiltrationCheckInfo": { + "sdk_only": [ + "within_limits" + ], + "spec_only": [ + "exceeded", + "limit_type" + ] + }, + "ListWorkflowsResponse": { + "sdk_only": [], + "spec_only": [ + "limit", + "offset" + ] + }, + 
"MCPCheckInputRequest": { + "sdk_only": [], + "spec_only": [ + "client_id", + "tenant_id", + "user_id", + "user_role", + "user_token" + ] + }, + "MCPCheckOutputRequest": { + "sdk_only": [], + "spec_only": [ + "client_id", + "tenant_id", + "user_id", + "user_token" + ] + }, + "MarkStepCompletedRequest": { + "sdk_only": [ + "metadata" + ], + "spec_only": [] + }, + "PendingApproval": { + "sdk_only": [], + "spec_only": [ + "policies_matched", + "step_input" + ] + }, + "PlanRequest": { + "sdk_only": [ + "constraints", + "max_steps", + "objective", + "parallel", + "user_token" + ], + "spec_only": [ + "client", + "execution_mode", + "query", + "user" + ] + }, + "PlanResponse": { + "sdk_only": [ + "complexity", + "domain", + "estimated_duration", + "parallel", + "status" + ], + "spec_only": [ + "error", + "policy_info", + "success", + "version", + "workflow_execution_id" + ] + }, + "PolicyInfo": { + "sdk_only": [ + "code_artifact", + "processing_time", + "risk_score", + "static_checks", + "tenant_id" + ], + "spec_only": [ + "block_reason", + "blocked", + "dynamic_policy_info", + "exfiltration_check", + "matched_policies", + "processing_time_ms", + "redactions_applied" + ] + }, + "PolicyOverride": { + "sdk_only": [], + "spec_only": [ + "enabled_override", + "id" + ] + }, + "PolicyVersion": { + "sdk_only": [ + "change_description", + "new_values", + "previous_values" + ], + "spec_only": [ + "change_summary", + "id", + "policy_id", + "snapshot", + "version" + ] + }, + "ResumePlanResponse": { + "sdk_only": [ + "approved", + "message", + "next_step", + "next_step_name", + "total_steps", + "workflow_id" + ], + "spec_only": [ + "result" + ] + }, + "StaticPolicy": { + "sdk_only": [], + "spec_only": [ + "action", + "category", + "description", + "enabled", + "id", + "name", + "override", + "pattern", + "policy_id", + "priority", + "severity", + "tier", + "version" + ] + }, + "StepGateRequest": { + "sdk_only": [], + "spec_only": [ + "cost_usd", + "tokens_in", + "tokens_out" + ] + }, 
+ "StepGateResponse": { + "sdk_only": [], + "spec_only": [ + "decision_id" + ] + }, + "UpdatePlanRequest": { + "sdk_only": [], + "spec_only": [ + "metadata" + ] + }, + "UsageBreakdown": { + "sdk_only": [ + "period_end", + "period_start" + ], + "spec_only": [ + "items" + ] + }, + "UsageBreakdownItem": { + "sdk_only": [], + "spec_only": [ + "group_by", + "percentage" + ] + }, + "UsageRecord": { + "sdk_only": [], + "spec_only": [ + "created_at", + "error_message", + "id", + "latency_ms", + "model", + "provider", + "success", + "team_id", + "tenant_id", + "user_id", + "workflow_id" + ] + }, + "WebhookSubscription": { + "sdk_only": [], + "spec_only": [ + "org_id", + "secret", + "tenant_id" + ] + }, + "WorkflowStatusResponse": { + "sdk_only": [], + "spec_only": [ + "metadata" + ] + }, + "WorkflowStepInfo": { + "sdk_only": [ + "approved_by", + "completed_at", + "decision_reason" + ], + "spec_only": [] + } + }, + "registered_types": [ + "AuditLogEntry", + "AuditSearchRequest", + "AuditToolCallRequest", + "AuditToolCallResponse", + "Budget", + "BudgetAlert", + "BudgetCheckRequest", + "BudgetStatus", + "CancelPlanResponse", + "Checkpoint", + "CheckpointListResponse", + "CircuitBreakerConfigUpdate", + "ClientRequest", + "ClientResponse", + "ConnectorInfo", + "CreateStaticPolicyRequest", + "CreateWebhookRequest", + "CreateWorkflowRequest", + "CreateWorkflowResponse", + "DynamicPolicy", + "DynamicPolicyInfo", + "DynamicPolicyMatch", + "EffectivePoliciesResponse", + "ExecutionSnapshot", + "ExecutionSummary", + "ExfiltrationCheckInfo", + "ListWorkflowsResponse", + "MCPCheckInputRequest", + "MCPCheckInputResponse", + "MCPCheckOutputRequest", + "MCPCheckOutputResponse", + "MarkStepCompletedRequest", + "MediaAnalysisResponse", + "MediaGovernanceConfig", + "MediaGovernanceStatus", + "ModelPricing", + "PendingApproval", + "PendingApprovalsResponse", + "PlanRequest", + "PlanResponse", + "PlanVersionEntry", + "PlanVersionsResponse", + "PlatformCapability", + "PolicyEvaluationResult", + 
"PolicyInfo", + "PolicyMatch", + "PolicyMatchInfo", + "PolicyOverride", + "PolicyVersion", + "RateLimitInfo", + "ResumeFromCheckpointResponse", + "ResumePlanResponse", + "RetryContext", + "RollbackPlanResponse", + "StaticPolicy", + "StepGateRequest", + "StepGateResponse", + "TimelineEntry", + "TokenUsage", + "ToolContext", + "UpdatePlanRequest", + "UpdatePlanResponse", + "UpdateWebhookRequest", + "UsageBreakdown", + "UsageBreakdownItem", + "UsageRecord", + "UsageSummary", + "WebhookSubscription", + "WorkflowStatusResponse", + "WorkflowStepInfo" + ] +} From e265443360dc82286c0db4d7ff9c72d27864c27e Mon Sep 17 00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 02:52:06 +0200 Subject: [PATCH 2/6] fix(wire-shape): close 4 review-surfaced gaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - load_baseline raises SystemExit with regen hint when the JSON file is malformed instead of dumping an opaque traceback. - write_baseline cleans up its tmp sidecar on any exception so a crashed run can't poison the next refresh from the same PID. - The validator now exits 1 (not 0) when AXONFLOW_OPENAPI_SPECS_DIR is set but doesn't point at a directory. A misconfigured CI step silently disabling the gate produces a green check on a non-running validator, which we refuse to do. An unset env still skips with 0. - Workflow's SHA-bump guard distinguishes "baseline missing on base branch" (genuine first-pin introduction) from "baseline present but unparseable" (bypass attempt). A malformed baseline on the base branch can no longer route a labelless PR through the first-pin path. - Java source parser: * Recognises Java 15+ text blocks (`"""..."""`). Previously the first `"""` looked like quote-empty-quote and the body parsed as live source — fake `@JsonProperty(...)` and `class X {` inside a text block silently corrupted attribution. 
* Filters annotation matches whose position falls inside a blanked range (string, comment, text block) so the wire-name regex on raw content can't grab annotations the structural pass already nullified. * Attributes record-parameter annotations to the record. Pending decls are now tracked between their token and the body `{`, so `record Foo(@JsonProperty("x") int x) {}` no longer drops the wire name. The SDK has no records today; this is forward defence for the next type-class refactor. - Documents the remaining anonymous-inner-class limitation (no `class` keyword, so annotations would leak to the enclosing type). The SDK does not put @JsonProperty inside anonymous inners. Synthetic-fixture tests cover record params, text blocks with fake annotations, line and block comments, nested classes, and enum values. Real Java SDK baseline unchanged: 70 registered types, 39 drift, 8 cross-spec, 1 intra-file. --- .github/workflows/wire-shape-contract.yml | 26 +++- scripts/wire_shape/lib.py | 141 +++++++++++++++------- scripts/wire_shape/validate.py | 30 +++-- 3 files changed, 137 insertions(+), 60 deletions(-) diff --git a/.github/workflows/wire-shape-contract.yml b/.github/workflows/wire-shape-contract.yml index 1fae03c..dc87223 100644 --- a/.github/workflows/wire-shape-contract.yml +++ b/.github/workflows/wire-shape-contract.yml @@ -72,13 +72,27 @@ jobs: PR_SHA: ${{ steps.specs_sha.outputs.sha }} run: | set -e - BASE_SHA=$( - git show "origin/${BASE_REF}:tests/fixtures/wire-shape-baseline.json" 2>/dev/null \ - | python3 -c "import json, sys; print(json.load(sys.stdin).get('openapi_specs_sha','') or '')" \ - || true - ) + BASE_FILE=$(mktemp) + if git show "origin/${BASE_REF}:tests/fixtures/wire-shape-baseline.json" > "$BASE_FILE" 2>/dev/null; then + # File exists on the base branch. 
It MUST parse — a malformed + # baseline file would otherwise let `BASE_SHA` come back empty + # and route this PR through the "first pin introduction" + # bypass below, silently authorizing a SHA bump. + BASE_SHA=$(python3 -c "import json, sys; print(json.load(open(sys.argv[1])).get('openapi_specs_sha','') or '')" "$BASE_FILE") + else + # File genuinely absent on the base branch (first-time + # introduction). Distinguishing this from "file present but + # unparseable" is what guards the bypass. + BASE_SHA="" + fi + rm -f "$BASE_FILE" if [ -z "$BASE_SHA" ]; then - echo "::notice::Base branch has no openapi_specs_sha yet; treating this PR as first pin introduction." + if git cat-file -e "origin/${BASE_REF}:tests/fixtures/wire-shape-baseline.json" 2>/dev/null; then + echo "::error::tests/fixtures/wire-shape-baseline.json on origin/${BASE_REF} parsed with empty openapi_specs_sha." + echo "::error::This is unrecoverable from inside the gate. Re-run scripts/wire_shape/refresh.py on main." + exit 1 + fi + echo "::notice::Base branch has no wire-shape-baseline.json yet; treating this PR as first pin introduction." exit 0 fi if [ "$BASE_SHA" = "$PR_SHA" ]; then diff --git a/scripts/wire_shape/lib.py b/scripts/wire_shape/lib.py index 2806c60..3111966 100755 --- a/scripts/wire_shape/lib.py +++ b/scripts/wire_shape/lib.py @@ -20,6 +20,14 @@ class whose body contains them. - Avoids the `mvn compile` round-trip in CI and stays dependency-free beyond PyYAML. If nested-generic or annotation-inside-string edge cases start causing false positives, switch to `javalang`. +- Record-parameter annotations (`record Foo(@JsonProperty("x") int x)`) + are attributed to the record because the parser tracks pending + decls between their token and their body `{`. +- Anonymous inner classes (`new Foo() { @JsonProperty("x") ... }`) do + NOT have a `class` keyword, so their annotations leak to the + enclosing type. 
This SDK does not put `@JsonProperty` inside + anonymous inners; if that ever lands, the `` sentinel pattern + is the place to add coverage. """ from __future__ import annotations @@ -178,8 +186,26 @@ def _strip_strings_and_comments(src: str) -> str: j += 2 i = j continue - # String / char literal. Handles escapes; skips newlines (text - # blocks in Java 15+ break this but none of our SDK types use them). + # Java 15+ text block: `"""..."""`. Without explicit handling, + # the leading `"""` looks like quote-emptyString-quote, then + # content runs as live source until a stray `"` rebalances. + if c == '"' and src[i : i + 3] == '"""': + j = i + 3 + out[i] = out[i + 1] = out[i + 2] = " " + while j < n and src[j : j + 3] != '"""': + if src[j] != "\n": + out[j] = " " + j += 1 + if j < n: + out[j] = out[j + 1] = out[j + 2] = " " + j += 3 + i = j + continue + # String / char literal. Handles escapes; preserves newlines so + # offsets line up with the original text. Java string literals + # cannot span newlines (text blocks above are the only multi- + # line form), so a bare newline mid-quote is a malformed source + # we let fall through. if c == '"' or c == "'": quote = c j = i + 1 @@ -228,19 +254,24 @@ def _extract_types_from_java(content: str) -> dict[str, list[str]]: # @JsonProperty annotations: match on the RAW content so the # wire-name string literal is preserved. Positions line up with # `cleaned` because string-stripping replaces in place without - # changing length. + # changing length. Filter out matches whose `@` falls inside a + # blanked range (string literal, comment, text block) — the + # cleaner replaces those with whitespace, so cleaned[m.start()] + # is a space rather than the original `@`. 
props: list[tuple[int, str]] = [ - (m.start(), m.group(1)) for m in _JSON_PROPERTY_RE.finditer(content) + (m.start(), m.group(1)) + for m in _JSON_PROPERTY_RE.finditer(content) + if m.start() < len(cleaned) and cleaned[m.start()] == "@" ] - # Walk the cleaned source, tracking a stack of (open_brace_pos, type_name). - # When we see `class X` followed (after modifiers/generics) by `{`, - # push X onto the stack at that depth. Pop on matching `}`. - stack: list[str] = [] - # Map from brace_depth_at_open -> type_name, so we only pop type - # frames when the matching `}` fires (not every random `{` in - # method bodies / initializers). - type_frames: dict[int, str] = {} + # Walk the cleaned source. Two queues: decls already seen but + # whose body `{` hasn't been hit yet (pending), and active type + # frames (stack). Annotations attribute to the innermost pending + # decl (record-parameter case) if any, otherwise the innermost + # active frame. This keeps record(@JsonProperty(...) X x) {} from + # silently dropping its annotations. + pending_decls: list[str] = [] + stack: list[tuple[int, str]] = [] # (depth_at_open, type_name) depth = 0 next_decl_idx = 0 next_prop_idx = 0 @@ -249,41 +280,47 @@ def _extract_types_from_java(content: str) -> dict[str, list[str]]: i = 0 n = len(cleaned) while i < n: - c = cleaned[i] + # Promote any decls whose name token has now been passed. + while next_decl_idx < len(decls) and decls[next_decl_idx][0] <= i: + pending_decls.append(decls[next_decl_idx][1]) + next_decl_idx += 1 - # Consume any pending annotation at or before this index. + # Consume any annotation at this position. Pending-decl wins + # so record params attribute correctly. 
while next_prop_idx < len(props) and props[next_prop_idx][0] <= i: _, wire = props[next_prop_idx] - if stack: - result.setdefault(stack[-1], []).append(wire) + owner = ( + pending_decls[-1] if pending_decls + else stack[-1][1] if stack + else None + ) + if owner is not None: + result.setdefault(owner, []).append(wire) next_prop_idx += 1 + c = cleaned[i] if c == "{": - # Is this the opening brace of the most recent unconsumed - # class declaration? i.e. the declaration's class-name - # token ended before this `{` and no other `{` has consumed it. - if ( - next_decl_idx < len(decls) - and decls[next_decl_idx][0] <= i - ): - name = decls[next_decl_idx][1] - stack.append(name) - type_frames[depth] = name - next_decl_idx += 1 + if pending_decls: + # This brace opens the body of the oldest pending decl. + name = pending_decls.pop(0) + stack.append((depth, name)) depth += 1 elif c == "}": depth -= 1 - if depth in type_frames: - type_frames.pop(depth) - if stack: - stack.pop() + if stack and stack[-1][0] == depth: + stack.pop() i += 1 # Consume any trailing annotations (defensive; shouldn't happen). while next_prop_idx < len(props): _, wire = props[next_prop_idx] - if stack: - result.setdefault(stack[-1], []).append(wire) + owner = ( + pending_decls[-1] if pending_decls + else stack[-1][1] if stack + else None + ) + if owner is not None: + result.setdefault(owner, []).append(wire) next_prop_idx += 1 return {k: sorted(set(v)) for k, v in result.items() if v} @@ -323,24 +360,42 @@ def empty_baseline() -> dict[str, Any]: def load_baseline() -> dict[str, Any]: if not BASELINE_PATH.exists(): return empty_baseline() - with BASELINE_PATH.open() as f: - parsed = json.load(f) + try: + with BASELINE_PATH.open() as f: + parsed = json.load(f) + except json.JSONDecodeError as e: + # Truncated mid-write, hand-edited and corrupted, or otherwise + # not parseable. Fail loudly with a regeneration hint instead + # of letting the validator bail with an opaque traceback. 
+ raise SystemExit( + f"❌ tests/fixtures/wire-shape-baseline.json is malformed " + f"({e.__class__.__name__}: {e}).\n" + f" Regenerate via:\n" + f" python3 scripts/wire_shape/refresh.py " + f"" + ) from None base = empty_baseline() base.update(parsed) - base["cross_spec_duplicates"] = parsed.get("cross_spec_duplicates", {}) - base["intra_file_duplicates"] = parsed.get("intra_file_duplicates", {}) - base["registered_types"] = parsed.get("registered_types", []) - base["per_type_drift"] = parsed.get("per_type_drift", {}) return base def write_baseline(baseline: dict[str, Any]) -> None: BASELINE_PATH.parent.mkdir(parents=True, exist_ok=True) tmp = BASELINE_PATH.with_suffix(f".json.tmp.{os.getpid()}") - with tmp.open("w") as f: - json.dump(baseline, f, indent=2, sort_keys=True) - f.write("\n") - tmp.replace(BASELINE_PATH) + try: + with tmp.open("w") as f: + json.dump(baseline, f, indent=2, sort_keys=True) + f.write("\n") + tmp.replace(BASELINE_PATH) + except BaseException: + # If anything fails between open() and replace(), don't leave + # a stray .tmp. sidecar — the next run from the same PID + # would collide and confuse a future writer. + try: + tmp.unlink(missing_ok=True) + except OSError: + pass + raise def difference(a: list[str], b: list[str]) -> list[str]: diff --git a/scripts/wire_shape/validate.py b/scripts/wire_shape/validate.py index 46a390f..131a7df 100755 --- a/scripts/wire_shape/validate.py +++ b/scripts/wire_shape/validate.py @@ -35,20 +35,12 @@ ) -def _specs_dir() -> Path | None: +def main() -> int: env = os.environ.get("AXONFLOW_OPENAPI_SPECS_DIR") if not env: - return None - p = Path(env) - return p if p.is_dir() else None - - -def main() -> int: - specs = _specs_dir() - if specs is None: + # No env at all → local dev / unconfigured CI. Skip. print( - "⏭️ AXONFLOW_OPENAPI_SPECS_DIR not set to a directory; " - "wire-shape gate skipped." + "⏭️ AXONFLOW_OPENAPI_SPECS_DIR not set; wire-shape gate skipped." 
) print( " The dedicated CI job clones getaxonflow/axonflow at the " @@ -56,6 +48,22 @@ def main() -> int: "validator." ) return 0 + specs = Path(env) + if not specs.is_dir(): + # Env set but path is bogus. CI probably failed to check out the + # specs at the pinned SHA; treating that as a skip would let a + # broken pipeline produce a green check. Fail loudly instead. + print( + f"❌ AXONFLOW_OPENAPI_SPECS_DIR={env} is not a directory.", + file=sys.stderr, + ) + print( + " The wire-shape job's specs-checkout step must run before " + "this validator. A misconfigured path silently disables the " + "gate, which we refuse to do.", + file=sys.stderr, + ) + return 1 merged, cross_spec, intra_file = load_all_schemas(specs) if not merged: From d87690cf24444c08c2cea512e4861fbf1eee498f Mon Sep 17 00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 03:17:29 +0200 Subject: [PATCH 3/6] fix(wire-shape): close stale cross-spec baseline shielding gap Gate 1 (cross-spec divergence) only iterated currently observed schemas. A baselined divergence that the platform has since reconciled silently lingered in the baseline forever; the same old incompatible shape could be reintroduced and pass the gate because its fingerprint matched a stale entry that should have been deleted. Adds the reverse pass that Gate 2 already does for intra-file duplicates: any baselined cross-spec name that is no longer observed in the current specs fails the run with a pointer at the specific baseline key to delete. Verified locally: - Positive run on clean baseline still exits 0. - Adding a phantom 'PhantomDivergence' entry to baseline.cross_spec_ duplicates causes the validator to exit 1 with a clear "remove from baseline" message naming the stale key. 
--- scripts/wire_shape/validate.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/wire_shape/validate.py b/scripts/wire_shape/validate.py index 131a7df..f4a3ab6 100755 --- a/scripts/wire_shape/validate.py +++ b/scripts/wire_shape/validate.py @@ -100,6 +100,18 @@ def main() -> int: lines.append(f" baseline: {exp}") lines.append(f" observed: {obs}") cross_problems.append("\n".join(lines)) + # Reverse pass: a baselined cross-spec divergence that is no longer + # observed must be removed from the baseline. Otherwise the stale + # fingerprint shields a future reintroduction of the same old + # incompatible shape from the gate. + for name in baselined_cross: + if name not in cross_spec: + cross_problems.append( + f" {name}: baselined cross-spec divergence no longer " + f"observed — remove from " + f"baseline.cross_spec_duplicates.{name} so a future " + f"reintroduction of the same shape is caught as new." + ) if cross_problems: print("Cross-spec schema divergence gate failed:\n", file=sys.stderr) for p in cross_problems: From 489096a0d0ae7095f2b238021c4a7575653bd9f0 Mon Sep 17 00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 05:37:55 +0200 Subject: [PATCH 4/6] =?UTF-8?q?feat(types):=20wire-shape=20alignment=20?= =?UTF-8?q?=E2=80=94=20security=20+=20cat=20B=20start=20(initial)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit-driven sweep against the wire-shape contract gate. Initial PR focuses on the security-critical addition; the larger Cat B work across other types is tracked in a follow-up issue (Java SDK has the largest wire-shape gaps of the four SDKs and parts of the sweep need the validator-discovery fix below before they can ship without false-positive baseline noise). Changes: - WebhookSubscription.secret (security): HMAC-SHA256 signing key surfaced on the createWebhook response. 
Required to verify the X-AxonFlow-Signature header on inbound deliveries; without it, callers can't validate payload authenticity. Also adds tenantId and orgId (ownership scoping). The 6-arg constructor is preserved as a source-compat overload that delegates to the 9-arg variant with nulls for the new fields. toString() redacts the secret to avoid log leakage. - BudgetAlert.acknowledged: alert dismissal flag. Also adds @JsonProperty annotations to previously-unannotated fields (id, threshold, message) so the wire-shape validator's discovery sees them; Jackson's default name-mapping was correct in those cases, but the validator currently walks @JsonProperty only. The validator's missing field-name fallback for unannotated fields is documented in a follow-up issue. Validator findings: The audit surfaced 39 drift entries (37 after this PR). Most remaining entries fall into one of: a) SDK types missing many wire fields (StaticPolicy +13, CreateStaticPolicyRequest +11, UsageRecord +11, Budget +6, etc.) — this is real coverage gap, not measurement error. b) SDK types where most fields exist but lack @JsonProperty — the validator under-counts these. Filed for validator fix. c) RENAME_SAFE / orphan-read entries similar to the TS+Go sweep (DynamicPolicyMatch.reason, ExfiltrationCheckInfo.within_limits, PolicyOverride.active, etc.). Filed alongside (no SDK change needed; spec is wrong): - axonflow-enterprise#1708 — AISystemRegistry.materiality_classification - axonflow-enterprise#1709 — DynamicPolicyInfo schema wrong shape Tests: 1200 pass. 
--- CHANGELOG.md | 2 + .../types/costcontrols/CostControlTypes.java | 12 ++++ .../sdk/types/webhook/WebhookTypes.java | 65 ++++++++++++++++++- tests/fixtures/wire-shape-baseline.json | 17 ----- 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d5c225..f09ad11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Version alignment check** (`.github/workflows/validate-version-alignment.yml`). CI now fails any PR or push to `main` where `pom.xml`'s `<version>` drifts from the first released `## [X.Y.Z]` section in `CHANGELOG.md`. Matches the pattern in the platform repo and the Go SDK. - **Wire-shape contract gate** (`.github/workflows/wire-shape-contract.yml`). CI fails any PR that introduces drift between Java `@JsonProperty` annotations and the OpenAPI specs pinned at `tests/fixtures/wire-shape-baseline.json::openapi_specs_sha`. Four gates: cross-spec schema divergence, intra-file schema duplicates, per-type SDK-vs-spec drift, and registered-type rename-escape. The pinned spec SHA is itself guarded by a `spec-pin-bump` PR label so a single PR can't both move the SHA and silence drift. Source-discovery walks brace depth so nested classes (e.g. `WorkflowTypes.CreateWorkflowRequest`) and inner enums are attributed to the correct type rather than the file's outer class. Mirrors the Python, Go, and TypeScript gates. +- **`WebhookSubscription.secret`** — HMAC-SHA256 signing key now exposed on the response from `createWebhook`. Required to verify the `X-AxonFlow-Signature` header on inbound webhook deliveries; without it, callers can't validate payload authenticity. Also adds `tenantId` and `orgId` (ownership scoping). The 6-arg constructor is preserved as a source-compat overload that delegates to the 9-arg with nulls for the new fields. `toString()` redacts `secret` to avoid log leakage.
+- **`BudgetAlert.acknowledged`** — alert dismissal flag. Also adds `@JsonProperty` annotations on previously-unannotated fields (`id`, `threshold`, `message`) so the wire-shape gate can see them; Jackson's default name mapping was correct, but the validator's discovery walks `@JsonProperty` only. ## [5.7.0] - 2026-04-22 diff --git a/src/main/java/com/getaxonflow/sdk/types/costcontrols/CostControlTypes.java b/src/main/java/com/getaxonflow/sdk/types/costcontrols/CostControlTypes.java index 8c11ada..c262781 100644 --- a/src/main/java/com/getaxonflow/sdk/types/costcontrols/CostControlTypes.java +++ b/src/main/java/com/getaxonflow/sdk/types/costcontrols/CostControlTypes.java @@ -556,6 +556,7 @@ public String getPeriodEnd() { /** A budget alert. */ public static class BudgetAlert { + @JsonProperty("id") private String id; @JsonProperty("budget_id") @@ -564,6 +565,7 @@ public static class BudgetAlert { @JsonProperty("alert_type") private String alertType; + @JsonProperty("threshold") private Integer threshold; @JsonProperty("percentage_reached") @@ -572,11 +574,16 @@ public static class BudgetAlert { @JsonProperty("amount_usd") private Double amountUsd; + @JsonProperty("message") private String message; @JsonProperty("created_at") private String createdAt; + /** Whether the alert has been dismissed by an operator. */ + @JsonProperty("acknowledged") + private Boolean acknowledged; + public BudgetAlert() {} public String getId() { @@ -610,6 +617,11 @@ public String getMessage() { public String getCreatedAt() { return createdAt; } + + /** Whether the alert has been dismissed by an operator. */ + public Boolean getAcknowledged() { + return acknowledged; + } } /** Response containing budget alerts. 
*/ diff --git a/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java b/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java index 07692eb..1614a76 100644 --- a/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java +++ b/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java @@ -164,6 +164,15 @@ public static final class WebhookSubscription { @JsonProperty("active") private final boolean active; + @JsonProperty("tenant_id") + private final String tenantId; + + @JsonProperty("org_id") + private final String orgId; + + @JsonProperty("secret") + private final String secret; + @JsonProperty("created_at") private final String createdAt; @@ -177,13 +186,35 @@ public WebhookSubscription( @JsonProperty("events") List events, @JsonProperty("active") boolean active, @JsonProperty("created_at") String createdAt, - @JsonProperty("updated_at") String updatedAt) { + @JsonProperty("updated_at") String updatedAt, + @JsonProperty("tenant_id") String tenantId, + @JsonProperty("org_id") String orgId, + @JsonProperty("secret") String secret) { this.id = id; this.url = url; this.events = events != null ? Collections.unmodifiableList(events) : Collections.emptyList(); this.active = active; this.createdAt = createdAt; this.updatedAt = updatedAt; + this.tenantId = tenantId; + this.orgId = orgId; + this.secret = secret; + } + + /** + * Source-compat overload that omits the v6 wire-canonical fields + * (tenantId, orgId, secret). Existing user code calling the + * 6-arg constructor continues to compile; new code should pass + * the security-relevant secret + scoping fields explicitly. + */ + public WebhookSubscription( + String id, + String url, + List events, + boolean active, + String createdAt, + String updatedAt) { + this(id, url, events, active, createdAt, updatedAt, null, null, null); } public String getId() { @@ -202,6 +233,26 @@ public boolean isActive() { return active; } + /** Tenant ID that owns this subscription. 
*/ + public String getTenantId() { + return tenantId; + } + + /** Organization ID that owns this subscription. */ + public String getOrgId() { + return orgId; + } + + /** + * HMAC-SHA256 signing key for verifying inbound webhook payload + * signatures (X-AxonFlow-Signature header). Returned by the + * `createWebhook` call on initial creation; required for callers + * to validate payload authenticity. + */ + public String getSecret() { + return secret; + } + public String getCreatedAt() { return createdAt; } @@ -219,13 +270,16 @@ public boolean equals(Object o) { && Objects.equals(id, that.id) && Objects.equals(url, that.url) && Objects.equals(events, that.events) + && Objects.equals(tenantId, that.tenantId) + && Objects.equals(orgId, that.orgId) + && Objects.equals(secret, that.secret) && Objects.equals(createdAt, that.createdAt) && Objects.equals(updatedAt, that.updatedAt); } @Override public int hashCode() { - return Objects.hash(id, url, events, active, createdAt, updatedAt); + return Objects.hash(id, url, events, active, tenantId, orgId, secret, createdAt, updatedAt); } @Override @@ -241,6 +295,13 @@ public String toString() { + events + ", active=" + active + + ", tenantId='" + + tenantId + + '\'' + + ", orgId='" + + orgId + + '\'' + + ", secret='***'" + ", createdAt='" + createdAt + '\'' diff --git a/tests/fixtures/wire-shape-baseline.json b/tests/fixtures/wire-shape-baseline.json index 8714d12..a13fb15 100644 --- a/tests/fixtures/wire-shape-baseline.json +++ b/tests/fixtures/wire-shape-baseline.json @@ -212,15 +212,6 @@ "tenant_id" ] }, - "BudgetAlert": { - "sdk_only": [], - "spec_only": [ - "acknowledged", - "id", - "message", - "threshold" - ] - }, "BudgetStatus": { "sdk_only": [], "spec_only": [ @@ -558,14 +549,6 @@ "workflow_id" ] }, - "WebhookSubscription": { - "sdk_only": [], - "spec_only": [ - "org_id", - "secret", - "tenant_id" - ] - }, "WorkflowStatusResponse": { "sdk_only": [], "spec_only": [ From 09252f7604e736cf79b8d88259bffbfda07cc4dd Mon Sep 17 
00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 11:45:38 +0200 Subject: [PATCH 5/6] fix(wire-shape): validator now discovers unannotated Jackson fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Java validator's source-discovery only saw fields with @JsonProperty(...) annotations. Many Java SDK POJOs declare fields without the annotation and rely on Jackson's default name-mapping (which works correctly when the Java field name matches the wire key — `id`, `threshold`, `message`, `name`, etc.). The validator under-counted SDK fields in those cases. Three changes to lib.py: - _FIELD_DECL_RE matches plain field declarations: modifier-prefix + type-expression + field-name + `;`/`=`. Captures the modifier sequence so the next change can post-filter. - Filter out declarations whose modifier sequence contains `static` (those are class-level constants — never wire-serialized). - _extract_types_from_java now collects both annotated (@JsonProperty) AND plain field decls. Each plain field's "claim" on a preceding @JsonProperty within a 250-char lookback window is consumed — whichever annotation/field pair is closest. Annotation wins on the wire-name when a field is annotated; field name is used directly when the field is unannotated. Synthetic tests cover: - Mixed annotated + unannotated POJO (BudgetAlert pattern) - `private static final String CONST = "..."` excluded - Methods (have `(`) excluded - Generic field types like `Map<K, V>` and `List<T>` - Record params with `@JsonProperty` - Class with both constants and fields Effect on Java SDK baseline: - registered_types: 70 → 73 (3 types newly visible; previously had no @JsonProperty annotations so the validator skipped them) - per_type_drift: 37 → 39 (newly visible types surfaced 2 more drift entries; previously hidden coverage gaps) The drift increase is the validator working correctly for the first time — the previous count was an under-measurement, not a real "no drift" signal. 
--- scripts/wire_shape/lib.py | 76 +++++++++++++++++- tests/fixtures/wire-shape-baseline.json | 100 +++++++++++------------- 2 files changed, 122 insertions(+), 54 deletions(-) diff --git a/scripts/wire_shape/lib.py b/scripts/wire_shape/lib.py index 3111966..63b0dab 100755 --- a/scripts/wire_shape/lib.py +++ b/scripts/wire_shape/lib.py @@ -54,6 +54,34 @@ class whose body contains them. ) _JSON_PROPERTY_RE = re.compile(r'@JsonProperty\(\s*"([^"]+)"\s*\)') +# Matches a Java instance field declaration that uses Jackson's +# default field-name mapping (no @JsonProperty alias). The pattern: +# - the leading modifier sequence (group 1) — captured so we can +# post-filter declarations that include `static` (constants +# don't go on the wire) +# - a type expression: identifier with optional `.qualifier`, +# optional `<...>` generics, optional `[]` array +# - the field name (group 2) +# - terminated by `;` or `=` (initializer) +# +# `static` IS allowed in the modifier set (otherwise the regex would +# anchor at `final` in `private static final ...`); we drop the +# match in code when `static` appears in group 1. +# Methods are excluded by the absence of `(` before the terminator. +_FIELD_DECL_RE = re.compile( + r"((?:(?:private|protected|public|static|final|transient|volatile)\s+)+)" + r"(?:[\w.]+(?:\s*<[^<>;]*>)?(?:\s*\[\s*\])?)\s+" + r"(\w+)\s*[;=]" +) +# Cap on lookback distance from a field declaration to a +# preceding @JsonProperty(...) annotation: if the most recent +# annotation sits within this many characters of the field's +# declaration position, the field is considered "annotated" and we +# defer to the @JsonProperty value rather than the field name. +# 250 chars covers an annotation on the line immediately above plus +# JavaDoc and other annotations like @JsonInclude in between. 
+_JSON_PROPERTY_LOOKBACK = 250 + def load_all_schemas(spec_dir: Path) -> tuple[ dict[str, list[str]], @@ -258,12 +286,58 @@ def _extract_types_from_java(content: str) -> dict[str, list[str]]: # blanked range (string literal, comment, text block) — the # cleaner replaces those with whitespace, so cleaned[m.start()] # is a space rather than the original `@`. - props: list[tuple[int, str]] = [ + annot_props: list[tuple[int, str]] = [ (m.start(), m.group(1)) for m in _JSON_PROPERTY_RE.finditer(content) if m.start() < len(cleaned) and cleaned[m.start()] == "@" ] + # Plain (unannotated) field declarations. Jackson defaults to + # using the field name as the wire key when @JsonProperty is + # absent. Without this discovery, the wire-shape gate + # under-counts SDK fields wherever the SDK relies on Jackson's + # default name-mapping (which is correct for fields whose Java + # name already matches the wire — `id`, `threshold`, `message`, + # etc.). Match on the cleaned text so commented-out or in-string + # field-shaped patterns don't false-fire. Skip declarations + # whose modifier sequence contains `static` — those are + # class-level constants, never serialized. + field_decls = [ + fm for fm in _FIELD_DECL_RE.finditer(cleaned) + if "static" not in fm.group(1) + ] + + # Build the merged property stream. Each entry is (position, + # wire_name). For plain field decls, "absorb" any @JsonProperty + # whose position falls within the look-back window — that pair + # is annotated and the annotation's wire name takes precedence. + consumed_annot_indices: set[int] = set() + plain_props: list[tuple[int, str]] = [] + for fm in field_decls: + field_pos = fm.start() + field_name = fm.group(2) + # Find the latest annotation that hasn't been consumed and is + # within the lookback window. 
+ annotated = False + for idx, (ap, _) in enumerate(annot_props): + if idx in consumed_annot_indices: + continue + if ap >= field_pos: + break + if field_pos - ap <= _JSON_PROPERTY_LOOKBACK: + consumed_annot_indices.add(idx) + annotated = True + # Don't break — multiple annotations can stack + # (e.g. @JsonInclude + @JsonProperty); keep marking + # them consumed so they don't double-attribute. + if not annotated: + plain_props.append((field_pos, field_name)) + + # The full property stream is annotation-derived names + plain + # field names, ordered by source position so the depth walker + # attributes them in declaration order. + props: list[tuple[int, str]] = sorted(annot_props + plain_props) + # Walk the cleaned source. Two queues: decls already seen but # whose body `{` hasn't been hit yet (pending), and active type # frames (stack). Annotations attribute to the innermost pending diff --git a/tests/fixtures/wire-shape-baseline.json b/tests/fixtures/wire-shape-baseline.json index a13fb15..c5457f6 100644 --- a/tests/fixtures/wire-shape-baseline.json +++ b/tests/fixtures/wire-shape-baseline.json @@ -202,23 +202,14 @@ "spec_only": [] }, "Budget": { - "sdk_only": [], + "sdk_only": [ + "enabled" + ], "spec_only": [ - "id", - "name", "org_id", - "period", - "scope", "tenant_id" ] }, - "BudgetStatus": { - "sdk_only": [], - "spec_only": [ - "budget", - "percentage" - ] - }, "CancelPlanResponse": { "sdk_only": [ "message" @@ -260,16 +251,8 @@ "organization_id" ], "spec_only": [ - "action", - "category", - "description", - "enabled", - "name", - "pattern", "priority", - "severity", - "tags", - "tier" + "tags" ] }, "CreateWorkflowResponse": { @@ -283,18 +266,15 @@ }, "DynamicPolicy": { "sdk_only": [ + "category", "created_at", "organization_id", + "priority", + "tier", + "type", "updated_at" ], - "spec_only": [ - "actions", - "conditions", - "description", - "enabled", - "id", - "name" - ] + "spec_only": [] }, "DynamicPolicyInfo": { "sdk_only": [ @@ -427,6 +407,15 @@ 
"workflow_execution_id" ] }, + "PolicyAction": { + "sdk_only": [ + "value" + ], + "spec_only": [ + "config", + "type" + ] + }, "PolicyInfo": { "sdk_only": [ "code_artifact", @@ -446,7 +435,9 @@ ] }, "PolicyOverride": { - "sdk_only": [], + "sdk_only": [ + "active" + ], "spec_only": [ "enabled_override", "id" @@ -462,8 +453,7 @@ "change_summary", "id", "policy_id", - "snapshot", - "version" + "snapshot" ] }, "ResumePlanResponse": { @@ -482,19 +472,8 @@ "StaticPolicy": { "sdk_only": [], "spec_only": [ - "action", - "category", - "description", - "enabled", - "id", - "name", - "override", - "pattern", "policy_id", - "priority", - "severity", - "tier", - "version" + "priority" ] }, "StepGateRequest": { @@ -517,31 +496,37 @@ "metadata" ] }, + "UpdateStaticPolicyRequest": { + "sdk_only": [ + "category" + ], + "spec_only": [ + "priority", + "tags" + ] + }, "UsageBreakdown": { "sdk_only": [ + "period", "period_end", "period_start" ], - "spec_only": [ - "items" - ] + "spec_only": [] }, "UsageBreakdownItem": { "sdk_only": [], "spec_only": [ - "group_by", - "percentage" + "group_by" ] }, "UsageRecord": { - "sdk_only": [], + "sdk_only": [ + "timestamp" + ], "spec_only": [ "created_at", "error_message", - "id", "latency_ms", - "model", - "provider", "success", "team_id", "tenant_id", @@ -549,6 +534,12 @@ "workflow_id" ] }, + "UsageSummary": { + "sdk_only": [ + "period" + ], + "spec_only": [] + }, "WorkflowStatusResponse": { "sdk_only": [], "spec_only": [ @@ -608,12 +599,14 @@ "PlanVersionEntry", "PlanVersionsResponse", "PlatformCapability", + "PolicyAction", "PolicyEvaluationResult", "PolicyInfo", "PolicyMatch", "PolicyMatchInfo", "PolicyOverride", "PolicyVersion", + "PricingInfo", "RateLimitInfo", "ResumeFromCheckpointResponse", "ResumePlanResponse", @@ -627,6 +620,7 @@ "ToolContext", "UpdatePlanRequest", "UpdatePlanResponse", + "UpdateStaticPolicyRequest", "UpdateWebhookRequest", "UsageBreakdown", "UsageBreakdownItem", From 9ab61c594697b8bf88cf3f82bd402142cb3c923c Mon Sep 17 
00:00:00 2001 From: Saurabh Jain Date: Sat, 25 Apr 2026 12:23:26 +0200 Subject: [PATCH 6/6] fix(types): WebhookSubscription equality is now identity-based on id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier in this PR I added @JsonProperty("secret"), @JsonProperty("tenant_id"), and @JsonProperty("org_id") to WebhookSubscription's @JsonCreator constructor and folded them into equals()/hashCode()/toString(). The field additions are additive, but the equality contract change is not — and the user review caught it. Concrete impact: WebhookSubscription localView = new WebhookSubscription( "wh-1", "https://example.com", List.of("e"), true, "2026-01-01", "2026-01-01"); // 6-arg ctor WebhookSubscription serverView = client.getWebhook("wh-1"); // includes secret etc. // Pre-PR: localView.equals(serverView) → true // After the field-addition step: localView.equals(serverView) → false // localView.hashCode() != serverView.hashCode() // → Set membership broken, Map keying broken, identity caches split. Fix in this commit: equals() and hashCode() now use only `id`. A WebhookSubscription is an entity, not a value object — two instances with the same id represent the same subscription, regardless of which fields are populated in this particular view. Callers needing content-equality (e.g. detecting a rotated secret) should compare getters directly. This restores the additive claim of the PR (no observable behavior change for callers comparing the same logical webhook), while preserving the value-add of exposing secret + scoping fields for HMAC verification. toString() is unchanged (still shows full state with secret redacted as '***'). Tests: 1200 pass. 
--- CHANGELOG.md | 4 +++ .../sdk/types/webhook/WebhookTypes.java | 27 ++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f09ad11..799c910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`WebhookSubscription.secret`** — HMAC-SHA256 signing key now exposed on the response from `createWebhook`. Required to verify the `X-AxonFlow-Signature` header on inbound webhook deliveries; without it, callers can't validate payload authenticity. Also adds `tenantId` and `orgId` (ownership scoping). The 6-arg constructor is preserved as a source-compat overload that delegates to the 9-arg with nulls for the new fields. `toString()` redacts `secret` to avoid log leakage. - **`BudgetAlert.acknowledged`** — alert dismissal flag. Also adds `@JsonProperty` annotations on previously-unannotated fields (`id`, `threshold`, `message`) so the wire-shape gate can see them; Jackson's default name mapping was correct, but the validator's discovery walks `@JsonProperty` only. +### Changed + +- **`WebhookSubscription` equality is now identity-based on `id`.** A `WebhookSubscription` is an entity, not a value object — two instances with the same `id` represent the same subscription, regardless of whether one view has loaded `secret` (returned by `createWebhook` only) and another has not, or whether `updatedAt`/`active` have moved between fetches. Previously `equals()`/`hashCode()` compared every field; that meant a webhook constructed locally with the legacy 6-arg constructor would have compared **unequal** to the same logical webhook deserialized from a server response that included `secret`/`tenantId`/`orgId`. `Set`, `Map` keying, and identity-tracking caches all break under that semantics. Identity-based equality fixes those at the source. If you need content-equality (e.g. 
to detect a rotated secret), compare the relevant getters directly. Same change applies to `hashCode()`. `toString()` is unchanged (still includes the full state with `secret` redacted). + ## [5.7.0] - 2026-04-22 ### Added diff --git a/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java b/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java index 1614a76..041ef00 100644 --- a/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java +++ b/src/main/java/com/getaxonflow/sdk/types/webhook/WebhookTypes.java @@ -261,25 +261,32 @@ public String getUpdatedAt() { return updatedAt; } + /** + * Identity-based equality on {@code id}. + * + *

<p>A {@code WebhookSubscription} is an entity, not a value object — two + * instances with the same {@code id} represent the same subscription on + * the server, regardless of whether one view has loaded {@code secret} + * (returned by {@code createWebhook} only) and another has not, or + * whether {@code updatedAt} or {@code active} have moved between + * fetches. Field-by-field equality would split same-id views into + * different objects and break {@code Set}/{@code Map} membership and + * cache invalidation in caller code. + * + *

<p>If you need content-equality (for example to detect rotated + * secrets), compare the relevant getters directly. + */ @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; WebhookSubscription that = (WebhookSubscription) o; - return active == that.active - && Objects.equals(id, that.id) - && Objects.equals(url, that.url) - && Objects.equals(events, that.events) - && Objects.equals(tenantId, that.tenantId) - && Objects.equals(orgId, that.orgId) - && Objects.equals(secret, that.secret) - && Objects.equals(createdAt, that.createdAt) - && Objects.equals(updatedAt, that.updatedAt); + return Objects.equals(id, that.id); } @Override public int hashCode() { - return Objects.hash(id, url, events, active, tenantId, orgId, secret, createdAt, updatedAt); + return Objects.hash(id); } @Override