Skip to content

Commit e0659e0

Browse files
committed
fix(mcp-proxy): harden verification integrity
Align compound risk inference with policy risk rank so destructive matches win over financial or production terms. Expand offline DecisionReceipt cross-checks to include risk class and policy context hash. Action and resource receipt fields remain intentionally out of scope because the backend sees privacy-filtered values and local export does not reconstruct that filter. Export evidence using stored prev_event_hash values and fail clearly when a stored chain link diverges from the expected chain. The chain-break message tells operators to run doctor or inspect evidence DB integrity. Warn verify users when no explicit --trusted-signer-did is provided and verification falls back to the bundle's embedded signer list. Implemented with assistance from Codex.
1 parent 8a47437 commit e0659e0

6 files changed

Lines changed: 234 additions & 26 deletions

File tree

agentveil_mcp_proxy/classification.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,13 @@ def infer_risk_class(
214214
text_parts.append(environment)
215215
text = " ".join(text_parts).lower()
216216
tokens = tuple(item for item in re.split(r"[^a-z0-9]+", text) if item)
217+
# Keep compound-keyword inference aligned with policy._RISK_RANK.
218+
if _has_prefix(tokens, _DESTRUCTIVE_PREFIXES):
219+
return RiskClass.DESTRUCTIVE
217220
if _has_prefix(tokens, _FINANCIAL_WORDS):
218221
return RiskClass.FINANCIAL
219222
if _has_prefix(tokens, _PRODUCTION_WORDS):
220223
return RiskClass.PRODUCTION
221-
if _has_prefix(tokens, _DESTRUCTIVE_PREFIXES):
222-
return RiskClass.DESTRUCTIVE
223224
if _has_prefix(tokens, _WRITE_PREFIXES):
224225
return RiskClass.WRITE
225226
if _has_prefix(tokens, _READ_PREFIXES):

agentveil_mcp_proxy/cli.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
)
3636
from agentveil_mcp_proxy.classification import ToolCallClassifier
3737
from agentveil_mcp_proxy.evidence import (
38+
ApprovalEvidenceError,
3839
ApprovalEvidenceStore,
3940
EvidenceExportError,
4041
EvidenceVerificationError,
@@ -71,6 +72,10 @@
7172
REISSUE_GRANT_FORCE_THRESHOLD_SECONDS = 24 * 60 * 60
7273
DEFAULT_ALLOWED_CATEGORIES = ("mcp_proxy",)
7374
DEFAULT_EVIDENCE_VACUUM_MAX_AGE_DAYS = 90
75+
DEFAULT_TRUST_FROM_BUNDLE_WARNING = (
76+
"default_trust_from_bundle: trusting bundle's embedded signer list; "
77+
"pass --trusted-signer-did to verify against your own pinned set"
78+
)
7479
AGENTVEIL_DEV_SIGNER_DIDS = (
7580
"did:key:z6MkkvQQ9SxaNX9eEVHd5NtEamVY3YiZSpHZE567Vxs5jQQ3",
7681
"did:key:z6Mkjw22249tpNN4LJGLyq1oGSq1Skh3ks94fiMrgi4oqveo",
@@ -881,17 +886,21 @@ def verify_evidence(
881886
"""Verify an evidence bundle offline."""
882887

883888
out = out or sys.stdout
889+
explicit_trusted_signers = tuple(trusted_signer_dids or ())
884890
result = verify_evidence_bundle_file(
885891
bundle_path,
886-
trusted_signer_dids=trusted_signer_dids,
892+
trusted_signer_dids=explicit_trusted_signers,
887893
)
894+
warnings = list(result.warnings)
895+
if not explicit_trusted_signers:
896+
warnings.append(DEFAULT_TRUST_FROM_BUNDLE_WARNING)
888897
if output_format == "json":
889898
print(json.dumps({
890899
"status": "ok",
891900
"record_count": result.record_count,
892901
"signed_receipt_count": result.signed_receipt_count,
893902
"unverified_receipt_count": result.unverified_receipt_count,
894-
"warnings": list(result.warnings),
903+
"warnings": warnings,
895904
"chain_root_hash": result.chain_root_hash,
896905
}, sort_keys=True), file=out)
897906
else:
@@ -907,7 +916,7 @@ def verify_evidence(
907916
"but no matching signed receipt in bundle",
908917
file=out,
909918
)
910-
for warning in result.warnings:
919+
for warning in warnings:
911920
print(f"WARN: {warning}", file=out)
912921
return 0
913922

@@ -1187,7 +1196,7 @@ def main(argv: list[str] | None = None) -> int:
11871196
before=args.before,
11881197
)
11891198
return 0
1190-
except (ProxyCliError, EvidenceExportError, EvidenceVerificationError) as exc:
1199+
except (ProxyCliError, ApprovalEvidenceError, EvidenceExportError, EvidenceVerificationError) as exc:
11911200
print(f"ERROR: {exc}", file=sys.stderr)
11921201
return exc.exit_code if isinstance(exc, ProxyCliError) else 1
11931202
raise AssertionError(f"unhandled command: {args.command}")

agentveil_mcp_proxy/evidence/proof.py

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121

2222

2323
EVIDENCE_EXPORT_SCHEMA_VERSION = 1
24+
_RECEIPT_RECORD_CROSS_CHECK_FIELDS = (
25+
("payload_hash", "payload_hash"),
26+
("risk_class", "client_risk_class"),
27+
("policy_context_hash", "client_policy_context_hash"),
28+
)
2429

2530

2631
class EvidenceProofError(RuntimeError):
@@ -82,7 +87,8 @@ def build_evidence_bundle(
8287
request_ids=request_ids,
8388
)
8489
signed_receipts: dict[str, str] = {}
85-
export_records = _bundle_records(records)
90+
has_filter = since_timestamp is not None or until_timestamp is not None or request_ids is not None
91+
export_records = _bundle_records(records, require_genesis=not has_filter)
8692
unverified_receipt_count = 0
8793
for record in records:
8894
if not record.decision_audit_id or not record.decision_receipt_sha256:
@@ -164,7 +170,12 @@ def verify_evidence_bundle(
164170
records = bundle.get("records")
165171
if not isinstance(records, list):
166172
raise EvidenceVerificationError("records must be a list")
167-
expected_prev = GENESIS_PREV_EVENT_HASH
173+
has_filter = _bundle_uses_filter(bundle.get("filter"))
174+
expected_prev = (
175+
records[0].get("prev_event_hash")
176+
if records and has_filter and isinstance(records[0], dict)
177+
else GENESIS_PREV_EVENT_HASH
178+
)
168179
last_hash = GENESIS_PREV_EVENT_HASH
169180
for index, record in enumerate(records):
170181
if not isinstance(record, dict):
@@ -222,10 +233,15 @@ def verify_evidence_bundle(
222233
if receipt_digest not in verified_bodies:
223234
continue
224235
receipt_body = verified_bodies[receipt_digest]
225-
expected_payload = record.get("payload_hash")
226-
receipt_payload = receipt_body.get("payload_hash")
227-
if receipt_payload is not None and receipt_payload != expected_payload:
228-
raise EvidenceVerificationError("DecisionReceipt payload_hash mismatch")
236+
for record_field, receipt_field in _RECEIPT_RECORD_CROSS_CHECK_FIELDS:
237+
record_value = record.get(record_field)
238+
receipt_value = receipt_body.get(receipt_field)
239+
if record_value is None or receipt_value is None:
240+
continue
241+
if receipt_value != record_value:
242+
raise EvidenceVerificationError(
243+
f"DecisionReceipt {receipt_field} mismatch with record {record_field}"
244+
)
229245

230246
return EvidenceVerificationResult(
231247
valid=True,
@@ -254,18 +270,36 @@ def verify_evidence_bundle_file(
254270
return verify_evidence_bundle(bundle, trusted_signer_dids=trusted_signer_dids)
255271

256272

257-
def _bundle_records(records: list[PendingApproval]) -> list[dict[str, Any]]:
273+
def _bundle_records(records: list[PendingApproval], *, require_genesis: bool = True) -> list[dict[str, Any]]:
258274
export_records: list[dict[str, Any]] = []
259-
prev_hash = GENESIS_PREV_EVENT_HASH
275+
expected_prev_hash = (
276+
GENESIS_PREV_EVENT_HASH
277+
if require_genesis or not records
278+
else records[0].prev_event_hash
279+
)
260280
for record in records:
281+
if record.prev_event_hash != expected_prev_hash:
282+
raise EvidenceExportError(
283+
f"chain integrity broken at request_id {record.request_id}: "
284+
"stored prev_event_hash diverges from expected chain link; "
285+
"run doctor or inspect evidence DB integrity"
286+
)
261287
data = asdict(record)
262-
data["prev_event_hash"] = prev_hash
263288
data["record_hash"] = record_hash(data)
264-
prev_hash = data["record_hash"]
289+
expected_prev_hash = data["record_hash"]
265290
export_records.append(data)
266291
return export_records
267292

268293

294+
def _bundle_uses_filter(filter_value: Any) -> bool:
295+
if not isinstance(filter_value, Mapping):
296+
return False
297+
return any(
298+
filter_value.get(key) is not None
299+
for key in ("since_timestamp", "until_timestamp", "request_ids")
300+
)
301+
302+
269303
def _verify_receipt_with_pinned_signers(
270304
receipt_jcs: str,
271305
trusted_signer_dids: Iterable[str],

docs/MCP_PROXY_OPERATIONS.md

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,23 @@ verification can validate backend signatures. If receipt fetch fails during
8282
export, the bundle remains valid and reports the unverified receipt count.
8383

8484
`verify` performs offline checks only: record hashes, chain linkage, signed
85-
receipt signatures against pinned trusted signer DIDs, and payload-hash binding
86-
between records and DecisionReceipts when those receipts are present. It does
87-
not call the AVP backend.
85+
receipt signatures against pinned trusted signer DIDs, and signed-field binding
86+
between records and DecisionReceipts when those receipts are present. It checks
87+
payload hash, client risk class, and client policy context hash. It does not
88+
call the AVP backend.
89+
90+
Auditors should pass their own pinned signer DID set:
91+
92+
```bash
93+
agentveil-mcp-proxy verify /secure/path/evidence-bundle.json \
94+
--trusted-signer-did did:key:z6Mk...
95+
```
96+
97+
Without `--trusted-signer-did`, verification falls back to the signer list
98+
embedded in the bundle and prints a warning. That mode confirms internal bundle
99+
consistency, but it does not prove the bundle's signer list is the auditor's
100+
trusted set. A malicious bundle can include an attacker-controlled signer DID
101+
and a matching attacker-signed receipt.
88102

89103
To prune old terminal records and rebuild the local chain:
90104

tests/test_mcp_proxy_classification.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,21 @@ def test_risk_inference_covers_core_vocab():
214214
assert infer_risk_class("custom.inspect", tool="custom_action") is RiskClass.UNKNOWN
215215

216216

217+
def test_risk_inference_destructive_wins_over_financial_compounds():
    """Compound names mixing destructive and financial terms infer DESTRUCTIVE."""
    compound_cases = (
        ("billing.delete_payment", "delete_payment"),
        ("billing.drop_billing_table", "drop_billing_table"),
        ("auth.revoke_payment_token", "revoke_payment_token"),
        ("bank.transfer_to_destroy_account", "transfer_to_destroy_account"),
    )
    for name, tool_name in compound_cases:
        assert infer_risk_class(name, tool=tool_name) is RiskClass.DESTRUCTIVE
225+
226+
227+
def test_risk_inference_destructive_wins_over_production_compounds():
    """Compound names mixing destructive and production terms infer DESTRUCTIVE."""
    compound_cases = (
        ("deploy.drop_prod_db", "drop_prod_db"),
        ("auth.revoke_prod_access", "revoke_prod_access"),
    )
    for name, tool_name in compound_cases:
        assert infer_risk_class(name, tool=tool_name) is RiskClass.DESTRUCTIVE
230+
231+
217232
def test_risk_inference_does_not_over_classify_substring_collisions():
218233
assert infer_risk_class("github.get_infrastructure", tool="get_infrastructure") is RiskClass.READ
219234
assert infer_risk_class("github.list_endpoints", tool="list_endpoints") is RiskClass.READ

0 commit comments

Comments
 (0)