-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathcodec_consent.py
More file actions
102 lines (88 loc) · 4.25 KB
/
codec_consent.py
File metadata and controls
102 lines (88 loc) · 4.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""codec_consent — strict-consent gate for the chat + MCP skill paths.
Re-audit (red-team CHAIN-001/002/006): the Step-3 consent gate
(docs/PHASE1-STEP3-DESIGN.md §1.7) was wired ONLY into codec_agent_runner. The
chat ([SKILL:] tag + pre-LLM hijack → codec_dispatch.run_skill) and MCP
(codec_mcp.tool_fn) paths could reach high-power skills with only the
`is_dangerous` heuristic / path blocklists. This module is the shared
classifier + per-transport policy:
- MCP → hard-refuse destructive skills (claude.ai can't consent at the
operator tier; consistent with the _HTTP_BLOCKED principle).
- chat → require explicit confirmation (the handler returns consent_required;
the user confirms; re-dispatch carries a token).
- voice/agent → existing ask_user announce-and-listen (unchanged).
A skill is "destructive" if it declares `SKILL_DESTRUCTIVE = True`
(registry-AST-extracted — the extensible per-skill path, Decision C), OR is in
`codec_config._HTTP_BLOCKED`, OR is one of the known high-power built-ins below
(so coverage doesn't depend on regenerating the hash-pinned skill manifest).
Kill switch: `CONSENT_GATE_ENABLED=false`.
"""
import os
__all__ = ["gate_enabled", "is_destructive_skill", "chat_consent_ok", "mcp_refuse_message"]
# High-power built-in skills that count as destructive even though they are
# absent from _HTTP_BLOCKED. (terminal / python_exec / process_manager /
# pm2_control / ax_control are not repeated here: the _HTTP_BLOCKED backstop
# already covers them.)
_DESTRUCTIVE_BUILTINS = frozenset(
    (
        "file_ops",       # write/append/delete to the filesystem
        "file_write",     # writes files
        "imessage_send",  # sends messages as the user
        "pilot",          # drives a real browser session
        "skill_forge",    # writes a skill to disk (no review gate)
    )
)
def gate_enabled() -> bool:
    """Report whether the consent gate is active.

    The gate is on unless the CONSENT_GATE_ENABLED environment variable is
    set to "false" (compared case-insensitively). Any other value, or an
    unset variable, leaves the gate enabled.
    """
    raw_setting = os.environ.get("CONSENT_GATE_ENABLED", "true")
    switched_off = raw_setting.lower() == "false"
    return not switched_off
def is_destructive_skill(tool_name, registry=None) -> bool:
    """Return True if `tool_name` is a high-power/destructive skill.

    Callers use the result to require consent (chat) or to refuse (MCP).
    Never raises.

    Checks, in order; the first match wins:
      1. the registry's per-skill flag, via ``registry.get_destructive(name)``;
      2. membership in ``codec_config._HTTP_BLOCKED``;
      3. membership in ``_DESTRUCTIVE_BUILTINS``.

    Args:
        tool_name: Skill name to classify. Falsy or unhashable values are
            classified as not destructive.
        registry: Optional object exposing ``get_destructive(name)``. When
            None, ``codec_dispatch.registry`` is imported lazily.

    Returns:
        True if any check matches, otherwise False.
    """
    if not tool_name:
        return False
    # An unhashable name (list, dict, ...) would make the frozenset membership
    # test at the end raise TypeError, breaking the "never raises" contract.
    try:
        hash(tool_name)
    except TypeError:
        return False

    import logging
    log = logging.getLogger(__name__)

    # 1) per-skill SKILL_DESTRUCTIVE flag (extensible — user skills opt in)
    try:
        reg = registry
        if reg is None:
            from codec_dispatch import registry as reg  # the singleton
        if reg is not None and reg.get_destructive(tool_name):
            return True
    except Exception:
        # Best-effort: fall through to the static checks, but leave a trace
        # instead of discarding the error.
        log.debug(
            "registry destructive-flag lookup failed for %r",
            tool_name,
            exc_info=True,
        )

    # 2) _HTTP_BLOCKED backstop (terminal, python_exec, process_manager, …)
    try:
        from codec_config import _HTTP_BLOCKED
        if tool_name in _HTTP_BLOCKED:
            return True
    except Exception:
        # Best-effort for the same reason as step 1.
        log.debug(
            "_HTTP_BLOCKED lookup failed for %r",
            tool_name,
            exc_info=True,
        )

    # 3) known high-power built-ins
    return tool_name in _DESTRUCTIVE_BUILTINS
def chat_consent_ok(tool_name, query, *, registry=None) -> bool:
    """Decide whether a skill requested on the chat path (A2) may run.

    A destructive skill requires explicit consent via the existing
    AskUserQuestion PWA panel (Phase 1 Step 3 §1.7 — literal verb-match;
    generic yes/ok rejected). BLOCKS the worker thread on ask_user until
    answered — the chat handler invokes this via asyncio.to_thread, so the
    event loop isn't blocked.

    Fail-closed: any error while classifying the skill or asking the user
    returns False, so a destructive skill never auto-runs.

    Args:
        tool_name: Name of the skill about to be dispatched.
        query: The user request, shown in the prompt truncated to 200
            characters; None is treated as an empty string.
        registry: Optional registry forwarded to ``is_destructive_skill``.

    Returns:
        True if the skill may run (gate disabled, skill not destructive, or
        consent granted); False if blocked (empty answer, timeout, ask_user
        disabled or unavailable, or any error).
    """
    try:
        # Classification sits inside the try so that an unexpected error here
        # also resolves to "blocked" rather than propagating to the handler.
        if not gate_enabled() or not is_destructive_skill(tool_name, registry=registry):
            return True

        import codec_ask_user

        answer = codec_ask_user.ask(
            f"CODEC wants to run the '{tool_name}' skill — a destructive / "
            f"high-power operation — for: {(query or '')[:200]}",
            destructive=True,
            asked_from="chat",
            tool_name=tool_name,
        )
        # A missing/empty/False answer is not consent. Without this guard it
        # would pass the sentinel check below and the skill would run.
        # NOTE(review): confirm how codec_ask_user.ask reports an explicit
        # decline; only the two sentinels below are visible from this module.
        if not answer:
            return False
        return answer not in (
            codec_ask_user.TIMEOUT_SENTINEL,
            codec_ask_user.DISABLED_SENTINEL,
        )
    except Exception:
        return False
def mcp_refuse_message(tool_name) -> str:
    """Build the refusal text returned when a destructive skill is requested over MCP.

    Args:
        tool_name: Name of the refused skill, interpolated into the message.

    Returns:
        A message saying the skill is not permitted over MCP and should be
        run locally (chat or voice), where the operator can confirm it.
    """
    template = (
        "Skill '{}' is a destructive/high-power operation and is not "
        "permitted over MCP. Run it locally (chat or voice), where the operator "
        "can confirm it."
    )
    return template.format(tool_name)