Auto-Coding/apps/backend/agents/runtime/capabilities.py at develop · OBenner/Auto-Coding · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
"""
Runtime capability contracts.

The provider layer answers "which model can we call?" The runtime layer answers
"which workspace actions can this session perform safely?"

``RuntimeCapabilities`` describes what the runtime physically supports.
Promotion of a runtime to satisfy a stricter requirement (for example
treating a Generic Edit session as ``full_autonomous`` after a provider
clears the AutonomyPolicy gate) is a separate decision expressed via
``RuntimePolicy``. Capabilities must never claim a flag that the runtime
cannot back; promotion lives in policy so operators can see the two
forces independently.
"""

import warnings
from dataclasses import dataclass


@dataclass(frozen=True)
class RuntimeCapabilities:
    """Capabilities exposed by an agent runtime.

    Each field is a boolean flag that defaults to ``False``. The
    classmethod constructors build the flag sets used for the known
    runtime flavours; :meth:`missing` and :meth:`supports` compare a flag
    set against a requirements object, optionally topped up by a policy.
    """

    text_completion: bool = False
    streaming_text: bool = False
    structured_output: bool = False
    native_tool_loop: bool = False
    function_tools: bool = False
    mcp: bool = False
    filesystem_read: bool = False
    filesystem_edit: bool = False
    shell: bool = False
    apply_patch: bool = False
    subagents: bool = False
    sandbox: bool = False

    @classmethod
    def claude_agent_sdk(cls) -> "RuntimeCapabilities":
        """Capabilities expected from the existing Claude Agent SDK path.

        Every flag is enabled.
        """
        return cls(
            text_completion=True,
            streaming_text=True,
            structured_output=True,
            native_tool_loop=True,
            function_tools=True,
            mcp=True,
            filesystem_read=True,
            filesystem_edit=True,
            shell=True,
            apply_patch=True,
            subagents=True,
            sandbox=True,
        )

    @classmethod
    def codex_cli(cls) -> "RuntimeCapabilities":
        """Capabilities expected from Codex CLI exec sessions.

        Leaves ``function_tools``, ``mcp`` and ``subagents`` disabled.
        """
        return cls(
            text_completion=True,
            streaming_text=True,
            structured_output=True,
            native_tool_loop=True,
            filesystem_read=True,
            filesystem_edit=True,
            shell=True,
            apply_patch=True,
            sandbox=True,
        )

    @classmethod
    def completion_only(cls) -> "RuntimeCapabilities":
        """Capabilities for direct model SDKs and model gateways.

        Only the three text-oriented flags are enabled; no workspace access.
        """
        return cls(
            text_completion=True,
            streaming_text=True,
            structured_output=True,
        )

    @classmethod
    def patch_proposal(cls) -> "RuntimeCapabilities":
        """Capabilities for validated patch proposal mode.

        Text flags plus ``filesystem_edit`` and ``apply_patch``.
        """
        return cls(
            text_completion=True,
            streaming_text=True,
            structured_output=True,
            filesystem_edit=True,
            apply_patch=True,
        )

    @classmethod
    def generic_edit(cls) -> "RuntimeCapabilities":
        """Capabilities for Auto Code's provider-neutral local tool loop.

        Note that ``native_tool_loop``, ``mcp``, ``subagents`` and
        ``sandbox`` stay disabled here.
        """
        return cls(
            text_completion=True,
            streaming_text=True,
            structured_output=True,
            function_tools=True,
            filesystem_read=True,
            filesystem_edit=True,
            shell=True,
            apply_patch=True,
        )

    @classmethod
    def promoted_edit(cls) -> "RuntimeCapabilities":
        """Capabilities for a Generic Edit session that may be policy-promoted.

        Physically identical to :meth:`generic_edit`. Promotion to satisfy
        ``full_coder``/``planner`` requirements is represented separately
        via :class:`RuntimePolicy` so the capability flags stay honest.
        """
        return cls.generic_edit()

    @classmethod
    def direct_api_autonomous(cls) -> "RuntimeCapabilities":
        """Deprecated alias for :meth:`promoted_edit`.

        The previous implementation set ``native_tool_loop=True`` to make
        :class:`DirectApiAutonomousRuntimeSession` satisfy ``full_coder``
        requirements through the capability check. That conflated
        capability (what the runtime physically does) with policy (whether
        the AutonomyPolicy gate promoted the runtime). Use
        :meth:`promoted_edit` for the honest capability and combine it
        with a :class:`RuntimePolicy` carrying
        ``promoted_to_full_autonomous=True`` for the promotion decision.

        Emits a :class:`DeprecationWarning` on every call.
        """
        warnings.warn(
            "RuntimeCapabilities.direct_api_autonomous() is deprecated; "
            "use RuntimeCapabilities.promoted_edit() combined with a "
            "RuntimePolicy that carries promoted_to_full_autonomous=True.",
            DeprecationWarning,
            # Attribute the warning to the caller, not to this method.
            stacklevel=2,
        )
        return cls.promoted_edit()

    def available(self) -> list[str]:
        """Return the names of the capability flags that are ``True``.

        Names come back in the order the instance attributes were stored.
        """
        return [name for name, value in vars(self).items() if value is True]

    def missing(
        self,
        requirements: "RuntimeRequirements",
        *,
        policy: "RuntimePolicy | None" = None,
    ) -> list[str]:
        """Return required capabilities this runtime does not provide.

        A capability counts as provided only when the attribute of that
        name on this instance is literally ``True``. A name that does not
        exist, or that resolves to something other than a flag (for
        example the ``supports`` method), is reported as missing rather
        than silently treated as satisfied.

        ``policy`` may grant the runtime additional satisfied capabilities
        through evidence-based promotion (see :class:`RuntimePolicy`).

        Args:
            requirements: Object whose ``required`` attribute is iterated
                for capability names.
            policy: Optional object whose ``granted_capabilities()`` result
                is treated as satisfied in addition to the flags.

        Returns:
            The unsatisfied names, in the order ``requirements.required``
            lists them.
        """
        granted = policy.granted_capabilities() if policy is not None else frozenset()
        return [
            capability
            for capability in requirements.required
            # ``is not True`` (instead of plain truthiness) keeps bound
            # methods and other truthy non-flag attributes from passing.
            if getattr(self, capability, False) is not True
            and capability not in granted
        ]

    def supports(
        self,
        requirements: "RuntimeRequirements",
        *,
        policy: "RuntimePolicy | None" = None,
    ) -> bool:
        """Return true when all required capabilities are available.

        Pass ``policy`` to honor evidence-based promotion (for example a
        Generic Edit runtime that the AutonomyPolicy gate promoted to
        ``full_autonomous`` for a specific provider).

        Equivalent to checking that :meth:`missing` returns an empty list.
        """
        return not self.missing(requirements, policy=policy)


# Granted when the AutonomyPolicy gate signs off on a promoted-edit
# session. ``native_tool_loop`` is included because the underlying
# Generic Edit engine tries the provider's native tool API first and
# falls back to its JSON action loop when the provider has no tool
# support; the gate evidence shows the provider/model pair can drive
# that loop end-to-end. Promotion by itself does NOT grant ``subagents``
# or ``sandbox``: those still depend on the Phase 1.2 and Phase 1.3
# capability work in docs/roadmap/non-claude-provider-autonomy.md.
_PROMOTED_FULL_AUTONOMOUS_GRANTS: frozenset[str] = frozenset({"native_tool_loop"})

# Phase 1.1: granted once the external MCP client bridge is enabled.
# Through the provider-neutral bridge, direct API providers can reach
# Graphiti, Linear, Electron, Puppeteer, and custom MCP servers, so they
# match the Claude SDK MCP surface for tool discovery and invocation.
_MCP_EXECUTION_GRANTS: frozenset[str] = frozenset({"mcp"})

# Phase 1.2: mutating subagents. The orchestrator already produces
# isolated child contexts and per-child artifacts; transactional
# boundaries and conflict-aware merge are tracked separately in the
# runtime_subagent_mutation_policy matrix. Operators opt in to this
# grant (via AutonomyLevel.BOLD or AUTO_CODE_MUTATING_SUBAGENTS=true)
# once the merge protocol scaffolding lands.
_MUTATING_SUBAGENT_GRANTS: frozenset[str] = frozenset({"subagents"})

# Phase 1.3: sandbox. The cross-platform sandbox skeleton in
# ``core/sandbox.py`` detects whether Seatbelt (macOS), bubblewrap
# (Linux), or AppContainer (Windows) is available. This grant only
# fires when ``sandbox_enabled=True``, and the autonomy layer only sets
# that when (a) the level requests sandboxing and (b) the host actually
# exposes a real backend. Otherwise ``sandbox`` stays missing so the
# capability error remains honest.
_SANDBOX_GRANTS: frozenset[str] = frozenset({"sandbox"})


@dataclass(frozen=True)
class RuntimePolicy:
    """Operator or evidence-gate decisions layered over capabilities.

    :class:`RuntimeCapabilities` records what a runtime can physically
    do. This class records, as independent boolean switches, what has
    been allowed on top of that. The two are kept apart on purpose.
    """

    promoted_to_full_autonomous: bool = False
    mcp_execution_enabled: bool = False
    mutating_subagents_enabled: bool = False
    sandbox_enabled: bool = False

    def granted_capabilities(self) -> frozenset[str]:
        """Return the capability names this policy counts as satisfied.

        The result is the union of the grant sets whose switch is on, and
        an empty frozenset when every switch is off.
        """
        switchboard = (
            (self.promoted_to_full_autonomous, _PROMOTED_FULL_AUTONOMOUS_GRANTS),
            (self.mcp_execution_enabled, _MCP_EXECUTION_GRANTS),
            (self.mutating_subagents_enabled, _MUTATING_SUBAGENT_GRANTS),
            (self.sandbox_enabled, _SANDBOX_GRANTS),
        )
        active = [grants for enabled, grants in switchboard if enabled]
        return frozenset().union(*active)

    def to_dict(self) -> dict[str, object]:
        """Return a JSON-safe snapshot of the switches and their grants.

        The four switch values come first, followed by the granted
        capability names as a sorted list.
        """
        snapshot: dict[str, object] = {
            switch: getattr(self, switch)
            for switch in (
                "promoted_to_full_autonomous",
                "mcp_execution_enabled",
                "mutating_subagents_enabled",
                "sandbox_enabled",
            )
        }
        snapshot["granted_capabilities"] = sorted(self.granted_capabilities())
        return snapshot


@dataclass(frozen=True)
class RuntimeRequirements:
    """Named set of capabilities an agent phase or execution mode needs.

    ``mode`` labels the phase and ``required`` lists the capability names
    that must be satisfied for it. The classmethods build the predefined
    requirement sets.
    """

    mode: str
    required: tuple[str, ...]

    @classmethod
    def planner(cls) -> "RuntimeRequirements":
        """Requirements for the planner phase."""
        # The planner prompt currently investigates and writes files, so
        # it still asks for the full workspace runtime. A future text-only
        # planner could relax this list.
        needed = (
            "text_completion",
            "structured_output",
            "native_tool_loop",
            "filesystem_read",
            "filesystem_edit",
            "shell",
        )
        return cls(mode="planner", required=needed)

    @classmethod
    def full_coder(cls) -> "RuntimeRequirements":
        """Requirements for the coder phase; its mode is ``full_autonomous``."""
        needed = (
            "text_completion",
            "native_tool_loop",
            "filesystem_edit",
            "shell",
        )
        return cls(mode="full_autonomous", required=needed)

    @classmethod
    def text_only(cls, mode: str = "analysis_only") -> "RuntimeRequirements":
        """Requirements for a phase that only needs ``text_completion``.

        ``mode`` sets the label and defaults to ``analysis_only``.
        """
        needed = ("text_completion",)
        return cls(mode=mode, required=needed)

    @classmethod
    def patch_proposal(cls) -> "RuntimeRequirements":
        """Requirements for the ``patch_proposal`` mode."""
        needed = (
            "text_completion",
            "structured_output",
            "filesystem_edit",
            "apply_patch",
        )
        return cls(mode="patch_proposal", required=needed)

    @classmethod
    def generic_edit(cls) -> "RuntimeRequirements":
        """Requirements for the ``generic_edit`` mode."""
        needed = (
            "text_completion",
            "structured_output",
            "function_tools",
            "filesystem_read",
            "filesystem_edit",
            "shell",
            "apply_patch",
        )
        return cls(mode="generic_edit", required=needed)

    @classmethod
    def mutating_subagent(cls) -> "RuntimeRequirements":
        """Requirements for a write-confined mutating child session (Phase 1.2).

        A mutating child edits files only inside its declared write scope.
        It therefore needs the workspace edit surface but NOT ``shell``,
        since opaque commands cannot be scope-confined. ``subagents`` is
        policy-granted via ``RuntimePolicy.mutating_subagents_enabled``,
        which keeps mutating children behind the autonomy policy until the
        operator's level enables them.
        """
        needed = (
            "text_completion",
            "structured_output",
            "filesystem_read",
            "filesystem_edit",
            "subagents",
        )
        return cls(mode="mutating_subagent", required=needed)


class RuntimeCapabilityError(RuntimeError):
    """Error raised when a runtime cannot satisfy phase requirements.

    The instance keeps the provider name, runtime name, requirements,
    capabilities and optional policy it was built from. It also stores
    ``missing``, the result of
    ``capabilities.missing(requirements, policy=policy)``. The exception
    message is a multi-section report rendered from those values.
    """

    def __init__(
        self,
        *,
        provider_name: str,
        runtime_name: str,
        requirements: RuntimeRequirements,
        capabilities: RuntimeCapabilities,
        policy: RuntimePolicy | None = None,
    ):
        self.provider_name, self.runtime_name = provider_name, runtime_name
        self.requirements, self.capabilities = requirements, capabilities
        self.policy = policy
        # Computed before the message is built, because the message lists it.
        self.missing = capabilities.missing(requirements, policy=policy)
        super().__init__(self._build_message())

    def _build_message(self) -> str:
        """Render the report used as the exception message.

        Sections, in order: headline, missing, required and available
        capabilities, an optional policy section (only when a policy was
        supplied), and a closing hint about alternative runtimes/phases.
        """

        def bullets(names) -> str:
            # One "- name" line per entry; an empty input yields "".
            return "\n".join(f"- {name}" for name in names)

        required_text = bullets(self.requirements.required)
        provided = self.capabilities.available()
        available_text = bullets(provided) if provided else "- none"
        missing_text = bullets(self.missing)

        policy_text = ""
        if self.policy is not None:
            granted_names = sorted(self.policy.granted_capabilities())
            granted_text = bullets(granted_names) or "- none"
            policy_text = (
                f"\nPolicy-granted capabilities:\n{granted_text}\n"
                f"Promoted to full autonomous: "
                f"{self.policy.promoted_to_full_autonomous}\n"
            )

        sections = [
            f"Cannot run {self.requirements.mode} with provider={self.provider_name} ",
            f"runtime={self.runtime_name}.\n\n",
            f"Missing capabilities:\n{missing_text}\n\n",
            f"Required capabilities:\n{required_text}\n\n",
            f"Available capabilities:\n{available_text}\n",
            f"{policy_text}\n",
            "Use Claude Agent SDK for full autonomous coding today, or run a ",
            "limited generic_edit, patch_proposal, or analysis_only phase with ",
            "a compatible runtime.",
        ]
        return "".join(sections)