Skip to content

Commit 1da233f

Browse files
feat(box): unify native agent tools around exec/read/write/edit
1 parent bdfc0f1 commit 1da233f

10 files changed

Lines changed: 519 additions & 114 deletions

File tree

src/langbot/pkg/box/policy.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Three-layer security policy for LangBot Box.
2+
3+
The design separates concerns into three independent layers, aligned with
4+
OpenCode / OpenClaw patterns:
5+
6+
1. **SandboxPolicy** – *where* tools run (host vs sandbox).
7+
2. **ToolPolicy** – *which* tools are allowed (allow/deny lists).
8+
3. **ElevatedPolicy** – *whether* a single exec call may temporarily
9+
escape the default sandbox boundary.
10+
11+
These three layers are orthogonal:
12+
- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool.
13+
- SandboxPolicy decides the default execution location.
14+
- ElevatedPolicy only affects ``exec`` and only when the framework allows it.
15+
"""
16+
17+
from __future__ import annotations
18+
19+
import enum
20+
from typing import Sequence
21+
22+
23+
# ── Layer 1: Sandbox Policy ──────────────────────────────────────────
24+
25+
26+
class SandboxMode(str, enum.Enum):
    """Selects which agent sessions have their exec calls sandboxed.

    Members mix in ``str``, so each one compares equal to its raw value
    (for example ``SandboxMode.ALL == 'all'``).
    """

    # Sandbox disabled; all exec runs on the host.
    OFF = 'off'

    # Only non-default sessions are sandboxed (e.g. sub-agents, MCP).
    NON_DEFAULT = 'non_default'

    # Every agent exec call is routed through the sandbox.
    ALL = 'all'
37+
38+
39+
class SandboxPolicy:
    """Decides whether a given execution context should use the sandbox.

    The decision is driven by a single :class:`SandboxMode`:

    - ``OFF``: never sandbox.
    - ``ALL``: always sandbox.
    - ``NON_DEFAULT``: sandbox everything except the default session.
    """

    def __init__(self, mode: SandboxMode = SandboxMode.ALL):
        """Store the sandbox mode.

        Args:
            mode: A ``SandboxMode`` member or its raw string value
                (``'off'``, ``'non_default'``, ``'all'``).

        Raises:
            ValueError: If ``mode`` is not a valid ``SandboxMode`` value.
        """
        # Coerce eagerly so a misspelled or missing mode is rejected here
        # instead of silently behaving like NON_DEFAULT, which would leave
        # the default session running outside the sandbox.
        self.mode = SandboxMode(mode)

    def should_sandbox(self, *, is_default_session: bool = True) -> bool:
        """Return ``True`` when the execution must go through the sandbox.

        Args:
            is_default_session: Whether the call belongs to the default
                session. Only consulted in ``NON_DEFAULT`` mode.

        Raises:
            ValueError: If ``self.mode`` was reassigned to an invalid value.
        """
        # ``mode`` is a public, mutable attribute; validate it again at
        # decision time so a bad reassignment cannot fail open.
        mode = SandboxMode(self.mode)
        if mode is SandboxMode.OFF:
            return False
        if mode is SandboxMode.ALL:
            return True
        # NON_DEFAULT: sandbox everything except the default session
        return not is_default_session
52+
53+
54+
# ── Layer 2: Tool Policy ─────────────────────────────────────────────
55+
56+
57+
class ToolPolicy:
    """Controls which tools are available to the current agent/session.

    Rules:
    - ``deny`` always takes precedence over ``allow``.
    - An empty ``allow`` list means "all tools allowed" (no allowlist filter).
    - The check depends only on the tool name; this class has no notion of
      ``elevated``, so a denied tool stays denied regardless of it.
    """

    def __init__(
        self,
        allow: Sequence[str] = (),
        deny: Sequence[str] = (),
    ):
        """Build the policy from allow/deny tool-name collections.

        Args:
            allow: Tool names that may be used. Empty (or ``None``) disables
                the allowlist filter. A single bare string is treated as one
                tool name.
            deny: Tool names that must never be used. A single bare string
                is treated as one tool name; ``None`` means no denials.
        """
        self._allow: frozenset[str] = self._as_name_set(allow)
        self._deny: frozenset[str] = self._as_name_set(deny)

    @staticmethod
    def _as_name_set(names: Sequence[str] | str | None) -> frozenset[str]:
        """Normalize a tool-name argument into a frozenset of names."""
        if names is None:
            return frozenset()
        if isinstance(names, str):
            # ``frozenset('exec')`` would yield {'e', 'x', 'c'}; a lone string
            # is one tool name, not a sequence of one-character names.
            return frozenset((names,))
        return frozenset(names)

    def is_tool_allowed(self, tool_name: str) -> bool:
        """Return ``True`` if ``tool_name`` passes both the deny and allow lists."""
        # Deny is checked first so it wins over any allow entry.
        if tool_name in self._deny:
            return False
        # An empty allowlist means "no filter".
        if self._allow and tool_name not in self._allow:
            return False
        return True
80+
81+
82+
# ── Layer 3: Elevated Policy ─────────────────────────────────────────
83+
84+
85+
class ElevatedPolicy:
    """Gates temporary privilege escalation requests made through ``exec``.

    ``elevated`` is an ``exec``-only concept meaning "run this command
    outside the default sandbox boundary" (e.g. with network, or on the
    host). Whether such a request is honored is up to the framework.
    """

    def __init__(
        self,
        *,
        allow_elevated: bool = False,
        require_approval: bool = True,
    ):
        # Both flags are stored as-is; ``require_approval`` is only recorded
        # here and is not consulted by ``is_elevation_permitted``.
        self.require_approval = require_approval
        self.allow_elevated = allow_elevated

    def is_elevation_permitted(self) -> bool:
        """Return the ``allow_elevated`` flag."""
        permitted = self.allow_elevated
        return permitted

src/langbot/pkg/box/service.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,22 @@ async def execute_spec_payload(
105105
)
106106
return self._serialize_result(result)
107107

108-
async def execute_sandbox_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
109-
spec_payload = dict(parameters)
108+
async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
    """Run one agent-issued ``exec`` tool call.

    The agent-facing ``command`` value becomes the internal ``cmd`` field.
    Only ``workdir``, ``timeout_sec`` and ``env`` are copied through when
    present; any other key in ``parameters`` is dropped. ``session_id`` is
    always set to ``str(query.query_id)``.

    Raises:
        KeyError: If ``parameters`` has no ``command`` entry.
    """
    passthrough_fields = ('workdir', 'timeout_sec', 'env')

    spec_payload: dict = {
        'cmd': parameters['command'],
        # Pass through allowed agent-facing fields
        **{name: parameters[name] for name in passthrough_fields if name in parameters},
        # Inject context the agent must not control
        'session_id': str(query.query_id),
    }

    return await self.execute_spec_payload(spec_payload, query)
112125

113126
async def shutdown(self):
@@ -379,23 +392,23 @@ def get_recent_errors(self) -> list[dict]:
379392
return list(self._recent_errors)
380393

381394
def get_system_guidance(self) -> str:
382-
"""Return LLM system-prompt guidance for sandbox_exec.
395+
"""Return LLM system-prompt guidance for the exec tool.
383396
384-
All sandbox-specific prompt text is kept here so that callers
397+
All execution-specific prompt text is kept here so that callers
385398
(e.g. LocalAgentRunner) stay free of box domain knowledge.
386399
"""
387400
guidance = (
388-
'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
401+
'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
389402
'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
390-
'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
403+
'JSON, or other data and asks for a computed answer, prefer running a short Python script via exec '
391404
'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
392405
'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
393406
)
394407
if self.default_host_workspace:
395408
guidance += (
396-
' A default host workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
397-
'modify local files in the working directory, use sandbox_exec with /workspace paths directly; do not ask the '
398-
'user for sandbox parameters such as host_path unless they explicitly need a different directory.'
409+
' A default workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
410+
'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
411+
'user for directory parameters unless they explicitly need a different directory.'
399412
)
400413
return guidance
401414

src/langbot/pkg/provider/runners/localagent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import typing
66
from .. import runner
77
from ..modelmgr import requester as modelmgr_requester
8-
from ..tools.loaders.native import SANDBOX_EXEC_TOOL_NAME
8+
from ..tools.loaders.native import EXEC_TOOL_NAME
99
import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
1010
import langbot_plugin.api.entities.builtin.provider.message as provider_message
1111
import langbot_plugin.api.entities.builtin.rag.context as rag_context
@@ -37,7 +37,7 @@ def _build_request_messages(
3737
) -> list[provider_message.Message]:
3838
req_messages = query.prompt.messages.copy() + query.messages.copy()
3939

40-
if any(getattr(tool, 'name', None) == SANDBOX_EXEC_TOOL_NAME for tool in query.use_funcs or []):
40+
if any(getattr(tool, 'name', None) == EXEC_TOOL_NAME for tool in query.use_funcs or []):
4141
req_messages.append(
4242
provider_message.Message(
4343
role='system',

0 commit comments

Comments
 (0)