langbot-app
diff --git a/src/langbot/pkg/box/policy.py b/src/langbot/pkg/box/policy.py
Lines changed: 98 additions & 0 deletions
diff --git a/src/langbot/pkg/box/service.py b/src/langbot/pkg/box/service.py
Lines changed: 22 additions & 9 deletions
diff --git a/src/langbot/pkg/provider/runners/localagent.py b/src/langbot/pkg/provider/runners/localagent.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,98 @@
+"""Three-layer security policy for LangBot Box.
+
+The design separates concerns into three independent layers, aligned with
+OpenCode / OpenClaw patterns:
+
+1. **SandboxPolicy** – *where* tools run (host vs sandbox).
+2. **ToolPolicy** – *which* tools are allowed (allow/deny lists).
+3. **ElevatedPolicy** – *whether* a single exec call may temporarily
+   escape the default sandbox boundary.
+
+These three layers are orthogonal:
+- ToolPolicy is a hard boundary; ``elevated`` cannot bypass a denied tool.
+- SandboxPolicy decides the default execution location.
+- ElevatedPolicy only affects ``exec`` and only when the framework allows it.
+"""
+
+from __future__ import annotations
+
+import enum
+from typing import Sequence
+
+
+# ── Layer 1: Sandbox Policy ──────────────────────────────────────────
+
+
+class SandboxMode(str, enum.Enum):
+    """Sandbox routing modes for agent execution.
+
+    Members also subclass ``str``, so each one compares equal to its plain
+    string value (for example ``SandboxMode.OFF == 'off'``).
+    """
+
+    # Sandbox disabled; all exec runs on the host.
+    OFF = 'off'
+
+    # Only non-default sessions are sandboxed (e.g. sub-agents, MCP).
+    NON_DEFAULT = 'non_default'
+
+    # Every agent exec call is routed through the sandbox.
+    ALL = 'all'
+
+
+class SandboxPolicy:
+    """Decides whether a given execution context should use the sandbox.
+
+    Args:
+        mode: A ``SandboxMode`` member or its plain string value
+            (``'off'``, ``'non_default'`` or ``'all'``).
+
+    Raises:
+        ValueError: If ``mode`` is not a valid ``SandboxMode`` value.
+    """
+
+    def __init__(self, mode: SandboxMode = SandboxMode.ALL):
+        # Normalise eagerly so that a mistyped configuration value fails
+        # loudly here instead of silently behaving like NON_DEFAULT (which
+        # would leave the default session unsandboxed).
+        self.mode = SandboxMode(mode)
+
+    def should_sandbox(self, *, is_default_session: bool = True) -> bool:
+        """Return ``True`` when execution must be routed through the sandbox.
+
+        Args:
+            is_default_session: Whether the caller is the default session.
+                Only consulted in ``NON_DEFAULT`` mode.
+
+        Returns:
+            ``False`` in ``OFF`` mode, ``not is_default_session`` in
+            ``NON_DEFAULT`` mode, and ``True`` otherwise.
+        """
+        if self.mode == SandboxMode.OFF:
+            return False
+        if self.mode == SandboxMode.NON_DEFAULT:
+            # Sandbox everything except the default session.
+            return not is_default_session
+        # ALL -- and, defensively, any unrecognised value assigned to
+        # ``mode`` after construction -- is sandboxed (fail closed).
+        return True
+
+
+# ── Layer 2: Tool Policy ─────────────────────────────────────────────
+
+
+class ToolPolicy:
+    """Controls which tools are available to the current agent/session.
+
+    Rules:
+    - ``deny`` always takes precedence over ``allow``.
+    - An empty ``allow`` list means "all tools allowed" (no allowlist filter).
+    - ``elevated`` cannot bypass a denied tool.
+
+    Args:
+        allow: Tool names that are permitted. A single non-empty string is
+            treated as one tool name, not as a sequence of characters.
+        deny: Tool names that are always rejected. A single non-empty string
+            is treated as one tool name.
+    """
+
+    def __init__(
+        self,
+        allow: Sequence[str] = (),
+        deny: Sequence[str] = (),
+    ):
+        self._allow: frozenset[str] = self._to_name_set(allow)
+        self._deny: frozenset[str] = self._to_name_set(deny)
+
+    @staticmethod
+    def _to_name_set(names: Sequence[str]) -> frozenset[str]:
+        """Build an immutable set of tool names from ``names``."""
+        if isinstance(names, str):
+            # A ``str`` is itself a sequence of one-character strings, so
+            # ``frozenset('exec')`` would yield {'e', 'x', 'c'} and a policy
+            # such as ``deny='exec'`` would silently deny nothing useful.
+            # An empty string keeps meaning "no names".
+            return frozenset((names,)) if names else frozenset()
+        return frozenset(names)
+
+    def is_tool_allowed(self, tool_name: str) -> bool:
+        """Return ``True`` if ``tool_name`` passes both the deny and allow lists."""
+        if tool_name in self._deny:
+            return False
+        # An empty allowlist applies no filter at all.
+        return not self._allow or tool_name in self._allow
+
+
+# ── Layer 3: Elevated Policy ─────────────────────────────────────────
+
+
+class ElevatedPolicy:
+    """Gate for temporary privilege escalation requested by ``exec``.
+
+    An ``elevated`` request only applies to the ``exec`` tool and asks to run
+    a command outside the default sandbox boundary (e.g. with network, or on
+    the host).  Whether the request is honored is up to the framework.
+
+    Attributes:
+        allow_elevated: Flag returned by ``is_elevation_permitted``.
+        require_approval: Stored as given; this class does not read it.
+    """
+
+    def __init__(
+        self,
+        *,
+        allow_elevated: bool = False,
+        require_approval: bool = True,
+    ):
+        self.require_approval = require_approval
+        self.allow_elevated = allow_elevated
+
+    def is_elevation_permitted(self) -> bool:
+        """Return the ``allow_elevated`` flag unchanged."""
+        permitted = self.allow_elevated
+        return permitted
@@ -105,9 +105,22 @@ async def execute_spec_payload(
         )
         return self._serialize_result(result)
 
-    async def execute_sandbox_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
-        spec_payload = dict(parameters)
+    async def execute_tool(self, parameters: dict, query: pipeline_query.Query) -> dict:
+        """Execute an agent-facing ``exec`` tool call.
+
+        Translates the agent-facing ``command`` field to the internal
+        ``BoxSpec.cmd`` field and injects the session id from the query.
+
+        Args:
+            parameters: Tool-call arguments. ``command`` is required;
+                ``workdir``, ``timeout_sec`` and ``env`` are copied when
+                present. Any other key is ignored.
+            query: Query whose ``query_id`` becomes the payload's
+                ``session_id``; also forwarded to ``execute_spec_payload``.
+
+        Returns:
+            Whatever ``execute_spec_payload`` returns for the built payload.
+
+        Raises:
+            KeyError: If ``parameters`` has no ``command`` entry.
+        """
+        spec_payload: dict = {'cmd': parameters['command']}
+
+        # Pass through allowed agent-facing fields
+        for key in ('workdir', 'timeout_sec', 'env'):
+            if key in parameters:
+                spec_payload[key] = parameters[key]
+
+        # Inject context the agent must not control. Only the keys listed
+        # above are copied, so ``session_id`` is never already present and
+        # ``setdefault`` always stores the query id here.
         spec_payload.setdefault('session_id', str(query.query_id))
+
         return await self.execute_spec_payload(spec_payload, query)
 
     async def shutdown(self):
@@ -379,23 +392,23 @@ def get_recent_errors(self) -> list[dict]:
         return list(self._recent_errors)
 
     def get_system_guidance(self) -> str:
-        """Return LLM system-prompt guidance for sandbox_exec.
+        """Return LLM system-prompt guidance for the exec tool.
 
-        All sandbox-specific prompt text is kept here so that callers
+        All execution-specific prompt text is kept here so that callers
         (e.g. LocalAgentRunner) stay free of box domain knowledge.
+
+        Returns:
+            The base guidance text, with a ``/workspace`` paragraph appended
+            when ``self.default_host_workspace`` is truthy.
         """
+        # Base guidance: always part of the returned text.
         guidance = (
-            'When sandbox_exec is available, use it for exact calculations, statistics, structured data parsing, '
+            'When the exec tool is available, use it for exact calculations, statistics, structured data parsing, '
             'and code execution instead of estimating mentally. If the user provides numbers, tables, CSV-like text, '
-            'JSON, or other data and asks for a computed answer, prefer running a short Python script in sandbox_exec '
+            'JSON, or other data and asks for a computed answer, prefer running a short Python script via exec '
             'and then answer from the tool result. Unless the user explicitly asks for the script, code, or implementation '
             'details, do not include the generated script in the final answer; return the result and a brief explanation only.'
         )
+        # File-task addendum: appended only when a default host workspace is set.
         if self.default_host_workspace:
             guidance += (
-                ' A default host workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
-                'modify local files in the working directory, use sandbox_exec with /workspace paths directly; do not ask the '
-                'user for sandbox parameters such as host_path unless they explicitly need a different directory.'
+                ' A default workspace is mounted at /workspace for file tasks. When the user asks to read, create, or '
+                'modify local files in the working directory, use exec with /workspace paths directly; do not ask the '
+                'user for directory parameters unless they explicitly need a different directory.'
             )
         return guidance
 
 
@@ -5,7 +5,7 @@
 import typing
 from .. import runner
 from ..modelmgr import requester as modelmgr_requester
-from ..tools.loaders.native import SANDBOX_EXEC_TOOL_NAME
+from ..tools.loaders.native import EXEC_TOOL_NAME
 import langbot_plugin.api.entities.builtin.pipeline.query as pipeline_query
 import langbot_plugin.api.entities.builtin.provider.message as provider_message
 import langbot_plugin.api.entities.builtin.rag.context as rag_context
@@ -37,7 +37,7 @@ def _build_request_messages(
     ) -> list[provider_message.Message]:
         req_messages = query.prompt.messages.copy() + query.messages.copy()
 
-        if any(getattr(tool, 'name', None) == SANDBOX_EXEC_TOOL_NAME for tool in query.use_funcs or []):
+        if any(getattr(tool, 'name', None) == EXEC_TOOL_NAME for tool in query.use_funcs or []):
             req_messages.append(
                 provider_message.Message(
                     role='system',