feat: add OpenShell sandbox provider extension

zanetworker · zanetworker · commit 45b29d68f68d · 2026-05-22T12:30:36.000+02:00
Add NVIDIA OpenShell as a sandbox provider, wrapping the `openshell` Python SDK (sync gRPC client) via run_in_executor. Implements the standard BaseSandboxClient/BaseSandboxSession contracts with gateway discovery, tar-based workspace persistence, and file I/O via exec. Closes #3468
diff --git a/docs/ref/extensions/sandbox/openshell/sandbox.md b/docs/ref/extensions/sandbox/openshell/sandbox.md
@@ -0,0 +1,3 @@
+# `Sandbox`
+
+::: agents.extensions.sandbox.openshell.sandbox
diff --git a/docs/sandbox/clients.md b/docs/sandbox/clients.md
@@ -95,6 +95,7 @@ For provider-specific setup notes and links for the checked-in extension example
 | `DaytonaSandboxClient` | `openai-agents[daytona]` | [Daytona runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/daytona/daytona_runner.py) |
 | `E2BSandboxClient` | `openai-agents[e2b]` | [E2B runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/e2b_runner.py) |
 | `ModalSandboxClient` | `openai-agents[modal]` | [Modal runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/modal_runner.py) |
+| `OpenShellSandboxClient` | `openai-agents[openshell]` | [OpenShell runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/openshell_runner.py) |
 | `RunloopSandboxClient` | `openai-agents[runloop]` | [Runloop runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/runloop/runner.py) |
 | `VercelSandboxClient` | `openai-agents[vercel]` | [Vercel runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/vercel_runner.py) |
 
@@ -113,6 +114,7 @@ Hosted sandbox clients expose provider-specific mount strategies. Choose the bac
 | `DaytonaSandboxClient` | Supports rclone-backed cloud storage mounts with `DaytonaCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
 | `E2BSandboxClient` | Supports rclone-backed cloud storage mounts with `E2BCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
 | `RunloopSandboxClient` | Supports rclone-backed cloud storage mounts with `RunloopCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. |
+| `OpenShellSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |
 | `VercelSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. |
 
 </div>
@@ -130,6 +132,7 @@ The table below summarizes which remote storage entries each backend can mount d
 | `DaytonaSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
 | `E2BSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
 | `RunloopSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - |
+| `OpenShellSandboxClient` | - | - | - | - | - | - |
 | `VercelSandboxClient` | - | - | - | - | - | - |
 
 </div>
diff --git a/examples/sandbox/extensions/openshell_runner.py b/examples/sandbox/extensions/openshell_runner.py
@@ -0,0 +1,301 @@
+"""
+OpenShell sandbox integration example.
+
+This script exercises the OpenShell sandbox extension at two levels:
+
+1. **Session-level** (no LLM needed): Creates a sandbox, writes files, reads them
+   back, runs commands, and verifies workspace persistence. This validates the
+   extension works end-to-end with a real OpenShell gateway.
+
+2. **Agent-level** (requires OPENAI_API_KEY): Runs a SandboxAgent with a shell
+   capability inside the OpenShell sandbox.
+
+Prerequisites:
+  - An OpenShell gateway running (local, remote, or cloud).
+  - ``openshell`` Python package installed: ``uv sync --extra openshell``
+  - For agent mode: ``OPENAI_API_KEY`` environment variable set.
+
+Quick start:
+  # Session-level only (no LLM):
+  uv run python examples/sandbox/extensions/openshell_runner.py --session-only
+
+  # Full agent run:
+  uv run python examples/sandbox/extensions/openshell_runner.py
+
+  # With a specific cluster:
+  uv run python examples/sandbox/extensions/openshell_runner.py --cluster my-gateway
+
+  # With a custom image:
+  uv run python examples/sandbox/extensions/openshell_runner.py --image ubuntu:24.04
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import io
+import os
+import sys
+from pathlib import Path
+
+try:
+    from agents.extensions.sandbox import (
+        OpenShellSandboxClient,
+        OpenShellSandboxClientOptions,
+    )
+except Exception as exc:
+    raise SystemExit(
+        "OpenShell sandbox examples require the optional openshell extra.\n"
+        "Install it with: uv sync --extra openshell"
+    ) from exc
+
+
+async def session_level_test(
+    *,  # all four keyword arguments are passed straight to OpenShellSandboxClientOptions
+    cluster: str | None,
+    endpoint: str | None,
+    image: str | None,
+    gpu: bool,
+) -> None:
+    """Exercise the sandbox extension directly without an LLM."""
+
+    from agents.sandbox import Manifest
+    from agents.sandbox.entries import File
+
+    print("=== OpenShell Session-Level Test ===\n")
+
+    # Build a manifest with test files.
+    # OpenShell sandboxes default to /sandbox as the working directory.
+    manifest = Manifest(
+        root="/sandbox",
+        entries={
+            "hello.txt": File(content=b"Hello from OpenShell sandbox!\n"),
+            "data/numbers.csv": File(content=b"a,b,c\n1,2,3\n4,5,6\n"),
+        },
+    )
+
+    client = OpenShellSandboxClient()
+    options = OpenShellSandboxClientOptions(
+        cluster=cluster,
+        endpoint=endpoint,
+        image=image,
+        gpu=gpu,
+    )
+
+    print("1. Creating sandbox...")
+    session = await client.create(manifest=manifest, options=options)
+
+    try:  # from here on, the finally block guarantees session.aclose() runs
+        print("2. Starting session (materializing workspace)...")
+        await session.start()
+
+        print("3. Running 'ls -la' in workspace...")
+        result = await session.exec("ls", "-la", shell=False)  # exit code printed, not asserted
+        print(f"   exit_code={result.exit_code}")
+        print(f"   stdout:\n{result.stdout.decode()}")
+
+        print("4. Reading hello.txt...")
+        content = await session.read(Path("hello.txt"))  # one of the manifest entries above
+        text = content.read()
+        if isinstance(text, bytes):  # decode a bytes payload so the substring check sees str
+            text = text.decode("utf-8")
+        print(f"   content: {text.strip()!r}")
+        assert "Hello from OpenShell sandbox!" in text, "Read verification failed."
+
+        print("5. Writing a new file...")
+        await session.write(
+            Path("output.txt"),
+            io.BytesIO(b"Written by the OpenAI Agents SDK via OpenShell.\n"),
+        )
+
+        print("6. Verifying the written file...")
+        result = await session.exec("cat", "output.txt", shell=False)
+        assert result.exit_code == 0, f"cat failed: {result.stderr.decode()}"
+        print(f"   content: {result.stdout.decode().strip()!r}")
+
+        print("7. Running a multi-step shell command...")
+        result = await session.exec("wc -l data/numbers.csv && echo 'done'")
+        print(f"   output: {result.stdout.decode().strip()}")
+
+        print("8. Checking sandbox is running...")
+        is_running = await session.running()
+        print(f"   running: {is_running}")
+        assert is_running, "Sandbox should be running."
+
+        print("9. Persisting workspace (tar snapshot)...")
+        snapshot = await session.persist_workspace()
+        snapshot_bytes = snapshot.read()  # read fully into memory only to measure its size
+        print(f"   snapshot size: {len(snapshot_bytes)} bytes")
+        assert len(snapshot_bytes) > 0, "Snapshot should not be empty."
+
+        print("\nAll session-level checks passed.")
+
+    finally:  # runs whether or not the checks above passed
+        print("\n10. Shutting down sandbox...")
+        await session.aclose()
+        print("    Done.")
+
+
+async def agent_level_test(
+    *,  # cluster/endpoint/image/gpu go to OpenShellSandboxClientOptions; the rest drive the run
+    model: str,
+    cluster: str | None,
+    endpoint: str | None,
+    image: str | None,
+    gpu: bool,
+    question: str,
+    stream: bool,
+) -> None:
+    """Run a SandboxAgent backed by OpenShell."""
+
+    from openai.types.responses import ResponseTextDeltaEvent
+
+    from agents import ModelSettings, Runner
+    from agents.run import RunConfig
+    from agents.sandbox import Manifest, SandboxAgent, SandboxRunConfig
+    from agents.sandbox.entries import File
+
+    if __package__ is None or __package__ == "":  # executed as a script, not as a package module
+        sys.path.insert(0, str(Path(__file__).resolve().parents[3]))  # repository root
+
+    from examples.sandbox.misc.workspace_shell import WorkspaceShellCapability
+
+    print("\n=== OpenShell Agent-Level Test ===\n")
+
+    manifest = Manifest(
+        root="/sandbox",
+        entries={
+            "README.md": File(
+                content=(
+                    b"# Project Status\n\nThis workspace contains a sample project status report.\n"
+                ),
+            ),
+            "status.md": File(
+                content=(
+                    b"# Sprint 42 Status\n\n"
+                    b"- Auth service: on track, shipping Tuesday.\n"
+                    b"- Search reindex: blocked on infra ticket INFRA-1234.\n"
+                    b"- Dashboard v2: 80% complete, needs UX review.\n"
+                ),
+            ),
+        },
+    )
+
+    agent = SandboxAgent(
+        name="OpenShell Sandbox Assistant",
+        model=model,
+        instructions=(
+            "Answer questions about the sandbox workspace. Inspect the files before answering "
+            "and keep the response concise. "
+            "Do not invent files or statuses that are not present in the workspace. Cite the "
+            "file names you inspected."
+        ),
+        default_manifest=manifest,
+        capabilities=[WorkspaceShellCapability()],
+        model_settings=ModelSettings(tool_choice="required"),
+    )
+
+    run_config = RunConfig(
+        sandbox=SandboxRunConfig(
+            client=OpenShellSandboxClient(),
+            options=OpenShellSandboxClientOptions(
+                cluster=cluster,
+                endpoint=endpoint,
+                image=image,
+                gpu=gpu,
+            ),
+        ),
+        workflow_name="OpenShell sandbox example",
+    )
+
+    if not stream:  # blocking path: print the final output once, then return
+        result = await Runner.run(agent, question, run_config=run_config)
+        print(f"assistant> {result.final_output}")
+        return
+
+    stream_result = Runner.run_streamed(agent, question, run_config=run_config)
+    saw_text_delta = False
+    async for event in stream_result.stream_events():  # non-text-delta events are ignored
+        if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
+            if not saw_text_delta:  # print the "assistant> " prefix once, before the first delta
+                print("assistant> ", end="", flush=True)
+                saw_text_delta = True
+            print(event.data.delta, end="", flush=True)
+    if saw_text_delta:  # terminate the streamed line
+        print()
+
+
+async def main(
+    *,
+    model: str,
+    cluster: str | None,
+    endpoint: str | None,
+    image: str | None,
+    gpu: bool,
+    question: str,
+    stream: bool,
+    session_only: bool,
+) -> None:
+    """Always run the session-level test (no LLM); then the agent-level test unless skipped."""
+    await session_level_test(
+        cluster=cluster,
+        endpoint=endpoint,
+        image=image,
+        gpu=gpu,
+    )
+
+    if session_only:
+        return
+
+    # Agent-level test requires OPENAI_API_KEY.
+    if not os.environ.get("OPENAI_API_KEY"):
+        print("\nSkipping agent-level test (OPENAI_API_KEY not set).")
+        print("Set OPENAI_API_KEY to run the full test.")  # --session-only is not set on this path
+        return
+
+    await agent_level_test(
+        model=model,
+        cluster=cluster,
+        endpoint=endpoint,
+        image=image,
+        gpu=gpu,
+        question=question,
+        stream=stream,
+    )
+
+
+if __name__ == "__main__":  # CLI entry point: parse flags, then run main() once
+    parser = argparse.ArgumentParser(
+        description="OpenShell sandbox integration example for the OpenAI Agents SDK."
+    )
+    parser.add_argument("--model", default="gpt-4.1-mini", help="Model name to use.")
+    parser.add_argument(
+        "--question",
+        default="Summarize the project status from the workspace files.",
+        help="Prompt to send to the agent.",
+    )
+    parser.add_argument("--cluster", default=None, help="OpenShell gateway cluster name.")
+    parser.add_argument("--endpoint", default=None, help="Explicit gateway endpoint (host:port).")
+    parser.add_argument("--image", default=None, help="Container image for the sandbox.")
+    parser.add_argument("--gpu", action="store_true", default=False, help="Request GPU.")
+    parser.add_argument("--stream", action="store_true", default=False, help="Stream the response.")
+    parser.add_argument(
+        "--session-only",
+        action="store_true",
+        default=False,
+        help="Run session-level test only (no LLM needed).",
+    )
+    args = parser.parse_args()
+
+    asyncio.run(  # a single asyncio.run call covers both the session and agent tests
+        main(
+            model=args.model,
+            cluster=args.cluster,
+            endpoint=args.endpoint,
+            image=args.image,
+            gpu=args.gpu,
+            question=args.question,
+            stream=args.stream,
+            session_only=args.session_only,  # argparse exposes --session-only as session_only
+        )
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -53,6 +53,7 @@ e2b = ["e2b==2.20.0", "e2b-code-interpreter==2.4.1"]
 modal = ["modal==1.3.5"]
 runloop = ["runloop_api_client>=1.16.0,<2.0.0"]
 vercel = ["vercel>=0.5.6,<0.6"]
+openshell = ["openshell>=0.0.0a0"]  # NOTE(review): no upper bound and opts into pre-releases, unlike the pinned/capped extras above — confirm intended
 s3 = ["boto3>=1.34"]
 temporal = [
     "temporalio==1.26.0",
@@ -164,6 +165,10 @@ ignore_missing_imports = true
 module = ["vercel", "vercel.*"]
 ignore_missing_imports = true
 
+[[tool.mypy.overrides]]
+module = ["openshell", "openshell.*"]
+ignore_missing_imports = true
+
 [tool.coverage.run]
 source = ["src/agents"]
 omit = [
diff --git a/src/agents/extensions/sandbox/__init__.py b/src/agents/extensions/sandbox/__init__.py
@@ -109,6 +109,18 @@
 except Exception:  # pragma: no cover
     _HAS_VERCEL = False
 
+try:
+    from .openshell import (
+        OpenShellSandboxClient as OpenShellSandboxClient,
+        OpenShellSandboxClientOptions as OpenShellSandboxClientOptions,
+        OpenShellSandboxSession as OpenShellSandboxSession,
+        OpenShellSandboxSessionState as OpenShellSandboxSessionState,
+    )
+
+    _HAS_OPENSHELL = True
+except Exception:  # pragma: no cover
+    _HAS_OPENSHELL = False
+
 __all__: list[str] = []
 
 if _HAS_E2B:
@@ -207,3 +219,13 @@
             "RunloopUserParameters",
         ]
     )
+
+if _HAS_OPENSHELL:
+    __all__.extend(
+        [
+            "OpenShellSandboxClient",
+            "OpenShellSandboxClientOptions",
+            "OpenShellSandboxSession",
+            "OpenShellSandboxSessionState",
+        ]
+    )
diff --git a/src/agents/extensions/sandbox/openshell/__init__.py b/src/agents/extensions/sandbox/openshell/__init__.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from .sandbox import (
+    OpenShellSandboxClient as OpenShellSandboxClient,
+    OpenShellSandboxClientOptions as OpenShellSandboxClientOptions,
+    OpenShellSandboxSession as OpenShellSandboxSession,
+    OpenShellSandboxSessionState as OpenShellSandboxSessionState,
+)
+
+__all__ = [
+    "OpenShellSandboxClient",
+    "OpenShellSandboxClientOptions",
+    "OpenShellSandboxSession",
+    "OpenShellSandboxSessionState",
+]
diff --git a/src/agents/extensions/sandbox/openshell/sandbox.py b/src/agents/extensions/sandbox/openshell/sandbox.py
diff --git a/tests/extensions/sandbox/test_openshell.py b/tests/extensions/sandbox/test_openshell.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	+# `Sandbox`
	`2`	`+`
	`3`	`+::: agents.extensions.sandbox.openshell.sandbox`