Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ The OpenAI Agents Python repository provides the Python Agents SDK, examples, an
- `src/agents/run_state.py` (RunState serialization/deserialization)
- `src/agents/run_internal/session_persistence.py` (session save/rewind)
- If the serialized RunState shape changes, update `CURRENT_SCHEMA_VERSION` in `src/agents/run_state.py` and the related serialization/deserialization logic. Keep released schema versions readable, and feel free to renumber or squash unreleased schema versions before release when those intermediate snapshots are intentionally unsupported.
- When bumping `CURRENT_SCHEMA_VERSION`, also add or update the matching entry in `SCHEMA_VERSION_SUMMARIES` in `src/agents/run_state.py` so every supported version keeps a short historical note describing what changed in that schema.

## Operation Guide

Expand Down
1 change: 1 addition & 0 deletions examples/sandbox/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Make the examples/sandbox directory a package for tooling consistency.
228 changes: 228 additions & 0 deletions examples/sandbox/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
from __future__ import annotations

import argparse
import asyncio
import sys
from pathlib import Path
from typing import Any, Literal, cast

from openai.types.responses import ResponseTextDeltaEvent

from agents import ModelSettings, Runner
from agents.run import RunConfig
from agents.sandbox import Manifest, SandboxAgent, SandboxRunConfig
from agents.sandbox.entries import File

if __package__ is None or __package__ == "":
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from examples.sandbox.misc.workspace_shell import WorkspaceShellCapability

# Sandbox backends this example can run against (selected with `--backend`).
Backend = Literal["docker", "modal"]
# Workspace persistence modes this example names for the Modal client options.
WorkspacePersistenceMode = Literal["tar", "snapshot_filesystem"]

# Prompt sent to the agent when `--question` is not supplied.
DEFAULT_QUESTION = "Summarize this sandbox project in 2 sentences."
# Backend used when `--backend` is not supplied.
DEFAULT_BACKEND: Backend = "docker"
# Passed as `app_name` to `ModalSandboxClientOptions` for Modal-backed runs.
DEFAULT_MODAL_APP_NAME = "openai-agents-python-sandbox-example"
# Passed as `workspace_persistence` to `ModalSandboxClientOptions` for Modal-backed runs.
DEFAULT_MODAL_WORKSPACE_PERSISTENCE: WorkspacePersistenceMode = "tar"


def _stream_event_banner(event_name: str) -> str | None:
    """Return the console banner for a tool-related stream event.

    Args:
        event_name: Name of a run-item stream event.

    Returns:
        The banner line for ``"tool_called"`` or ``"tool_output"``; ``None``
        for every other event name.
    """
    banners_by_event = {
        "tool_called": "[tool call] shell",
        "tool_output": "[tool output] shell",
    }
    return banners_by_event.get(event_name)


def _build_manifest(backend: Backend) -> Manifest:
    """Build the in-memory demo workspace for the selected backend.

    Args:
        backend: Backend whose display name is mentioned in the README text.

    Returns:
        A ``Manifest`` with three file entries: ``README.md``, ``src/app.py``
        and ``docs/notes.md``.
    """
    # Any value other than "docker" is labelled "Modal".
    if backend == "docker":
        backend_label = "Docker"
    else:
        backend_label = "Modal"

    # Only the middle README sentence depends on the backend, so it is the
    # only piece that has to be formatted and encoded at runtime.
    readme_backend_line = (
        f"This sandbox contains a tiny demo project for the {backend_label} "
        "sandbox runner.\n"
    ).encode()
    readme = File(
        content=(
            b"# Demo Project\n\n"
            + readme_backend_line
            + b"The goal is to show how Runner can prepare a sandbox workspace.\n"
        )
    )
    app_source = File(content=b'def greet(name: str) -> str:\n return f"Hello, {name}!"\n')
    notes = File(
        content=(
            b"# Notes\n\n"
            b"- The example is intentionally minimal.\n"
            b"- The model should inspect files through the shell tool.\n"
        )
    )

    workspace_entries = {
        "README.md": readme,
        "src/app.py": app_source,
        "docs/notes.md": notes,
    }
    return Manifest(entries=workspace_entries)


def _build_agent(*, model: str, manifest: Manifest, backend: Backend) -> SandboxAgent:
    """Create the sandbox agent used by this example.

    Args:
        model: Model name given to the agent.
        manifest: Workspace manifest set as the agent's ``default_manifest``.
        backend: Backend whose display name is used in the agent's name.

    Returns:
        A ``SandboxAgent`` with one ``WorkspaceShellCapability`` and
        ``tool_choice="required"`` model settings.
    """
    backend_label = "Modal" if backend != "docker" else "Docker"

    # Base task instructions for the agent.
    task_instructions = (
        "Answer questions about the sandbox workspace. Inspect the project before answering, "
        "and keep the response concise."
    )
    # Extra constraint passed separately as `developer_instructions`: the
    # workspace is tiny, so the model should not guess at common file names.
    workspace_constraints = (
        "Do not guess file names like package.json or pyproject.toml. "
        "This demo intentionally contains a tiny workspace."
    )

    return SandboxAgent(
        name=f"{backend_label} Sandbox Assistant",
        model=model,
        instructions=task_instructions,
        developer_instructions=workspace_constraints,
        # The manifest describes the workspace the agent should expect.
        default_manifest=manifest,
        # A single shell capability is how the model inspects files.
        capabilities=[WorkspaceShellCapability()],
        # Requiring a tool call pushes the model to look at the workspace
        # rather than answer from prior assumptions.
        model_settings=ModelSettings(tool_choice="required"),
    )


def _require_modal_dependency() -> tuple[Any, Any]:
    """Import the Modal sandbox client classes, or exit with an install hint.

    Returns:
        ``(ModalSandboxClient, ModalSandboxClientOptions)`` as imported from
        ``agents.extensions.sandbox``.

    Raises:
        SystemExit: If the import raises any ``Exception``. The original
            error is chained as the cause.
    """
    try:
        from agents.extensions.sandbox import ModalSandboxClient, ModalSandboxClientOptions
    except Exception as import_error:  # pragma: no cover - import path depends on optional extras
        install_hint = (
            "Modal-backed runs require the optional repo extra.\n"
            "Install it with: uv sync --extra modal"
        )
        raise SystemExit(install_hint) from import_error
    else:
        return ModalSandboxClient, ModalSandboxClientOptions


def _require_docker_dependency() -> tuple[Any, Any, Any]:
    """Import the Docker SDK entry point and the Docker sandbox client classes.

    Returns:
        ``(docker_from_env, DockerSandboxClient, DockerSandboxClientOptions)``.

    Raises:
        SystemExit: If importing ``docker.from_env`` raises any ``Exception``.
            The original error is chained as the cause.
    """
    try:
        from docker import from_env as docker_from_env  # type: ignore[import-untyped]
    except Exception as import_error:  # pragma: no cover - import path depends on local Docker setup
        install_hint = (
            "Docker-backed runs require the Docker SDK.\n"
            "Install the repo dependencies with: make sync"
        )
        raise SystemExit(install_hint) from import_error

    # Imported outside the guarded block: only a missing Docker SDK is turned
    # into the install hint above.
    from agents.sandbox.sandboxes.docker import DockerSandboxClient, DockerSandboxClientOptions

    docker_dependencies = (docker_from_env, DockerSandboxClient, DockerSandboxClientOptions)
    return docker_dependencies


async def _create_session(
    *,
    backend: Backend,
    manifest: Manifest,
    agent: SandboxAgent,
):
    """Create a sandbox client and a session on the requested backend.

    Args:
        backend: ``"docker"`` selects the Docker client; any other value
            selects the Modal client.
        manifest: Workspace manifest passed to ``client.create``.
        agent: Agent whose ``codex`` attribute is passed to ``client.create``.

    Returns:
        A ``(client, session)`` pair. The caller is responsible for deleting
        the session through the client.
    """
    if backend != "docker":
        modal_client_cls, modal_options_cls = _require_modal_dependency()
        modal_client = modal_client_cls()
        modal_session = await modal_client.create(
            manifest=manifest,
            codex=agent.codex,
            options=modal_options_cls(
                app_name=DEFAULT_MODAL_APP_NAME,
                workspace_persistence=DEFAULT_MODAL_WORKSPACE_PERSISTENCE,
            ),
        )
        return modal_client, modal_session

    from_env, docker_client_cls, docker_options_cls = _require_docker_dependency()
    docker_client = docker_client_cls(from_env())
    docker_session = await docker_client.create(
        manifest=manifest,
        codex=agent.codex,
        options=docker_options_cls(image="python:3.14-slim"),
    )
    return docker_client, docker_session


async def main(
    model: str,
    question: str,
    backend: Backend,
) -> None:
    """Run the sandbox demo end to end and stream the agent's answer to stdout.

    Builds the demo manifest and agent, creates a sandbox session on the
    chosen backend, streams a single ``Runner`` run against that session, and
    deletes the session afterwards whether or not the run succeeded.

    Args:
        model: Model name given to the agent.
        question: Prompt sent to the agent.
        backend: Sandbox backend on which the session is created.
    """
    manifest = _build_manifest(backend)
    agent = _build_agent(model=model, manifest=manifest, backend=backend)
    client, session = await _create_session(
        backend=backend,
        manifest=manifest,
        agent=agent,
    )

    try:
        # Startup and the directory listing run inside the `try` so that a
        # failure in either still reaches `client.delete(session)` below and
        # the created sandbox is not left behind.
        await session.start()
        print(await session.ls(".codex_bin/codex"))

        # `async with session` keeps the example on the public session lifecycle API.
        # `Runner` reuses the already-running session without starting it a second time.
        async with session:
            # `Runner.run_streamed()` drives the model and yields text and tool events in real time.
            result = Runner.run_streamed(
                agent,
                question,
                run_config=RunConfig(
                    sandbox=SandboxRunConfig(session=session),
                    workflow_name=f"{backend.title()} sandbox example",
                ),
            )
            # True while an "assistant> " line is open and needs a closing newline.
            saw_text_delta = False
            # True once any text delta has been printed during the run.
            saw_any_text = False

            # The stream contains raw text deltas from the assistant plus structured tool events.
            async for event in result.stream_events():
                if event.type == "raw_response_event" and isinstance(
                    event.data, ResponseTextDeltaEvent
                ):
                    if not saw_text_delta:
                        print("assistant> ", end="", flush=True)
                        saw_text_delta = True
                    print(event.data.delta, end="", flush=True)
                    saw_any_text = True
                    continue

                if event.type != "run_item_stream_event":
                    continue

                banner = _stream_event_banner(event.name)
                if banner is not None:
                    # Close the open assistant line before printing the banner.
                    if saw_text_delta:
                        print()
                        saw_text_delta = False
                    print(banner)

            if saw_text_delta:
                print()
            # Nothing was streamed as text, so fall back to the final output.
            if not saw_any_text:
                print(result.final_output)
    finally:
        await client.delete(session)


if __name__ == "__main__":
    # Parse the command-line flags, then hand them to the async entry point.
    cli = argparse.ArgumentParser()
    cli.add_argument("--model", default="gpt-5.4", help="Model name to use.")
    cli.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.")
    cli.add_argument(
        "--backend",
        default=DEFAULT_BACKEND,
        choices=["docker", "modal"],
        help="Sandbox backend to use for this example.",
    )
    cli_options = cli.parse_args()

    # argparse already restricts the value to the two choices; the cast only
    # narrows the type for static checkers.
    selected_backend = cast(Backend, cli_options.backend)
    asyncio.run(main(cli_options.model, cli_options.question, selected_backend))
Binary file added examples/sandbox/data/f1040.pdf
Binary file not shown.
Binary file added examples/sandbox/data/sample_w2.pdf
Binary file not shown.
125 changes: 125 additions & 0 deletions examples/sandbox/extensions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Cloud Sandbox Extension Examples

These examples are for manual verification of the cloud sandbox backends that
live under `agents.extensions.sandbox`.

They intentionally keep the flow simple:

1. Build a tiny manifest in memory.
2. Create a `SandboxAgent` that inspects that workspace through one shell tool.
3. Run the agent against either E2B or Modal.

Both examples require `OPENAI_API_KEY`, because they call the model through the
normal `Runner` path.

## E2B

### Setup

Install the repo extra:

```bash
uv sync --extra e2b
```

Create an E2B account, create an API key, and export it as `E2B_API_KEY`.
The official setup docs are:

- <https://e2b.dev/docs/api-key>
- <https://e2b.dev/docs/quickstart>

Export the required environment variables:

```bash
export OPENAI_API_KEY=...
export E2B_API_KEY=...
```

### Run

```bash
uv run python examples/sandbox/extensions/e2b_runner.py --stream
```

Useful flags:

- `--sandbox-type e2b_code_interpreter_async`
- `--template <template-name>`
- `--timeout 300`
- `--pause-on-exit`

The example defaults to `e2b_code_interpreter_async`, which matches the async
Code Interpreter backend supported by this repo.

## Modal

To use the explicit session lifecycle shown in `examples/sandbox/basic.py`
with Modal, pass `--backend modal` to that example; it reuses the same
streamed tool-output flow:

```bash
uv run python examples/sandbox/basic.py \
--backend modal
```

The dedicated script below stays as the smaller extension-specific example.

### Setup

Install the repo extra:

```bash
uv sync --extra modal
```

Authenticate Modal with either CLI token setup or environment variables. The
official references are:

- <https://modal.com/docs/reference/cli/token>
- <https://modal.com/docs/reference/modal.config>
- <https://modal.com/docs/guide/sandbox>

If you want to configure credentials directly from the CLI:

```bash
uv run modal token set --token-id <token-id> --token-secret <token-secret>
```

Or export environment variables for the current shell:

```bash
export OPENAI_API_KEY=...
export MODAL_TOKEN_ID=...
export MODAL_TOKEN_SECRET=...
```

### Run

```bash
uv run python examples/sandbox/extensions/modal_runner.py \
--app-name openai-agents-python-sandbox-example \
--stream
```

Useful flags:

- `--workspace-persistence tar`
- `--workspace-persistence snapshot_filesystem`
- `--sandbox-create-timeout-s 60`

`app_name` is required by `ModalSandboxClientOptions`, so the example makes it
an explicit CLI flag instead of hiding it.

## What to expect

Each script asks the model to inspect a small workspace and summarize it. A
successful run should:

1. Start the chosen cloud sandbox backend.
2. Materialize the manifest into the sandbox workspace.
3. Call the shell tool at least once.
4. Print either streamed text or a final short answer about the workspace.

These examples are not live-validated in CI because they depend on external
cloud credentials, but they are structured so that contributors can verify
backend behavior locally with one command per provider.
1 change: 1 addition & 0 deletions examples/sandbox/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Manual validation examples for cloud sandbox extensions."""
Loading
Loading