diff --git a/.agents/skills/cwsandbox b/.agents/skills/cwsandbox
new file mode 120000
index 00000000..3e8e2f82
--- /dev/null
+++ b/.agents/skills/cwsandbox
@@ -0,0 +1 @@
+../../.claude/skills/cwsandbox
\ No newline at end of file
diff --git a/.agents/skills/hf-mount b/.agents/skills/hf-mount
new file mode 120000
index 00000000..a26457ef
--- /dev/null
+++ b/.agents/skills/hf-mount
@@ -0,0 +1 @@
+../../.claude/skills/hf-mount
\ No newline at end of file
diff --git a/.agents/skills/rl-training b/.agents/skills/rl-training
new file mode 120000
index 00000000..8413e9b3
--- /dev/null
+++ b/.agents/skills/rl-training
@@ -0,0 +1 @@
+../../.claude/skills/rl-training
\ No newline at end of file
diff --git a/.agents/skills/sandbox-async b/.agents/skills/sandbox-async
new file mode 120000
index 00000000..a6a663f2
--- /dev/null
+++ b/.agents/skills/sandbox-async
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-async
\ No newline at end of file
diff --git a/.agents/skills/sandbox-cleanup b/.agents/skills/sandbox-cleanup
new file mode 120000
index 00000000..48bd8616
--- /dev/null
+++ b/.agents/skills/sandbox-cleanup
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-cleanup
\ No newline at end of file
diff --git a/.agents/skills/sandbox-config b/.agents/skills/sandbox-config
new file mode 120000
index 00000000..aab86096
--- /dev/null
+++ b/.agents/skills/sandbox-config
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-config
\ No newline at end of file
diff --git a/.agents/skills/sandbox-exec b/.agents/skills/sandbox-exec
new file mode 120000
index 00000000..2411a4ce
--- /dev/null
+++ b/.agents/skills/sandbox-exec
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-exec
\ No newline at end of file
diff --git a/.agents/skills/sandbox-files b/.agents/skills/sandbox-files
new file mode 120000
index 00000000..99d8c420
--- /dev/null
+++ b/.agents/skills/sandbox-files
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-files
\ No newline at end of file
diff --git a/.agents/skills/sandbox-functions b/.agents/skills/sandbox-functions
new file mode 120000
index 00000000..9781c4ad
--- /dev/null
+++ b/.agents/skills/sandbox-functions
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-functions
\ No newline at end of file
diff --git a/.agents/skills/sandbox-session b/.agents/skills/sandbox-session
new file mode 120000
index 00000000..7ee98532
--- /dev/null
+++ b/.agents/skills/sandbox-session
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-session
\ No newline at end of file
diff --git a/.agents/skills/sandbox-streaming b/.agents/skills/sandbox-streaming
new file mode 120000
index 00000000..9ecbaad6
--- /dev/null
+++ b/.agents/skills/sandbox-streaming
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-streaming
\ No newline at end of file
diff --git a/.agents/skills/sandbox-troubleshoot b/.agents/skills/sandbox-troubleshoot
new file mode 120000
index 00000000..ae98f8a3
--- /dev/null
+++ b/.agents/skills/sandbox-troubleshoot
@@ -0,0 +1 @@
+../../.claude/skills/sandbox-troubleshoot
\ No newline at end of file
diff --git a/.claude/skills/cwsandbox/SKILL.md b/.claude/skills/cwsandbox/SKILL.md
new file mode 100644
index 00000000..fbee93ad
--- /dev/null
+++ b/.claude/skills/cwsandbox/SKILL.md
@@ -0,0 +1,206 @@
+---
+name: cwsandbox
+description: "Use when working with cwsandbox-client library for CoreWeave Sandbox remote code execution. Covers Sandbox.run(), exec(), streaming, file operations, Session management, function decorators, cleanup patterns, and troubleshooting. Relevant for requests involving sandboxes, remote execution, containerized code running, RL training, model evaluation, or agent tool use."
+disable-model-invocation: false
+---
+
+# CoreWeave Sandbox Skill
+
+Python SDK for CoreWeave Sandbox — a compute platform for orchestrating isolated execution environments at scale.
+
+## Quick Start
+
+```python
+from cwsandbox import Sandbox
+
+with Sandbox.run(container_image="python:3.11") as sb:
+    result = sb.exec(["python", "-c", "print('Hello!')"]).result()
+    print(result.stdout)
+```
+
+## Core Patterns
+
+### Sandbox Creation
+
+```python
+# Factory method (recommended) - blocks until backend accepts
+sb = Sandbox.run("echo", "hello")
+result = sb.exec(["echo", "more"]).result()
+sb.stop().result()
+
+# Context manager - auto-stops on exit
+with Sandbox.run("sleep", "infinity") as sb:
+    result = sb.exec(["echo", "hello"]).result()
+
+# Session for multiple sandboxes with shared defaults
+with Session(defaults) as session:
+    sb = session.sandbox(command="sleep", args=["infinity"])
+    result = sb.exec(["echo", "hello"]).result()
+```
+
+### Streaming Output
+
+```python
+with Sandbox.run("sleep", "infinity") as sb:
+    process = sb.exec(["echo", "hello"])
+    for line in process.stdout:  # Stream lines as they arrive
+        print(line, end="")
+    result = process.result()
+```
+
+### Async Pattern
+
+```python
+async with Sandbox.run("sleep", "infinity") as sb:
+    result = await sb.exec(["echo", "hello"])
+```
+
+## Key Methods
+
+| Method | Returns | Notes |
+|--------|---------|-------|
+| `Sandbox.run(*args, **kwargs)` | Sandbox | Factory - creates and starts sandbox |
+| `sandbox.exec(command, cwd, check, timeout_seconds, stdin)` | Process | Execute command |
+| `sandbox.read_file(path)` | OperationRef[bytes] | Read file |
+| `sandbox.write_file(path, content)` | OperationRef[None] | Write file |
+| `sandbox.stream_logs(follow, tail_lines, timestamps)` | StreamReader[str] | Stream PID 1 logs |
+| `sandbox.shell(command, width, height)` | TerminalSession | Interactive TTY |
+| `sandbox.stop(snapshot_on_stop, graceful_shutdown_seconds, missing_ok)` | OperationRef[None] | Stop sandbox |
+| `sandbox.wait()` | self | Block until RUNNING (or a terminal state) |
+| `sandbox.wait_until_complete(timeout, raise_on_termination)` | OperationRef[Sandbox] | Block until terminal |
+
+## Lifecycle States
+
+```
+PENDING -> CREATING -> RUNNING -> COMPLETED/FAILED/TERMINATED
+```
+
+- `wait()` blocks until RUNNING or terminal
+- `wait_until_complete()` blocks until terminal state
+- `COMPLETED` = command exited 0
+- `TERMINATED` = external kill / lifetime exceeded
+- `FAILED` = startup or runtime error
+
+## SandboxStatus States
+
+`PENDING`, `CREATING`, `RUNNING`, `PAUSED`, `COMPLETED`, `TERMINATED`, `FAILED`, `UNSPECIFIED`
+
+## Configuration Kwargs
+
+```python
+Sandbox.run(
+    container_image="python:3.11",  # Container image
+    resources={...},                 # CPU, memory, GPU requests
+    mounted_files=[...],              # Files to mount
+    s3_mount={...},                  # S3 bucket mount
+    ports={...},                     # Port mappings
+    network=NetworkOptions(           # Network config
+        ingress_mode="public",
+        exposed_ports=(8080,),
+        egress_mode="internet",
+    ),
+    secrets=[Secret(...)],           # Secret injection
+    max_timeout_seconds=3600,        # Max timeout
+    environment_variables={...},     # Env vars
+    annotations={...},               # Kubernetes annotations
+)
+```
+
+## Session Management
+
+```python
+from cwsandbox import Session, SandboxDefaults
+
+with Session(SandboxDefaults(tags=("my-tag",))) as session:
+    sb = session.sandbox(command="sleep", args=["infinity"])
+    result = sb.exec(["echo", "hello"]).result()
+
+# Parallel execution (`compute` is a @session.function(); see "Remote Functions" below)
+refs = compute.map([(1, 2), (3, 4), (5, 6)])
+results = [r.result() for r in refs]
+```
+
+## Remote Functions
+
+```python
+with Session(defaults) as session:
+    @session.function()
+    def compute(x: int, y: int) -> int:
+        return x + y
+
+    ref = compute.remote(2, 3)
+    result = ref.result()
+    # or: result = compute.local(2, 3)  # local testing
+```
+
+## Exception Hierarchy
+
+```
+CWSandboxError
+├── CWSandboxAuthenticationError
+│   └── WandbAuthError
+├── SandboxError
+│   ├── SandboxNotRunningError
+│   ├── SandboxTimeoutError
+│   ├── SandboxTerminatedError
+│   ├── SandboxFailedError
+│   ├── SandboxNotFoundError
+│   ├── SandboxExecutionError
+│   └── SandboxFileError
+└── FunctionError
+    ├── AsyncFunctionError
+    └── FunctionSerializationError
+```
+
+## Authentication
+
+1. `CWSANDBOX_API_KEY` env var (Bearer token)
+2. `WANDB_API_KEY` + `WANDB_ENTITY` (W&B headers)
+3. `~/.netrc` (api.wandb.ai) + `WANDB_ENTITY`
+
+## Important Design Points
+
+- **Sync/async hybrid**: All methods return immediately; use `.result()` to block (sync) or `await` (async)
+- **Single-threaded**: Not safe to call `.result()` from multiple threads simultaneously
+- **Lazy-start**: `Sandbox.run()` returns once backend accepts — not when RUNNING
+- **Auto-start**: `exec()`, `read_file()`, `write_file()`, `wait()` all auto-start if not started
+
+## Common Issues
+
+| Issue | Solution |
+|-------|----------|
+| "Sandbox not running" on exec | Sandbox hasn't reached RUNNING yet — `exec()` waits internally |
+| Timeout during startup | Startup wait and operation timeout are separate phases |
+| Integration tests hang | Use `.result()`, not `await` in sync tests |
+| Stdin not working | Must pass `stdin=True` to `exec()` |
+
+## Running Tests
+
+```bash
+# Unit tests (284 tests, no network)
+mise run test
+
+# Integration tests (31 tests, requires auth)
+mise run test:e2e
+mise run test:e2e:parallel
+
+# Individual test
+timeout 120 uv run pytest tests/integration/cwsandbox/test_sandbox.py::test_sandbox_lifecycle -v
+```
+
+## Examples
+
+See `examples/` directory:
+- `quick_start.py` — Context manager with exec
+- `streaming_exec.py` — Real-time stdout iteration
+- `function_decorator.py` — Remote function execution
+- `error_handling.py` — Exception hierarchy
+- `multiple_sandboxes.py` — Session-based multi-sandbox
+- `parallel_batch_job.py` — Batch processing with `cwsandbox.wait()`
+- `cleanup_by_tag.py` — Tag-based cleanup
+- `interactive_streaming_sandbox.py` — Log streaming and CLI
+
+## References
+
+- Docs: https://docs.coreweave.com/products/coreweave-sandbox
+- Backend: github.com/coreweave/aviato
diff --git a/.claude/skills/hf-mount/SKILL.md b/.claude/skills/hf-mount/SKILL.md
new file mode 100644
index 00000000..86c98259
--- /dev/null
+++ b/.claude/skills/hf-mount/SKILL.md
@@ -0,0 +1,343 @@
+---
+name: hf-mount
+description: "Use when mounting HuggingFace buckets, model repos, or dataset repos as local filesystems within CoreWeave Sandbox. Covers hf-mount installation, mount commands, FUSE vs NFS backends, HF Storage Buckets, read-only vs read-write access, and integration with sandbox workflows for RL training and model evaluation."
+disable-model-invocation: false
+---
+
+# HuggingFace Mount (hf-mount)
+
+Mount HuggingFace buckets and repos as local filesystems. No download, no copy, no waiting.
+
+## Install
+
+```bash
+# Quick install
+curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh
+
+# Linux (NFS - no root required, recommended for containers)
+hf-mount start repo openai/gpt2 /tmp/gpt2
+
+# macOS (requires macFUSE)
+brew install macfuse  # reboot required
+hf-mount start --fuse repo openai/gpt2 /tmp/gpt2
+```
+
+## HF Storage Buckets vs Repos
+
+| Feature | Repositories (Git-based) | Storage Buckets |
+|---------|-------------------------|-----------------|
+| Versioning | Full Git history | None (mutable) |
+| Types | Models, Datasets, Spaces | Standalone bucket |
+| Use case | Publishing finished artifacts | Working storage, checkpoints |
+| Operations | Hub API, Git push/pull | S3-like sync, cp, rm |
+| Write support | Read-only via hf-mount | Read-write via hf-mount |
+
+**Use repos** when you want version history, collaboration (PRs, discussions), and library integrations.
+**Use buckets** for fast, mutable storage: checkpoints, logs, intermediate artifacts.
+
+## Quick Start
+
+```bash
+# Public model (no token needed)
+hf-mount start repo openai/gpt-oss-20b /tmp/model
+
+# Private model/dataset
+hf-mount start --hf-token $HF_TOKEN repo myorg/my-private-model /tmp/model
+
+# Bucket (read-write)
+hf-mount start --hf-token $HF_TOKEN bucket myuser/my-bucket /tmp/data
+```
+
+## Within CoreWeave Sandbox
+
+Use hf-mount inside a sandbox to access models/datasets without downloading:
+
+```python
+from cwsandbox import Sandbox, SandboxDefaults
+
+defaults = SandboxDefaults(
+    container_image="python:3.11",
+    environment_variables={"HF_TOKEN": "your-token"},
+)
+
+with Sandbox.run(defaults=defaults) as sandbox:
+    # Install hf-mount in sandbox
+    sandbox.exec(["bash", "-c", "curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh"]).result()
+
+    # Mount a model repo
+    sandbox.exec(["hf-mount", "start", "repo", "openai/gpt2", "/tmp/gpt2"]).result()
+
+    # Use model directly - no download step
+    result = sandbox.exec([
+        "python", "-c",
+        "from transformers import AutoModel; model = AutoModel.from_pretrained('/tmp/gpt2')"
+    ]).result()
+```
+
+## Mount Types
+
+### Repos (read-only)
+
+```bash
+# Models
+hf-mount start repo username/model-name /mnt/model
+
+# Datasets
+hf-mount start repo datasets/username/dataset-name /mnt/dataset
+
+# Specific revision
+hf-mount start repo username/model /mnt/model --revision v1.0
+
+# Subfolder only
+hf-mount start repo username/model/onnx /mnt/onnx
+```
+
+### Buckets (read-write)
+
+```bash
+# Read-write bucket
+hf-mount start --hf-token $HF_TOKEN bucket username/my-bucket /mnt/data
+
+# Read-only bucket
+hf-mount start --hf-token $HF_TOKEN --read-only bucket username/my-bucket /mnt/data
+
+# Subfolder only
+hf-mount start --hf-token $HF_TOKEN bucket username/my-bucket/checkpoints /mnt/ckpts
+```
+
+## Backend: FUSE vs NFS
+
+| Feature | FUSE | NFS |
+|---------|------|-----|
+| No root required | No | Yes (recommended for containers) |
+| Metadata freshness | ~10s | Up to poll interval |
+| Write mode | Streaming by default | Advanced always |
+| Page cache invalidation | Yes | No |
+| macOS support | Yes (macFUSE) | Yes |
+
+```bash
+# FUSE (tighter integration, requires root/FUSE)
+hf-mount start --fuse --hf-token $HF_TOKEN bucket user/bucket /mnt/data
+
+# NFS (no root, works in containers)
+hf-mount start --hf-token $HF_TOKEN bucket user/bucket /mnt/data
+```
+
+## Common Options
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--hf-token` | `$HF_TOKEN` | HF API token |
+| `--read-only` | false | Read-only mount |
+| `--cache-dir` | `/tmp/hf-mount-cache` | Local cache |
+| `--cache-size` | 10GB | Max cache size |
+| `--poll-interval-secs` | 30 | Change polling interval |
+| `--metadata-ttl-ms` | 10000 | Metadata cache TTL |
+
+## Manage Mounts
+
+```bash
+# List running mounts
+hf-mount status
+
+# Stop mount
+hf-mount stop /mnt/data
+
+# Or unmount manually
+umount /mnt/data          # NFS or FUSE (macOS)
+fusermount -u /mnt/data   # FUSE (Linux)
+```
+
+## Write Modes
+
+### Streaming (default)
+
+- Append-only writes
+- In-memory buffer, upload on close
+- No disk space needed
+- **Not safe for text editors** (use `--advanced-writes`)
+
+### Advanced Writes
+
+```bash
+hf-mount start --advanced-writes --hf-token $HF_TOKEN bucket user/bucket /mnt/data
+```
+
+- Random writes, seek, overwrite supported
+- Downloads file to local disk first
+- Async flush (2s debounce, 30s max batch)
+- Safe for editors and random I/O
+
+## Consistency Model
+
+- **Reads**: Files can be stale for up to 10s (metadata TTL)
+- **Writes**: eventual consistency, background polling syncs changes
+- **Not for**: latency-sensitive random I/O, strong consistency needs
+
+## HF CLI for Buckets
+
+```bash
+# Create bucket
+hf buckets create my-bucket
+
+# List files (human-readable with sizes)
+hf buckets list username/my-bucket -h -R
+
+# Tree view
+hf buckets list username/my-bucket --tree -h
+
+# Upload file
+hf buckets cp ./model.safetensors hf://buckets/username/my-bucket/models/
+
+# Sync directory (upload local to bucket)
+hf buckets sync ./data hf://buckets/username/my-bucket/data
+
+# Download file
+hf buckets cp hf://buckets/username/my-bucket/model.bin ./model.bin
+
+# Sync directory (download bucket to local)
+hf buckets sync hf://buckets/username/my-bucket/data ./data
+
+# Delete file
+hf buckets rm username/my-bucket/old-checkpoint.bin
+
+# Dry-run before delete
+hf buckets rm username/my-bucket/checkpoints/ --recursive --dry-run
+```
+
+## Python API for Buckets
+
+```python
+from huggingface_hub import (
+    create_bucket,
+    download_bucket_files,
+    batch_bucket_files,
+    sync_bucket,
+)
+
+# Create bucket
+create_bucket("username/my-bucket")
+create_bucket("username/my-bucket", private=True)
+
+# Upload files
+batch_bucket_files(
+    "username/my-bucket",
+    add=[
+        ("./model.safetensors", "models/model.safetensors"),
+        ("./config.json", "models/config.json"),
+    ],
+)
+
+# Download files
+download_bucket_files(
+    "username/my-bucket",
+    files=[
+        ("models/model.safetensors", "./local/model.safetensors"),
+        ("config.json", "./local/config.json"),
+    ],
+)
+
+# Sync directory (upload)
+sync_bucket("./data", "hf://buckets/username/my-bucket/data")
+
+# Sync directory (download)
+sync_bucket("hf://buckets/username/my-bucket/data", "./data")
+
+# Delete files
+batch_bucket_files("username/my-bucket", delete=["old-model.bin", "logs/debug.log"])
+```
+
+## Use Cases with Sandboxes
+
+### RL Training with HF Models (Checkpoints to Bucket)
+
+```python
+with Sandbox.run() as sandbox:
+    sandbox.exec(["bash", "-c", "curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh"]).result()
+
+    # Mount model repo
+    sandbox.exec([
+        "hf-mount", "start", "repo", "openai/gpt-oss-20b", "/tmp/model"
+    ]).result()
+
+    # Mount bucket for checkpoints
+    sandbox.exec([
+        "bash", "-c",  # shell required: list-form exec does not expand $HF_TOKEN
+        'hf-mount start --hf-token "$HF_TOKEN" bucket myuser/checkpoints /tmp/ckpts'
+    ]).result()
+
+    # Training with checkpointing to bucket
+    result = sandbox.exec([
+        "python", "train.py",
+        "--model-path", "/tmp/model",
+        "--checkpoint-dir", "/tmp/ckpts/run-1"
+    ]).result()
+```
+
+### Model Evaluation with Dataset
+
+```python
+with Sandbox.run() as sandbox:
+    sandbox.exec(["bash", "-c", "curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh"]).result()
+
+    # Mount model and dataset
+    sandbox.exec(["hf-mount", "start", "repo", "myorg/model", "/tmp/model"]).result()
+    sandbox.exec(["hf-mount", "start", "repo", "datasets/myorg/eval-data", "/tmp/eval-data"]).result()
+
+    # Run evaluation
+    result = sandbox.exec([
+        "python", "evaluate.py",
+        "--model", "/tmp/model",
+        "--data", "/tmp/eval-data"
+    ]).result()
+```
+
+### Data Processing Pipeline
+
+```python
+with Sandbox.run() as sandbox:
+    sandbox.exec(["bash", "-c", "curl -fsSL https://raw.githubusercontent.com/huggingface/hf-mount/main/install.sh | sh"]).result()
+
+    # Mount bucket for intermediate outputs
+    sandbox.exec([
+        "bash", "-c",  # shell required: list-form exec does not expand $HF_TOKEN
+        'hf-mount start --hf-token "$HF_TOKEN" bucket myuser/pipeline-outputs /tmp/outputs'
+    ]).result()
+
+    # Process and save intermediate results
+    sandbox.exec([
+        "python", "process.py",
+        "--input", "/tmp/raw-data",
+        "--output", "/tmp/outputs/batch-001"
+    ]).result()
+```
+
+## Kubernetes Integration
+
+For Kubernetes, use [hf-csi-driver](https://github.com/huggingface/hf-csi-driver):
+
+```bash
+helm install hf-csi oci://ghcr.io/huggingface/charts/hf-csi-driver
+```
+
+Then mount HF repos/buckets as Kubernetes volumes in pods.
+
+## Troubleshooting
+
+```bash
+# Debug logging
+RUST_LOG=hf_mount=debug hf-mount start repo openai/gpt2 /tmp/gpt2
+
+# Check status
+hf-mount status
+
+# Check logs
+cat ~/.hf-mount/logs/*
+```
+
+## References
+
+- [hf-mount GitHub](https://github.com/huggingface/hf-mount)
+- [HF Storage Buckets](https://huggingface.co/docs/hub/storage-buckets)
+- [hf-csi-driver](https://github.com/huggingface/hf-csi-driver) for Kubernetes
+- [HF Hub Buckets Guide](https://huggingface.co/docs/huggingface_hub/guides/buckets)
diff --git a/.claude/skills/rl-training/SKILL.md b/.claude/skills/rl-training/SKILL.md
new file mode 100644
index 00000000..2203355f
--- /dev/null
+++ b/.claude/skills/rl-training/SKILL.md
@@ -0,0 +1,229 @@
+---
+name: rl-training
+description: "Use when working with RL training workflows using CoreWeave Sandbox. Covers agent tool execution in sandboxes, parallel episode processing, GRPOTrainer integration with TRL, reward function patterns, W&B metrics integration, tagging for job metadata, and monitoring training runs. Relevant for requests involving reinforcement learning, GRPO, multi-step rollouts, agent training, or tool-calling models."
+disable-model-invocation: false
+---
+
+# RL Training with CoreWeave Sandbox
+
+Use sandboxes for reinforcement learning training environments where models execute tool calls in isolated environments.
+
+## Why Sandboxes for RL
+
+Training code agents with RL requires executing tool calls (bash, file ops) in isolated environments. Sandboxes give you:
+- Isolated, ephemeral environments for untrusted model-generated code
+- State persistence across tool calls within an episode
+- File changes and installed packages carry over between steps
+- Tagging and listing APIs for cleanup and monitoring thousands of sandboxes
+
+## Core Pattern
+
+```python
+import cwsandbox
+from cwsandbox import Sandbox
+
+def run_agent_episode(model, task: dict, sandbox: Sandbox) -> tuple[list, float]:
+    messages = [{"role": "user", "content": task["prompt"]}]
+
+    for step in range(task.get("max_steps", 10)):
+        response = model.generate(messages)
+        messages.append({"role": "assistant", "content": response})
+
+        tool_calls = parse_tool_calls(response)
+        if not tool_calls:
+            break
+
+        for tool in tool_calls:
+            if tool.name == "bash":
+                result = sandbox.exec(
+                    ["bash", "-c", tool.command],
+                    timeout_seconds=30.0,
+                ).result()
+                observation = f"exit={result.returncode}\n{result.stdout}{result.stderr}"
+            elif tool.name == "read_file":
+                content = sandbox.read_file(tool.path).result()
+                observation = content.decode()
+            elif tool.name == "write_file":
+                sandbox.write_file(tool.path, tool.content.encode()).result()
+                observation = "File written successfully"
+
+            messages.append({"role": "tool", "name": tool.name, "content": observation})
+
+    test_result = sandbox.exec(task["test_command"]).result()
+    reward = 1.0 if test_result.returncode == 0 else 0.0
+
+    return messages, reward
+```
+
+## Parallel Episode Processing
+
+```python
+def training_step(model, batch: list[dict], session) -> list[float]:
+    sandboxes = [session.sandbox() for _ in batch]
+    refs = [sb.start() for sb in sandboxes]
+    [r.result() for r in refs]  # Wait for all backends to accept
+
+    trajectories = []
+    rewards = []
+
+    for task, sandbox in zip(batch, sandboxes):
+        trajectory, reward = run_agent_episode(model, task, sandbox)
+        trajectories.append(trajectory)
+        rewards.append(reward)
+        sandbox.stop()
+
+    return rewards
+```
+
+## Tagging for Job Metadata
+
+```python
+import os
+from cwsandbox import SandboxDefaults
+
+def make_defaults(model_name: str) -> SandboxDefaults:
+    return SandboxDefaults(
+        container_image="python:3.11",
+        tags=(
+            f"wandb-run:{os.environ.get('WANDB_RUN_ID', 'local')}",
+            f"slurm-job:{os.environ.get('SLURM_JOB_ID', 'interactive')}",
+            f"model:{model_name}",
+            "rl-training",
+        ),
+    )
+```
+
+| Tag pattern | Purpose |
+|-------------|---------|
+| `wandb-run:{id}` | W&B run ID for filtering by training run |
+| `slurm-job:{id}` | Slurm job ID for cluster job tracking |
+| `model:{name}` | Model name or checkpoint |
+| `env:{name}` | Environment (dev, staging, prod) |
+
+## TRL GRPOTrainer Integration
+
+Standard GRPO pattern with `<answer>` XML tags:
+
+```python
+import cwsandbox
+from cwsandbox import SandboxDefaults
+
+session = cwsandbox.Session(defaults=SandboxDefaults(
+    container_image="python:3.11",
+    tags=("trl-grpo",),
+))
+
+def extract_xml_answer(text: str) -> str:
+    if "<answer>" not in text:
+        return ""
+    return text.split("<answer>")[-1].split("</answer>")[0].strip()
+
+def reward_fn(completions, **kwargs) -> list[float]:
+    texts = [c[0]["content"] if isinstance(c, list) else c for c in completions]
+    codes = [extract_xml_answer(t) for t in texts]
+    code_indices = [(i, code) for i, code in enumerate(codes) if code]
+
+    processes = [
+        (i, session.sandbox().exec(["python", "-c", code], timeout_seconds=30.0))
+        for i, code in code_indices
+    ]
+
+    rewards = [0.0] * len(codes)
+    for i, process in processes:
+        try:
+            rewards[i] = 1.0 if process.result().returncode == 0 else 0.0
+        except Exception:
+            pass
+
+    return rewards
+```
+
+## Error Handling
+
+```python
+from cwsandbox import SandboxTimeoutError, SandboxFileError
+
+def execute_tool(sandbox, tool) -> str:
+    try:
+        if tool.name == "bash":
+            result = sandbox.exec(["bash", "-c", tool.command], timeout_seconds=30.0).result()
+            return f"exit={result.returncode}\n{result.stdout}{result.stderr}"
+        elif tool.name == "read_file":
+            content = sandbox.read_file(tool.path).result()
+            return content.decode()
+        elif tool.name == "write_file":
+            sandbox.write_file(tool.path, tool.content.encode()).result()
+            return "File written successfully"
+    except SandboxTimeoutError:
+        return "Error: command timed out after 30 seconds"
+    except SandboxFileError as e:
+        return f"Error: {e}"
+    except Exception as e:
+        return f"Error: {type(e).__name__}: {e}"
+```
+
+## W&B Metrics Integration
+
+Auto-enabled when `WANDB_API_KEY` is set and a wandb run is active:
+
+```python
+import wandb
+from cwsandbox import Session, SandboxDefaults
+
+wandb.init(project="my-rl-training")
+
+with Session(defaults) as session:
+    for step in range(num_steps):
+        sandbox = session.sandbox()
+        result = sandbox.exec(["python", "-c", code]).result()
+        session.log_metrics(step=step)
+```
+
+| Metric | Description |
+|--------|-------------|
+| `cwsandbox/sandboxes_created` | Total sandboxes created |
+| `cwsandbox/executions` | Total exec() calls |
+| `cwsandbox/exec_completed_ok` | Successful executions (returncode=0) |
+| `cwsandbox/exec_completed_nonzero` | Completed with returncode!=0 |
+| `cwsandbox/exec_failures` | Failed executions (timeouts, transport) |
+| `cwsandbox/avg_execs_per_sandbox` | Average exec() calls per sandbox |
+
+## Single-Step vs Multi-Step
+
+| Pattern | Use Case |
+|---------|----------|
+| **Single-shot** | One sandbox per completion, execute once, return reward |
+| **Multi-step** | Agent takes multiple actions in same sandbox, state persists |
+
+Multi-step enables: write file → run → see error → edit → try again
+
+## Monitoring Active Sandboxes
+
+```python
+from cwsandbox import Sandbox, SandboxStatus
+
+def count_active_sandboxes(run_id: str) -> dict:
+    sandboxes = Sandbox.list(
+        tags=[f"wandb-run:{run_id}"],
+        status=[SandboxStatus.RUNNING, SandboxStatus.PENDING],
+    ).result()
+
+    return {
+        "running": sum(1 for s in sandboxes if s.status == SandboxStatus.RUNNING),
+        "pending": sum(1 for s in sandboxes if s.status == SandboxStatus.PENDING),
+        "total": len(sandboxes),
+    }
+```
+
+## Examples
+
+See `examples/rl_training/`:
+- `reward_function.py` — Binary code execution rewards with parallel sandboxes
+- `trl_grpo_integration.py` — TRL GRPOTrainer integration
+- `art/` — Multi-step rollouts with ART framework
+
+## References
+
+- [CoreWeave Sandbox RL Training Docs](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/rl-training)
+- [TRL GRPOTrainer](https://huggingface.co/docs/trl)
+- [ART (Agent Reinforcement Trainer)](https://github.com/OpenPipe/ART)
diff --git a/.claude/skills/sandbox-async/SKILL.md b/.claude/skills/sandbox-async/SKILL.md
new file mode 100644
index 00000000..10bd7af1
--- /dev/null
+++ b/.claude/skills/sandbox-async/SKILL.md
@@ -0,0 +1,247 @@
+---
+name: sandbox-async
+description: "Use when working with async patterns in CoreWeave Sandbox, in Jupyter notebooks, or when integrating with async codebases. Covers await vs .result(), OperationRef, parallel execution, and async context managers."
+disable-model-invocation: false
+---
+
+# Sync vs Async Patterns
+
+The SDK has a single async implementation. Sync/async flexibility comes from how you consume results.
+
+## Quick Decision
+
+| Use Case | Pattern | Reason |
+|----------|---------|--------|
+| Most operations | Sync | Simpler, no asyncio boilerplate |
+| Parallel execution | Sync | Non-blocking by design |
+| Jupyter notebooks | Sync | No nest_asyncio needed |
+| Async codebase | Async | Integrates with existing async |
+
+## Core Concept: OperationRef
+
+Most methods return `OperationRef[T]` — both `.result()`-able and awaitable:
+
+```python
+ref = sandbox.read_file("/path")
+
+# Sync: block
+data = ref.result()
+
+# Async: await
+data = await ref
+```
+
+## Key Rule
+
+> **Never use `.result()` in async contexts** — it blocks the event loop. Use `await` instead.
+
+## Sandbox Creation
+
+<Tabs>
+<Tab title="Sync (Recommended)">
+
+```python
+from cwsandbox import Sandbox
+
+with Sandbox.run() as sb:
+    result = sb.exec(["echo", "hello"]).result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+from cwsandbox import Sandbox
+
+sb = Sandbox()
+async with sb:
+    result = await sb.exec(["echo", "hello"])
+```
+
+</Tab>
+</Tabs>
+
+## exec()
+
+<Tabs>
+<Tab title="Sync">
+
+```python
+result = sb.exec(["echo", "hello"]).result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+result = await sb.exec(["echo", "hello"])
+```
+
+</Tab>
+</Tabs>
+
+## read_file()
+
+<Tabs>
+<Tab title="Sync">
+
+```python
+data = sb.read_file("/path").result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+data = await sb.read_file("/path")
+```
+
+</Tab>
+</Tabs>
+
+## stop()
+
+<Tabs>
+<Tab title="Sync">
+
+```python
+sb.stop().result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+await sb.stop()
+```
+
+</Tab>
+</Tabs>
+
+## start()
+
+<Tabs>
+<Tab title="Sync">
+
+```python
+sb = Sandbox()
+sb.start().result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+sb = Sandbox()
+await sb.start()
+```
+
+</Tab>
+</Tabs>
+
+## Sandbox.list()
+
+<Tabs>
+<Tab title="Sync">
+
+```python
+sandboxes = Sandbox.list(tags=["my-tag"]).result()
+```
+
+</Tab>
+<Tab title="Async">
+
+```python
+sandboxes = await Sandbox.list(tags=["my-tag"])
+```
+
+</Tab>
+</Tabs>
+
+## Streaming (async)
+
+```python
+process = sb.exec(["python", "-c", "import time; [print(i) for i in range(5)]"])
+
+async for line in process.stdout:
+    print(line, end="")
+
+result = await process
+```
+
+## stdin Streaming (async)
+
+```python
+process = sb.exec(["cat"], stdin=True)
+await process.stdin.write(b"hello ")
+await process.stdin.writeline("world")
+await process.stdin.close()
+result = await process
+```
+
+## Parallel Execution
+
+Operations are non-blocking by design — `exec()` returns immediately:
+
+```python
+# All start in parallel (sync API)
+p1 = sb.exec(["sleep", "1"])
+p2 = sb.exec(["sleep", "1"])
+p3 = sb.exec(["sleep", "1"])
+
+# Block ~1 second total, not 3
+cwsandbox.result([p1, p2, p3])
+```
+
+## Jupyter Notebooks
+
+Sync API works without nest_asyncio (SDK runs its own daemon thread with event loop):
+
+```python
+# Cell 1
+from cwsandbox import Sandbox
+sandbox = Sandbox.run()
+sandbox.wait()
+
+# Cell 2
+result = sandbox.exec(["python", "-c", "print(1+1)"]).result()
+
+# Cell 3
+sandbox.stop().result()
+```
+
+For async in Jupyter, `await` works directly:
+
+```python
+sandboxes = await Sandbox.list(tags=["my-tag"])
+```
+
+## @session.function() (async)
+
+```python
+async with Session(defaults) as session:
+    @session.function()
+    def compute(x: int, y: int) -> int:
+        return x + y
+
+    result = await compute.remote(2, 3)
+
+    refs = compute.map([(1, 2), (3, 4)])
+    results = [await r for r in refs]
+```
+
+## Module-Level Helpers
+
+```python
+import cwsandbox
+
+# cwsandbox.result() - block on one ref or a list of refs
+data = cwsandbox.result(sandbox.read_file("/path"))
+
+# cwsandbox.wait() - wait for sandboxes/processes
+done, pending = cwsandbox.wait(processes)
+```
+
+## References
+
+- [Sync vs Async Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/sync-vs-async)
diff --git a/.claude/skills/sandbox-cleanup/SKILL.md b/.claude/skills/sandbox-cleanup/SKILL.md
new file mode 100644
index 00000000..cfb597a2
--- /dev/null
+++ b/.claude/skills/sandbox-cleanup/SKILL.md
@@ -0,0 +1,114 @@
+---
+name: sandbox-cleanup
+description: "Use when stopping CoreWeave Sandboxes, cleaning up resources, managing orphan sandboxes, batch deletion, or proper shutdown patterns. Covers stop(), delete(), session cleanup, and tagging strategies."
+disable-model-invocation: false
+---
+
+# Sandbox Cleanup
+
+Properly terminate sandboxes and manage resource cleanup.
+
+## Context Manager (Recommended)
+
+```python
+with Sandbox.run() as sandbox:
+    sandbox.exec(["echo", "hello"]).result()
+# Stopped automatically
+```
+
+## Explicit stop()
+
+```python
+sandbox = Sandbox.run()
+sandbox.exec(["echo", "hello"]).result()
+sandbox.stop().result()
+```
+
+## stop() Options
+
+```python
+# Capture state before shutdown
+sandbox.stop(snapshot_on_stop=True).result()
+
+# Safe even if already stopped
+sandbox.stop(missing_ok=True).result()
+
+# Longer grace period
+sandbox.stop(graceful_shutdown_seconds=30.0).result()
+```
+
+## Session Cleanup
+
+Sessions auto-cleanup all sandboxes on close:
+
+```python
+with Session(defaults) as session:
+    sandboxes = [session.sandbox() for _ in range(5)]
+    # All cleaned up on exit
+```
+
+## Delete by ID
+
+Remove sandboxes by ID without needing a sandbox instance:
+
+```python
+from cwsandbox import Sandbox
+
+# Delete specific sandbox
+Sandbox.delete("sandbox-abc123", missing_ok=True).result()
+
+# Batch delete old sandboxes
+for sb in old_sandboxes:
+    Sandbox.delete(sb.sandbox_id, missing_ok=True).result()
+```
+
+## Tag-Based Cleanup
+
+Clean up all sandboxes with specific tags:
+
+```python
+from cwsandbox import Sandbox, SandboxStatus
+
+# Find sandboxes by tag
+sandboxes = Sandbox.list(
+    tags=["training-run-2024-01-15"],
+    include_stopped=True,
+).result()
+
+# Delete each
+for sb in sandboxes:
+    Sandbox.delete(sb.sandbox_id).result()
+```
+
+## Age-Based Cleanup
+
+Delete sandboxes older than a given threshold:
+
+```python
+from datetime import datetime, timedelta
+
+threshold = datetime.now() - timedelta(days=7)
+
+for sb in Sandbox.list(include_stopped=True).result():
+    if sb.started_at and sb.started_at < threshold:
+        Sandbox.delete(sb.sandbox_id, missing_ok=True).result()
+```
+
+## Global Cleanup Handlers
+
+SDK registers atexit and signal handlers automatically:
+- On process exit: stops all registered sessions
+- On Ctrl+C / SIGTERM: cleanup then exit
+- Second interrupt during cleanup: force exit
+
+## stop() vs delete()
+
+| | stop() | delete() |
+|--|--------|---------|
+| Target | Live sandbox instance | Sandbox by ID |
+| Purpose | Graceful shutdown | Permanent removal |
+| Needs instance? | Yes | No |
+
+## References
+
+- [Cleanup Patterns Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/cleanup-patterns)
diff --git a/.claude/skills/sandbox-config/SKILL.md b/.claude/skills/sandbox-config/SKILL.md
new file mode 100644
index 00000000..cbec8e3e
--- /dev/null
+++ b/.claude/skills/sandbox-config/SKILL.md
@@ -0,0 +1,86 @@
+---
+name: sandbox-config
+description: "Use when configuring CoreWeave Sandbox resources, container images, environment variables, mounted files, ports, or network settings. Covers resources (CPU/memory/GPU), SandboxDefaults, tags, and configuration kwargs."
+disable-model-invocation: false
+---
+
+# Sandbox Configuration
+
+Configure sandbox resources, images, and environment settings.
+
+## Basic Configuration
+
+```python
+from cwsandbox import Sandbox
+
+with Sandbox.run(
+    container_image="python:3.11",
+    max_lifetime_seconds=300,
+    tags=["my-app"],
+) as sandbox:
+    sandbox.exec(["python", "--version"]).result()
+```
+
+## Resources
+
+Request CPU and memory:
+
+```python
+with Sandbox.run(resources={"cpu": "1", "memory": "1Gi"}) as sandbox:
+    sandbox.exec(["python", "compute.py"]).result()
+```
+
+Kubernetes resource syntax:
+
+| Resource | Format | Examples |
+|----------|--------|---------|
+| CPU | Cores or millicores | `"1"`, `"2"`, `"500m"` |
+| Memory | Bytes with unit suffix | `"512Mi"`, `"1Gi"`, `"2Gi"` |
+
+## Mounted Files
+
+Pre-populate files at sandbox startup (read-only):
+
+```python
+with Sandbox.run(
+    mounted_files=[
+        {"path": "/app/config.json", "content": '{"debug": true}'},
+        {"path": "/app/script.py", "content": "print('hello')"},
+    ]
+) as sandbox:
+    sandbox.exec(["python", "/app/script.py"]).result()
+```
+
+## SandboxDefaults
+
+For reusable configuration across multiple sandboxes:
+
+```python
+from cwsandbox import SandboxDefaults
+
+defaults = SandboxDefaults(
+    container_image="python:3.11",
+    tags=("my-app", "production"),
+    resources={"cpu": "2", "memory": "4Gi"},
+)
+```
+
+## Configuration Kwargs
+
+| Parameter | Description |
+|-----------|-------------|
+| `container_image` | Docker image to use |
+| `resources` | CPU/memory/GPU requests |
+| `mounted_files` | Files to mount at startup |
+| `s3_mount` | S3 bucket mount |
+| `ports` | Port mappings |
+| `network` | NetworkOptions for ingress/egress |
+| `secrets` | Secret injection |
+| `max_timeout_seconds` | Max operation timeout |
+| `environment_variables` | Env vars to inject |
+| `annotations` | Kubernetes annotations |
+| `tags` | Filtering tags |
+
+## References
+
+- [CoreWeave Sandbox Configuration Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/sandbox-configuration)
diff --git a/.claude/skills/sandbox-exec/SKILL.md b/.claude/skills/sandbox-exec/SKILL.md
new file mode 100644
index 00000000..cae33d66
--- /dev/null
+++ b/.claude/skills/sandbox-exec/SKILL.md
@@ -0,0 +1,91 @@
+---
+name: sandbox-exec
+description: "Use when executing commands in CoreWeave Sandbox, capturing output, handling errors, timeouts, or running commands in specific directories. Covers exec(), parallel execution, Process, and ProcessResult."
+disable-model-invocation: false
+---
+
+# Running Commands in Sandboxes
+
+Execute commands inside a sandbox and capture output.
+
+## Basic exec()
+
+```python
+sandbox.exec(["echo", "Hello"]).result()
+```
+
+## Options
+
+```python
+# Raise SandboxExecutionError on non-zero exit
+sandbox.exec(["ls", "/nonexistent"], check=True).result()
+
+# Raise SandboxTimeoutError if exceeded
+sandbox.exec(["sleep", "60"], timeout_seconds=5).result()
+
+# Run in specific directory
+sandbox.exec(["ls"], cwd="/app").result()
+
+# Enable stdin streaming
+process = sandbox.exec(["python"], stdin=True)
+process.stdin.write(b"print('hello')\n")
+process.stdin.close()
+result = process.result()
+```
+
+## Parallel Execution
+
+```python
+import cwsandbox
+
+# exec() returns immediately
+p1 = sandbox.exec(["sleep", "1"])
+p2 = sandbox.exec(["sleep", "1"])
+p3 = sandbox.exec(["sleep", "1"])
+
+# .result() blocks ~1 second total, not 3
+cwsandbox.result([p1, p2, p3])
+```
+
+## Process Result
+
+```python
+result = sandbox.exec(["echo", "hello"]).result()
+
+print(result.stdout)        # "hello\n"
+print(result.stderr)        # ""
+print(result.returncode)    # 0
+print(result.command)       # ["echo", "hello"]
+```
+
+## Streaming Output
+
+```python
+process = sandbox.exec(["echo", "hello"])
+for line in process.stdout:
+    print(line, end="")
+result = process.result()
+```
+
+## Error Handling
+
+```python
+from cwsandbox import (
+    SandboxExecutionError,
+    SandboxTimeoutError,
+)
+
+try:
+    sandbox.exec(["false"], check=True).result()
+except SandboxExecutionError as e:
+    print(f"Command failed: {e.exec_result.returncode}")
+
+try:
+    sandbox.exec(["sleep", "10"], timeout_seconds=1).result()
+except SandboxTimeoutError:
+    print("Command timed out")
+```
+
+## References
+
+- [Execution Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/execution)
diff --git a/.claude/skills/sandbox-files/SKILL.md b/.claude/skills/sandbox-files/SKILL.md
new file mode 100644
index 00000000..d6d9d4bb
--- /dev/null
+++ b/.claude/skills/sandbox-files/SKILL.md
@@ -0,0 +1,121 @@
+---
+name: sandbox-files
+description: "Use when reading or writing files in CoreWeave Sandbox, transferring files between host and sandbox, or working with mounted files. Covers read_file(), write_file(), mounted_files, and binary vs text content."
+disable-model-invocation: false
+---
+
+# File Operations in Sandboxes
+
+Read and write files in sandbox environments.
+
+## read_file()
+
+Returns `OperationRef[bytes]` — call `.result()` to get content:
+
+```python
+data = sandbox.read_file("/path/to/file.txt").result()
+print(data.decode())  # Convert bytes to string
+
+# Or work with binary directly
+data = sandbox.read_file("/path/to/image.png").result()
+```
+
+## write_file()
+
+Write bytes or string content:
+
+```python
+# Write string (encoded to bytes automatically)
+sandbox.write_file("/path/to/file.txt", "hello world").result()
+
+# Write bytes
+sandbox.write_file("/path/to/data.bin", b"\x00\x01\x02").result()
+```
+
+## Binary vs Text
+
+```python
+# Text files - decode after reading
+text = sandbox.read_file("/path/to/file.txt").result().decode("utf-8")
+
+# Binary files - work with raw bytes
+png_data = sandbox.read_file("/path/to/image.png").result()
+
+# Write binary
+sandbox.write_file("/path/to/output.bin", b"\x00\x01\x02").result()
+```
+
+## Async Patterns
+
+```python
+# Async: await the OperationRef
+data = await sandbox.read_file("/path/to/file.txt")
+
+# Batch read multiple files
+files = ["/data/1.txt", "/data/2.txt", "/data/3.txt"]
+refs = [sandbox.read_file(f) for f in files]
+data_list = [await ref for ref in refs]
+```
+
+## Directory Operations
+
+Sandbox has no built-in directory listing. Use `exec()` for that:
+
+```python
+result = sandbox.exec(["ls", "-la", "/path"]).result()
+print(result.stdout)
+
+result = sandbox.exec(["find", "/path", "-name", "*.py"]).result()
+```
+
+## Path Handling
+
+Paths are absolute within the sandbox filesystem:
+
+```python
+# Read from sandbox temp dir
+data = sandbox.read_file("/tmp/data.txt").result()
+
+# Write to sandbox temp dir
+sandbox.write_file("/tmp/results.csv", "a,b,c\n1,2,3").result()
+```
+
+## mounted_files
+
+Pre-populate read-only files at sandbox startup:
+
+```python
+with Sandbox.run(
+    mounted_files=[
+        {"path": "/app/config.json", "content": '{"debug": true}'},
+        {"path": "/app/script.py", "content": "print('hello')"},
+    ]
+) as sandbox:
+    # Files already exist when sandbox starts
+    result = sandbox.exec(["python", "/app/script.py"]).result()
+```
+
+## Error Handling
+
+```python
+from cwsandbox import SandboxFileError
+
+try:
+    data = sandbox.read_file("/nonexistent/file.txt").result()
+except SandboxFileError as e:
+    print(f"File error: {e.filepath}")
+```
+
+## Use Cases
+
+| Task | Method |
+|------|--------|
+| Read config file | `read_file("/app/config.json").result()` |
+| Write output data | `write_file("/tmp/results.csv", csv_data).result()` |
+| Load model weights | `read_file("/models/model.pt").result()` |
+| Save checkpoint | `write_file("/checkpoints/step-100.pt", state).result()` |
+| Read log file | `read_file("/var/log/app.log").result()` |
+
+## References
+
+- [File Operations Tutorial](https://docs.coreweave.com/products/coreweave-sandbox/client/tutorial/file-operations)
diff --git a/.claude/skills/sandbox-functions/SKILL.md b/.claude/skills/sandbox-functions/SKILL.md
new file mode 100644
index 00000000..5bdbb004
--- /dev/null
+++ b/.claude/skills/sandbox-functions/SKILL.md
@@ -0,0 +1,104 @@
+---
+name: sandbox-functions
+description: "Use when executing Python functions remotely in CoreWeave Sandbox, using @session.function() decorator, remote function execution, .map() for parallel, or .local() for testing. Covers serialization modes and closure capture."
+disable-model-invocation: false
+---
+
+# Remote Functions
+
+Run Python functions inside sandboxes without writing command strings.
+
+## Basic Usage
+
+```python
+from cwsandbox import Sandbox, SandboxDefaults
+
+with Sandbox.session(SandboxDefaults()) as session:
+    @session.function()
+    def add(x: int, y: int) -> int:
+        return x + y
+
+    result = add.remote(2, 3).result()  # 5
+```
+
+## How It Works
+
+1. Extracts function source via AST
+2. Captures closure variables
+3. Serializes payload (JSON or PICKLE)
+4. Creates ephemeral sandbox
+5. Executes and returns result
+
+## Parallel with .map()
+
+```python
+@session.function()
+def square(x: int) -> int:
+    return x * x
+
+refs = square.map([(1,), (2,), (3,)])
+results = [r.result() for r in refs]  # [1, 4, 9]
+```
+
+## Local Testing
+
+Test without sandbox overhead:
+
+```python
+@session.function()
+def slow_computation(x: int) -> int:
+    return x * 2
+
+# Test locally
+result = slow_computation.local(5)  # 10 - runs in-process
+```
+
+## Serialization Modes
+
+```python
+from cwsandbox import Serialization
+
+@session.function(serialization=Serialization.JSON)  # Default
+def json_func(x: list) -> dict:
+    return {"sum": sum(x)}
+
+@session.function(serialization=Serialization.PICKLE)  # Supports complex objects
+def pickle_func(x: numpy.ndarray) -> numpy.ndarray:
+    return x * 2
+```
+
+| Mode | Description |
+|------|-------------|
+| `JSON` | Safe, human-readable, JSON-serializable types only |
+| `PICKLE` | Complex Python objects, numpy arrays, requires trust |
+
+## Closure Capture
+
+Functions can reference external variables:
+
+```python
+model_path = "/tmp/model.pt"
+
+@session.function()
+def predict(x: int) -> float:
+    import torch
+    model = torch.load(model_path)
+    return model.predict(x)
+```
+
+## Error Handling
+
+```python
+from cwsandbox import FunctionError, FunctionSerializationError
+
+try:
+    result = fragile_function.remote("arg").result()
+except FunctionSerializationError:
+    print("Could not serialize arguments")
+except Exception as e:
+    print(f"Execution failed: {e}")
+```
+
+## References
+
+- [Remote Functions Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/remote-functions)
diff --git a/.claude/skills/sandbox-session/SKILL.md b/.claude/skills/sandbox-session/SKILL.md
new file mode 100644
index 00000000..5916fd61
--- /dev/null
+++ b/.claude/skills/sandbox-session/SKILL.md
@@ -0,0 +1,93 @@
+---
+name: sandbox-session
+description: "Use when managing multiple CoreWeave Sandboxes simultaneously, creating sandbox pools, or sharing configuration across sandboxes via Session. Covers Session, SandboxDefaults, adopt patterns, and parallel execution."
+disable-model-invocation: false
+---
+
+# Multiple Sandboxes with Session
+
+Manage multiple sandboxes with shared defaults and automatic cleanup.
+
+## Basic Session Usage
+
+```python
+from cwsandbox import Sandbox, SandboxDefaults
+
+defaults = SandboxDefaults(
+    container_image="python:3.11",
+    tags=("my-app",),
+)
+
+with Sandbox.session(defaults) as session:
+    sb1 = session.sandbox()
+    sb2 = session.sandbox()
+
+    p1 = sb1.exec(["echo", "one"])
+    p2 = sb2.exec(["echo", "two"])
+
+    print(p1.result().stdout, p2.result().stdout)
+# All sandboxes cleaned up automatically
+```
+
+## Session Methods
+
+| Method | Description |
+|--------|-------------|
+| `session.sandbox(command, args, **kwargs)` | Create sandbox with session defaults |
+| `session.function(**kwargs)` | Decorator for remote functions |
+| `session.adopt(sandbox)` | Register existing sandbox for cleanup |
+| `session.close()` | Cleanup all sandboxes |
+| `session.list(tags, status, adopt)` | Find sandboxes matching criteria |
+| `session.from_id(id, adopt)` | Attach to sandbox by ID |
+
+## Sandbox Pools
+
+Pre-start all sandboxes for faster first-execution:
+
+```python
+with Session(defaults) as session:
+    sandboxes = [session.sandbox() for _ in range(5)]
+    
+    # Pre-start all at once
+    refs = [sb.start() for sb in sandboxes]
+    [r.result() for r in refs]
+
+    # Now exec on each runs immediately
+    processes = [sb.exec(["echo", f"hello-{i}"]) for i, sb in enumerate(sandboxes)]
+```
+
+## Adopt Existing Sandboxes
+
+Attach to sandboxes discovered via `Sandbox.list()` or `Sandbox.from_id()`:
+
+```python
+from cwsandbox import Sandbox, SandboxStatus
+
+# Find existing sandboxes
+sandboxes = Sandbox.list(
+    tags=["training-run"],
+    status=[SandboxStatus.RUNNING],
+).result()
+
+with Session(defaults) as session:
+    for sb in sandboxes:
+        session.adopt(sb)  # Will cleanup when session closes
+```
+
+## Parallel Execution with wait()
+
+```python
+import cwsandbox
+
+with Session(defaults) as session:
+    sandboxes = [session.sandbox() for _ in range(10)]
+    
+    processes = [sb.exec(["python", "-c", f"print({i**2})"]) for i, sb in enumerate(sandboxes)]
+    
+    # Wait for all to complete
+    done, pending = cwsandbox.wait(processes)
+```
+
+## References
+
+- [Sessions Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/sessions)
diff --git a/.claude/skills/sandbox-streaming/SKILL.md b/.claude/skills/sandbox-streaming/SKILL.md
new file mode 100644
index 00000000..74d2e9fb
--- /dev/null
+++ b/.claude/skills/sandbox-streaming/SKILL.md
@@ -0,0 +1,123 @@
+---
+name: sandbox-streaming
+description: "Use when streaming command output in real-time from CoreWeave Sandbox, handling stdin streaming, line buffering, or continuous log following. Covers StreamReader, StreamWriter, process.stdout iteration, and follow mode."
+disable-model-invocation: false
+---
+
+# Streaming in Sandboxes
+
+Stream command output in real-time and handle stdin.
+
+## Streaming stdout
+
+Iterate over output as lines arrive (not all at once after completion):
+
+```python
+process = sandbox.exec(["python", "-c", "import time; [print(i) for i in range(5)]"])
+
+for line in process.stdout:
+    print(line, end="")
+
+result = process.result()
+```
+
+## Async Streaming
+
+```python
+process = sandbox.exec(["python", "-c", "import time; [print(i) for i in range(5)]"])
+
+async for line in process.stdout:
+    print(line, end="")
+
+result = await process
+```
+
+## stdin Streaming
+
+Enable stdin with `stdin=True` on `exec()`:
+
+```python
+process = sandbox.exec(["cat"], stdin=True)
+
+# Write bytes
+process.stdin.write(b"hello ").result()
+
+# Write text line (adds newline)
+process.stdin.writeline("world").result()
+
+# Close to signal EOF
+process.stdin.close().result()
+
+result = process.result()
+print(result.stdout)  # "hello world\n"
+```
+
+## Streaming Logs
+
+Stream the sandbox main process logs (PID 1):
+
+```python
+# Get last 10 lines
+reader = sandbox.stream_logs(tail_lines=10)
+
+for line in reader:
+    print(line, end="")
+
+# Follow mode (like tail -f)
+reader = sandbox.stream_logs(follow=True)
+
+for line in reader:
+    print(line, end="")
+    # Don't forget to close when done
+    if some_condition:
+        reader.close()
+```
+
+## Line Buffering
+
+Python buffers stdout when not connected to a TTY. Force unbuffered:
+
+```python
+# Option 1: Use -u flag
+process = sandbox.exec(["python", "-u", "script.py"])
+
+# Option 2: Set PYTHONUNBUFFERED env
+process = sandbox.exec(["sh", "-c", "PYTHONUNBUFFERED=1 python script.py"])
+```
+
+## StreamReader Methods
+
+```python
+reader = sandbox.stream_logs(follow=True)
+
+# Sync iteration
+for line in reader:
+    print(line)
+
+# Async iteration
+async for line in reader:
+    print(line)
+
+# Close the stream
+reader.close()
+```
+
+## StreamWriter Methods
+
+```python
+process = sandbox.exec(["python"], stdin=True)
+
+process.stdin.write(b"print('hello')\n")   # Write bytes (newline terminates the statement)
+process.stdin.writeline("exit()")           # Write line with newline
+process.stdin.close()                        # Signal EOF - returns OperationRef[None]
+```
+
+## Important Notes
+
+- `stream_logs()` only captures stdout/stderr from the main command passed to `Sandbox.run()` — not from `exec()` commands
+- Always close `StreamReader` in follow mode to stop the background producer
+- stdin streaming requires `stdin=True` on `exec()`
+
+## References
+
+- [Streaming Output Tutorial](https://docs.coreweave.com/products/coreweave-sandbox/client/tutorial/streaming-output)
diff --git a/.claude/skills/sandbox-troubleshoot/SKILL.md b/.claude/skills/sandbox-troubleshoot/SKILL.md
new file mode 100644
index 00000000..116c5242
--- /dev/null
+++ b/.claude/skills/sandbox-troubleshoot/SKILL.md
@@ -0,0 +1,182 @@
+---
+name: sandbox-troubleshoot
+description: "Use when debugging CoreWeave Sandbox issues, authentication errors, timeouts, streaming problems, or orphaned sandboxes. Covers error messages, exception handling, and common fixes."
+disable-model-invocation: false
+---
+
+# Troubleshooting CoreWeave Sandbox
+
+Common errors and solutions.
+
+## Authentication Issues
+
+**Error**: `CWSandboxAuthenticationError` or `WandbAuthError`
+
+Auth resolution order:
+1. `CWSANDBOX_API_KEY` env var (recommended)
+2. `WANDB_API_KEY` + `WANDB_ENTITY_NAME` env vars
+3. `~/.netrc` (api.wandb.ai) + `WANDB_ENTITY_NAME`
+
+```bash
+# Check what's configured
+echo $CWSANDBOX_API_KEY
+echo $WANDB_API_KEY
+echo $WANDB_ENTITY_NAME
+```
+
+| Issue | Solution |
+|-------|----------|
+| No credentials | Set `CWSANDBOX_API_KEY` |
+| Invalid/expired token | [Create new token](https://console.coreweave.com/tokens) |
+| W&B key but no entity | Set `WANDB_ENTITY_NAME` |
+| Netrc parse errors | Check `~/.netrc` syntax and permissions |
+
+## Timeout Issues
+
+Two timeout types:
+
+| Timeout | Scope | Where Set |
+|---------|-------|-----------|
+| `timeout_seconds` | Per-exec | `exec()` parameter |
+| `max_lifetime_seconds` | Sandbox lifetime | `SandboxDefaults` |
+
+```python
+# Per-exec timeout
+sandbox.exec(["slow_script.py"], timeout_seconds=60.0).result()
+
+# Sandbox lifetime
+defaults = SandboxDefaults(max_lifetime_seconds=3600)
+```
+
+## Exit Code Interpretation
+
+| Code | Meaning |
+|------|---------|
+| 0 | Success |
+| 1 | General error |
+| 2 | Misuse of command |
+| 126 | Not executable |
+| 127 | Command not found |
+| 128+N | Killed by signal N |
+
+```python
+# Raise on non-zero
+sandbox.exec(["false"], check=True).result()
+
+# Or catch
+result = sandbox.exec(["false"]).result()
+if result.returncode != 0:
+    print(f"Failed: {result.stderr}")
+```
+
+## Streaming Output Issues
+
+Output appears delayed or all at once? Python buffers stdout:
+
+```python
+# Fix: Use -u flag
+process = sandbox.exec(["python", "-u", "script.py"])
+
+# Or set env var
+process = sandbox.exec(["sh", "-c", "PYTHONUNBUFFERED=1 python script.py"])
+```
+
+## Common Error Messages
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `CWSandboxAuthenticationError` | Missing/invalid credentials | Check `CWSANDBOX_API_KEY` |
+| `WandbAuthError: WANDB_ENTITY_NAME not set` | W&B key without entity | Set `WANDB_ENTITY_NAME` |
+| `SandboxNotRunningError` | Operation on stopped sandbox | Check `sandbox.status` |
+| `SandboxTimeoutError` | Command exceeded timeout | Increase `timeout_seconds` |
+| `SandboxTerminatedError` | External kill / lifetime exceeded | Check `max_lifetime_seconds` |
+| `SandboxFailedError` | Startup failed | Check container image, resources |
+| `SandboxNotFoundError` | Sandbox deleted/never existed | Verify sandbox ID |
+| `SandboxExecutionError` | Non-zero exit with `check=True` | Check `e.exec_result.stderr` |
+| `SandboxFileError` | File operation failed | Check path, permissions |
+| `FunctionSerializationError` | Can't serialize args | Use JSON types or PICKLE |
+| `AsyncFunctionError` | Async func with `@session.function()` | Use sync functions only |
+
+## Exception Handling
+
+```python
+from cwsandbox import (
+    SandboxExecutionError,
+    SandboxTimeoutError,
+    SandboxFileError,
+    SandboxNotRunningError,
+)
+
+try:
+    result = sandbox.exec(["python", "-c", "import nonexistent"], check=True).result()
+except SandboxExecutionError as e:
+    print(f"Exit code: {e.exec_result.returncode}")
+    print(f"stderr: {e.exec_result.stderr}")
+except SandboxTimeoutError:
+    print("Command timed out")
+except SandboxFileError as e:
+    print(f"File error at {e.filepath}")
+except SandboxNotRunningError:
+    print("Sandbox not running")
+```
+
+## Orphaned Sandboxes
+
+Sandboxes left running after script crashes. Prevention:
+
+```python
+# Recommended: always use context managers
+with Sandbox.run() as sandbox:
+    result = sandbox.exec(["echo", "hello"]).result()
+# Always stopped
+```
+
+Cleanup existing orphans:
+
+```python
+from cwsandbox import Sandbox, SandboxStatus
+
+# Find by tag
+sandboxes = Sandbox.list(tags=["my-job"], include_stopped=True).result()
+
+for sb in sandboxes:
+    if sb.status in (SandboxStatus.RUNNING, SandboxStatus.PENDING):
+        Sandbox.delete(sb.sandbox_id).result()
+```
+
+## Startup Failures
+
+`SandboxFailedError` on creation:
+
+1. Check container image exists
+2. Verify resources are valid
+3. Check network configuration
+
+```python
+try:
+    sandbox = Sandbox.run(container_image="nonexistent-image").wait()
+except SandboxFailedError:
+    print("Failed to start - check image name")
+```
+
+## Integration Test Hangs
+
+If tests hang:
+1. Use `.result()`, not `await` in sync tests
+2. Wait for RUNNING status before file ops
+3. Check sandbox reaches RUNNING before operations
+
+```python
+# Correct
+sandbox.wait()  # Wait until RUNNING
+result = sandbox.exec(["echo", "hello"]).result()
+```
+
+## Getting Help
+
+- Docs: https://docs.coreweave.com/products/coreweave-sandbox
+- Issues: https://github.com/coreweave/cwsandbox-client
+
+## References
+
+- [Troubleshooting Guide](https://docs.coreweave.com/products/coreweave-sandbox/client/guides/troubleshooting)