diff --git a/README.md b/README.md
index 0b9804402..96156a6e3 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ We now ask: **What if our agent was 100x simpler, and still worked nearly as wel
 - **Minimal**: Just some 100 lines of python for the [agent class](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (and a bit more for the [environment](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
 [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), and [run script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
 - **Performant:** Scores >74% on the [SWE-bench verified benchmark](https://www.swebench.com/); starts much faster than Claude Code
-- **Deployable:** Supports **local environments**, **docker/podman**, **singularity/apptainer**, **bublewrap**, **contree**, and more
+- **Deployable:** Supports **local environments**, **docker/podman**, **singularity/apptainer**, **bubblewrap**, **contree**, **[E2B](https://e2b.dev)** (no local Docker required), and more
 - **Compatible:** Supports all models via **litellm**, **openrouter**, **portkey**, and more. Support for `/completion` and `/response` endpoints, interleaved thinking etc.
 - Built by the Princeton & Stanford team behind [SWE-bench](https://swebench.com), [SWE-agent](https://swe-agent.com), and more
 - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
diff --git a/docs/advanced/environments.md b/docs/advanced/environments.md
index 7d672755d..3e30d3959 100644
--- a/docs/advanced/environments.md
+++ b/docs/advanced/environments.md
@@ -30,3 +30,5 @@ On top, there are a few more specialized environment classes that you can use:
 
 * **`contree`** ([`ContreeEnvironment`](../reference/environments/contree.md)) - Uses [ConTree](https://contree.dev/) for safe code execution sandboxing. Platform that built for agents and supports Git-like execution.
 
+* **`e2b`** ([`E2BEnvironment`](../reference/environments/e2b.md)) - [E2B](https://e2b.dev) cloud sandbox execution. Converts Docker images into persistent E2B templates so **no local Docker daemon is required**. Suitable for large-scale, fully-remote SWE-bench evaluations.
+
diff --git a/docs/reference/environments/e2b.md b/docs/reference/environments/e2b.md
new file mode 100644
index 000000000..25085afd1
--- /dev/null
+++ b/docs/reference/environments/e2b.md
@@ -0,0 +1,78 @@
+# E2B
+
+!!! note "E2B Environment class"
+
+    - [Read on GitHub](https://github.com/swe-agent/mini-swe-agent/blob/main/src/minisweagent/environments/extra/e2b.py)
+    - Requires an [E2B](https://e2b.dev) account and API key
+
+    ??? note "Full source code"
+
+        ```python
+        --8<-- "src/minisweagent/environments/extra/e2b.py"
+        ```
+
+::: minisweagent.environments.extra.e2b
+
+This environment executes commands in [E2B](https://e2b.dev) cloud sandboxes.
+E2B converts Docker images into persistent sandbox templates, so **no local Docker daemon is required** — everything runs in the cloud.
+
+This makes it well-suited for:
+
+- Large-scale, fully-remote SWE-bench evaluations
+- Environments where Docker is unavailable (CI, serverless)
+- Parallel agent runs without managing local container infrastructure
+
+## How it works
+
+The first time a Docker image is used, `E2BEnvironment` builds a persistent E2B template from that image (via `Template.build`). Subsequent runs reuse the cached template, so the build cost is paid only once per unique image.
+
+## Setup
+
+1. Install the E2B extra:
+   ```bash
+   pip install "mini-swe-agent[e2b]"
+   ```
+
+2. Set your E2B API key:
+   ```bash
+   export E2B_API_KEY="your-e2b-api-key"
+   ```
+
+## Usage
+
+Evaluate on SWE-bench using E2B as the sandbox backend:
+```bash
+mini-extra swebench \
+    --subset verified \
+    --split test \
+    --workers 50 \
+    --environment-class e2b
+```
+
+Or specify it in your YAML config:
+```yaml
+environment:
+  environment_class: e2b
+  sandbox_timeout: 3600  # seconds the sandbox stays alive
+  cpu_count: 2
+  memory_mb: 2048
+```
+
+## Configuration reference
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `image` | *(required)* | Docker Hub image to use as the sandbox base |
+| `cwd` | `/` | Default working directory for commands |
+| `timeout` | `30` | Per-command timeout in seconds |
+| `env` | `{}` | Environment variables set in every command |
+| `sandbox_timeout` | `3600` | How long the sandbox stays alive (seconds) |
+| `cpu_count` | `2` | vCPUs allocated to the sandbox |
+| `memory_mb` | `2048` | Memory allocated to the sandbox (MiB) |
+| `build_timeout` | `1800` | Max seconds to wait for a template build |
+| `skip_cache` | `False` | Force-rebuild the template even if it exists |
+| `api_key` | `None` | E2B API key (falls back to `E2B_API_KEY` env var) |
+| `registry_username` | `None` | Username for private Docker registry auth |
+| `registry_password` | `None` | Password for private Docker registry auth |
+
+{% include-markdown "../../_footer.md" %}
diff --git a/pyproject.toml b/pyproject.toml
index 808e9e10b..fdfe58008 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -81,6 +81,10 @@ contree = [
     "contree-sdk>=0.2.0",
 ]
 
+e2b = [
+    "e2b>=1.0.0",
+]
+
 [project.urls]
 Documentation = "https://mini-swe-agent.com/latest/"
 Repository = "https://github.com/SWE-agent/mini-swe-agent"
diff --git a/src/minisweagent/environments/__init__.py b/src/minisweagent/environments/__init__.py
index 08ae2c65c..8104b6fd7 100644
--- a/src/minisweagent/environments/__init__.py
+++ b/src/minisweagent/environments/__init__.py
@@ -13,6 +13,7 @@
     "swerex_modal": "minisweagent.environments.extra.swerex_modal.SwerexModalEnvironment",
     "bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
     "contree": "minisweagent.environments.extra.contree.ContreeEnvironment",
+    "e2b": "minisweagent.environments.extra.e2b.E2BEnvironment",
 }
 
 
diff --git a/src/minisweagent/environments/extra/e2b.py b/src/minisweagent/environments/extra/e2b.py
new file mode 100644
index 000000000..144380b0c
--- /dev/null
+++ b/src/minisweagent/environments/extra/e2b.py
@@ -0,0 +1,306 @@
+"""E2B cloud sandbox environment implementation."""
+
+from __future__ import annotations
+
+import atexit
+import concurrent.futures
+import hashlib
+import logging
+import re
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+# Module-level registry of live sandboxes for best-effort cleanup on exit
+# (covers Ctrl+C and unhandled exceptions where __del__ may not be called).
+_active_sandboxes: set[E2BEnvironment] = set()
+
+
+def _cleanup_all_sandboxes() -> None:
+    """Kill all sandboxes that are still alive at interpreter shutdown."""
+    for env in list(_active_sandboxes):
+        env.cleanup()
+
+
+atexit.register(_cleanup_all_sandboxes)
+
+
+class E2BEnvironmentConfig(BaseModel):
+    image: str
+    """Docker Hub image name to use as the E2B template base.
+    Example: ``'swebench/sweb.eval.x86_64.django__django-11099:latest'``
+    """
+    cwd: str = "/"
+    """Working directory in which to execute commands."""
+    timeout: int = 30
+    """Timeout for executing commands in the sandbox."""
+    env: dict[str, str] = Field(default_factory=dict)
+    """Environment variables to set when executing commands."""
+    sandbox_timeout: int = 3600
+    """How long (in seconds) the sandbox is allowed to stay alive."""
+
+    # Template build options (passed to Template.build())
+    cpu_count: int = 2
+    """Number of vCPUs allocated to the sandbox."""
+    memory_mb: int = 2048
+    """Memory allocated to the sandbox in MiB. Default is higher than E2B's 1024 MiB default
+    to accommodate larger SWE-bench images."""
+    skip_cache: bool = False
+    """If True, force-rebuild the template even if it already exists."""
+    tags: list[str] = Field(default_factory=list)
+    """Optional tags to attach to the template."""
+    build_timeout: int = 1800
+    """Timeout for template builds in seconds (default 30 min to handle large images)."""
+
+    # E2B authentication (can also be set via the E2B_API_KEY env var)
+    api_key: str | None = None
+    """E2B API key. Falls back to the E2B_API_KEY environment variable."""
+
+    # Private registry credentials (passed to Template().from_image())
+    registry_username: str | None = None
+    """Username for authenticating against a private Docker registry."""
+    registry_password: str | None = None
+    """Password for authenticating against a private Docker registry."""
+
+
+class E2BTemplateManager:
+    """Converts Docker images to E2B templates and manages their lifecycle.
+
+    Can be used independently of :class:`E2BEnvironment` for pre-building
+    templates in batch scripts.
+    """
+
+    def __init__(self, config: E2BEnvironmentConfig) -> None:
+        self.config = config
+        self.logger = logging.getLogger("minisweagent.environment.e2b")
+
+    @staticmethod
+    def _image_to_template_name(docker_image: str) -> str:
+        """Deterministically map a Docker image name to a valid E2B template name.
+
+        An 8-character SHA-256 hex suffix is appended to avoid collisions between
+        images that produce the same sanitized prefix. The result is at most
+        63 characters and contains only lower-case alphanumerics and hyphens.
+
+        Example::
+
+            'swebench/sweb.eval.x86_64.django__django-11099:latest'
+            → 'swebench-sweb-eval-x86-64-django--django-11099-latest-a1b2c3d4'
+        """
+        hash_suffix = hashlib.sha256(docker_image.encode()).hexdigest()[:8]
+        name = re.sub(r"[^a-zA-Z0-9-]", "-", docker_image)
+        name = re.sub(r"-{3,}", "--", name)
+        name = name.lower()
+        # Reserve 9 characters for "-" + 8-char hash suffix → prefix max 54 chars
+        prefix = name[:54].strip("-")
+        if not prefix:
+            return hash_suffix
+        return f"{prefix}-{hash_suffix}"
+
+    def get_or_build(self, docker_image: str) -> str:
+        """Return the E2B template name for *docker_image*, building it if needed."""
+        from e2b import Template
+
+        template_name = self._image_to_template_name(docker_image)
+        if not Template.exists(template_name, api_key=self.config.api_key) or self.config.skip_cache:
+            self.logger.info(
+                "E2B template %s not found. Starting build (up to %d seconds)...",
+                template_name,
+                self.config.build_timeout,
+            )
+            self._build_template(docker_image, template_name)
+            self.logger.info("E2B template %s built successfully.", template_name)
+        else:
+            self.logger.debug("E2B template %s already exists.", template_name)
+        return template_name
+
+    def rebuild(self, docker_image: str) -> str:
+        """Force-rebuild the E2B template for *docker_image*."""
+        template_name = self._image_to_template_name(docker_image)
+        self.logger.info("Rebuilding E2B template %s...", template_name)
+        self._build_template(docker_image, template_name)
+        self.logger.info("E2B template %s rebuilt successfully.", template_name)
+        return template_name
+
+    def _build_template(self, docker_image: str, template_name: str) -> None:
+        """Build an E2B template from *docker_image*.
+
+        Uses :class:`concurrent.futures.ThreadPoolExecutor` for timeout
+        enforcement because ``signal.alarm`` only works on the main thread
+        and this method may be called from worker threads.
+        """
+        from e2b import Template
+
+        template = Template().from_image(
+            docker_image,
+            username=self.config.registry_username,
+            password=self.config.registry_password,
+        )
+
+        def _do_build() -> None:
+            Template.build(
+                template,
+                template_name,
+                cpu_count=self.config.cpu_count,
+                memory_mb=self.config.memory_mb,
+                skip_cache=self.config.skip_cache,
+                tags=self.config.tags or None,
+                api_key=self.config.api_key,
+            )
+
+        executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        future = executor.submit(_do_build)
+        try:
+            future.result(timeout=self.config.build_timeout)
+        except concurrent.futures.TimeoutError as e:
+            executor.shutdown(wait=False, cancel_futures=True)
+            msg = f"E2B template build timed out after {self.config.build_timeout}s: {template_name}"
+            raise TimeoutError(msg) from e
+        except Exception:
+            executor.shutdown(wait=False, cancel_futures=True)
+            raise
+        else:
+            executor.shutdown(wait=True)
+
+
+class E2BEnvironment:
+    """Executes bash commands inside an E2B cloud sandbox.
+
+    `E2B <https://e2b.dev>`_ provides isolated cloud sandboxes that can run
+    arbitrary Docker images without requiring a local Docker daemon. This
+    makes it suitable for large-scale, fully-remote SWE-bench evaluations.
+
+    The first time a Docker image is used it is converted into a persistent
+    E2B template; subsequent runs reuse the cached template.
+
+    See :class:`E2BEnvironmentConfig` for keyword arguments.
+    """
+
+    #: Config fields that must never leak into prompts or saved trajectories.
+    _SECRET_FIELDS = {"api_key", "registry_password", "registry_username"}
+
+    @staticmethod
+    def _is_stale_template_error(e: Exception) -> bool:
+        """Return True if *e* is a 'template not found' (HTTP 404) error.
+
+        e2b surfaces a missing template as a ``SandboxException`` whose message is
+        formatted as ``"{status_code}: {message}"`` (see ``e2b.api.handle_api_exception``).
+        Match the leading 404 status code rather than a bare ``"404"`` substring,
+        which could appear inside a sandbox id or path and trigger a costly,
+        unnecessary template rebuild.
+        """
+        match = re.match(r"\s*(\d{3})\b", str(e))
+        return match is not None and match.group(1) == "404"
+
+    def __init__(self, **kwargs: Any) -> None:
+        from e2b import Sandbox
+        from e2b.exceptions import SandboxException
+
+        self.logger = logging.getLogger("minisweagent.environment.e2b")
+        self.config = E2BEnvironmentConfig(**kwargs)
+        manager = E2BTemplateManager(self.config)
+        template_name = manager.get_or_build(self.config.image)
+        self.logger.info("Creating E2B sandbox (template: %s)...", template_name)
+        try:
+            self.sandbox = Sandbox.create(
+                template=template_name,
+                timeout=self.config.sandbox_timeout,
+                api_key=self.config.api_key,
+            )
+        except SandboxException as e:
+            if not self._is_stale_template_error(e):
+                raise
+            self.logger.warning("Template %s not found (stale cache). Rebuilding...", template_name)
+            manager.rebuild(self.config.image)
+            self.sandbox = Sandbox.create(
+                template=template_name,
+                timeout=self.config.sandbox_timeout,
+                api_key=self.config.api_key,
+            )
+        self.logger.info("E2B sandbox ready (id: %s)", self.sandbox.sandbox_id)
+        _active_sandboxes.add(self)
+
+    def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
+        """Execute a command in the sandbox and return the output."""
+        command = action.get("command", "") if isinstance(action, dict) else action
+        try:
+            result = self.sandbox.commands.run(
+                command,
+                user="root",
+                cwd=cwd or self.config.cwd,
+                timeout=timeout or self.config.timeout,
+                envs=self.config.env or None,
+            )
+            output: dict[str, Any] = {
+                "output": result.stdout + result.stderr,
+                "returncode": result.exit_code,
+                "exception_info": "",
+            }
+        except Exception as e:
+            # e2b raises ``CommandExitException`` (carrying stdout/stderr/exit_code)
+            # for any non-zero exit. That is a normal command result, not an
+            # infrastructure error, so surface the real output and exit code
+            # instead of masking it as a generic failure.
+            if (exit_code := getattr(e, "exit_code", None)) is not None:
+                output = {
+                    "output": getattr(e, "stdout", "") + getattr(e, "stderr", ""),
+                    "returncode": exit_code,
+                    "exception_info": "",
+                }
+            else:
+                output = {
+                    "output": "",
+                    "returncode": -1,
+                    "exception_info": f"An error occurred while executing the command: {e}",
+                    "extra": {"exception_type": type(e).__name__, "exception": str(e)},
+                }
+        self._check_finished(output)
+        return output
+
+    def _check_finished(self, output: dict) -> None:
+        """Raise :class:`~minisweagent.exceptions.Submitted` when the task-submission marker is detected."""
+        from minisweagent.exceptions import Submitted
+
+        lines = output.get("output", "").lstrip().splitlines(keepends=True)
+        if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
+            submission = "".join(lines[1:])
+            raise Submitted(
+                {
+                    "role": "exit",
+                    "content": submission,
+                    "extra": {"exit_status": "Submitted", "submission": submission},
+                }
+            )
+
+    def get_template_vars(self, **kwargs: Any) -> dict[str, Any]:
+        import platform
+
+        from minisweagent.utils.serialize import recursive_merge
+
+        config = self.config.model_dump(exclude=self._SECRET_FIELDS)
+        return recursive_merge(config, platform.uname()._asdict(), kwargs)
+
+    def serialize(self) -> dict:
+        return {
+            "info": {
+                "config": {
+                    "environment": self.config.model_dump(
+                        mode="json",
+                        exclude=self._SECRET_FIELDS,
+                    ),
+                    "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
+                }
+            }
+        }
+
+    def cleanup(self) -> None:
+        _active_sandboxes.discard(self)
+        sandbox = getattr(self, "sandbox", None)
+        if sandbox is not None:
+            try:
+                sandbox.kill()
+            except Exception:
+                pass
+
+    def __del__(self) -> None:
+        self.cleanup()
diff --git a/src/minisweagent/run/benchmarks/swebench.py b/src/minisweagent/run/benchmarks/swebench.py
index a708eee51..65a284af8 100644
--- a/src/minisweagent/run/benchmarks/swebench.py
+++ b/src/minisweagent/run/benchmarks/swebench.py
@@ -94,7 +94,7 @@ def get_sb_environment(config: dict, instance: dict) -> Environment:
     env_config = config.setdefault("environment", {})
     env_config["environment_class"] = env_config.get("environment_class", "docker")
     image_name = get_swebench_docker_image_name(instance)
-    if env_config["environment_class"] in ["docker", "swerex_modal"]:
+    if env_config["environment_class"] in ["docker", "swerex_modal", "e2b"]:
         env_config["image"] = image_name
     elif env_config["environment_class"] in ["singularity", "contree"]:
         env_config["image"] = "docker://" + image_name
@@ -133,6 +133,20 @@ def remove_from_preds_file(output_path: Path, instance_id: str):
             output_path.write_text(json.dumps(output_data, indent=2))
 
 
+def _teardown_environment(env: Environment | None) -> None:
+    """Release the per-instance environment resource (container / cloud sandbox).
+
+    Environments expose teardown as either ``cleanup()`` (docker, singularity,
+    bubblewrap, e2b) or ``stop()`` (swerex_modal); call whichever exists.
+    """
+    if env is None:
+        return
+    for teardown_name in ("cleanup", "stop"):
+        if callable(teardown := getattr(env, teardown_name, None)):
+            teardown()
+            break
+
+
 def process_instance(
     instance: dict,
     output_dir: Path,
@@ -156,6 +170,7 @@ def process_instance(
     result = None
     extra_info = {}
 
+    env = None
     try:
         env = get_sb_environment(config, instance)
         agent = ProgressTrackingAgent(
@@ -173,22 +188,28 @@ def process_instance(
         exit_status, result = type(e).__name__, ""
         extra_info = {"traceback": traceback.format_exc(), "exception_str": str(e)}
     finally:
-        if agent is not None:
-            traj_path = instance_dir / f"{instance_id}.traj.json"
-            agent.save(
-                traj_path,
-                {
-                    "info": {
-                        "exit_status": exit_status,
-                        "submission": result,
-                        **extra_info,
+        # Teardown lives in its own finally so the environment (container / cloud
+        # sandbox) is always released even if saving the trajectory or updating
+        # the predictions file raises.
+        try:
+            if agent is not None:
+                traj_path = instance_dir / f"{instance_id}.traj.json"
+                agent.save(
+                    traj_path,
+                    {
+                        "info": {
+                            "exit_status": exit_status,
+                            "submission": result,
+                            **extra_info,
+                        },
+                        "instance_id": instance_id,
                     },
-                    "instance_id": instance_id,
-                },
-            )
-            logger.info(f"Saved trajectory to '{traj_path}'")
-        update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
-        progress_manager.on_instance_end(instance_id, exit_status)
+                )
+                logger.info(f"Saved trajectory to '{traj_path}'")
+            update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
+            progress_manager.on_instance_end(instance_id, exit_status)
+        finally:
+            _teardown_environment(env)
 
 
 def filter_instances(
diff --git a/tests/environments/extra/test_e2b.py b/tests/environments/extra/test_e2b.py
new file mode 100644
index 000000000..ea7ab48e1
--- /dev/null
+++ b/tests/environments/extra/test_e2b.py
@@ -0,0 +1,304 @@
+"""Tests for the E2B cloud sandbox environment."""
+
+from types import ModuleType
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from minisweagent.environments.extra.e2b import (
+    E2BEnvironment,
+    E2BEnvironmentConfig,
+    E2BTemplateManager,
+)
+from minisweagent.exceptions import Submitted
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_mock_e2b() -> ModuleType:
+    """Return a minimal mock of the `e2b` module."""
+    mock_e2b = MagicMock()
+    mock_e2b.Template = MagicMock()
+    mock_e2b.Sandbox = MagicMock()
+    return mock_e2b
+
+
+def _make_env(**kwargs) -> E2BEnvironment:
+    """Create an E2BEnvironment without touching real E2B infrastructure."""
+    with patch.object(E2BEnvironment, "__init__", lambda self, **kw: None):
+        env = E2BEnvironment()
+        env.config = E2BEnvironmentConfig(image="swebench/test-image:latest", **kwargs)
+        env.sandbox = MagicMock()
+        env.logger = MagicMock()
+        return env
+
+
+# ---------------------------------------------------------------------------
+# E2BEnvironmentConfig
+# ---------------------------------------------------------------------------
+
+
+class TestE2BEnvironmentConfig:
+    def test_defaults(self):
+        cfg = E2BEnvironmentConfig(image="python:3.11")
+        assert cfg.cwd == "/"
+        assert cfg.timeout == 30
+        assert cfg.sandbox_timeout == 3600
+        assert cfg.cpu_count == 2
+        assert cfg.memory_mb == 2048
+        assert cfg.skip_cache is False
+        assert cfg.tags == []
+        assert cfg.build_timeout == 1800
+        assert cfg.api_key is None
+        assert cfg.registry_username is None
+        assert cfg.registry_password is None
+
+    def test_custom_values(self):
+        cfg = E2BEnvironmentConfig(image="my-image:tag", sandbox_timeout=7200, cpu_count=4)
+        assert cfg.sandbox_timeout == 7200
+        assert cfg.cpu_count == 4
+
+
+# ---------------------------------------------------------------------------
+# E2BTemplateManager._image_to_template_name
+# ---------------------------------------------------------------------------
+
+
+class TestImageToTemplateName:
+    def test_basic_sanitization(self):
+        name = E2BTemplateManager._image_to_template_name("python:3.11")
+        assert re.match(r"^[a-z0-9-]+$", name), f"Invalid chars in: {name}"
+
+    def test_length_limit(self):
+        long_image = "a" * 100 + ":latest"
+        name = E2BTemplateManager._image_to_template_name(long_image)
+        assert len(name) <= 63
+
+    def test_deterministic(self):
+        image = "swebench/sweb.eval.x86_64.django__django-11099:latest"
+        assert E2BTemplateManager._image_to_template_name(image) == E2BTemplateManager._image_to_template_name(image)
+
+    def test_different_images_different_names(self):
+        a = E2BTemplateManager._image_to_template_name("image-a:latest")
+        b = E2BTemplateManager._image_to_template_name("image-b:latest")
+        assert a != b
+
+    def test_no_triple_hyphens(self):
+        # Dots and slashes become hyphens; runs of three or more hyphens are collapsed to "--"
+        name = E2BTemplateManager._image_to_template_name("a/b/c.d.e:latest")
+        assert "---" not in name
+
+    def test_empty_prefix_falls_back_to_hash(self):
+        # An image that sanitizes to only hyphens should return just the hash
+        name = E2BTemplateManager._image_to_template_name("---")
+        assert len(name) == 8  # just the 8-char sha256 prefix
+
+
+import re  # noqa: E402  (used by TestImageToTemplateName above; TODO: move to the top-of-file imports)
+
+# ---------------------------------------------------------------------------
+# E2BEnvironment._is_stale_template_error
+# ---------------------------------------------------------------------------
+
+
+class TestIsStaleTemplateError:
+    def test_404_status_prefix_matches(self):
+        # e2b formats API errors as "{status_code}: {message}".
+        exc = Exception("404: template foo not found")
+        assert E2BEnvironment._is_stale_template_error(exc) is True
+
+    def test_other_status_does_not_match(self):
+        assert E2BEnvironment._is_stale_template_error(Exception("500: internal error")) is False
+        assert E2BEnvironment._is_stale_template_error(Exception("429: rate limited")) is False
+
+    def test_incidental_404_substring_does_not_match(self):
+        # "404" appearing inside an id/path must not trigger a costly rebuild.
+        assert E2BEnvironment._is_stale_template_error(Exception("Sandbox abc404def failed")) is False
+        assert E2BEnvironment._is_stale_template_error(Exception("error in /path/404/x")) is False
+
+
+# ---------------------------------------------------------------------------
+# E2BEnvironment.execute
+# ---------------------------------------------------------------------------
+
+
+class TestE2BEnvironmentExecute:
+    def test_execute_dict_action(self):
+        env = _make_env()
+        mock_result = MagicMock()
+        mock_result.stdout = "hello\n"
+        mock_result.stderr = ""
+        mock_result.exit_code = 0
+        env.sandbox.commands.run.return_value = mock_result
+
+        output = env.execute({"command": "echo hello"})
+
+        assert output["output"] == "hello\n"
+        assert output["returncode"] == 0
+        assert output["exception_info"] == ""
+
+    def test_execute_string_action(self):
+        env = _make_env()
+        mock_result = MagicMock()
+        mock_result.stdout = "ok\n"
+        mock_result.stderr = ""
+        mock_result.exit_code = 0
+        env.sandbox.commands.run.return_value = mock_result
+
+        output = env.execute("echo ok")
+
+        assert output["output"] == "ok\n"
+
+    def test_execute_nonzero_exit(self):
+        # e2b's commands.run() RAISES CommandExitException (carrying stdout/stderr/
+        # exit_code) on any non-zero exit. A failing command is a normal result,
+        # not an infrastructure error: its real output and exit code must survive.
+        env = _make_env()
+        exc = Exception("Command exited with code 1")
+        exc.stdout = "partial stdout\n"
+        exc.stderr = "boom\n"
+        exc.exit_code = 1
+        env.sandbox.commands.run.side_effect = exc
+
+        output = env.execute({"command": "false"})
+
+        assert output["returncode"] == 1
+        assert "boom" in output["output"]
+        assert "partial stdout" in output["output"]
+        assert output["exception_info"] == ""
+
+    def test_execute_exception(self):
+        env = _make_env()
+        env.sandbox.commands.run.side_effect = RuntimeError("connection lost")
+
+        output = env.execute({"command": "ls"})
+
+        assert output["returncode"] == -1
+        assert "connection lost" in output["exception_info"]
+
+    def test_execute_raises_submitted(self):
+        env = _make_env()
+        mock_result = MagicMock()
+        mock_result.stdout = "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT\ndiff --git a/f.py b/f.py\n"
+        mock_result.stderr = ""
+        mock_result.exit_code = 0
+        env.sandbox.commands.run.return_value = mock_result
+
+        with pytest.raises(Submitted) as exc_info:
+            env.execute({"command": "submit"})
+
+        msg = exc_info.value.messages[0]
+        assert msg["extra"]["exit_status"] == "Submitted"
+        assert "diff --git" in msg["extra"]["submission"]
+
+
+# ---------------------------------------------------------------------------
+# E2BEnvironment.get_template_vars
+# ---------------------------------------------------------------------------
+
+
+class TestE2BEnvironmentTemplateVars:
+    def test_includes_platform_uname(self):
+        # Default configs (mini/default) render {{system}}/{{machine}}/... under
+        # Jinja StrictUndefined, so these keys must be present like docker/local.
+        env = _make_env()
+        result = env.get_template_vars()
+        for key in ("system", "release", "version", "machine", "node", "processor"):
+            assert key in result
+
+    def test_excludes_credentials(self):
+        # Template vars feed the Jinja prompt context; secrets must not leak there.
+        env = _make_env(api_key="secret-key", registry_password="secret-pass", registry_username="user")
+        result = env.get_template_vars()
+        assert "api_key" not in result
+        assert "registry_password" not in result
+        assert "registry_username" not in result
+
+    def test_kwargs_override(self):
+        env = _make_env()
+        result = env.get_template_vars(extra="value")
+        assert result["extra"] == "value"
+
+
+# ---------------------------------------------------------------------------
+# E2BEnvironment.serialize
+# ---------------------------------------------------------------------------
+
+
+class TestE2BEnvironmentSerialize:
+    def test_serialize_structure(self):
+        env = _make_env()
+        result = env.serialize()
+
+        assert "info" in result
+        assert "config" in result["info"]
+        assert "environment" in result["info"]["config"]
+        assert "environment_type" in result["info"]["config"]
+        assert "E2BEnvironment" in result["info"]["config"]["environment_type"]
+
+    def test_serialize_excludes_credentials(self):
+        env = _make_env()
+        env.config.api_key = "secret-key"
+        env.config.registry_password = "secret-pass"
+
+        result = env.serialize()
+        env_cfg = result["info"]["config"]["environment"]
+
+        assert "api_key" not in env_cfg
+        assert "registry_password" not in env_cfg
+
+
+# ---------------------------------------------------------------------------
+# E2BEnvironment.cleanup / __del__
+# ---------------------------------------------------------------------------
+
+
+class TestE2BEnvironmentCleanup:
+    def test_cleanup_kills_sandbox(self):
+        env = _make_env()
+        env.cleanup()
+        env.sandbox.kill.assert_called_once()
+
+    def test_cleanup_tolerates_missing_sandbox(self):
+        with patch.object(E2BEnvironment, "__init__", lambda self, **kw: None):
+            env = E2BEnvironment()
+            # sandbox was never set
+            env.cleanup()  # should not raise
+
+    def test_cleanup_tolerates_kill_exception(self):
+        env = _make_env()
+        env.sandbox.kill.side_effect = RuntimeError("already dead")
+        env.cleanup()  # should not raise
+
+
+# ---------------------------------------------------------------------------
+# atexit cleanup registry
+# ---------------------------------------------------------------------------
+
+
+class TestAtexitCleanup:
+    def test_cleanup_removes_from_active_sandboxes(self):
+        from minisweagent.environments.extra import e2b as e2b_mod
+
+        env = _make_env()
+        e2b_mod._active_sandboxes.add(env)
+        assert env in e2b_mod._active_sandboxes
+
+        env.cleanup()
+        assert env not in e2b_mod._active_sandboxes
+
+    def test_cleanup_all_sandboxes_kills_all(self):
+        from minisweagent.environments.extra import e2b as e2b_mod
+
+        env1 = _make_env()
+        env2 = _make_env()
+        e2b_mod._active_sandboxes.update([env1, env2])
+
+        e2b_mod._cleanup_all_sandboxes()
+
+        env1.sandbox.kill.assert_called_once()
+        env2.sandbox.kill.assert_called_once()
+        assert env1 not in e2b_mod._active_sandboxes
+        assert env2 not in e2b_mod._active_sandboxes
diff --git a/tests/run/test_swebench.py b/tests/run/test_swebench.py
index cab09c0ae..244a326db 100644
--- a/tests/run/test_swebench.py
+++ b/tests/run/test_swebench.py
@@ -1,6 +1,6 @@
 import json
 import re
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from pydantic import BaseModel
@@ -8,6 +8,7 @@
 from minisweagent import package_dir
 from minisweagent.models.test_models import DeterministicModel, make_output
 from minisweagent.run.benchmarks.swebench import (
+    _teardown_environment,
     filter_instances,
     get_swebench_docker_image_name,
     main,
@@ -16,6 +17,25 @@
 )
 
 
+class TestTeardownEnvironment:
+    def test_prefers_cleanup_over_stop(self):
+        env = MagicMock(spec=["cleanup", "stop"])
+        _teardown_environment(env)
+        env.cleanup.assert_called_once()
+        env.stop.assert_not_called()
+
+    def test_falls_back_to_stop(self):
+        env = MagicMock(spec=["stop"])
+        _teardown_environment(env)
+        env.stop.assert_called_once()
+
+    def test_tolerates_env_without_teardown(self):
+        _teardown_environment(MagicMock(spec=[]))  # should not raise
+
+    def test_tolerates_none(self):
+        _teardown_environment(None)  # should not raise
+
+
 def _make_model_from_fixture(text_outputs: list[str], cost_per_call: float = 1.0, **kwargs) -> DeterministicModel:
     """Create a DeterministicModel from trajectory fixture data (raw text outputs)."""