diff --git a/.gitignore b/.gitignore
index 1e1c6fe077..6f0c9cb603 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@
 /.idea
 /*git_ignore*
 .DS_Store
+.adk
+tmp/
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore
new file mode 100644
index 0000000000..78cf8c8595
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore
@@ -0,0 +1,165 @@
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+
+### OSX ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+
+### Windows ###
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+
+### Vagrant ###
+.vagrant/
+### Local rules, see .gitignore.tail to override! ###
+shippable
+.git
+
+tmp/
+sessions.db
+.adk/
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore
new file mode 100644
index 0000000000..fb34b7833c
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore
@@ -0,0 +1,25 @@
+# This file tells gcloud builds submit which files to exclude from the upload.
+# Without it, gcloud ignores .dockerignore and uploads everything (including .venv).
+
+.git
+.venv/
+venv/
+ENV/
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+*.egg-info/
+*.egg
+dist/
+build/
+.tox/
+.cache/
+.coverage
+htmlcov/
+*.log
+.env
+.adk/
+sessions.db
+tmp/
+.DS_Store
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile b/perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile
new file mode 100644
index 0000000000..417ad58946
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.13-slim
+WORKDIR /app
+
+# Install kubectl (required by k8s-agent-sandbox for port-forwarding to sandbox pods)
+# The binary is selected via `dpkg --print-architecture` (amd64 or arm64); curl -f aborts the build on an HTTP error
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl ca-certificates && \
+    ARCH=$(dpkg --print-architecture) && \
+    curl -fLO "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl" && \
+    install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \
+    rm kubectl && \
+    apt-get purge -y curl && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+RUN adduser --disabled-password --gecos "" myuser && \
+    chown -R myuser:myuser /app
+
+COPY . .
+
+USER myuser
+
+ENV PATH="/home/myuser/.local/bin:$PATH"
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
+
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py
new file mode 100644
index 0000000000..5271a8ef60
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py
@@ -0,0 +1 @@
+# ADK Agent package
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml
new file mode 100644
index 0000000000..653f07fcf8
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml
@@ -0,0 +1,20 @@
+# Cloud Build config for cross-compiling to ARM64.
+# Used by PKB when --container_remote_build_config points to this file.
+# The _IMAGE substitution is passed by PKB RemoteBuild() automatically.
+steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes']
+    id: 'qemu-setup'
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'create', '--use', '--name', 'multiarch-builder']
+    id: 'create-builder'
+    waitFor: ['qemu-setup']
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'build', '--platform', 'linux/arm64', '-t', '${_IMAGE}', '--push', '.']
+    id: 'build-and-push'
+    waitFor: ['create-builder']
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_32
+substitutions:
+  _IMAGE: ''
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py
new file mode 100644
index 0000000000..c6df9a7a2a
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py
@@ -0,0 +1,2 @@
+# GKE Performance Agent package
+from . import agent
\ No newline at end of file
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py
new file mode 100644
index 0000000000..6561942960
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py
@@ -0,0 +1,276 @@
+"""GKE Performance Agent -- ADK agent definition.
+
+This file runs INSIDE the GKE cluster as part of the adk-agent Deployment
+(see gke_deploy_utils.py for the K8s manifest). It is NOT run from the
+machine executing PKB. The ADK agent pod serves a FastAPI app (main.py)
+that PKB calls via HTTP through a kubectl port-forward tunnel.
+
+Execution flow:
+  PKB (your laptop/CI) -> kubectl port-forward -> adk-agent pod -> this file
+  -> GkeCodeExecutor -> SandboxClient -> gVisor sandbox pod
+"""
+
+"""GKE Performance Agent — ADK agent definition for sandbox benchmarking.
+
+EXECUTION CONTEXT:
+    This file runs INSIDE the GKE cluster, NOT on the PKB orchestrator machine.
+    It is packaged into a container image (see ../Dockerfile) and deployed as
+    the 'adk-agent' Deployment in the benchmark namespace.
+
+    Execution flow:
+      PKB machine                          GKE Cluster
+      ----------                           -----------
+      benchmark.Run()
+        -> CallAgentApi("/benchmark/...")   -> main.py (FastAPI)
+                                              -> Runner(agent=root_agent)
+                                                -> MockLlm yields code
+                                                -> V3GkeCodeExecutor._execute_in_sandbox()
+                                                  -> SandboxClient.create_sandbox()
+                                                  -> sandbox.files.write("script.py", code)
+                                                  -> sandbox.commands.run("python3 script.py")
+                                                  -> SandboxClient.delete_sandbox()
+
+    The PKB machine communicates with this agent via HTTP (port-forwarded
+    through kubectl or via a LoadBalancer/ClusterIP service).
+"""
+
+from google.adk.agents import LlmAgent
+from google.adk.code_executors import GkeCodeExecutor
+from google.adk.code_executors.code_execution_utils import CodeExecutionResult
+from google.adk.models.base_llm import BaseLlm
+from google.adk.models.llm_response import LlmResponse
+from google.genai import types
+from concurrent.futures import ThreadPoolExecutor
+from dotenv import load_dotenv
+from google.adk.apps import App
+import logging
+import os
+
+# --- Configure Logging ---
+logging.basicConfig(level=logging.INFO)
+
+# =========================================================================
+# 1. Environment and Configuration
+# =========================================================================
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+agent_dir = os.path.join(basedir, "..")
+
+# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags).
+# load_dotenv() leaves already-set variables untouched, so in GKE the K8s manifest env vars win.
+load_dotenv(os.path.join(agent_dir, "generated.env"))
+
+# =========================================================================
+# 2. Mock LLM Definition (Inheriting from BaseLlm for Pydantic)
+# =========================================================================
+
+
+def _load_script(path: str, fallback: str) -> str:
+    """Return the text of the file at `path`, or `fallback` if it cannot be read.
+
+    The failure is logged so that a missing or unreadable benchmark script
+    shows up in the logs instead of being replaced without a trace.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            return f.read()
+    except (OSError, UnicodeError) as e:
+        logging.warning("Cannot read %s (%s); using fallback script", path, e)
+        return fallback
+
+
+# Load the benchmark scripts
+_SCRIPT_DIR = os.path.join(basedir, "../sandboxed_apps/python_test_app")
+density_script_path = os.path.join(_SCRIPT_DIR, "benchmark_density.py")
+payload_script_path = os.path.join(_SCRIPT_DIR, "benchmark_payload.py")
+qps_script_path = os.path.join(_SCRIPT_DIR, "benchmark_qps.py")
+
+_UNAME_FALLBACK = "import os; print(os.uname())"
+density_benchmark_code = _load_script(density_script_path, _UNAME_FALLBACK)
+payload_benchmark_code = _load_script(payload_script_path, _UNAME_FALLBACK)
+qps_benchmark_code = _load_script(
+    qps_script_path, "import json; print(json.dumps({'sandbox_status': 'ok'}))"
+)
+
+# Keys that main.py sets in os.environ per-request.  We inject them into
+# the script so they reach the sandbox pod.  If unset, the benchmark scripts
+# use their own built-in defaults.
+_DENSITY_ENV_KEYS = ["SAMPLE_COUNT", "SAMPLE_WARMUP"]
+_PAYLOAD_ENV_KEYS = ["PAYLOAD_SIZE_MB", "PAYLOAD_ITERATIONS"]
+_QPS_ENV_KEYS: list[str] = []  # QPS script needs no env config
+
+
+def _build_benchmark_code() -> str:
+    """Build the benchmark script with current env values injected.
+
+    BENCHMARK_MODE picks the script: 'payload' → benchmark_payload.py,
+    'qps' → benchmark_qps.py, anything else (default 'density') →
+    benchmark_density.py.  Injected values are written as repr() literals.
+    """
+    mode = os.getenv("BENCHMARK_MODE", "density")
+
+    if mode == "payload":
+        env_keys = _PAYLOAD_ENV_KEYS
+        script = payload_benchmark_code
+    elif mode == "qps":
+        env_keys = _QPS_ENV_KEYS
+        script = qps_benchmark_code
+    else:
+        env_keys = _DENSITY_ENV_KEYS
+        script = density_benchmark_code
+
+    lines = ["import os"]
+    for k in env_keys:
+        v = os.getenv(k)
+        if v is not None:
+            # !r quotes and escapes, so a quote or backslash in the value stays data.
+            lines.append(f"os.environ[{k!r}] = {v!r}")
+    return "\n".join(lines) + "\n\n" + script
+
+
+class MockLlm(BaseLlm):
+    # Scripted stand-in for a real model: no inference is performed.
+    model: str = "mock-model"
+
+    async def generate_content_async(self, llm_request, stream=False):
+        """Yield a single, non-partial LlmResponse for `llm_request`.
+
+        Written as an async generator: the response is yielded rather
+        than returned.  `stream` is accepted but not consulted.
+        """
+        # A request carrying more than one content entry is treated as
+        # "code already ran": ADK is expected to append the execution result
+        # to the history before calling the model again (NOTE(review):
+        # confirm against the ADK flow), so plain text ends the loop.
+        if len(llm_request.contents) > 1:
+            reply = types.Part(text="Execution Complete")
+        else:
+            # First turn: answer with executable code.  The script is
+            # assembled per request so that it reflects the os.environ
+            # values in effect when this call happens.
+            snippet = types.ExecutableCode(
+                language="PYTHON", code=_build_benchmark_code()
+            )
+            reply = types.Part(executable_code=snippet)
+
+        message = types.Content(role="model", parts=[reply])
+
+        # Exactly one final response is produced, whether or not the
+        # caller asked for streaming.
+        yield LlmResponse(
+            content=message, partial=False
+        )
+
+
+# =========================================================================
+# 3. Agent Initialization
+# =========================================================================
+
+
+# Module-level thread pool for sandbox I/O operations.
+# Initialized once at import time to avoid thread-safety issues
+# with lazy initialization inside _execute_in_sandbox().
+_SANDBOX_POOL = ThreadPoolExecutor(max_workers=16)
+
+
+class V3GkeCodeExecutor(GkeCodeExecutor):
+    def _execute_in_sandbox(self, code: str) -> CodeExecutionResult:
+        """Run `code` in a newly created sandbox and return its stdout.
+
+        Phases are timed with time.perf_counter(); a created sandbox is always deleted.
+        """
+        from k8s_agent_sandbox.sandbox_client import SandboxClient
+        from k8s_agent_sandbox.models import SandboxDirectConnectionConfig
+        import time
+
+        logging.info("Executing via V3 SandboxClient (v0.4.6 compatible).")
+
+        # Use DirectConnection when SANDBOX_ROUTER_URL is set (in-cluster),
+        # otherwise fall back to kubectl port-forward (dev mode).
+        router_url = os.getenv("SANDBOX_ROUTER_URL")
+        if router_url:
+            client = SandboxClient(
+                connection_config=SandboxDirectConnectionConfig(api_url=router_url)
+            )
+        else:
+            client = SandboxClient()
+        # v0.4.6 create_sandbox uses 'template' and 'namespace' arguments
+        create_ms = upload_ms = run_ms = delete_ms = 0.0
+        sandbox = None
+        # Time sandbox creation (perf_counter is monotonic; time.time() can jump)
+        t0 = time.perf_counter()
+        create_future = _SANDBOX_POOL.submit(
+            client.create_sandbox,
+            template=self.sandbox_template,
+            namespace=self.namespace,
+        )
+        sandbox = create_future.result()
+        create_ms = (time.perf_counter() - t0) * 1000.0
+        try:
+            # v0.4.6 handles file I/O via the .files namespace
+            t0 = time.perf_counter()
+            upload_future = _SANDBOX_POOL.submit(sandbox.files.write, "script.py", code)
+            upload_future.result()
+            upload_ms = (time.perf_counter() - t0) * 1000.0
+
+            # SANDBOX_EXEC_TIMEOUT_S is set per-request by main.py.
+            # Default 60 s keeps density/snapshot runs tight; payload
+            # sweeps raise it for large blobs.
+            run_timeout = int(os.getenv("SANDBOX_EXEC_TIMEOUT_S", "60"))
+
+            t0 = time.perf_counter()
+            run_future = _SANDBOX_POOL.submit(
+                sandbox.commands.run, "python3 script.py", timeout=run_timeout
+            )
+            result = run_future.result()
+            run_ms = (time.perf_counter() - t0) * 1000.0
+
+            # ADK's build_code_execution_result_part discards stdout when
+            # stderr is non-empty (OUTCOME_FAILED path).  Sandbox scripts
+            # produce benign stderr (C-extension reimport noise, gVisor
+            # warnings) that would cause all sandbox_* metrics to vanish.
+            # Log stderr for debugging, then clear it so ADK passes
+            # stdout through.
+            if result.stderr:
+                logging.warning("Sandbox stderr (ignored): %s", result.stderr[:500])
+
+            logging.info(
+                "SANDBOX_TIMINGS: create_ms=%.3f upload_ms=%.3f run_ms=%.3f",
+                create_ms,
+                upload_ms,
+                run_ms,
+            )
+            return CodeExecutionResult(stdout=result.stdout, stderr="")
+        finally:
+            # Always cleanup the claim
+            t0 = time.perf_counter()
+            if sandbox is not None:
+                delete_future = _SANDBOX_POOL.submit(
+                    client.delete_sandbox, sandbox.claim_name, namespace=self.namespace
+                )
+                delete_future.result()
+            delete_ms = (time.perf_counter() - t0) * 1000.0
+            logging.info("SANDBOX_TIMINGS_DELETE: delete_ms=%.3f", delete_ms)
+
+
+# Executor settings come from the environment; an unset variable is passed as None.
+gke_executor = V3GkeCodeExecutor(
+    executor_type="sandbox",
+    sandbox_template="python-sandbox-template",
+    cluster_name=os.getenv("CLUSTER_NAME"),
+    location=os.getenv("GOOGLE_CLOUD_LOCATION"),
+    namespace=os.getenv("AGENTIC_NAMESPACE"),
+)
+
+# The agent name must be a valid identifier (no dashes).
+gke_performance_agent = LlmAgent(
+    name="gke_performance_agent",
+    code_executor=gke_executor,
+    model=MockLlm(model="mock-model"),
+)
+
+# Alias under the name that main.py looks up (agent.root_agent).
+root_agent = gke_performance_agent
+
+# enable_tracing is not set here.
+app = App(name=root_agent.name, root_agent=root_agent)
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py
new file mode 100644
index 0000000000..473c2072c2
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py
@@ -0,0 +1,1107 @@
+"""FastAPI service fronting the GKE Performance Agent.
+
+Exposes REST endpoints that PKB calls to trigger benchmarks.  The agent
+runs *inside* the GKE cluster so it can reach the Sandbox Controller and
+create gVisor sandboxes natively.
+
+Endpoints:
+  GET  /healthz                      → liveness probe
+  POST /benchmark/python/density     → run the Python density benchmark (UC-B)
+  POST /benchmark/python/payload     → run the payload transfer benchmark (UC-D)
+  POST /benchmark/python/qps         → run the QPS saturation benchmark (UC-F)
+  POST /benchmark/chromium/density   → run the Chromium density benchmark (UC-C)
+  POST /run                          → raw ADK agent interaction
+
+POST /benchmark/python/density — Request:
+  {
+    "sample_count":            int — iterations per sandbox session (default: 100)
+    "sample_warmup":           int — warmup iterations excluded from stats (default: 5)
+    "concurrent_sessions":     int — parallel sandbox sessions (default: 1)
+    "sandbox_exec_timeout_s":  int — sandbox command execution timeout in seconds (default: 60)
+  }
+
+POST /benchmark/python/density — Response:
+  {
+    "concurrent_sessions":  int — requested session count
+    "successful_sessions":  int — sessions completed without error
+    "failed_sessions":      int — sessions that returned an error
+    "aggregate": {
+      --- Orchestrator-side (timed in _run_single_session, stats in benchmark_python_density) ---
+      "orchestrator_cel_mean_ms":  mean round-trip across sessions
+      "orchestrator_cel_p50_ms":   P50 round-trip
+      "orchestrator_cel_p{95,99}_ms":  P95 / P99 round-trip
+      "orchestrator_cel_min_ms":   min round-trip
+      "orchestrator_cel_max_ms":   max round-trip
+
+      --- Sandbox-side overall (from benchmark_density.py, mean across sessions) ---
+      "sandbox_ttfe_ms":               Time To First Execution
+      "sandbox_total_cel_mean_ms":     mean total CEL per iteration (sum of all task types)
+      "sandbox_total_cel_p50_ms":      P50 total CEL per iteration
+      "sandbox_total_cel_p99_ms":      P99 total CEL per iteration
+      "sandbox_total_cel_min_ms":      min total CEL per iteration
+      "sandbox_total_cel_max_ms":      max total CEL per iteration
+
+      --- Sandbox RSS (from benchmark_density.py, mean across sessions) ---
+      "sandbox_rss_start_mb":      RSS at benchmark start
+      "sandbox_rss_end_mb":        RSS at benchmark end
+      "sandbox_rss_growth_mb":     RSS growth during benchmark
+
+      --- Per-type CEL breakdown (from benchmark_density.py, mean across sessions) ---
+      "sandbox_compute_cel_{mean,p50,p99,min,max}_ms":  CPU-bound (math.factorial)
+      "sandbox_syscall_cel_{mean,p50,p99,min,max}_ms":  gVisor Sentry (os.stat/listdir)
+      "sandbox_import_cel_{mean,p50,p99,min,max}_ms":   Gofer FS I/O (importlib)
+    }
+    "sessions": [             per-session detail array
+      {
+        "session_id":           int — zero-based session index
+        "orchestrator_total_ms": float — full round-trip for this session
+        "raw_output":           str — raw code execution stdout
+        "sandbox_ttfe_ms":      float — TTFE for this session
+        "sandbox_total_cel_mean_ms":  float — total CEL mean for this session
+        ...                     all other sandbox_* metrics for this session
+      }
+    ]
+  }
+
+Data Flow:
+  benchmark_density.py (inside gVisor)  → all sandbox_* metrics per session
+  main.py (this file)                  → orchestrator_* timing + cross-session aggregation
+"""
+
+import json
+import logging
+import os
+import re
+import time
+import asyncio
+from typing import Optional
+from concurrent.futures import ThreadPoolExecutor
+
+import uvicorn
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from google.genai import types
+from google.adk.sessions import InMemorySessionService
+from google.adk.artifacts import InMemoryArtifactService
+from google.adk.runners import Runner
+
+from dotenv import load_dotenv
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+
+# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags).
+# load_dotenv() leaves already-set variables untouched, so in GKE the K8s manifest env vars take precedence.
+load_dotenv(os.path.join(basedir, "generated.env"))
+# Imported only after load_dotenv(): agent.py reads os.environ at import time.
+from gke_performance_agent import agent
+
+
+# ── SandboxClient factory (DirectConnection vs Dev-mode tunnel) ──────────
+def _make_sandbox_client():
+    """Return a SandboxClient configured from SANDBOX_ROUTER_URL.
+
+    With the variable set (non-empty) the client is given a
+    SandboxDirectConnectionConfig pointing at that URL; otherwise it is
+    built with the library defaults (NOTE(review): presumably the kubectl
+    port-forward tunnel used in dev mode -- confirm).
+    """
+    from k8s_agent_sandbox.sandbox_client import SandboxClient
+
+    direct_url = os.getenv("SANDBOX_ROUTER_URL")
+    if not direct_url:
+        return SandboxClient()
+
+    # Imported lazily: only the direct-connection path needs this model.
+    from k8s_agent_sandbox.models import SandboxDirectConnectionConfig
+    direct_cfg = SandboxDirectConnectionConfig(api_url=direct_url)
+    return SandboxClient(connection_config=direct_cfg)
+
+
+# --- Constants ---
+APP_NAME = "gke_performance_agent_app"  # app_name passed to the ADK session service and Runner
+USER_ID = "benchmark_user"  # user_id shared by every benchmark session
+
+# --- Configure Logging ---
+try:
+    import google.cloud.logging as gcl
+    # Try to set up Google Cloud Logging first.
+    gcl.Client().setup_logging()
+except Exception:  # import or client setup failed: fall back to plain stdlib logging
+    logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# =========================================================================
+# FastAPI Application
+# =========================================================================
+# --- Adaptive ThreadPool based on Agent CPU ---
+def _compute_thread_count() -> int:
+    """Return the worker count to use for the default ThreadPoolExecutor.
+
+    Twice the detected CPU count (1 CPU assumed when undetectable), clamped
+    to the range 2..64.
+    """
+    doubled = 2 * (os.cpu_count() or 1)
+    clamped = doubled if doubled < 64 else 64
+    return clamped if clamped > 2 else 2
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Install a sized ThreadPoolExecutor as the loop's default executor.
+
+    The pool is created at startup with _compute_thread_count() workers and
+    shut down (without waiting) when the application exits.
+    """
+    pool_size = _compute_thread_count()
+    pool = ThreadPoolExecutor(max_workers=pool_size)
+    asyncio.get_running_loop().set_default_executor(pool)
+    logging.info(
+        "Default ThreadPoolExecutor set to %d workers (cpu=%s)",
+        pool_size, os.cpu_count(),
+    )
+    try:
+        yield
+    finally:
+        # Shutdown problems are logged, never raised.
+        try:
+            pool.shutdown(wait=False)
+            logging.info("ThreadPoolExecutor shut down")
+        except Exception:
+            logging.exception("Error shutting down ThreadPoolExecutor")
+
+
+app = FastAPI(title="GKE Benchmark Agent", version="0.2.0", lifespan=lifespan)
+
+# Serialise benchmark requests so concurrent POSTs cannot clobber the
+# shared env vars (BENCHMARK_MODE, SAMPLE_COUNT, …) that agent.py reads.
+_benchmark_lock = asyncio.Lock()
+
+
+def _percentile_stats(sorted_values: list, prefix: str) -> dict:
+    """Return <prefix>_{mean,p50,p95,p99,min,max}_ms for an ascending list ({} if empty)."""
+    count = len(sorted_values)
+    if not count:
+        return {}
+    top = count - 1
+    picks = {"p50": count // 2, "p95": min(int(count * 0.95), top),
+             "p99": min(int(count * 0.99), top), "min": 0, "max": -1}
+    stats = {f"{prefix}_mean_ms": round(sum(sorted_values) / count, 6)}
+    stats.update(
+        (f"{prefix}_{tag}_ms", round(sorted_values[i], 6)) for tag, i in picks.items()
+    )
+    return stats
+
+
+# --- Request / Response Models ---
+class BenchmarkRequest(BaseModel):
+    # Iterations per sandbox session; at least 1.
+    sample_count: int = Field(
+        100, ge=1, description="Sample count per sandbox session")
+    # Warmup iterations per sandbox session; 0 is allowed.
+    sample_warmup: int = Field(
+        5, ge=0, description="Warmup iterations per sandbox session")
+    # How many sandbox sessions run in parallel; at least 1.
+    concurrent_sessions: int = Field(
+        1, ge=1, description="Number of parallel sandbox sessions")
+    # Timeout for the sandbox command, in seconds; at least 10.
+    sandbox_exec_timeout_s: int = Field(
+        60, ge=10, description="Sandbox command execution timeout in seconds")
+
+
+class RunRequest(BaseModel):  # request model with a single field, `prompt`
+    prompt: str = "Please start the GKE performance benchmark workflow."  # default when `prompt` is omitted
+
+
+class PayloadBenchmarkRequest(BaseModel):
+    payload_size_mb: float = Field(1, gt=0, description="Payload size in MB")
+    # Transfer iterations; at least 1.
+    payload_iterations: int = Field(
+        20, ge=1, description="Number of transfer iterations")
+    # How many sandbox sessions run in parallel; at least 1.
+    concurrent_sessions: int = Field(
+        1, ge=1, description="Number of parallel sandbox sessions")
+    # Timeout for the sandbox command, in seconds; at least 10.
+    sandbox_exec_timeout_s: int = Field(
+        60, ge=10, description="Sandbox command execution timeout in seconds")
+
+
+class QpsBenchmarkRequest(BaseModel):
+    # Requested request rate; at least 0.1 per second.
+    target_qps: float = Field(
+        10.0, ge=0.1, description="Target requests per second")
+    # Length of the burst in seconds; at least 5.
+    duration_s: float = Field(
+        60.0, ge=5.0, description="Duration of the QPS burst in seconds")
+    # Timeout for the sandbox command, in seconds; at least 10.
+    sandbox_exec_timeout_s: int = Field(
+        30, ge=10, description="Sandbox command execution timeout in seconds")
+
+
+class ChromiumBenchmarkRequest(BaseModel):
+    # Iterations per Chromium session; at least 1.
+    task_count: int = Field(
+        10, ge=1, description="Iterations per Chromium session")
+    # Warmup iterations left out of the stats; 0 is allowed.
+    warmup_tasks: int = Field(
+        2, ge=0, description="Warmup iterations excluded from stats")
+    # How many Chromium sessions run in parallel; at least 1.
+    concurrent_sessions: int = Field(
+        1, ge=1, description="Number of parallel Chromium sessions")
+    # Timeout for the sandbox command, in seconds; at least 10.
+    sandbox_exec_timeout_s: int = Field(
+        120, ge=10, description="Sandbox command execution timeout in seconds")
+
+
+# --- JSON extraction helper ---
+_JSON_RE = re.compile(r"\{[^{}]*\}", re.DOTALL)
+
+
+def _parse_sandbox_json(raw_output: str) -> Optional[dict]:
+    """Return the last flat JSON object in `raw_output` that has a sandbox_* key.
+
+    Candidates are `{...}` spans without nested braces; spans that fail to
+    parse, or that carry no key starting with "sandbox_", are skipped.  None
+    is returned when nothing qualifies.
+    """
+    for blob in _JSON_RE.findall(raw_output)[::-1]:
+        try:
+            parsed = json.loads(blob)
+        except json.JSONDecodeError:
+            continue
+        if any(key.startswith("sandbox_") for key in parsed):
+            return parsed
+    return None
+
+
+# --- Agent helper ---
+async def _run_agent(prompt: str) -> str:
+    """Run `prompt` through the agent in a fresh session and return its output.
+
+    The output of the last code execution result seen is returned when it is
+    non-empty; otherwise the text of the final response ("" if it has none).
+    """
+    session_service = InMemorySessionService()
+    artifact_service = InMemoryArtifactService()
+    session = await session_service.create_session(
+        app_name=APP_NAME, user_id=USER_ID, state={}
+    )
+
+    runner = Runner(
+        agent=agent.root_agent,
+        app_name=APP_NAME,
+        session_service=session_service,
+        artifact_service=artifact_service,
+    )
+
+    content = types.Content(role="user", parts=[types.Part(text=prompt)])
+
+    final_response = ""
+    code_execution_output = ""
+    async with runner:
+        async for event in runner.run_async(
+            user_id=USER_ID,
+            session_id=session.id,
+            new_message=content,
+        ):
+            if event.content and event.content.parts:
+                for part in event.content.parts:
+                    cer = getattr(part, "code_execution_result", None) or getattr(
+                        part, "codeExecutionResult", None
+                    )
+                    if cer:
+                        code_execution_output = getattr(cer, "output", "") or ""
+            if event.is_final_response() and event.content and event.content.parts:
+                # .text may be None for a non-text part; keep the declared str return.
+                final_response = event.content.parts[0].text or ""
+
+    await session_service.delete_session(
+        app_name=APP_NAME,
+        user_id=USER_ID,
+        session_id=session.id,
+    )
+    return code_execution_output if code_execution_output else final_response
+
+
+async def _run_single_session(session_id: int, prompt: str) -> dict:
+    """Run one agent session; return its timing and parsed sandbox metrics.
+
+    On failure the result is {"session_id", "error"} only.  On success it
+    holds session_id, orchestrator_total_ms, raw_output and then every key
+    of the JSON summary parsed from the output (none if parsing finds nothing).
+    """
+    started = time.perf_counter()
+    logging.info("SESSION_START: session_id=%d start_ts=%.3f", session_id, time.time())
+
+    try:
+        raw_output = await _run_agent(prompt)
+    except Exception as e:
+        return {"session_id": session_id, "error": str(e)}
+
+    elapsed_ms = round((time.perf_counter() - started) * 1000, 6)
+    logging.info(
+        "SESSION_END: session_id=%d elapsed_ms=%.3f",
+        session_id,
+        elapsed_ms,
+    )
+
+    # Sandbox-side metrics are merged last, so they win on any key clash.
+    outcome = {
+        "session_id": session_id,
+        "orchestrator_total_ms": elapsed_ms,
+        "raw_output": raw_output,
+    }
+    outcome.update(_parse_sandbox_json(raw_output) or {})
+
+    return outcome
+
+
+# --- Endpoints ---
+@app.get("/healthz")
+async def healthz():
+    return dict(status="ok")  # constant payload; nothing is actually checked
+
+
+@app.post("/benchmark/python/density")
+async def benchmark_python_density(req: BenchmarkRequest):
+    """Trigger the Python density benchmark (Use Case B).
+
+    Fires `concurrent_sessions` parallel agent sessions.  Each session
+    claims its own sandbox, runs the benchmark script with the given
+    iteration/warmup counts, and returns both orchestrator-side and
+    sandbox-side metrics.
+    """
+    async with _benchmark_lock:
+        os.environ["BENCHMARK_MODE"] = "density"
+        os.environ["SAMPLE_COUNT"] = str(req.sample_count)
+        os.environ["SAMPLE_WARMUP"] = str(req.sample_warmup)
+        os.environ["SANDBOX_EXEC_TIMEOUT_S"] = str(req.sandbox_exec_timeout_s)
+
+        logger.info(
+            "Starting Python benchmark: sample_count=%d sample_warmup=%d concurrent_sessions=%d",
+            req.sample_count,
+            req.sample_warmup,
+            req.concurrent_sessions,
+        )
+
+        prompt = "Please start the GKE performance benchmark workflow."
+
+        # Fire concurrent sessions.
+        # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread()
+        # with a nested asyncio.run() to create a per-thread event loop. This is
+        # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle
+        # via kubectl/HTTP) that would starve a shared event loop and serialize
+        # session starts. The per-thread event loop overhead (~0.1ms) is negligible
+        # compared to sandbox round-trip times (~200ms+).
+        thread_tasks = [
+            asyncio.create_task(
+                asyncio.to_thread(
+                    lambda sid=i: asyncio.run(_run_single_session(sid, prompt))
+                )
+            )
+            for i in range(req.concurrent_sessions)
+        ]
+        session_results = await asyncio.gather(*thread_tasks)
+
+    # Separate successful vs failed sessions
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate orchestrator-side metrics across all successful sessions
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_cel"))
+
+        # Aggregate sandbox-side metrics across sessions
+        sandbox_keys = [k for k in successful[0] if k.startswith("sandbox_")]
+        for key in sandbox_keys:
+            sample_val = successful[0].get(key)
+            if isinstance(sample_val, list):
+                # Pool raw latency arrays across sandboxes → true cross-sandbox stats
+                pooled = sorted(
+                    v
+                    for r in successful
+                    for v in (r.get(key) or [])
+                    if isinstance(r.get(key), list)
+                )
+                if pooled:
+                    base = key[:-3] if key.endswith("_ms") else key
+                    aggregate.update(_percentile_stats(pooled, base))
+            elif isinstance(sample_val, (int, float)):
+                vals = [
+                    r[key]
+                    for r in successful
+                    if key in r and isinstance(r[key], (int, float))
+                ]
+                if vals:
+                    if key.endswith("_cel_ms"):
+                        # Latency scalars (e.g. import_cel_ms): compute
+                        # cross-sandbox percentile stats, like array metrics.
+                        base = key[:-3]
+                        aggregate.update(_percentile_stats(sorted(vals), base))
+                    else:
+                        # Non-latency scalars (e.g. rss_mb, ttfe_ms): average
+                        aggregate[key] = round(sum(vals) / len(vals), 6)
+
+    return {
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/python/payload")
+async def benchmark_python_payload(req: PayloadBenchmarkRequest):
+    """Trigger the payload transfer benchmark (Use Case D).
+
+    Measures the cost of returning large observation payloads from a
+    gVisor sandbox back to the orchestrator.  Each session generates a
+    payload of `payload_size_mb` MB, encodes it (base64), writes it
+    through the gVisor Gofer path, and reports latency breakdowns.
+    """
+    async with _benchmark_lock:
+        os.environ["BENCHMARK_MODE"] = "payload"
+        os.environ["PAYLOAD_SIZE_MB"] = str(req.payload_size_mb)
+        os.environ["PAYLOAD_ITERATIONS"] = str(req.payload_iterations)
+        os.environ["SANDBOX_EXEC_TIMEOUT_S"] = str(req.sandbox_exec_timeout_s)
+
+        logger.info(
+            "Starting Payload benchmark: payload_size_mb=%s iterations=%d concurrent_sessions=%d",
+            req.payload_size_mb,
+            req.payload_iterations,
+            req.concurrent_sessions,
+        )
+
+        prompt = "Please start the GKE performance benchmark workflow."
+
+        # Fire concurrent sessions.
+        # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread()
+        # with a nested asyncio.run() to create a per-thread event loop. This is
+        # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle
+        # via kubectl/HTTP) that would starve a shared event loop and serialize
+        # session starts. The per-thread event loop overhead (~0.1ms) is negligible
+        # compared to sandbox round-trip times (~200ms+).
+        thread_tasks = [
+            asyncio.create_task(
+                asyncio.to_thread(
+                    lambda sid=i: asyncio.run(_run_single_session(sid, prompt))
+                )
+            )
+            for i in range(req.concurrent_sessions)
+        ]
+        session_results = await asyncio.gather(*thread_tasks)
+
+    # Separate successful vs failed sessions
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate orchestrator-side metrics across all successful sessions
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_transfer"))
+
+        # Aggregate sandbox-side metrics (mean across sessions, numeric only)
+        sandbox_keys = [k for k in successful[0] if k.startswith("sandbox_")]
+        for key in sandbox_keys:
+            vals = [
+                r[key]
+                for r in successful
+                if key in r and isinstance(r[key], (int, float))
+            ]
+            if vals:
+                aggregate[key] = round(sum(vals) / len(vals), 6)
+
+    return {
+        "payload_size_mb": req.payload_size_mb,
+        "payload_iterations": req.payload_iterations,
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/python/qps")
+async def benchmark_python_qps(req: QpsBenchmarkRequest):
+    """Trigger the QPS saturation benchmark (Use Case F).
+
+    Fires sandbox claim requests at a controlled rate (target_qps) for
+    duration_s seconds.  Each request claims a sandbox from the warm pool,
+    runs a trivial script, and releases it.  Returns per-request TTFE
+    (claim + upload + execute + delete) and aggregate latency stats.
+
+    Uses a lightweight path that calls SandboxClient directly — bypasses
+    the full ADK Runner/MockLLM pipeline to avoid per-request overhead
+    and accurately measure sandbox lifecycle latency at high QPS.
+
+    When the warm pool drains faster than it refills, TTFE spikes from
+    ~200ms to seconds — identifying the QPS saturation point.
+    """
+
+    # Load the QPS script once
+    qps_script_path = os.path.join(
+        basedir, "sandboxed_apps/python_test_app/benchmark_qps.py"
+    )
+    try:
+        with open(qps_script_path, "r") as f:
+            qps_code = f.read()
+    except Exception:
+        qps_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))"
+
+    sandbox_template = os.getenv("SANDBOX_TEMPLATE", "python-sandbox-template")
+    sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic")
+    exec_timeout = req.sandbox_exec_timeout_s
+    qps_claim_label = {"created-by": "pkb-qps-benchmark"}
+
+    def _run_qps_request(request_id: int) -> dict:
+        """Lightweight sandbox claim→execute→release cycle."""
+        t_total = time.perf_counter()
+        client = _make_sandbox_client()
+        sandbox = None
+        try:
+            # Claim
+            t0 = time.perf_counter()
+            sandbox = client.create_sandbox(
+                template=sandbox_template,
+                namespace=sandbox_namespace,
+                labels=qps_claim_label,
+            )
+            claim_ms = (time.perf_counter() - t0) * 1000
+
+            # Upload
+            t0 = time.perf_counter()
+            sandbox.files.write("script.py", qps_code)
+            upload_ms = (time.perf_counter() - t0) * 1000
+
+            # Execute
+            t0 = time.perf_counter()
+            result = sandbox.commands.run("python3 script.py", timeout=exec_timeout)
+            exec_ms = (time.perf_counter() - t0) * 1000
+
+            ttfe_ms = (time.perf_counter() - t_total) * 1000
+
+            return {
+                "request_id": request_id,
+                "ttfe_ms": round(ttfe_ms, 3),
+                "claim_ms": round(claim_ms, 3),
+                "upload_ms": round(upload_ms, 3),
+                "exec_ms": round(exec_ms, 3),
+            }
+        except Exception as e:
+            ttfe_ms = (time.perf_counter() - t_total) * 1000
+            return {
+                "request_id": request_id,
+                "ttfe_ms": round(ttfe_ms, 3),
+                "error": f"{type(e).__name__}: {e}",
+            }
+        finally:
+            if sandbox is not None:
+                try:
+                    client.delete_sandbox(
+                        sandbox.claim_name, namespace=sandbox_namespace
+                    )
+                except Exception:
+                    pass
+
+    async with _benchmark_lock:
+        logger.info(
+            "Starting QPS benchmark: target_qps=%.1f duration_s=%.1f",
+            req.target_qps,
+            req.duration_s,
+        )
+
+        interval = 1.0 / req.target_qps
+
+        # Use a scoped executor sized to the expected concurrency.
+        # Each sandbox request takes ~0.5-5s depending on environment
+        # (in-cluster vs port-forward).  We need enough workers so the
+        # thread pool itself is never the bottleneck — only real sandbox
+        # contention should limit throughput.
+        peak_concurrency = int(req.target_qps * req.duration_s)
+        qps_workers = max(16, min(512, peak_concurrency))
+        qps_executor = ThreadPoolExecutor(max_workers=qps_workers)
+        loop = asyncio.get_running_loop()
+        logger.info(
+            "QPS executor: %d workers for ~%d expected requests",
+            qps_workers,
+            peak_concurrency,
+        )
+
+        # Schedule requests at the target QPS rate
+        tasks: list[asyncio.Task] = []
+        t_start = time.time()
+        next_fire = t_start
+        request_id = 0
+
+        while True:
+            now = time.time()
+            elapsed = now - t_start
+            if elapsed >= req.duration_s:
+                break
+            if now >= next_fire:
+                rid = request_id
+                request_id += 1
+                fut = loop.run_in_executor(qps_executor, _run_qps_request, rid)
+                tasks.append(fut)
+                next_fire += interval
+            else:
+                await asyncio.sleep(min(0.001, next_fire - now))
+
+        # Wait for in-flight requests with a drain timeout.
+        drain_timeout = max(60.0, req.duration_s)
+        done, pending = await asyncio.wait(tasks, timeout=drain_timeout)
+
+        # Clean up the scoped executor
+        qps_executor.shutdown(wait=False)
+
+        # Collect completed results (guard against individual task exceptions)
+        session_results = []
+        for t in done:
+            try:
+                session_results.append(t.result())
+            except Exception as exc:
+                session_results.append(
+                    {
+                        "request_id": -1,
+                        "error": str(exc),
+                    }
+                )
+
+        # Cancel tasks still queued/running and mark as timed out
+        for t in pending:
+            t.cancel()
+        if pending:
+            logger.warning(
+                "QPS drain timeout: %d/%d requests still pending after %.0fs",
+                len(pending),
+                len(tasks),
+                drain_timeout,
+            )
+            for t in pending:
+                session_results.append(
+                    {
+                        "request_id": -1,
+                        "error": "drain_timeout",
+                    }
+                )
+
+        # Bulk-delete SandboxClaims left by cancelled tasks.
+        # Only targets claims labelled created-by=pkb-qps-benchmark so
+        # we never touch claims created by other workloads.
+        try:
+            import subprocess as _sp
+
+            _claims = _sp.run(
+                [
+                    "kubectl",
+                    "get",
+                    "sandboxclaim",
+                    "-n",
+                    sandbox_namespace,
+                    "-l",
+                    "created-by=pkb-qps-benchmark",
+                    "-o",
+                    "jsonpath={.items[*].metadata.name}",
+                ],
+                capture_output=True,
+                text=True,
+            )
+            claim_names = _claims.stdout.strip().split()
+            if claim_names and claim_names != [""]:
+                logger.info("Cleaning up %d lingering pkb-qps claims", len(claim_names))
+                _sp.run(
+                    [
+                        "kubectl",
+                        "delete",
+                        "sandboxclaim",
+                        "-l",
+                        "created-by=pkb-qps-benchmark",
+                        "-n",
+                        sandbox_namespace,
+                        "--wait=false",
+                    ],
+                    capture_output=True,
+                    text=True,
+                )
+        except Exception:
+            logger.warning("Failed to clean up lingering claims", exc_info=True)
+
+    wall_time = time.time() - t_start
+
+    # Separate successful vs failed
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Compute TTFE stats
+    aggregate = {}
+    if successful:
+        ttfe_values = sorted(r["ttfe_ms"] for r in successful)
+        if ttfe_values:
+            aggregate.update(_percentile_stats(ttfe_values, "ttfe"))
+
+        # Also compute claim latency stats (the warm-pool-sensitive metric)
+        claim_values = sorted(r["claim_ms"] for r in successful if "claim_ms" in r)
+        if claim_values:
+            aggregate.update(_percentile_stats(claim_values, "claim"))
+
+    return {
+        "target_qps": req.target_qps,
+        "actual_qps": round(request_id / wall_time, 2) if wall_time > 0 else 0,
+        "duration_s": round(wall_time, 2),
+        "total_requests": request_id,
+        "successful_requests": len(successful),
+        "failed_requests": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/chromium/density")
+async def benchmark_chromium_density(req: ChromiumBenchmarkRequest):
+    """Trigger the Chromium density benchmark (Use Case C).
+
+    Fires `concurrent_sessions` parallel Chromium sandbox sessions.  Each
+    session claims its own sandbox from the chromium warm pool, connects to
+    the sandbox's Chrome instance via CDP (Chrome DevTools Protocol), and
+    drives the benchmark from the orchestrator using Playwright.
+
+    Architecture:
+      - Sandbox: runs headless Chromium (upstream chrome-sandbox image) with
+        --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0
+      - Orchestrator: connects Playwright via connect_over_cdp() to the
+        sandbox pod IP:9222 and drives navigate/click/evaluate/screenshot.
+      - This isolates pure Chrome-under-gVisor overhead without Node.js or
+        a runtime server in the sandbox.
+    """
+    from playwright.async_api import async_playwright
+    from kubernetes import client as k8s_client, config as k8s_config
+
+    async with _benchmark_lock:
+
+        sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic")
+        sandbox_template = "chromium-sandbox-template"
+
+        logger.info(
+            "Starting Chromium density benchmark (CDP): concurrent_sessions=%d "
+            "task_count=%d warmup_tasks=%d",
+            req.concurrent_sessions,
+            req.task_count,
+            req.warmup_tasks,
+        )
+
+        # Initialize K8s client for pod IP lookup
+        try:
+            k8s_config.load_incluster_config()
+        except k8s_config.ConfigException:
+            k8s_config.load_kube_config()
+        core_v1 = k8s_client.CoreV1Api()
+
+        # Inline HTML test page (data: URL avoids network dependencies)
+        test_page = """data:text/html,
+<!DOCTYPE html>
+<html>
+<head><title>PKB Chromium Benchmark</title></head>
+<body>
+  <h1 id="heading">Hello Sandbox</h1>
+  <input id="search" type="text" placeholder="Search..." />
+  <button id="btn">Click Me</button>
+  <div id="output"></div>
+  <script>
+    document.getElementById('btn').addEventListener('click', () => {
+      document.getElementById('output').textContent = 'clicked';
+    });
+  </script>
+</body>
+</html>"""
+
+        # Limit concurrent K8s Metrics API calls to avoid overwhelming metrics-server
+        _metrics_semaphore = asyncio.Semaphore(5)
+
+        async def _run_chromium_session_cdp(session_id: int) -> dict:
+            """Run one Chromium benchmark session via CDP."""
+            sb_client = _make_sandbox_client()
+            sandbox = None
+            t_start = time.time()
+            claim_ms = 0.0
+            cold_start_ms = 0.0
+            try:
+                # 1. Claim sandbox from warm pool
+                t0 = time.time()
+                sandbox = sb_client.create_sandbox(
+                    template=sandbox_template,
+                    namespace=sandbox_namespace,
+                )
+                claim_ms = (time.time() - t0) * 1000.0
+
+                # 2. Resolve pod IP
+                pod_name = sandbox.get_pod_name()
+                pod = core_v1.read_namespaced_pod(pod_name, sandbox_namespace)
+                pod_ip = pod.status.pod_ip
+                if not pod_ip:
+                    raise RuntimeError(f"Pod {pod_name} has no IP assigned")
+
+                cdp_url = f"http://{pod_ip}:9223"
+
+                # 3. Connect Playwright via CDP
+                async with async_playwright() as pw:
+                    # Wait for Chrome to be ready (retry connection)
+                    browser = None
+                    for attempt in range(20):
+                        try:
+                            browser = await pw.chromium.connect_over_cdp(cdp_url)
+                            break
+                        except Exception:
+                            if attempt >= 19:
+                                raise
+                            await asyncio.sleep(0.5)
+
+                    # Cold start = claim + CDP connect (time until browser ready)
+                    cold_start_ms = (time.time() - t_start) * 1000.0
+
+                    context = await browser.new_context()
+                    page = await context.new_page()
+
+                    # Navigate once before measurement loop
+                    await page.goto(test_page, wait_until="domcontentloaded")
+
+                    # Latency arrays (filled during measured runs only)
+                    navigate_ms = []
+                    screenshot_ms = []
+                    evaluate_ms = []
+                    click_ms = []
+                    fill_ms = []
+                    interaction_ms = []
+
+                    total_runs = req.warmup_tasks + req.task_count
+                    for run_idx in range(total_runs):
+                        measuring = run_idx >= req.warmup_tasks
+
+                        # 1. Navigate (reload page)
+                        t0 = time.time()
+                        await page.goto(test_page, wait_until="domcontentloaded")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            navigate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 2. DOM evaluate — read heading text
+                        t0 = time.time()
+                        await page.evaluate(
+                            "() => document.getElementById('heading').textContent"
+                        )
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            evaluate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 3. Fill input
+                        t0 = time.time()
+                        await page.fill("#search", f"query-{run_idx}")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            fill_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 4. Click button
+                        t0 = time.time()
+                        await page.click("#btn")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            click_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 5. Verify click effect (DOM mutation)
+                        t0 = time.time()
+                        await page.evaluate(
+                            "() => document.getElementById('output').textContent"
+                        )
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            evaluate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 6. Screenshot
+                        t0 = time.time()
+                        await page.screenshot()
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            screenshot_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                    # Read pod memory usage from K8s Metrics API
+                    rss_mb = None
+                    try:
+                        async with _metrics_semaphore:
+                            custom_api = k8s_client.CustomObjectsApi()
+                            pod_metrics = await asyncio.to_thread(
+                                custom_api.get_namespaced_custom_object,
+                                group="metrics.k8s.io",
+                                version="v1beta1",
+                                namespace=sandbox_namespace,
+                                plural="pods",
+                                name=pod_name,
+                            )
+                        for c in pod_metrics.get("containers", []):
+                            usage = c.get("usage", {}).get("memory", "")
+                            if usage.endswith("Ki"):
+                                rss_mb = round(int(usage[:-2]) / 1024, 1)
+                            elif usage.endswith("Mi"):
+                                rss_mb = round(float(usage[:-2]), 1)
+                            elif usage.endswith("Gi"):
+                                rss_mb = round(float(usage[:-2]) * 1024, 1)
+                            break
+                    except Exception:
+                        logger.warning(
+                            "Failed to read pod metrics for %s",
+                            pod_name,
+                            exc_info=True,
+                        )
+
+                    await browser.close()
+
+                total_ms = (time.time() - t_start) * 1000.0
+
+                # Compute stats helper
+                def _compute_stats(arr):
+                    if not arr:
+                        return None
+                    s = sorted(arr)
+                    n = len(s)
+                    return {
+                        "mean_ms": round(sum(s) / n, 3),
+                        "p50_ms": round(s[min(int(n * 0.50), n - 1)], 3),
+                        "p95_ms": round(s[min(int(n * 0.95), n - 1)], 3),
+                        "p99_ms": round(s[min(int(n * 0.99), n - 1)], 3),
+                        "min_ms": round(s[0], 3),
+                        "max_ms": round(s[-1], 3),
+                    }
+
+                return {
+                    "session_id": session_id,
+                    "sandbox_status": "ok",
+                    "orchestrator_total_ms": round(total_ms, 3),
+                    "claim_ms": round(claim_ms, 3),
+                    "cold_start_ms": round(cold_start_ms, 3),
+                    "rss_mb": rss_mb,
+                    "navigate": _compute_stats(navigate_ms),
+                    "evaluate": _compute_stats(evaluate_ms),
+                    "fill": _compute_stats(fill_ms),
+                    "click": _compute_stats(click_ms),
+                    "screenshot": _compute_stats(screenshot_ms),
+                    "interaction": _compute_stats(interaction_ms),
+                }
+
+            except Exception as e:
+                total_ms = (time.time() - t_start) * 1000.0
+                logger.exception("Chromium CDP session %d failed", session_id)
+                return {
+                    "session_id": session_id,
+                    "orchestrator_total_ms": round(total_ms, 3),
+                    "claim_ms": round(claim_ms, 3),
+                    "error": f"{type(e).__name__}: {e}",
+                }
+            finally:
+                if sandbox is not None:
+                    try:
+                        sb_client.delete_sandbox(
+                            sandbox.claim_name, namespace=sandbox_namespace
+                        )
+                    except Exception:
+                        logger.warning(
+                            "Failed to delete sandbox for session %d",
+                            session_id,
+                            exc_info=True,
+                        )
+
+        # Fire concurrent sessions
+        tasks = [_run_chromium_session_cdp(i) for i in range(req.concurrent_sessions)]
+        session_results = await asyncio.gather(*tasks)
+
+    # Separate successful vs failed
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate metrics
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_total"))
+
+        claim_times = sorted(r["claim_ms"] for r in successful if "claim_ms" in r)
+        if claim_times:
+            aggregate.update(_percentile_stats(claim_times, "claim"))
+
+        # Aggregate cold start and RSS
+        cold_starts = sorted(
+            r["cold_start_ms"] for r in successful if "cold_start_ms" in r
+        )
+        if cold_starts:
+            aggregate["cold_start_mean_ms"] = round(
+                sum(cold_starts) / len(cold_starts), 3
+            )
+            aggregate["cold_start_p95_ms"] = round(
+                cold_starts[min(int(len(cold_starts) * 0.95), len(cold_starts) - 1)], 3
+            )
+
+        rss_vals = sorted(
+            r["rss_mb"] for r in successful if r.get("rss_mb") is not None
+        )
+        if rss_vals:
+            aggregate["rss_end_mb"] = round(sum(rss_vals) / len(rss_vals), 1)
+
+        # Aggregate per-task-type interaction stats
+        for metric_key in (
+            "interaction",
+            "navigate",
+            "evaluate",
+            "click",
+            "fill",
+            "screenshot",
+        ):
+            means = sorted(
+                r[metric_key]["mean_ms"]
+                for r in successful
+                if isinstance(r.get(metric_key), dict) and "mean_ms" in r[metric_key]
+            )
+            p95s = sorted(
+                r[metric_key]["p95_ms"]
+                for r in successful
+                if isinstance(r.get(metric_key), dict) and "p95_ms" in r[metric_key]
+            )
+            if means:
+                aggregate[f"{metric_key}_mean_ms"] = round(sum(means) / len(means), 3)
+            if p95s:
+                aggregate[f"{metric_key}_p95_ms"] = round(
+                    p95s[min(int(len(p95s) * 0.95), len(p95s) - 1)], 3
+                )
+
+    return {
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/run")
+async def run_agent(req: RunRequest):
+    """Raw agent interaction — send any prompt, get back the agent text."""
+    try:
+        output = await _run_agent(req.prompt)
+        return {"response": output}
+    except Exception as e:
+        logger.exception("Agent run failed")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# =========================================================================
+# Entry point
+# =========================================================================
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt b/perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt
new file mode 100644
index 0000000000..4ca072323c
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt
@@ -0,0 +1,11 @@
+# Requirements for GKE Performance Agent
+google-adk[gke,extensions]==1.34.1
+k8s-agent-sandbox==0.4.6
+kubernetes>=36.0.1  # Fix: v36.0.0 has auth key mismatch bug (PR #2585)
+google-cloud-aiplatform[adk]==1.153.1
+google-cloud-logging==3.15.0
+fastapi==0.135.3
+uvicorn[standard]==0.44.0
+python-dotenv==1.0.1
+playwright==1.59.0
+
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py
new file mode 100644
index 0000000000..c1d20ecbfb
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+"""
+Agentic Python Sandbox Benchmark
+Measures: TTFE (Time to First Execution), CEL (Command Execution Latency), RSS Memory
+
+Three task categories:
+  - compute: CPU-bound (matrix multiply, sorting large lists)
+  - syscall:  gVisor Sentry stress (large file I/O, many stat calls)
+  - import:   Gofer FS I/O + memory (import heavy stdlib, build data)
+
+Metrics: all sandbox_* keys.
+"""
+import time
+import json
+import os
+import resource
+import sys
+import math
+import random
+import warnings
+
+warnings.filterwarnings("ignore")
+
+SAMPLE_COUNT = int(os.environ.get("SAMPLE_COUNT") or "20")
+SAMPLE_WARMUP = int(os.environ.get("SAMPLE_WARMUP") or "0")
+
+print(f"SAMPLE_COUNT: {SAMPLE_COUNT}")
+print(f"SAMPLE_WARMUP: {SAMPLE_WARMUP}")
+
+# ── Persistent allocations (retained across iterations to grow RSS) ──
+# ~20MB baseline allocation that stays resident
+_RESIDENT_DATA = [bytearray(1024 * 1024) for _ in range(20)]  # 20 × 1MB
+
+
+def get_rss_mb():
+    """Return this process's peak RSS (ru_maxrss high-water mark) in MB; assumes ru_maxrss is in KB (Linux)."""
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
+
+
+def get_static_tasks():
+    """Return the fixed list of task dicts (keys: id, type, code) executed by run_benchmark.
+
+    Each code value is a self-contained Python snippet; the three categories are:
+      - compute: sort 100k seeded random floats + sum of products over two 40k-element lists
+      - syscall: write/read/unlink one 1MB temp file, then 1000 os.stat + 1000 os.listdir calls
+      - import:  re-import 13 stdlib modules 20 times + build a 10k-entry dict of lists
+    """
+    return [
+        {
+            "id": 1,
+            "type": "compute",
+            "code": (
+                "import math, random\n"
+                "random.seed(42)\n"
+                "data = [random.random() for _ in range(100_000)]\n"
+                "data.sort()\n"
+                "# Matrix-like multiply (flattened 200×200)\n"
+                "a = list(range(40_000))\n"
+                "b = [x * 0.001 for x in a]\n"
+                "_ = sum(x * y for x, y in zip(a, b))\n"
+            ),
+        },
+        {
+            "id": 2,
+            "type": "syscall",
+            "code": (
+                "import os, tempfile\n"
+                "d = tempfile.gettempdir()\n"
+                "# Write + read 1MB file through gVisor Gofer\n"
+                "path = os.path.join(d, 'bench_heavy.bin')\n"
+                "data = b'x' * (1024 * 1024)\n"
+                "with open(path, 'wb') as f:\n"
+                "    f.write(data)\n"
+                "with open(path, 'rb') as f:\n"
+                "    _ = f.read()\n"
+                "os.unlink(path)\n"
+                "# Heavy stat/listdir\n"
+                "[os.stat(d) for _ in range(1000)]\n"
+                "[os.listdir(d) for _ in range(1000)]\n"
+            ),
+        },
+        {
+            "id": 3,
+            "type": "import",
+            "code": (
+                "import importlib, sys\n"
+                "mods = [\n"
+                "    'json', 'csv', 'html', 'email', 'unittest', 'logging',\n"
+                "    'xml.etree.ElementTree', 'http.client', 'urllib.request',\n"
+                "    'argparse', 'pprint', 'textwrap', 'difflib',\n"
+                "]\n"
+                "for _ in range(20):\n"
+                "    for m in mods:\n"
+                "        try:\n"
+                "            sys.modules.pop(m, None)\n"
+                "            importlib.import_module(m)\n"
+                "        except Exception:\n"
+                "            pass\n"
+                "# Build a large dict to add memory pressure\n"
+                "_ = {str(i): list(range(100)) for i in range(10_000)}\n"
+            ),
+        },
+    ]
+
+
+def _percentile(sorted_vals, pct):
+    """Return the pct-quantile (pct in [0, 1]) of a pre-sorted list, or 0.0 if the list is empty."""
+    idx = min(int(len(sorted_vals) * pct), len(sorted_vals) - 1)  # clamp so pct == 1.0 stays in range
+    return sorted_vals[idx] if sorted_vals else 0.0
+
+
+def run_benchmark():  # Measures TTFE, per-task CEL and RSS; prints the JSON summary and returns it.
+    results = {"ttfe_ms": None, "cel_ms": [], "rss_mb_start": None, "rss_mb_end": None}
+
+    # Measure TTFE: wall time of the first exec() call this function makes
+    ttfe_start = time.perf_counter()
+    exec("x = 1 + 1", globals())
+    results["ttfe_ms"] = round((time.perf_counter() - ttfe_start) * 1000, 6)
+
+    results["rss_mb_start"] = get_rss_mb()
+
+    tasks = get_static_tasks()
+    sampled_tasks = [t for t in tasks if t["type"] != "import"]
+    import_task = next((t for t in tasks if t["type"] == "import"), None)
+
+    # Warmup — sampled tasks only (import uses C-extension modules that
+    # error on repeated reimport, so it runs once outside the loop)
+    for _ in range(SAMPLE_WARMUP):
+        for task in sampled_tasks:
+            exec(task["code"], globals())  # runs in module globals, so names the snippet defines persist
+
+    # Benchmark iterations — compute + syscall only
+    for i in range(SAMPLE_COUNT):
+        # Grow resident memory slightly each iteration (~100KB)
+        _RESIDENT_DATA.append(bytearray(100 * 1024))
+
+        for task in sampled_tasks:
+            start = time.perf_counter()
+            exec(task["code"], globals())
+            elapsed_ms = round((time.perf_counter() - start) * 1000, 6)
+            results["cel_ms"].append({
+                "iteration": i,
+                "task_id": task["id"],
+                "type": task["type"],
+                "latency_ms": elapsed_ms,
+            })
+
+    # Import task — single run (C-extension modules break on repeated reimport)
+    import_elapsed_ms = 0.0
+    if import_task:
+        import_start = time.perf_counter()
+        exec(import_task["code"], globals())
+        import_elapsed_ms = round((time.perf_counter() - import_start) * 1000, 6)
+
+    results["rss_mb_end"] = get_rss_mb()
+
+    # --- Raw per-iteration totals (sum of the sampled tasks' latencies in each iteration) ---
+    iteration_totals = []
+    for i in range(SAMPLE_COUNT):
+        total = sum(r["latency_ms"] for r in results["cel_ms"] if r["iteration"] == i)
+        iteration_totals.append(round(total, 6))
+
+    # --- Raw per-type latencies (one list per task type, in recording order) ---
+    types_seen = sorted(set(r["type"] for r in results["cel_ms"]))
+    per_type_raw = {}
+    for t in types_seen:
+        per_type_raw[t] = [round(r["latency_ms"], 6)
+                           for r in results["cel_ms"] if r["type"] == t]
+
+    # Output raw arrays — cross-sandbox stats computed by main.py
+    summary = {
+        "hostname": os.environ.get("HOSTNAME", "unknown"),
+        "sandbox_ttfe_ms": results["ttfe_ms"],
+        "sandbox_total_cel_ms": iteration_totals,
+        "sandbox_import_cel_ms": import_elapsed_ms,
+        "sandbox_rss_start_mb": results["rss_mb_start"],
+        "sandbox_rss_end_mb": results["rss_mb_end"],
+        "sandbox_rss_growth_mb": round(results["rss_mb_end"] - results["rss_mb_start"], 6),
+        "sample_count": SAMPLE_COUNT,
+        "sample_warmup": SAMPLE_WARMUP,
+        "total_iterations": len(iteration_totals),
+        "task_types": len(types_seen) + (1 if import_task else 0),
+    }
+
+    for t, raw in per_type_raw.items():  # adds one sandbox_<type>_cel_ms list per sampled type
+        summary[f"sandbox_{t}_cel_ms"] = raw
+
+    print(json.dumps(summary))
+
+    with open("/tmp/benchmark_results.json", "w") as f:  # raw results dict, incl. per-task records
+        json.dump(results, f)
+
+    return summary
+
+if __name__ == "__main__":
+    run_benchmark()
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py
new file mode 100644
index 0000000000..f92a3e694d
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""Agentic Payload Transfer Benchmark (Use Case D).
+
+Measures the cost of returning large "Observation" payloads from a gVisor
+sandbox back to the Orchestrator via the real data path:
+  stdout → code_execution_result.output → orchestrator HTTP response.
+
+For a given PAYLOAD_SIZE_MB, the script:
+  1. Generates a payload of that size (os.urandom + base64)
+  2. Measures generation, serialization, and stdout-write times separately
+  3. Repeats for PAYLOAD_ITERATIONS to compute stable percentiles
+  4. On the final iteration, writes the actual payload to stdout (measuring
+     real end-to-end transfer); other iterations write to /dev/null to
+     measure write-syscall cost without flooding the return channel.
+  5. Emits a JSON summary to stderr (diagnostics) and, after the payload, to stdout (parsed by main.py)
+
+Metrics are split so that pass/fail thresholds can exclude generation
+time (os.urandom), which is not part of data transfer.
+
+Environment variables (injected by the agent):
+  PAYLOAD_SIZE_MB     — target payload size in megabytes (default: 1)
+  PAYLOAD_ITERATIONS  — number of transfer iterations (default: 20)
+"""
+
+import base64
+import json
+import os
+import resource
+import sys
+import time
+
+PAYLOAD_SIZE_MB = float(os.environ.get("PAYLOAD_SIZE_MB") or "1")
+PAYLOAD_ITERATIONS = int(os.environ.get("PAYLOAD_ITERATIONS") or "20")
+
+
+# Use stderr for all diagnostic/metric output so stdout is reserved for
+# the actual payload transfer (the measured data path).
+def _log(msg):  # Diagnostic print helper: writes msg to stderr, never to stdout.
+    print(msg, file=sys.stderr, flush=True)
+
+
+_log(f"PAYLOAD_SIZE_MB: {PAYLOAD_SIZE_MB}")
+_log(f"PAYLOAD_ITERATIONS: {PAYLOAD_ITERATIONS}")
+
+
+def get_rss_mb():
+    """Return this process's peak RSS (ru_maxrss high-water mark) in MB; assumes ru_maxrss is in KB (Linux)."""
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
+
+
+def _percentile(sorted_vals, pct):
+    """Return the pct-quantile (index int(len * pct), clamped) of a pre-sorted list; 0.0 if empty."""
+    if not sorted_vals:
+        return 0.0
+    idx = int(len(sorted_vals) * pct)
+    return sorted_vals[min(idx, len(sorted_vals) - 1)]
+
+
+def _stats_for(latencies):
+    """Compute mean/p50/p95/p99/min/max (ms) without mutating the input; all 0.0 if it is empty."""
+    ordered = sorted(latencies) or [0.0]  # sorted copy; a lone 0.0 stands in for empty input
+    return {
+        "mean": round(sum(ordered) / len(ordered), 6),
+        "p50": round(ordered[len(ordered) // 2], 6),
+        "p95": round(_percentile(ordered, 0.95), 6),
+        "p99": round(_percentile(ordered, 0.99), 6),
+        "min": round(ordered[0], 6),
+        "max": round(ordered[-1], 6),
+    }
+
+
+def run_benchmark():
+    """Run warmup plus measured transfer iterations, emit the JSON summary to stderr and stdout, return it."""
+    target_bytes = int(PAYLOAD_SIZE_MB * 1024 * 1024)
+    rss_start = get_rss_mb()
+
+    generation_times = []
+    serialization_times = []
+    stdout_times = []  # stdout write syscall time
+    transfer_times = []  # serialize + stdout write (the threshold metric)
+    throughputs = []  # MB/s based on stdout write time
+
+    # --- Warmup (2 iterations, not recorded) ---
+    for _ in range(2):
+        raw = os.urandom(target_bytes)
+        _ = base64.b64encode(raw).decode("ascii")
+
+    # --- Measured iterations ---
+    for i in range(PAYLOAD_ITERATIONS):
+        # 1. Generate payload (os.urandom — NOT data transfer)
+        t0 = time.perf_counter()
+        raw = os.urandom(target_bytes)
+        t_gen = time.perf_counter()
+
+        # 2. Serialize (base64 encode — mirrors real observation encoding)
+        encoded = base64.b64encode(raw).decode("ascii")
+        t_ser = time.perf_counter()
+
+        # 3. Transfer — write payload to stdout (the real sandbox→orchestrator path).
+        #    Only the final iteration writes to actual stdout to measure real
+        #    end-to-end transfer without flooding the return channel.
+        #    Other iterations write to /dev/null (same gVisor write-syscall path,
+        #    data discarded by host kernel).
+        t_xfer_start = time.perf_counter()
+        if i == PAYLOAD_ITERATIONS - 1:
+            sys.stdout.write(encoded)
+            sys.stdout.flush()
+        else:
+            with open("/dev/null", "w") as devnull:
+                devnull.write(encoded)
+        t_xfer = time.perf_counter()
+
+        gen_ms = (t_gen - t0) * 1000
+        ser_ms = (t_ser - t_gen) * 1000
+        stdout_ms = (t_xfer - t_xfer_start) * 1000
+        transfer_ms = ser_ms + stdout_ms  # excludes generation
+
+        generation_times.append(gen_ms)
+        serialization_times.append(ser_ms)
+        stdout_times.append(stdout_ms)
+        transfer_times.append(transfer_ms)
+
+        # Throughput in MB/s (based on encoded size and stdout write time)
+        encoded_size_mb = len(encoded) / (1024 * 1024)
+        if stdout_ms > 0:
+            throughputs.append(encoded_size_mb / (stdout_ms / 1000))
+
+    rss_end = get_rss_mb()
+
+    # Compute stats
+    gen_stats = _stats_for(generation_times)
+    ser_stats = _stats_for(serialization_times)
+    stdout_stats = _stats_for(stdout_times)
+    transfer_stats = _stats_for(transfer_times)
+    throughput_stats = _stats_for(throughputs) if throughputs else {}
+
+    # Payload metadata: padded base64 emits 4 chars per 3 input bytes, so no extra payload is built
+    encoded_size_bytes = 4 * ((target_bytes + 2) // 3)
+
+    summary = {
+        "hostname": os.environ.get("HOSTNAME", "unknown"),
+        # Payload config
+        "sandbox_payload_size_bytes": target_bytes,
+        "sandbox_payload_encoded_size_bytes": encoded_size_bytes,
+        "sandbox_payload_iterations": PAYLOAD_ITERATIONS,
+        # Generation time (os.urandom — NOT data transfer, excluded from threshold)
+        "sandbox_generation_time_mean_ms": gen_stats["mean"],
+        "sandbox_generation_time_p50_ms": gen_stats["p50"],
+        "sandbox_generation_time_p95_ms": gen_stats["p95"],
+        "sandbox_generation_time_p99_ms": gen_stats["p99"],
+        "sandbox_generation_time_min_ms": gen_stats["min"],
+        "sandbox_generation_time_max_ms": gen_stats["max"],
+        # Serialization time (base64 encode — CPU bound)
+        "sandbox_serialization_time_mean_ms": ser_stats["mean"],
+        "sandbox_serialization_time_p50_ms": ser_stats["p50"],
+        "sandbox_serialization_time_p95_ms": ser_stats["p95"],
+        "sandbox_serialization_time_p99_ms": ser_stats["p99"],
+        "sandbox_serialization_time_min_ms": ser_stats["min"],
+        "sandbox_serialization_time_max_ms": ser_stats["max"],
+        # Stdout write time (the raw write-syscall through gVisor)
+        "sandbox_stdout_time_mean_ms": stdout_stats["mean"],
+        "sandbox_stdout_time_p50_ms": stdout_stats["p50"],
+        "sandbox_stdout_time_p95_ms": stdout_stats["p95"],
+        "sandbox_stdout_time_p99_ms": stdout_stats["p99"],
+        "sandbox_stdout_time_min_ms": stdout_stats["min"],
+        "sandbox_stdout_time_max_ms": stdout_stats["max"],
+        # Transfer time (serialization + stdout write — the threshold metric)
+        "sandbox_transfer_time_mean_ms": transfer_stats["mean"],
+        "sandbox_transfer_time_p50_ms": transfer_stats["p50"],
+        "sandbox_transfer_time_p95_ms": transfer_stats["p95"],
+        "sandbox_transfer_time_p99_ms": transfer_stats["p99"],
+        "sandbox_transfer_time_min_ms": transfer_stats["min"],
+        "sandbox_transfer_time_max_ms": transfer_stats["max"],
+        # Throughput (MB/s based on transfer write time)
+        "sandbox_throughput_mean_mbps": throughput_stats.get("mean"),
+        "sandbox_throughput_p50_mbps": throughput_stats.get("p50"),
+        "sandbox_throughput_min_mbps": throughput_stats.get("min"),
+        # RSS
+        "sandbox_rss_start_mb": rss_start,
+        "sandbox_rss_end_mb": rss_end,
+        "sandbox_rss_growth_mb": rss_end - rss_start,
+    }
+
+    # Emit JSON summary to stderr for diagnostics.
+    _log("---BENCHMARK_RESULT_JSON---")
+    _log(json.dumps(summary, indent=2))
+
+    # Also emit to stdout (after the payload data) so that
+    # _parse_sandbox_json() can find it in code_execution_result.output.
+    # ADK only captures stdout, not stderr.
+    print("\n---BENCHMARK_RESULT_JSON---", flush=True)
+    print(json.dumps(summary), flush=True)
+
+    return summary
+
+
+if __name__ == "__main__":
+    try:
+        run_benchmark()
+    except Exception as e:
+        import traceback
+        # Best-effort: the traceback goes to stderr and nothing is re-raised, so the exit status stays 0.
+        traceback.print_exc()
diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py
new file mode 100644
index 0000000000..07ef6309db
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Minimal QPS benchmark script for UC-F (Scheduling Throughput).
+
+Runs inside the GKE Agent Sandbox to validate claim readiness.
+Executes a trivial operation and reports status.  The orchestrator-side
+timing (orchestrator_total_ms) serves as the primary TTFE measurement —
+when the warm pool drains, that metric spikes because fresh pods must be
+cold-started.
+"""
+import json
+import time
+
+t0 = time.perf_counter()  # start of the timed region
+
+# Trivial computation to prove the sandbox is functional
+result = sum(range(10_000))
+
+elapsed_ms = (time.perf_counter() - t0) * 1000  # wall time of the sum() above only, in ms
+
+print(json.dumps({  # single-line JSON object on stdout
+    "sandbox_status": "ok",
+    "sandbox_qps_exec_ms": round(elapsed_ms, 3),
+    "sandbox_compute_result": result,
+}))
diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
new file mode 100644
index 0000000000..69922efdb0
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
@@ -0,0 +1,150 @@
+# Agentic Benchmark Configuration for GKE
+# Used with: --benchmark_config_file=perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
+#
+# User/environment-specific flags that MUST be passed on CLI:
+#   --project=<project>
+#   --owner=<owner>
+#   --gce_network_name=<user>-agentic-vpc
+#   --gke_additional_flags="--workload-pool=<project>.svc.id.goog,--subnetwork=<user>-agentic-subnet,--enable-master-authorized-networks,--master-authorized-networks=$(curl -s ifconfig.me)/32"
+#
+# Per-run flags:
+#   --run_stage=provision|prepare|run,cleanup|teardown
+#   --run_uri=<unique_id>
+#   --temp_dir=<path>
+#
+# Benchmark-specific sweep parameters (vary per run):
+#   --k8s_python_density_concurrent_sandbox_count=N
+#   --k8s_snapshot_preload_mb=N
+#   etc.
+
+# ===========================================================================
+# Shared configuration (defined once, referenced by all benchmarks via YAML
+# anchors). PKB ignores top-level keys that don't match a benchmark name.
+# ===========================================================================
+
+_shared_flags: &shared_flags
+  # --- Cluster creation flags ---
+  gke_additional_flags:
+    - "--enable-pod-snapshots"
+    - "--enable-dataplane-v2"
+    - "--enable-private-nodes"
+    - "--enable-ip-alias"
+    - "--master-ipv4-cidr=172.16.0.0/28"
+  gke_additional_nodepool_flags:
+    - "--max-pods-per-node=250"
+  container_cluster_version: "1.35.5-gke.1057002"
+  gke_enable_shielded_nodes: false
+  gce_subnet_region: "us-central1"
+
+  # --- Agentic workload flags ---
+  k8s_namespace: "agentic"
+  agent_sandbox_version: "v0.4.6"
+  k8s_gvisor: true
+  k8s_agent_api_url: "http://localhost:8080"
+
+_shared_cluster: &shared_cluster
+  cloud: GCP
+  type: Kubernetes
+  vm_count: 1
+  vm_spec:
+    GCP:
+      machine_type: c4-standard-8
+      zone: us-central1-a
+      boot_disk_type: hyperdisk-balanced
+      boot_disk_size: 50
+  nodepools:
+    sandbox:
+      vm_count: 1
+      vm_spec:
+        GCP:
+          machine_type: c4-standard-8
+          zone: us-central1-a
+          boot_disk_type: hyperdisk-balanced
+          boot_disk_size: 100
+      sandbox_config:
+        type: gvisor
+
+_shared_registry: &shared_registry
+  cloud: GCP
+  spec:
+    GCP:
+      zone: us-central1-a
+
+
+_shared_container_specs: &shared_container_specs
+  adk_agent:
+    image: agentic/adk-agent
+
+# ===========================================================================
+# Benchmark definitions (each references the shared anchors above)
+# ===========================================================================
+
+k8s_python_density:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_chromium_density:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_payload:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_qps:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_snapshot:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_warmpool:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
+
+k8s_deletion:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
+  container_cluster:
+    <<: *shared_cluster
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2
new file mode 100644
index 0000000000..068b50be11
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2
@@ -0,0 +1,118 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: adk-agent-sa
+  namespace: {{ ns }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: adk-agent-sandbox-role
+rules:
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxes"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxwarmpool", "sandboxwarmpools"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["extensions.agents.x-k8s.io"]
+    resources: ["sandboxclaims"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: [""]
+    resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods/portforward"]
+    verbs: ["create"]
+  - apiGroups: ["metrics.k8s.io"]
+    resources: ["pods"]
+    verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: adk-agent-sandbox-binding
+  namespace: {{ ns }}
+subjects:
+  - kind: ServiceAccount
+    name: adk-agent-sa
+    namespace: {{ ns }}
+roleRef:
+  kind: ClusterRole
+  name: adk-agent-sandbox-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: adk-agent
+  namespace: {{ ns }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: adk-agent
+  template:
+    metadata:
+      labels:
+        app: adk-agent
+    spec:
+      serviceAccountName: adk-agent-sa
+      containers:
+      - name: adk-agent
+        imagePullPolicy: Always
+        image: {{ adk_image }}
+        resources:
+          limits:
+            memory: "16384Mi"
+            cpu: "6000m"
+          requests:
+            memory: "512Mi"
+            cpu: "1000m"
+        ports:
+        - containerPort: 8080
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 15
+          periodSeconds: 30
+          timeoutSeconds: 10
+          failureThreshold: 6
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        env:
+          - name: PORT
+            value: "8080"
+          - name: GOOGLE_CLOUD_PROJECT
+            value: "{{ project }}"
+          - name: GOOGLE_CLOUD_LOCATION
+            value: "{{ region }}"
+          - name: GOOGLE_GENAI_USE_VERTEXAI
+            value: "true"
+          - name: CLUSTER_NAME
+            value: "{{ cluster }}"
+          - name: AGENTIC_NAMESPACE
+            value: "{{ ns }}"
+          - name: SANDBOX_ROUTER_URL
+            value: "http://sandbox-router-svc.{{ ns }}.svc.cluster.local:8080"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: adk-agent
+  namespace: {{ ns }}
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 8080
+  selector:
+    app: adk-agent
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2
new file mode 100644
index 0000000000..d76f851e95
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2
@@ -0,0 +1,56 @@
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: psi-reader
+  namespace: {{ ns }}
+  labels:
+    app: psi-reader
+spec:
+  selector:
+    matchLabels:
+      app: psi-reader
+  template:
+    metadata:
+      labels:
+        app: psi-reader
+    spec:
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      hostPID: true
+      containers:
+      - name: reader
+        image: busybox:1.36
+        command: ["sleep", "infinity"]
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: cgroup
+          mountPath: /host/sys/fs/cgroup
+          readOnly: true
+        - name: proc
+          mountPath: /host/proc
+          readOnly: true
+        resources:
+          requests:
+            cpu: "10m"
+            memory: "16Mi"
+          limits:
+            cpu: "50m"
+            memory: "32Mi"
+      volumes:
+      - name: cgroup
+        hostPath:
+          path: /sys/fs/cgroup
+      - name: proc
+        hostPath:
+          path: /proc
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2
new file mode 100644
index 0000000000..0d0541cfe7
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2
@@ -0,0 +1,69 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: sandbox-router-svc
+  namespace: {{ ns }}
+spec:
+  type: ClusterIP
+  selector:
+    app: sandbox-router
+  ports:
+  - name: http
+    protocol: TCP
+    port: 8080
+    targetPort: 8080
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: sandbox-router-deployment
+  namespace: {{ ns }}
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: sandbox-router
+  template:
+    metadata:
+      labels:
+        app: sandbox-router
+    spec:
+      serviceAccountName: adk-agent-sa
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: topology.kubernetes.io/zone
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: sandbox-router
+      containers:
+      - name: router
+        image: {{ router_image }}
+        ports:
+        - containerPort: 8080
+        env:
+        - name: ALLOW_UNAUTHENTICATED_ROUTER
+          value: "true"
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 5
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "512Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+      securityContext:
+        runAsUser: 1000
+        runAsGroup: 1000
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2
new file mode 100644
index 0000000000..e9af43332d
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2
@@ -0,0 +1,103 @@
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: python-sandbox-template
+  namespace: {{ ns }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: python-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: python-runtime
+        image: {{ python_image }}
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: python-sandbox-warmpool
+  namespace: {{ ns }}
+spec:
+  replicas: {{ warmpool_replicas }}
+  sandboxTemplateRef:
+    name: python-sandbox-template
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: chromium-sandbox-template
+  namespace: {{ ns }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: chromium-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: chromium-runtime
+        image: {{ chromium_image }}
+        command: ["/bin/sh", "-c"]
+        args:
+          - |
+            socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 &
+            exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank
+        ports:
+          - containerPort: 9223
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: chromium-sandbox-warmpool
+  namespace: {{ ns }}
+spec:
+  replicas: {{ chromium_replicas }}
+  sandboxTemplateRef:
+    name: chromium-sandbox-template
+---
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-orchestrator-to-chromium
+  namespace: {{ ns }}
+spec:
+  podSelector:
+    matchLabels:
+      sandbox: chromium-sandbox-example
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: adk-agent
+    ports:
+    - protocol: TCP
+      port: 9223
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2
new file mode 100644
index 0000000000..afc4e0ee4c
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2
@@ -0,0 +1,24 @@
+---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotStorageConfig
+metadata:
+  name: benchmark-pssc-gcs
+spec:
+  snapshotStorageConfig:
+    gcs:
+      bucket: "{{ bucket_name }}"
+      path: "{{ snapshot_folder }}"
+---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotPolicy
+metadata:
+  name: benchmark-psp
+  namespace: {{ ns }}
+spec:
+  storageConfigName: benchmark-pssc-gcs
+  selector:
+    matchLabels:
+      app: snapshot-benchmark-workload
+  triggerConfig:
+    type: manual
+    postCheckpoint: resume
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2
new file mode 100644
index 0000000000..11850eb444
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2
@@ -0,0 +1,46 @@
+---
+# SandboxTemplate for the snapshot benchmark workload.
+# NOTE: the namespace render variable here is named `namespace`, whereas
+# snapshot-crds.yaml.j2 uses `ns`; callers must pass the matching keyword.
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: {{ template_name }}
+  namespace: {{ namespace }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        # Same label the PodSnapshotPolicy in snapshot-crds.yaml.j2 selects on.
+        app: snapshot-benchmark-workload
+    spec:
+      serviceAccountName: {{ ksa_name }}
+      runtimeClassName: gvisor
+      containers:
+      - name: preloader
+        image: python:3.11-slim
+        command: ["python3", "-c"]
+        # Inline program: allocates PRELOAD_MB MiB in a bytearray (10 when the
+        # variable is unset), then prints an incrementing counter once per
+        # second, forever.
+        args:
+          - |
+            import time, os
+            preload_mb = int(os.environ.get("PRELOAD_MB", "10"))
+            print(f"Preloading {preload_mb} MB of memory...", flush=True)
+            _ballast = bytearray(preload_mb * 1024 * 1024)
+            print(f"Preload complete. Starting counter.", flush=True)
+            i = 0
+            while True:
+                print(f"Count: {i}", flush=True)
+                i += 1
+                time.sleep(1)
+        env:
+          - name: PRELOAD_MB
+            value: "{{ preload_mb }}"
+        # Only requests are declared; no limits are set in this template.
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "{{ memory_mi }}Mi"
+            ephemeral-storage: "512Mi"
+      # Pins the pod to nodes labeled pkb_nodepool=sandbox.
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+        - key: "sandbox.gke.io/runtime"
+          operator: "Equal"
+          value: "gvisor"
+          effect: "NoSchedule"
+      restartPolicy: "OnFailure"
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
new file mode 100644
index 0000000000..86b33c8486
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
@@ -0,0 +1,64 @@
+# Vibe Coding Startup Scripts
+
+Pluggable startup scripts for the UC-A snapshot saturation harness (`sweeps/snapshot_saturation_search.py`). Each script simulates a realistic "vibe coding" sandbox cold-start — the kind of environment setup that happens when an AI coding agent provisions a new sandbox for a user.
+
+## How It Works
+
+When `--preload_mode=script:<path>` is passed to the sweep harness:
+
+1. The script is read from disk and embedded into the pod's container entrypoint
+2. The pod runs the script to completion (installs packages, starts services, etc.)
+3. After the script exits 0, the harness prints `SCRIPT_READY` and starts a counter loop
+4. **TTFE** is measured as the total time from SandboxClaim creation to `SCRIPT_READY`
+
+This lets you compare cold-start TTFE (full script execution) against snapshot/restore TTFE (resuming from a pre-snapshotted state where the script already ran).
+
+## Scripts
+
+### startup_pip_fastapi.sh
+
+**Lightweight Python variant.** Runs natively in the `python:3.11-slim` base image.
+
+Steps: `pip install fastapi uvicorn` → create app → start uvicorn → wait for first HTTP response.
+
+Typical cold-start: ~5–8s on GKE with fast network.
+
+```bash
+# Cold-start only
+python sweeps/snapshot_saturation_search.py \
+    --skip_snapshot \
+    --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=20
+
+# With snapshot/restore (shows restore speedup vs cold-start)
+python sweeps/snapshot_saturation_search.py \
+    --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=20 --restore_threshold_s=10
+```
+
+### startup_npm_vite.sh
+
+**Heavier Node.js variant.** Installs Node.js + npm from apt, then npm-installs Vite and starts a dev server.
+
+Steps: `apt-get install nodejs npm` → `npm install vite` → start Vite dev server → wait for first page served.
+
+Typical cold-start: ~30–60s (apt + npm on cold cache).
+
+```bash
+python sweeps/snapshot_saturation_search.py \
+    --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=120 --restore_threshold_s=10
+```
+
+## Writing Your Own Script
+
+Requirements:
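+- Must be a bash script (runs via `bash -c` in a `python:3.11-slim` container). Only tools present in that image are available; install anything else the script needs (the slim image may not ship utilities such as `curl`)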
+- Must exit 0 on success (use `set -e` for fail-fast)
+- Should print progress to stdout (visible in pod logs for debugging)
+- The harness appends `SCRIPT_READY` + counter loop after your script — don't add your own
+
+The `PRELOAD_MB` env var is available but unused by these scripts. The sweep varies it to test different memory request levels on the pod.
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
new file mode 100644
index 0000000000..f3e9c9c235
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Vibe Coding Startup Script — npm + Vite dev server
+#
+# Simulates a typical agentic sandbox "vibe coding" cold-start:
+#   1. Install Node.js + npm (apt), then the project's npm dependencies
+#   2. Start a Vite dev server
+#   3. Wait for the server to be ready (first page served)
+#
+# This script is designed to run inside the sandbox container (python:3.11-slim).
+# It installs Node.js + npm + dependencies from scratch to measure realistic
+# cold-start latency including package installation.
+#
+# Usage (cold-start only):
+#   python sweeps/snapshot_saturation_search.py \
+#     --skip_snapshot \
+#     --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=120
+#
+# Usage (with snapshot/restore):
+#   python sweeps/snapshot_saturation_search.py \
+#     --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=120 --restore_threshold_s=10
+#
+# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to
+# the container; in script mode this is unused by the script itself but
+# varies memory requests to test different resource pressure levels.
+
+# -e aborts on the first failing command. pipefail makes a pipeline fail when
+# any stage fails, so `npm install ... | tail` cannot hide an npm failure
+# behind tail's exit status.
+set -eo pipefail
+
+echo "[vibe-coding] Installing Node.js..."
+# Run as two commands rather than `update && install`: errexit does not fire
+# for a failure on the left-hand side of `&&`, so a failed update would
+# otherwise go unnoticed. curl is installed explicitly because the readiness
+# probe below depends on it and the slim base image is not guaranteed to
+# ship it.
+apt-get update -qq
+apt-get install -y -qq nodejs npm curl > /dev/null 2>&1
+
+echo "[vibe-coding] Creating project scaffold..."
+mkdir -p /tmp/vibe-project && cd /tmp/vibe-project
+
+# Create a minimal package.json with Vite
+cat > package.json << 'EOF'
+{
+  "name": "vibe-sandbox",
+  "private": true,
+  "scripts": {
+    "dev": "vite --host 0.0.0.0 --port 5173"
+  },
+  "dependencies": {
+    "vite": "^5.0.0"
+  }
+}
+EOF
+
+# Create minimal index.html for Vite to serve
+cat > index.html << 'EOF'
+<!DOCTYPE html>
+<html><head><title>Vibe</title></head>
+<body><h1>Ready</h1></body>
+</html>
+EOF
+
+echo "[vibe-coding] Installing npm dependencies..."
+npm install --prefer-offline 2>&1 | tail -5
+
+echo "[vibe-coding] Starting Vite dev server..."
+npx vite --host 0.0.0.0 --port 5173 &
+VITE_PID=$!
+
+echo "[vibe-coding] Waiting for server to be ready..."
+MAX_WAIT=60
+ELAPSED=0
+while ! curl -s http://localhost:5173 > /dev/null 2>&1; do
+    # Fail fast if the background server process has already exited instead
+    # of polling until the timeout.
+    if ! kill -0 "$VITE_PID" 2>/dev/null; then
+        echo "[vibe-coding] ERROR: Vite dev server exited before becoming ready"
+        exit 1
+    fi
+    sleep 1
+    ELAPSED=$((ELAPSED + 1))
+    if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
+        echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s"
+        # Do not leave the background server running on the failure path.
+        kill "$VITE_PID" 2>/dev/null || true
+        exit 1
+    fi
+done
+
+echo "[vibe-coding] First page served successfully (${ELAPSED}s)"
+
+# Kill the vite server — we only needed to measure startup time
+kill "$VITE_PID" 2>/dev/null || true
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh
new file mode 100644
index 0000000000..d54a851bda
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Lightweight Vibe Coding Startup Script — pip install + FastAPI
+#
+# Simulates a Python-based agentic sandbox cold-start:
+#   1. Install Python packages (FastAPI + uvicorn)
+#   2. Start a web server
+#   3. Wait for the server to respond
+#
+# This is lighter weight than the npm/Vite variant and runs natively
+# in the python:3.11-slim base image without needing to install Node.js.
+#
+# Usage (cold-start only):
+#   python sweeps/snapshot_saturation_search.py \
+#     --skip_snapshot \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20
+#
+# Usage (with snapshot/restore):
+#   python sweeps/snapshot_saturation_search.py \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20 --restore_threshold_s=10
+#
+# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to
+# the container; in script mode this is unused by the script itself but
+# varies memory requests to test different resource pressure levels.
+
+# -e aborts on the first failing command. pipefail makes a pipeline fail when
+# any stage fails, so `pip install ... | tail` cannot hide a pip failure
+# behind tail's exit status.
+set -eo pipefail
+
+echo "[vibe-coding] Installing Python packages..."
+pip install --quiet fastapi uvicorn 2>&1 | tail -3
+
+echo "[vibe-coding] Creating app..."
+cat > /tmp/app.py << 'EOF'
+from fastapi import FastAPI
+app = FastAPI()
+
+@app.get("/")
+def root():
+    return {"status": "ready"}
+EOF
+
+echo "[vibe-coding] Starting uvicorn server..."
+python -m uvicorn app:app --host 0.0.0.0 --port 8000 --app-dir /tmp &
+SERVER_PID=$!
+
+echo "[vibe-coding] Waiting for server to be ready..."
+MAX_WAIT=30
+ELAPSED=0
+while ! python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" 2>/dev/null; do
+    # Fail fast if the background server process has already exited instead
+    # of polling until the timeout.
+    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+        echo "[vibe-coding] ERROR: uvicorn exited before becoming ready"
+        exit 1
+    fi
+    sleep 1
+    ELAPSED=$((ELAPSED + 1))
+    if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
+        echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s"
+        # Do not leave the background server running on the failure path.
+        kill "$SERVER_PID" 2>/dev/null || true
+        exit 1
+    fi
+done
+
+echo "[vibe-coding] First request served successfully (${ELAPSED}s)"
+
+# Kill the server — we only needed to measure startup time
+kill "$SERVER_PID" 2>/dev/null || true
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
new file mode 100644
index 0000000000..b2d31e026b
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
@@ -0,0 +1,497 @@
+"""Shared workload deployment utilities for GKE Agent Sandbox benchmarks.
+
+Provides idempotent functions to deploy the Agent Sandbox ecosystem
+(CRDs, templates, warm pools, router, ADK agent, PSI reader) onto a
+pre-provisioned GKE cluster. Called by each benchmark's Prepare() stage.
+
+All functions are idempotent -- safe to call repeatedly without side effects.
+"""
+
+import logging
+import os
+
+from absl import flags
+from jinja2 import Template
+from perfkitbenchmarker import data
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.resources.container_service import kubectl
+
+FLAGS = flags.FLAGS
+
+# ---------------------------------------------------------------------------
+# Flags (registered once; shared across all benchmarks)
+# ---------------------------------------------------------------------------
+
+# NOTE: for the three image flags below, an empty value does not by itself
+# skip the deployment. The deploy helpers fall back to the image path that
+# DeployWorkloads derives from project / region / arch, and skip only when
+# no path is available at all.
+flags.DEFINE_string(
+    "agent_sandbox_version",
+    "v0.4.6",
+    "Agent Sandbox controller version (GitHub release tag).",
+)
+
+flags.DEFINE_string(
+    "agent_sandbox_router_image",
+    "",
+    "Sandbox router container image. If empty, the image path derived from "
+    "the cluster project/region is used; router deployment is skipped only "
+    "when no path could be derived.",
+)
+
+flags.DEFINE_string(
+    "k8s_agent_image",
+    "",
+    "ADK agent container image. Used when container_specs provides no image. "
+    "If empty, the image path derived from the cluster project/region is "
+    "used; agent deployment is skipped only when no path could be derived.",
+)
+
+flags.DEFINE_string(
+    "k8s_chromium_image",
+    "",
+    "Chromium sandbox container image. If empty, the image path derived from "
+    "the cluster project/region is used, or a placeholder when none was "
+    "derived.",
+)
+
+flags.DEFINE_integer(
+    "agent_sandbox_warmpool_replicas",
+    2,
+    "Default warm pool replica count for SandboxWarmPool resources.",
+)
+
+flags.DEFINE_integer(
+    "agent_sandbox_chromium_replicas",
+    1,
+    "Default Chromium warm pool replica count.",
+)
+
+flags.DEFINE_string(
+    "k8s_python_image",
+    "registry.k8s.io/agent-sandbox/python-runtime-sandbox:v0.1.0",
+    "Python runtime sandbox container image.",
+)
+
+flags.DEFINE_integer(
+    "k8s_deploy_timeout",
+    120,
+    "Timeout in seconds for workload deployment rollout.",
+)
+
+
+
+
+# Module-level derived images (set during DeployWorkloads)
+# Starts empty; DeployWorkloads replaces it with the dict returned by
+# _DeriveImagePaths (keys: adk_agent, sandbox_router, chromium). The _Deploy*
+# helpers read it as a fallback when the matching image flag is empty.
+_derived_images = {}
+
+# ---------------------------------------------------------------------------
+# Template loading
+# ---------------------------------------------------------------------------
+
+# Directory passed to data.ResourcePath() to locate the manifest templates.
+_MANIFESTS_DIR = "k8s_agents/manifests"
+
+
+def _LoadTemplate(template_name):
+    """Read a manifest template from the data directory.
+
+    Args:
+        template_name: File name of the template inside _MANIFESTS_DIR.
+
+    Returns:
+        A jinja2 Template built from the file's contents.
+    """
+    manifests_root = data.ResourcePath(_MANIFESTS_DIR)
+    source_path = os.path.join(manifests_root, template_name)
+    with open(source_path, "r") as handle:
+        return Template(handle.read())
+
+
+def _RenderAndApply(template_name, **kwargs):
+    """Render a manifest template to a file and `kubectl apply` it.
+
+    Args:
+        template_name: File name of the Jinja2 template inside
+            _MANIFESTS_DIR (for example "adk-agent.yaml.j2").
+        **kwargs: Variables passed to the template's render() call.
+
+    Returns:
+        True if kubectl exited with status 0, False otherwise. A failure is
+        logged as a warning and never raised.
+    """
+    rendered = _LoadTemplate(template_name).render(**kwargs)
+
+    # Write rendered YAML to tmp dir (RunKubectlCommand does not support stdin)
+    # NOTE(review): this writes inside the data resource directory; confirm
+    # that location is writable in every deployment of the package.
+    tmp_dir = os.path.join(data.ResourcePath(_MANIFESTS_DIR), "tmp")
+    os.makedirs(tmp_dir, exist_ok=True)
+
+    # Strip only a trailing ".j2". str.replace would also delete ".j2"
+    # appearing in the middle of a name.
+    rendered_name = template_name
+    if rendered_name.endswith(".j2"):
+        rendered_name = rendered_name[: -len(".j2")]
+    rendered_path = os.path.join(tmp_dir, rendered_name)
+    with open(rendered_path, "w") as f:
+        f.write(rendered)
+
+    _, stderr, retcode = kubectl.RunKubectlCommand(
+        ["apply", "-f", rendered_path],
+        raise_on_failure=False,
+    )
+    if retcode != 0:
+        # Truncate stderr so one failed apply cannot flood the log.
+        logging.warning(
+            "kubectl apply failed for %s: %s", template_name, stderr[:500]
+        )
+    return retcode == 0
+
+
+# Flags consumed by DeploySnapshots.
+flags.DEFINE_bool(
+    name="skip_deploy_snapshots",
+    default=False,
+    help=(
+        "Skip deployment of Pod Snapshot infrastructure. Set to True on "
+        "non-GKE clusters where pod snapshots are not supported."
+    ),
+)
+
+flags.DEFINE_string(
+    name="k8s_snapshot_ksa_name",
+    default="pod-snapshot-sa",
+    help="Kubernetes service account for pod snapshots.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def _DeriveImagePaths(project, region, arch):
+    """Build the Artifact Registry image references used by the deployment.
+
+    Every reference has the form
+    "<region>-docker.pkg.dev/<project>/<repo>/<image>:<arch>", so the
+    architecture doubles as the image tag.
+
+    Args:
+        project: GCP project ID.
+        region: GCP region (e.g. us-central1).
+        arch: Docker platform architecture (amd64 or arm64).
+
+    Returns:
+        Dict with keys: adk_agent, sandbox_router, chromium.
+    """
+    registry = f"{region}-docker.pkg.dev/{project}"
+    repo_paths = {
+        "adk_agent": "adk-repo/adk-agent",
+        "sandbox_router": "agent-sandbox/sandbox-router",
+        "chromium": "agent-sandbox/chrome-sandbox",
+    }
+    return {
+        key: f"{registry}/{repo_path}:{arch}"
+        for key, repo_path in repo_paths.items()
+    }
+
+def DeployWorkloads(benchmark_spec=None):
+    """Deploy the full Agent Sandbox ecosystem onto the GKE cluster.
+
+    Intended to be safe to call repeatedly: the kubectl steps below run with
+    raise_on_failure=False. Sequence:
+      1. Derive container image paths from project / region / arch
+      2. Create namespace
+      3. Install Agent Sandbox CRDs
+      4. Deploy SandboxTemplates + WarmPools
+      5. Deploy Sandbox Router
+      6. Deploy ADK Agent (Deployment + Service + RBAC)
+      7. Deploy PSI Reader DaemonSet
+      8. Wait for ADK Agent rollout
+
+    No image is built here; this function only computes the paths the
+    images are expected to live at.
+
+    Args:
+        benchmark_spec: Optional benchmark spec. When it has a
+            container_cluster, project / zone / name are read from it and
+            anything missing falls back to FLAGS.project / FLAGS.zone. When
+            container_specs["adk_agent"].image is set, it takes precedence
+            as the ADK agent image.
+    """
+    ns = FLAGS.k8s_namespace
+    logging.info("=== DeployWorkloads: namespace=%s ===", ns)
+
+    # Derive project, zone and cluster_name from benchmark_spec.
+    project = ""
+    zone = ""
+    cluster_name = ""
+    cluster = None
+    if benchmark_spec:
+        cluster = getattr(benchmark_spec, "container_cluster", None)
+    if cluster:
+        project = getattr(cluster, "project", "") or ""
+        zone = getattr(cluster, "zone", "") or ""
+        cluster_name = getattr(cluster, "name", "") or ""
+    # Fallback to global FLAGS if benchmark_spec not available
+    if not project:
+        project = getattr(FLAGS, "project", "") or ""
+    if not zone:
+        zone = getattr(FLAGS, "zone", "") or ""
+    # The location may arrive as a list (NOTE(review): --zone appears to be
+    # a list flag in PKB -- confirm); use its first entry.
+    if isinstance(zone, (list, tuple)):
+        zone = zone[0] if zone else ""
+    # Keep the first two dash-separated parts. This maps "us-central1-a" to
+    # "us-central1" and leaves a region such as "us-central1" (regional
+    # cluster) intact, where slicing off two characters would corrupt it.
+    region = "-".join(zone.split("-")[:2])
+
+    # Derive image paths for template rendering.
+    # Chrome and Router images are built during prerequisites
+    # (gke_prerequisites.py), not during Prepare.
+    # ADK agent image is built by PKB container_specs during Provision.
+    # The import is kept for its side effect: gke_image_build_utils defines
+    # the --target_arch flag read just below.
+    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+        gke_image_build_utils,
+    )
+    arch = FLAGS.target_arch or "amd64"
+    global _derived_images
+    _derived_images = _DeriveImagePaths(project, region, arch)
+    logging.info(
+        "DeployWorkloads: project=%s region=%s arch=%s",
+        project, region, arch,
+    )
+    logging.info("_derived_images: %s", _derived_images)
+
+    _CreateNamespace(ns)
+    _InstallCRDs()
+    _DeploySandboxTemplates(ns)
+    _DeploySandboxRouter(ns)
+    # Prefer ADK image from PKB-native container_specs (built during Provision).
+    # Falls back to FLAGS.k8s_agent_image or derived image path.
+    adk_image_from_specs = ""
+    if benchmark_spec:
+        # `or {}` guards against a spec whose container_specs is None.
+        specs = getattr(benchmark_spec, "container_specs", None) or {}
+        adk_spec = specs.get("adk_agent")
+        if adk_spec and getattr(adk_spec, "image", None):
+            adk_image_from_specs = adk_spec.image
+            logging.info(
+                "Using ADK image from container_specs: %s",
+                adk_image_from_specs,
+            )
+    _DeployADKAgent(
+        ns,
+        project=project,
+        region=region,
+        cluster_name=cluster_name,
+        adk_image_override=adk_image_from_specs,
+    )
+    _DeployPSIReader(ns)
+    _WaitForAgentReady(ns)
+
+    logging.info("DeployWorkloads complete.")
+
+
+def DeploySnapshots():
+    """Deploy Pod Snapshot infrastructure.
+
+    Intended to be safe to call repeatedly: every command runs with
+    raise_on_failure=False. Sequence:
+      1. Create GCS bucket (hierarchical namespace)
+      2. Create managed folder
+      3. Create KSA for snapshots
+      4. Bind IAM roles
+      5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy
+
+    Returns early without doing anything when --skip_deploy_snapshots is
+    set or when FLAGS.project is empty. Project and location come from
+    FLAGS only.
+    """
+    if FLAGS.skip_deploy_snapshots:
+        logging.info("Skipping snapshot infrastructure (--skip_deploy_snapshots=True).")
+        return
+
+    ns = FLAGS.k8s_namespace
+    project = getattr(FLAGS, 'project', '') or ''
+    zone = getattr(FLAGS, 'zone', '') or ''
+    # The flag value may be a list (NOTE(review): --zone appears to be a
+    # list flag in PKB -- confirm); use its first entry.
+    if isinstance(zone, (list, tuple)):
+        zone = zone[0] if zone else ''
+    # Keep the first two dash-separated parts. This maps "us-central1-a" to
+    # "us-central1" and leaves a region such as "us-central1" intact, where
+    # slicing off two characters would corrupt it.
+    region = '-'.join(zone.split('-')[:2])
+
+    if not project:
+        logging.warning("DeploySnapshots: FLAGS.project not set, skipping.")
+        return
+    if not region:
+        logging.warning(
+            "DeploySnapshots: no zone set; bucket location will be empty."
+        )
+
+    bucket_name = "agent-sandbox-snapshots-{}".format(project)
+    snapshot_folder = "benchmark-snapshots"
+    ksa_name = FLAGS.k8s_snapshot_ksa_name
+
+    logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name)
+
+    # 1. Create GCS bucket
+    vm_util.IssueCommand(
+        [
+            "gcloud", "storage", "buckets", "create",
+            "gs://{}".format(bucket_name),
+            "--uniform-bucket-level-access",
+            "--enable-hierarchical-namespace",
+            "--soft-delete-duration=0d",
+            "--location={}".format(region),
+            "--project={}".format(project),
+        ],
+        raise_on_failure=False,
+    )
+
+    # 2. Create managed folder
+    vm_util.IssueCommand(
+        [
+            "gcloud", "storage", "managed-folders", "create",
+            "gs://{}/{}/".format(bucket_name, snapshot_folder),
+            "--project={}".format(project),
+        ],
+        raise_on_failure=False,
+    )
+
+    # 3. Create KSA
+    kubectl.RunKubectlCommand(
+        ["create", "serviceaccount", ksa_name, "--namespace", ns],
+        raise_on_failure=False,
+    )
+
+    # 4. IAM bindings (skipped when the project number lookup fails)
+    project_number = _GetProjectNumber(project)
+    if project_number:
+        _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name)
+
+    # 5. Deploy PSSC + PSP
+    _RenderAndApply(
+        "snapshot-crds.yaml.j2",
+        ns=ns,
+        bucket_name=bucket_name,
+        snapshot_folder=snapshot_folder,
+    )
+
+    logging.info("DeploySnapshots complete.")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _CreateNamespace(ns):
+    """Run `kubectl create namespace`, ignoring a non-zero exit.
+
+    No existence check is made first; a failing create (for example when
+    the namespace already exists) is tolerated via raise_on_failure=False.
+
+    Args:
+        ns: Name of the namespace to create.
+    """
+    create_cmd = ["create", "namespace", ns]
+    kubectl.RunKubectlCommand(create_cmd, raise_on_failure=False)
+
+
+def _InstallCRDs():
+    """Apply the Agent Sandbox release manifests from GitHub.
+
+    Applies manifest.yaml and extensions.yaml from the release tagged
+    FLAGS.agent_sandbox_version in a single kubectl invocation. Failures
+    are not raised.
+    """
+    release_tag = FLAGS.agent_sandbox_version
+    release_url = (
+        "https://github.com/kubernetes-sigs/agent-sandbox"
+        f"/releases/download/{release_tag}"
+    )
+    logging.info("Installing Agent Sandbox CRDs (%s)", release_tag)
+
+    apply_cmd = ["apply"]
+    for manifest_file in ("manifest.yaml", "extensions.yaml"):
+        apply_cmd += ["-f", f"{release_url}/{manifest_file}"]
+    kubectl.RunKubectlCommand(apply_cmd, raise_on_failure=False)
+
+
+def _DeploySandboxTemplates(ns):
+    """Render and apply sandbox-templates.yaml.j2 (Python + Chromium).
+
+    The Chromium image comes from --k8s_chromium_image; when that is empty
+    the derived image is used, and "chromium-placeholder:latest" when no
+    image was derived either.
+
+    Args:
+        ns: Namespace passed to the template.
+    """
+    chromium_image = FLAGS.k8s_chromium_image
+    if not chromium_image:
+        chromium_image = _derived_images.get(
+            "chromium", "chromium-placeholder:latest"
+        )
+
+    template_vars = {
+        "ns": ns,
+        "python_image": FLAGS.k8s_python_image,
+        "chromium_image": chromium_image,
+        "warmpool_replicas": FLAGS.agent_sandbox_warmpool_replicas,
+        "chromium_replicas": FLAGS.agent_sandbox_chromium_replicas,
+    }
+    _RenderAndApply("sandbox-templates.yaml.j2", **template_vars)
+
+
+def _DeploySandboxRouter(ns):
+    """Render and apply sandbox-router.yaml.j2.
+
+    Uses --agent_sandbox_router_image, falling back to the derived router
+    image. Logs and returns without deploying when neither is available.
+
+    Args:
+        ns: Namespace passed to the template.
+    """
+    router_image = FLAGS.agent_sandbox_router_image
+    if not router_image:
+        router_image = _derived_images.get("sandbox_router", "")
+
+    if router_image:
+        _RenderAndApply(
+            "sandbox-router.yaml.j2", ns=ns, router_image=router_image
+        )
+        return
+
+    logging.info("Sandbox router image not set, skipping router deployment.")
+
+
+def _DeployADKAgent(ns, project="", region="", cluster_name="", adk_image_override=""):
+    """Render and apply adk-agent.yaml.j2.
+
+    Image precedence: adk_image_override, then --k8s_agent_image, then the
+    derived ADK image. A chosen image that does not contain
+    "docker.pkg.dev" is replaced by the derived image when one exists.
+
+    Args:
+        ns: Namespace passed to the template.
+        project: GCP project ID passed to the template.
+        region: GCP region passed to the template.
+        cluster_name: Cluster name, passed to the template as `cluster`.
+        adk_image_override: Image that takes precedence over the flag.
+    """
+    derived_image = _derived_images.get("adk_agent", "")
+    adk_image = adk_image_override or FLAGS.k8s_agent_image or derived_image
+
+    # When Prepare runs separately from Provision, container_specs may hold
+    # the Dockerfile lookup path from the config YAML (agentic/adk-agent)
+    # rather than a built registry reference, so prefer the derived path.
+    looks_like_registry = "docker.pkg.dev" in adk_image
+    if adk_image and not looks_like_registry and derived_image:
+        logging.warning(
+            "ADK image %s is not a registry path. Using derived: %s",
+            adk_image, derived_image,
+        )
+        adk_image = derived_image
+
+    if not adk_image:
+        logging.info("ADK agent image not set, skipping agent deployment.")
+        return
+
+    logging.info("Using ADK image: %s", adk_image)
+
+    # Falsy arguments (e.g. None) are normalized to empty strings.
+    template_vars = {
+        "ns": ns,
+        "adk_image": adk_image,
+        "project": project or "",
+        "region": region or "",
+        "cluster": cluster_name or "",
+    }
+    _RenderAndApply("adk-agent.yaml.j2", **template_vars)
+
+
+def _DeployPSIReader(ns):
+    """Render and apply psi-reader.yaml.j2 (PSI Reader DaemonSet).
+
+    Args:
+        ns: Namespace passed to the template.
+    """
+    template_vars = {"ns": ns}
+    _RenderAndApply("psi-reader.yaml.j2", **template_vars)
+
+
+def _WaitForAgentReady(ns):
+    """Block on `kubectl rollout status deployment/adk-agent`.
+
+    The wait is attempted unconditionally, however the image was chosen.
+    A non-zero exit is only logged as a warning, because the command runs
+    with raise_on_failure=False.
+
+    Args:
+        ns: Namespace containing the adk-agent deployment.
+    """
+    timeout_s = FLAGS.k8s_deploy_timeout
+    logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout_s)
+
+    rollout_cmd = [
+        "rollout", "status", "deployment/adk-agent",
+        "-n", ns,
+        f"--timeout={timeout_s}s",
+    ]
+    _, err_text, exit_code = kubectl.RunKubectlCommand(
+        rollout_cmd, raise_on_failure=False
+    )
+    if exit_code == 0:
+        return
+
+    logging.warning(
+        "adk-agent rollout status returned %d: %s",
+        exit_code, err_text.strip()[:200],
+    )
+
+
+def _GetProjectNumber(project):
+    """Look up the numeric project number for a GCP project ID via gcloud.
+
+    Args:
+        project: GCP project ID.
+
+    Returns:
+        The stripped stdout of `gcloud projects describe` when it exits 0,
+        otherwise None.
+    """
+    describe_cmd = [
+        "gcloud", "projects", "describe", project,
+        "--format=value(projectNumber)",
+    ]
+    out, _, exit_code = vm_util.IssueCommand(
+        describe_cmd, raise_on_failure=False
+    )
+    if exit_code != 0:
+        return None
+    return out.strip()
+
+
+def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name):
+    """Add the three IAM policy bindings on the snapshot bucket.
+
+    Bindings are issued in this order, each with raise_on_failure=False:
+      * roles/storage.bucketViewer to the namespace principal set
+      * roles/storage.objectAdmin to the Kubernetes service account
+      * roles/storage.objectUser to the GKE snapshot controller
+
+    Args:
+        bucket_name: GCS bucket name, without the gs:// prefix.
+        project: GCP project ID (names the workload identity pool).
+        project_number: Numeric GCP project number.
+        ns: Kubernetes namespace.
+        ksa_name: Kubernetes service account name.
+    """
+    identity_pool = (
+        f"iam.googleapis.com/projects/{project_number}"
+        f"/locations/global/workloadIdentityPools/{project}.svc.id.goog"
+    )
+    bindings = (
+        (
+            f"principalSet://{identity_pool}/namespace/{ns}",
+            "roles/storage.bucketViewer",
+        ),
+        (
+            f"principal://{identity_pool}/subject/ns/{ns}/sa/{ksa_name}",
+            "roles/storage.objectAdmin",
+        ),
+        (
+            f"serviceAccount:service-{project_number}"
+            "@container-engine-robot.iam.gserviceaccount.com",
+            "roles/storage.objectUser",
+        ),
+    )
+    for member, role in bindings:
+        vm_util.IssueCommand(
+            [
+                "gcloud", "storage", "buckets", "add-iam-policy-binding",
+                f"gs://{bucket_name}",
+                f"--member={member}",
+                f"--role={role}",
+                "--quiet",
+            ],
+            raise_on_failure=False,
+        )
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
new file mode 100644
index 0000000000..2e976207f5
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -0,0 +1,355 @@
+"""Shared image build utilities for GKE Agent Sandbox benchmarks.
+
+Builds and pushes container images (Chrome sandbox, Sandbox Router) via
+Google Cloud Build. Called from gke_deploy_utils.DeployWorkloads() during
+the Prepare stage.
+
+NOTE: The ADK Agent image is built by the PKB native container_specs
+mechanism during the Provision stage, not by this module.
+
+Images built:
+  - Chrome Sandbox: cloned from agent-sandbox repo
+  - Sandbox Router: cloned from agent-sandbox repo
+"""
+
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+
+from absl import flags
+from perfkitbenchmarker import vm_util
+
+FLAGS = flags.FLAGS
+
+# Library modules must not configure the root logger at import time; the
+# entry point (PKB itself, or the gke_prerequisites script) owns that setup.
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Architecture detection
+# ---------------------------------------------------------------------------
+
+# Optional override consulted first by _DetectArchitecture(); the empty
+# default means "detect via gcloud".
+flags.DEFINE_string(
+    "target_arch",
+    "",
+    "Target CPU architecture for container images (amd64 or arm64). "
+    "If set, skips gcloud machine-type detection. "
+    "Use this for non-GCP environments or when gcloud is unavailable.",
+)
+
+# Maps the architecture value reported by
+# `gcloud compute machine-types describe` (upper-cased before lookup)
+# to the Docker platform architecture name.
+_ARCH_MAP = {
+    "X86_64": "amd64",
+    "ARM64": "arm64",
+}
+
+
+def _DetectArchitecture(machine_type, zone, project):
+    """Return the Docker architecture name for a GCP machine type.
+
+    Resolution order:
+      1. A valid --target_arch flag (amd64/arm64) is returned as-is.
+      2. Otherwise ``gcloud compute machine-types describe`` is queried and
+         the reported architecture (X86_64/ARM64) is mapped via _ARCH_MAP.
+      3. Any failure (gcloud error, non-zero exit, empty or unknown
+         architecture) logs a warning and falls back to "amd64".
+
+    Args:
+        machine_type: Machine type name passed to gcloud.
+        zone: Zone passed to gcloud as --zone.
+        project: Project ID passed to gcloud as --project.
+
+    Returns:
+        "amd64" or "arm64".
+    """
+    # Quick exit if the user provided the architecture explicitly.
+    if FLAGS.target_arch:
+        arch = FLAGS.target_arch.lower()
+        if arch in ("amd64", "arm64"):
+            logger.info("Using user-provided target_arch: %s", arch)
+            return arch
+        logger.warning(
+            "Invalid --target_arch='%s'. Must be amd64 or arm64. "
+            "Proceeding with gcloud detection.",
+            FLAGS.target_arch,
+        )
+
+    try:
+        stdout, stderr, retcode = vm_util.IssueCommand(
+            [
+                "gcloud",
+                "compute",
+                "machine-types",
+                "describe",
+                machine_type,
+                f"--zone={zone}",
+                f"--project={project}",
+                "--format=value(architecture)",
+            ],
+            raise_on_failure=False,
+            timeout=30,
+        )
+    except Exception as e:  # Detection is best-effort; never abort the build.
+        logger.warning(
+            "gcloud machine-type describe failed: %s. Falling back to amd64.", e
+        )
+        return "amd64"
+
+    gcp_arch = (stdout or "").strip().upper()
+    if retcode != 0 or not gcp_arch:
+        # Previously this path fell back silently, hiding e.g. a wrong zone.
+        logger.warning(
+            "Could not determine architecture for %s (rc=%s): %s. "
+            "Falling back to amd64.",
+            machine_type,
+            retcode,
+            (stderr or "").strip()[-300:],
+        )
+        return "amd64"
+
+    docker_arch = _ARCH_MAP.get(gcp_arch)
+    if not docker_arch:
+        logger.warning(
+            "Unknown GCP architecture '%s' for %s. Falling back to amd64.",
+            gcp_arch,
+            machine_type,
+        )
+        return "amd64"
+
+    logger.info(
+        "Detected architecture for %s: %s -> %s",
+        machine_type,
+        gcp_arch,
+        docker_arch,
+    )
+    return docker_arch
+
+
+def build_images_with_config(project, region, machine_type, zone, arch):
+    """Build and push the Chrome Sandbox and Sandbox Router images.
+
+    Does not read FLAGS. Uses the project's default Cloud Build SA
+    (no custom SA needed). The ADK Agent image is not built here.
+
+    Args:
+        project: GCP project ID.
+        region: GCP region (e.g. "us-central1"); selects the
+            ``<region>-docker.pkg.dev`` registry host.
+        machine_type: Unused; kept for caller compatibility. The
+            architecture is supplied through ``arch``.
+        zone: Unused; kept for caller compatibility.
+        arch: Target architecture, used as the image tag and as the
+            ``linux/<arch>`` build platform (e.g. "amd64" or "arm64").
+
+    Raises:
+        RuntimeError: If a clone or Cloud Build command fails.
+    """
+    # Derive image paths; the tag is the target architecture.
+    registry = f"{region}-docker.pkg.dev/{project}/agent-sandbox"
+    chrome_image = f"{registry}/chrome-sandbox:{arch}"
+    router_image = f"{registry}/sandbox-router:{arch}"
+
+    logger.info("=== Building Container Images (Chrome + Router only) ===")
+    logger.info("  Project: %s", project)
+    logger.info("  Region: %s", region)
+    logger.info("  Architecture: %s", arch)
+    logger.info("  Cloud Build SA: default (project Cloud Build SA)")
+    logger.info("  NOTE: ADK Agent image is built by PKB via container_specs")
+
+    # 1. Build Chrome Sandbox
+    _BuildChromeSandboxImage(
+        project=project,
+        region=region,
+        target_arch=arch,
+        image_path=chrome_image,
+    )
+
+    # 2. Build Sandbox Router
+    _BuildSandboxRouterImage(
+        project=project,
+        region=region,
+        target_arch=arch,
+        image_path=router_image,
+    )
+
+    logger.info("=== Chrome + Router images built successfully ===")
+    logger.info("  Chrome Sandbox: %s", chrome_image)
+    logger.info("  Sandbox Router: %s", router_image)
+    logger.info("  (ADK Agent built by PKB via container_specs)")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _BuildChromeSandboxImage(project, region, target_arch, image_path):
+    """Build and push the Chrome Sandbox image.
+
+    Sparse-clones ``examples/chrome-sandbox`` from the agent-sandbox repo
+    into a temp directory, patches its Dockerfile so that socat is
+    installed alongside chromium, and submits the directory to Cloud
+    Build. The temp directory is always removed.
+
+    Args:
+        project: GCP project ID used for the Cloud Build submission.
+        region: Unused here; the region is already part of ``image_path``.
+        target_arch: Architecture passed on as the ``linux/<arch>`` platform.
+        image_path: Full image reference to build and push.
+
+    Raises:
+        RuntimeError: If a command fails or the Dockerfile is missing.
+    """
+    logger.info("Building Chrome Sandbox image: %s", image_path)
+
+    tmp_dir = tempfile.mkdtemp(prefix="chrome-sandbox-")
+    try:
+        # Clone agent-sandbox repo (sparse checkout)
+        logger.info("Cloning agent-sandbox chrome-sandbox source...")
+        _RunCmd(
+            [
+                "git",
+                "clone",
+                "--depth",
+                "1",
+                "--filter=blob:none",
+                "--sparse",
+                "https://github.com/kubernetes-sigs/agent-sandbox.git",
+                tmp_dir,
+            ]
+        )
+        _RunCmd(
+            ["git", "sparse-checkout", "set", "examples/chrome-sandbox"],
+            cwd=tmp_dir,
+        )
+
+        build_dir = os.path.join(tmp_dir, "examples", "chrome-sandbox")
+        dockerfile_path = os.path.join(build_dir, "Dockerfile")
+        if not os.path.isfile(dockerfile_path):
+            raise RuntimeError(f"chrome-sandbox Dockerfile not found at {build_dir}")
+
+        # Patch Dockerfile: add socat for CDP proxy
+        install_line = (
+            "RUN apt-get update && apt-get install --yes --no-install-recommends chromium"
+        )
+        patched_line = install_line + " socat"
+        with open(dockerfile_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        if patched_line in content:
+            logger.info("Dockerfile already installs socat; no patch needed.")
+        elif install_line in content:
+            with open(dockerfile_path, "w", encoding="utf-8") as f:
+                f.write(content.replace(install_line, patched_line))
+        else:
+            # The upstream Dockerfile changed; a silent no-op here would
+            # produce an image without socat and fail much later.
+            logger.warning(
+                "Could not patch %s: expected chromium install line not found. "
+                "The image will be built WITHOUT socat.",
+                dockerfile_path,
+            )
+
+        # Submit Cloud Build (generates cloudbuild.yaml in temp dir)
+        _SubmitCloudBuild(
+            source_dir=build_dir,
+            image_path=image_path,
+            target_arch=target_arch,
+            project=project,
+        )
+
+        logger.info("Chrome Sandbox image built successfully.")
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def _BuildSandboxRouterImage(project, region, target_arch, image_path):
+    """Build and push the Sandbox Router image.
+
+    Sparse-clones the sandbox-router directory of the agent-sandbox repo
+    into a scratch directory, checks that it holds a Dockerfile, and
+    submits it to Cloud Build. The scratch directory is always removed.
+    ``region`` is accepted but not used by this function.
+    """
+    logger.info("Building Sandbox Router image: %s", image_path)
+
+    router_subpath = "clients/python/agentic-sandbox-client/sandbox-router"
+    clone_cmd = [
+        "git",
+        "clone",
+        "--depth",
+        "1",
+        "--filter=blob:none",
+        "--sparse",
+        "https://github.com/kubernetes-sigs/agent-sandbox.git",
+    ]
+
+    tmp_dir = tempfile.mkdtemp(prefix="sandbox-router-")
+    try:
+        # Fetch only the router sources (sparse checkout).
+        logger.info("Cloning agent-sandbox router source...")
+        _RunCmd(clone_cmd + [tmp_dir])
+        _RunCmd(["git", "sparse-checkout", "set", router_subpath], cwd=tmp_dir)
+
+        build_dir = os.path.join(tmp_dir, *router_subpath.split("/"))
+        has_dockerfile = os.path.isfile(os.path.join(build_dir, "Dockerfile"))
+        if not has_dockerfile:
+            raise RuntimeError(f"sandbox-router Dockerfile not found at {build_dir}")
+
+        # Cloud Build writes its generated cloudbuild.yaml into build_dir.
+        _SubmitCloudBuild(
+            source_dir=build_dir,
+            image_path=image_path,
+            target_arch=target_arch,
+            project=project,
+        )
+
+        logger.info("Sandbox Router image built successfully.")
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def _SubmitCloudBuild(source_dir, image_path, target_arch, project):
+    """Write a cloudbuild.yaml into ``source_dir`` and submit it to Cloud Build.
+
+    Two configs exist. For "amd64" a plain ``docker build`` is used and the
+    image is pushed through the top-level ``images:`` key. For any other
+    architecture the config registers QEMU, creates a Buildx builder and
+    runs ``buildx build --push`` on an E2_HIGHCPU_32 worker.
+
+    The image path and platform are injected with ``--substitutions``, so
+    the defaults in the ``substitutions:`` section are always overridden.
+    The build runs under the project's default Cloud Build SA.
+    """
+    if target_arch != "amd64":
+        # Cross-arch: QEMU + Buildx. Buildx pushes the image itself
+        # (--push), so this config has no top-level 'images:' key.
+        build_config = """steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes']
+    id: 'qemu-setup'
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'create', '--use', '--name', 'multiarch-builder']
+    id: 'create-builder'
+    waitFor: ['qemu-setup']
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '--push', '.']
+    id: 'build-and-push'
+    waitFor: ['create-builder']
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_32
+substitutions:
+  _IMAGE_PATH: ''
+  _PLATFORM: 'linux/amd64'
+"""
+    else:
+        # Native build: no emulation needed.
+        build_config = """steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.']
+    env:
+      - 'DOCKER_BUILDKIT=1'
+images:
+  - '${_IMAGE_PATH}'
+options:
+  logging: CLOUD_LOGGING_ONLY
+substitutions:
+  _IMAGE_PATH: ''
+  _PLATFORM: 'linux/amd64'
+"""
+
+    config_file = os.path.join(source_dir, "cloudbuild.yaml")
+    with open(config_file, "w") as out:
+        out.write(build_config)
+
+    substitutions = f"_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}"
+    submit_cmd = [
+        "gcloud",
+        "builds",
+        "submit",
+        source_dir,
+        f"--config={config_file}",
+        f"--substitutions={substitutions}",
+        f"--project={project}",
+    ]
+    _RunCmd(submit_cmd)
+
+
+def _RunCmd(cmd, cwd=None):
+    """Run a command (argv list, no shell) and return its stdout.
+
+    Args:
+        cmd: Command and arguments as a list of strings.
+        cwd: Optional working directory for the child process.
+
+    Returns:
+        The captured stdout as text.
+
+    Raises:
+        RuntimeError: If the command exits non-zero; the message carries
+            the last 500 characters of stderr.
+        subprocess.TimeoutExpired: If the command exceeds the 40 min limit.
+    """
+    logger.info("  CMD: %s", " ".join(cmd))
+    # The child inherits the caller's environment unchanged. TLS certificate
+    # validation is deliberately NOT disabled for gcloud here; an operator
+    # behind a TLS-intercepting proxy can export
+    # CLOUDSDK_AUTH_DISABLE_SSL_VALIDATION=true before running.
+    proc = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        cwd=cwd,
+        timeout=2400,  # 40 min: allows for QEMU cross-arch builds
+    )
+
+    if proc.returncode != 0:
+        raise RuntimeError(
+            f"Command failed (rc={proc.returncode}): {' '.join(cmd)}\n"
+            f"stderr: {proc.stderr[-500:]}"
+        )
+    return proc.stdout
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
new file mode 100644
index 0000000000..1bae7b41d4
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+"""Post-Teardown Cleanup for GKE Agentic Benchmarking.
+
+Cleans up infrastructure created by gke_prerequisites.py and DeploySnapshots():
+  - Revoke the extra IAM roles granted to the Cloud Build service account(s)
+  - Delete GCS snapshot bucket
+  - Delete Artifact Registry repositories
+
+Run ONCE after all benchmarks are complete (after PKB Teardown has deleted the cluster):
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_post_teardown \
+      --project_id=<project> --region=<region>
+"""
+
+import argparse
+import logging
+import subprocess
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def _run(cmd, check=False, timeout=300):
+    """Run ``cmd`` and return the CompletedProcess; never raises on failure.
+
+    With ``check=True`` a non-zero exit is logged as a warning (tail of
+    stderr); with the default ``check=False`` failures are not logged.
+    """
+    logger.info("CMD: %s", " ".join(cmd))
+    completed = subprocess.run(
+        cmd, capture_output=True, text=True, timeout=timeout
+    )
+    failed = completed.returncode != 0
+    if failed and check:
+        logger.warning(
+            "Command failed (rc=%d): %s",
+            completed.returncode,
+            completed.stderr[-300:],
+        )
+    return completed
+
+
+def revoke_cloudbuild_sa_permissions(project_id):
+    """Revoke extra IAM roles from Cloud Build SA(s).
+
+    Mirrors grant_cloudbuild_sa_permissions() from gke_prerequisites.py.
+    Attempts to remove each role from both possible SAs. Does NOT delete
+    the SAs themselves (they are project-managed).
+
+    Removal is best-effort: a binding that cannot be removed (for example
+    because it was never granted to that SA) is logged and skipped.
+
+    Args:
+        project_id: GCP project ID.
+    """
+    logger.info("=== Revoking extra permissions from Cloud Build SA(s) ===")
+    result = _run(["gcloud", "projects", "describe", project_id,
+                   "--format=value(projectNumber)"])
+    project_number = result.stdout.strip()
+    if not project_number:
+        logger.warning("Could not determine project number, skipping SA cleanup")
+        return
+    sa_emails = [
+        f"{project_number}@cloudbuild.gserviceaccount.com",
+        f"{project_number}-compute@developer.gserviceaccount.com",
+    ]
+    roles = ["roles/logging.logWriter", "roles/storage.objectViewer",
+             "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer"]
+    attempted = 0
+    removed = 0
+    for sa_email in sa_emails:
+        for role in roles:
+            attempted += 1
+            # --condition=None matches the unconditional binding added by the
+            # grant step and avoids the interactive condition prompt that
+            # --quiet cannot answer when the policy has conditional bindings.
+            outcome = _run(["gcloud", "projects", "remove-iam-policy-binding", project_id,
+                            f"--member=serviceAccount:{sa_email}", f"--role={role}",
+                            "--condition=None", "--quiet"])
+            if outcome.returncode == 0:
+                removed += 1
+            else:
+                logger.info("Skipped %s on %s (binding absent or not removable).",
+                            role, sa_email)
+    logger.info("Cloud Build SA extra permissions revoked (%d of %d bindings removed).",
+                removed, attempted)
+
+
+def teardown_snapshot_bucket(project_id, region):
+    """Empty and delete the ``agent-sandbox-snapshots-<project_id>`` bucket.
+
+    Best-effort: failures are logged, never raised.
+
+    Args:
+        project_id: GCP project ID; also the bucket name suffix.
+        region: Unused; kept for a uniform teardown signature.
+    """
+    logger.info("=== Deleting Snapshot Bucket ===")
+    bucket_url = f"gs://agent-sandbox-snapshots-{project_id}"
+    # Emptying may fail (e.g. nothing to remove); only the outcome of the
+    # bucket delete decides what gets reported.
+    _run(["gcloud", "storage", "rm", f"{bucket_url}/**",
+          f"--project={project_id}", "--quiet"])
+    outcome = _run(["gcloud", "storage", "buckets", "delete", bucket_url,
+                    f"--project={project_id}", "--quiet"])
+    if outcome.returncode == 0:
+        logger.info("Snapshot bucket deleted.")
+    else:
+        logger.warning("Snapshot bucket %s was NOT deleted (rc=%d): %s",
+                       bucket_url, outcome.returncode, outcome.stderr.strip()[-300:])
+
+
+def teardown_images(project_id, region):
+    """Delete the Artifact Registry repositories created for the benchmark.
+
+    Best-effort: a repository that cannot be deleted is logged as a
+    warning instead of being reported as deleted.
+
+    Args:
+        project_id: GCP project ID.
+        region: Artifact Registry location of the repositories.
+    """
+    logger.info("=== Deleting AR repos ===")
+    # "adk-repo" is created/deleted by PKB container_registry lifecycle
+    # (Provision creates it, Teardown deletes it). If you skip PKB Teardown,
+    # run: gcloud artifacts repositories delete adk-repo --location=<region>
+    # Only "agent-sandbox" (Chrome + Router images) needs manual cleanup here.
+    not_deleted = []
+    for repo in ["agent-sandbox"]:
+        outcome = _run(["gcloud", "artifacts", "repositories", "delete", repo,
+                        f"--location={region}", f"--project={project_id}", "--quiet"])
+        if outcome.returncode != 0:
+            not_deleted.append(repo)
+            logger.warning("AR repo %s was NOT deleted (rc=%d): %s",
+                           repo, outcome.returncode, outcome.stderr.strip()[-300:])
+    if not not_deleted:
+        logger.info("AR repos deleted.")
+
+
+def main():
+    """Parse the CLI flags and run the post-teardown cleanup steps in order."""
+    parser = argparse.ArgumentParser(description="GKE Agentic Benchmark Post-Teardown")
+    parser.add_argument("--project_id", required=True, help="GCP project ID")
+    parser.add_argument("--region", default="us-central1", help="GCP region")
+    parser.add_argument("--keep_images", action="store_true", help="Skip AR repo deletion")
+    parser.add_argument("--keep_bucket", action="store_true", help="Skip snapshot bucket deletion")
+    opts = parser.parse_args()
+
+    # IAM cleanup always runs; the bucket and image steps can be opted out of.
+    revoke_cloudbuild_sa_permissions(opts.project_id)
+    optional_steps = (
+        (opts.keep_bucket, teardown_snapshot_bucket),
+        (opts.keep_images, teardown_images),
+    )
+    for keep, step in optional_steps:
+        if keep:
+            continue
+        step(opts.project_id, opts.region)
+    print("\nPost-teardown complete!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
new file mode 100644
index 0000000000..72c32d5b1f
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""Prerequisite Setup for GKE Agentic Benchmarking.
+
+Creates infrastructure that PKB cannot manage natively:
+  - Enable required GCP APIs
+  - Create Artifact Registry repositories
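+  - Grant the required IAM roles to the existing Cloud Build service account(s)
+  - Build the Chrome Sandbox and Sandbox Router images (unless --skip_image_build)
+
+Run ONCE before PKB provisioning:
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisites \
+      --project_id=<project> --region=<region> --target_arch=<amd64|arm64>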
+"""
+
+import argparse
+import logging
+import os
+import subprocess
+import time
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def _run(cmd, check=True, timeout=300):
+    """Run ``cmd`` and return the CompletedProcess.
+
+    With ``check=True`` (default) a non-zero exit is logged and raised as
+    RuntimeError; with ``check=False`` the result is returned as-is.
+    """
+    logger.info("CMD: %s", " ".join(cmd))
+    completed = subprocess.run(
+        cmd, capture_output=True, text=True, timeout=timeout
+    )
+    if not check or completed.returncode == 0:
+        return completed
+    logger.error(
+        "Command failed (rc=%d): %s", completed.returncode, completed.stderr[-500:]
+    )
+    raise RuntimeError(f"Command failed: {cmd}")
+
+
+def _exists(cmd):
+    """Return True when ``cmd`` exits with status 0 (60 s timeout).
+
+    Any non-zero exit counts as "does not exist", whatever the reason.
+    """
+    exit_code = subprocess.run(
+        cmd, capture_output=True, text=True, timeout=60
+    ).returncode
+    return exit_code == 0
+
+
+def enable_apis(project_id):
+    """Enable every GCP API the benchmark needs with one gcloud call.
+
+    Raises RuntimeError (via _run) if the command fails.
+    """
+    logger.info("=== Enabling GCP APIs ===")
+    required_services = (
+        "container.googleapis.com",
+        "artifactregistry.googleapis.com",
+        "cloudbuild.googleapis.com",
+        "aiplatform.googleapis.com",
+        "storage.googleapis.com",
+        "iam.googleapis.com",
+        "connectgateway.googleapis.com",
+        "gkehub.googleapis.com",
+        "gkeconnect.googleapis.com",
+        "iap.googleapis.com",
+    )
+    enable_cmd = [
+        "gcloud",
+        "services",
+        "enable",
+        *required_services,
+        f"--project={project_id}",
+    ]
+    _run(enable_cmd)
+    logger.info("APIs enabled.")
+
+
+def create_artifact_registry(project_id, region):
+    """Create the Docker Artifact Registry repos that do not exist yet.
+
+    Only "agent-sandbox" (Chrome/Router images) is handled here; PKB
+    creates "adk-repo" itself via container_registry during Provision.
+    """
+    logger.info("=== Creating Artifact Registry Repos ===")
+    scope = [f"--location={region}", f"--project={project_id}"]
+    for repo in ["agent-sandbox"]:
+        describe_cmd = ["gcloud", "artifacts", "repositories", "describe", repo]
+        if not _exists(describe_cmd + scope):
+            create_cmd = [
+                "gcloud", "artifacts", "repositories", "create", repo,
+                "--repository-format=docker",
+            ]
+            _run(create_cmd + scope)
+            logger.info("AR repo %s created.", repo)
+        else:
+            logger.info("AR repo %s already exists.", repo)
+
+
+def grant_cloudbuild_sa_permissions(project_id):
+    """Grant required IAM roles to the Cloud Build service account(s).
+
+    Two service accounts may be used by Cloud Build in a project:
+      - Legacy projects: {number}@cloudbuild.gserviceaccount.com
+      - Newer projects:  {number}-compute@developer.gserviceaccount.com
+
+    Each candidate is probed with ``gcloud iam service-accounts describe``
+    and the roles are granted to every SA that is found. Individual grants
+    are best-effort: a failed grant is logged as a warning and the
+    remaining grants still run.
+
+    Args:
+        project_id: GCP project ID.
+
+    Raises:
+        RuntimeError: If the project number lookup command fails.
+    """
+    logger.info("=== Granting permissions to Cloud Build SA(s) ===")
+    result = _run(["gcloud", "projects", "describe", project_id,
+                   "--format=value(projectNumber)"])
+    project_number = result.stdout.strip()
+    if not project_number:
+        logger.error("Could not determine project number for %s", project_id)
+        return
+
+    # Both possible Cloud Build SAs
+    cloudbuild_sa = f"{project_number}@cloudbuild.gserviceaccount.com"
+    compute_sa = f"{project_number}-compute@developer.gserviceaccount.com"
+
+    # Detect which SA(s) exist
+    sa_emails = []
+    for sa in [cloudbuild_sa, compute_sa]:
+        if _exists(["gcloud", "iam", "service-accounts", "describe",
+                    sa, f"--project={project_id}"]):
+            sa_emails.append(sa)
+            logger.info("Found Cloud Build SA: %s", sa)
+        else:
+            logger.info("SA not found (skipping): %s", sa)
+
+    if not sa_emails:
+        logger.error("No Cloud Build SA found in project %s", project_id)
+        return
+
+    roles = [
+        "roles/logging.logWriter",
+        "roles/storage.objectViewer",
+        "roles/artifactregistry.writer",
+        "roles/serviceusage.serviceUsageConsumer",
+    ]
+    failed = 0
+    for sa_email in sa_emails:
+        logger.info("Granting roles to %s", sa_email)
+        for role in roles:
+            outcome = _run(["gcloud", "projects", "add-iam-policy-binding", project_id,
+                            f"--member=serviceAccount:{sa_email}",
+                            f"--role={role}", "--condition=None", "--quiet"], check=False)
+            if outcome.returncode != 0:
+                # check=False keeps going, but the failure must be visible:
+                # a missing role surfaces later as a Cloud Build error.
+                failed += 1
+                logger.warning("Failed to grant %s to %s: %s",
+                               role, sa_email, outcome.stderr.strip()[-300:])
+    if failed:
+        logger.warning("Cloud Build SA permissions granted with %d failure(s).", failed)
+    else:
+        logger.info("Cloud Build SA permissions granted.")
+
+
+
+
+def build_sandbox_images(project_id, region, target_arch):
+    """Build the Chrome Sandbox and Sandbox Router images via Cloud Build.
+
+    Both images go to the "agent-sandbox" repository of the regional
+    Artifact Registry and are tagged with ``target_arch``.
+    """
+    logger.info("=== Building Sandbox Images (arch=%s) ===", target_arch)
+    # Imported lazily, as in the original, so the other setup steps do not
+    # need the perfkitbenchmarker package to be importable.
+    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils
+
+    registry = f"{region}-docker.pkg.dev/{project_id}/agent-sandbox"
+    chrome_image = f"{registry}/chrome-sandbox:{target_arch}"
+    router_image = f"{registry}/sandbox-router:{target_arch}"
+
+    # Chrome first, then Router.
+    build_plan = (
+        (gke_image_build_utils._BuildChromeSandboxImage, chrome_image),
+        (gke_image_build_utils._BuildSandboxRouterImage, router_image),
+    )
+    for build_image, image in build_plan:
+        build_image(
+            project=project_id,
+            region=region,
+            target_arch=target_arch,
+            image_path=image,
+        )
+
+    logger.info("Sandbox images built successfully.")
+    logger.info("  Chrome: %s", chrome_image)
+    logger.info("  Router: %s", router_image)
+
+def main():
+    """Parse the CLI flags and run the prerequisite setup steps in order."""
+    parser = argparse.ArgumentParser(description="GKE Agentic Benchmark Prerequisites")
+    parser.add_argument("--project_id", required=True, help="GCP project ID")
+    parser.add_argument("--region", default="us-central1", help="GCP region")
+    parser.add_argument(
+        "--target_arch",
+        required=True,
+        choices=["amd64", "arm64"],
+        help="Target CPU architecture for container images (amd64 or arm64)",
+    )
+    parser.add_argument(
+        "--skip_image_build",
+        action="store_true",
+        help="Skip Chrome and Router image builds (images already in registry)",
+    )
+    opts = parser.parse_args()
+
+    enable_apis(opts.project_id)
+    create_artifact_registry(opts.project_id, opts.region)
+    grant_cloudbuild_sa_permissions(opts.project_id)
+    if opts.skip_image_build:
+        logger.info("Skipping image builds (--skip_image_build)")
+    else:
+        build_sandbox_images(opts.project_id, opts.region, opts.target_arch)
+    print("\nPrerequisite setup complete!")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py
new file mode 100644
index 0000000000..e23aa32a6d
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py
@@ -0,0 +1,506 @@
+"""Shared utilities for GKE Agent Sandbox benchmarks.
+
+Provides helpers for agent API interaction, kubectl commands, warm pool
+management, and sample construction used by all GKE agent benchmark
+definitions.
+"""
+
+import json
+import logging
+import subprocess
+import time
+import urllib.request
+import urllib.error
+
+from absl import flags
+from perfkitbenchmarker import sample
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.resources.container_service import kubectl
+
+FLAGS = flags.FLAGS
+
+# Module-level benchmark_spec reference for metadata derivation.
+# Set by each benchmark's Run() via set_benchmark_spec().
+_current_benchmark_spec = None
+
+
+# ---------------------------------------------------------------------------
+# Shared flags (registered once; importable by benchmark modules)
+# ---------------------------------------------------------------------------
+
+# Namespace of the agentic workloads.
+flags.DEFINE_string(
+    "k8s_namespace",
+    "agentic",
+    "Kubernetes namespace where the agentic workloads are deployed.",
+)
+
+# Copied into sample metadata as "gvisor" by BuildMetadata().
+flags.DEFINE_bool(
+    "k8s_gvisor",
+    True,
+    "Whether the sandbox node pool uses gVisor. Recorded in sample metadata.",
+)
+
+# When non-empty, copied into sample metadata as "note" by BuildMetadata().
+flags.DEFINE_string(
+    "k8s_benchmark_note",
+    "",
+    "Arbitrary note string attached to every sample for tagging runs.",
+)
+
+# Read by GetAgentApiUrl(), which strips any trailing "/".
+flags.DEFINE_string(
+    "k8s_agent_api_url",
+    "http://localhost:8080",
+    "Base URL of the ADK Agent API.",
+)
+
+# Default timeout for CallAgentApi() when the caller passes none.
+flags.DEFINE_integer(
+    "k8s_agent_api_timeout",
+    600,
+    "HTTP timeout in seconds for agent API benchmark calls.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Agent API helpers
+# ---------------------------------------------------------------------------
+
+
+def GetAgentApiUrl():
+    """Return --k8s_agent_api_url with any trailing slashes removed."""
+    configured_url = FLAGS.k8s_agent_api_url
+    return configured_url.rstrip("/")
+
+
+def CheckAgentHealthz(api_url=None, required=True):
+    """Verify the agent API is reachable via /healthz.
+
+    Args:
+        api_url: Base URL to check. Defaults to FLAGS.k8s_agent_api_url.
+        required: If True (default), raise on failure. If False, log warning.
+
+    Raises:
+        RuntimeError: If ``required`` is True and the health check fails.
+    """
+    # Local import: only the exception base class is needed here.
+    import http.client
+
+    if api_url is None:
+        api_url = GetAgentApiUrl()
+    try:
+        req = urllib.request.Request(f"{api_url}/healthz")
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            logging.info("Agent healthz: %s", resp.read().decode())
+    # OSError covers URLError/HTTPError plus read timeouts and connection
+    # resets raised while reading the body; HTTPException covers truncated
+    # or malformed responses. With required=False none of them may escape.
+    except (OSError, http.client.HTTPException) as e:
+        msg = (
+            f"Agent API is not reachable at {api_url}/healthz: {e}\n"
+            "Hint: ensure kubectl port-forward is running "
+            "(kubectl port-forward svc/adk-agent -n <ns> 8080:80)."
+        )
+        if required:
+            raise RuntimeError(msg) from e
+        logging.warning("Health check deferred (non-fatal): %s", msg)
+
+
+def CallAgentApi(endpoint, payload, timeout=None):
+    """POST JSON to an agent API endpoint and return the parsed response.
+
+    Args:
+        endpoint: Path appended to the base URL (e.g. "/run").
+        payload: JSON-serializable request body.
+        timeout: Timeout in seconds; defaults to FLAGS.k8s_agent_api_timeout.
+
+    Returns:
+        The decoded JSON response.
+
+    Raises:
+        RuntimeError: On an HTTP error status, an unreachable server, a
+            connection that times out or drops mid-response, or a
+            response body that is not JSON.
+    """
+    # Local import: only the exception base class is needed here.
+    import http.client
+
+    if timeout is None:
+        timeout = FLAGS.k8s_agent_api_timeout
+    base_url = GetAgentApiUrl()
+    url = f"{base_url}{endpoint}"
+    data = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        url, data=data,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    logging.info("POST %s  payload=%s  timeout=%ds", url, payload, timeout)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            # errors="replace" lets a badly encoded body reach the JSON
+            # check below instead of raising UnicodeDecodeError here.
+            body = resp.read().decode("utf-8", errors="replace")
+    except urllib.error.HTTPError as e:
+        body = e.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"Agent API returned HTTP {e.code}: {body[:500]}") from e
+    except urllib.error.URLError as e:
+        raise RuntimeError(f"Cannot reach agent API at {url}: {e.reason}") from e
+    except (OSError, http.client.HTTPException) as e:
+        # Read timeouts and dropped connections during resp.read() are not
+        # wrapped in URLError; convert them so callers see one error type.
+        raise RuntimeError(f"Agent API request to {url} failed: {e!r}") from e
+    try:
+        return json.loads(body)
+    except json.JSONDecodeError as e:
+        raise RuntimeError(
+            f"Agent API returned non-JSON response:\n{body[:500]}"
+        ) from e
+
+
+# ---------------------------------------------------------------------------
+# kubectl helpers
+# ---------------------------------------------------------------------------
+
+
+def RunKubectl(args, timeout=120, raise_on_failure=True):
+    """Run kubectl with ``args`` and return (stdout, stderr, retcode).
+
+    Thin wrapper over kubectl.RunKubectlCommand; ``args`` is copied into
+    a fresh list before being handed over.
+    """
+    kubectl_args = list(args)
+    return kubectl.RunKubectlCommand(
+        kubectl_args, timeout=timeout, raise_on_failure=raise_on_failure
+    )
+
+
+def CountPods(namespace, label, phase=None):
+    """Return how many pods match ``label`` (and ``phase``, when given).
+
+    A failed kubectl call or empty output counts as zero pods.
+    """
+    kubectl_args = ["get", "pods", "-n", namespace, "-l", label, "-o", "name"]
+    if phase:
+        kubectl_args.append(f"--field-selector=status.phase={phase}")
+    pod_names, _, retcode = RunKubectl(kubectl_args, raise_on_failure=False)
+    if retcode == 0 and pod_names:
+        # One pod name per output line.
+        return len(pod_names.strip().splitlines())
+    return 0
+
+
+def PatchWarmPool(namespace, warmpool_name, replicas, label, wait_timeout=180):
+    """Set SandboxWarmPool replicas and wait for that many Running pods.
+
+    Returns True once at least ``replicas`` pods matching ``label`` are in
+    phase Running (immediately when ``replicas`` is 0), or False if
+    ``wait_timeout`` seconds pass first.
+    """
+    logging.info("Patching %s replicas -> %d", warmpool_name, replicas)
+    desired_spec = {"spec": {"replicas": replicas}}
+    patch_args = [
+        "patch", "sandboxwarmpool", warmpool_name,
+        "-n", namespace, "--type=merge", f"-p={json.dumps(desired_spec)}",
+    ]
+    RunKubectl(patch_args)
+    if replicas == 0:
+        # Nothing to wait for when scaling to zero.
+        return True
+
+    give_up_at = time.time() + wait_timeout
+    while time.time() < give_up_at:
+        running_now = CountPods(namespace, label, phase="Running")
+        logging.info("%d/%d warm pool pods Running", running_now, replicas)
+        if running_now >= replicas:
+            return True
+        time.sleep(3)
+
+    logging.warning("Timed out waiting for %d warm pool pods", replicas)
+    return False
+
+
+def DrainWarmPool(namespace, warmpool_name, label, timeout=120):
+    """Scale the warm pool to 0 and wait until no labelled pods remain.
+
+    Also deletes every SandboxClaim in ``namespace``. Neither kubectl
+    call raises on failure. Returns True when the pod count reaches
+    zero, False if ``timeout`` seconds pass first.
+    """
+    logging.info("Draining warm pool %s to 0", warmpool_name)
+    zero_replicas = json.dumps({"spec": {"replicas": 0}})
+    scale_down_args = [
+        "patch", "sandboxwarmpool", warmpool_name,
+        "-n", namespace, "--type=merge", f"-p={zero_replicas}",
+    ]
+    RunKubectl(scale_down_args, raise_on_failure=False)
+
+    # Lingering SandboxClaims may keep pods from terminating.
+    delete_claims_args = [
+        "delete", "sandboxclaims", "--all",
+        "-n", namespace, "--ignore-not-found=true",
+    ]
+    RunKubectl(delete_claims_args, timeout=60, raise_on_failure=False)
+
+    give_up_at = time.time() + timeout
+    while time.time() < give_up_at:
+        pods_left = CountPods(namespace, label)
+        if pods_left == 0:
+            logging.info("Warm pool drained successfully")
+            return True
+        logging.info("Draining... %d pods remaining", pods_left)
+        time.sleep(2)
+
+    # Re-count for the final message, as the last poll may be stale.
+    still_present = CountPods(namespace, label)
+    logging.warning("Drain timed out, %d pods still present", still_present)
+    return False
+
+
+def set_benchmark_spec(benchmark_spec):
+    """Remember ``benchmark_spec`` at module level.
+
+    BuildMetadata() reads the stored spec to derive the machine type.
+    """
+    global _current_benchmark_spec
+    _current_benchmark_spec = benchmark_spec
+
+
+
+
+# ---------------------------------------------------------------------------
+# Sample construction
+# ---------------------------------------------------------------------------
+
+
+def BuildMetadata(namespace, extra=None):
+    """Return the metadata dict shared by every sample.
+
+    Always holds "namespace" and "gvisor". Adds "machine_type" when one
+    can be derived from the stored benchmark_spec and "note" when
+    --k8s_benchmark_note is set; ``extra`` is merged last and may
+    override any of these keys.
+    """
+    metadata = {"namespace": namespace, "gvisor": FLAGS.k8s_gvisor}
+
+    machine_type = _ResolveMachineType(_current_benchmark_spec)
+    if machine_type:
+        metadata["machine_type"] = machine_type
+
+    note = FLAGS.k8s_benchmark_note
+    if note:
+        metadata["note"] = note
+
+    if extra:
+        metadata.update(extra)
+    return metadata
+
+
+def _ResolveMachineType(spec):
+    """Return the machine type recorded on ``spec``'s cluster, if any.
+
+    The 'sandbox' nodepool's vm_spec wins; the cluster's own vm_spec is
+    the fallback when that yields nothing.
+    """
+    if not spec:
+        return None
+    cluster = getattr(spec, 'container_cluster', None)
+    if not cluster:
+        return None
+
+    resolved = None
+    pools = getattr(cluster, 'nodepools', None)
+    if pools and isinstance(pools, dict):
+        sandbox = pools.get('sandbox')
+        if sandbox and hasattr(sandbox, 'vm_spec'):
+            resolved = getattr(sandbox.vm_spec, 'machine_type', None)
+    if not resolved and hasattr(cluster, 'vm_spec'):
+        resolved = getattr(cluster.vm_spec, 'machine_type', None)
+    return resolved
+
+
+def MakeSample(metric, value, unit, namespace, extra_metadata=None):
+    """Build one sample.Sample carrying the standard benchmark metadata.
+
+    Args:
+        metric: Metric name.
+        value: Metric value.
+        unit: Unit string for the value.
+        namespace: Kubernetes namespace, forwarded to BuildMetadata().
+        extra_metadata: Optional dict of additional metadata, forwarded to
+            BuildMetadata().
+
+    Returns:
+        The constructed sample.Sample.
+    """
+    sample_metadata = BuildMetadata(namespace, extra_metadata)
+    return sample.Sample(
+        metric=metric, value=value, unit=unit, metadata=sample_metadata
+    )
+
+
+# ---------------------------------------------------------------------------
+# Port-forward flags
+# ---------------------------------------------------------------------------
+
+# When False, EnsurePortForward() logs and returns without starting anything.
+flags.DEFINE_bool(
+    "k8s_auto_portforward",
+    True,
+    "Automatically manage kubectl port-forward to the agent service.",
+)
+
+# The local and remote ports are combined as "<local>:<remote>" on the
+# kubectl port-forward command line.
+flags.DEFINE_integer(
+    "k8s_portforward_local_port",
+    8080,
+    "Local port for kubectl port-forward.",
+)
+
+flags.DEFINE_integer(
+    "k8s_portforward_remote_port",
+    80,
+    "Remote service port for kubectl port-forward.",
+)
+
+# Resource argument handed to `kubectl port-forward`.
+flags.DEFINE_string(
+    "k8s_portforward_service",
+    "svc/adk-agent",
+    "Kubernetes service to port-forward to.",
+)
+
+# Pause between kubectl exiting and the next spawn in the reconnect loop.
+flags.DEFINE_float(
+    "k8s_portforward_reconnect_delay",
+    1.0,
+    "Seconds to wait before reconnecting after port-forward drops.",
+)
+
+# Upper bound on how long EnsurePortForward() keeps polling /healthz.
+flags.DEFINE_float(
+    "k8s_portforward_health_timeout",
+    30.0,
+    "Seconds to wait for agent health check after starting port-forward.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Port-forward manager
+# ---------------------------------------------------------------------------
+
+import atexit
+import os as _os
+import signal
+import threading
+
+
+# Holds the PID of the current kubectl port-forward so that a later run can
+# find and kill a process left behind by an earlier one (see _kill_orphan).
+# NOTE(review): the path is fixed, so concurrent runs on one host would share
+# this file — confirm that is acceptable.
+_PID_FILE = "/tmp/pkb_portforward.pid"
+
+
+class _PortForwardManager:
+    """Manages a kubectl port-forward subprocess with auto-reconnect.
+
+    Mimics the shell pattern:
+        while true; do
+          kubectl port-forward svc/adk-agent -n agentic 8080:80
+          echo "Reconnecting..."
+          sleep 1
+        done
+
+    A daemon thread runs the reconnect loop.  start() and stop() are
+    serialized by a lock and are idempotent.  The PID of the current kubectl
+    process is written to _PID_FILE so that a later run can kill an orphan.
+    """
+
+    # Seconds stop() waits for the loop thread, and for kubectl to exit after
+    # SIGTERM (and again after SIGKILL).
+    _SHUTDOWN_TIMEOUT_S = 5
+
+    def __init__(self):
+        self._proc = None  # Most recently spawned kubectl Popen, if any.
+        self._thread = None  # Thread running _loop(), if any.
+        self._stop_event = threading.Event()  # Set to end the loop.
+        self._lock = threading.Lock()  # Serializes start() and stop().
+        self._started = False
+
+    @property
+    def is_running(self):
+        """True after start() until the next stop()."""
+        return self._started and not self._stop_event.is_set()
+
+    def start(self):
+        """Start the port-forward loop (idempotent)."""
+        with self._lock:
+            if self.is_running:
+                return
+
+            self._kill_orphan()
+            self._stop_event.clear()
+            self._started = True
+            self._thread = threading.Thread(
+                target=self._loop, daemon=True, name="pkb-portforward"
+            )
+            self._thread.start()
+
+    def stop(self):
+        """Stop the port-forward loop and kill the subprocess (idempotent)."""
+        with self._lock:
+            if not self._started:
+                return
+            self._stop_event.set()
+            # Let the loop thread exit before reaping: otherwise it could
+            # spawn a fresh kubectl after this one is killed, or keep looping
+            # if start() clears the event again right away.
+            worker, self._thread = self._thread, None
+            if worker is not None and worker is not threading.current_thread():
+                worker.join(timeout=self._SHUTDOWN_TIMEOUT_S)
+            self._kill_proc()
+            self._started = False
+            self._cleanup_pid_file()
+
+    def _loop(self):
+        """Background reconnect loop: respawn kubectl until stop() is called."""
+        ns = FLAGS.k8s_namespace
+        svc = FLAGS.k8s_portforward_service
+        local_port = FLAGS.k8s_portforward_local_port
+        remote_port = FLAGS.k8s_portforward_remote_port
+        delay = FLAGS.k8s_portforward_reconnect_delay
+
+        cmd = ["kubectl"]
+        if FLAGS.kubeconfig:
+            cmd += ["--kubeconfig", FLAGS.kubeconfig]
+        cmd += [
+            "port-forward", svc,
+            "-n", ns,
+            f"{local_port}:{remote_port}",
+        ]
+
+        while not self._stop_event.is_set():
+            logging.info("Starting port-forward: %s", " ".join(cmd))
+            try:
+                # Output is discarded rather than piped: nothing ever reads
+                # the pipes, so kubectl would block once a pipe buffer fills.
+                proc = subprocess.Popen(
+                    cmd,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
+                self._proc = proc
+                self._write_pid_file(proc.pid)
+
+                # Poll through the local reference; stop() resets self._proc.
+                while proc.poll() is None and not self._stop_event.is_set():
+                    self._stop_event.wait(timeout=0.5)
+                if proc.returncode is not None:
+                    logging.info(
+                        "kubectl port-forward exited with code %d",
+                        proc.returncode,
+                    )
+            except Exception as e:  # pylint: disable=broad-except
+                # Top of the thread: log and retry so the loop never dies.
+                logging.warning("Port-forward error: %s", e)
+
+            if not self._stop_event.is_set():
+                logging.info(
+                    "Port-forward disconnected. Reconnecting in %.1fs...", delay
+                )
+                self._stop_event.wait(timeout=delay)
+
+    def _kill_proc(self):
+        """Terminate the current subprocess if alive, escalating to kill()."""
+        proc, self._proc = self._proc, None
+        if proc is None or proc.poll() is not None:
+            return
+        try:
+            proc.terminate()
+            proc.wait(timeout=self._SHUTDOWN_TIMEOUT_S)
+            return
+        except (OSError, subprocess.TimeoutExpired):
+            pass  # Fall through to the forceful kill below.
+        try:
+            proc.kill()
+            # Reap the child so that it does not linger as a zombie.
+            proc.wait(timeout=self._SHUTDOWN_TIMEOUT_S)
+        except (OSError, subprocess.TimeoutExpired) as e:
+            logging.warning(
+                "Could not kill port-forward (PID %d): %s", proc.pid, e
+            )
+
+    def _write_pid_file(self, pid):
+        """Write PID to file for orphan detection (best effort)."""
+        try:
+            with open(_PID_FILE, "w") as f:
+                f.write(str(pid))
+        except OSError as e:
+            logging.debug("Could not write %s: %s", _PID_FILE, e)
+
+    def _cleanup_pid_file(self):
+        """Remove PID file."""
+        try:
+            _os.unlink(_PID_FILE)
+        except OSError:
+            pass
+
+    def _kill_orphan(self):
+        """Kill a port-forward process left by a previous PKB run."""
+        self._kill_pid_file_orphan()
+        self._kill_port_listeners(FLAGS.k8s_portforward_local_port)
+
+    def _kill_pid_file_orphan(self):
+        """Kill the process recorded in the PID file, then remove the file."""
+        import time as _time
+
+        try:
+            with open(_PID_FILE, "r") as f:
+                pid = int(f.read().strip())
+        except (OSError, ValueError):
+            # Missing, unreadable or corrupt PID file: nothing to kill.
+            self._cleanup_pid_file()
+            return
+
+        # The PID may have been reused by an unrelated process since the
+        # file was written, so check the command line before signalling.
+        if self._looks_like_port_forward(pid):
+            logging.info("Killing orphan port-forward (PID %d)", pid)
+            try:
+                _os.kill(pid, signal.SIGTERM)
+                _time.sleep(0.5)
+                _os.kill(pid, signal.SIGKILL)
+            except OSError:
+                pass  # Already gone, or not ours to signal.
+        self._cleanup_pid_file()
+
+    @staticmethod
+    def _looks_like_port_forward(pid):
+        """Return whether `ps` reports a port-forward command for pid.
+
+        Returns True when `ps` cannot be run at all, so that the orphan is
+        still killed on hosts without it.
+        """
+        try:
+            result = subprocess.run(
+                ["ps", "-p", str(pid), "-o", "command="],
+                capture_output=True, text=True, timeout=5,
+            )
+        except (OSError, subprocess.TimeoutExpired):
+            return True
+        return "port-forward" in result.stdout
+
+    @staticmethod
+    def _kill_port_listeners(local_port):
+        """SIGTERM processes listening on local_port so kubectl can bind it."""
+        try:
+            # Listeners only: a bare ":port" match would also return clients
+            # that merely hold a connection involving that port number.
+            result = subprocess.run(
+                ["lsof", "-t", f"-iTCP:{local_port}", "-sTCP:LISTEN"],
+                capture_output=True, text=True, timeout=5,
+            )
+        except (OSError, subprocess.TimeoutExpired):
+            return
+        if result.returncode != 0:
+            return
+        own_pid = _os.getpid()
+        for pid_str in result.stdout.split():
+            try:
+                pid = int(pid_str)
+                if pid == own_pid:
+                    continue
+                _os.kill(pid, signal.SIGTERM)
+                logging.info("Killed process %d on port %d", pid, local_port)
+            except (OSError, ValueError):
+                pass
+
+
+# Module-wide singleton shared by EnsurePortForward() and StopPortForward().
+_port_forward_manager = _PortForwardManager()
+
+# Ensure cleanup on interpreter exit; stop() returns immediately when the
+# manager was never started.
+atexit.register(_port_forward_manager.stop)
+
+
+def EnsurePortForward():
+    """Start port-forward if auto_portforward is enabled (idempotent).
+
+    Blocks until GET <agent API URL>/healthz succeeds or
+    --k8s_portforward_health_timeout seconds have passed.  A timeout is
+    logged as a warning, together with the last error seen; it is not raised.
+    Safe to call multiple times - only starts one background loop.
+    """
+    if not FLAGS.k8s_auto_portforward:
+        logging.info("Auto port-forward disabled (--k8s_auto_portforward=false)")
+        return
+
+    _port_forward_manager.start()
+
+    import time as _time
+    timeout = FLAGS.k8s_portforward_health_timeout
+    # Monotonic clock: the deadline must not move if the wall clock is
+    # adjusted while waiting.
+    deadline = _time.monotonic() + timeout
+    api_url = GetAgentApiUrl()
+    last_error = None
+
+    while _time.monotonic() < deadline:
+        try:
+            req = urllib.request.Request(f"{api_url}/healthz")
+            with urllib.request.urlopen(req, timeout=3) as resp:
+                logging.info("Port-forward healthy: %s", resp.read().decode())
+                return
+        except Exception as e:  # pylint: disable=broad-except
+            # Any failure means "not ready yet"; remember it so the timeout
+            # warning can say why the check never passed.
+            last_error = e
+        # Never sleep past the deadline.
+        _time.sleep(max(0.0, min(1.0, deadline - _time.monotonic())))
+
+    logging.warning(
+        "Port-forward health check did not pass within %.0fs "
+        "(last error: %s). "
+        "Continuing anyway (Run() will fail if agent is unreachable).",
+        timeout,
+        last_error,
+    )
+
+
+def StopPortForward():
+    """Stop the port-forward subprocess and clean up.
+
+    Delegates to the module-level manager's stop().  The log line below is
+    emitted unconditionally, including when no port-forward was running.
+    """
+    _port_forward_manager.stop()
+    logging.info("Port-forward stopped.")
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
new file mode 100644
index 0000000000..bd9114877c
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
@@ -0,0 +1,284 @@
+"""PKB Benchmark: GKE Agent Chromium Density Saturation .
+
+Atomic single-point measurement of Chromium browser sandbox density on a
+pre-provisioned GKE cluster with gVisor isolation. Measures interaction
+latency, screenshot generation time, cold start, navigation, evaluation,
+fill, click latencies, and RSS memory at a given concurrent session count.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the density parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_chromium_density \\
+                --k8s_chromium_density_concurrent_sessions=4 \\
+                --k8s_chromium_density_task_count=10 \\
+                --k8s_chromium_density_warmup_tasks=5 \\
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - k8s_chromium_density_interaction_mean      (ms)
+  - k8s_chromium_density_interaction_p95       (ms)
+  - k8s_chromium_density_navigate_mean         (ms)
+  - k8s_chromium_density_navigate_p95          (ms)
+  - k8s_chromium_density_evaluate_mean         (ms)
+  - k8s_chromium_density_evaluate_p95          (ms)
+  - k8s_chromium_density_fill_mean             (ms)
+  - k8s_chromium_density_fill_p95              (ms)
+  - k8s_chromium_density_click_mean            (ms)
+  - k8s_chromium_density_click_p95             (ms)
+  - k8s_chromium_density_screenshot_mean       (ms)
+  - k8s_chromium_density_screenshot_p95        (ms)
+  - k8s_chromium_density_cold_start_mean       (ms)
+  - k8s_chromium_density_cold_start_p95        (ms)
+  - k8s_chromium_density_rss_end               (MB)
+  - k8s_chromium_density_rss_growth            (MB)
+  - k8s_chromium_density_wall_time             (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+# Benchmark name; also the prefix of every metric name emitted by Run().
+BENCHMARK_NAME = "k8s_chromium_density"
+BENCHMARK_CONFIG = """
+k8s_chromium_density:
+  description: >
+    Atomic single-point Chromium browser sandbox density measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# SandboxWarmPool resource patched in Run() and drained in Cleanup(), and the
+# pod label selector passed alongside it.
+_WARMPOOL_NAME = "chromium-sandbox-warmpool"
+_WARMPOOL_LABEL = "sandbox=chromium-sandbox-example"
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+# Sent to the agent as "concurrent_sessions" and used as the warm pool
+# replica count when patching is enabled.
+flags.DEFINE_integer(
+    "k8s_chromium_density_concurrent_sessions",
+    1,
+    "Number of concurrent Chromium browser sessions to run.",
+)
+
+# Sent to the agent as "task_count".
+flags.DEFINE_integer(
+    "k8s_chromium_density_task_count",
+    10,
+    "Number of browser task iterations per Chromium session.",
+)
+
+# Sent to the agent as "warmup_tasks".
+flags.DEFINE_integer(
+    "k8s_chromium_density_warmup_tasks",
+    5,
+    "Number of warmup iterations per session (excluded from stats).",
+)
+
+# Gates the utils.PatchWarmPool() call in Run().
+flags.DEFINE_bool(
+    "k8s_chromium_density_patch_warmpool",
+    True,
+    "Patch SandboxWarmPool replicas to match density before measurement.",
+)
+
+# Sent to the agent as "sandbox_exec_timeout_s".
+flags.DEFINE_integer(
+    "k8s_chromium_density_exec_timeout",
+    120,
+    "Sandbox command execution timeout in seconds.",
+)
+
+# Passed to utils.PatchWarmPool() as wait_timeout.
+flags.DEFINE_integer(
+    "k8s_chromium_density_provision_timeout",
+    300,
+    "Max seconds to wait for warm pool pods to reach Running.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Load and return benchmark config.
+
+    No vm_groups — PKB skips Provision() and Teardown().
+
+    Args:
+        user_config: Passed through to configs.LoadConfig() together with
+            BENCHMARK_CONFIG and BENCHMARK_NAME.
+
+    Returns:
+        Whatever configs.LoadConfig() returns.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads and verify agent API.
+
+    Args:
+        benchmark_spec: The benchmark spec; forwarded to
+            deploy_utils.DeployWorkloads().
+    """
+    # Presumably makes PKB run Cleanup() even if a later stage fails — TODO
+    # confirm against the PKB runner.
+    benchmark_spec.always_call_cleanup = True
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+    # NOTE(review): this health check runs before EnsurePortForward(); with
+    # required=False it presumably only reports — confirm the order is
+    # intended.
+    utils.CheckAgentHealthz(required=False)
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Run one Chromium density measurement through the agent API.
+
+    Ensures the port-forward is up, optionally patches the warm pool to the
+    requested density, sends the workload to the agent's
+    /benchmark/chromium/density endpoint and turns the "aggregate" section
+    of the response into samples.
+
+    Args:
+      benchmark_spec: The benchmark spec, stored via
+        utils.set_benchmark_spec() for metadata derivation.
+
+    Returns:
+      List of sample.Sample objects.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    density = FLAGS.k8s_chromium_density_concurrent_sessions
+    task_count = FLAGS.k8s_chromium_density_task_count
+    warmup_tasks = FLAGS.k8s_chromium_density_warmup_tasks
+
+    logging.info("=== Run: chromium_density=%d ===", density)
+
+    # Sweeps may skip Prepare, so the port-forward is (re)checked here.
+    utils.EnsurePortForward()
+
+    # Warm pool patching lives in Run (not Prepare) for the same reason.
+    if FLAGS.k8s_chromium_density_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=density,
+            label=_WARMPOOL_LABEL,
+            wait_timeout=FLAGS.k8s_chromium_density_provision_timeout,
+        )
+
+    # Time the agent API call end to end.
+    request_body = {
+        "task_count": task_count,
+        "warmup_tasks": warmup_tasks,
+        "concurrent_sessions": density,
+        "sandbox_exec_timeout_s": FLAGS.k8s_chromium_density_exec_timeout,
+    }
+    started = time.time()
+    result = utils.CallAgentApi("/benchmark/chromium/density", request_body)
+    wall_time = time.time() - started
+    wall_time_rounded = round(wall_time, 2)
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    agg = result.get("aggregate", {})
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Metadata attached to every sample of this run.
+    extra = {
+        "density": density,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "task_count": task_count,
+        "warmup_tasks": warmup_tasks,
+        "wall_time_s": wall_time_rounded,
+    }
+
+    samples = []
+
+    # Per-task-type latency: mean then P95 for each task type, in this order.
+    task_types = (
+        "interaction",
+        "navigate",
+        "evaluate",
+        "fill",
+        "click",
+        "screenshot",
+        "cold_start",
+    )
+    for task_type in task_types:
+        for stat in ("mean", "p95"):
+            metric = f"{task_type}_{stat}"
+            _emit(samples, agg, f"{metric}_ms", metric, "ms", ns, extra)
+
+    # RSS memory.
+    for metric in ("rss_end", "rss_growth"):
+        _emit(samples, agg, f"{metric}_mb", metric, "MB", ns, extra)
+
+    # Wall time is always emitted, whatever the aggregate contains.
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            wall_time_rounded,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for chromium_density=%d.", len(samples), density)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Release sandbox resources after a measurement.
+
+    Deletes every SandboxClaim in the namespace, drains the Chromium warm
+    pool to 0 and stops the port-forward.
+
+    Args:
+        benchmark_spec: Unused.
+    """
+    namespace = FLAGS.k8s_namespace
+    logging.info("Cleanup: deleting SandboxClaims and draining warm pool.")
+
+    # Claims are deleted first so that claimed pods are released.
+    delete_claims_args = [
+        "delete",
+        "sandboxclaims",
+        "--all",
+        "-n",
+        namespace,
+        "--ignore-not-found=true",
+    ]
+    utils.RunKubectl(delete_claims_args, timeout=60, raise_on_failure=False)
+
+    # Then scale the warm pool down to 0.
+    utils.DrainWarmPool(
+        namespace=namespace,
+        warmpool_name=_WARMPOOL_NAME,
+        label=_WARMPOOL_LABEL,
+    )
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append a sample for `agg_key` unless the aggregate holds no value for it.
+
+    Args:
+        samples: List that receives the new sample.Sample.
+        agg: Aggregate metrics dict taken from the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "navigate_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "navigate_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB").
+        namespace: Kubernetes namespace (included in sample metadata).
+        extra: Dict of additional metadata attached to the sample.
+    """
+    value = agg.get(agg_key)
+    # A missing key and an explicit None are both skipped.
+    if value is None:
+        return
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    samples.append(utils.MakeSample(metric_name, value, unit, namespace, extra))
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
new file mode 100644
index 0000000000..418b5c1ed9
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
@@ -0,0 +1,481 @@
+"""PKB Benchmark: GKE Agent Deletion & Cleanup .
+
+Atomic single-point measurement of bulk deletion efficiency and IP
+reclamation on a pre-provisioned GKE cluster with gVisor isolation.
+Provisions N sandbox pods via SandboxWarmPool, then bulk-deletes them
+and measures per-pod deletion latency, aggregate deletion stats, and
+IP address reclamation timing.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the batch_size parameter across iterations to find
+the deletion saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_deletion \\
+                --k8s_deletion_batch_size=100 \\
+                --k8s_deletion_warmpool_name=python-sandbox-warmpool \\
+                --k8s_deletion_pod_label=sandbox=python-sandbox-example \\
+                --k8s_deletion_poll_interval_s=1.0 \\
+                --k8s_deletion_provision_timeout_s=120.0 \\
+                --k8s_deletion_drain_timeout_s=300.0 \\
+                --k8s_namespace=agentic \\
+                --gke_machine_type=c4-standard-8
+
+Samples emitted (per run):
+  - k8s_deletion_provision_time              (seconds)
+  - k8s_deletion_total_drain_time            (seconds)
+  - k8s_deletion_latency_p50                 (seconds)
+  - k8s_deletion_latency_p95                 (seconds)
+  - k8s_deletion_latency_p99                 (seconds)
+  - k8s_deletion_latency_max                 (seconds)
+  - k8s_deletion_rate                        (pods/sec)
+  - k8s_deletion_ip_before                   (count)
+  - k8s_deletion_ip_after                    (count)
+  - k8s_deletion_ip_reclaim_time             (seconds)
+  - k8s_deletion_final_running_count         (count)
+  - k8s_deletion_wall_time                   (seconds)
+"""
+
+import json
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+# Benchmark name; also the prefix of every metric name emitted by Run().
+BENCHMARK_NAME = "k8s_deletion"
+BENCHMARK_CONFIG = """
+k8s_deletion:
+  description: >
+    Atomic single-point bulk deletion and IP reclamation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+# Replica count the warm pool is scaled up to before the bulk delete.
+flags.DEFINE_integer(
+    "k8s_deletion_batch_size",
+    100,
+    "Number of sandbox pods to provision then bulk-delete.",
+)
+
+# Name used for `kubectl patch sandboxwarmpool` and utils.DrainWarmPool().
+flags.DEFINE_string(
+    "k8s_deletion_warmpool_name",
+    "python-sandbox-warmpool",
+    "SandboxWarmPool resource name.",
+)
+
+# Selector used for the pod count, pod name and pod IP queries in this module.
+flags.DEFINE_string(
+    "k8s_deletion_pod_label",
+    "sandbox=python-sandbox-example",
+    "Label selector for warm pool pods.",
+)
+
+# Sleep between polls of the drain loop in Run().
+flags.DEFINE_float(
+    "k8s_deletion_poll_interval_s",
+    1.0,
+    "Seconds between kubectl polls during deletion.",
+)
+
+# Run() raises RuntimeError if the batch is not Running within this time.
+flags.DEFINE_float(
+    "k8s_deletion_provision_timeout_s",
+    120.0,
+    "Max seconds to wait for pods to reach Running before deletion.",
+)
+
+# Bounds the drain polling loop; also passed (as int) to utils.DrainWarmPool().
+flags.DEFINE_float(
+    "k8s_deletion_drain_timeout_s",
+    300.0,
+    "Max seconds to wait for all pods to terminate after scale-to-0.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Load and return benchmark config.
+
+    No vm_groups — PKB skips Provision() and Teardown().
+
+    Args:
+        user_config: Passed through to configs.LoadConfig() together with
+            BENCHMARK_CONFIG and BENCHMARK_NAME.
+
+    Returns:
+        Whatever configs.LoadConfig() returns.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads onto the cluster.
+
+    Args:
+        benchmark_spec: The benchmark spec; forwarded to
+            deploy_utils.DeployWorkloads().
+    """
+    # Presumably makes PKB run Cleanup() even if a later stage fails — TODO
+    # confirm against the PKB runner.
+    benchmark_spec.always_call_cleanup = True
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+    # NOTE(review): Run() in this module never calls the agent API directly;
+    # confirm the port-forward is actually needed for this benchmark.
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Provision N pods, bulk-delete, measure deletion latency and IP reclamation.
+
+    The warm pool is drained, scaled up to --k8s_deletion_batch_size and then
+    scaled back to 0 while pod names and pod IPs are polled.  A pod's
+    deletion latency is the elapsed time at the first successful poll in
+    which its name is absent; pods still present when the drain timeout
+    expires are assigned the total drain time.  A poll whose kubectl call
+    fails is skipped rather than read as "no pods left".
+
+    Args:
+      benchmark_spec: The benchmark spec, stored via
+        utils.set_benchmark_spec() for metadata derivation.
+
+    Returns:
+      List of sample.Sample objects.
+
+    Raises:
+      RuntimeError: If fewer than batch_size pods reach Running within the
+        provision timeout, or the provisioned pods cannot be listed.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    batch_size = FLAGS.k8s_deletion_batch_size
+    warmpool_name = FLAGS.k8s_deletion_warmpool_name
+    label = FLAGS.k8s_deletion_pod_label
+    poll_interval = FLAGS.k8s_deletion_poll_interval_s
+    provision_timeout = FLAGS.k8s_deletion_provision_timeout_s
+    drain_timeout = FLAGS.k8s_deletion_drain_timeout_s
+
+    logging.info("=== Run: batch_size=%d ===", batch_size)
+
+    # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(drain_timeout))
+    time.sleep(2)
+
+    t_wall_start = time.time()
+
+    # 1. Provision N pods
+    logging.info("Provisioning %d pods...", batch_size)
+    provision_start = time.time()
+    _PatchReplicas(ns, warmpool_name, batch_size)
+
+    deadline = time.time() + provision_timeout
+    while time.time() < deadline:
+        running = utils.CountPods(ns, label, phase="Running")
+        pct = (running / batch_size * 100) if batch_size > 0 else 0
+        logging.info("Provisioning... %d/%d (%.0f%%)", running, batch_size, pct)
+        if running >= batch_size:
+            break
+        time.sleep(3)
+
+    provision_time = time.time() - provision_start
+    final_running = utils.CountPods(ns, label, phase="Running")
+
+    logging.info(
+        "Provisioned %d/%d pods in %.1fs",
+        final_running,
+        batch_size,
+        provision_time,
+    )
+
+    # If not all pods reached Running, this is a failure
+    if final_running < batch_size:
+        raise RuntimeError(
+            f"Provisioning failed: only {final_running}/{batch_size} pods "
+            f"reached Running within {provision_timeout}s"
+        )
+
+    # 2. Record pod names and IP count before deletion
+    names_before = _ListPodField(ns, label, "metadata.name")
+    if names_before is None:
+        # Without the baseline there is nothing to measure against.
+        raise RuntimeError(
+            "Could not list provisioned pods: kubectl get pods failed"
+        )
+    pod_names_before = set(names_before)
+    total_pods = len(pod_names_before)
+    ip_before = _CountAllocatedIPs(ns, label)
+
+    logging.info(
+        "Recorded %d pods, %d IPs allocated",
+        total_pods,
+        ip_before,
+    )
+
+    # Brief settle
+    time.sleep(1)
+
+    # 3. Bulk delete: scale to 0
+    logging.info("Scaling to 0 (bulk delete of %d pods)...", total_pods)
+    _PatchReplicas(ns, warmpool_name, 0)
+
+    # 4. Poll: track pod disappearance and IP reclamation
+    t_delete = time.time()
+    deadline_drain = t_delete + drain_timeout
+    pod_gone_times = {}  # pod_name -> elapsed_s when first absent
+    ip_reclaim_time = None
+    ips = ip_before  # Last successfully observed IP count (for logging).
+
+    while time.time() < deadline_drain:
+        elapsed = time.time() - t_delete
+
+        current_names = _ListPodField(ns, label, "metadata.name")
+        if current_names is None:
+            # A failed query says nothing about the pods; treating it as an
+            # empty list would mark every pod as deleted at this instant.
+            logging.warning(
+                "[%.1fs] kubectl get pods failed; skipping this poll", elapsed
+            )
+            time.sleep(poll_interval)
+            continue
+
+        current_pods = set(current_names)
+        remaining = len(current_pods)
+
+        # Record the first poll at which each original pod is absent.
+        for pn in pod_names_before - current_pods:
+            pod_gone_times.setdefault(pn, elapsed)
+
+        # IP count (scoped to warm pool label); a failed query is ignored.
+        current_ips = _ListPodField(ns, label, "status.podIP")
+        if current_ips is not None:
+            ips = len(current_ips)
+            if ip_reclaim_time is None and ips == 0:
+                ip_reclaim_time = elapsed
+
+        deleted = total_pods - remaining
+        pct = (deleted / total_pods * 100) if total_pods else 0
+        logging.info(
+            "[%.1fs] Deleted: %d/%d (%.0f%%)  IPs: %d",
+            elapsed,
+            deleted,
+            total_pods,
+            pct,
+            ips,
+        )
+
+        if remaining == 0:
+            break
+
+        time.sleep(poll_interval)
+
+    total_drain_time = time.time() - t_delete
+
+    # Pods we never saw disappear (stuck) get the full drain time
+    for pn in pod_names_before:
+        pod_gone_times.setdefault(pn, total_drain_time)
+
+    # 5. Compute per-pod deletion latencies
+    deletion_latencies = sorted(pod_gone_times.values())
+
+    ip_after = _CountAllocatedIPs(ns, label)
+    deletion_rate = (total_pods / total_drain_time) if total_drain_time > 0 else 0
+
+    logging.info(
+        "Drain complete: %.1fs, rate=%.1f pods/sec, IPs: %d->%d",
+        total_drain_time,
+        deletion_rate,
+        ip_before,
+        ip_after,
+    )
+
+    wall_time = time.time() - t_wall_start
+
+    # 6. Build samples
+    extra = {
+        "batch_size": batch_size,
+        "final_running_count": final_running,
+        "ip_before": ip_before,
+        "ip_after": ip_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    def _Add(suffix, value, unit):
+        # Every sample shares the metric prefix, namespace and metadata.
+        samples.append(
+            utils.MakeSample(f"{BENCHMARK_NAME}_{suffix}", value, unit, ns, extra)
+        )
+
+    _Add("provision_time", round(provision_time, 2), "seconds")
+    _Add("total_drain_time", round(total_drain_time, 2), "seconds")
+
+    if deletion_latencies:
+        for pct_rank in (50, 95, 99):
+            _Add(
+                f"latency_p{pct_rank}",
+                round(_Percentile(deletion_latencies, pct_rank), 3),
+                "seconds",
+            )
+        _Add("latency_max", round(deletion_latencies[-1], 3), "seconds")
+
+    _Add("rate", round(deletion_rate, 2), "pods/sec")
+    _Add("ip_before", float(ip_before), "count")
+    _Add("ip_after", float(ip_after), "count")
+
+    # Only emitted when a poll actually observed zero pod IPs.
+    if ip_reclaim_time is not None:
+        _Add("ip_reclaim_time", round(ip_reclaim_time, 2), "seconds")
+
+    _Add("final_running_count", float(final_running), "count")
+    _Add("wall_time", round(wall_time, 2), "seconds")
+
+    logging.info("Emitted %d samples for batch_size=%d.", len(samples), batch_size)
+    return samples
+
+
+def _ListPodField(namespace, label, field):
+    """Return `field` for each pod matching the label, or None on failure.
+
+    Unlike _GetPodNames() and _CountAllocatedIPs(), a failed kubectl call is
+    reported as None instead of as an empty result, so that callers can tell
+    "no pods" apart from "could not ask".
+
+    Args:
+        namespace: Kubernetes namespace to query.
+        label: Label selector for the pods.
+        field: Path below each pod item, e.g. "metadata.name" or
+            "status.podIP".
+
+    Returns:
+        List of whitespace-separated values printed by kubectl (pods without
+        the field contribute nothing), or None if kubectl exited non-zero.
+    """
+    stdout, _, rc = utils.RunKubectl(
+        [
+            "get",
+            "pods",
+            "-n",
+            namespace,
+            "-l",
+            label,
+            "-o",
+            "jsonpath={.items[*]." + field + "}",
+        ],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    if rc != 0:
+        return None
+    return stdout.split() if stdout else []
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool to 0 and stop the port-forward.
+
+    Args:
+        benchmark_spec: Unused.
+    """
+    logging.info("Cleanup: draining warm pool to 0.")
+    utils.DrainWarmPool(
+        FLAGS.k8s_namespace,
+        FLAGS.k8s_deletion_warmpool_name,
+        FLAGS.k8s_deletion_pod_label,
+        timeout=int(FLAGS.k8s_deletion_drain_timeout_s),
+    )
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _PatchReplicas(namespace, warmpool_name, replicas):
+    """Set spec.replicas of a SandboxWarmPool through a kubectl merge patch.
+
+    The kubectl call is made with raise_on_failure=False and its result is
+    discarded, so this function does not report whether the patch applied.
+
+    Args:
+        namespace: Kubernetes namespace of the warm pool.
+        warmpool_name: Name of the SandboxWarmPool resource.
+        replicas: Desired replica count.
+    """
+    merge_patch = json.dumps({"spec": {"replicas": replicas}})
+    kubectl_args = ["patch", "sandboxwarmpool", warmpool_name]
+    kubectl_args += ["-n", namespace]
+    kubectl_args += ["--type=merge", f"-p={merge_patch}"]
+    utils.RunKubectl(kubectl_args, raise_on_failure=False)
+
+
+def _GetPodNames(namespace, label):
+    """List the names of the pods selected by `label`.
+
+    Args:
+        namespace: Kubernetes namespace to query.
+        label: Label selector for the pods.
+
+    Returns:
+        List of pod names.  The list is empty both when no pod matches and
+        when kubectl exits non-zero; the two cases are not distinguished.
+    """
+    kubectl_args = ["get", "pods", "-n", namespace, "-l", label]
+    kubectl_args += ["-o", "jsonpath={.items[*].metadata.name}"]
+    stdout, _, rc = utils.RunKubectl(
+        kubectl_args, timeout=30, raise_on_failure=False
+    )
+    succeeded_with_output = rc == 0 and bool(stdout)
+    return stdout.split() if succeeded_with_output else []
+
+
+def _CountAllocatedIPs(namespace, label):
+    """Count the pod IPs reported for the pods selected by `label`.
+
+    The query is scoped to the warm pool label so that the count reflects
+    only the pods being deleted.
+
+    Args:
+        namespace: Kubernetes namespace to query.
+        label: Label selector for the pods.
+
+    Returns:
+        Number of pod IPs printed by kubectl.  The result is 0 both when no
+        IP is reported and when kubectl exits non-zero.
+    """
+    kubectl_args = ["get", "pods", "-n", namespace, "-l", label]
+    kubectl_args += ["-o", "jsonpath={.items[*].status.podIP}"]
+    stdout, _, rc = utils.RunKubectl(
+        kubectl_args, timeout=30, raise_on_failure=False
+    )
+    if rc == 0 and stdout:
+        # split() with no separator never yields empty strings.
+        return len(stdout.split())
+    return 0
+
+
+def _Percentile(sorted_values, pct):
+    """Calculate percentile (0-100) with linear interpolation.
+
+    Args:
+        sorted_values: Sequence of numbers sorted in ascending order.
+        pct: Percentile to compute, between 0 and 100 inclusive.
+
+    Returns:
+        The interpolated value, or 0.0 when sorted_values is empty.
+
+    Raises:
+        ValueError: If pct is outside [0, 100].
+    """
+    # A negative pct would silently index from the end of the list and a pct
+    # above 100 would index past it, so reject both explicitly.
+    if not 0 <= pct <= 100:
+        raise ValueError(f"pct must be between 0 and 100, got {pct!r}")
+    if not sorted_values:
+        return 0.0
+    last = len(sorted_values) - 1
+    idx = (pct / 100) * last
+    lo = int(idx)
+    hi = min(lo + 1, last)  # Clamp so that pct=100 stays in range.
+    frac = idx - lo
+    return sorted_values[lo] * (1 - frac) + sorted_values[hi] * frac
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
new file mode 100644
index 0000000000..109ab0efe6
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
@@ -0,0 +1,617 @@
+"""PKB Benchmark: GKE Agent Payload Transfer Saturation.
+
+Atomic single-point measurement of payload transfer latency from a gVisor
+sandbox back to the orchestrator on a pre-provisioned GKE cluster.  Measures
+generation time, serialization time, stdout write time, total transfer time,
+throughput, and RSS at a given payload_size_mb and concurrent_sessions count.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the payload_size_mb parameter across iterations to
+find the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_payload \\
+                --k8s_payload_size_mb=50 \\
+                --k8s_payload_iterations=20 \\
+                --k8s_payload_concurrent_sessions=5 \\
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - k8s_payload_orchestrator_transfer_mean       (ms)
+  - k8s_payload_orchestrator_transfer_p50        (ms)
+  - k8s_payload_orchestrator_transfer_p95        (ms)
+  - k8s_payload_orchestrator_transfer_p99        (ms)
+  - k8s_payload_orchestrator_transfer_min        (ms)
+  - k8s_payload_orchestrator_transfer_max        (ms)
+  - k8s_payload_sandbox_payload_size_bytes       (bytes)
+  - k8s_payload_sandbox_payload_encoded_size_bytes (bytes)
+  - k8s_payload_sandbox_payload_iterations       (count)
+  - k8s_payload_sandbox_generation_time_mean     (ms)
+  - k8s_payload_sandbox_generation_time_p50      (ms)
+  - k8s_payload_sandbox_generation_time_p95      (ms)
+  - k8s_payload_sandbox_generation_time_p99      (ms)
+  - k8s_payload_sandbox_generation_time_min      (ms)
+  - k8s_payload_sandbox_generation_time_max      (ms)
+  - k8s_payload_sandbox_serialization_time_mean  (ms)
+  - k8s_payload_sandbox_serialization_time_p50   (ms)
+  - k8s_payload_sandbox_serialization_time_p95   (ms)
+  - k8s_payload_sandbox_serialization_time_p99   (ms)
+  - k8s_payload_sandbox_serialization_time_min   (ms)
+  - k8s_payload_sandbox_serialization_time_max   (ms)
+  - k8s_payload_sandbox_stdout_time_mean         (ms)
+  - k8s_payload_sandbox_stdout_time_p50          (ms)
+  - k8s_payload_sandbox_stdout_time_p95          (ms)
+  - k8s_payload_sandbox_stdout_time_p99          (ms)
+  - k8s_payload_sandbox_stdout_time_min          (ms)
+  - k8s_payload_sandbox_stdout_time_max          (ms)
+  - k8s_payload_sandbox_transfer_time_mean       (ms)
+  - k8s_payload_sandbox_transfer_time_p50        (ms)
+  - k8s_payload_sandbox_transfer_time_p95        (ms)
+  - k8s_payload_sandbox_transfer_time_p99        (ms)
+  - k8s_payload_sandbox_transfer_time_min        (ms)
+  - k8s_payload_sandbox_transfer_time_max        (ms)
+  - k8s_payload_sandbox_throughput_mean           (MB/s)
+  - k8s_payload_sandbox_throughput_p50            (MB/s)
+  - k8s_payload_sandbox_throughput_min            (MB/s)
+  - k8s_payload_sandbox_rss_start                (MB)
+  - k8s_payload_sandbox_rss_end                  (MB)
+  - k8s_payload_sandbox_rss_growth               (MB)
+  - k8s_payload_wall_time                        (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "k8s_payload"  # Also the prefix of every emitted metric name.
+BENCHMARK_CONFIG = """
+k8s_payload:
+  description: >
+    Atomic single-point payload transfer saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"  # warmpool_name for Patch/DrainWarmPool.
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"  # label passed with the pool name.
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags (FLAGS.k8s_namespace is not defined in this file)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_float(
+    "k8s_payload_size_mb",  # Sent to the agent API as "payload_size_mb".
+    1.0,
+    "Payload size in megabytes to transfer from the sandbox.",
+)
+
+flags.DEFINE_integer(
+    "k8s_payload_iterations",  # Sent to the agent API as "payload_iterations".
+    20,
+    "Number of transfer iterations per sandbox session.",
+)
+
+flags.DEFINE_integer(
+    "k8s_payload_concurrent_sessions",  # Also the warm pool replica count in Run().
+    5,
+    "Number of parallel sandbox sessions.",
+)
+
+flags.DEFINE_integer(
+    "k8s_payload_exec_timeout",  # Sent to the agent API as "sandbox_exec_timeout_s".
+    300,
+    "Sandbox command execution timeout in seconds.",
+)
+
+flags.DEFINE_bool(
+    "k8s_payload_patch_warmpool",  # Gates the utils.PatchWarmPool call in Run().
+    True,
+    "Patch SandboxWarmPool replicas to match concurrent_sessions before measurement.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Load the benchmark config (no vm_groups — PKB skips Provision/Teardown)."""
+    loaded_config = configs.LoadConfig(
+        BENCHMARK_CONFIG, user_config, BENCHMARK_NAME
+    )
+    return loaded_config
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, run the agent health check, and ensure the port-forward."""
+    benchmark_spec.always_call_cleanup = True  # Presumably forces Cleanup(); confirm.
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+    utils.CheckAgentHealthz(required=False)  # NOTE(review): runs before the port-forward.
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Run one payload-transfer measurement through the agent API.
+
+    Returns:
+      List of samples from utils.MakeSample; absent aggregate keys are skipped.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    payload_size_mb = FLAGS.k8s_payload_size_mb
+    iterations = FLAGS.k8s_payload_iterations
+    concurrent = FLAGS.k8s_payload_concurrent_sessions
+
+    logging.info(
+        "=== Run: payload_size_mb=%s, iterations=%d, concurrent=%d ===",
+        payload_size_mb,
+        iterations,
+        concurrent,
+    )
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Patch warm pool replicas to the session count (done here, not in
+    # Prepare, for sweep compatibility)
+    if FLAGS.k8s_payload_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=concurrent,
+            label=_WARMPOOL_LABEL,
+        )
+
+    # Request body for the agent API call below
+    payload = {
+        "payload_size_mb": payload_size_mb,
+        "payload_iterations": iterations,
+        "concurrent_sessions": concurrent,
+        "sandbox_exec_timeout_s": FLAGS.k8s_payload_exec_timeout,
+    }
+
+    t0 = time.time()  # NOTE(review): wall clock; time.monotonic() ignores clock jumps.
+    result = utils.CallAgentApi("/benchmark/python/payload", payload)
+    wall_time = time.time() - t0
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    agg = result.get("aggregate", {})  # NOTE(review): a null value would break _emit.
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "payload_size_mb": payload_size_mb,
+        "payload_iterations": iterations,
+        "concurrent_sessions": concurrent,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # Orchestrator-side transfer latency
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_mean_ms",
+        "orchestrator_transfer_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p50_ms",
+        "orchestrator_transfer_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p95_ms",
+        "orchestrator_transfer_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p99_ms",
+        "orchestrator_transfer_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_min_ms",
+        "orchestrator_transfer_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_max_ms",
+        "orchestrator_transfer_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Payload metadata
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_size_bytes",
+        "sandbox_payload_size_bytes",
+        "bytes",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_encoded_size_bytes",
+        "sandbox_payload_encoded_size_bytes",
+        "bytes",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_iterations",
+        "sandbox_payload_iterations",
+        "count",
+        ns,
+        extra,
+    )
+
+    # Generation time (os.urandom)
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_mean_ms",
+        "sandbox_generation_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p50_ms",
+        "sandbox_generation_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p95_ms",
+        "sandbox_generation_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p99_ms",
+        "sandbox_generation_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_min_ms",
+        "sandbox_generation_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_max_ms",
+        "sandbox_generation_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Serialization time (base64 encode)
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_mean_ms",
+        "sandbox_serialization_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p50_ms",
+        "sandbox_serialization_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p95_ms",
+        "sandbox_serialization_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p99_ms",
+        "sandbox_serialization_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_min_ms",
+        "sandbox_serialization_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_max_ms",
+        "sandbox_serialization_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Stdout write time (gVisor Gofer write syscall)
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_mean_ms",
+        "sandbox_stdout_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p50_ms",
+        "sandbox_stdout_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p95_ms",
+        "sandbox_stdout_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p99_ms",
+        "sandbox_stdout_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_min_ms",
+        "sandbox_stdout_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_max_ms",
+        "sandbox_stdout_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Transfer time (serialization + stdout write — threshold metric)
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_mean_ms",
+        "sandbox_transfer_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p50_ms",
+        "sandbox_transfer_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p95_ms",
+        "sandbox_transfer_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p99_ms",
+        "sandbox_transfer_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_min_ms",
+        "sandbox_transfer_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_max_ms",
+        "sandbox_transfer_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Throughput
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_mean_mbps",
+        "sandbox_throughput_mean",
+        "MB/s",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_p50_mbps",
+        "sandbox_throughput_p50",
+        "MB/s",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_min_mbps",
+        "sandbox_throughput_min",
+        "MB/s",
+        ns,
+        extra,
+    )
+
+    # RSS
+    _emit(samples, agg, "sandbox_rss_start_mb", "sandbox_rss_start", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_end_mb", "sandbox_rss_end", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_growth_mb", "sandbox_rss_growth", "MB", ns, extra)
+
+    # Wall time of the API call, measured on this side; always emitted
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info(
+        "Emitted %d samples for payload_size_mb=%s.", len(samples), payload_size_mb
+    )
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool if this benchmark patched it, then stop the port-forward."""
+    ns = FLAGS.k8s_namespace
+    logging.info("Cleanup: draining warm pool.")
+    # Only undo what Run() did: skip the drain when patching was disabled.
+    if FLAGS.k8s_payload_patch_warmpool:
+        utils.DrainWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            label=_WARMPOOL_LABEL,
+        )
+    utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append one sample for `agg_key` when the aggregate dict provides it.
+
+    A key that is missing from `agg`, or whose value is None, adds nothing.
+
+    Args:
+        samples: List that receives the new sample (mutated in place).
+        agg: Aggregate metrics dict returned by the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "orchestrator_transfer_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "orchestrator_transfer_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB/s", "bytes").
+        namespace: Kubernetes namespace, forwarded to utils.MakeSample.
+        extra: Dict of additional metadata forwarded to utils.MakeSample
+            with every sample (payload size, session counts, wall time).
+    """
+    metric_value = agg.get(agg_key)
+    if metric_value is None:
+        # Nothing was reported for this metric in this run.
+        return
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    new_sample = utils.MakeSample(
+        metric_name, metric_value, unit, namespace, extra
+    )
+    samples.append(new_sample)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
new file mode 100644
index 0000000000..7760f23ff7
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
@@ -0,0 +1,378 @@
+"""PKB Benchmark: GKE Agent Python Sandbox Density.
+
+Atomic single-point measurement of Python sandbox density on a
+pre-provisioned GKE cluster with gVisor isolation. Measures Code Execution
+Latency (CEL), Time To First Execution (TTFE), RSS memory growth, and
+per-type latency breakdown (compute, syscall, import) at a given
+concurrent session count.
+
+Workflow per session:
+  1. Claim a pre-warmed sandbox pod from the SandboxWarmPool
+  2. Upload and execute the benchmark script inside the gVisor sandbox
+  3. Run `sample_warmup` iterations (results discarded - stabilizes caches)
+  4. Run `sample_count` measured iterations (results recorded)
+  5. Report TTFE, per-iteration CEL, RSS, and per-task-type breakdown
+  6. Release the sandbox claim
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the density parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_python_density \\
+                --k8s_python_density_concurrent_sandbox_count=16 \\
+                --k8s_python_density_sample_count=20 \\
+                --k8s_python_density_sample_warmup=0 \\
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - k8s_python_density_orchestrator_cel_mean       (ms)
+  - k8s_python_density_orchestrator_cel_p50        (ms)
+  - k8s_python_density_orchestrator_cel_p95        (ms)
+  - k8s_python_density_orchestrator_cel_p99        (ms)
+  - k8s_python_density_orchestrator_cel_min        (ms)
+  - k8s_python_density_orchestrator_cel_max        (ms)
+  - k8s_python_density_sandbox_total_cel_mean      (ms)
+  - k8s_python_density_sandbox_total_cel_p50       (ms)
+  - k8s_python_density_sandbox_total_cel_p95       (ms)
+  - k8s_python_density_sandbox_total_cel_p99       (ms)
+  - k8s_python_density_sandbox_total_cel_min       (ms)
+  - k8s_python_density_sandbox_total_cel_max       (ms)
+  - k8s_python_density_sandbox_ttfe                (ms)
+  - k8s_python_density_sandbox_rss_start           (MB)
+  - k8s_python_density_sandbox_rss_end             (MB)
+  - k8s_python_density_sandbox_rss_growth          (MB)
+  - k8s_python_density_sandbox_compute_cel_mean    (ms)
+  - k8s_python_density_sandbox_syscall_cel_mean    (ms)
+  - k8s_python_density_sandbox_import_cel_mean     (ms)
+  - k8s_python_density_wall_time                   (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "k8s_python_density"  # Also the prefix of every emitted metric name.
+BENCHMARK_CONFIG = """
+k8s_python_density:
+  description: >
+    Atomic single-point Python sandbox density measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"  # warmpool_name for Patch/DrainWarmPool.
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"  # label passed with the pool name.
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags (FLAGS.k8s_namespace is not defined in this file)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "k8s_python_density_concurrent_sandbox_count",  # "density" in Run().
+    1,
+    "Number of concurrent sandbox sessions to run.",
+)
+
+flags.DEFINE_integer(
+    "k8s_python_density_sample_count",  # Sent to the agent API as "sample_count".
+    20,
+    "Number of sample iterations per sandbox session.",
+)
+
+flags.DEFINE_integer(
+    "k8s_python_density_sample_warmup",  # Sent to the agent API as "sample_warmup".
+    0,
+    "Number of warmup iterations per session (excluded from stats). "
+    "Warmup iterations execute the same benchmark tasks as measured "
+    "iterations but their latency results are discarded. This allows "
+    "JIT compilation, caches, and gVisor page faults to stabilize "
+    "before measurement begins.",
+)
+
+flags.DEFINE_bool(
+    "k8s_python_density_patch_warmpool",  # Gates both PatchWarmPool and DrainWarmPool.
+    True,
+    "Patch SandboxWarmPool replicas to match density before measurement.",
+)
+
+flags.DEFINE_integer(
+    "k8s_python_density_exec_timeout",  # Sent as "sandbox_exec_timeout_s" in Run().
+    600,
+    "Timeout in seconds for the API call.",  # NOTE(review): help text vs usage; confirm.
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Load the benchmark config (no vm_groups — PKB skips Provision/Teardown)."""
+    loaded_config = configs.LoadConfig(
+        BENCHMARK_CONFIG, user_config, BENCHMARK_NAME
+    )
+    return loaded_config
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, run the agent health check, and ensure the port-forward."""
+    benchmark_spec.always_call_cleanup = True  # Presumably forces Cleanup(); confirm.
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+    utils.CheckAgentHealthz(required=False)  # NOTE(review): runs before the port-forward.
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Run one density measurement through the agent API.
+
+    Returns:
+      List of samples from utils.MakeSample; absent aggregate keys are skipped.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    density = FLAGS.k8s_python_density_concurrent_sandbox_count
+
+    logging.info("=== Run: density=%d ===", density)
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Patch warm pool to match density (moved from Prepare for sweep compatibility)
+    if FLAGS.k8s_python_density_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=density,
+            label=_WARMPOOL_LABEL,
+        )
+
+    # Request body for the agent API call below
+    payload = {
+        "sample_count": FLAGS.k8s_python_density_sample_count,
+        "sample_warmup": FLAGS.k8s_python_density_sample_warmup,
+        "concurrent_sessions": density,
+        "sandbox_exec_timeout_s": FLAGS.k8s_python_density_exec_timeout,
+    }
+
+    t0 = time.time()  # NOTE(review): wall clock; time.monotonic() ignores clock jumps.
+    result = utils.CallAgentApi("/benchmark/python/density", payload)
+    wall_time = time.time() - t0
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    agg = result.get("aggregate", {})  # NOTE(review): a null value would break _emit.
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "density": density,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "sample_count": FLAGS.k8s_python_density_sample_count,
+        "sample_warmup": FLAGS.k8s_python_density_sample_warmup,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # Orchestrator-side CEL
+    _emit(
+        samples,
+        agg,
+        "orchestrator_cel_mean_ms",
+        "orchestrator_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p50_ms", "orchestrator_cel_p50", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p95_ms", "orchestrator_cel_p95", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p99_ms", "orchestrator_cel_p99", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_min_ms", "orchestrator_cel_min", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_max_ms", "orchestrator_cel_max", "ms", ns, extra
+    )
+
+    # Sandbox-side total CEL
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_mean_ms",
+        "sandbox_total_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p50_ms",
+        "sandbox_total_cel_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p95_ms",
+        "sandbox_total_cel_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p99_ms",
+        "sandbox_total_cel_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_min_ms",
+        "sandbox_total_cel_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_max_ms",
+        "sandbox_total_cel_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # TTFE
+    _emit(samples, agg, "sandbox_ttfe_ms", "sandbox_ttfe", "ms", ns, extra)
+
+    # RSS
+    _emit(samples, agg, "sandbox_rss_start_mb", "sandbox_rss_start", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_end_mb", "sandbox_rss_end", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_growth_mb", "sandbox_rss_growth", "MB", ns, extra)
+
+    # Per-type CEL breakdown
+    _emit(
+        samples,
+        agg,
+        "sandbox_compute_cel_mean_ms",
+        "sandbox_compute_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_syscall_cel_mean_ms",
+        "sandbox_syscall_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_import_cel_mean_ms",
+        "sandbox_import_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Wall time of the API call, measured on this side; always emitted
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for density=%d.", len(samples), density)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool (when this benchmark patched it) and stop the port-forward."""
+    target_namespace = FLAGS.k8s_namespace
+    logging.info("Cleanup: draining warm pool.")
+    drain_requested = FLAGS.k8s_python_density_patch_warmpool
+    if drain_requested:
+        utils.DrainWarmPool(
+            namespace=target_namespace,
+            warmpool_name=_WARMPOOL_NAME,
+            label=_WARMPOOL_LABEL,
+        )
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append one sample for `agg_key` when the aggregate dict provides it.
+
+    A key that is missing from `agg`, or whose value is None, adds nothing.
+
+    Args:
+        samples: List that receives the new sample (mutated in place).
+        agg: Aggregate metrics dict returned by the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "orchestrator_cel_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB", "seconds").
+        namespace: Kubernetes namespace, forwarded to utils.MakeSample.
+        extra: Dict of additional metadata forwarded to utils.MakeSample
+            with every sample (density, session counts, wall time).
+    """
+    metric_value = agg.get(agg_key)
+    if metric_value is None:
+        # Nothing was reported for this metric in this run.
+        return
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    new_sample = utils.MakeSample(
+        metric_name, metric_value, unit, namespace, extra
+    )
+    samples.append(new_sample)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
new file mode 100644
index 0000000000..feb82c8614
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
@@ -0,0 +1,805 @@
+"""PKB Benchmark: GKE Agent QPS Saturation .
+
+Atomic single-point measurement of scheduling throughput on a pre-provisioned
+GKE cluster.  Fires sandbox claim requests at a controlled QPS rate for a
+fixed duration and measures per-request TTFE (Time To First Execution).
+
+Supports two operating modes:
+  - **agent**: POST to the orchestrator /benchmark/python/qps endpoint
+  - **raw_claim**: Bypass the agent, create SandboxClaims directly via kubectl
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the target_qps parameter across iterations to find
+the QPS saturation point.
+
+Usage:
+  # Agent mode
+  python pkb.py --benchmarks=k8s_qps \\
+                --k8s_qps_target_qps=5.0 \\
+                --k8s_qps_pool_size=70 \\
+                --k8s_qps_step_duration_s=30.0 \\
+                --k8s_qps_mode=agent \\
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
+
+  # Raw claim mode
+  python pkb.py --benchmarks=k8s_qps \\
+                --k8s_qps_target_qps=5.0 \\
+                --k8s_qps_pool_size=70 \\
+                --k8s_qps_step_duration_s=30.0 \\
+                --k8s_qps_mode=raw_claim \\
+                --k8s_qps_claim_timeout_s=60.0 \\
+                --k8s_namespace=agentic
+
+Samples emitted (per run):
+  - k8s_qps_ttfe_mean                (ms)
+  - k8s_qps_ttfe_p50                 (ms)
+  - k8s_qps_ttfe_p95                 (ms)
+  - k8s_qps_ttfe_p99                 (ms)
+  - k8s_qps_ttfe_min                 (ms)
+  - k8s_qps_ttfe_max                 (ms)
+  - k8s_qps_claim_mean               (ms)
+  - k8s_qps_claim_p95                (ms)
+  - k8s_qps_actual_qps               (requests/sec)
+  - k8s_qps_duration                 (seconds)
+  - k8s_qps_total_requests           (count)
+  - k8s_qps_successful_requests      (count)
+  - k8s_qps_failed_requests          (count)
+  - k8s_qps_pool_before              (count)
+  - k8s_qps_pool_after               (count)
+  - k8s_qps_wall_time                (seconds)
+"""
+
+import json
+import os
+import logging
+import threading
+import time
+import uuid
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker import data
+from perfkitbenchmarker.resources.container_service import kubectl
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "k8s_qps"
+BENCHMARK_CONFIG = """
+k8s_qps:
+  description: >
+    Atomic single-point QPS saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"
+_SANDBOX_TEMPLATE = "python-sandbox-template"
+_QPS_CLAIM_LABEL = "created-by=pkb-qps-benchmark"
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_float(
+    "k8s_qps_target_qps",
+    5.0,
+    "Target requests per second (sandbox claims per second).",
+)
+
+flags.DEFINE_integer(
+    "k8s_qps_pool_size",
+    70,
+    "Warm pool size maintained during the measurement.",
+)
+
+flags.DEFINE_float(
+    "k8s_qps_step_duration_s",
+    30.0,
+    "Duration of the QPS burst in seconds.",
+)
+
+flags.DEFINE_integer(
+    "k8s_qps_sandbox_exec_timeout_s",
+    30,
+    "Sandbox command execution timeout in seconds.",
+)
+
+flags.DEFINE_float(
+    "k8s_qps_provision_timeout_s",
+    180.0,
+    "Max seconds to wait for pool pods to reach Running.",
+)
+
+# Enum rather than free-form string: a mistyped mode is rejected at flag parsing.
+flags.DEFINE_enum(
+    "k8s_qps_mode", "agent", ["agent", "raw_claim"],
+    "Operating mode: 'agent' (POST to orchestrator API) or "
+    "'raw_claim' (create SandboxClaims directly via kubectl).",
+)
+
+flags.DEFINE_float(
+    "k8s_qps_claim_timeout_s",
+    60.0,
+    "Max seconds to wait for a raw claim to bind " "(only used with mode=raw_claim).",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Return configs.LoadConfig(...) for this benchmark and `user_config`.
+
+    BENCHMARK_CONFIG has no vm_groups — PKB skips Provision() and Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, open the port-forward, then probe the agent API."""
+    benchmark_spec.always_call_cleanup = True
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+
+    # Forward first: _RunAgent also ensures the forward before calling the API.
+    utils.EnsurePortForward()
+    if FLAGS.k8s_qps_mode == "agent":
+        utils.CheckAgentHealthz(required=False)
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Scale the warm pool, then run one QPS measurement in the selected mode.
+
+    Args:
+      benchmark_spec: Spec registered with utils and passed to the mode runner.
+
+    Returns:
+      The sample list produced by _RunRawClaim (mode "raw_claim") or
+      _RunAgent (any other mode value).
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    # Pool scaling lives here rather than in Prepare (sweep compatibility).
+    patch_kwargs = {
+        "namespace": FLAGS.k8s_namespace,
+        "warmpool_name": _WARMPOOL_NAME,
+        "replicas": FLAGS.k8s_qps_pool_size,
+        "label": _WARMPOOL_LABEL,
+        "wait_timeout": int(FLAGS.k8s_qps_provision_timeout_s),
+    }
+    utils.PatchWarmPool(**patch_kwargs)
+
+    # Every mode other than "raw_claim" is served by the agent path.
+    use_raw_claims = FLAGS.k8s_qps_mode == "raw_claim"
+    runner = _RunRawClaim if use_raw_claims else _RunAgent
+    return runner(benchmark_spec)
+
+
+def Cleanup(benchmark_spec):
+    """Delete benchmark claims, drain the warm pool, stop the port-forward.
+
+    StopPortForward runs even when claim deletion or the drain raises.
+    """
+    ns = FLAGS.k8s_namespace
+    logging.info("Cleanup: deleting benchmark claims and draining warm pool.")
+    try:
+        _DeleteBenchmarkClaims(ns)
+        utils.DrainWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            label=_WARMPOOL_LABEL,
+        )
+    finally:
+        utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+# ---------------------------------------------------------------------------
+# Agent mode
+# ---------------------------------------------------------------------------
+
+
+def _RunAgent(benchmark_spec):
+    """Run one QPS burst through the agent API and turn the reply into samples."""
+    ns = FLAGS.k8s_namespace
+    target_qps = FLAGS.k8s_qps_target_qps
+    pool_size = FLAGS.k8s_qps_pool_size
+    step_duration = FLAGS.k8s_qps_step_duration_s
+
+    logging.info(
+        "=== Run (agent): target_qps=%s, pool_size=%d, duration=%ss ===",
+        target_qps,
+        pool_size,
+        step_duration,
+    )
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Running warm-pool pod count before the burst (reported as pool_before)
+    pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Request body sent to /benchmark/python/qps
+    payload = {
+        "target_qps": target_qps,
+        "duration_s": step_duration,
+        "sandbox_exec_timeout_s": FLAGS.k8s_qps_sandbox_exec_timeout_s,
+    }
+
+    t0 = time.time()
+    api_timeout = int(step_duration + 300)  # burst duration plus 300 s of slack
+    result = utils.CallAgentApi("/benchmark/python/qps", payload, timeout=api_timeout)
+    wall_time = time.time() - t0  # client-side time spent in the API call
+
+    # Running warm-pool pod count after the burst (reported as pool_after)
+    pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Response fields; a missing key falls back to {} or 0
+    aggregate = result.get("aggregate", {})
+    successful = result.get("successful_requests", 0)
+    failed = result.get("failed_requests", 0)
+    total = result.get("total_requests", 0)
+    actual_qps = result.get("actual_qps", 0)
+    duration_s = result.get("duration_s", 0)
+
+    logging.info(
+        "API response: actual_qps=%s, %d/%d requests ok (%.1fs)",
+        actual_qps,
+        successful,
+        total,
+        wall_time,
+    )
+
+    # Metadata dict passed with every sample built below
+    extra = {
+        "target_qps": target_qps,
+        "pool_size": pool_size,
+        "step_duration_s": step_duration,
+        "mode": "agent",
+        "actual_qps": actual_qps,
+        "total_requests": total,
+        "successful_requests": successful,
+        "failed_requests": failed,
+        "pool_before": pool_before,
+        "pool_after": pool_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # TTFE latency stats (each emitted only when present in `aggregate`)
+    _emit(samples, aggregate, "ttfe_mean_ms", "ttfe_mean", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p50_ms", "ttfe_p50", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p95_ms", "ttfe_p95", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p99_ms", "ttfe_p99", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_min_ms", "ttfe_min", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_max_ms", "ttfe_max", "ms", ns, extra)
+
+    # Claim latency stats (each emitted only when present in `aggregate`)
+    _emit(samples, aggregate, "claim_mean_ms", "claim_mean", "ms", ns, extra)
+    _emit(samples, aggregate, "claim_p95_ms", "claim_p95", "ms", ns, extra)
+
+    # Throughput and counts (always emitted, values as reported by the API)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_actual_qps",
+            actual_qps,
+            "requests/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_duration",
+            duration_s,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_requests",
+            float(total),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_successful_requests",
+            float(successful),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_failed_requests",
+            float(failed),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pool state (Running pod counts taken around the API call)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_before",
+            float(pool_before),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_after",
+            float(pool_after),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Wall time of the API call as measured by this process
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps)
+    return samples
+
+
+# ---------------------------------------------------------------------------
+# Raw claim mode
+# ---------------------------------------------------------------------------
+
+
+def _RunRawClaim(benchmark_spec):
+    """Fire SandboxClaims directly at target_qps (no agent) and build samples."""
+    ns = FLAGS.k8s_namespace
+    target_qps = FLAGS.k8s_qps_target_qps
+    pool_size = FLAGS.k8s_qps_pool_size
+    step_duration = FLAGS.k8s_qps_step_duration_s
+    claim_timeout = FLAGS.k8s_qps_claim_timeout_s
+
+    logging.info(
+        "=== Run (raw_claim): target_qps=%s, pool_size=%d, duration=%ss ===",
+        target_qps,
+        pool_size,
+        step_duration,
+    )
+
+    # Record pool state before burst
+    pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # At least one claim is always fired, whatever the flag values are.
+    total_claims = max(1, int(target_qps * step_duration))
+    interval = 1.0 / target_qps if target_qps > 0 else 1.0
+
+    logging.info(
+        "Firing %d raw SandboxClaims at %s req/s",
+        total_claims,
+        target_qps,
+    )
+
+    # One worker thread per claim; results are appended under `lock`.
+    claim_results = []
+    lock = threading.Lock()
+
+    def _fire_and_wait(idx, fire_time):
+        claim_name = f"pkb-qps-0-{idx}-{uuid.uuid4().hex[:6]}"
+        result = {"request_id": idx, "fire_time_s": round(fire_time, 3)}
+        result["ttfe_ms"] = None  # stays None unless the claim binds in time
+        try:
+            t_create = _CreateClaim(ns, _SANDBOX_TEMPLATE, claim_name)
+            result["create_ts"] = t_create
+            t_bound = _WaitClaimBound(ns, claim_name, claim_timeout)
+            if t_bound is not None:
+                ttfe_ms = (t_bound - t_create) * 1000.0
+                result["ttfe_ms"] = result["claim_ms"] = round(ttfe_ms, 3)
+                result["error"] = None
+            else:
+                result["error"] = "Timeout waiting for claim to bind"
+        except Exception as e:
+            result["error"] = f"{type(e).__name__}: {e}"
+        with lock:
+            claim_results.append(result)
+
+    t0 = time.time()
+    threads = []
+    for i in range(total_claims):
+        fire_time = time.time() - t0
+        t = threading.Thread(target=_fire_and_wait, args=(i, fire_time), daemon=True)
+        threads.append(t)
+        t.start()
+        if i < total_claims - 1:
+            next_fire = t0 + (i + 1) * interval
+            sleep_time = next_fire - time.time()
+            if sleep_time > 0:
+                time.sleep(sleep_time)
+
+    for t in threads:
+        t.join(timeout=claim_timeout + 30)
+
+    wall_time = time.time() - t0  # includes the wait for workers, not just firing
+    actual_qps = round(total_claims / wall_time, 2) if wall_time > 0 else 0
+
+    # Record pool state after burst
+    pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Snapshot under the lock: a worker that outlived join() may still append.
+    with lock:
+        finished = list(claim_results)
+    successful = [r for r in finished if r.get("ttfe_ms") is not None]
+    failed_count = total_claims - len(successful)  # unfinished workers count too
+    ttfe_values = sorted(r["ttfe_ms"] for r in successful)
+
+    logging.info(
+        "Raw claim burst complete: %d/%d ok, actual_qps=%s (%.1fs)",
+        len(successful),
+        total_claims,
+        actual_qps,
+        wall_time,
+    )
+
+    # Metadata dict passed with every sample built below
+    extra = {
+        "target_qps": target_qps,
+        "pool_size": pool_size,
+        "step_duration_s": step_duration,
+        "mode": "raw_claim",
+        "actual_qps": actual_qps,
+        "total_requests": total_claims,
+        "successful_requests": len(successful),
+        "failed_requests": failed_count,
+        "pool_before": pool_before,
+        "pool_after": pool_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # TTFE latency stats (computed from raw claim results)
+    if ttfe_values:
+        n = len(ttfe_values)
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_mean",
+                round(sum(ttfe_values) / n, 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p50",
+                round(_percentile(ttfe_values, 50), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p95",
+                round(_percentile(ttfe_values, 95), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p99",
+                round(_percentile(ttfe_values, 99), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_min",
+                round(ttfe_values[0], 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_max",
+                round(ttfe_values[-1], 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+
+        # Claim latency (same as TTFE in raw_claim mode)
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_claim_mean",
+                round(sum(ttfe_values) / n, 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_claim_p95",
+                round(_percentile(ttfe_values, 95), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+
+    # Throughput and counts
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_actual_qps",
+            actual_qps,
+            "requests/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_duration",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_requests",
+            float(total_claims),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_successful_requests",
+            float(len(successful)),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_failed_requests",
+            float(failed_count),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pool state
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_before",
+            float(pool_before),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_after",
+            float(pool_after),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Wall time
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    # Cleanup benchmark claims
+    _DeleteBenchmarkClaims(ns)
+
+    logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps)
+    return samples
+
+
+# ---------------------------------------------------------------------------
+# Raw claim helpers
+# ---------------------------------------------------------------------------
+
+
+def _CreateClaim(namespace, template, claim_name):
+    """Apply one SandboxClaim manifest; return time.time() taken after apply.
+
+    Raises RuntimeError (with kubectl's stderr) when `kubectl apply` fails.
+    """
+    # Reuse the selector _DeleteBenchmarkClaims filters on so they cannot drift.
+    label_key, label_value = _QPS_CLAIM_LABEL.split("=", 1)
+    manifest = json.dumps(
+        {
+            "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+            "kind": "SandboxClaim",
+            "metadata": {
+                "name": claim_name,
+                "namespace": namespace,
+                "labels": {label_key: label_value},
+            },
+            "spec": {"sandboxTemplateRef": {"name": template}},
+        }
+    )
+    tmp_dir = os.path.join(data.ResourcePath("k8s_agents/manifests"), "tmp")
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"qps-claim-{claim_name}.json")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(manifest)
+        _, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+        # Stamp as soon as apply returns, before the temp file is removed.
+        t_create = time.time()
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
+    if retcode != 0:
+        raise RuntimeError(f"Failed to create claim {claim_name}: {stderr.strip()}")
+    return t_create
+
+
+def _WaitClaimBound(namespace, claim_name, timeout_s):
+    """Poll a SandboxClaim until it is bound/ready; return time.time() or None."""
+    deadline = time.time() + timeout_s
+    while time.time() < deadline:
+        stdout, _, rc = utils.RunKubectl(
+            [
+                "get",
+                "sandboxclaim",
+                claim_name,
+                "-n",
+                namespace,
+                "-o",
+                "jsonpath={.status.phase}",
+            ],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        if rc == 0 and (stdout or "").strip().lower() in ("bound", "ready"):
+            return time.time()
+        time.sleep(0.1)
+    return None
+
+
+def _DeleteBenchmarkClaims(namespace):
+    """Delete SandboxClaims labelled created-by=pkb-qps-benchmark; return the count."""
+    stdout, _, _ = utils.RunKubectl(
+        [
+            "get",
+            "sandboxclaim",
+            "-l",
+            _QPS_CLAIM_LABEL,
+            "-n",
+            namespace,
+            "-o",
+            "jsonpath={.items[*].metadata.name}",
+        ],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    names = stdout.split() if stdout else []
+    if not names:
+        return 0
+
+    logging.info("Deleting %d pkb-qps SandboxClaim(s)", len(names))
+    utils.RunKubectl(
+        [
+            "delete",
+            "sandboxclaim",
+            "-l",
+            _QPS_CLAIM_LABEL,
+            "-n",
+            namespace,
+            "--wait=false",
+        ],
+        timeout=60,
+        raise_on_failure=False,
+    )
+
+    # Poll (up to 120s) until no labelled claim is listed any more.
+    t0 = time.time()
+    while time.time() - t0 < 120:
+        stdout, _, _ = utils.RunKubectl(
+            [
+                "get",
+                "sandboxclaim",
+                "-l",
+                _QPS_CLAIM_LABEL,
+                "-n",
+                namespace,
+                "--no-headers",
+                "--ignore-not-found",
+            ],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        remaining = len([line for line in stdout.splitlines() if line]) if stdout else 0
+        if remaining == 0:
+            logging.info("Claims cleaned up in %.1fs", time.time() - t0)
+            break
+        time.sleep(2)
+    else:
+        logging.warning("%d SandboxClaim(s) still present after 120s.", remaining)
+    return len(names)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _percentile(sorted_values, pct):
+    """Return the pct-th percentile (0-100) of ascending values, interpolated."""
+    if not sorted_values:
+        return 0.0
+    top = len(sorted_values) - 1
+    rank = (pct / 100) * top
+    base = int(rank)
+    w = rank - base
+    return sorted_values[base] * (1 - w) + sorted_values[min(base + 1, top)] * w
+
+
+def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra):
+    """Append a sample for `data_key` unless `data` has no non-None value for it.
+
+    Note: the `data` parameter shadows the imported `data` module in this body.
+    """
+    found = data.get(data_key)
+    if found is None:
+        # Absent key or explicit None: nothing to report.
+        return
+
+    metric = f"{BENCHMARK_NAME}_{metric_suffix}"
+    new_sample = utils.MakeSample(metric, found, unit, namespace, extra)
+    samples.append(new_sample)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
new file mode 100644
index 0000000000..8d78c6649b
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
@@ -0,0 +1,1037 @@
+"""PKB Benchmark: GKE Agent Pod Snapshot Saturation .
+
+Atomic single-point measurement of GKE Pod Snapshot create/restore latency
+on a pre-provisioned GKE cluster with gVisor isolation.  Measures snapshot
+time, restore time, TTFE (Time To First Execution), and restore correctness
+at a given preload_mb and burst_size.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the preload_mb parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_snapshot \\
+                --k8s_snapshot_preload_mb=50 \\
+                --k8s_snapshot_burst_size=3 \\
+                --k8s_namespace=agentic \\
+                --k8s_snapshot_skip_snapshot=false
+
+Samples emitted (per run):
+  - k8s_snapshot_snapshot_p50        (seconds)
+  - k8s_snapshot_snapshot_p95        (seconds)
+  - k8s_snapshot_snapshot_max        (seconds)
+  - k8s_snapshot_restore_p50         (seconds)
+  - k8s_snapshot_restore_p95         (seconds)
+  - k8s_snapshot_restore_max         (seconds)
+  - k8s_snapshot_ttfe_p50            (seconds)
+  - k8s_snapshot_ttfe_p95            (seconds)
+  - k8s_snapshot_ttfe_max            (seconds)
+  - k8s_snapshot_startup_time        (seconds)
+  - k8s_snapshot_restore_correct_count (count)
+  - k8s_snapshot_wall_time           (seconds)
+"""
+
+import json
+import logging
+import os
+import re
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+from jinja2 import Template
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker import data
+from perfkitbenchmarker.resources.container_service import kubectl
+from perfkitbenchmarker import sample
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "k8s_snapshot"
+BENCHMARK_CONFIG = """
+k8s_snapshot:
+  description: >
+    Atomic single-point Pod Snapshot saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "k8s_snapshot_preload_mb",
+    10,
+    "Megabytes of memory to pre-allocate in the sandbox before snapshot.",
+)
+
+flags.DEFINE_integer(
+    "k8s_snapshot_burst_size",
+    1,
+    "Number of concurrent source/snapshot/restore pods per measurement.",
+)
+
+# k8s_snapshot_ksa_name is defined in gke_deploy_utils.py
+# (where DeploySnapshots() consumes it) and is available here
+# via the deploy_utils import.
+
+flags.DEFINE_integer(
+    "k8s_snapshot_pod_timeout",
+    180,
+    "Max seconds to wait for pod Running / preload.",
+)
+
+flags.DEFINE_boolean(
+    "k8s_snapshot_skip_snapshot",
+    False,
+    "Skip snapshot/restore phases — measure cold-start TTFE only.",
+)
+
+flags.DEFINE_string(
+    "k8s_snapshot_preload_mode",
+    "synthetic",
+    "Preload mode: 'synthetic' (os.urandom fill) or "
+    "'script:<path>' to run a custom startup script.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Return configs.LoadConfig(...) for this benchmark and `user_config`.
+
+    BENCHMARK_CONFIG has no vm_groups — PKB skips Provision() and Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads and snapshot infra, then validate prerequisites.
+
+    Raises RuntimeError when a required CRD, ServiceAccount or template is absent.
+    """
+    benchmark_spec.always_call_cleanup = True
+    ns = FLAGS.k8s_namespace
+    preload_mb = FLAGS.k8s_snapshot_preload_mb
+
+    logging.info(
+        "=== Prepare: preload_mb=%d, burst_size=%d ===",
+        preload_mb,
+        FLAGS.k8s_snapshot_burst_size,
+    )
+
+    # Deploy Agent Sandbox ecosystem (idempotent)
+    deploy_utils.DeployWorkloads(benchmark_spec)
+
+    # Pod Snapshot infra is GKE-specific: deploy only on a confirmed GCP cluster
+    # (container_cluster may be None on pre-existing clusters); log every skip.
+    cluster = getattr(benchmark_spec, "container_cluster", None)
+    cloud = getattr(cluster, "cloud", None)
+    if not cluster:
+        logging.info("Pod Snapshot infrastructure skipped (no container_cluster).")
+    elif cloud != "GCP":
+        logging.info(
+            "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).",
+            cloud or "unknown",
+        )
+    elif FLAGS.skip_deploy_snapshots:
+        logging.info("Pod Snapshot infrastructure skipped (--skip_deploy_snapshots).")
+    else:
+        deploy_utils.DeploySnapshots()
+
+    # 1. Verify the PodSnapshotStorageConfig resource type is served (CRD present).
+    _, _, retcode = utils.RunKubectl(
+        ["get", "podsnapshotstorageconfigs.podsnapshot.gke.io", "--no-headers"],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    if retcode != 0:
+        raise RuntimeError(
+            "PodSnapshotStorageConfig CRD not found. "
+            "Ensure pod snapshots are enabled on the cluster."
+        )
+    logging.info("PodSnapshotStorageConfig verified.")
+
+    # 2. Advisory check: `get` exits 0 with empty output when no policy exists.
+    stdout, _, retcode = utils.RunKubectl(
+        ["get", "podsnapshotpolicies.podsnapshot.gke.io", "-n", ns, "--no-headers"],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    if retcode != 0 or not (stdout or "").strip():
+        logging.warning("PodSnapshotPolicy not found in namespace %s.", ns)
+
+    # 3. Verify the service account exists.
+    ksa = FLAGS.k8s_snapshot_ksa_name
+    _, _, retcode = utils.RunKubectl(
+        ["get", "serviceaccount", ksa, "-n", ns],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    if retcode != 0:
+        raise RuntimeError(
+            f"ServiceAccount {ksa} not found in namespace {ns}. "
+            "Run setup_snapshot_gke.sh or ensure DeploySnapshots() succeeded."
+        )
+    logging.info("ServiceAccount %s verified.", ksa)
+
+    # 4. Verify the template file exists.
+    template_path = _GetTemplatePath()
+    if not os.path.isfile(template_path):
+        raise RuntimeError(f"Snapshot template not found: {template_path}")
+    logging.info("Template file verified: %s", template_path)
+
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Run one snapshot/restore cycle with the flag settings; return samples.
+
+    Returns:
+      List of samples built via _emit/utils.MakeSample; wall_time is always included.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    preload_mb = FLAGS.k8s_snapshot_preload_mb
+    burst_size = FLAGS.k8s_snapshot_burst_size
+    skip_snapshot = FLAGS.k8s_snapshot_skip_snapshot
+    preload_mode = FLAGS.k8s_snapshot_preload_mode
+    ksa_name = FLAGS.k8s_snapshot_ksa_name
+    pod_timeout = FLAGS.k8s_snapshot_pod_timeout
+
+    logging.info(
+        "=== Run: preload_mb=%d, burst_size=%d, skip_snapshot=%s ===",
+        preload_mb,
+        burst_size,
+        skip_snapshot,
+    )
+
+    template_path = _GetTemplatePath()
+    t0 = time.time()
+
+    # Run the snapshot/restore cycle; the result is read below as a dict
+    step_result = _RunSnapshotCycle(
+        namespace=ns,
+        preload_mb=preload_mb,
+        burst_size=burst_size,
+        skip_snapshot=skip_snapshot,
+        preload_mode=preload_mode,
+        ksa_name=ksa_name,
+        pod_timeout=pod_timeout,
+        template_path=template_path,
+    )
+
+    wall_time = time.time() - t0  # elapsed time of the _RunSnapshotCycle call
+
+    # Metadata dict passed with every sample built below
+    extra = {
+        "preload_mb": preload_mb,
+        "burst_size": burst_size,
+        "skip_snapshot": skip_snapshot,
+        "preload_mode": preload_mode,
+        "restore_correct_count": step_result.get("restore_correct_count", 0),
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    if step_result.get("error"):
+        extra["error"] = step_result["error"]  # surfaced as metadata; samples still built
+
+    samples = []
+
+    # Snapshot metrics (each emitted only when present in step_result)
+    _emit(samples, step_result, "snapshot_p50_s", "snapshot_p50", "seconds", ns, extra)
+    _emit(samples, step_result, "snapshot_p95_s", "snapshot_p95", "seconds", ns, extra)
+    _emit(samples, step_result, "snapshot_max_s", "snapshot_max", "seconds", ns, extra)
+
+    # Restore metrics (each emitted only when present in step_result)
+    _emit(samples, step_result, "restore_p50_s", "restore_p50", "seconds", ns, extra)
+    _emit(samples, step_result, "restore_p95_s", "restore_p95", "seconds", ns, extra)
+    _emit(samples, step_result, "restore_max_s", "restore_max", "seconds", ns, extra)
+
+    # TTFE metrics (each emitted only when present in step_result)
+    _emit(samples, step_result, "ttfe_p50_s", "ttfe_p50", "seconds", ns, extra)
+    _emit(samples, step_result, "ttfe_p95_s", "ttfe_p95", "seconds", ns, extra)
+    _emit(samples, step_result, "ttfe_max_s", "ttfe_max", "seconds", ns, extra)
+
+    # Startup time (emitted only when present in step_result)
+    _emit(samples, step_result, "startup_time_s", "startup_time", "seconds", ns, extra)
+
+    # Restore correctness: unlike `extra`, no sample is emitted when the key is absent
+    correct = step_result.get("restore_correct_count")
+    if correct is not None:
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_restore_correct_count",
+                correct,
+                "count",
+                ns,
+                extra,
+            )
+        )
+
+    # Wall time (always emitted)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for preload_mb=%d.", len(samples), preload_mb)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Delete leftover labelled benchmark resources and stop port-forwarding.
+
+    Each resource kind is deleted by the ``app=snapshot-benchmark-workload``
+    label in the configured namespace.  Deletion is best-effort: kubectl
+    failures are not raised.
+
+    Args:
+      benchmark_spec: Unused by this function.
+    """
+    namespace = FLAGS.k8s_namespace
+    logging.info("Cleanup — deleting any leftover snapshot-benchmark resources.")
+
+    resource_kinds = (
+        "sandboxclaim",
+        "sandboxtemplate",
+        "podsnapshotmanualtrigger",
+        "podsnapshots.podsnapshot.gke.io",
+    )
+    for kind in resource_kinds:
+        delete_args = ["delete", kind, "-l", "app=snapshot-benchmark-workload"]
+        delete_args += ["-n", namespace, "--ignore-not-found=true"]
+        utils.RunKubectl(delete_args, timeout=60, raise_on_failure=False)
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+# ---------------------------------------------------------------------------
+# Core snapshot/restore logic
+# ---------------------------------------------------------------------------
+
+
+def _RunSnapshotCycle(
+    namespace,
+    preload_mb,
+    burst_size,
+    skip_snapshot,
+    preload_mode,
+    ksa_name,
+    pod_timeout,
+    template_path,
+):
+    """Execute one full snapshot/restore cycle and return a result dict.
+
+    Steps: apply a step-specific SandboxTemplate, create ``burst_size``
+    source claims and wait until each pod is Running and preloaded, snapshot
+    every source, create the same number of restore claims, then measure
+    restore time, TTFE and counter correctness.  Step resources are deleted
+    in a ``finally`` block whatever the outcome.
+
+    Exceptions never propagate to the caller: they are logged and stored as
+    a string under ``result["error"]``, and metrics gathered before the
+    failure are kept.  Restore pods that report an error do not abort the
+    cycle, but they are summarised in ``result["error"]`` as well.
+
+    Args:
+      namespace: Kubernetes namespace for all resources.
+      preload_mb: Preload size handed to the template; also part of the
+        template name.
+      burst_size: Number of source claims, triggers and restore claims.
+      skip_snapshot: If true, stop once the sources are ready and report
+        their preload-complete (or startup) time as TTFE.
+      preload_mode: Forwarded to template rendering and source measurement.
+      ksa_name: Service account name forwarded to template rendering.
+      pod_timeout: Seconds allowed for pods to become ready.
+      template_path: Path to the SandboxTemplate Jinja2 file.
+
+    Returns:
+      Dict with the ``*_p50_s`` / ``*_p95_s`` / ``*_max_s`` metrics for
+      snapshot, restore and ttfe, plus ``startup_time_s``,
+      ``snapshot_counter``, ``restore_correct_count``, ``burst_results``
+      and ``error``.  Metrics that were not measured stay None.
+    """
+    step_template = f"snap-bench-{preload_mb}mb"
+    source_names = [f"snap-src-0-{i}" for i in range(burst_size)]
+    restore_names = [f"snap-restore-0-{i}" for i in range(burst_size)]
+    trigger_names = [f"snap-trigger-0-{i}" for i in range(burst_size)]
+
+    result = {
+        "preload_mb": preload_mb,
+        "burst_size": burst_size,
+        "snapshot_p50_s": None,
+        "snapshot_p95_s": None,
+        "snapshot_max_s": None,
+        "restore_p50_s": None,
+        "restore_p95_s": None,
+        "restore_max_s": None,
+        "ttfe_p50_s": None,
+        "ttfe_p95_s": None,
+        "ttfe_max_s": None,
+        "startup_time_s": None,
+        "snapshot_counter": None,
+        "restore_correct_count": 0,
+        "burst_results": [],
+        "error": None,
+    }
+
+    try:
+        # 1. Create step-specific SandboxTemplate
+        logging.info(
+            "Creating SandboxTemplate '%s' (PRELOAD_MB=%d, memory=%dMi)",
+            step_template,
+            preload_mb,
+            max(512, preload_mb + 256),
+        )
+        if not _RenderAndApplyTemplate(
+            template_path,
+            step_template,
+            namespace,
+            ksa_name,
+            preload_mb,
+            preload_mode,
+        ):
+            raise RuntimeError("Failed to create SandboxTemplate")
+
+        time.sleep(2)
+
+        # 2. Create source claims and wait for Running + preload
+        logging.info("Creating %d source SandboxClaim(s)", burst_size)
+        t0_sources = time.time()
+        workers = min(burst_size, 50)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            claim_futs = [
+                pool.submit(_ApplyClaim, sname, namespace, step_template)
+                for sname in source_names
+            ]
+            # _ApplyClaim raises when kubectl fails; result() re-raises that
+            # here so a failed claim aborts the cycle with the real cause
+            # instead of surfacing later as a pod timeout.
+            for f in claim_futs:
+                f.result()
+
+        logging.info("Waiting for %d source pod(s) Running + preload", burst_size)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            source_futs = [
+                pool.submit(
+                    _MeasureSingleSource,
+                    sname,
+                    namespace,
+                    t0_sources,
+                    pod_timeout,
+                    preload_mode,
+                )
+                for sname in source_names
+            ]
+            source_results = [f.result() for f in source_futs]
+
+        src_failed = [r for r in source_results if r.get("error")]
+        if src_failed:
+            fail_msgs = "; ".join(f"{r['pod']}: {r['error']}" for r in src_failed)
+            raise RuntimeError(
+                f"{len(src_failed)}/{burst_size} source pod(s) failed: {fail_msgs}"
+            )
+
+        startup_times = [
+            r["startup_time_s"]
+            for r in source_results
+            if r["startup_time_s"] is not None
+        ]
+        result["startup_time_s"] = (
+            round(_Percentile(startup_times, 50), 3) if startup_times else None
+        )
+
+        # The smallest counter seen across sources is the baseline that every
+        # restored pod is later compared against.
+        min_counter = min(
+            (
+                r["snapshot_counter"]
+                for r in source_results
+                if r["snapshot_counter"] is not None
+            ),
+            default=None,
+        )
+        result["snapshot_counter"] = min_counter
+        logging.info("%d source pod(s) ready. Min counter: %s", burst_size, min_counter)
+
+        # --skip_snapshot: measure cold-start TTFE only
+        if skip_snapshot:
+            logging.info("skip_snapshot mode: measuring cold-start TTFE")
+            ttfe_times = []
+            burst_results = []
+            for sname, src in zip(source_names, source_results):
+                startup = src["startup_time_s"]
+                # Fall back to startup time when no preload time was recorded.
+                ttfe_s = src.get("preload_complete_time_s") or startup
+                ttfe_times.append(ttfe_s)
+                burst_results.append(
+                    {
+                        "pod": sname,
+                        "source_pod": sname,
+                        "startup_time_s": startup,
+                        "snapshot_counter": None,
+                        "snapshot_time_s": None,
+                        "restore_time_s": None,
+                        "ttfe_s": ttfe_s,
+                        "restore_counter": src["snapshot_counter"],
+                        "restore_correct": True,
+                        "error": None,
+                    }
+                )
+
+            result["burst_results"] = burst_results
+            result["restore_correct_count"] = burst_size
+
+            if ttfe_times:
+                result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3)
+                result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3)
+                result["ttfe_max_s"] = round(max(ttfe_times), 3)
+
+            # The finally block below still runs the cleanup.
+            return result
+
+        # 3. Trigger snapshots concurrently
+        logging.info("Triggering %d snapshot(s)", burst_size)
+        t0_snap = time.time()
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            snap_futs = [
+                pool.submit(
+                    _TriggerAndWaitSnapshot,
+                    tname,
+                    sname,
+                    namespace,
+                    t0_snap,
+                )
+                for tname, sname in zip(trigger_names, source_names)
+            ]
+            snap_results = [f.result() for f in snap_futs]
+
+        snap_failed = [r for r in snap_results if r.get("error")]
+        snap_times = [
+            r["snapshot_time_s"]
+            for r in snap_results
+            if r["snapshot_time_s"] is not None
+        ]
+        # Record timings of the snapshots that did finish before bailing out.
+        if snap_times:
+            result["snapshot_p50_s"] = round(_Percentile(snap_times, 50), 3)
+            result["snapshot_p95_s"] = round(_Percentile(snap_times, 95), 3)
+            result["snapshot_max_s"] = round(max(snap_times), 3)
+
+        if snap_failed:
+            fail_msgs = "; ".join(f"{r['trigger']}: {r['error']}" for r in snap_failed)
+            raise RuntimeError(
+                f"{len(snap_failed)}/{burst_size} snapshot(s) failed: {fail_msgs}"
+            )
+
+        # 4. Create restore claims concurrently
+        logging.info("Creating %d restore SandboxClaim(s)", burst_size)
+        t0_burst = time.time()
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            create_futs = [
+                pool.submit(_ApplyClaim, rname, namespace, step_template)
+                for rname in restore_names
+            ]
+            for f in create_futs:
+                f.result()
+
+        # 5. Poll restore pods for Running + TTFE
+        logging.info("Measuring restore + TTFE across %d pod(s)", burst_size)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            measure_futs = [
+                pool.submit(
+                    _MeasureSingleRestore,
+                    rname,
+                    namespace,
+                    t0_burst,
+                    min_counter,
+                    pod_timeout,
+                )
+                for rname in restore_names
+            ]
+            burst_results = [f.result() for f in measure_futs]
+
+        # Merge source + snapshot info (all lists are index-aligned).
+        for restored, sname, src, snap in zip(
+            burst_results, source_names, source_results, snap_results
+        ):
+            restored["source_pod"] = sname
+            restored["startup_time_s"] = src["startup_time_s"]
+            restored["snapshot_counter"] = src["snapshot_counter"]
+            restored["snapshot_time_s"] = snap["snapshot_time_s"]
+
+        result["burst_results"] = burst_results
+
+        # 6. Aggregate
+        restore_times = [
+            r["restore_time_s"]
+            for r in burst_results
+            if r["restore_time_s"] is not None
+        ]
+        ttfe_times = [r["ttfe_s"] for r in burst_results if r["ttfe_s"] is not None]
+        correct_count = sum(1 for r in burst_results if r["restore_correct"])
+
+        result["restore_correct_count"] = correct_count
+
+        if restore_times:
+            result["restore_p50_s"] = round(_Percentile(restore_times, 50), 3)
+            result["restore_p95_s"] = round(_Percentile(restore_times, 95), 3)
+            result["restore_max_s"] = round(max(restore_times), 3)
+
+        if ttfe_times:
+            result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3)
+            result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3)
+            result["ttfe_max_s"] = round(max(ttfe_times), 3)
+
+        logging.info("Counter correct: %d/%d", correct_count, burst_size)
+
+        # Report restore pods that timed out or produced no output.  This
+        # does not raise, so the partial metrics above are still returned.
+        restore_failed = [r for r in burst_results if r.get("error")]
+        if restore_failed:
+            fail_msgs = "; ".join(
+                f"{r['pod']}: {r['error']}" for r in restore_failed
+            )
+            result["error"] = (
+                f"{len(restore_failed)}/{burst_size} restore pod(s) failed: "
+                f"{fail_msgs}"
+            )
+            logging.error("Restore phase incomplete: %s", result["error"])
+
+    except Exception as e:
+        result["error"] = str(e)
+        logging.error("Snapshot cycle failed: %s", e)
+
+    finally:
+        # Cleanup
+        logging.info("Cleaning up step resources")
+        _CleanupStep(
+            source_names,
+            restore_names,
+            trigger_names,
+            step_template,
+            namespace,
+        )
+        time.sleep(5)
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Kubernetes interaction helpers
+# ---------------------------------------------------------------------------
+
+
+def _ApplyClaim(name, namespace, template_name):
+    """Create a SandboxClaim that references the given SandboxTemplate.
+
+    The manifest is written to a scratch file under the manifests resource
+    directory, applied with kubectl, and the file is removed afterwards.
+
+    Args:
+      name: Name of the SandboxClaim.
+      namespace: Namespace to create the claim in.
+      template_name: Name of the SandboxTemplate the claim refers to.
+
+    Raises:
+      RuntimeError: If kubectl apply returns a non-zero exit code.
+    """
+    claim = {
+        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+        "kind": "SandboxClaim",
+        "metadata": {
+            "name": name,
+            "namespace": namespace,
+            "labels": {"app": "snapshot-benchmark-workload"},
+        },
+        "spec": {"sandboxTemplateRef": {"name": template_name}},
+    }
+    manifest = json.dumps(claim)
+
+    scratch_dir = os.path.join(data.ResourcePath("k8s_agents/manifests"), "tmp")
+    os.makedirs(scratch_dir, exist_ok=True)
+    manifest_file = os.path.join(scratch_dir, f"snap-claim-{name}.json")
+    try:
+        with open(manifest_file, "w") as fh:
+            fh.write(manifest)
+        _, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", manifest_file],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        # Remove the scratch manifest whether or not the apply succeeded.
+        if os.path.isfile(manifest_file):
+            os.unlink(manifest_file)
+
+    if retcode == 0:
+        return
+    raise RuntimeError(f"Failed to create SandboxClaim {name}: {stderr}")
+
+
+def _RenderAndApplyTemplate(
+    template_path,
+    template_name,
+    namespace,
+    ksa_name,
+    preload_mb,
+    preload_mode,
+):
+    """Render the Jinja2 SandboxTemplate for this step and kubectl-apply it.
+
+    When ``preload_mode`` starts with ``"script:"`` the work is delegated to
+    _RenderAndApplyScriptTemplate() and ``template_path`` is not read.
+
+    Args:
+      template_path: Path of the Jinja2 template file.
+      template_name: Name given to the rendered SandboxTemplate.
+      namespace: Namespace passed to the template.
+      ksa_name: Service account name passed to the template.
+      preload_mb: Preload size passed to the template; the memory value is
+        ``max(512, preload_mb + 256)``.
+      preload_mode: Preload mode string.
+
+    Returns:
+      True if kubectl apply exited with code 0, otherwise False.
+    """
+    if preload_mode.startswith("script:"):
+        return _RenderAndApplyScriptTemplate(
+            template_name,
+            namespace,
+            ksa_name,
+            preload_mb,
+            preload_mode,
+        )
+
+    with open(template_path) as src:
+        template_text = src.read()
+
+    rendered = Template(template_text).render(
+        template_name=template_name,
+        namespace=namespace,
+        ksa_name=ksa_name,
+        preload_mb=preload_mb,
+        memory_mi=max(512, preload_mb + 256),
+    )
+
+    scratch_dir = os.path.join(data.ResourcePath("k8s_agents/manifests"), "tmp")
+    os.makedirs(scratch_dir, exist_ok=True)
+    manifest_file = os.path.join(scratch_dir, f"snap-template-{template_name}.yaml")
+    try:
+        with open(manifest_file, "w") as out:
+            out.write(rendered)
+        _, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", manifest_file],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        # Remove the rendered manifest whether or not the apply succeeded.
+        if os.path.isfile(manifest_file):
+            os.unlink(manifest_file)
+
+    applied = retcode == 0
+    if not applied:
+        logging.warning("kubectl apply stderr: %s", stderr)
+    return applied
+
+
+def _get_sandbox_node_selector():
+    """Build a fresh nodeSelector mapping for sandbox pods."""
+    selector = dict(pkb_nodepool="sandbox")
+    return selector
+
+
+def _get_sandbox_tolerations():
+    """Build the tolerations list for sandbox pods.
+
+    Returns:
+      A one-element list tolerating the ``sandbox.gke.io/runtime=gvisor``
+      NoSchedule taint.
+    """
+    gvisor_toleration = {
+        "key": "sandbox.gke.io/runtime",
+        "operator": "Equal",
+        "value": "gvisor",
+        "effect": "NoSchedule",
+    }
+    return [gvisor_toleration]
+
+
+def _RenderAndApplyScriptTemplate(
+    template_name,
+    namespace,
+    ksa_name,
+    preload_mb,
+    preload_mode,
+):
+    """Apply a SandboxTemplate whose container runs a user-provided script.
+
+    The script path is the part of ``preload_mode`` after the first ``":"``.
+    Its contents are embedded in a bash entrypoint that prints
+    ``SCRIPT_READY`` once the script has finished and then prints an
+    incrementing ``Count: N`` line every second.
+
+    Args:
+      template_name: Name given to the SandboxTemplate.
+      namespace: Namespace of the SandboxTemplate.
+      ksa_name: Service account name for the pod.
+      preload_mb: Exposed to the container as the PRELOAD_MB env var; the
+        memory request is ``max(512, preload_mb + 256)`` Mi.
+      preload_mode: String of the form ``script:<path>``.
+
+    Returns:
+      True if kubectl apply exited with code 0; False if the script file is
+      missing or the apply failed.
+    """
+    script_path = preload_mode.split(":", 1)[1]
+    if not os.path.isfile(script_path):
+        logging.error("Script not found: %s", script_path)
+        return False
+
+    with open(script_path) as src:
+        user_script = src.read()
+
+    memory_mi = max(512, preload_mb + 256)
+
+    # One entry per line of the entrypoint; joined with newlines below and
+    # terminated by a trailing newline.
+    entrypoint_lines = [
+        "#!/bin/bash",
+        "set -e",
+        'echo "Running startup script..."',
+        "# --- User script start ---",
+        f"{user_script}",
+        "# --- User script end ---",
+        'echo "SCRIPT_READY"',
+        'echo "Starting counter."',
+        "i=0",
+        "while true; do",
+        '  echo "Count: $i"',
+        "  i=$((i + 1))",
+        "  sleep 1",
+        "done",
+    ]
+    entrypoint = "\n".join(entrypoint_lines) + "\n"
+
+    container = {
+        "name": "preloader",
+        "image": "python:3.11-slim",
+        "command": ["bash", "-c"],
+        "args": [entrypoint],
+        "env": [{"name": "PRELOAD_MB", "value": str(preload_mb)}],
+        "resources": {
+            "requests": {
+                "cpu": "250m",
+                "memory": f"{memory_mi}Mi",
+                "ephemeral-storage": "512Mi",
+            }
+        },
+    }
+    pod_spec = {
+        "serviceAccountName": ksa_name,
+        "runtimeClassName": "gvisor",
+        "containers": [container],
+        "nodeSelector": _get_sandbox_node_selector(),
+        "tolerations": _get_sandbox_tolerations(),
+        "restartPolicy": "OnFailure",
+    }
+    manifest = json.dumps({
+        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+        "kind": "SandboxTemplate",
+        "metadata": {
+            "name": template_name,
+            "namespace": namespace,
+        },
+        "spec": {
+            "podTemplate": {
+                "metadata": {
+                    "labels": {"app": "snapshot-benchmark-workload"},
+                },
+                "spec": pod_spec,
+            }
+        },
+    })
+
+    scratch_dir = os.path.join(data.ResourcePath("k8s_agents/manifests"), "tmp")
+    os.makedirs(scratch_dir, exist_ok=True)
+    manifest_file = os.path.join(
+        scratch_dir, f"snap-script-template-{template_name}.json"
+    )
+    try:
+        with open(manifest_file, "w") as out:
+            out.write(manifest)
+        _, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", manifest_file],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        # Remove the scratch manifest whether or not the apply succeeded.
+        if os.path.isfile(manifest_file):
+            os.unlink(manifest_file)
+
+    applied = retcode == 0
+    if not applied:
+        logging.warning("kubectl apply stderr: %s", stderr)
+    return applied
+
+
+def _MeasureSingleSource(name, namespace, t0, pod_timeout, preload_mode):
+    """Wait for a source pod to reach Running and finish its preload.
+
+    Args:
+      name: Pod name.
+      namespace: Namespace of the pod.
+      t0: Reference timestamp (time.time() value) that all reported
+        durations are measured from.
+      pod_timeout: Seconds after ``t0`` by which the pod must be Running;
+        also passed to _WaitForPreload() as its own timeout.
+      preload_mode: Forwarded to _WaitForPreload().
+
+    Returns:
+      Dict with ``pod``, ``startup_time_s``, ``preload_complete_time_s``,
+      ``snapshot_counter`` and ``error``.  On timeout ``error`` is set and
+      the fields not yet measured stay None.
+    """
+    outcome = {
+        "pod": name,
+        "startup_time_s": None,
+        "preload_complete_time_s": None,
+        "snapshot_counter": None,
+        "error": None,
+    }
+
+    # Phase 1: poll the pod phase once per second until Running.
+    running = False
+    deadline = t0 + pod_timeout
+    while time.time() < deadline:
+        phase, _, _ = utils.RunKubectl(
+            ["get", "pod", name, "-n", namespace, "-o", "jsonpath={.status.phase}"],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        if phase == "Running":
+            outcome["startup_time_s"] = round(time.time() - t0, 3)
+            running = True
+            break
+        time.sleep(1)
+
+    if not running:
+        outcome["error"] = f"Pod {name} did not reach Running within {pod_timeout}s"
+        return outcome
+
+    # Phase 2: wait for the preload marker in the pod logs.
+    preloaded = _WaitForPreload(name, namespace, pod_timeout, preload_mode)
+    if not preloaded:
+        outcome["error"] = f"Preload did not complete within {pod_timeout}s"
+        return outcome
+
+    outcome["preload_complete_time_s"] = round(time.time() - t0, 3)
+
+    # Give the counter a few seconds to tick before sampling it.
+    time.sleep(3)
+    outcome["snapshot_counter"] = _GetLastCounter(name, namespace)
+    return outcome
+
+
+def _WaitForPreload(name, namespace, timeout_s, preload_mode):
+    """Poll pod logs until a preload-complete marker shows up.
+
+    The last 20 log lines are checked every 2 seconds for ``SCRIPT_READY``,
+    ``Starting counter`` or a ``Count: N`` line.
+
+    Args:
+      name: Pod name.
+      namespace: Namespace of the pod.
+      timeout_s: Seconds to keep polling, counted from this call.
+      preload_mode: Not used by this function.
+
+    Returns:
+      True as soon as a marker is seen, False if the timeout expires first.
+    """
+    deadline = time.time() + timeout_s
+    while time.time() < deadline:
+        log_tail, _, _ = utils.RunKubectl(
+            ["logs", name, "-n", namespace, "--tail=20"],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        marker_seen = (
+            "SCRIPT_READY" in log_tail
+            or "Starting counter" in log_tail
+            or re.search(r"Count:\s*\d+", log_tail)
+        )
+        if marker_seen:
+            return True
+        time.sleep(2)
+    return False
+
+
+def _GetLastCounter(name, namespace):
+    """Return the most recent ``Count: N`` value from the pod's log tail.
+
+    Args:
+      name: Pod name.
+      namespace: Namespace of the pod.
+
+    Returns:
+      The last counter value found in the final 10 log lines as an int, or
+      None if kubectl failed or no counter line is present.
+    """
+    log_tail, _, rc = utils.RunKubectl(
+        ["logs", name, "-n", namespace, "--tail=10"],
+        timeout=10,
+        raise_on_failure=False,
+    )
+    if rc != 0:
+        return None
+
+    counts = re.findall(r"Count:\s*(\d+)", log_tail)
+    if not counts:
+        return None
+    return int(counts[-1])
+
+
+def _TriggerAndWaitSnapshot(trigger_name, target_pod, namespace, t0, timeout_s=300):
+    """Apply a PodSnapshotManualTrigger and poll until it reports Complete.
+
+    Args:
+      trigger_name: Name of the trigger resource to create.
+      target_pod: Pod the trigger points at.
+      namespace: Namespace of the trigger.
+      t0: Reference timestamp (time.time() value); both the deadline and
+        the reported duration are measured from it.
+      timeout_s: Seconds after ``t0`` by which the snapshot must complete.
+
+    Returns:
+      Dict with ``trigger``, ``pod``, ``snapshot_time_s`` and ``error``.
+      ``error`` is set when the trigger cannot be created or does not
+      complete in time.
+    """
+    outcome = {
+        "trigger": trigger_name,
+        "pod": target_pod,
+        "snapshot_time_s": None,
+        "error": None,
+    }
+    trigger = {
+        "apiVersion": "podsnapshot.gke.io/v1",
+        "kind": "PodSnapshotManualTrigger",
+        "metadata": {"name": trigger_name, "namespace": namespace},
+        "spec": {"targetPod": target_pod},
+    }
+    manifest = json.dumps(trigger)
+
+    scratch_dir = os.path.join(data.ResourcePath("k8s_agents/manifests"), "tmp")
+    os.makedirs(scratch_dir, exist_ok=True)
+    manifest_file = os.path.join(scratch_dir, f"snap-trigger-{trigger_name}.json")
+    try:
+        with open(manifest_file, "w") as out:
+            out.write(manifest)
+        _, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", manifest_file],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        # Remove the scratch manifest whether or not the apply succeeded.
+        if os.path.isfile(manifest_file):
+            os.unlink(manifest_file)
+
+    if retcode != 0:
+        outcome["error"] = f"Failed to create trigger: {stderr}"
+        return outcome
+
+    # Poll the reason of the trigger's first status condition every 2 seconds.
+    deadline = t0 + timeout_s
+    while time.time() < deadline:
+        reason, _, _ = utils.RunKubectl(
+            [
+                "get",
+                "podsnapshotmanualtriggers.podsnapshot.gke.io",
+                trigger_name,
+                "-n",
+                namespace,
+                "-o",
+                "jsonpath={.status.conditions[0].reason}",
+            ],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        if reason == "Complete":
+            outcome["snapshot_time_s"] = round(time.time() - t0, 3)
+            return outcome
+        time.sleep(2)
+
+    outcome["error"] = f"Snapshot {trigger_name} did not complete within {timeout_s}s"
+    return outcome
+
+
+def _MeasureSingleRestore(name, namespace, t0, snapshot_counter, pod_timeout):
+    """Measure restore time and TTFE for one restored pod.
+
+    Restore time is when the pod phase becomes Running; TTFE is when the
+    first ``Count: N`` line is found in the pod logs.  Both are measured
+    from ``t0`` and share the deadline ``t0 + pod_timeout``.
+
+    Args:
+      name: Pod name.
+      namespace: Namespace of the pod.
+      t0: Reference timestamp (time.time() value).
+      snapshot_counter: Counter value the restored pod must have reached,
+        or None when no baseline is available.
+      pod_timeout: Seconds after ``t0`` allowed for both phases.
+
+    Returns:
+      Dict with ``pod``, ``restore_time_s``, ``ttfe_s``, ``restore_counter``,
+      ``restore_correct`` and ``error``.  ``restore_correct`` is True only
+      when a baseline exists and the first counter seen is >= that baseline.
+    """
+    outcome = {
+        "pod": name,
+        "restore_time_s": None,
+        "ttfe_s": None,
+        "restore_counter": None,
+        "restore_correct": False,
+        "error": None,
+    }
+    deadline = t0 + pod_timeout
+
+    # Phase 1: poll the pod phase once per second until Running.
+    running = False
+    while time.time() < deadline:
+        phase, _, _ = utils.RunKubectl(
+            ["get", "pod", name, "-n", namespace, "-o", "jsonpath={.status.phase}"],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        if phase == "Running":
+            outcome["restore_time_s"] = round(time.time() - t0, 3)
+            running = True
+            break
+        time.sleep(1)
+
+    if not running:
+        outcome["error"] = f"Pod {name} did not reach Running within {pod_timeout}s"
+        return outcome
+
+    # Phase 2: wait for the first Count line (TTFE).
+    while time.time() < deadline:
+        log_tail, _, rc = utils.RunKubectl(
+            ["logs", name, "-n", namespace, "--tail=50"],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        # Only parse the output when kubectl succeeded.
+        counts = re.findall(r"Count:\s*(\d+)", log_tail) if rc == 0 else []
+        if not counts:
+            time.sleep(1)
+            continue
+
+        outcome["ttfe_s"] = round(time.time() - t0, 3)
+        first_count = int(counts[0])
+        outcome["restore_counter"] = first_count
+        outcome["restore_correct"] = (
+            snapshot_counter is not None and first_count >= snapshot_counter
+        )
+        return outcome
+
+    outcome["error"] = f"Pod {name}: no Count output within timeout"
+    return outcome
+
+
+def _CleanupStep(source_names, restore_names, trigger_names, template_name, namespace):
+    """Delete the template, claims, triggers and snapshots of one step.
+
+    Named resources are deleted one at a time (template first, then source
+    claims, restore claims and triggers), followed by every PodSnapshot in
+    the namespace.  All deletions are best-effort: kubectl failures are not
+    raised.
+
+    Args:
+      source_names: Names of the source SandboxClaims.
+      restore_names: Names of the restore SandboxClaims.
+      trigger_names: Names of the PodSnapshotManualTriggers.
+      template_name: Name of the step's SandboxTemplate.
+      namespace: Namespace holding all of the above.
+    """
+    targets = [("sandboxtemplate", template_name)]
+    targets += [("sandboxclaim", claim) for claim in source_names]
+    targets += [("sandboxclaim", claim) for claim in restore_names]
+    targets += [("podsnapshotmanualtrigger", trigger) for trigger in trigger_names]
+
+    for kind, resource in targets:
+        utils.RunKubectl(
+            ["delete", kind, resource, "-n", namespace, "--ignore-not-found=true"],
+            timeout=60,
+            raise_on_failure=False,
+        )
+
+    # Delete any PodSnapshot resources
+    delete_all_snapshots = ["delete", "podsnapshots.podsnapshot.gke.io", "--all"]
+    delete_all_snapshots += ["-n", namespace, "--ignore-not-found=true"]
+    utils.RunKubectl(delete_all_snapshots, timeout=60, raise_on_failure=False)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _GetTemplatePath():
+    """Return the path of the snapshot SandboxTemplate Jinja2 file.
+
+    The file name is joined onto the ``k8s_agents/manifests`` resource
+    directory resolved by data.ResourcePath().
+    """
+    manifests_dir = data.ResourcePath("k8s_agents/manifests")
+    return os.path.join(manifests_dir, "snapshot-sandbox-template.yaml.j2")
+
+
+def _Percentile(values, pct):
+    """Return the ``pct``-th percentile of ``values`` by linear interpolation.
+
+    Args:
+      values: Numbers to summarise; need not be sorted.
+      pct: Percentile in the range 0-100.
+
+    Returns:
+      The value interpolated between the two nearest ranks of the sorted
+      input, or 0.0 when ``values`` is empty.
+    """
+    if not values:
+        return 0.0
+    ordered = sorted(values)
+    last = len(ordered) - 1
+    # Fractional rank of the requested percentile within the sorted data.
+    position = (pct / 100) * last
+    lower = int(position)
+    upper = min(lower + 1, last)
+    weight = position - lower
+    return ordered[lower] * (1 - weight) + ordered[upper] * weight
+
+
+def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra):
+    """Append a sample for ``data[data_key]`` unless that value is None.
+
+    A missing key and a key mapped to None are treated the same way: nothing
+    is appended.
+
+    Args:
+      samples: List the new sample is appended to (modified in place).
+      data: Result dict to read the value from.
+      data_key: Key to look up in ``data``.
+      metric_suffix: Suffix appended to ``BENCHMARK_NAME`` to form the
+        metric name.
+      unit: Unit string for the sample.
+      namespace: Namespace passed through to utils.MakeSample().
+      extra: Metadata dict passed through to utils.MakeSample().
+    """
+    value = data.get(data_key)
+    if value is None:
+        return
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    samples.append(utils.MakeSample(metric_name, value, unit, namespace, extra))
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
new file mode 100644
index 0000000000..9024f9f28e
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
@@ -0,0 +1,426 @@
+"""PKB Benchmark: GKE Agent Warmpool Scale-Up (Use Case E).
+
+Atomic single-point measurement of warm pool provisioning speed on a
+pre-provisioned GKE cluster.  Measures how quickly N sandbox pods can be
+provisioned from zero via the SandboxWarmPool controller.  No agent API
+is needed; this benchmark interacts directly with the Kubernetes API.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the target_replicas parameter across iterations to
+find the provisioning saturation point.
+
+Usage:
+  python pkb.py --benchmarks=k8s_warmpool \
+                --k8s_warmpool_target_replicas=100 \
+                --k8s_warmpool_name=python-sandbox-warmpool \
+                --k8s_warmpool_pod_label=sandbox=python-sandbox-example \
+                --k8s_warmpool_ready_threshold_s=300 \
+                --k8s_warmpool_poll_interval_s=2.0 \
+                --k8s_warmpool_drain_timeout_s=300 \
+                --k8s_namespace=agentic \
+                --gke_machine_type=c4-standard-8
+
+Samples emitted (per run):
+  - k8s_warmpool_total_time_to_ready         (seconds)
+  - k8s_warmpool_refill_rate                 (pods/sec)
+  - k8s_warmpool_drain_time                  (seconds)
+  - k8s_warmpool_first_pod_running           (seconds)
+  - k8s_warmpool_final_running_count         (count)
+  - k8s_warmpool_final_pending_count         (count)
+  - k8s_warmpool_time_to_created_p50         (seconds)
+  - k8s_warmpool_time_to_created_p95         (seconds)
+  - k8s_warmpool_time_to_created_max         (seconds)
+  - k8s_warmpool_time_to_created_count       (count)
+  - k8s_warmpool_time_to_scheduled_p50       (seconds)
+  - k8s_warmpool_time_to_scheduled_p95       (seconds)
+  - k8s_warmpool_time_to_scheduled_max       (seconds)
+  - k8s_warmpool_time_to_scheduled_count     (count)
+  - k8s_warmpool_time_to_running_p50         (seconds)
+  - k8s_warmpool_time_to_running_p95         (seconds)
+  - k8s_warmpool_time_to_running_max         (seconds)
+  - k8s_warmpool_time_to_running_count       (count)
+  - k8s_warmpool_wall_time                   (seconds)
+"""
+
+import json
+import logging
+import time
+
+from absl import flags
+from datetime import datetime, timezone
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    k8s_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "k8s_warmpool"
+BENCHMARK_CONFIG = """
+k8s_warmpool:
+  description: >
+    Atomic single-point warm pool scale-up measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Flags owned by this benchmark (all prefixed k8s_warmpool_)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    name="k8s_warmpool_target_replicas",
+    default=100,
+    help="Number of warm pool replicas to provision from zero.",
+)
+
+flags.DEFINE_string(
+    name="k8s_warmpool_name",
+    default="python-sandbox-warmpool",
+    help="SandboxWarmPool resource name.",
+)
+
+flags.DEFINE_string(
+    name="k8s_warmpool_pod_label",
+    default="sandbox=python-sandbox-example",
+    help="Label selector for warm pool pods.",
+)
+
+flags.DEFINE_float(
+    name="k8s_warmpool_ready_threshold_s",
+    default=300.0,
+    help="Max seconds allowed for all pods to reach Running.",
+)
+
+flags.DEFINE_float(
+    name="k8s_warmpool_poll_interval_s",
+    default=2.0,
+    help="Seconds between kubectl polls during provisioning.",
+)
+
+flags.DEFINE_float(
+    name="k8s_warmpool_drain_timeout_s",
+    default=300.0,
+    help="Max seconds to wait for drain to 0.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def GetConfig(user_config):
+    """Return configs.LoadConfig's result for BENCHMARK_CONFIG and user_config.
+
+    No vm_groups — PKB skips Provision() and Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads onto the cluster and ensure the port-forward."""
+    benchmark_spec.always_call_cleanup = True  # presumably forces Cleanup — TODO confirm
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads(benchmark_spec)
+    utils.EnsurePortForward()  # Stopped again by StopPortForward() in Cleanup.
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Drain the warm pool, scale it to the target, and time provisioning.
+
+    Returns:
+      List of samples built with utils.MakeSample.
+    """
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    target = FLAGS.k8s_warmpool_target_replicas
+    warmpool_name = FLAGS.k8s_warmpool_name
+    label = FLAGS.k8s_warmpool_pod_label
+    threshold_s = FLAGS.k8s_warmpool_ready_threshold_s
+    poll_interval = FLAGS.k8s_warmpool_poll_interval_s
+
+    # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s))
+    time.sleep(3)  # presumably lets the controller settle — TODO confirm
+
+    logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target)
+
+    t_wall_start = time.time()  # Wall time excludes the initial drain above.
+
+    # 1. Measure drain time (should be near-zero: the pool was drained just above)
+    t0 = time.time()
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s))
+    drain_time_s = round(time.time() - t0, 2)
+
+    time.sleep(2)
+
+    # 2. Scale up
+    logging.info("Patching %s replicas -> %d", warmpool_name, target)
+    patch_json = json.dumps({"spec": {"replicas": target}})
+    utils.RunKubectl(
+        [
+            "patch",
+            "sandboxwarmpool",
+            warmpool_name,
+            "-n",
+            ns,
+            "--type=merge",
+            f"-p={patch_json}",
+        ]
+    )
+
+    # 3. Poll until ready or timeout
+    t_scale = time.time()  # Clock starts after the patch call has returned.
+    scale_start_epoch = t_scale  # Epoch seconds; compared with pod timestamps.
+    deadline = t_scale + threshold_s
+    first_pod_time = None  # Stays None if no pod is Running before the deadline.
+
+    while time.time() < deadline:
+        elapsed = time.time() - t_scale
+        running = utils.CountPods(ns, label, "Running")
+        pending = utils.CountPods(ns, label, "Pending")
+
+        if first_pod_time is None and running > 0:
+            first_pod_time = elapsed
+
+        pct = (running / target * 100) if target > 0 else 0
+        logging.info(
+            "[%.1fs] Running: %d/%d (%.0f%%)  Pending: %d",
+            elapsed,
+            running,
+            target,
+            pct,
+            pending,
+        )
+
+        if running >= target:
+            break
+
+        time.sleep(poll_interval)
+
+    # total_time covers the whole polling window, whether it ended by break or deadline.
+    total_time = round(time.time() - t_scale, 2)
+    final_running = utils.CountPods(ns, label, "Running")
+    final_pending = utils.CountPods(ns, label, "Pending")
+    rate = round(final_running / total_time, 2) if total_time > 0 else 0
+
+    logging.info(
+        "Scale-up complete: %d/%d Running in %.1fs (%.1f pods/sec)",
+        final_running,
+        target,
+        total_time,
+        rate,
+    )
+
+    # 4. Scrape pod lifecycle timestamps
+    lifecycle = _ScrapeLifecycle(ns, label, scale_start_epoch)
+
+    wall_time = round(time.time() - t_wall_start, 2)
+
+    # 5. Build samples
+    # The same extra dict object is passed to every MakeSample call below.
+    extra = {
+        "target_replicas": target,
+        "final_running_count": final_running,
+        "final_pending_count": final_pending,
+        "wall_time_s": wall_time,
+    }
+
+    samples = []
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_time_to_ready",
+            total_time,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_refill_rate",
+            rate,
+            "pods/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_drain_time",
+            drain_time_s,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    # Only emitted when at least one poll saw a Running pod.
+    if first_pod_time is not None:
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_first_pod_running",
+                round(first_pod_time, 2),
+                "seconds",
+                ns,
+                extra,
+            )
+        )
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_final_running_count",
+            float(final_running),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_final_pending_count",
+            float(final_pending),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pod lifecycle percentiles
+    _EmitLifecycleSamples(samples, lifecycle, ns, extra)
+
+    # Wall time
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            wall_time,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for target_replicas=%d.", len(samples), target)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool back to 0, then stop the port-forward."""
+    ns, pool = FLAGS.k8s_namespace, FLAGS.k8s_warmpool_name
+    selector = FLAGS.k8s_warmpool_pod_label
+    logging.info("Cleanup: draining warm pool to 0.")
+    utils.DrainWarmPool(
+        ns, pool, selector, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s)
+    )
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _ScrapeLifecycle(namespace, label, scale_start_epoch):
+    """Scrape pod metadata to compute time-to-created/scheduled/running.
+
+    Args:
+      namespace: Namespace passed to ``kubectl get pods -n``.
+      label: Label selector passed to ``kubectl get pods -l``.
+      scale_start_epoch: Epoch seconds that every delta is measured from.
+
+    Returns:
+      Dict with time_to_created_s / time_to_scheduled_s / time_to_running_s,
+      each holding p50/p95/max/count, or {} if no pod reported that phase.
+      The whole result is {} if kubectl fails or prints invalid JSON.
+    """
+    stdout, _, rc = utils.RunKubectl(
+        ["get", "pods", "-n", namespace, "-l", label, "-o", "json"],
+        timeout=60,
+        raise_on_failure=False,
+    )
+    if rc != 0 or not stdout:
+        return {}
+    try:
+        pods = json.loads(stdout).get("items", [])
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass.
+        # Lifecycle stats are best-effort, like the rc check above.
+        logging.warning("Unparseable kubectl pod JSON; skipping lifecycle.")
+        return {}
+
+    def _delta(ts_str):
+        # fromisoformat() before Python 3.11 rejects a trailing "Z".
+        parsed = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+        return parsed.timestamp() - scale_start_epoch
+
+    created_deltas = []
+    scheduled_deltas = []
+    running_deltas = []  # Taken from the Ready condition.
+    # Maps a condition type to the list that collects its deltas.
+    by_condition = {"PodScheduled": scheduled_deltas, "Ready": running_deltas}
+
+    for pod in pods:
+        created_str = pod.get("metadata", {}).get("creationTimestamp")
+        if created_str:
+            created_deltas.append(_delta(created_str))
+        for cond in pod.get("status", {}).get("conditions", []):
+            bucket = by_condition.get(cond.get("type"))
+            ts_str = cond.get("lastTransitionTime")
+            if bucket is not None and cond.get("status") == "True" and ts_str:
+                bucket.append(_delta(ts_str))
+
+    def _pcts(vals):
+        # Index-based (non-interpolated) ranks over the sorted deltas.
+        if not vals:
+            return {}
+        ordered = sorted(vals)
+        n = len(ordered)
+        return {
+            "p50": round(ordered[n // 2], 2),
+            "p95": round(ordered[int(n * 0.95)], 2),  # Always < n.
+            "max": round(ordered[-1], 2),
+            "count": n,
+        }
+
+    return {
+        "time_to_created_s": _pcts(created_deltas),
+        "time_to_scheduled_s": _pcts(scheduled_deltas),
+        "time_to_running_s": _pcts(running_deltas),
+    }
+
+
+def _EmitLifecycleSamples(samples, lifecycle, namespace, extra):
+    """Append the p50/p95/max/count samples of every lifecycle phase.
+
+    samples is extended in place; namespace and extra go to utils.MakeSample.
+    Stats that are absent or None are skipped; count is emitted as a float.
+    """
+    # (stat key, unit, conversion applied to the stored value)
+    stat_specs = (
+        ("p50", "seconds", None),
+        ("p95", "seconds", None),
+        ("max", "seconds", None),
+        ("count", "count", float),
+    )
+    # (key in lifecycle, metric name stem)
+    phases = (
+        ("time_to_created_s", "time_to_created"),
+        ("time_to_scheduled_s", "time_to_scheduled"),
+        ("time_to_running_s", "time_to_running"),
+    )
+    for lifecycle_key, metric_base in phases:
+        phase_stats = lifecycle.get(lifecycle_key, {})
+        for stat, unit, convert in stat_specs:
+            raw = phase_stats.get(stat)
+            if raw is None:
+                continue
+            value = raw if convert is None else convert(raw)
+            samples.append(
+                utils.MakeSample(
+                    f"{BENCHMARK_NAME}_{metric_base}_{stat}",
+                    value, unit, namespace, extra,
+                )
+            )
diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py
index a56fe72b99..eeabaae0b3 100644
--- a/perfkitbenchmarker/providers/gcp/flags.py
+++ b/perfkitbenchmarker/providers/gcp/flags.py
@@ -580,6 +580,21 @@
     ' the size derived from max_vm_count. Use when the cluster will scale'
     ' beyond the default node pool (e.g. kubernetes_node_scale with 5k nodes).',
 )
+
+
+GKE_ADDITIONAL_FLAGS = flags.DEFINE_list(
+    'gke_additional_flags',
+    [],  # Empty default: nothing extra is appended to the create command.
+    'Additional flags to pass to gcloud container clusters create. '
+    'Example: --gke_additional_flags=--enable-pod-snapshots,--enable-dataplane-v2',
+)
+
+GKE_ADDITIONAL_NODEPOOL_FLAGS = flags.DEFINE_list(
+    'gke_additional_nodepool_flags',
+    [],  # Empty default: nothing extra is appended to the create command.
+    'Additional flags to pass to gcloud container node-pools create. '
+    'Example: --gke_additional_nodepool_flags=--max-pods-per-node=250',
+)
 GCE_PERFORMANCE_MONITORING_UNIT = flags.DEFINE_enum(
     'gce_performance_monitoring_unit',
     None,
diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
index f943a53ff1..06d4a295dc 100644
--- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
+++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
@@ -102,14 +102,25 @@ def _Delete(self):
     ).Issue()
 
   def RemoteBuild(self, image: container.ContainerImage):
-    """Builds the image remotely."""
-    if not gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value:
-      full_tag = self.GetFullRegistryTag(image.name)
+    """Builds the image remotely.
+
+    If --container_remote_build_config is set, uses it as the
+    --config argument to `gcloud builds submit` and passes the
+    image tag via --substitutions _IMAGE=<tag>.
+    Otherwise uses the simple --tag shorthand.
+    """
+    full_tag = self.GetFullRegistryTag(image.name)
+    if gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value:
+      build_cmd = util.GcloudCommand(
+          self, 'builds', 'submit',
+          '--config', gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value,
+          '--substitutions', f'_IMAGE={full_tag}',
+          image.directory,
+      )
     else:
-      full_tag = gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value
-    build_cmd = util.GcloudCommand(
-        self, 'builds', 'submit', '--tag', full_tag, image.directory
-    )
+      build_cmd = util.GcloudCommand(
+          self, 'builds', 'submit', '--tag', full_tag, image.directory,
+      )
     build_cmd.Issue(timeout=None)
 
 
@@ -417,6 +428,10 @@ def _Create(self):
     if self.enable_aam:
       cmd.args.append('--auto-monitoring-scope=ALL')
 
+    # --- PKB Extension: additional cluster create flags ---
+    for additional_flag in gcp_flags.GKE_ADDITIONAL_FLAGS.value:
+      cmd.args.append(additional_flag)
+
     self._RunClusterCreateCommand(cmd)
     self._GetKubeconfig()
     self._CreateCustomComputeClass(self.default_nodepool)
@@ -432,6 +447,10 @@ def _CreateNodePools(self):
           nodepool,
           cmd,
       )
+      # --- PKB Extension: additional node pool create flags ---
+      for additional_flag in gcp_flags.GKE_ADDITIONAL_NODEPOOL_FLAGS.value:
+        cmd.args.append(additional_flag)
+
       self._IssueResourceCreationCommand(cmd)
       self._CreateCustomComputeClass(nodepool)