From f614265ac60258fcfb7343747f43e7f398585c81 Mon Sep 17 00:00:00 2001
From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com>
Date: Tue, 16 Jun 2026 04:02:04 -0400
Subject: [PATCH 1/5] add agentic benchmarking on gke

---
 .gitignore                                    |    2 +
 .../data/k8s_agents/config/gke-benchmark.conf |  171 +++
 .../config/native_provision_config.yaml       |   70 ++
 .../workloads/adk_agent/.dockerignore         |  165 +++
 .../workloads/adk_agent/.gcloudignore         |   25 +
 .../k8s_agents/workloads/adk_agent/Dockerfile |   29 +
 .../workloads/adk_agent/__init__.py           |    1 +
 .../workloads/adk_agent/cloudbuild.yaml       |   13 +
 .../adk_agent/generated.env.template          |   28 +
 .../gke_performance_agent/__init__.py         |    2 +
 .../adk_agent/gke_performance_agent/agent.py  |  240 ++++
 .../k8s_agents/workloads/adk_agent/main.py    | 1097 +++++++++++++++++
 .../workloads/adk_agent/requirements.txt      |   11 +
 .../chromium_test_app/benchmark_density.js    |  177 +++
 .../python_test_app/benchmark_density.py      |  196 +++
 .../python_test_app/benchmark_payload.py      |  203 +++
 .../python_test_app/benchmark_qps.py          |   24 +
 .../workloads/vibe_coding/README.md           |   64 +
 .../workloads/vibe_coding/startup_npm_vite.sh |   84 ++
 .../vibe_coding/startup_pip_fastapi.sh        |   65 +
 .../linux_benchmarks/kubernetes/__init__.py   |   13 +
 .../kubernetes/agentic/__init__.py            |   13 +
 .../kubernetes/agentic/gke_benchmark_utils.py |  489 ++++++++
 .../agentic/gke_chromium_density_benchmark.py |  280 +++++
 .../agentic/gke_deletion_benchmark.py         |  518 ++++++++
 .../kubernetes/agentic/gke_deploy_utils.py    |  891 +++++++++++++
 .../agentic/gke_image_build_utils.py          |  403 ++++++
 .../agentic/gke_payload_benchmark.py          |  613 +++++++++
 .../agentic/gke_prerequisite_setup.py         |  516 ++++++++
 .../kubernetes/agentic/gke_provision_utils.py |  698 +++++++++++
 .../agentic/gke_python_density_benchmark.py   |  362 ++++++
 .../kubernetes/agentic/gke_qps_benchmark.py   |  802 ++++++++++++
 .../agentic/gke_snapshot_benchmark.py         | 1022 +++++++++++++++
 .../agentic/gke_warmpool_benchmark.py         |  487 ++++++++
 perfkitbenchmarker/providers/gcp/flags.py     |   21 +
 .../providers/gcp/google_kubernetes_engine.py |   10 +
 requirements.txt                              |    1 +
 37 files changed, 9806 insertions(+)
 create mode 100644 perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
 create mode 100644 perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
 create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py

diff --git a/.gitignore b/.gitignore
index 1e1c6fe077..6f0c9cb603 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,5 @@
 /.idea
 /*git_ignore*
 .DS_Store
+.adk
+tmp/
diff --git a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
new file mode 100644
index 0000000000..99e6411577
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# Agentic Workload Benchmarking configuration file for GKE
+# Adapted from nginx DPv2 baseline for Python Sandbox & Chromium Simulation
+#
+# Override machine type and cluster suffix via environment variables:
+#   MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d bash setup_infrastructure_gke.sh
+#
+# Supported profiles:
+#   MACHINE_TYPE=c3-standard-192-metal  CLUSTER_SUFFIX=c3metal
+#   MACHINE_TYPE=c4-standard-8          CLUSTER_SUFFIX=c4       (default)
+#   MACHINE_TYPE=c4d-standard-8         CLUSTER_SUFFIX=c4d
+#   MACHINE_TYPE=c4a-standard-8         CLUSTER_SUFFIX=c4a      (ARM64)
+
+USER_NAME_PREFIX=${USER%%.*}
+
+# GCP Project (MUST be set before running any script)
+PROJECT_ID="your-project-id"
+REGION="us-central1"
+ZONE="us-central1-a"
+
+# Google/ADK aliases (derived from canonical names above)
+# These are used by envsubst for the K8s manifest and by the ADK agent.
+GOOGLE_CLOUD_PROJECT="${PROJECT_ID}"
+GOOGLE_CLOUD_LOCATION="${REGION}"
+
+# Network Configuration
+VPC_NAME="${USER_NAME_PREFIX}-agentic-vpc"
+SUBNET_NAME="${USER_NAME_PREFIX}-agentic-subnet"
+SUBNET_CIDR="10.134.20.0/24"
+LAPTOP_IP="$(curl -s ifconfig.me)/32"  # PUBLIC IP to access the target (dynamically detected)
+# Cloud Router and NAT Configuration
+ROUTER_NAME="${USER_NAME_PREFIX}-agentic-nat-router"
+NAT_NAME="${USER_NAME_PREFIX}-agentic-nat-config"
+
+# GKE Cluster Configuration
+CLUSTER_SUFFIX="${CLUSTER_SUFFIX:-c4}"
+CLUSTER_NAME="${USER_NAME_PREFIX}-agentic-${CLUSTER_SUFFIX}"
+GKE_VERSION="1.35.3-gke.1389000"
+USE_CONNECT_GATEWAY="${USE_CONNECT_GATEWAY:-true}"  # Use Connect Gateway for kubectl access
+                                                    # Set to "false" to use direct public endpoint
+
+# =========================================================================
+# Machine Type Configuration (overridable via MACHINE_TYPE env var)
+# =========================================================================
+MACHINE_TYPE="${MACHINE_TYPE:-c4-standard-8}"
+
+# Derive disk type from machine family:
+#   C3 → pd-balanced, C4/C4D/C4A → hyperdisk-balanced
+_MACHINE_FAMILY="${MACHINE_TYPE%%-*}"  # e.g. "c4" from "c4-standard-8"
+case "${_MACHINE_FAMILY}" in
+  c3)  _DISK_TYPE="pd-balanced" ;;
+  *)   _DISK_TYPE="hyperdisk-balanced" ;;
+esac
+
+# Derive target architecture from machine family:
+#   C4A → arm64, everything else → amd64
+case "${_MACHINE_FAMILY}" in
+  c4a) _TARGET_ARCH="arm64" ;;
+  *)   _TARGET_ARCH="amd64" ;;
+esac
+
+# Derive unique master CIDR per cluster (each private cluster needs its own /28):
+#   c4 → 172.16.0.0/28, c4d → 172.16.0.16/28, c4a → 172.16.0.32/28, c3metal → 172.16.0.48/28
+case "${CLUSTER_SUFFIX}" in
+  c4)      MASTER_IPV4_CIDR="172.16.0.0/28" ;;
+  c4d)     MASTER_IPV4_CIDR="172.16.0.16/28" ;;
+  c4a)     MASTER_IPV4_CIDR="172.16.0.32/28" ;;
+  c3metal) MASTER_IPV4_CIDR="172.16.0.48/28" ;;
+  *)       MASTER_IPV4_CIDR="172.16.0.64/28" ;;  # fallback for future clusters
+esac
+
+DEFAULT_POOL_MACHINE_TYPE="${MACHINE_TYPE}"
+DEFAULT_POOL_DISK_TYPE="${_DISK_TYPE}"
+DEFAULT_POOL_DISK_SIZE="50"                 # Disk size in GB
+DEFAULT_POOL_NODE_COUNT="1"                 # Number of nodes in the default pool
+
+# =========================================================================
+# Agentic Workload NodePools
+# =========================================================================
+
+# Sandbox NodePool (Python + Chromium workloads with gVisor)
+SANDBOX_NODE_POOL_NAME="agentic-sandbox-pool"
+SANDBOX_MACHINE_TYPE="${MACHINE_TYPE}"          # Same as default pool (overridable)
+SANDBOX_DISK_SIZE="100"
+SANDBOX_DISK_TYPE="${_DISK_TYPE}"               # Derived from machine family
+SANDBOX_NODE_COUNT="1"
+SANDBOX_MAX_PODS_PER_NODE="250"             # Raise from default 110 to avoid GKE pod limit as density ceiling
+SANDBOX_ENABLE_GVISOR="true"                # Enable GKE Sandbox (gVisor) on this pool
+
+AGENT_SANDBOX_VERSION="v0.4.6"
+
+# =========================================================================
+# Workload Configuration
+# =========================================================================
+AGENTIC_NAMESPACE="agentic"
+
+# Python Sandbox Workload
+PYTHON_IMAGE="python:3.11-slim"
+PYTHON_POD_NAME="python-sandbox"
+PYTHON_REPLICAS="1"                         # Start with 1; sweep for density tests
+PYTHON_CPU_REQUEST="1"
+PYTHON_CPU_LIMIT="2"
+PYTHON_MEMORY_REQUEST="1Gi"
+PYTHON_MEMORY_LIMIT="4Gi"
+
+# Chromium Browser Simulation Workload
+CHROMIUM_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/chrome-sandbox:${_TARGET_ARCH}"
+CHROMIUM_POD_NAME="chromium-sandbox"
+CHROMIUM_REPLICAS="1"                       # Start with 1; sweep for density tests
+
+# Mock LLM Coordinator
+MOCK_LLM_IMAGE="python:3.11-slim"
+MOCK_LLM_POD_NAME="mock-llm-coordinator"
+MOCK_LLM_PORT="8080"
+
+# =========================================================================
+# Benchmark Parameters
+# =========================================================================
+
+# Python Density Benchmark (UC-B)
+SAMPLE_COUNT="20"                                 # Samples per sandbox session
+SAMPLE_WARMUP="0"                                 # Warmup samples (excluded from stats)
+
+# Payload Transfer Benchmark (UC-D)
+PAYLOAD_SIZE_MB="1"                                # Default payload size in MB
+PAYLOAD_ITERATIONS="20"                            # Transfer iterations per session
+
+# Chromium Benchmark
+CHROMIUM_TASK_COUNT="10"                          # Number of browser tasks per run
+CHROMIUM_WARMUP_TASKS="2"
+
+# General
+BENCHMARK_DURATION="300"                          # Duration in seconds per test
+NOTE="agentic-V0-gVisor-DPv2-baseline"
+
+# =========================================================================
+# Logging
+# =========================================================================
+# Log directory — defaults to tmp/ inside the repo (gitignored).
+# Override by setting BASE_LOG_DIR before sourcing this file,
+# e.g. export BASE_LOG_DIR="$HOME/agentic-logs" to keep logs outside the repo.
+_REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
+BASE_LOG_DIR="${BASE_LOG_DIR:-${_REPO_ROOT}/tmp/agentic-logs}"
+WRAPPER_LOG_DIR="${BASE_LOG_DIR}/wrapper_logs"
+
+LOG_PATH="logs"
+LOG_LEVEL="info"
+
+# =========================================================================
+# ADK Agent Deployment
+# =========================================================================
+ADK_REPO_NAME="adk-repo"                     # Artifact Registry repository name
+ADK_IMAGE_NAME="adk-agent"                   # Container image name
+GOOGLE_GENAI_USE_VERTEXAI="true"
+ADK_IMAGE_PATH="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ADK_REPO_NAME}/${ADK_IMAGE_NAME}:${_TARGET_ARCH}"
+ADK_K8S_SA="adk-agent-sa"                    # Kubernetes service account for the agent
+CLOUD_BUILD_SA="adk-cloud-build-sa"             # Service account for Cloud Build submissions
+
+# Sandbox Router & Warm Pool
+SANDBOX_ROUTER_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/sandbox-router:${_TARGET_ARCH}"
+WARMPOOL_REPLICAS="2"                         # Number of pre-warmed sandbox pods
+
+# =========================================================================
+# Pod Snapshot Configuration (UC-A: Cold Start & Snapshot Pressure Test)
+# =========================================================================
+ENABLE_POD_SNAPSHOTS="true"                    # Enable pod snapshots feature on cluster
+SNAPSHOTS_BUCKET_NAME="agent-sandbox-snapshots-${PROJECT_ID}"
+SNAPSHOT_KSA_NAME="pod-snapshot-sa"            # KSA for snapshot storage access
+SNAPSHOT_FOLDER="benchmark-snapshots"          # Managed folder inside the bucket
+SNAPSHOT_PRELOAD_MB="10"                       # Default memory preload for snapshot sizing
diff --git a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
new file mode 100644
index 0000000000..765c7c4256
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
@@ -0,0 +1,70 @@
+# Native PKB Provision Config for Agentic Benchmarks
+# Used with --gke_provision_mode=native
+#
+# Prerequisites (run once before PKB):
+#   python tools/agentic-benchmark/scripts/prerequisite_setup.py \
+#       --project_id=<project> --machine_type=<machine>
+#
+# IMPORTANT: Do NOT pass --gce_subnet_name on the command line.
+# PKB incorrectly resolves it as the --network value. Instead, pass the
+# subnet via --gke_additional_flags on the command line.
+#
+# Usage (provision):
+#   python pkb.py --benchmarks=gke_python_density \
+#       --gke_provision_mode=native \
+#       --benchmark_config_file=k8s_agents/config/native_provision_config.yaml \
+#       --gce_network_name=<user>-agentic-vpc \
+#       --gce_subnet_region=us-central1 \
+#       --zone=us-central1-a \
+#       --project=<project> \
+#       --owner=<owner> \
+#       --container_cluster_version=1.35.3-gke.1389000 \
+#       --gke_additional_flags="--subnetwork=<user>-agentic-subnet,--workload-pool=<project>.svc.id.goog"
+#
+# For sweeps (cluster pre-exists, PKB skips provision/teardown):
+#   The sweep bridge injects --run_stage=run,cleanup automatically.
+
+gke_python_density:
+  flags:
+    # Force gcloud beta for preview features (pod snapshots)
+    gke_use_beta: true
+
+    # Cluster-level additional flags (appended to gcloud [beta] container clusters create)
+    # NOTE: --subnetwork and --workload-pool are user/project-specific.
+    # Pass them on the command line via --gke_additional_flags=... (comma-separated).
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+
+    # Node-pool-level additional flags (appended to gcloud container node-pools create)
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+
+    # Standard PKB GKE flags
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore
new file mode 100644
index 0000000000..78cf8c8595
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore
@@ -0,0 +1,165 @@
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+
+### OSX ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+
+### Windows ###
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+
+### Vagrant ###
+.vagrant/
+### Local rules, see .gitignore.tail to override! ###
+shippable
+.git
+
+tmp/
+sessions.db
+.adk/
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore
new file mode 100644
index 0000000000..fb34b7833c
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore
@@ -0,0 +1,25 @@
+# This file tells gcloud builds submit which files to exclude from the upload.
+# Without it, gcloud ignores .dockerignore and uploads everything (including .venv).
+
+.git
+.venv/
+venv/
+ENV/
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+*.egg-info/
+*.egg
+dist/
+build/
+.tox/
+.cache/
+.coverage
+htmlcov/
+*.log
+.env
+.adk/
+sessions.db
+tmp/
+.DS_Store
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile
new file mode 100644
index 0000000000..417ad58946
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.13-slim
+WORKDIR /app
+
+# Install kubectl (required by k8s-agent-sandbox for port-forwarding to sandbox pods)
+# Uses `dpkg --print-architecture` (not BuildKit's TARGETARCH) to pick the amd64 or arm64 binary
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl ca-certificates && \
+    ARCH=$(dpkg --print-architecture) && \
+    curl -LO "https://dl.k8s.io/release/$(curl -sL https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl" && \
+    install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \
+    rm kubectl && \
+    apt-get purge -y curl && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+RUN adduser --disabled-password --gecos "" myuser && \
+    chown -R myuser:myuser /app
+
+COPY . .
+
+USER myuser
+
+ENV PATH="/home/myuser/.local/bin:$PATH"
+
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
+
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py
new file mode 100644
index 0000000000..5271a8ef60
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py
@@ -0,0 +1 @@
+# ADK Agent package
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
new file mode 100644
index 0000000000..f3f3f4b810
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
@@ -0,0 +1,13 @@
+steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.']
+
+images:
+  - '${_IMAGE_PATH}'
+
+options:
+  logging: CLOUD_LOGGING_ONLY
+
+substitutions:
+  _IMAGE_PATH: ''
+  _PLATFORM: 'linux/amd64'
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
new file mode 100644
index 0000000000..0828d0a5ff
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
@@ -0,0 +1,28 @@
+# ==========================================================================
+# ADK Agent — Generated Environment File Template
+# ==========================================================================
+# This file is rendered into generated.env by deploy_gke.sh using envsubst.
+# The single source of truth is: perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
+#
+# For local dev, run deploy_gke.sh to generate generated.env,
+# or manually create generated.env with your values.
+# ==========================================================================
+
+# --- Required: GKE executor config ---
+CLUSTER_NAME="${CLUSTER_NAME}"
+GOOGLE_CLOUD_PROJECT="${GOOGLE_CLOUD_PROJECT}"
+GOOGLE_CLOUD_LOCATION="${GOOGLE_CLOUD_LOCATION}"
+AGENTIC_NAMESPACE="${AGENTIC_NAMESPACE}"
+GOOGLE_GENAI_USE_VERTEXAI="${GOOGLE_GENAI_USE_VERTEXAI}"
+
+# --- Sandbox connection (set in-cluster; leave blank for local dev mode) ---
+# When set, SandboxClient uses DirectConnection (bypasses kubectl port-forward).
+# For local dev, set to "" to use per-pod kubectl port-forward tunnels.
+SANDBOX_ROUTER_URL="http://sandbox-router-svc.${AGENTIC_NAMESPACE}.svc.cluster.local:8080"
+
+# --- Optional: benchmark defaults (overridden by HTTP request params) ---
+SAMPLE_COUNT="${SAMPLE_COUNT}"
+SAMPLE_WARMUP="${SAMPLE_WARMUP}"
+PAYLOAD_SIZE_MB="${PAYLOAD_SIZE_MB}"
+PAYLOAD_ITERATIONS="${PAYLOAD_ITERATIONS}"
+
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py
new file mode 100644
index 0000000000..c6df9a7a2a
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py
@@ -0,0 +1,2 @@
+# GKE Performance Agent package
+from . import agent
\ No newline at end of file
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
new file mode 100644
index 0000000000..46094d244f
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
@@ -0,0 +1,240 @@
+from google.adk.agents import LlmAgent
+from google.adk.code_executors import GkeCodeExecutor
+from google.adk.code_executors.code_execution_utils import CodeExecutionResult
+from google.adk.models.base_llm import BaseLlm
+from google.adk.models.llm_response import LlmResponse
+from google.genai import types
+from dotenv import load_dotenv
+from google.adk.apps import App
+import logging
+import os
+
+# --- Configure Logging ---
+logging.basicConfig(level=logging.INFO)
+
+# =========================================================================
+# 1. Environment and Configuration
+# =========================================================================
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+agent_dir = os.path.join(basedir, "..")
+
+# Load generated.env (auto-generated from gke-benchmark.conf by deploy_gke.sh).
+# In GKE, K8s manifest env vars take precedence.
+load_dotenv(os.path.join(agent_dir, "generated.env"))
+
+# =========================================================================
+# 2. Mock LLM Definition (Inheriting from BaseLlm for Pydantic)
+# =========================================================================
+
+# Load the benchmark scripts once at import time; each falls back to a stub.
+density_script_path = os.path.join(
+    basedir, "../sandboxed_apps/python_test_app/benchmark_density.py"
+)
+try:
+    with open(density_script_path, "r") as f:
+        density_benchmark_code = f.read()
+except Exception:  # best-effort: any read failure silently selects the stub below
+    density_benchmark_code = "import os; print(os.uname())"
+
+payload_script_path = os.path.join(
+    basedir, "../sandboxed_apps/python_test_app/benchmark_payload.py"
+)
+try:
+    with open(payload_script_path, "r") as f:
+        payload_benchmark_code = f.read()
+except Exception:  # best-effort: any read failure silently selects the stub below
+    payload_benchmark_code = "import os; print(os.uname())"
+
+qps_script_path = os.path.join(
+    basedir, "../sandboxed_apps/python_test_app/benchmark_qps.py"
+)
+try:
+    with open(qps_script_path, "r") as f:
+        qps_benchmark_code = f.read()
+except Exception:  # best-effort: any read failure silently selects the stub below
+    qps_benchmark_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))"
+
+# Keys that main.py sets in os.environ per-request.  We inject them into
+# the script so they reach the sandbox pod.  If unset, the benchmark scripts
+# use their own built-in defaults.
+_DENSITY_ENV_KEYS = ["SAMPLE_COUNT", "SAMPLE_WARMUP"]
+_PAYLOAD_ENV_KEYS = ["PAYLOAD_SIZE_MB", "PAYLOAD_ITERATIONS"]
+_QPS_ENV_KEYS: list[str] = []  # QPS script needs no env config
+
+
+def _build_benchmark_code() -> str:
+    """Return the selected benchmark script prefixed with injected env values.
+
+    Selects the script based on BENCHMARK_MODE env var:
+      - 'density'  → benchmark_density.py  (Use Case B; also any other value)
+      - 'payload'  → benchmark_payload.py  (Use Case D)
+      - 'qps'      → benchmark_qps.py      (Use Case F)
+    """
+    mode = os.getenv("BENCHMARK_MODE", "density")
+    if mode == "payload":
+        env_keys = _PAYLOAD_ENV_KEYS
+        script = payload_benchmark_code
+    elif mode == "qps":
+        env_keys = _QPS_ENV_KEYS
+        script = qps_benchmark_code
+    else:
+        env_keys = _DENSITY_ENV_KEYS
+        script = density_benchmark_code
+
+    lines = ["import os"]
+    for k in env_keys:
+        v = os.getenv(k)
+        if v is not None:
+            # !r emits a valid literal even when the value has quotes/newlines.
+            lines.append(f"os.environ[{k!r}] = {v!r}")
+    return "\n".join(lines) + "\n\n" + script
+
+
+class MockLlm(BaseLlm):
+    model: str = "mock-model"
+
+    async def generate_content_async(self, llm_request, stream=False):
+        """Mock the ADK response loop.
+
+        BaseLlm.generate_content_async is an AsyncGenerator — it must YIELD
+        LlmResponse objects, never return them.
+        """
+        # ADK appends the code execution result to the conversation
+        # history before calling the LLM again.  If the history has
+        # grown beyond the initial user prompt, code has already
+        # executed — return plain text to stop the loop.
+        has_execution_result = len(llm_request.contents) > 1
+
+        if has_execution_result:
+            part = types.Part(text="Execution Complete")
+        else:
+            # Create an ADK-compliant result with executable code.
+            # Build at request time so the mode-specific env values (e.g.
+            # SAMPLE_COUNT or PAYLOAD_SIZE_MB) match the current os.environ.
+            part = types.Part(
+                executable_code=types.ExecutableCode(
+                    language="PYTHON", code=_build_benchmark_code()
+                )
+            )
+
+        content = types.Content(role="model", parts=[part])
+        response = LlmResponse(content=content, partial=False)
+
+        # Yield exactly one final response; the `stream` flag is ignored.
+        yield response
+
+
+# =========================================================================
+# 3. Agent Initialization
+# =========================================================================
+
+
+class V3GkeCodeExecutor(GkeCodeExecutor):
+    def _execute_in_sandbox(self, code: str) -> CodeExecutionResult:
+        """Executes code using the v0.4.6 compatible SandboxClient."""
+        from k8s_agent_sandbox.sandbox_client import SandboxClient
+        from k8s_agent_sandbox.models import SandboxDirectConnectionConfig
+        import logging  # NOTE(review): redundant — already imported at module level
+        import time
+        from concurrent.futures import ThreadPoolExecutor
+
+        logging.info("Executing via V3 SandboxClient (v0.4.6 compatible).")
+
+        # Shared thread pool for sandbox operations to allow overlapping
+        # blocking I/O when sessions run on different threads.
+        global _SANDBOX_POOL
+        try:
+            _SANDBOX_POOL
+        except NameError:  # first call: the module-level pool does not exist yet
+            _SANDBOX_POOL = ThreadPoolExecutor(max_workers=16)
+
+        # Use DirectConnection when SANDBOX_ROUTER_URL is set (in-cluster),
+        # otherwise fall back to kubectl port-forward (dev mode).
+        router_url = os.getenv("SANDBOX_ROUTER_URL")
+        if router_url:
+            client = SandboxClient(
+                connection_config=SandboxDirectConnectionConfig(api_url=router_url)
+            )
+        else:
+            client = SandboxClient()
+        # v0.4.6 create_sandbox uses 'template' and 'namespace' arguments
+        create_ms = upload_ms = run_ms = delete_ms = 0.0
+        sandbox = None
+        # Time sandbox creation
+        t0 = time.time()
+        create_future = _SANDBOX_POOL.submit(
+            client.create_sandbox,
+            template=self.sandbox_template,
+            namespace=self.namespace,
+        )
+        sandbox = create_future.result()  # blocks here; re-raises any creation error
+        create_ms = (time.time() - t0) * 1000.0
+        try:
+            # v0.4.6 handles file I/O via the .files namespace
+            t0 = time.time()
+            upload_future = _SANDBOX_POOL.submit(sandbox.files.write, "script.py", code)
+            upload_future.result()
+            upload_ms = (time.time() - t0) * 1000.0
+
+            # SANDBOX_EXEC_TIMEOUT_S is set per-request by main.py.
+            # Default 60 s keeps density/snapshot runs tight; payload
+            # sweeps raise it for large blobs.
+            run_timeout = int(os.getenv("SANDBOX_EXEC_TIMEOUT_S", "60"))
+
+            t0 = time.time()
+            run_future = _SANDBOX_POOL.submit(
+                sandbox.commands.run, "python3 script.py", timeout=run_timeout
+            )
+            result = run_future.result()
+            run_ms = (time.time() - t0) * 1000.0
+
+            # ADK's build_code_execution_result_part discards stdout when
+            # stderr is non-empty (OUTCOME_FAILED path).  Sandbox scripts
+            # produce benign stderr (C-extension reimport noise, gVisor
+            # warnings) that would cause all sandbox_* metrics to vanish.
+            # Log the first 500 chars of stderr for debugging, then return
+            # an empty stderr so ADK passes stdout through.
+            if result.stderr:
+                logging.warning("Sandbox stderr (ignored): %s", result.stderr[:500])
+
+            logging.info(
+                "SANDBOX_TIMINGS: create_ms=%.3f upload_ms=%.3f run_ms=%.3f",
+                create_ms,
+                upload_ms,
+                run_ms,
+            )
+            return CodeExecutionResult(stdout=result.stdout, stderr="")
+        finally:
+            # Always clean up the claim; a delete error raised here propagates.
+            t0 = time.time()
+            if sandbox is not None:
+                delete_future = _SANDBOX_POOL.submit(
+                    client.delete_sandbox, sandbox.claim_name, namespace=self.namespace
+                )
+                delete_future.result()
+            delete_ms = (time.time() - t0) * 1000.0
+            logging.info("SANDBOX_TIMINGS_DELETE: delete_ms=%.3f", delete_ms)
+
+
+gke_executor = V3GkeCodeExecutor(
+    cluster_name=os.getenv("CLUSTER_NAME"),  # None if unset: no default supplied
+    location=os.getenv("GOOGLE_CLOUD_LOCATION"),  # None if unset
+    namespace=os.getenv("AGENTIC_NAMESPACE"),  # None if unset
+    executor_type="sandbox",
+    sandbox_template="python-sandbox-template",
+)
+
+gke_performance_agent = LlmAgent(
+    name="gke_performance_agent",  # Must be a valid identifier (no dashes)
+    model=MockLlm(model="mock-model"),
+    code_executor=gke_executor,
+)
+
+root_agent = gke_performance_agent
+
+app = App(
+    name=root_agent.name,
+    root_agent=root_agent,
+    # enable_tracing=True,
+)
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
new file mode 100644
index 0000000000..fa13f11fd7
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
@@ -0,0 +1,1097 @@
+"""FastAPI service fronting the GKE Performance Agent.
+
+Exposes REST endpoints that PKB calls to trigger benchmarks.  The agent
+runs *inside* the GKE cluster so it can reach the Sandbox Controller and
+create gVisor sandboxes natively.
+
+Endpoints:
+  GET  /healthz                      → liveness probe
+  POST /benchmark/python/density     → run the Python density benchmark (UC-B)
+  POST /benchmark/python/payload     → run the payload transfer benchmark (UC-D)
+  POST /benchmark/python/qps         → run the QPS saturation benchmark (UC-F)
+  POST /benchmark/chromium/density   → run the Chromium density benchmark (UC-C)
+  POST /run                          → raw ADK agent interaction
+
+POST /benchmark/python/density — Request:
+  {
+    "sample_count":            int — iterations per sandbox session (default: 100)
+    "sample_warmup":           int — warmup iterations excluded from stats (default: 5)
+    "concurrent_sessions":     int — parallel sandbox sessions (default: 1)
+    "sandbox_exec_timeout_s":  int — sandbox command execution timeout in seconds (default: 60)
+  }
+
+POST /benchmark/python/density — Response:
+  {
+    "concurrent_sessions":  int — requested session count
+    "successful_sessions":  int — sessions completed without error
+    "failed_sessions":      int — sessions that returned an error
+    "aggregate": {
+      --- Orchestrator-side (timed in _run_single_session, stats in benchmark_python_density) ---
+      "orchestrator_cel_mean_ms":  mean round-trip across sessions
+      "orchestrator_cel_p50_ms":   P50 round-trip
+      "orchestrator_cel_p99_ms":   P99 round-trip (a matching ..._p95_ms key is also emitted)
+      "orchestrator_cel_min_ms":   min round-trip
+      "orchestrator_cel_max_ms":   max round-trip
+
+      --- Sandbox-side overall (from benchmark_density.py, mean across sessions) ---
+      "sandbox_ttfe_ms":               Time To First Execution
+      "sandbox_total_cel_mean_ms":     mean total CEL per iteration (sum of all task types)
+      "sandbox_total_cel_p50_ms":      P50 total CEL per iteration
+      "sandbox_total_cel_p99_ms":      P99 total CEL per iteration
+      "sandbox_total_cel_min_ms":      min total CEL per iteration
+      "sandbox_total_cel_max_ms":      max total CEL per iteration
+
+      --- Sandbox RSS (from benchmark_density.py, mean across sessions) ---
+      "sandbox_rss_start_mb":      RSS at benchmark start
+      "sandbox_rss_end_mb":        RSS at benchmark end
+      "sandbox_rss_growth_mb":     RSS growth during benchmark
+
+      --- Per-type CEL breakdown (from benchmark_density.py, mean across sessions) ---
+      "sandbox_compute_cel_{mean,p50,p99,min,max}_ms":  CPU-bound (math.factorial)
+      "sandbox_syscall_cel_{mean,p50,p99,min,max}_ms":  gVisor Sentry (os.stat/listdir)
+      "sandbox_import_cel_{mean,p50,p99,min,max}_ms":   Gofer FS I/O (importlib)
+    }
+    "sessions": [             per-session detail array
+      {
+        "session_id":           int — zero-based session index
+        "orchestrator_total_ms": float — full round-trip for this session
+        "raw_output":           str — raw code execution stdout
+        "sandbox_ttfe_ms":      float — TTFE for this session
+        "sandbox_total_cel_mean_ms":  float — total CEL mean for this session
+        ...                     all other sandbox_* metrics for this session
+      }
+    ]
+  }
+
+Data Flow:
+  benchmark_density.py (inside gVisor)  → all sandbox_* metrics per session
+  main.py (this file)                  → orchestrator_* timing + cross-session aggregation
+"""
+
+import json
+import logging
+import os
+import re
+import time
+import asyncio
+from typing import Optional
+from concurrent.futures import ThreadPoolExecutor
+
+import uvicorn
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from google.genai import types
+from google.adk.sessions import InMemorySessionService
+from google.adk.artifacts import InMemoryArtifactService
+from google.adk.runners import Runner
+
+from dotenv import load_dotenv
+
+basedir = os.path.abspath(os.path.dirname(__file__))
+
+# Load generated.env (auto-generated from gke-benchmark.conf by build_images_gke.sh).
+# In GKE, K8s manifest env vars take precedence.
+load_dotenv(os.path.join(basedir, "generated.env"))
+
+from gke_performance_agent import agent
+
+
+# ── SandboxClient factory (DirectConnection vs Dev-mode tunnel) ──────────
+def _make_sandbox_client():
+    """Build a SandboxClient, preferring a direct router connection.
+
+    If SANDBOX_ROUTER_URL is set to a non-empty value, the client is given a
+    SandboxDirectConnectionConfig aimed at that URL; otherwise SandboxClient()
+    is built with its defaults (presumably the dev-mode tunnel; confirm).
+    """
+    from k8s_agent_sandbox.sandbox_client import SandboxClient
+
+    direct_url = os.getenv("SANDBOX_ROUTER_URL")
+    if not direct_url:
+        return SandboxClient()
+
+    # Only the direct-connection path needs the config model.
+    from k8s_agent_sandbox.models import SandboxDirectConnectionConfig
+
+    direct_config = SandboxDirectConnectionConfig(api_url=direct_url)
+    return SandboxClient(connection_config=direct_config)
+
+
+# --- Constants (used by _run_agent for the ADK session and Runner) ---
+APP_NAME = "gke_performance_agent_app"
+USER_ID = "benchmark_user"
+
+# --- Configure Logging: try Cloud Logging, else plain INFO-level basicConfig ---
+try:
+    import google.cloud.logging as gcl
+
+    gcl.Client().setup_logging()
+except Exception:  # covers a missing package as well as client/setup failures
+    logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# =========================================================================
+# FastAPI Application
+# =========================================================================
+# --- Adaptive ThreadPool based on Agent CPU ---
+def _compute_thread_count() -> int:
+    """Return the worker count for the default ThreadPoolExecutor.
+
+    Twice os.cpu_count() (1 if unknown), clamped to the range [2, 64]; the
+    doubling leaves headroom for threads blocked on I/O.
+    """
+    detected_cpus = os.cpu_count() or 1
+    doubled = 2 * detected_cpus
+    return sorted((2, doubled, 64))[1]  # median of three == clamp to [2, 64]
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Install a sized ThreadPoolExecutor as the event loop's default executor.
+
+    Startup: create a pool with _compute_thread_count() workers and register
+    it on the running loop.  Exit: shut the pool down without waiting; an
+    error raised by the shutdown is logged instead of propagated.
+    """
+    pool_size = _compute_thread_count()
+    pool = ThreadPoolExecutor(max_workers=pool_size)
+    asyncio.get_running_loop().set_default_executor(pool)
+    startup_msg = "Default ThreadPoolExecutor set to %d workers (cpu=%s)"
+    logging.info(startup_msg, pool_size, os.cpu_count())
+    try:
+        yield
+    finally:
+        # Best-effort teardown of the pool.
+        try:
+            pool.shutdown(wait=False)
+            logging.info("ThreadPoolExecutor shut down")
+        except Exception:
+            logging.exception("Error shutting down ThreadPoolExecutor")
+
+
+app = FastAPI(title="GKE Benchmark Agent", version="0.2.0", lifespan=lifespan)
+
+# Serialise benchmark requests so concurrent POSTs cannot clobber the
+# shared env vars (BENCHMARK_MODE, SAMPLE_COUNT, …) that agent.py reads.
+_benchmark_lock = asyncio.Lock()  # each /benchmark/* handler runs its work under this lock
+
+
+def _percentile_stats(sorted_values: list, prefix: str) -> dict:
+    """Summarise an ascending list as {prefix}_{mean,p50,p95,p99,min,max}_ms."""
+    count = len(sorted_values)
+    if not count:
+        return {}  # empty input yields no keys at all
+    summary = {
+        "mean": sum(sorted_values) / count,
+        "p50": sorted_values[count // 2],
+        "p95": sorted_values[min(int(count * 0.95), count - 1)],
+        "p99": sorted_values[min(int(count * 0.99), count - 1)],
+        "min": sorted_values[0], "max": sorted_values[-1],
+    }
+    return {f"{prefix}_{stat}_ms": round(val, 6) for stat, val in summary.items()}
+
+
+# --- Request / Response Models ---
+class BenchmarkRequest(BaseModel):  # JSON body of POST /benchmark/python/density
+    sample_count: int = Field(  # exported to the agent as env var SAMPLE_COUNT
+        default=100, ge=1, description="Sample count per sandbox session"
+    )
+    sample_warmup: int = Field(  # exported as env var SAMPLE_WARMUP
+        default=5, ge=0, description="Warmup iterations per sandbox session"
+    )
+    concurrent_sessions: int = Field(  # number of agent sessions launched in parallel
+        default=1, ge=1, description="Number of parallel sandbox sessions"
+    )
+    sandbox_exec_timeout_s: int = Field(  # exported as env var SANDBOX_EXEC_TIMEOUT_S
+        default=60, ge=10, description="Sandbox command execution timeout in seconds"
+    )
+
+
+class RunRequest(BaseModel):  # presumably the body of POST /run (handler not visible here)
+    prompt: str = "Please start the GKE performance benchmark workflow."
+
+
+class PayloadBenchmarkRequest(BaseModel):  # JSON body of POST /benchmark/python/payload
+    payload_size_mb: float = Field(default=1, gt=0, description="Payload size in MB")
+    payload_iterations: int = Field(  # exported as env var PAYLOAD_ITERATIONS
+        default=20, ge=1, description="Number of transfer iterations"
+    )
+    concurrent_sessions: int = Field(  # number of agent sessions launched in parallel
+        default=1, ge=1, description="Number of parallel sandbox sessions"
+    )
+    sandbox_exec_timeout_s: int = Field(  # exported as env var SANDBOX_EXEC_TIMEOUT_S
+        default=60, ge=10, description="Sandbox command execution timeout in seconds"
+    )
+
+
+class QpsBenchmarkRequest(BaseModel):  # JSON body of POST /benchmark/python/qps
+    target_qps: float = Field(  # requests are spaced 1 / target_qps seconds apart
+        default=10.0, ge=0.1, description="Target requests per second"
+    )
+    duration_s: float = Field(  # new requests are issued until this much time has elapsed
+        default=60.0, ge=5.0, description="Duration of the QPS burst in seconds"
+    )
+    sandbox_exec_timeout_s: int = Field(  # passed as timeout= to sandbox.commands.run
+        default=30, ge=10, description="Sandbox command execution timeout in seconds"
+    )
+
+
+class ChromiumBenchmarkRequest(BaseModel):  # JSON body of POST /benchmark/chromium/density
+    task_count: int = Field(  # measured iterations, run after the warmup ones
+        default=10, ge=1, description="Iterations per Chromium session"
+    )
+    warmup_tasks: int = Field(  # leading iterations whose timings are not recorded
+        default=2, ge=0, description="Warmup iterations excluded from stats"
+    )
+    concurrent_sessions: int = Field(
+        default=1, ge=1, description="Number of parallel Chromium sessions"
+    )
+    sandbox_exec_timeout_s: int = Field(
+        default=120, ge=10, description="Sandbox command execution timeout in seconds"
+    )
+
+
+# --- JSON extraction helper (flat objects only: the pattern excludes nested braces) ---
+_JSON_RE = re.compile(r"\{[^{}]*\}", re.DOTALL)
+
+
+def _parse_sandbox_json(raw_output: str) -> Optional[dict]:
+    """Return the last flat JSON object in raw_output that has a sandbox_* key.
+
+    Returns None if nothing qualifies or if raw_output is empty or None.
+    """
+    if not raw_output:
+        return None  # guards against an empty/None result from the agent run
+    for candidate in reversed(_JSON_RE.findall(raw_output)):
+        try:
+            obj = json.loads(candidate)
+        except json.JSONDecodeError:
+            continue  # brace-delimited text that is not JSON
+        if any(k.startswith("sandbox_") for k in obj):
+            return obj
+    return None
+
+
+# --- Agent helper ---
+async def _run_agent(prompt: str) -> str:
+    """Run the root agent once on `prompt` in a throwaway in-memory session.
+
+    Returns the `output` of the last code-execution result part seen in the
+    event stream when it is non-empty; otherwise the text of the first part
+    of the last final-response event ("" if no such event had content).
+    """
+    sessions = InMemorySessionService()
+    artifacts = InMemoryArtifactService()
+    session = await sessions.create_session(
+        app_name=APP_NAME, user_id=USER_ID, state={}
+    )
+
+    runner = Runner(
+        agent=agent.root_agent,
+        app_name=APP_NAME,
+        session_service=sessions,
+        artifact_service=artifacts,
+    )
+
+    user_message = types.Content(role="user", parts=[types.Part(text=prompt)])
+
+    sandbox_output = ""
+    final_text = ""
+    async with runner:
+        event_stream = runner.run_async(
+            user_id=USER_ID, session_id=session.id, new_message=user_message
+        )
+        async for event in event_stream:
+            parts = event.content.parts if event.content else None
+            for part in parts or ():
+                # The result may be exposed under either attribute spelling.
+                exec_result = getattr(part, "code_execution_result", None)
+                if not exec_result:
+                    exec_result = getattr(part, "codeExecutionResult", None)
+                if exec_result:
+                    # Later results overwrite earlier ones.
+                    sandbox_output = getattr(exec_result, "output", "") or ""
+            if event.is_final_response() and parts:
+                final_text = parts[0].text
+
+    await sessions.delete_session(
+        app_name=APP_NAME, user_id=USER_ID, session_id=session.id
+    )
+    # Prefer the code-execution output; fall back to the final response text.
+    return sandbox_output or final_text
+
+
+async def _run_single_session(session_id: int, prompt: str) -> dict:
+    """Time one _run_agent call and merge in the sandbox-reported metrics.
+
+    On success the dict holds session_id, orchestrator_total_ms (wall time of
+    the agent call, in ms), raw_output, and every key of the sandbox JSON
+    summary parsed from that output.  If the agent call raises, the dict
+    holds only session_id and error (the exception text).
+    """
+    started = time.perf_counter()
+    logging.info("SESSION_START: session_id=%d start_ts=%.3f", session_id, time.time())
+
+    try:
+        raw_output = await _run_agent(prompt)
+    except Exception as exc:
+        # Report the failure as data so sibling sessions are unaffected.
+        return {"session_id": session_id, "error": str(exc)}
+
+    elapsed_ms = round((time.perf_counter() - started) * 1000, 6)
+    logging.info(
+        "SESSION_END: session_id=%d elapsed_ms=%.3f", session_id, elapsed_ms
+    )
+
+    outcome = {
+        "session_id": session_id,
+        "orchestrator_total_ms": elapsed_ms,
+        "raw_output": raw_output,
+    }
+
+    # Sandbox-side metrics (if any were parsed) are merged in last.
+    outcome.update(_parse_sandbox_json(raw_output) or {})
+    return outcome
+
+
+# --- Endpoints ---
+@app.get("/healthz")
+async def healthz():  # liveness probe; takes no input and always reports ok
+    return dict(status="ok")
+
+
+@app.post("/benchmark/python/density")
+async def benchmark_python_density(req: BenchmarkRequest):
+    """Trigger the Python density benchmark (Use Case B).
+
+    Publishes the request's sample/warmup/timeout settings as process env
+    vars, then runs `concurrent_sessions` agent sessions in parallel, each
+    in its own thread with its own event loop.  Returns the session counts,
+    cross-session aggregate stats, and the raw per-session result dicts.
+    """
+    async with _benchmark_lock:  # os.environ is process-global, so runs are serialised
+        os.environ["BENCHMARK_MODE"] = "density"
+        os.environ["SAMPLE_COUNT"] = str(req.sample_count)
+        os.environ["SAMPLE_WARMUP"] = str(req.sample_warmup)
+        os.environ["SANDBOX_EXEC_TIMEOUT_S"] = str(req.sandbox_exec_timeout_s)
+
+        logger.info(
+            "Starting Python benchmark: sample_count=%d sample_warmup=%d concurrent_sessions=%d",
+            req.sample_count,
+            req.sample_warmup,
+            req.concurrent_sessions,
+        )
+
+        prompt = "Please start the GKE performance benchmark workflow."
+
+        # Fire concurrent sessions. Run each session in its own thread so
+        # blocking ADK/Runner activity cannot serialize session start.
+        thread_tasks = [
+            asyncio.create_task(
+                asyncio.to_thread(
+                    lambda sid=i: asyncio.run(_run_single_session(sid, prompt))  # sid=i binds now
+                )
+            )
+            for i in range(req.concurrent_sessions)
+        ]
+        session_results = await asyncio.gather(*thread_tasks)
+
+    # Separate successful vs failed sessions (failed == result dict has an "error" key)
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate orchestrator-side metrics across all successful sessions
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_cel"))
+
+        # Aggregate sandbox-side metrics across sessions
+        sandbox_keys = [k for k in successful[0] if k.startswith("sandbox_")]  # first session only
+        for key in sandbox_keys:
+            sample_val = successful[0].get(key)  # its type selects the aggregation branch
+            if isinstance(sample_val, list):
+                # Pool raw latency arrays across sandboxes → true cross-sandbox stats
+                pooled = sorted(
+                    v
+                    for r in successful
+                    for v in (r.get(key) or [])
+                    if isinstance(r.get(key), list)
+                )
+                if pooled:
+                    base = key[:-3] if key.endswith("_ms") else key  # helper re-appends "_ms"
+                    aggregate.update(_percentile_stats(pooled, base))
+            elif isinstance(sample_val, (int, float)):
+                vals = [
+                    r[key]
+                    for r in successful
+                    if key in r and isinstance(r[key], (int, float))
+                ]
+                if vals:
+                    if key.endswith("_cel_ms"):
+                        # Latency scalars (e.g. import_cel_ms): compute
+                        # cross-sandbox percentile stats, like array metrics.
+                        base = key[:-3]
+                        aggregate.update(_percentile_stats(sorted(vals), base))
+                    else:
+                        # Non-latency scalars (e.g. rss_mb, ttfe_ms): average
+                        aggregate[key] = round(sum(vals) / len(vals), 6)
+
+    return {
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/python/payload")
+async def benchmark_python_payload(req: PayloadBenchmarkRequest):
+    """Trigger the payload transfer benchmark (Use Case D).
+
+    Measures the cost of returning large observation payloads from a
+    gVisor sandbox back to the orchestrator.  The size/iteration/timeout
+    settings are handed to the agent via process env vars; each of the
+    `concurrent_sessions` sessions runs in its own thread and event loop.
+    """
+    async with _benchmark_lock:  # os.environ is process-global, so runs are serialised
+        os.environ["BENCHMARK_MODE"] = "payload"
+        os.environ["PAYLOAD_SIZE_MB"] = str(req.payload_size_mb)
+        os.environ["PAYLOAD_ITERATIONS"] = str(req.payload_iterations)
+        os.environ["SANDBOX_EXEC_TIMEOUT_S"] = str(req.sandbox_exec_timeout_s)
+
+        logger.info(
+            "Starting Payload benchmark: payload_size_mb=%s iterations=%d concurrent_sessions=%d",
+            req.payload_size_mb,
+            req.payload_iterations,
+            req.concurrent_sessions,
+        )
+
+        prompt = "Please start the GKE performance benchmark workflow."
+
+        # Fire concurrent sessions. Run each session in its own thread so
+        # blocking ADK/Runner activity cannot serialize session start.
+        thread_tasks = [
+            asyncio.create_task(
+                asyncio.to_thread(
+                    lambda sid=i: asyncio.run(_run_single_session(sid, prompt))  # sid=i binds now
+                )
+            )
+            for i in range(req.concurrent_sessions)
+        ]
+        session_results = await asyncio.gather(*thread_tasks)
+
+    # Separate successful vs failed sessions (failed == result dict has an "error" key)
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate orchestrator-side metrics across all successful sessions
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_transfer"))
+
+        # Aggregate sandbox-side metrics (mean across sessions, numeric only)
+        sandbox_keys = [k for k in successful[0] if k.startswith("sandbox_")]  # first session only
+        for key in sandbox_keys:
+            vals = [
+                r[key]
+                for r in successful
+                if key in r and isinstance(r[key], (int, float))
+            ]
+            if vals:  # keys with no numeric values in any session are omitted
+                aggregate[key] = round(sum(vals) / len(vals), 6)
+
+    return {
+        "payload_size_mb": req.payload_size_mb,
+        "payload_iterations": req.payload_iterations,
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/python/qps")
+async def benchmark_python_qps(req: QpsBenchmarkRequest):
+    """Trigger the QPS saturation benchmark (Use Case F).
+
+    Fires sandbox claim requests at a controlled rate (target_qps) for
+    duration_s seconds.  Each request claims a sandbox from the warm pool,
+    runs a trivial script, and releases it.  Returns per-request TTFE
+    (claim + upload + execute; delete is not timed) and aggregate latency stats.
+
+    Uses a lightweight path that calls SandboxClient directly — bypasses
+    the full ADK Runner/MockLLM pipeline to avoid per-request overhead
+    and accurately measure sandbox lifecycle latency at high QPS.
+
+    When the warm pool drains faster than it refills, TTFE spikes from
+    ~200ms to seconds — identifying the QPS saturation point.
+    """
+
+    # Load the QPS script once; on any read error fall back to a one-line stub
+    qps_script_path = os.path.join(
+        basedir, "sandboxed_apps/python_test_app/benchmark_qps.py"
+    )
+    try:
+        with open(qps_script_path, "r") as f:
+            qps_code = f.read()
+    except Exception:
+        qps_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))"
+
+    sandbox_template = os.getenv("SANDBOX_TEMPLATE", "python-sandbox-template")
+    sandbox_namespace = os.getenv("SANDBOX_NAMESPACE", "agentic")  # NOTE(review): the chromium endpoint reads AGENTIC_NAMESPACE; confirm
+    exec_timeout = req.sandbox_exec_timeout_s
+    qps_claim_label = {"created-by": "pkb-qps-benchmark"}  # same label the kubectl cleanup selects on
+
+    def _run_qps_request(request_id: int) -> dict:
+        """Lightweight sandbox claim→execute→release cycle."""
+        t_total = time.perf_counter()
+        client = _make_sandbox_client()
+        sandbox = None
+        try:
+            # Claim
+            t0 = time.perf_counter()
+            sandbox = client.create_sandbox(
+                template=sandbox_template,
+                namespace=sandbox_namespace,
+                labels=qps_claim_label,
+            )
+            claim_ms = (time.perf_counter() - t0) * 1000
+
+            # Upload
+            t0 = time.perf_counter()
+            sandbox.files.write("script.py", qps_code)
+            upload_ms = (time.perf_counter() - t0) * 1000
+
+            # Execute
+            t0 = time.perf_counter()
+            result = sandbox.commands.run("python3 script.py", timeout=exec_timeout)
+            exec_ms = (time.perf_counter() - t0) * 1000
+
+            ttfe_ms = (time.perf_counter() - t_total) * 1000  # taken before the finally-block delete
+
+            return {
+                "request_id": request_id,
+                "ttfe_ms": round(ttfe_ms, 3),
+                "claim_ms": round(claim_ms, 3),
+                "upload_ms": round(upload_ms, 3),
+                "exec_ms": round(exec_ms, 3),
+            }
+        except Exception as e:
+            ttfe_ms = (time.perf_counter() - t_total) * 1000
+            return {
+                "request_id": request_id,
+                "ttfe_ms": round(ttfe_ms, 3),
+                "error": f"{type(e).__name__}: {e}",
+            }
+        finally:
+            if sandbox is not None:
+                try:
+                    client.delete_sandbox(
+                        sandbox.claim_name, namespace=sandbox_namespace
+                    )
+                except Exception:  # best-effort; the label-based cleanup below sweeps leftovers
+                    pass
+
+    async with _benchmark_lock:
+        logger.info(
+            "Starting QPS benchmark: target_qps=%.1f duration_s=%.1f",
+            req.target_qps,
+            req.duration_s,
+        )
+
+        interval = 1.0 / req.target_qps
+
+        # Use a scoped executor sized to the expected concurrency.
+        # Each sandbox request takes ~0.5-5s depending on environment
+        # (in-cluster vs port-forward).  We need enough workers so the
+        # thread pool itself is never the bottleneck — only real sandbox
+        # contention should limit throughput.
+        peak_concurrency = int(req.target_qps * req.duration_s)  # total requests expected over the run
+        qps_workers = max(16, min(512, peak_concurrency))
+        qps_executor = ThreadPoolExecutor(max_workers=qps_workers)
+        loop = asyncio.get_running_loop()
+        logger.info(
+            "QPS executor: %d workers for ~%d expected requests",
+            qps_workers,
+            peak_concurrency,
+        )
+
+        # Schedule requests at the target QPS rate
+        tasks: list[asyncio.Task] = []
+        t_start = time.time()
+        next_fire = t_start
+        request_id = 0
+
+        while True:
+            now = time.time()
+            elapsed = now - t_start
+            if elapsed >= req.duration_s:
+                break
+            if now >= next_fire:
+                rid = request_id
+                request_id += 1
+                fut = loop.run_in_executor(qps_executor, _run_qps_request, rid)
+                tasks.append(fut)
+                next_fire += interval  # fixed schedule: a late loop fires back-to-back to catch up
+            else:
+                await asyncio.sleep(min(0.001, next_fire - now))
+
+        # Wait for in-flight requests with a drain timeout.
+        drain_timeout = max(60.0, req.duration_s)
+        done, pending = await asyncio.wait(tasks, timeout=drain_timeout)
+
+        # Clean up the scoped executor
+        qps_executor.shutdown(wait=False)
+
+        # Collect completed results (guard against individual task exceptions)
+        session_results = []
+        for t in done:
+            try:
+                session_results.append(t.result())
+            except Exception as exc:
+                session_results.append(
+                    {
+                        "request_id": -1,
+                        "error": str(exc),
+                    }
+                )
+
+        # Cancel tasks still pending after the drain wait and mark them as timed out
+        for t in pending:
+            t.cancel()
+        if pending:
+            logger.warning(
+                "QPS drain timeout: %d/%d requests still pending after %.0fs",
+                len(pending),
+                len(tasks),
+                drain_timeout,
+            )
+            for t in pending:
+                session_results.append(
+                    {
+                        "request_id": -1,
+                        "error": "drain_timeout",
+                    }
+                )
+
+        # Bulk-delete SandboxClaims left by cancelled tasks.
+        # Only targets claims labelled created-by=pkb-qps-benchmark so
+        # we never touch claims created by other workloads.
+        try:
+            import subprocess as _sp
+
+            _claims = _sp.run(
+                [
+                    "kubectl",
+                    "get",
+                    "sandboxclaim",
+                    "-n",
+                    sandbox_namespace,
+                    "-l",
+                    "created-by=pkb-qps-benchmark",
+                    "-o",
+                    "jsonpath={.items[*].metadata.name}",
+                ],
+                capture_output=True,
+                text=True,
+            )
+            claim_names = _claims.stdout.strip().split()
+            if claim_names and claim_names != [""]:
+                logger.info("Cleaning up %d lingering pkb-qps claims", len(claim_names))
+                _sp.run(
+                    [
+                        "kubectl",
+                        "delete",
+                        "sandboxclaim",
+                        "-l",
+                        "created-by=pkb-qps-benchmark",
+                        "-n",
+                        sandbox_namespace,
+                        "--wait=false",
+                    ],
+                    capture_output=True,
+                    text=True,
+                )
+        except Exception:
+            logger.warning("Failed to clean up lingering claims", exc_info=True)
+
+    wall_time = time.time() - t_start  # spans scheduling, the drain wait and the kubectl cleanup
+
+    # Separate successful vs failed
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Compute TTFE stats
+    aggregate = {}
+    if successful:
+        ttfe_values = sorted(r["ttfe_ms"] for r in successful)
+        if ttfe_values:
+            aggregate.update(_percentile_stats(ttfe_values, "ttfe"))
+
+        # Also compute claim latency stats (the warm-pool-sensitive metric)
+        claim_values = sorted(r["claim_ms"] for r in successful if "claim_ms" in r)
+        if claim_values:
+            aggregate.update(_percentile_stats(claim_values, "claim"))
+
+    return {
+        "target_qps": req.target_qps,
+        "actual_qps": round(request_id / wall_time, 2) if wall_time > 0 else 0,  # issued / wall_time
+        "duration_s": round(wall_time, 2),  # measured wall time, not the requested duration
+        "total_requests": request_id,
+        "successful_requests": len(successful),
+        "failed_requests": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/benchmark/chromium/density")
+async def benchmark_chromium_density(req: ChromiumBenchmarkRequest):
+    """Trigger the Chromium density benchmark (Use Case C).
+
+    Fires `concurrent_sessions` parallel Chromium sandbox sessions.  Each
+    session claims its own sandbox from the chromium warm pool, connects to
+    the sandbox's Chrome instance via CDP (Chrome DevTools Protocol), and
+    drives the benchmark from the orchestrator using Playwright.
+
+    Architecture:
+      - Sandbox: runs headless Chromium (upstream chrome-sandbox image) with
+        --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0
+      - Orchestrator: connects Playwright via connect_over_cdp() to the
+        sandbox pod IP:9222 and drives navigate/click/evaluate/screenshot.
+      - This isolates pure Chrome-under-gVisor overhead without Node.js or
+        a runtime server in the sandbox.
+    """
+    from playwright.async_api import async_playwright
+    from kubernetes import client as k8s_client, config as k8s_config
+
+    async with _benchmark_lock:
+
+        sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic")
+        sandbox_template = "chromium-sandbox-template"
+
+        logger.info(
+            "Starting Chromium density benchmark (CDP): concurrent_sessions=%d "
+            "task_count=%d warmup_tasks=%d",
+            req.concurrent_sessions,
+            req.task_count,
+            req.warmup_tasks,
+        )
+
+        # Initialize K8s client for pod IP lookup
+        try:
+            k8s_config.load_incluster_config()
+        except k8s_config.ConfigException:
+            k8s_config.load_kube_config()
+        core_v1 = k8s_client.CoreV1Api()
+
+        # Inline HTML test page (same as benchmark_density.js used)
+        test_page = """data:text/html,
+<!DOCTYPE html>
+<html>
+<head><title>PKB Chromium Benchmark</title></head>
+<body>
+  <h1 id="heading">Hello Sandbox</h1>
+  <input id="search" type="text" placeholder="Search..." />
+  <button id="btn">Click Me</button>
+  <div id="output"></div>
+  <script>
+    document.getElementById('btn').addEventListener('click', () => {
+      document.getElementById('output').textContent = 'clicked';
+    });
+  </script>
+</body>
+</html>"""
+
+        # Limit concurrent K8s Metrics API calls to avoid overwhelming metrics-server
+        _metrics_semaphore = asyncio.Semaphore(5)
+
+        async def _run_chromium_session_cdp(session_id: int) -> dict:
+            """Claim a sandbox, benchmark its Chromium via CDP, return a result dict."""
+            sb_client = _make_sandbox_client()
+            sandbox = None
+            t_start = time.time()
+            claim_ms = 0.0
+            cold_start_ms = 0.0
+            try:
+                # 1. Claim sandbox from warm pool (sync call; NOTE(review): may block the loop)
+                t0 = time.time()
+                sandbox = sb_client.create_sandbox(
+                    template=sandbox_template,
+                    namespace=sandbox_namespace,
+                )
+                claim_ms = (time.time() - t0) * 1000.0
+
+                # 2. Resolve pod IP (synchronous K8s API read; same caveat as step 1)
+                pod_name = sandbox.get_pod_name()
+                pod = core_v1.read_namespaced_pod(pod_name, sandbox_namespace)
+                pod_ip = pod.status.pod_ip
+                if not pod_ip:
+                    raise RuntimeError(f"Pod {pod_name} has no IP assigned")
+
+                cdp_url = f"http://{pod_ip}:9223"
+
+                # 3. Connect Playwright via CDP
+                async with async_playwright() as pw:
+                    # Wait for Chrome to be ready (retry connection)
+                    browser = None
+                    for attempt in range(20):
+                        try:
+                            browser = await pw.chromium.connect_over_cdp(cdp_url)
+                            break
+                        except Exception:
+                            if attempt >= 19:
+                                raise
+                            await asyncio.sleep(0.5)
+
+                    # Cold start = time from t_start to CDP connected (claim + IP lookup + retries)
+                    cold_start_ms = (time.time() - t_start) * 1000.0
+
+                    context = await browser.new_context()
+                    page = await context.new_page()
+
+                    # Navigate once before measurement loop
+                    await page.goto(test_page, wait_until="domcontentloaded")
+
+                    # Latency arrays (filled during measured runs only)
+                    navigate_ms = []
+                    screenshot_ms = []
+                    evaluate_ms = []
+                    click_ms = []
+                    fill_ms = []
+                    interaction_ms = []
+
+                    total_runs = req.warmup_tasks + req.task_count
+                    for run_idx in range(total_runs):
+                        measuring = run_idx >= req.warmup_tasks
+
+                        # 1. Navigate (reload page)
+                        t0 = time.time()
+                        await page.goto(test_page, wait_until="domcontentloaded")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            navigate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 2. DOM evaluate — read heading text
+                        t0 = time.time()
+                        await page.evaluate(
+                            "() => document.getElementById('heading').textContent"
+                        )
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            evaluate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 3. Fill input
+                        t0 = time.time()
+                        await page.fill("#search", f"query-{run_idx}")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            fill_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 4. Click button
+                        t0 = time.time()
+                        await page.click("#btn")
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            click_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 5. Read back #output after the click (timed as an evaluate; value not checked)
+                        t0 = time.time()
+                        await page.evaluate(
+                            "() => document.getElementById('output').textContent"
+                        )
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            evaluate_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                        # 6. Screenshot
+                        t0 = time.time()
+                        await page.screenshot()
+                        elapsed = (time.time() - t0) * 1000.0
+                        if measuring:
+                            screenshot_ms.append(elapsed)
+                            interaction_ms.append(elapsed)
+
+                    # Read memory usage of the first listed container from the K8s Metrics API
+                    rss_mb = None
+                    try:
+                        async with _metrics_semaphore:
+                            custom_api = k8s_client.CustomObjectsApi()
+                            pod_metrics = await asyncio.to_thread(
+                                custom_api.get_namespaced_custom_object,
+                                group="metrics.k8s.io",
+                                version="v1beta1",
+                                namespace=sandbox_namespace,
+                                plural="pods",
+                                name=pod_name,
+                            )
+                        for c in pod_metrics.get("containers", []):
+                            usage = c.get("usage", {}).get("memory", "")
+                            if usage.endswith("Ki"):
+                                rss_mb = round(int(usage[:-2]) / 1024, 1)
+                            elif usage.endswith("Mi"):
+                                rss_mb = round(float(usage[:-2]), 1)
+                            elif usage.endswith("Gi"):
+                                rss_mb = round(float(usage[:-2]) * 1024, 1)
+                            break
+                    except Exception:
+                        logger.warning(
+                            "Failed to read pod metrics for %s",
+                            pod_name,
+                            exc_info=True,
+                        )
+
+                    await browser.close()
+
+                total_ms = (time.time() - t_start) * 1000.0
+
+                # Stats helper: mean, index-based p50/p95/p99, min, max; None if no samples
+                def _compute_stats(arr):
+                    if not arr:
+                        return None
+                    s = sorted(arr)
+                    n = len(s)
+                    return {
+                        "mean_ms": round(sum(s) / n, 3),
+                        "p50_ms": round(s[min(int(n * 0.50), n - 1)], 3),
+                        "p95_ms": round(s[min(int(n * 0.95), n - 1)], 3),
+                        "p99_ms": round(s[min(int(n * 0.99), n - 1)], 3),
+                        "min_ms": round(s[0], 3),
+                        "max_ms": round(s[-1], 3),
+                    }
+
+                return {
+                    "session_id": session_id,
+                    "sandbox_status": "ok",
+                    "orchestrator_total_ms": round(total_ms, 3),
+                    "claim_ms": round(claim_ms, 3),
+                    "cold_start_ms": round(cold_start_ms, 3),
+                    "rss_mb": rss_mb,
+                    "navigate": _compute_stats(navigate_ms),
+                    "evaluate": _compute_stats(evaluate_ms),
+                    "fill": _compute_stats(fill_ms),
+                    "click": _compute_stats(click_ms),
+                    "screenshot": _compute_stats(screenshot_ms),
+                    "interaction": _compute_stats(interaction_ms),
+                }
+
+            except Exception as e:
+                total_ms = (time.time() - t_start) * 1000.0
+                logger.exception("Chromium CDP session %d failed", session_id)
+                return {
+                    "session_id": session_id,
+                    "orchestrator_total_ms": round(total_ms, 3),
+                    "claim_ms": round(claim_ms, 3),
+                    "error": f"{type(e).__name__}: {e}",
+                }
+            finally:
+                if sandbox is not None:
+                    try:
+                        sb_client.delete_sandbox(
+                            sandbox.claim_name, namespace=sandbox_namespace
+                        )
+                    except Exception:
+                        logger.warning(
+                            "Failed to delete sandbox for session %d",
+                            session_id,
+                            exc_info=True,
+                        )
+
+        # Fire concurrent sessions
+        tasks = [_run_chromium_session_cdp(i) for i in range(req.concurrent_sessions)]
+        session_results = await asyncio.gather(*tasks)
+
+    # Separate successful vs failed
+    successful = [r for r in session_results if "error" not in r]
+    failed = [r for r in session_results if "error" in r]
+
+    # Aggregate metrics
+    aggregate = {}
+    if successful:
+        orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
+        aggregate.update(_percentile_stats(orch_times, "orchestrator_total"))
+
+        claim_times = sorted(r["claim_ms"] for r in successful if "claim_ms" in r)
+        if claim_times:
+            aggregate.update(_percentile_stats(claim_times, "claim"))
+
+        # Aggregate cold start and RSS
+        cold_starts = sorted(
+            r["cold_start_ms"] for r in successful if "cold_start_ms" in r
+        )
+        if cold_starts:
+            aggregate["cold_start_mean_ms"] = round(
+                sum(cold_starts) / len(cold_starts), 3
+            )
+            aggregate["cold_start_p95_ms"] = round(
+                cold_starts[min(int(len(cold_starts) * 0.95), len(cold_starts) - 1)], 3
+            )
+
+        rss_vals = sorted(
+            r["rss_mb"] for r in successful if r.get("rss_mb") is not None
+        )
+        if rss_vals:
+            aggregate["rss_end_mb"] = round(sum(rss_vals) / len(rss_vals), 1)
+
+        # Aggregate per-task-type interaction stats
+        for metric_key in (
+            "interaction",
+            "navigate",
+            "evaluate",
+            "click",
+            "fill",
+            "screenshot",
+        ):
+            means = sorted(
+                r[metric_key]["mean_ms"]
+                for r in successful
+                if isinstance(r.get(metric_key), dict) and "mean_ms" in r[metric_key]
+            )
+            p95s = sorted(
+                r[metric_key]["p95_ms"]
+                for r in successful
+                if isinstance(r.get(metric_key), dict) and "p95_ms" in r[metric_key]
+            )
+            if means:
+                aggregate[f"{metric_key}_mean_ms"] = round(sum(means) / len(means), 3)
+            if p95s:
+                aggregate[f"{metric_key}_p95_ms"] = round(
+                    p95s[min(int(len(p95s) * 0.95), len(p95s) - 1)], 3
+                )
+
+    return {
+        "concurrent_sessions": req.concurrent_sessions,
+        "successful_sessions": len(successful),
+        "failed_sessions": len(failed),
+        "aggregate": aggregate,
+        "sessions": session_results,
+    }
+
+
+@app.post("/run")
+async def run_agent(req: RunRequest):
+    """Send ``req.prompt`` to the agent; return its output, or HTTP 500 on failure."""
+    try:
+        output = await _run_agent(req.prompt)
+        return {"response": output}
+    except Exception as e:
+        logger.exception("Agent run failed")
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+# =========================================================================
+# Entry point: serve the app with uvicorn on 0.0.0.0, port $PORT (default 8080)
+# =========================================================================
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt
new file mode 100644
index 0000000000..4ca072323c
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt
@@ -0,0 +1,11 @@
+# Requirements for GKE Performance Agent
+google-adk[gke,extensions]==1.34.1
+k8s-agent-sandbox==0.4.6
+kubernetes>=36.0.1  # Fix: v36.0.0 has auth key mismatch bug (PR #2585)
+google-cloud-aiplatform[adk]==1.153.1
+google-cloud-logging==3.15.0
+fastapi==0.135.3
+uvicorn[standard]==0.44.0
+python-dotenv==1.0.1
+playwright==1.59.0
+
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js
new file mode 100644
index 0000000000..7638720691
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js
@@ -0,0 +1,177 @@
+// Agentic Chromium Sandbox Benchmark (UC-C)
+// Measures: Interaction Latency, Screenshot Generation, DOM Evaluation, RSS
+// Requires: Playwright (pre-installed in the container image)
+//
+// Self-contained — no external Mock LLM service needed.  Uses data: URLs
+// and inline HTML to avoid network dependencies so the benchmark measures
+// pure gVisor + Chromium overhead.
+//
+// Environment variables (injected by orchestrator):
+//   TASK_COUNT    — iterations per run (default: 10)
+//   WARMUP_TASKS  — warmup iterations excluded from stats (default: 2)
+
+const { chromium } = require('playwright');
+const os = require('os');
+
+const TASK_COUNT = parseInt(process.env.TASK_COUNT || '10');
+const WARMUP_TASKS = parseInt(process.env.WARMUP_TASKS || '2');
+
+// Inline HTML page — avoids network round-trips so we measure pure
+// browser engine + gVisor overhead.
+const TEST_PAGE = `data:text/html,
+<!DOCTYPE html>
+<html>
+<head><title>PKB Chromium Benchmark</title></head>
+<body>
+  <h1 id="heading">Hello Sandbox</h1>
+  <input id="search" type="text" placeholder="Search..." />
+  <button id="btn">Click Me</button>
+  <div id="output"></div>
+  <script>
+    document.getElementById('btn').addEventListener('click', () => {
+      document.getElementById('output').textContent = 'clicked';
+    });
+  </script>
+</body>
+</html>`;
+
+function percentile(sorted, p) {
+  // Element at index floor(n * p), clamped to the last one; null when empty.
+  const n = sorted.length;
+  return n ? sorted[Math.min(Math.floor(n * p), n - 1)] : null;
+}
+
+function getMemoryMB() {
+  // process.memoryUsage() describes this Node.js process only.
+  const toMB = (bytes) => Math.round(bytes / 1024 / 1024 * 100) / 100;
+  try {
+    const { rss, heapUsed, heapTotal } = process.memoryUsage();
+    return {
+      rss_mb: toMB(rss), heap_used_mb: toMB(heapUsed), heap_total_mb: toMB(heapTotal),
+    };
+  } catch (e) {
+    return { rss_mb: null, heap_used_mb: null, heap_total_mb: null };
+  }
+}
+
+async function runBenchmark() { // launch Chromium, time each action, print one JSON summary
+  const memStart = getMemoryMB();
+
+  // ── Cold Start: browser launch ──
+  const coldStart = performance.now();
+  const browser = await chromium.launch({
+    headless: true,
+    args: [
+      '--no-sandbox',
+      '--disable-gpu',
+      '--disable-dev-shm-usage',
+      '--disable-async-dns',
+      '--single-process',
+    ],
+  });
+  const cold_start_ms = performance.now() - coldStart;
+
+  const context = await browser.newContext();
+  const page = await context.newPage();
+
+  // Navigate once before the loop — amortize first-navigation overhead
+  await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' });
+
+  // Per-task latency arrays (filled during measured runs only)
+  const navigate_ms = [];
+  const screenshot_ms = [];
+  const evaluate_ms = [];
+  const click_ms = [];
+  const fill_ms = [];
+  const interaction_ms = []; // all task types pooled
+
+  for (let run = 0; run < WARMUP_TASKS + TASK_COUNT; run++) {
+    const measuring = run >= WARMUP_TASKS;
+
+    // 1. Navigate (reload the data: page)
+    let t0 = performance.now();
+    await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' });
+    let elapsed = performance.now() - t0;
+    if (measuring) { navigate_ms.push(elapsed); interaction_ms.push(elapsed); }
+
+    // 2. DOM evaluate — read heading text
+    t0 = performance.now();
+    await page.evaluate(() => document.getElementById('heading').textContent);
+    elapsed = performance.now() - t0;
+    if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); }
+
+    // 3. Fill input
+    t0 = performance.now();
+    await page.fill('#search', `query-${run}`);
+    elapsed = performance.now() - t0;
+    if (measuring) { fill_ms.push(elapsed); interaction_ms.push(elapsed); }
+
+    // 4. Click button
+    t0 = performance.now();
+    await page.click('#btn');
+    elapsed = performance.now() - t0;
+    if (measuring) { click_ms.push(elapsed); interaction_ms.push(elapsed); }
+
+    // 5. Read back #output after the click (timed as an evaluate; value not checked)
+    t0 = performance.now();
+    await page.evaluate(() => document.getElementById('output').textContent);
+    elapsed = performance.now() - t0;
+    if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); }
+
+    // 6. Screenshot (snapshot generation)
+    t0 = performance.now();
+    await page.screenshot({ path: '/tmp/snap.png' });
+    elapsed = performance.now() - t0;
+    if (measuring) { screenshot_ms.push(elapsed); interaction_ms.push(elapsed); }
+  }
+
+  await browser.close();
+  const memEnd = getMemoryMB();
+
+  // ── Compute stats ──
+  const computeStats = (arr) => {
+    if (!arr.length) return null;
+    const sorted = [...arr].sort((a, b) => a - b);
+    const sum = sorted.reduce((a, b) => a + b, 0);
+    return {
+      mean_ms: Math.round(sum / sorted.length * 1000) / 1000,
+      p50_ms: Math.round(percentile(sorted, 0.50) * 1000) / 1000,
+      p95_ms: Math.round(percentile(sorted, 0.95) * 1000) / 1000,
+      p99_ms: Math.round(percentile(sorted, 0.99) * 1000) / 1000,
+      min_ms: Math.round(sorted[0] * 1000) / 1000,
+      max_ms: Math.round(sorted[sorted.length - 1] * 1000) / 1000,
+    };
+  };
+
+  const summary = {
+    sandbox_status: 'ok',
+    cold_start_ms: Math.round(cold_start_ms * 1000) / 1000,
+    task_count: TASK_COUNT,
+    warmup_tasks: WARMUP_TASKS,
+    // Per-task-type latency stats
+    navigate: computeStats(navigate_ms),
+    evaluate: computeStats(evaluate_ms),
+    fill: computeStats(fill_ms),
+    click: computeStats(click_ms),
+    screenshot: computeStats(screenshot_ms),
+    // Pooled interaction latency (all types)
+    interaction: computeStats(interaction_ms),
+    // Memory from getMemoryMB(), sampled before launch and after browser.close()
+    rss_start_mb: memStart.rss_mb,
+    rss_end_mb: memEnd.rss_mb,
+    rss_growth_mb: memEnd.rss_mb != null && memStart.rss_mb != null
+      ? Math.round((memEnd.rss_mb - memStart.rss_mb) * 100) / 100
+      : null,
+  };
+
+  // Print JSON to stdout — orchestrator parses this
+  console.log(JSON.stringify(summary));
+}
+
+runBenchmark().catch((err) => {
+  // Report the failure as one JSON line on stdout, then exit non-zero.
+  const error = `${err.name}: ${err.message}`;
+  const report = { sandbox_status: 'error', error };
+  console.log(JSON.stringify(report));
+  process.exit(1);
+});
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py
new file mode 100644
index 0000000000..c1d20ecbfb
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+"""
+Agentic Python Sandbox Benchmark
+Measures: TTFE (Time to First Execution), CEL (Command Execution Latency), RSS Memory
+
+Three task categories:
+  - compute: CPU-bound (matrix multiply, sorting large lists)
+  - syscall:  gVisor Sentry stress (large file I/O, many stat calls)
+  - import:   Gofer FS I/O + memory (import heavy stdlib, build data)
+
+Metrics: all sandbox_* keys.
+"""
+import time
+import json
+import os
+import resource
+import sys
+import math
+import random
+import warnings
+
+warnings.filterwarnings("ignore")
+
+SAMPLE_COUNT = int(os.environ.get("SAMPLE_COUNT") or "20")
+SAMPLE_WARMUP = int(os.environ.get("SAMPLE_WARMUP") or "0")
+
+print(f"SAMPLE_COUNT: {SAMPLE_COUNT}")
+print(f"SAMPLE_WARMUP: {SAMPLE_WARMUP}")
+
+# ── Persistent allocations (retained across iterations to grow RSS) ──
+# ~20MB baseline allocation that stays resident
+_RESIDENT_DATA = [bytearray(1024 * 1024) for _ in range(20)]  # 20 × 1MB
+
+
+def get_rss_mb() -> float:
+    """Return peak RSS (ru_maxrss) of this process in MB; assumes KiB units (Linux)."""
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
+
+
+def get_static_tasks():
+    """Return deterministic static tasks to measure execution latency.
+
+    Three task categories enable decomposition of CEL degradation:
+      - compute: sort a 100k-element list + 40k-element multiply-and-sum
+      - syscall:  write/read a 1MB temp file, 1000 stat + 1000 listdir calls
+      - import:   re-import 13 stdlib modules 20 times + build a 10k-entry dict
+    """
+    return [
+        {
+            "id": 1,
+            "type": "compute",
+            "code": (
+                "import math, random\n"
+                "random.seed(42)\n"
+                "data = [random.random() for _ in range(100_000)]\n"
+                "data.sort()\n"
+                "# Matrix-like multiply (flattened 200×200)\n"
+                "a = list(range(40_000))\n"
+                "b = [x * 0.001 for x in a]\n"
+                "_ = sum(x * y for x, y in zip(a, b))\n"
+            ),
+        },
+        {
+            "id": 2,
+            "type": "syscall",
+            "code": (
+                "import os, tempfile\n"
+                "d = tempfile.gettempdir()\n"
+                "# Write + read 1MB file through gVisor Gofer\n"
+                "path = os.path.join(d, 'bench_heavy.bin')\n"
+                "data = b'x' * (1024 * 1024)\n"
+                "with open(path, 'wb') as f:\n"
+                "    f.write(data)\n"
+                "with open(path, 'rb') as f:\n"
+                "    _ = f.read()\n"
+                "os.unlink(path)\n"
+                "# Heavy stat/listdir\n"
+                "[os.stat(d) for _ in range(1000)]\n"
+                "[os.listdir(d) for _ in range(1000)]\n"
+            ),
+        },
+        {
+            "id": 3,
+            "type": "import",
+            "code": (
+                "import importlib, sys\n"
+                "mods = [\n"
+                "    'json', 'csv', 'html', 'email', 'unittest', 'logging',\n"
+                "    'xml.etree.ElementTree', 'http.client', 'urllib.request',\n"
+                "    'argparse', 'pprint', 'textwrap', 'difflib',\n"
+                "]\n"
+                "for _ in range(20):\n"
+                "    for m in mods:\n"
+                "        try:\n"
+                "            sys.modules.pop(m, None)\n"
+                "            importlib.import_module(m)\n"
+                "        except Exception:\n"
+                "            pass\n"
+                "# Build a large dict to add memory pressure\n"
+                "_ = {str(i): list(range(100)) for i in range(10_000)}\n"
+            ),
+        },
+    ]
+
+
+def _percentile(sorted_vals, pct):
+    """Return the pct-quantile (0..1) of a pre-sorted list, or 0.0 if it is empty."""
+    last = len(sorted_vals) - 1
+    return sorted_vals[min(int(len(sorted_vals) * pct), last)] if sorted_vals else 0.0
+
+
+def run_benchmark():
+    """Measure TTFE, per-task CEL and RSS, then print the summary as JSON.
+
+    Also dumps the raw results to /tmp/benchmark_results.json; returns the summary.
+    """
+    results = {"ttfe_ms": None, "cel_ms": [], "rss_mb_start": None, "rss_mb_end": None}
+    cel = results["cel_ms"]
+
+    # TTFE: time taken by the first exec() call
+    t0 = time.perf_counter()
+    exec("x = 1 + 1", globals())
+    results["ttfe_ms"] = round((time.perf_counter() - t0) * 1000, 6)
+
+    results["rss_mb_start"] = get_rss_mb()
+
+    # Split the task list: the first "import" task runs once, the rest are sampled
+    sampled_tasks, import_task = [], None
+    for task in get_static_tasks():
+        if task["type"] != "import":
+            sampled_tasks.append(task)
+        elif import_task is None:
+            import_task = task
+
+    # Warmup passes (not recorded)
+    for _ in range(SAMPLE_WARMUP):
+        for task in sampled_tasks:
+            exec(task["code"], globals())
+
+    # Measured passes; each one first keeps another ~100KB resident
+    for iteration in range(SAMPLE_COUNT):
+        _RESIDENT_DATA.append(bytearray(100 * 1024))
+        for task in sampled_tasks:
+            t0 = time.perf_counter()
+            exec(task["code"], globals())
+            latency_ms = round((time.perf_counter() - t0) * 1000, 6)
+            cel.append({
+                "iteration": iteration, "task_id": task["id"],
+                "type": task["type"], "latency_ms": latency_ms,
+            })
+
+    # Import task is timed once, after the sampled loop
+    import_elapsed_ms = 0.0
+    if import_task:
+        t0 = time.perf_counter()
+        exec(import_task["code"], globals())
+        import_elapsed_ms = round((time.perf_counter() - t0) * 1000, 6)
+
+    results["rss_mb_end"] = get_rss_mb()
+
+    # Sum of the sampled-task latencies for each iteration
+    iteration_totals = [
+        round(sum(r["latency_ms"] for r in cel if r["iteration"] == n), 6)
+        for n in range(SAMPLE_COUNT)
+    ]
+
+    # Raw latency list per task type, in sorted type order
+    types_seen = sorted({r["type"] for r in cel})
+    per_type_raw = {
+        kind: [round(r["latency_ms"], 6) for r in cel if r["type"] == kind]
+        for kind in types_seen
+    }
+
+    summary = {
+        "hostname": os.environ.get("HOSTNAME", "unknown"),
+        "sandbox_ttfe_ms": results["ttfe_ms"],
+        "sandbox_total_cel_ms": iteration_totals,
+        "sandbox_import_cel_ms": import_elapsed_ms,
+        "sandbox_rss_start_mb": results["rss_mb_start"],
+        "sandbox_rss_end_mb": results["rss_mb_end"],
+        "sandbox_rss_growth_mb": round(results["rss_mb_end"] - results["rss_mb_start"], 6),
+        "sample_count": SAMPLE_COUNT,
+        "sample_warmup": SAMPLE_WARMUP,
+        "total_iterations": len(iteration_totals),
+        "task_types": len(types_seen) + (1 if import_task else 0),
+    }
+    summary.update((f"sandbox_{kind}_cel_ms", raw) for kind, raw in per_type_raw.items())
+
+    print(json.dumps(summary))
+    with open("/tmp/benchmark_results.json", "w") as f:
+        json.dump(results, f)
+
+    return summary
+
+if __name__ == "__main__":
+    run_benchmark()
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py
new file mode 100644
index 0000000000..f92a3e694d
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""Agentic Payload Transfer Benchmark (Use Case D).
+
+Measures the cost of returning large "Observation" payloads from a gVisor
+sandbox back to the Orchestrator via the real data path:
+  stdout → code_execution_result.output → orchestrator HTTP response.
+
+For a given PAYLOAD_SIZE_MB, the script:
+  1. Generates a payload of that size (os.urandom + base64)
+  2. Measures generation, serialization, and stdout-write times separately
+  3. Repeats for PAYLOAD_ITERATIONS to compute stable percentiles
+  4. On the final iteration, writes the actual payload to stdout (measuring
+     real end-to-end transfer); other iterations write to /dev/null to
+     measure write-syscall cost without flooding the return channel.
+  5. Emits a JSON summary to stderr and, after the payload, to stdout (parsed by main.py)
+
+Metrics are split so that pass/fail thresholds can exclude generation
+time (os.urandom), which is not part of data transfer.
+
+Environment variables (injected by the agent):
+  PAYLOAD_SIZE_MB     — target payload size in megabytes (default: 1)
+  PAYLOAD_ITERATIONS  — number of transfer iterations (default: 20)
+"""
+
+import base64
+import json
+import os
+import resource
+import sys
+import time
+
+PAYLOAD_SIZE_MB = float(os.environ.get("PAYLOAD_SIZE_MB") or "1")
+PAYLOAD_ITERATIONS = int(os.environ.get("PAYLOAD_ITERATIONS") or "20")
+
+
+def _log(msg):
+    """Write one diagnostic line to stderr, keeping stdout for the payload data path."""
+    sys.stderr.write(f"{msg}\n")
+    sys.stderr.flush()
+
+
+_log(f"PAYLOAD_SIZE_MB: {PAYLOAD_SIZE_MB}")
+_log(f"PAYLOAD_ITERATIONS: {PAYLOAD_ITERATIONS}")
+
+
+def get_rss_mb() -> float:
+    """Return peak RSS (ru_maxrss) of this process in MB; assumes KiB units (Linux)."""
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
+
+
+def _percentile(sorted_vals, pct):
+    """Pick the pct-quantile (0..1) from an ascending list; 0.0 when it is empty."""
+    count = len(sorted_vals)
+    if count == 0:
+        return 0.0
+    return sorted_vals[min(int(count * pct), count - 1)]
+
+
+def _stats_for(latencies):
+    """Return mean/p50/p95/p99/min/max (ms) of latencies; all None when empty.
+
+    Works on a sorted copy, so the caller's list is left untouched.
+    """
+    ordered = sorted(latencies)
+    if not ordered:
+        return dict.fromkeys(("mean", "p50", "p95", "p99", "min", "max"))
+    stats = {"mean": sum(ordered) / len(ordered), "min": ordered[0], "max": ordered[-1]}
+    stats.update({f"p{q}": _percentile(ordered, q / 100) for q in (50, 95, 99)})
+    return {name: round(value, 6) for name, value in stats.items()}
+
+
+def run_benchmark():
+    """Run the payload transfer benchmark; emit and return the JSON summary.
+
+    The summary is written to stderr and, after the payload, to stdout.
+    """
+    target_bytes = int(PAYLOAD_SIZE_MB * 1024 * 1024)
+    rss_start = get_rss_mb()
+
+    generation_times = []
+    serialization_times = []
+    stdout_times = []  # stdout write syscall time
+    transfer_times = []  # serialize + stdout write (the threshold metric)
+    throughputs = []  # MB/s based on stdout write time
+
+    # --- Warmup (2 iterations, not recorded) ---
+    for _ in range(2):
+        raw = os.urandom(target_bytes)
+        _ = base64.b64encode(raw).decode("ascii")
+
+    # --- Measured iterations ---
+    for i in range(PAYLOAD_ITERATIONS):
+        # 1. Generate payload (os.urandom — NOT data transfer)
+        t0 = time.perf_counter()
+        raw = os.urandom(target_bytes)
+        t_gen = time.perf_counter()
+
+        # 2. Serialize (base64 encode — mirrors real observation encoding)
+        encoded = base64.b64encode(raw).decode("ascii")
+        t_ser = time.perf_counter()
+
+        # 3. Transfer — only the final iteration writes to real stdout (the
+        #    sandbox→orchestrator path) so the return channel is not flooded;
+        #    earlier iterations write to /dev/null (timing includes its open/close).
+        t_xfer_start = time.perf_counter()
+        if i == PAYLOAD_ITERATIONS - 1:
+            sys.stdout.write(encoded)
+            sys.stdout.flush()
+        else:
+            with open("/dev/null", "w") as devnull:
+                devnull.write(encoded)
+        t_xfer = time.perf_counter()
+
+        gen_ms = (t_gen - t0) * 1000
+        ser_ms = (t_ser - t_gen) * 1000
+        stdout_ms = (t_xfer - t_xfer_start) * 1000
+        transfer_ms = ser_ms + stdout_ms  # excludes generation
+
+        generation_times.append(gen_ms)
+        serialization_times.append(ser_ms)
+        stdout_times.append(stdout_ms)
+        transfer_times.append(transfer_ms)
+
+        # Throughput in MB/s (based on encoded size and stdout write time)
+        encoded_size_mb = len(encoded) / (1024 * 1024)
+        if stdout_ms > 0:
+            throughputs.append(encoded_size_mb / (stdout_ms / 1000))
+
+    rss_end = get_rss_mb()
+
+    # Compute stats
+    gen_stats = _stats_for(generation_times)
+    ser_stats = _stats_for(serialization_times)
+    stdout_stats = _stats_for(stdout_times)
+    transfer_stats = _stats_for(transfer_times)
+    throughput_stats = _stats_for(throughputs) if throughputs else {}
+
+    # Base64 output length is 4 * ceil(n / 3); no need to encode another payload
+    encoded_size_bytes = 4 * ((target_bytes + 2) // 3)
+
+    summary = {
+        "hostname": os.environ.get("HOSTNAME", "unknown"),
+        # Payload config
+        "sandbox_payload_size_bytes": target_bytes,
+        "sandbox_payload_encoded_size_bytes": encoded_size_bytes,
+        "sandbox_payload_iterations": PAYLOAD_ITERATIONS,
+        # Generation time (os.urandom — NOT data transfer, excluded from threshold)
+        "sandbox_generation_time_mean_ms": gen_stats["mean"],
+        "sandbox_generation_time_p50_ms": gen_stats["p50"],
+        "sandbox_generation_time_p95_ms": gen_stats["p95"],
+        "sandbox_generation_time_p99_ms": gen_stats["p99"],
+        "sandbox_generation_time_min_ms": gen_stats["min"],
+        "sandbox_generation_time_max_ms": gen_stats["max"],
+        # Serialization time (base64 encode — CPU bound)
+        "sandbox_serialization_time_mean_ms": ser_stats["mean"],
+        "sandbox_serialization_time_p50_ms": ser_stats["p50"],
+        "sandbox_serialization_time_p95_ms": ser_stats["p95"],
+        "sandbox_serialization_time_p99_ms": ser_stats["p99"],
+        "sandbox_serialization_time_min_ms": ser_stats["min"],
+        "sandbox_serialization_time_max_ms": ser_stats["max"],
+        # Stdout write time (the raw write-syscall through gVisor)
+        "sandbox_stdout_time_mean_ms": stdout_stats["mean"],
+        "sandbox_stdout_time_p50_ms": stdout_stats["p50"],
+        "sandbox_stdout_time_p95_ms": stdout_stats["p95"],
+        "sandbox_stdout_time_p99_ms": stdout_stats["p99"],
+        "sandbox_stdout_time_min_ms": stdout_stats["min"],
+        "sandbox_stdout_time_max_ms": stdout_stats["max"],
+        # Transfer time (serialization + stdout write — the threshold metric)
+        "sandbox_transfer_time_mean_ms": transfer_stats["mean"],
+        "sandbox_transfer_time_p50_ms": transfer_stats["p50"],
+        "sandbox_transfer_time_p95_ms": transfer_stats["p95"],
+        "sandbox_transfer_time_p99_ms": transfer_stats["p99"],
+        "sandbox_transfer_time_min_ms": transfer_stats["min"],
+        "sandbox_transfer_time_max_ms": transfer_stats["max"],
+        # Throughput (MB/s based on transfer write time)
+        "sandbox_throughput_mean_mbps": throughput_stats.get("mean"),
+        "sandbox_throughput_p50_mbps": throughput_stats.get("p50"),
+        "sandbox_throughput_min_mbps": throughput_stats.get("min"),
+        # RSS
+        "sandbox_rss_start_mb": rss_start,
+        "sandbox_rss_end_mb": rss_end,
+        "sandbox_rss_growth_mb": rss_end - rss_start,
+    }
+
+    # Emit JSON summary to stderr for diagnostics.
+    _log("---BENCHMARK_RESULT_JSON---")
+    _log(json.dumps(summary, indent=2))
+
+    # Also emit to stdout (after the payload data) so _parse_sandbox_json() can
+    # find it in code_execution_result.output — ADK only captures stdout.
+    print("\n---BENCHMARK_RESULT_JSON---", flush=True)
+    print(json.dumps(summary), flush=True)
+
+    return summary
+
+
+if __name__ == "__main__":
+    try:
+        run_benchmark()
+    except Exception as e:
+        import traceback
+        # NOTE(review): the traceback goes to stderr only and the exit status stays 0.
+        traceback.print_exc()
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
new file mode 100644
index 0000000000..07ef6309db
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Minimal QPS benchmark script for UC-F (Scheduling Throughput).
+
+Runs inside the GKE Agent Sandbox as a readiness check for a claim: it does
+one trivial computation and prints a single JSON status object.  The
+orchestrator-side timing (orchestrator_total_ms) is the primary TTFE
+measurement; it spikes when the warm pool drains and fresh pods must be
+cold-started.
+"""
+import json
+import time
+
+started = time.perf_counter()
+# Cheap, deterministic work showing that code executes in the sandbox.
+checksum = sum(range(10_000))
+exec_ms = (time.perf_counter() - started) * 1000
+
+# Report on stdout as one JSON object.
+report = {
+    "sandbox_status": "ok",
+    "sandbox_qps_exec_ms": round(exec_ms, 3),
+    "sandbox_compute_result": checksum,
+}
+print(json.dumps(report))
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
new file mode 100644
index 0000000000..86b33c8486
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
@@ -0,0 +1,64 @@
+# Vibe Coding Startup Scripts
+
+Pluggable startup scripts for the UC-A snapshot saturation harness (`sweeps/snapshot_saturation_search.py`). Each script simulates a realistic "vibe coding" sandbox cold-start — the kind of environment setup that happens when an AI coding agent provisions a new sandbox for a user.
+
+## How It Works
+
+When `--preload_mode=script:<path>` is passed to the sweep harness:
+
+1. The script is read from disk and embedded into the pod's container entrypoint
+2. The pod runs the script to completion (installs packages, starts services, etc.)
+3. After the script exits 0, the harness prints `SCRIPT_READY` and starts a counter loop
+4. **TTFE** is measured as the total time from SandboxClaim creation to `SCRIPT_READY`
+
+This lets you compare cold-start TTFE (full script execution) against snapshot/restore TTFE (resuming from a pre-snapshotted state where the script already ran).
+
+## Scripts
+
+### startup_pip_fastapi.sh
+
+**Lightweight Python variant.** Runs natively in the `python:3.11-slim` base image.
+
+Steps: `pip install fastapi uvicorn` → create app → start uvicorn → wait for first HTTP response.
+
+Typical cold-start: ~5–8s on GKE with fast network.
+
+```bash
+# Cold-start only
+python sweeps/snapshot_saturation_search.py \
+    --skip_snapshot \
+    --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=20
+
+# With snapshot/restore (shows restore speedup vs cold-start)
+python sweeps/snapshot_saturation_search.py \
+    --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=20 --restore_threshold_s=10
+```
+
+### startup_npm_vite.sh
+
+**Heavier Node.js variant.** Installs Node.js + npm from apt, then npm-installs Vite and starts a dev server.
+
+Steps: `apt-get install nodejs npm` → `npm install vite` → start Vite dev server → wait for first page served.
+
+Typical cold-start: ~30–60s (apt + npm on cold cache).
+
+```bash
+python sweeps/snapshot_saturation_search.py \
+    --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+    --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+    --ttfe_threshold_s=120 --restore_threshold_s=10
+```
+
+## Writing Your Own Script
+
+Requirements:
+- Must be a bash script (runs via `bash -c` in a `python:3.11-slim` container)
+- Must exit 0 on success (use `set -e` for fail-fast; add `set -o pipefail` if the script pipes command output)
+- Should print progress to stdout (visible in pod logs for debugging)
+- The harness appends `SCRIPT_READY` + counter loop after your script — don't add your own
+
+The `PRELOAD_MB` env var is available but unused by these scripts. The sweep varies it to test different memory request levels on the pod.
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
new file mode 100644
index 0000000000..f3e9c9c235
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Vibe Coding Startup Script — npm + Vite dev server
+#
+# Simulates a typical agentic sandbox "vibe coding" cold-start:
+#   1. Install Node.js and npm via apt, then the project's npm dependencies
+#   2. Start a Vite dev server
+#   3. Wait for the server to be ready (first page served)
+#
+# This script is designed to run inside the sandbox container (python:3.11-slim).
+# It installs Node.js + npm + dependencies from scratch to measure realistic
+# cold-start latency including package installation.
+#
+# Usage (cold-start only):
+#   python sweeps/snapshot_saturation_search.py \
+#     --skip_snapshot \
+#     --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=120
+#
+# Usage (with snapshot/restore):
+#   python sweeps/snapshot_saturation_search.py \
+#     --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=120 --restore_threshold_s=10
+#
+# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to
+# the container; in script mode this is unused by the script itself but
+# varies memory requests to test different resource pressure levels.
+
+set -eo pipefail  # pipefail: a failing "npm install | tail" must abort the script
+# curl is installed explicitly: the readiness loop below needs it and the slim base image may lack it.
+echo "[vibe-coding] Installing Node.js..."
+apt-get update -qq && apt-get install -y -qq nodejs npm curl > /dev/null 2>&1
+
+echo "[vibe-coding] Creating project scaffold..."
+mkdir -p /tmp/vibe-project && cd /tmp/vibe-project
+
+# Create a minimal package.json with Vite
+cat > package.json << 'EOF'
+{
+  "name": "vibe-sandbox",
+  "private": true,
+  "scripts": {
+    "dev": "vite --host 0.0.0.0 --port 5173"
+  },
+  "dependencies": {
+    "vite": "^5.0.0"
+  }
+}
+EOF
+
+# Create minimal index.html for Vite to serve
+cat > index.html << 'EOF'
+<!DOCTYPE html>
+<html><head><title>Vibe</title></head>
+<body><h1>Ready</h1></body>
+</html>
+EOF
+
+echo "[vibe-coding] Installing npm dependencies..."
+npm install --prefer-offline 2>&1 | tail -5
+
+echo "[vibe-coding] Starting Vite dev server..."
+npx vite --host 0.0.0.0 --port 5173 &
+VITE_PID=$!
+
+echo "[vibe-coding] Waiting for server to be ready..."
+MAX_WAIT=60
+ELAPSED=0
+while ! curl -s http://localhost:5173 > /dev/null 2>&1; do
+    sleep 1
+    ELAPSED=$((ELAPSED + 1))
+    if [ $ELAPSED -ge $MAX_WAIT ]; then
+        echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s"
+        exit 1
+    fi
+done
+
+echo "[vibe-coding] First page served successfully (${ELAPSED}s)"
+
+# Kill the vite server — we only needed to measure startup time
+kill $VITE_PID 2>/dev/null || true
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh
new file mode 100644
index 0000000000..d54a851bda
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Lightweight Vibe Coding Startup Script — pip install + FastAPI
+#
+# Simulates a Python-based agentic sandbox cold-start:
+#   1. Install Python packages (FastAPI + uvicorn)
+#   2. Start a web server
+#   3. Wait for the server to respond
+#
+# This is lighter weight than the npm/Vite variant and runs natively
+# in the python:3.11-slim base image without needing to install Node.js.
+#
+# Usage (cold-start only):
+#   python sweeps/snapshot_saturation_search.py \
+#     --skip_snapshot \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20
+#
+# Usage (with snapshot/restore):
+#   python sweeps/snapshot_saturation_search.py \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20 --restore_threshold_s=10
+#
+# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to
+# the container; in script mode this is unused by the script itself but
+# varies memory requests to test different resource pressure levels.
+
+set -eo pipefail  # pipefail: a failing "pip install | tail" must abort the script
+
+echo "[vibe-coding] Installing Python packages..."
+pip install --quiet fastapi uvicorn 2>&1 | tail -3
+
+echo "[vibe-coding] Creating app..."
+cat > /tmp/app.py << 'EOF'
+from fastapi import FastAPI
+app = FastAPI()
+
+@app.get("/")
+def root():
+    return {"status": "ready"}
+EOF
+
+echo "[vibe-coding] Starting uvicorn server..."
+python -m uvicorn app:app --host 0.0.0.0 --port 8000 --app-dir /tmp &
+SERVER_PID=$!
+
+echo "[vibe-coding] Waiting for server to be ready..."
+MAX_WAIT=30
+ELAPSED=0
+while ! python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" 2>/dev/null; do
+    sleep 1
+    ELAPSED=$((ELAPSED + 1))
+    if [ $ELAPSED -ge $MAX_WAIT ]; then
+        echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s"
+        exit 1
+    fi
+done
+
+echo "[vibe-coding] First request served successfully (${ELAPSED}s)"
+
+# Kill the server — we only needed to measure startup time
+kill $SERVER_PID 2>/dev/null || true
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
new file mode 100644
index 0000000000..ee4603a4b3
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
@@ -0,0 +1,489 @@
+"""Shared utilities for GKE Agent Sandbox benchmarks.
+
+Provides helpers for agent API interaction, kubectl commands, warm pool
+management, and sample construction used by all GKE agent benchmark
+definitions.
+"""
+
+import json
+import logging
+import subprocess
+import time
+import urllib.request
+import urllib.error
+
+from absl import flags
+from perfkitbenchmarker import sample
+
+FLAGS = flags.FLAGS
+
+# ---------------------------------------------------------------------------
+# Shared flags (defined once at import; all are read through FLAGS.<name>)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_string(
+    "gke_namespace",
+    "agentic",
+    "Kubernetes namespace where the agentic workloads are deployed.",
+)
+
+flags.DEFINE_string(
+    "gke_machine_type",
+    "",
+    "Machine type of the sandbox node pool. Recorded in sample metadata.",
+)
+
+flags.DEFINE_string(
+    "gke_kubeconfig",
+    "",
+    "Path to a kubeconfig file. If empty, the system default is used.",
+)
+
+flags.DEFINE_bool(
+    "gke_gvisor",
+    True,
+    "Whether the sandbox node pool uses gVisor. Recorded in sample metadata.",
+)
+
+flags.DEFINE_string(
+    "gke_note",
+    "",
+    "Arbitrary note string attached to every sample for tagging runs.",
+)
+
+flags.DEFINE_string(
+    "gke_api_url",
+    "http://localhost:8080",
+    "Base URL of the ADK Agent API.",
+)
+
+flags.DEFINE_integer(
+    "gke_api_timeout",
+    600,
+    "HTTP timeout in seconds for agent API benchmark calls.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Agent API helpers
+# ---------------------------------------------------------------------------
+
+
+def GetAgentApiUrl():
+    """Return FLAGS.gke_api_url with any trailing "/" characters removed."""
+    return FLAGS.gke_api_url.rstrip("/")
+
+
+def CheckAgentHealthz(api_url=None, required=True):
+    """Verify the agent API is reachable via GET {api_url}/healthz.
+
+    Args:
+        api_url: Base URL to check. Defaults to GetAgentApiUrl().
+        required: If True (default), raise RuntimeError; else log a warning.
+    """
+    if api_url is None:
+        api_url = GetAgentApiUrl()
+    try:
+        req = urllib.request.Request(f"{api_url}/healthz")
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            logging.info("Agent healthz: %s", resp.read().decode())
+    # OSError covers URLError/HTTPError and raw socket resets or read timeouts.
+    except OSError as e:
+        msg = (
+            f"Agent API is not reachable at {api_url}/healthz: {e}\n"
+            "Hint: ensure kubectl port-forward is running "
+            "(kubectl port-forward svc/adk-agent -n <ns> 8080:80)."
+        )
+        if required:
+            raise RuntimeError(msg) from e
+        logging.warning("Health check deferred (non-fatal): %s", msg)
+
+
+def CallAgentApi(endpoint, payload, timeout=None):
+    """POST JSON to an agent API endpoint and return the parsed response.
+
+    Raises RuntimeError on HTTP errors, transport failures, or non-JSON.
+    """
+    timeout = FLAGS.gke_api_timeout if timeout is None else timeout
+    url = f"{GetAgentApiUrl()}{endpoint}"
+    data = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        url, data=data, method="POST",
+        headers={"Content-Type": "application/json"})
+    logging.info("POST %s  payload=%s  timeout=%ds", url, payload, timeout)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            body = resp.read().decode("utf-8")
+    except urllib.error.HTTPError as e:
+        body = e.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"Agent API returned HTTP {e.code}: {body[:500]}") from e
+    except OSError as e:  # URLError, plus raw socket timeouts and resets
+        reason = getattr(e, "reason", e)
+        raise RuntimeError(f"Cannot reach agent API at {url}: {reason}") from e
+    try:
+        return json.loads(body)
+    except json.JSONDecodeError as e:
+        raise RuntimeError(f"Agent API returned non-JSON response:\n{body[:500]}") from e
+
+
+# ---------------------------------------------------------------------------
+# kubectl helpers
+# ---------------------------------------------------------------------------
+
+
+def _KubectlCmd(args):
+    """Return the kubectl argv for args, adding --kubeconfig when it is set."""
+    kubeconfig = FLAGS.gke_kubeconfig
+    if kubeconfig:
+        return ["kubectl", "--kubeconfig", kubeconfig, *args]
+    return ["kubectl", *args]
+
+
+def RunKubectl(args, timeout=120, raise_on_failure=True):
+    """Run kubectl with args and return (stdout, stderr, returncode)."""
+    # NOTE: subprocess.TimeoutExpired propagates even if raise_on_failure=False.
+    completed = subprocess.run(
+        _KubectlCmd(args), capture_output=True, text=True, timeout=timeout)
+    rc = completed.returncode
+    if rc != 0 and raise_on_failure:
+        raise RuntimeError(f"kubectl failed (rc={rc}): {completed.stderr}")
+    return completed.stdout, completed.stderr, rc
+
+
+def CountPods(namespace, label, phase=None):
+    """Count pods matching label (and optionally phase); 0 if kubectl fails."""
+    cmd = ["get", "pods", "-n", namespace, "-l", label, "-o", "name"]
+    if phase:
+        cmd += [f"--field-selector=status.phase={phase}"]
+    stdout, stderr, rc = RunKubectl(cmd, raise_on_failure=False)
+    if rc != 0:  # surface the failure: a bare 0 looks like an empty pool
+        logging.warning("kubectl get pods failed (rc=%d): %s", rc, stderr.strip())
+    return len(stdout.strip().splitlines()) if rc == 0 else 0
+
+
+def PatchWarmPool(namespace, warmpool_name, replicas, label, wait_timeout=180):
+    """Patch SandboxWarmPool replicas and wait for pods to reach Running."""
+    logging.info("Patching %s replicas -> %d", warmpool_name, replicas)
+    merge_patch = json.dumps({"spec": {"replicas": replicas}})
+    RunKubectl([
+        "patch", "sandboxwarmpool", warmpool_name, "-n", namespace,
+        "--type=merge", f"-p={merge_patch}",
+    ])
+    if replicas == 0:
+        return True  # scaling to zero: nothing to wait for
+    give_up_at = time.time() + wait_timeout
+    while time.time() < give_up_at:
+        running_now = CountPods(namespace, label, phase="Running")
+        logging.info("%d/%d warm pool pods Running", running_now, replicas)
+        if running_now >= replicas:
+            return True
+        time.sleep(3)
+    logging.warning("Timed out waiting for %d warm pool pods", replicas)
+    return False  # wait_timeout elapsed before enough pods were Running
+
+
+def DrainWarmPool(namespace, warmpool_name, label, timeout=120):
+    """Scale the warm pool to 0; return True once no labelled pods remain."""
+    logging.info("Draining warm pool %s to 0", warmpool_name)
+    _, stderr, rc = RunKubectl([
+        "patch", "sandboxwarmpool", warmpool_name, "-n", namespace,
+        "--type=merge", "-p=" + json.dumps({"spec": {"replicas": 0}}),
+    ], raise_on_failure=False)
+    if rc != 0:  # still best-effort, but do not hide a failed patch
+        logging.warning("Warm pool patch failed (rc=%d): %s", rc, stderr.strip())
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        remaining = CountPods(namespace, label)
+        if remaining == 0:
+            logging.info("Warm pool drained successfully")
+            return True
+        logging.info("Draining... %d pods remaining", remaining)
+        time.sleep(3)
+    logging.warning("Drain timed out, %d pods still present", CountPods(namespace, label))
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Sample construction
+# ---------------------------------------------------------------------------
+
+
+def BuildMetadata(namespace, extra=None):
+    """Return the metadata dict shared by every sample.
+
+    Always holds namespace and gvisor; machine_type and note are added only
+    when their flags are non-empty. Keys in extra override all of these.
+    """
+    metadata = {"namespace": namespace, "gvisor": FLAGS.gke_gvisor}
+    optional = {"machine_type": FLAGS.gke_machine_type, "note": FLAGS.gke_note}
+    for key, value in optional.items():
+        if value:
+            metadata[key] = value
+    metadata.update(extra or {})
+    return metadata
+
+
+def MakeSample(metric, value, unit, namespace, extra_metadata=None):
+    """Build one sample.Sample carrying the common BuildMetadata() dict.
+
+    extra_metadata, if given, is forwarded to BuildMetadata as ``extra``.
+    """
+    tags = BuildMetadata(namespace, extra_metadata)
+    fields = {"metric": metric, "value": value, "unit": unit, "metadata": tags}
+    return sample.Sample(**fields)
+
+
+# ---------------------------------------------------------------------------
+# Port-forward flags (consumed by _PortForwardManager / EnsurePortForward)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_bool(
+    "gke_auto_portforward",
+    True,
+    "Automatically manage kubectl port-forward to the agent service.",
+)
+
+flags.DEFINE_integer(
+    "gke_portforward_local_port",
+    8080,
+    "Local port for kubectl port-forward.",
+)
+
+flags.DEFINE_integer(
+    "gke_portforward_remote_port",
+    80,
+    "Remote service port for kubectl port-forward.",
+)
+
+flags.DEFINE_string(
+    "gke_portforward_service",
+    "svc/adk-agent",
+    "Kubernetes service to port-forward to.",
+)
+
+flags.DEFINE_float(
+    "gke_portforward_reconnect_delay",
+    1.0,
+    "Seconds to wait before reconnecting after port-forward drops.",
+)
+
+flags.DEFINE_float(
+    "gke_portforward_health_timeout",
+    30.0,
+    "Seconds to wait for agent health check after starting port-forward.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Port-forward manager
+# ---------------------------------------------------------------------------
+
+import atexit
+import os as _os
+import signal
+import threading
+
+# Fixed path where the kubectl port-forward PID is recorded so a later run can kill it.
+_PID_FILE = "/tmp/pkb_portforward.pid"
+
+
+class _PortForwardManager:
+    """Manages a kubectl port-forward subprocess with auto-reconnect.
+
+    Mimics the shell pattern:
+        while true; do
+          kubectl port-forward svc/adk-agent -n agentic 8080:80
+          echo "Reconnecting..."
+          sleep 1
+        done
+
+    start()/stop() are serialized by a lock and idempotent; orphans are found via a PID file.
+    """
+
+    def __init__(self):
+        self._proc = None  # current kubectl Popen handle, if any
+        self._thread = None  # background thread running _loop()
+        self._stop_event = threading.Event()  # set by stop() to end _loop()
+        self._lock = threading.Lock()  # serializes start() and stop()
+        self._started = False
+
+    @property
+    def is_running(self):
+        return self._started and not self._stop_event.is_set()
+
+    def start(self):
+        """Start the port-forward loop (idempotent)."""
+        with self._lock:
+            # Already started and not stopped: the background loop owns
+            # (re)spawning kubectl, so there is nothing to do here.
+            if self._started and not self._stop_event.is_set():
+                return
+
+            self._kill_orphan()
+            self._stop_event.clear()
+            self._started = True
+            self._thread = threading.Thread(
+                target=self._loop, daemon=True, name="pkb-portforward"
+            )
+            self._thread.start()
+
+    def stop(self):
+        """Stop the port-forward loop and kill the subprocess."""
+        with self._lock:
+            if not self._started:
+                return
+            self._stop_event.set()
+            self._kill_proc()
+            self._started = False
+            self._cleanup_pid_file()
+
+    def _loop(self):
+        """Run kubectl port-forward, respawning it until stop() is called.
+
+        Runs on the background thread created by start().
+        """
+        ns = FLAGS.gke_namespace
+        svc = FLAGS.gke_portforward_service
+        local_port = FLAGS.gke_portforward_local_port
+        remote_port = FLAGS.gke_portforward_remote_port
+        delay = FLAGS.gke_portforward_reconnect_delay
+
+        cmd = _KubectlCmd([
+            "port-forward", svc,
+            "-n", ns,
+            f"{local_port}:{remote_port}",
+        ])
+
+        while not self._stop_event.is_set():
+            logging.info("Starting port-forward: %s", " ".join(cmd))
+            try:
+                # kubectl's output is never read here, so send it to DEVNULL:
+                # an undrained PIPE can fill up and block the child process.
+                proc = subprocess.Popen(
+                    cmd,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
+                self._proc = proc
+                self._write_pid_file(proc.pid)
+
+                # Poll the local handle: stop() may reset self._proc to None.
+                while not self._stop_event.is_set() and proc.poll() is None:
+                    self._stop_event.wait(timeout=0.5)
+            except Exception as e:
+                logging.warning("Port-forward error: %s", e)
+
+            if not self._stop_event.is_set():
+                logging.info(
+                    "Port-forward disconnected. Reconnecting in %.1fs...", delay
+                )
+                self._stop_event.wait(timeout=delay)
+
+    def _kill_proc(self):
+        """Kill the current subprocess if alive."""
+        if self._proc and self._proc.poll() is None:
+            try:
+                self._proc.terminate()  # SIGTERM first
+                self._proc.wait(timeout=5)
+            except Exception:
+                try:
+                    self._proc.kill()  # escalate if terminate/wait failed
+                except Exception:
+                    pass
+        self._proc = None
+
+    def _write_pid_file(self, pid):
+        """Write PID to file for orphan detection (best-effort)."""
+        try:
+            with open(_PID_FILE, "w") as f:
+                f.write(str(pid))
+        except OSError as e:
+            logging.debug("Could not write %s: %s", _PID_FILE, e)
+
+    def _cleanup_pid_file(self):
+        """Remove PID file."""
+        try:
+            _os.unlink(_PID_FILE)
+        except OSError:
+            pass
+
+    def _kill_orphan(self):
+        """Kill a port-forward process left by a previous PKB run."""
+        try:
+            if _os.path.exists(_PID_FILE):
+                with open(_PID_FILE, "r") as f:
+                    pid = int(f.read().strip())
+                logging.info("Killing orphan port-forward (PID %d)", pid)
+                _os.kill(pid, signal.SIGTERM)
+                time.sleep(0.5)  # grace period before escalating to SIGKILL
+                try:
+                    _os.kill(pid, signal.SIGKILL)
+                except OSError:
+                    pass  # process is already gone
+                self._cleanup_pid_file()
+        except (OSError, ValueError):
+            self._cleanup_pid_file()
+
+        # NOTE(review): the lsof sweep signals every process it reports for the
+        # port, not only kubectl — confirm that is acceptable on shared hosts.
+        local_port = FLAGS.gke_portforward_local_port
+        try:
+            result = subprocess.run(
+                ["lsof", "-ti", f":{local_port}"],
+                capture_output=True, text=True, timeout=5)
+            if result.returncode == 0 and result.stdout.strip():
+                for pid_str in result.stdout.strip().split():
+                    try:
+                        pid = int(pid_str)
+                        _os.kill(pid, signal.SIGTERM)
+                        logging.info("Killed process %d on port %d", pid, local_port)
+                    except (OSError, ValueError):
+                        pass
+        except (FileNotFoundError, subprocess.TimeoutExpired):
+            pass
+
+
+# Module-wide manager shared by EnsurePortForward() and StopPortForward().
+_port_forward_manager = _PortForwardManager()
+
+# Run stop() at interpreter exit so the kubectl child is not left running.
+atexit.register(_port_forward_manager.stop)
+
+
+def EnsurePortForward():
+    """Start the managed port-forward (if enabled) and wait for /healthz.
+
+    No-op when --gke_auto_portforward is false. Otherwise starts the shared
+    manager (safe to call repeatedly) and probes the agent health endpoint,
+    pausing 1 s after each failed attempt, until it answers or the health
+    timeout elapses; on timeout only a warning is logged.
+    """
+    if not FLAGS.gke_auto_portforward:
+        logging.info("Auto port-forward disabled (--gke_auto_portforward=false)")
+        return
+
+    _port_forward_manager.start()
+
+    wait_s = FLAGS.gke_portforward_health_timeout
+    give_up_at = time.time() + wait_s
+    healthz_url = f"{GetAgentApiUrl()}/healthz"
+    while time.time() < give_up_at:
+        try:
+            probe = urllib.request.Request(healthz_url)
+            with urllib.request.urlopen(probe, timeout=3) as resp:
+                logging.info("Port-forward healthy: %s", resp.read().decode())
+                return
+        except Exception:
+            time.sleep(1)
+
+    logging.warning(
+        "Port-forward health check did not pass within %.0fs. "
+        "Continuing anyway (Run() will fail if agent is unreachable).",
+        wait_s,
+    )
+
+
+def StopPortForward():
+    """Stop the shared port-forward manager, then log that it was stopped."""
+    _port_forward_manager.stop()
+    logging.info("Port-forward stopped.")
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
new file mode 100644
index 0000000000..0da929cbbd
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
@@ -0,0 +1,280 @@
+"""PKB Benchmark: GKE Agent Chromium Density Saturation (Use Case C).
+
+Atomic single-point measurement of Chromium browser sandbox density on a
+pre-provisioned GKE cluster with gVisor isolation. Measures interaction
+latency, screenshot generation time, cold start, navigation, evaluation,
+fill, click latencies, and RSS memory at a given concurrent session count.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the density parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_chromium_density \\
+                --gke_chromium_density=4 \\
+                --gke_chromium_density_task_count=10 \\
+                --gke_chromium_density_warmup_tasks=5 \\
+                --gke_namespace=agentic \\
+                --gke_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - gke_chromium_density_interaction_mean      (ms)
+  - gke_chromium_density_interaction_p95       (ms)
+  - gke_chromium_density_navigate_mean         (ms)
+  - gke_chromium_density_navigate_p95          (ms)
+  - gke_chromium_density_evaluate_mean         (ms)
+  - gke_chromium_density_evaluate_p95          (ms)
+  - gke_chromium_density_fill_mean             (ms)
+  - gke_chromium_density_fill_p95              (ms)
+  - gke_chromium_density_click_mean            (ms)
+  - gke_chromium_density_click_p95             (ms)
+  - gke_chromium_density_screenshot_mean       (ms)
+  - gke_chromium_density_screenshot_p95        (ms)
+  - gke_chromium_density_cold_start_mean       (ms)
+  - gke_chromium_density_cold_start_p95        (ms)
+  - gke_chromium_density_rss_end               (MB)
+  - gke_chromium_density_rss_growth            (MB)
+  - gke_chromium_density_wall_time             (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_chromium_density"
+BENCHMARK_CONFIG = """
+gke_chromium_density:
+  description: >
+    Atomic single-point Chromium browser sandbox density measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "chromium-sandbox-warmpool"  # warmpool_name used by Run()/Cleanup()
+_WARMPOOL_LABEL = "sandbox=chromium-sandbox-example"  # label passed alongside it
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags. NOTE(review): the integer flags set no lower_bound.
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "gke_chromium_density",
+    1,  # Also the warm pool replica count when patching is enabled.
+    "Number of concurrent Chromium browser sessions to run.",
+)
+
+flags.DEFINE_integer(
+    "gke_chromium_density_task_count",
+    10,  # Sent to the agent as "task_count".
+    "Number of browser task iterations per Chromium session.",
+)
+
+flags.DEFINE_integer(
+    "gke_chromium_density_warmup_tasks",
+    5,  # Sent to the agent as "warmup_tasks".
+    "Number of warmup iterations per session (excluded from stats).",
+)
+
+flags.DEFINE_bool(
+    "gke_chromium_density_patch_warmpool",
+    True,  # Run() calls utils.PatchWarmPool() only when this is true.
+    "Patch SandboxWarmPool replicas to match density before measurement.",
+)
+
+flags.DEFINE_integer(
+    "gke_chromium_density_exec_timeout",
+    120,  # Sent to the agent as "sandbox_exec_timeout_s".
+    "Sandbox command execution timeout in seconds.",
+)
+
+flags.DEFINE_integer(
+    "gke_chromium_density_provision_timeout",
+    300,  # Passed to utils.PatchWarmPool() as wait_timeout.
+    "Max seconds to wait for warm pool pods to reach Running.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision via gke_provision_utils.Provision(); benchmark_spec is unused."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load BENCHMARK_CONFIG through configs.LoadConfig, passing user_config.
+
+    No vm_groups. NOTE(review): does PKB then skip Provision()/Teardown()? Verify.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, check agent health (required=False), start port-forward."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    utils.CheckAgentHealthz(required=False)  # NOTE(review): precedes port-forward; confirm order
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Execute a single Chromium density measurement and return samples.
+
+    Optionally patches the warm pool to the requested density, POSTs the run
+    parameters to the agent's /benchmark/chromium/density endpoint and turns
+    the "aggregate" section of the response into samples.
+
+    Returns:
+      List of sample.Sample objects.
+    """
+    ns = FLAGS.gke_namespace
+    density = FLAGS.gke_chromium_density
+
+    logging.info("=== Run: chromium_density=%d ===", density)
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Patch warm pool (moved from Prepare for sweep compatibility)
+    if FLAGS.gke_chromium_density_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=density,
+            label=_WARMPOOL_LABEL,
+            wait_timeout=FLAGS.gke_chromium_density_provision_timeout,
+        )
+
+    # POST to agent API
+    payload = {
+        "task_count": FLAGS.gke_chromium_density_task_count,
+        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
+        "concurrent_sessions": density,
+        "sandbox_exec_timeout_s": FLAGS.gke_chromium_density_exec_timeout,
+    }
+
+    t0 = time.time()
+    result = utils.CallAgentApi("/benchmark/chromium/density", payload)
+    wall_time = time.time() - t0
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    # `or {}` also covers an explicit JSON null: .get("aggregate", {}) would
+    # return None for it and every _emit() call below would then crash.
+    agg = result.get("aggregate") or {}
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Build samples
+    extra = {
+        "density": density,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "task_count": FLAGS.gke_chromium_density_task_count,
+        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # Per-task-type latency: mean and P95 for each. _emit() skips any key the
+    # agent did not report.
+    for task in (
+        "interaction",
+        "navigate",
+        "evaluate",
+        "fill",
+        "click",
+        "screenshot",
+        "cold_start",
+    ):
+        for stat in ("mean", "p95"):
+            name = f"{task}_{stat}"
+            _emit(samples, agg, f"{name}_ms", name, "ms", ns, extra)
+
+    # RSS memory
+    for metric in ("rss_end", "rss_growth"):
+        _emit(samples, agg, f"{metric}_mb", metric, "MB", ns, extra)
+
+    # Wall time
+    wall_sample = utils.MakeSample(
+        f"{BENCHMARK_NAME}_wall_time", round(wall_time, 2), "seconds", ns, extra
+    )
+    samples.append(wall_sample)
+
+    logging.info("Emitted %d samples for chromium_density=%d.", len(samples), density)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Release sandbox resources after a measurement; the cluster is kept.
+
+    Steps, in order: delete every SandboxClaim in the namespace (kubectl is
+    invoked with raise_on_failure=False), drain the Chromium warm pool, then
+    stop the port-forward.
+
+    Args:
+      benchmark_spec: Unused; the namespace is read from FLAGS.
+    """
+    ns = FLAGS.gke_namespace
+    logging.info("Cleanup: deleting SandboxClaims and draining warm pool.")
+
+    # Delete any lingering SandboxClaims first so the pods they claimed are
+    # released.
+    delete_claims_cmd = ["delete", "sandboxclaims", "--all", "-n", ns]
+    delete_claims_cmd.append("--ignore-not-found=true")
+    utils.RunKubectl(delete_claims_cmd, timeout=60, raise_on_failure=False)
+
+    # Drain warm pool to 0
+    utils.DrainWarmPool(
+        namespace=ns,
+        warmpool_name=_WARMPOOL_NAME,
+        label=_WARMPOOL_LABEL,
+    )
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+def Teardown(benchmark_spec):
+    """Tear down via gke_provision_utils.Teardown(); benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append a sample for agg[agg_key]; do nothing if it is missing or None.
+
+    The metric is named "<BENCHMARK_NAME>_<metric_suffix>"; unit, namespace
+    and extra are handed to utils.MakeSample unchanged.
+    """
+    value = agg.get(agg_key)
+    if value is None:
+        # Absent/null metric: skip it so no placeholder sample is emitted.
+        return
+
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    samples.append(utils.MakeSample(metric_name, value, unit, namespace, extra))
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
new file mode 100644
index 0000000000..cd12169fcd
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
@@ -0,0 +1,518 @@
+"""PKB Benchmark: GKE Agent Deletion & Cleanup (Use Case G).
+
+Atomic single-point measurement of bulk deletion efficiency and IP
+reclamation on a pre-provisioned GKE cluster with gVisor isolation.
+Provisions N sandbox pods via SandboxWarmPool, then bulk-deletes them
+and measures per-pod deletion latency, aggregate deletion stats, and
+IP address reclamation timing.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the batch_size parameter across iterations to find
+the deletion saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_deletion \\
+                --gke_deletion_batch_size=100 \\
+                --gke_deletion_warmpool_name=python-sandbox-warmpool \\
+                --gke_deletion_pod_label=sandbox=python-sandbox-example \\
+                --gke_deletion_poll_interval_s=1.0 \\
+                --gke_deletion_provision_timeout_s=120.0 \\
+                --gke_deletion_drain_timeout_s=300.0 \\
+                --gke_namespace=agentic \\
+                --gke_machine_type=c4-standard-8
+
+Samples emitted (per run):
+  - gke_deletion_provision_time              (seconds)
+  - gke_deletion_total_drain_time            (seconds)
+  - gke_deletion_latency_p50                 (seconds)
+  - gke_deletion_latency_p95                 (seconds)
+  - gke_deletion_latency_p99                 (seconds)
+  - gke_deletion_latency_max                 (seconds)
+  - gke_deletion_rate                        (pods/sec)
+  - gke_deletion_ip_before                   (count)
+  - gke_deletion_ip_after                    (count)
+  - gke_deletion_ip_reclaim_time             (seconds)
+  - gke_deletion_final_running_count         (count)
+  - gke_deletion_wall_time                   (seconds)
+"""
+
+import json
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_deletion"
+BENCHMARK_CONFIG = """
+gke_deletion:
+  description: >
+    Atomic single-point bulk deletion and IP reclamation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "gke_deletion_batch_size",
+    100,  # Run() patches the warm pool to this many replicas.
+    "Number of sandbox pods to provision then bulk-delete.",
+)
+
+flags.DEFINE_string(
+    "gke_deletion_warmpool_name",
+    "python-sandbox-warmpool",  # Name given to "kubectl patch sandboxwarmpool".
+    "SandboxWarmPool resource name.",
+)
+
+flags.DEFINE_string(
+    "gke_deletion_pod_label",
+    "sandbox=python-sandbox-example",  # Passed to kubectl as the -l selector.
+    "Label selector for warm pool pods.",
+)
+
+flags.DEFINE_float(
+    "gke_deletion_poll_interval_s",
+    1.0,  # Passed to time.sleep() between drain polls.
+    "Seconds between kubectl polls during deletion.",
+)
+
+flags.DEFINE_float(
+    "gke_deletion_provision_timeout_s",
+    120.0,  # Run() raises RuntimeError if pods are not all Running by then.
+    "Max seconds to wait for pods to reach Running before deletion.",
+)
+
+flags.DEFINE_float(
+    "gke_deletion_drain_timeout_s",
+    300.0,  # Used for the pre-run drain, the measured drain and Cleanup().
+    "Max seconds to wait for all pods to terminate after scale-to-0.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision via gke_provision_utils.Provision(); benchmark_spec is unused."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load BENCHMARK_CONFIG through configs.LoadConfig, passing user_config.
+
+    No vm_groups. NOTE(review): does PKB then skip Provision()/Teardown()? Verify.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads onto the cluster, then ensure the port-forward is up."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Provision N pods, bulk-delete, measure deletion latency and IP reclamation.
+
+    Sequence: drain the pool, scale it to --gke_deletion_batch_size, wait for
+    the pods to reach Running, scale to 0, then poll kubectl until no pod with
+    the label remains or --gke_deletion_drain_timeout_s elapses.
+
+    A pod's deletion latency is the elapsed time at the first poll that no
+    longer lists it, so its resolution is the poll interval. Pods still
+    present when the drain times out are assigned the full drain time and are
+    excluded from the deletion rate.
+
+    Returns:
+      List of sample.Sample objects.
+
+    Raises:
+      RuntimeError: If fewer than batch_size pods reach Running in time.
+    """
+    ns = FLAGS.gke_namespace
+    batch_size = FLAGS.gke_deletion_batch_size
+    warmpool_name = FLAGS.gke_deletion_warmpool_name
+    label = FLAGS.gke_deletion_pod_label
+    poll_interval = FLAGS.gke_deletion_poll_interval_s
+    provision_timeout = FLAGS.gke_deletion_provision_timeout_s
+    drain_timeout = FLAGS.gke_deletion_drain_timeout_s
+
+    logging.info("=== Run: batch_size=%d ===", batch_size)
+
+    # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
+    _DrainPool(ns, warmpool_name, label, drain_timeout)
+    time.sleep(2)
+
+    t_wall_start = time.time()
+
+    # 1. Provision N pods
+    logging.info("Provisioning %d pods...", batch_size)
+    provision_start = time.time()
+    _PatchReplicas(ns, warmpool_name, batch_size)
+
+    deadline = time.time() + provision_timeout
+    while time.time() < deadline:
+        running = utils.CountPods(ns, label, phase="Running")
+        pct = (running / batch_size * 100) if batch_size > 0 else 0
+        logging.info("Provisioning... %d/%d (%.0f%%)", running, batch_size, pct)
+        if running >= batch_size:
+            break
+        time.sleep(3)
+
+    provision_time = time.time() - provision_start
+    final_running = utils.CountPods(ns, label, phase="Running")
+
+    logging.info(
+        "Provisioned %d/%d pods in %.1fs",
+        final_running,
+        batch_size,
+        provision_time,
+    )
+
+    # If not all pods reached Running, this is a failure
+    if final_running < batch_size:
+        raise RuntimeError(
+            f"Provisioning failed: only {final_running}/{batch_size} pods "
+            f"reached Running within {provision_timeout}s"
+        )
+
+    # 2. Record pod names and IP count before deletion
+    pod_names_before = set(_GetPodNames(ns, label))
+    total_pods = len(pod_names_before)
+    ip_before = _CountAllocatedIPs(ns, label)
+
+    logging.info("Recorded %d pods, %d IPs allocated", total_pods, ip_before)
+
+    # Brief settle
+    time.sleep(1)
+
+    # 3. Bulk delete: scale to 0
+    logging.info("Scaling to 0 (bulk delete of %d pods)...", total_pods)
+    _PatchReplicas(ns, warmpool_name, 0)
+
+    # 4. Poll: track pod disappearance and IP reclamation
+    t_delete = time.time()
+    deadline_drain = t_delete + drain_timeout
+    pod_gone_times = {}  # pod_name -> elapsed_s when first absent
+    ip_reclaim_time = None
+
+    while time.time() < deadline_drain:
+        elapsed = time.time() - t_delete
+
+        # Current pod names still present
+        current_pods = set(_GetPodNames(ns, label))
+        remaining = len(current_pods)
+
+        # Track which pods have disappeared (the first sighting wins)
+        for pn in pod_names_before - current_pods:
+            pod_gone_times.setdefault(pn, elapsed)
+
+        # IP count (scoped to warm pool label)
+        ips = _CountAllocatedIPs(ns, label)
+        if ip_reclaim_time is None and ips == 0:
+            ip_reclaim_time = elapsed
+
+        deleted = total_pods - remaining
+        pct = (deleted / total_pods * 100) if pod_names_before else 0
+        logging.info(
+            "[%.1fs] Deleted: %d/%d (%.0f%%)  IPs: %d",
+            elapsed,
+            deleted,
+            total_pods,
+            pct,
+            ips,
+        )
+
+        if remaining == 0:
+            break
+
+        time.sleep(poll_interval)
+
+    total_drain_time = time.time() - t_delete
+
+    # Only pods actually observed gone count as deleted; counting stuck pods
+    # too would overstate the deletion rate whenever the drain times out.
+    deleted_count = len(pod_gone_times)
+    if deleted_count < total_pods:
+        logging.warning(
+            "Drain timed out after %.0fs with %d/%d pods still present",
+            drain_timeout,
+            total_pods - deleted_count,
+            total_pods,
+        )
+
+    # Pods we never saw disappear (stuck) get the full drain time
+    for pn in pod_names_before:
+        if pn not in pod_gone_times:
+            pod_gone_times[pn] = total_drain_time
+
+    # 5. Compute per-pod deletion latencies
+    deletion_latencies = sorted(pod_gone_times.values())
+
+    ip_after = _CountAllocatedIPs(ns, label)
+    deletion_rate = deleted_count / total_drain_time if total_drain_time > 0 else 0
+
+    logging.info(
+        "Drain complete: %.1fs, rate=%.1f pods/sec, IPs: %d->%d",
+        total_drain_time,
+        deletion_rate,
+        ip_before,
+        ip_after,
+    )
+
+    wall_time = time.time() - t_wall_start
+
+    # 6. Build samples
+    extra = {
+        "batch_size": batch_size,
+        "final_running_count": final_running,
+        "deleted_count": deleted_count,
+        "ip_before": ip_before,
+        "ip_after": ip_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_provision_time",
+            round(provision_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_drain_time",
+            round(total_drain_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    # Latency statistics only exist when at least one pod was recorded.
+    if deletion_latencies:
+        for p in (50, 95, 99):
+            samples.append(
+                utils.MakeSample(
+                    f"{BENCHMARK_NAME}_latency_p{p}",
+                    round(_Percentile(deletion_latencies, p), 3),
+                    "seconds",
+                    ns,
+                    extra,
+                )
+            )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_latency_max",
+                round(deletion_latencies[-1], 3),
+                "seconds",
+                ns,
+                extra,
+            )
+        )
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_rate",
+            round(deletion_rate, 2),
+            "pods/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_ip_before",
+            float(ip_before),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_ip_after",
+            float(ip_after),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    if ip_reclaim_time is not None:
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ip_reclaim_time",
+                round(ip_reclaim_time, 2),
+                "seconds",
+                ns,
+                extra,
+            )
+        )
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_final_running_count",
+            float(final_running),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for batch_size=%d.", len(samples), batch_size)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool to 0 via _DrainPool(), then stop the port-forward."""
+    ns = FLAGS.gke_namespace
+    warmpool_name = FLAGS.gke_deletion_warmpool_name
+    label = FLAGS.gke_deletion_pod_label
+
+    logging.info("Cleanup: draining warm pool to 0.")
+    _DrainPool(ns, warmpool_name, label, FLAGS.gke_deletion_drain_timeout_s)
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+def Teardown(benchmark_spec):
+    """Tear down via gke_provision_utils.Teardown(); benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _PatchReplicas(namespace, warmpool_name, replicas):
+    """Patch a SandboxWarmPool's spec.replicas with a kubectl merge patch.
+
+    A non-zero kubectl exit code is logged as a warning instead of ignored.
+    """
+    patch_json = json.dumps({"spec": {"replicas": replicas}})
+    cmd = ["patch", "sandboxwarmpool", warmpool_name, "-n", namespace]
+    cmd += ["--type=merge", f"-p={patch_json}"]
+    _, stderr, rc = utils.RunKubectl(cmd, raise_on_failure=False)
+    if rc != 0:
+        # Callers poll pod counts afterwards; without this message a rejected
+        # patch would only show up as an unexplained timeout.
+        logging.warning(
+            "Patching %s to %d replicas failed: %s", warmpool_name, replicas, stderr
+        )
+
+
+def _DrainPool(namespace, warmpool_name, label, timeout_s):
+    """Scale the pool to 0 replicas and block until its pods are gone.
+
+    Also deletes every SandboxClaim in the namespace. If pods remain after
+    timeout_s, a warning is logged and the function returns normally.
+
+    Args:
+      namespace: Namespace holding the warm pool and its pods.
+      warmpool_name: SandboxWarmPool resource to scale down.
+      label: Label selector handed to utils.CountPods.
+      timeout_s: Maximum number of seconds to keep polling.
+    """
+    _PatchReplicas(namespace, warmpool_name, 0)
+
+    # Remove SandboxClaims that may still be lingering.
+    delete_claims = ["delete", "sandboxclaims", "--all", "-n", namespace]
+    delete_claims.append("--ignore-not-found=true")
+    utils.RunKubectl(delete_claims, timeout=60, raise_on_failure=False)
+
+    started = time.time()
+    while time.time() - started < timeout_s:
+        if utils.CountPods(namespace, label) == 0:
+            logging.info("Pool drained in %.1fs", time.time() - started)
+            return
+        time.sleep(2)
+
+    logging.warning("Drain timed out after %.0fs", timeout_s)
+
+
+def _GetPodNames(namespace, label):
+    """List the names of pods in the namespace that match a label selector.
+
+    A failed kubectl call and "no matching pods" both produce an empty list,
+    so callers cannot tell the two apart.
+
+    Returns:
+      A list of pod names; empty when kubectl fails or prints nothing.
+    """
+    cmd = ["get", "pods", "-n", namespace, "-l", label]
+    cmd += ["-o", "jsonpath={.items[*].metadata.name}"]
+    output, _, exit_code = utils.RunKubectl(
+        cmd,
+        timeout=30,
+        raise_on_failure=False,
+    )
+    kubectl_ok = exit_code == 0
+    # The output is split on whitespace, one token per pod name.
+    return output.split() if kubectl_ok and output else []
+
+
+def _CountAllocatedIPs(namespace, label):
+    """Count the pod IPs reported for pods that match the label selector.
+
+    Only pods carrying the warm pool label are queried, so the number tracks
+    IP release for exactly the pods being deleted.
+
+    Returns:
+      Number of whitespace-separated status.podIP values printed by kubectl;
+      0 when kubectl fails or prints nothing.
+    """
+    cmd = ["get", "pods", "-n", namespace, "-l", label]
+    cmd += ["-o", "jsonpath={.items[*].status.podIP}"]
+    output, _, exit_code = utils.RunKubectl(
+        cmd,
+        timeout=30,
+        raise_on_failure=False,
+    )
+    if exit_code != 0:
+        return 0
+    if not output:
+        return 0
+    # str.split() never yields empty strings, so every token is an IP.
+    return len(output.split())
+
+
+def _Percentile(sorted_values, pct):
+    """Interpolated percentile of ascending values; pct is clamped to 0-100."""
+    if not sorted_values:
+        return 0.0
+    last = len(sorted_values) - 1
+    idx = min(max(pct, 0), 100) / 100 * last
+    lo = int(idx)
+    frac = idx - lo
+    return sorted_values[lo] * (1 - frac) + sorted_values[min(lo + 1, last)] * frac
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
new file mode 100644
index 0000000000..ff35f2e92e
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
@@ -0,0 +1,891 @@
+"""Shared workload deployment utilities for GKE Agent Sandbox benchmarks.
+
+Provides idempotent functions to deploy the Agent Sandbox ecosystem
+(CRDs, templates, warm pools, router, ADK agent, PSI reader) onto a
+pre-provisioned GKE cluster. Called by each benchmark's Prepare() stage.
+
+All functions are idempotent -- safe to call repeatedly without side effects.
+"""
+
+import json
+import logging
+import os
+import subprocess
+import time
+
+from absl import flags
+
+FLAGS = flags.FLAGS
+
+# ---------------------------------------------------------------------------
+# Flags (registered once; shared across all benchmarks)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_string(
+    "gke_sandbox_version",
+    "v0.4.6",
+    "Agent Sandbox controller version (GitHub release tag).",
+)
+
+flags.DEFINE_string(
+    "gke_sandbox_router_image",
+    "",  # When empty, _DeriveImagePaths() may fill this in from project/region.
+    "Sandbox router container image. If empty, router deployment is skipped.",
+)
+
+flags.DEFINE_string(
+    "gke_adk_image",
+    "",  # When empty, _DeriveImagePaths() may fill this in from project/region.
+    "ADK agent container image. If empty, agent deployment is skipped.",
+)
+
+flags.DEFINE_string(
+    "gke_chromium_image",
+    "",  # When empty, _DeriveImagePaths() may fill this in from project/region.
+    "Chromium sandbox container image. If empty, uses placeholder.",
+)
+
+flags.DEFINE_integer(
+    "gke_warmpool_replicas",
+    2,
+    "Default warm pool replica count for SandboxWarmPool resources.",
+)
+
+flags.DEFINE_integer(
+    "gke_chromium_replicas",
+    1,
+    "Default Chromium warm pool replica count.",
+)
+
+flags.DEFINE_string(
+    "gke_python_image",
+    "registry.k8s.io/agent-sandbox/python-runtime-sandbox:v0.1.0",
+    "Python runtime sandbox container image.",
+)
+
+flags.DEFINE_integer(
+    "gke_deploy_timeout",
+    120,
+    "Timeout in seconds for workload deployment rollout.",
+)
+
+flags.DEFINE_string(
+    "gke_cluster_name",
+    "",  # When empty, _DeriveImagePaths() may derive "<user>-agentic-<suffix>".
+    "GKE cluster name. Used in ADK agent env vars for Workload Identity.",
+)
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------
+# Image path auto-derivation and mode-aware scheduling
+# (private helpers; DeployWorkloads() below calls _DeriveImagePaths() first)
+# ---------------------------------------------------------------------------
+
+
+def _DeriveImagePaths():
+    """Fill in empty image and cluster-name flags from project/region/machine type.
+
+    Flags that already hold a value are never overwritten. If either
+    --gke_project_id or --gke_region is empty, nothing is derived (the cluster
+    name included). Otherwise each empty flag is set as follows, using the same
+    convention as gke_image_build_utils.py and the bash build scripts:
+
+      Image flags get <region>-docker.pkg.dev/<project>/<path>:<arch> with <path>:
+        gke_adk_image             adk-repo/adk-agent
+        gke_sandbox_router_image  agent-sandbox/sandbox-router
+        gke_chromium_image        agent-sandbox/chrome-sandbox
+      gke_cluster_name gets <user>-agentic-<suffix>.
+
+    <arch> is "arm64" for the c4a machine family and "amd64" otherwise; an empty
+    --gke_sandbox_machine_type is treated as family "c4". <user> is $USER up to
+    its first "." (or "pkb" when that is empty) and <suffix> is the machine
+    family, with "c3" mapped to "c3metal".
+
+    Mutates the global FLAGS object in place and returns None.
+    """
+    project = getattr(FLAGS, "gke_project_id", "") or ""
+    region = getattr(FLAGS, "gke_region", "") or ""
+    machine_type = getattr(FLAGS, "gke_sandbox_machine_type", "") or ""
+
+    if not project or not region:
+        logging.info("Cannot auto-derive images: project=%s region=%s", project, region)
+        return
+
+    machine_family = machine_type.split("-")[0] if machine_type else "c4"
+    target_arch = "arm64" if machine_family == "c4a" else "amd64"
+
+    # Flag name -> "<repository>/<image>" path appended after
+    # <region>-docker.pkg.dev/<project>/.
+    image_paths = {
+        "gke_adk_image": "adk-repo/adk-agent",
+        "gke_sandbox_router_image": "agent-sandbox/sandbox-router",
+        "gke_chromium_image": "agent-sandbox/chrome-sandbox",
+    }
+    for flag_name, image_path in image_paths.items():
+        if getattr(FLAGS, flag_name):
+            continue  # An explicit value always wins.
+        image = "{}-docker.pkg.dev/{}/{}:{}".format(
+            region, project, image_path, target_arch
+        )
+        setattr(FLAGS, flag_name, image)
+        logging.info("Auto-derived %s: %s", flag_name, image)
+
+    if not FLAGS.gke_cluster_name:
+        # `or "pkb"` also covers USER being set but empty (or starting with "."),
+        # which would otherwise produce a cluster name beginning with "-".
+        user_prefix = os.environ.get("USER", "").split(".")[0] or "pkb"
+        suffix_map = {"c3": "c3metal"}
+        cluster_suffix = suffix_map.get(machine_family, machine_family)
+        FLAGS.gke_cluster_name = "{}-agentic-{}".format(
+            user_prefix, cluster_suffix
+        )
+        logging.info("Auto-derived gke_cluster_name: %s", FLAGS.gke_cluster_name)
+
+
+def _GetSandboxNodeSelector():
+    """Pick the nodeSelector that matches how the sandbox pool was provisioned.
+
+    Returns {"pkb_nodepool": "sandbox"} for --gke_provision_mode=native, else
+    (also if reading the flag raises) {"dedicated": "agentic-sandbox"}.
+    """
+    try:
+        native = FLAGS.gke_provision_mode == "native"
+    except (AttributeError, KeyError):
+        native = False  # Same outcome as the "custom" fallback.
+    if native:
+        return {"pkb_nodepool": "sandbox"}
+    return {"dedicated": "agentic-sandbox"}
+
+
+def _GetSandboxTolerations():
+    """Build the tolerations list for sandbox pods, by provisioning mode.
+
+    The gVisor toleration is always present. Unless --gke_provision_mode is
+    "native", the dedicated=agentic-sandbox toleration is placed in front of
+    it; a flag that raises AttributeError/KeyError counts as "custom".
+    """
+    try:
+        mode = FLAGS.gke_provision_mode
+    except (AttributeError, KeyError):
+        mode = "custom"
+
+    gvisor = {
+        "key": "sandbox.gke.io/runtime",
+        "operator": "Equal",
+        "value": "gvisor",
+        "effect": "NoSchedule",
+    }
+    if mode == "native":
+        return [gvisor]
+
+    dedicated = {
+        "key": "dedicated",
+        "operator": "Equal",
+        "value": "agentic-sandbox",
+        "effect": "NoSchedule",
+    }
+    # Keep the dedicated toleration first so the order of the returned list
+    # (and of anything rendered from it) stays the same.
+    return [dedicated, gvisor]
+
+
+def _NodeSelectorYaml(indent=6):
+    """Render the sandbox nodeSelector as a YAML block indented by `indent`."""
+    pad = " " * indent
+    entries = [
+        "{}  {}: {}".format(pad, key, value)
+        for key, value in _GetSandboxNodeSelector().items()
+    ]
+    return "\n".join(["{}nodeSelector:".format(pad)] + entries)
+
+
+def _TolerationsYaml(indent=6):
+    """Render the sandbox tolerations as a YAML block indented `indent` spaces."""
+    pad = " " * indent
+    rendered = ["{}tolerations:".format(pad)]
+    for entry in _GetSandboxTolerations():
+        rendered.append('{}  - key: "{}"'.format(pad, entry["key"]))
+        rendered.extend(
+            '{}    {}: "{}"'.format(pad, field, entry[field])
+            for field in ("operator", "value", "effect")
+        )
+    return "\n".join(rendered)
+
+
+def DeployWorkloads():
+    """Deploy the full Agent Sandbox ecosystem onto the GKE cluster.
+
+    Idempotent: safe to call repeatedly. Sequence:
+      1. Create namespace
+      2. Install Agent Sandbox CRDs
+      3. Deploy SandboxTemplates + WarmPools
+      4. Deploy ADK Agent (ServiceAccount + RBAC + Deployment + Service)
+      5. Deploy Sandbox Router (its pods run as the agent's ServiceAccount)
+      6. Deploy PSI Reader DaemonSet
+      7. Wait for ADK Agent rollout
+    """
+    _DeriveImagePaths()
+    ns = FLAGS.gke_namespace
+    logging.info("=== DeployWorkloads: namespace=%s ===", ns)
+
+    _CreateNamespace(ns)
+    _InstallCRDs()
+    _DeploySandboxTemplates(ns)
+    # The agent manifest creates adk-agent-sa; apply it before the router.
+    _DeployADKAgent(ns)
+    _DeploySandboxRouter(ns)
+    _DeployPSIReader(ns)
+    _WaitForAgentReady(ns)
+    logging.info("DeployWorkloads complete.")
+
+
+def DeploySnapshots():
+    """Deploy Pod Snapshot infrastructure (UC-A only).
+
+    Idempotent: safe to call repeatedly. Does nothing beyond logging a
+    warning when --gke_project_id is unset. Sequence:
+      1. Create GCS bucket (hierarchical namespace)
+      2. Create managed folder
+      3. Create KSA for snapshots
+      4. Bind IAM roles (skipped with a warning when the numeric project
+         number cannot be resolved)
+      5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy
+    """
+    ns = FLAGS.gke_namespace
+    project = FLAGS.gke_project_id
+    region = FLAGS.gke_region
+
+    if not project:
+        logging.warning("DeploySnapshots: gke_project_id not set, skipping.")
+        return
+
+    bucket_name = "agent-sandbox-snapshots-{}".format(project)
+    snapshot_folder = "benchmark-snapshots"
+    ksa_name = "pod-snapshot-sa"
+
+    logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name)
+
+    # 1. Create GCS bucket
+    _RunCmd(
+        [
+            "gcloud",
+            "storage",
+            "buckets",
+            "create",
+            "gs://{}".format(bucket_name),
+            "--uniform-bucket-level-access",
+            "--enable-hierarchical-namespace",
+            "--soft-delete-duration=0d",
+            "--location={}".format(region),
+            "--project={}".format(project),
+        ],
+        check=False,
+    )
+
+    # 2. Create managed folder
+    _RunCmd(
+        [
+            "gcloud",
+            "storage",
+            "managed-folders",
+            "create",
+            "gs://{}/{}/".format(bucket_name, snapshot_folder),
+            "--project={}".format(project),
+        ],
+        check=False,
+    )
+
+    # 3. Create KSA
+    _RunKubectl(
+        ["create", "serviceaccount", ksa_name, "--namespace", ns],
+        check=False,
+    )
+
+    # 4. IAM bindings
+    project_number = _GetProjectNumber(project)
+    if project_number:
+        _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name)
+    else:
+        logging.warning(
+            "DeploySnapshots: project number lookup failed for %s; "
+            "skipping snapshot IAM bindings.", project
+        )
+
+    # 5. Deploy PSSC + PSP
+    _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder)
+    logging.info("DeploySnapshots complete.")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _RunCmd(cmd, check=True, timeout=120):
+    """Run an argv list (no shell) and return (stripped stdout, returncode).
+
+    A non-zero exit never raises; with check=True it is logged as a warning
+    (first 500 chars of stderr). subprocess.TimeoutExpired is not caught.
+    """
+    logging.info("CMD: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+    status = result.returncode
+    if status != 0 and check:
+        failure_detail = result.stderr[:500]
+        logging.warning("Command failed (rc=%d): %s", status, failure_detail)
+    output = result.stdout.strip()
+    return output, status
+
+
+def _RunKubectl(args, check=True, timeout=120):
+    """Run `kubectl <args>` via _RunCmd, adding --kubeconfig when the flag is set."""
+    kubeconfig = FLAGS.gke_kubeconfig
+    prefix = ["--kubeconfig", kubeconfig] if kubeconfig else []
+    # Returns whatever _RunCmd returns: (stripped stdout, return code).
+    full_cmd = ["kubectl"] + prefix + list(args)
+    return _RunCmd(full_cmd, check=check, timeout=timeout)
+
+
+def _KubectlApply(manifest_str):
+    """Pipe a YAML manifest string to `kubectl apply -f -`.
+
+    Honors --gke_kubeconfig when set. The subprocess is given 60 seconds.
+
+    Args:
+        manifest_str: YAML document(s) to send on kubectl's stdin.
+
+    Returns:
+        True when kubectl exits with status 0, False otherwise. A failure is
+        logged as a warning with the first 500 characters of stderr.
+    """
+    kubeconfig = FLAGS.gke_kubeconfig
+    argv = ["kubectl"]
+    if kubeconfig:
+        argv.extend(["--kubeconfig", kubeconfig])
+    argv.extend(["apply", "-f", "-"])
+    result = subprocess.run(argv, input=manifest_str, capture_output=True, text=True, timeout=60)
+    succeeded = result.returncode == 0
+    if not succeeded:
+        logging.warning("kubectl apply failed: %s", result.stderr[:500])
+    return succeeded
+
+
+def _CreateNamespace(ns):
+    """Run `kubectl create namespace <ns>`; a failure (e.g. it already exists) is ignored."""
+    _RunKubectl(["create", "namespace", ns], check=False)
+
+
+def _InstallCRDs():
+    """Apply the Agent Sandbox release manifests for --gke_sandbox_version.
+
+    A single `kubectl apply` receives manifest.yaml and extensions.yaml from
+    the kubernetes-sigs/agent-sandbox GitHub release download URL. The call
+    uses check=False, so a failed apply is neither raised nor logged.
+    """
+    version = FLAGS.gke_sandbox_version
+    release_url = (
+        "https://github.com/kubernetes-sigs/agent-sandbox"
+        "/releases/download/{}".format(version)
+    )
+    logging.info("Installing Agent Sandbox CRDs (%s)", version)
+    apply_args = ["apply"]
+    for asset in ("manifest.yaml", "extensions.yaml"):
+        # Each release asset becomes its own "-f <url>" pair.
+        apply_args += ["-f", "{}/{}".format(release_url, asset)]
+    _RunKubectl(apply_args, check=False)
+
+
+def _DeploySandboxTemplates(ns):
+    """Apply SandboxTemplate + SandboxWarmPool for Python and Chromium, plus the Chromium ingress NetworkPolicy."""
+    python_image = FLAGS.gke_python_image
+    chromium_image = FLAGS.gke_chromium_image or "chromium-placeholder:latest"  # placeholder name when the flag is empty
+    warmpool_replicas = FLAGS.gke_warmpool_replicas
+    chromium_replicas = FLAGS.gke_chromium_replicas
+
+    manifest = """---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: python-sandbox-template
+  namespace: {ns}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: python-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: python-runtime
+        image: {python_image}
+{node_selector_yaml}
+{tolerations_yaml}
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: python-sandbox-warmpool
+  namespace: {ns}
+spec:
+  replicas: {warmpool_replicas}
+  sandboxTemplateRef:
+    name: python-sandbox-template
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: chromium-sandbox-template
+  namespace: {ns}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: chromium-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: chromium-runtime
+        image: {chromium_image}
+        command: ["/bin/sh", "-c"]
+        args:
+          - |
+            socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 &
+            exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank
+        ports:
+          - containerPort: 9223
+{node_selector_yaml}
+{tolerations_yaml}
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: chromium-sandbox-warmpool
+  namespace: {ns}
+spec:
+  replicas: {chromium_replicas}
+  sandboxTemplateRef:
+    name: chromium-sandbox-template
+---
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-orchestrator-to-chromium
+  namespace: {ns}
+spec:
+  podSelector:
+    matchLabels:
+      sandbox: chromium-sandbox-example
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: adk-agent
+    ports:
+    - protocol: TCP
+      port: 9223
+""".format(
+        ns=ns,
+        python_image=python_image,
+        chromium_image=chromium_image,
+        warmpool_replicas=warmpool_replicas,
+        chromium_replicas=chromium_replicas,
+        node_selector_yaml=_NodeSelectorYaml(),
+        tolerations_yaml=_TolerationsYaml(),
+    )
+    _KubectlApply(manifest)
+
+
+def _DeploySandboxRouter(ns):
+    """Apply the sandbox-router Service and 2-replica Deployment; no-op when the router image flag is unset."""
+    router_image = FLAGS.gke_sandbox_router_image  # NOTE: pods reference ServiceAccount adk-agent-sa, which this manifest does not create
+    if not router_image:
+        logging.info("Sandbox router image not set, skipping router deployment.")
+        return
+
+    manifest = """---
+apiVersion: v1
+kind: Service
+metadata:
+  name: sandbox-router-svc
+  namespace: {ns}
+spec:
+  type: ClusterIP
+  selector:
+    app: sandbox-router
+  ports:
+  - name: http
+    protocol: TCP
+    port: 8080
+    targetPort: 8080
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: sandbox-router-deployment
+  namespace: {ns}
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: sandbox-router
+  template:
+    metadata:
+      labels:
+        app: sandbox-router
+    spec:
+      serviceAccountName: adk-agent-sa
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: topology.kubernetes.io/zone
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: sandbox-router
+      containers:
+      - name: router
+        image: {router_image}
+        ports:
+        - containerPort: 8080
+        env:
+        - name: ALLOW_UNAUTHENTICATED_ROUTER
+          value: "true"
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 5
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "512Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+      securityContext:
+        runAsUser: 1000
+        runAsGroup: 1000
+""".format(ns=ns, router_image=router_image)
+    _KubectlApply(manifest)
+
+
+def _DeployADKAgent(ns):
+    """Apply the ADK agent ServiceAccount, ClusterRole, RoleBinding, Deployment and Service; no-op when the image flag is unset."""
+    adk_image = FLAGS.gke_adk_image
+    if not adk_image:
+        logging.info("ADK agent image not set, skipping agent deployment.")
+        return
+
+    project = FLAGS.gke_project_id or ""  # unset flags render as empty env values below
+    region = FLAGS.gke_region or ""
+    cluster = FLAGS.gke_cluster_name or ""
+
+    manifest = """---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: adk-agent-sa
+  namespace: {ns}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: adk-agent-sandbox-role
+rules:
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxes"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxwarmpool", "sandboxwarmpools"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["extensions.agents.x-k8s.io"]
+    resources: ["sandboxclaims"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: [""]
+    resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods/portforward"]
+    verbs: ["create"]
+  - apiGroups: ["metrics.k8s.io"]
+    resources: ["pods"]
+    verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: adk-agent-sandbox-binding
+  namespace: {ns}
+subjects:
+  - kind: ServiceAccount
+    name: adk-agent-sa
+    namespace: {ns}
+roleRef:
+  kind: ClusterRole
+  name: adk-agent-sandbox-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: adk-agent
+  namespace: {ns}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: adk-agent
+  template:
+    metadata:
+      labels:
+        app: adk-agent
+    spec:
+      serviceAccountName: adk-agent-sa
+      containers:
+      - name: adk-agent
+        imagePullPolicy: Always
+        image: {adk_image}
+        resources:
+          limits:
+            memory: "16384Mi"
+            cpu: "6000m"
+          requests:
+            memory: "512Mi"
+            cpu: "1000m"
+        ports:
+        - containerPort: 8080
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 15
+          periodSeconds: 30
+          timeoutSeconds: 10
+          failureThreshold: 6
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        env:
+          - name: PORT
+            value: "8080"
+          - name: GOOGLE_CLOUD_PROJECT
+            value: "{project}"
+          - name: GOOGLE_CLOUD_LOCATION
+            value: "{region}"
+          - name: GOOGLE_GENAI_USE_VERTEXAI
+            value: "true"
+          - name: CLUSTER_NAME
+            value: "{cluster}"
+          - name: AGENTIC_NAMESPACE
+            value: "{ns}"
+          - name: SANDBOX_ROUTER_URL
+            value: "http://sandbox-router-svc.{ns}.svc.cluster.local:8080"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: adk-agent
+  namespace: {ns}
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 8080
+  selector:
+    app: adk-agent
+""".format(ns=ns, adk_image=adk_image, project=project, region=region, cluster=cluster)
+    _KubectlApply(manifest)
+
+
+def _DeployPSIReader(ns):
+    """Apply the psi-reader DaemonSet: a privileged, hostPID busybox pod with host /sys/fs/cgroup and /proc mounted read-only."""
+    manifest = """---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: psi-reader
+  namespace: {ns}
+  labels:
+    app: psi-reader
+spec:
+  selector:
+    matchLabels:
+      app: psi-reader
+  template:
+    metadata:
+      labels:
+        app: psi-reader
+    spec:
+{node_selector_yaml}
+{tolerations_yaml}
+      hostPID: true
+      containers:
+      - name: reader
+        image: busybox:1.36
+        command: ["sleep", "infinity"]
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: cgroup
+          mountPath: /host/sys/fs/cgroup
+          readOnly: true
+        - name: proc
+          mountPath: /host/proc
+          readOnly: true
+        resources:
+          requests:
+            cpu: "10m"
+            memory: "16Mi"
+          limits:
+            cpu: "50m"
+            memory: "32Mi"
+      volumes:
+      - name: cgroup
+        hostPath:
+          path: /sys/fs/cgroup
+      - name: proc
+        hostPath:
+          path: /proc
+""".format(
+        ns=ns,
+        node_selector_yaml=_NodeSelectorYaml(),
+        tolerations_yaml=_TolerationsYaml(),
+    )
+    _KubectlApply(manifest)
+
+
+def _WaitForAgentReady(ns):
+    """Wait for the adk-agent Deployment rollout, logging a warning on failure.
+
+    No-op when --gke_adk_image is unset. kubectl enforces --gke_deploy_timeout
+    itself; the subprocess limit is set 30 s above it because _RunKubectl's
+    default of 120 s would otherwise abort longer waits with
+    subprocess.TimeoutExpired.
+    """
+    if not FLAGS.gke_adk_image:
+        logging.info("ADK agent not deployed, skipping rollout wait.")
+        return
+    timeout = FLAGS.gke_deploy_timeout
+    logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout)
+    _RunKubectl(
+        ["rollout", "status", "deployment/adk-agent", "-n", ns,
+         "--timeout={}s".format(timeout)],
+        check=True,  # a failed or timed-out rollout is logged by _RunCmd
+        timeout=timeout + 30,
+    )
+
+
+def _GetProjectNumber(project):
+    """Look up the numeric GCP project number for a project ID.
+
+    Returns the stripped output of `gcloud projects describe` when it exits
+    with status 0, otherwise None.
+    """
+    describe_cmd = ["gcloud", "projects", "describe", project]
+    describe_cmd.append("--format=value(projectNumber)")
+    # check=False: a failed lookup is reported through the None return only.
+    number, status = _RunCmd(describe_cmd, check=False)
+    if status != 0:
+        return None
+    return number
+
+
+def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name):
+    """Grant the bucket-level IAM roles used for pod snapshot access.
+
+    Issues three `gcloud storage buckets add-iam-policy-binding` calls against
+    gs://<bucket_name>, in this order, each with check=False:
+      1. roles/storage.bucketViewer to the principalSet of namespace `ns` in
+         the <project>.svc.id.goog workload identity pool.
+      2. roles/storage.objectAdmin to the principal for service account
+         `ksa_name` in namespace `ns` of the same pool.
+      3. roles/storage.objectUser to
+         service-<project_number>@container-engine-robot.iam.gserviceaccount.com.
+
+    Args:
+        bucket_name: Bucket name without the gs:// prefix.
+        project: GCP project ID (names the workload identity pool).
+        project_number: Numeric GCP project number.
+        ns: Kubernetes namespace used in the member identifiers.
+        ksa_name: Kubernetes service account name.
+    """
+    pool_path = (
+        "//iam.googleapis.com/projects/{}/locations/global"
+        "/workloadIdentityPools/{}.svc.id.goog"
+    ).format(project_number, project)
+    robot_account = (
+        "service-{}@container-engine-robot.iam.gserviceaccount.com"
+    ).format(project_number)
+    grants = (
+        (
+            "principalSet:{}/namespace/{}".format(pool_path, ns),
+            "roles/storage.bucketViewer",
+        ),
+        (
+            "principal:{}/subject/ns/{}/sa/{}".format(pool_path, ns, ksa_name),
+            "roles/storage.objectAdmin",
+        ),
+        ("serviceAccount:{}".format(robot_account), "roles/storage.objectUser"),
+    )
+    for member, role in grants:
+        _RunCmd(
+            [
+                "gcloud",
+                "storage",
+                "buckets",
+                "add-iam-policy-binding",
+                "gs://{}".format(bucket_name),
+                "--member={}".format(member),
+                "--role={}".format(role),
+                "--quiet",
+            ],
+            check=False,
+        )
+
+
+def _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder):
+    """Apply a PodSnapshotStorageConfig (GCS bucket + path) and a manual-trigger PodSnapshotPolicy in namespace `ns`."""
+    manifest = """---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotStorageConfig
+metadata:
+  name: benchmark-pssc-gcs
+spec:
+  snapshotStorageConfig:
+    gcs:
+      bucket: "{bucket_name}"
+      path: "{snapshot_folder}"
+---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotPolicy
+metadata:
+  name: benchmark-psp
+  namespace: {ns}
+spec:
+  storageConfigName: benchmark-pssc-gcs
+  selector:
+    matchLabels:
+      app: snapshot-benchmark-workload
+  triggerConfig:
+    type: manual
+    postCheckpoint: resume
+""".format(ns=ns, bucket_name=bucket_name, snapshot_folder=snapshot_folder)
+    _KubectlApply(manifest)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
new file mode 100644
index 0000000000..38b85b4e11
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -0,0 +1,403 @@
+"""Shared image build utilities for GKE Agent Sandbox benchmarks.
+
+Builds and pushes container images (ADK agent, Chrome sandbox, Sandbox Router)
+via Google Cloud Build. Called from:
+  - Provision() when --gke_skip_image_build is False (via BuildImages())
+  - gke_prerequisite_setup.py (via build_images_with_config())
+
+Images built:
+  - ADK Agent: perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ -> {region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch}
+  - Chrome Sandbox: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch}
+  - Sandbox Router: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch}
+"""
+
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+
+from absl import flags
+
+FLAGS = flags.FLAGS
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def build_images_with_config(project, region, machine_type, cloud_build_sa=None):
+    """Build and push all three benchmark images — no FLAGS dependency.
+
+    Callable from both PKB (via BuildImages()) and gke_prerequisite_setup.py.
+    The ADK agent, Chrome sandbox and sandbox router images are built one
+    after another; an exception from any builder stops the sequence.
+
+    Args:
+        project: GCP project ID.
+        region: GCP region (e.g. "us-central1").
+        machine_type: Machine type string (e.g. "c4-standard-8").
+            Only the family before the first "-" matters: "c4a" selects
+            arm64, any other family (or an empty value) selects amd64.
+        cloud_build_sa: Cloud Build service account email.
+            If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com".
+    """
+    # Only the machine family decides the CPU architecture.
+    family = machine_type.split("-")[0] if machine_type else "c4"
+    if family == "c4a":
+        target_arch = "arm64"
+    else:
+        target_arch = "amd64"
+
+    # All three image paths share one registry prefix.
+    registry = f"{region}-docker.pkg.dev/{project}"
+    adk_image = f"{registry}/adk-repo/adk-agent:{target_arch}"
+    chrome_image = f"{registry}/agent-sandbox/chrome-sandbox:{target_arch}"
+    router_image = f"{registry}/agent-sandbox/sandbox-router:{target_arch}"
+
+    if cloud_build_sa is None:
+        cloud_build_sa = f"adk-cloud-build-sa@{project}.iam.gserviceaccount.com"
+
+    logger.info("=== Building Container Images ===")
+    logger.info("  Project: %s", project)
+    logger.info("  Region: %s", region)
+    logger.info("  Architecture: %s", target_arch)
+    logger.info("  Cloud Build SA: %s", cloud_build_sa)
+
+    shared = {
+        "project": project,
+        "region": region,
+        "target_arch": target_arch,
+        "cloud_build_sa": cloud_build_sa,
+    }
+
+    # 1. ADK agent (the only builder that also takes the machine type)
+    _BuildADKAgentImage(
+        image_path=adk_image,
+        machine_type=machine_type,
+        **shared,
+    )
+
+    # 2. Chrome sandbox
+    _BuildChromeSandboxImage(
+        image_path=chrome_image,
+        **shared,
+    )
+
+    # 3. Sandbox router
+    _BuildSandboxRouterImage(
+        image_path=router_image,
+        **shared,
+    )
+
+    logger.info("=== All images built successfully ===")
+    logger.info("  ADK Agent:      %s", adk_image)
+    logger.info("  Chrome Sandbox: %s", chrome_image)
+    logger.info("  Sandbox Router: %s", router_image)
+
+
+def BuildImages():
+    """Build all benchmark images using values taken from absl FLAGS.
+
+    Called from PKB Provision. Forwards --gke_project_id, --gke_region and
+    --gke_sandbox_machine_type to build_images_with_config(), leaving
+    cloud_build_sa at its default.
+    """
+    project_id = FLAGS.gke_project_id
+    gcp_region = FLAGS.gke_region
+    sandbox_machine = FLAGS.gke_sandbox_machine_type
+    build_images_with_config(project_id, gcp_region, sandbox_machine)
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _BuildADKAgentImage(
+    project, region, target_arch, image_path, cloud_build_sa, machine_type=None
+):
+    """Build and push the ADK Agent image with Cloud Build.
+
+    Submits the agent source directory using the cloudbuild.yaml committed
+    next to it, passing the image path and platform via --substitutions.
+    generated.env is rendered into that directory first. `region` is only
+    forwarded to the env-file renderer.
+
+    Raises:
+        RuntimeError: If the agent directory or its cloudbuild.yaml is
+            missing, or if a command fails.
+    """
+    logger.info("Building ADK Agent image: %s", image_path)
+
+    # Layout: <repo_root>/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/
+    source_parts = ("perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")
+    agent_dir = os.path.join(_FindRepoRoot(), *source_parts)
+    if not os.path.isdir(agent_dir):
+        raise RuntimeError(
+            f"ADK agent source not found at {agent_dir}. "
+            "Ensure you are running from the repository root."
+        )
+
+    # Render generated.env before the directory is uploaded.
+    _GenerateEnvFile(agent_dir, project, region, machine_type=machine_type)
+
+    # The committed build config is reused as-is, never rewritten.
+    cloudbuild_path = os.path.join(agent_dir, "cloudbuild.yaml")
+    if not os.path.isfile(cloudbuild_path):
+        raise RuntimeError(
+            f"cloudbuild.yaml not found at {cloudbuild_path}. "
+            "Expected perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml to exist."
+        )
+
+    substitutions = f"_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}"
+    service_account = f"projects/{project}/serviceAccounts/{cloud_build_sa}"
+    submit_cmd = ["gcloud", "builds", "submit", agent_dir]
+    submit_cmd.append(f"--config={cloudbuild_path}")
+    submit_cmd.append(f"--substitutions={substitutions}")
+    submit_cmd.append(f"--project={project}")
+    submit_cmd.append(f"--service-account={service_account}")
+    _RunCmd(submit_cmd)
+
+    logger.info("ADK Agent image built successfully.")
+
+
+def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_build_sa):
+    """Build and push the Chrome Sandbox image.
+
+    Sparse-clones examples/chrome-sandbox from kubernetes-sigs/agent-sandbox
+    into a temporary directory (always removed), appends socat to the
+    Dockerfile's chromium install line -- warning if that line is absent,
+    since the image would then lack socat -- and submits a Cloud Build.
+
+    Raises:
+        RuntimeError: If the Dockerfile is missing or a command fails.
+    """
+    logger.info("Building Chrome Sandbox image: %s", image_path)
+
+    tmp_dir = tempfile.mkdtemp(prefix="chrome-sandbox-")
+    try:
+        # Clone agent-sandbox repo (sparse checkout)
+        logger.info("Cloning agent-sandbox chrome-sandbox source...")
+        _RunCmd(
+            ["git", "clone", "--depth", "1", "--filter=blob:none", "--sparse",
+             "https://github.com/kubernetes-sigs/agent-sandbox.git", tmp_dir]
+        )
+        _RunCmd(
+            ["git", "sparse-checkout", "set", "examples/chrome-sandbox"],
+            cwd=tmp_dir,
+        )
+
+        build_dir = os.path.join(tmp_dir, "examples", "chrome-sandbox")
+        dockerfile_path = os.path.join(build_dir, "Dockerfile")
+        if not os.path.isfile(dockerfile_path):
+            raise RuntimeError(f"chrome-sandbox Dockerfile not found at {build_dir}")
+
+        # Patch Dockerfile: add socat for CDP proxy
+        install_line = (
+            "RUN apt-get update && apt-get install --yes --no-install-recommends chromium"
+        )
+        with open(dockerfile_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        if install_line not in content:
+            logger.warning(
+                "chromium install line not found in %s; socat was NOT added.", dockerfile_path
+            )
+        with open(dockerfile_path, "w", encoding="utf-8") as f:
+            f.write(content.replace(install_line, install_line + " socat"))
+
+        # Submit Cloud Build (generates cloudbuild.yaml in temp dir)
+        _SubmitCloudBuild(
+            source_dir=build_dir, image_path=image_path, target_arch=target_arch,
+            project=project, cloud_build_sa=cloud_build_sa,
+        )
+        logger.info("Chrome Sandbox image built successfully.")
+    finally:
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_build_sa):
+    """Build and push the Sandbox Router image.
+
+    Sparse-clones the sandbox-router directory of kubernetes-sigs/agent-sandbox
+    into a temporary directory and submits it to Cloud Build. The temporary
+    directory is removed afterwards, whether or not the build succeeded.
+    `region` is accepted but not used here.
+
+    Raises:
+        RuntimeError: If the Dockerfile is missing or a command fails.
+    """
+    logger.info("Building Sandbox Router image: %s", image_path)
+
+    repo_url = "https://github.com/kubernetes-sigs/agent-sandbox.git"
+    router_subdir = "clients/python/agentic-sandbox-client/sandbox-router"
+    checkout_dir = tempfile.mkdtemp(prefix="sandbox-router-")
+    try:
+        logger.info("Cloning agent-sandbox router source...")
+        _RunCmd(
+            [
+                "git", "clone", "--depth", "1",
+                "--filter=blob:none", "--sparse",
+                repo_url, checkout_dir,
+            ]
+        )
+        # Limit the working tree to the router sources.
+        _RunCmd(
+            ["git", "sparse-checkout", "set", router_subdir],
+            cwd=checkout_dir,
+        )
+
+        build_dir = os.path.join(checkout_dir, *router_subdir.split("/"))
+        has_dockerfile = os.path.isfile(os.path.join(build_dir, "Dockerfile"))
+        if not has_dockerfile:
+            raise RuntimeError(f"sandbox-router Dockerfile not found at {build_dir}")
+
+        # _SubmitCloudBuild writes its own cloudbuild.yaml into build_dir.
+        _SubmitCloudBuild(
+            source_dir=build_dir,
+            image_path=image_path,
+            target_arch=target_arch,
+            project=project,
+            cloud_build_sa=cloud_build_sa,
+        )
+
+        logger.info("Sandbox Router image built successfully.")
+    finally:
+        shutil.rmtree(checkout_dir, ignore_errors=True)
+
+
+def _GenerateEnvFile(
+    agent_dir, project, region, machine_type=None, namespace="agentic"
+):
+    """Render generated.env from generated.env.template in `agent_dir`.
+
+    Each ${NAME} placeholder listed below is replaced by plain text
+    substitution. Logs a warning and returns without writing anything when
+    the template file is missing. The cluster name is
+    <user>-agentic-<suffix>: <user> is $USER up to its first "." (default
+    "benchmark"), <suffix> comes from the machine family (unknown -> "c4").
+    """
+    template_path = os.path.join(agent_dir, "generated.env.template")
+    if not os.path.isfile(template_path):
+        logger.warning(
+            "generated.env.template not found at %s, skipping.", template_path
+        )
+        return
+
+    with open(template_path, "r") as src:
+        rendered = src.read()
+
+    family = machine_type.split("-")[0] if machine_type else "c4"
+    cluster_suffix = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"}.get(family, "c4")
+    # split() returns the whole name when it contains no "."
+    user_prefix = os.environ.get("USER", "benchmark").split(".")[0]
+    router_url = f"http://sandbox-router-svc.{namespace}.svc.cluster.local:8080"
+
+    substitutions = (
+        ("${CLUSTER_NAME}", f"{user_prefix}-agentic-{cluster_suffix}"),
+        ("${GOOGLE_CLOUD_PROJECT}", project),
+        ("${GOOGLE_CLOUD_LOCATION}", region),
+        ("${AGENTIC_NAMESPACE}", namespace),
+        ("${GOOGLE_GENAI_USE_VERTEXAI}", "true"),
+        ("${SANDBOX_ROUTER_URL}", router_url),
+        ("${SAMPLE_COUNT}", "20"),
+        ("${SAMPLE_WARMUP}", "0"),
+        ("${PAYLOAD_SIZE_MB}", "1"),
+        ("${PAYLOAD_ITERATIONS}", "20"),
+    )
+    for placeholder, value in substitutions:
+        rendered = rendered.replace(placeholder, value)
+
+    output_path = os.path.join(agent_dir, "generated.env")
+    with open(output_path, "w") as dst:
+        dst.write(rendered)
+
+    logger.info("Generated %s", output_path)
+
+
+def _SubmitCloudBuild(source_dir, image_path, target_arch, project, cloud_build_sa):
+    """Write a generic cloudbuild.yaml into `source_dir` and submit the build.
+
+    Used for the Chrome and Router images, which are built from temporary
+    checkouts; the ADK agent uses its own committed cloudbuild.yaml instead.
+    """
+    build_config = """steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.']
+    env:
+      - 'DOCKER_BUILDKIT=1'
+images:
+  - '${_IMAGE_PATH}'
+options:
+  logging: CLOUD_LOGGING_ONLY
+substitutions:
+  _IMAGE_PATH: ''
+  _PLATFORM: 'linux/amd64'
+"""
+    config_path = os.path.join(source_dir, "cloudbuild.yaml")
+    with open(config_path, "w") as config_file:
+        config_file.write(build_config)
+
+    # Per-image values passed here override the defaults declared in the config.
+    substitutions = f"_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}"
+    service_account = f"projects/{project}/serviceAccounts/{cloud_build_sa}"
+    _RunCmd(
+        [
+            "gcloud", "builds", "submit", source_dir,
+            f"--config={config_path}",
+            f"--substitutions={substitutions}",
+            f"--project={project}",
+            f"--service-account={service_account}",
+        ]
+    )
+
+
+def _FindRepoRoot():
+    """Locate the repository root by probing for the ADK agent sources.
+
+    Tried in order: four levels above this file's directory, the current
+    working directory, then its parent. Raises RuntimeError if none match.
+    """
+    marker = os.path.join("perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")
+    # dirname() x5 strips the file name, then agentic/, kubernetes/,
+    # linux_benchmarks/ and perfkitbenchmarker/.
+    root = os.path.abspath(__file__)
+    for _ in range(5):
+        root = os.path.dirname(root)
+    if os.path.isdir(os.path.join(root, marker)):
+        return root
+
+    cwd = os.getcwd()
+    for root in (cwd, os.path.dirname(cwd)):
+        if os.path.isdir(os.path.join(root, marker)):
+            return root
+    raise RuntimeError(
+        "Cannot locate repository root (looking for perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/). "
+        "Run from the repository root directory."
+    )
+
+
+def _RunCmd(cmd, cwd=None):
+    """Run an argv list (no shell) and return its stdout; raise on failure.
+
+    The child inherits the caller's environment unchanged. TLS certificate
+    validation for gcloud is deliberately not disabled here; export
+    CLOUDSDK_AUTH_DISABLE_SSL_VALIDATION yourself if your network needs it.
+
+    Raises:
+        RuntimeError: On a non-zero exit (message carries the last 500
+            characters of stderr).
+        subprocess.TimeoutExpired: If the command runs longer than 600 s.
+    """
+    logger.info("  CMD: %s", " ".join(cmd))
+    proc = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, timeout=600)
+    if proc.returncode != 0:
+        raise RuntimeError(
+            f"Command failed (rc={proc.returncode}): {' '.join(cmd)}\n"
+            f"stderr: {proc.stderr[-500:]}"
+        )
+    return proc.stdout
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
new file mode 100644
index 0000000000..9ddac86ea0
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
@@ -0,0 +1,613 @@
+"""PKB Benchmark: GKE Agent Payload Transfer Saturation (Use Case D).
+
+Atomic single-point measurement of payload transfer latency from a gVisor
+sandbox back to the orchestrator on a pre-provisioned GKE cluster.  Measures
+generation time, serialization time, stdout write time, total transfer time,
+throughput, and RSS at a given payload_size_mb and concurrent_sessions count.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the payload_size_mb parameter across iterations to
+find the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_payload \
+                --gke_payload_size_mb=50 \
+                --gke_payload_iterations=20 \
+                --gke_payload_concurrent_sessions=5 \
+                --gke_namespace=agentic \
+                --gke_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - gke_payload_orchestrator_transfer_mean       (ms)
+  - gke_payload_orchestrator_transfer_p50        (ms)
+  - gke_payload_orchestrator_transfer_p95        (ms)
+  - gke_payload_orchestrator_transfer_p99        (ms)
+  - gke_payload_orchestrator_transfer_min        (ms)
+  - gke_payload_orchestrator_transfer_max        (ms)
+  - gke_payload_sandbox_payload_size_bytes       (bytes)
+  - gke_payload_sandbox_payload_encoded_size_bytes (bytes)
+  - gke_payload_sandbox_payload_iterations       (count)
+  - gke_payload_sandbox_generation_time_mean     (ms)
+  - gke_payload_sandbox_generation_time_p50      (ms)
+  - gke_payload_sandbox_generation_time_p95      (ms)
+  - gke_payload_sandbox_generation_time_p99      (ms)
+  - gke_payload_sandbox_generation_time_min      (ms)
+  - gke_payload_sandbox_generation_time_max      (ms)
+  - gke_payload_sandbox_serialization_time_mean  (ms)
+  - gke_payload_sandbox_serialization_time_p50   (ms)
+  - gke_payload_sandbox_serialization_time_p95   (ms)
+  - gke_payload_sandbox_serialization_time_p99   (ms)
+  - gke_payload_sandbox_serialization_time_min   (ms)
+  - gke_payload_sandbox_serialization_time_max   (ms)
+  - gke_payload_sandbox_stdout_time_mean         (ms)
+  - gke_payload_sandbox_stdout_time_p50          (ms)
+  - gke_payload_sandbox_stdout_time_p95          (ms)
+  - gke_payload_sandbox_stdout_time_p99          (ms)
+  - gke_payload_sandbox_stdout_time_min          (ms)
+  - gke_payload_sandbox_stdout_time_max          (ms)
+  - gke_payload_sandbox_transfer_time_mean       (ms)
+  - gke_payload_sandbox_transfer_time_p50        (ms)
+  - gke_payload_sandbox_transfer_time_p95        (ms)
+  - gke_payload_sandbox_transfer_time_p99        (ms)
+  - gke_payload_sandbox_transfer_time_min        (ms)
+  - gke_payload_sandbox_transfer_time_max        (ms)
+  - gke_payload_sandbox_throughput_mean           (MB/s)
+  - gke_payload_sandbox_throughput_p50            (MB/s)
+  - gke_payload_sandbox_throughput_min            (MB/s)
+  - gke_payload_sandbox_rss_start                (MB)
+  - gke_payload_sandbox_rss_end                  (MB)
+  - gke_payload_sandbox_rss_growth               (MB)
+  - gke_payload_wall_time                        (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_payload"  # also the prefix of every emitted metric name
+BENCHMARK_CONFIG = """
+gke_payload:
+  description: >
+    Atomic single-point payload transfer saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"  # warm pool patched in Run() and drained in Cleanup()
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"  # label passed along with the warm pool name
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags (all read in Run())
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_float(
+    "gke_payload_size_mb",
+    1.0,
+    "Payload size in megabytes to transfer from the sandbox.",
+)
+
+flags.DEFINE_integer(
+    "gke_payload_iterations",
+    20,
+    "Number of transfer iterations per sandbox session.",
+)
+
+flags.DEFINE_integer(
+    "gke_payload_concurrent_sessions",
+    5,  # also used as the warm-pool replica count when patching is enabled
+    "Number of parallel sandbox sessions.",
+)
+
+flags.DEFINE_integer(
+    "gke_payload_exec_timeout",
+    300,  # seconds; sent to the agent API as sandbox_exec_timeout_s
+    "Sandbox command execution timeout in seconds.",
+)
+
+flags.DEFINE_bool(
+    "gke_payload_patch_warmpool",
+    True,
+    "Patch SandboxWarmPool replicas to match concurrent_sessions before measurement.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision the GKE cluster and its dependencies via gke_provision_utils."""
+    gke_provision_utils.Provision()  # benchmark_spec is not used here
+
+
+def GetConfig(user_config):
+    """Load and return the benchmark config via configs.LoadConfig.
+
+    The config has no vm_groups. NOTE(review): confirm PKB then skips Provision()/Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, run the non-required agent health check, start the port-forward."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    utils.CheckAgentHealthz(required=False)  # NOTE(review): runs before EnsurePortForward() — confirm order is intended
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """POST one payload measurement to the agent API and convert it to samples.
+
+    Returns:
+      List of samples built by utils.MakeSample: one per aggregate key present, plus wall time.
+    """
+    ns = FLAGS.gke_namespace
+    payload_size_mb = FLAGS.gke_payload_size_mb
+    iterations = FLAGS.gke_payload_iterations
+    concurrent = FLAGS.gke_payload_concurrent_sessions
+
+    logging.info(
+        "=== Run: payload_size_mb=%s, iterations=%d, concurrent=%d ===",
+        payload_size_mb,
+        iterations,
+        concurrent,
+    )
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Patch warm pool (moved from Prepare for sweep compatibility)
+    if FLAGS.gke_payload_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=concurrent,
+            label=_WARMPOOL_LABEL,
+        )
+
+    # POST to agent API
+    payload = {
+        "payload_size_mb": payload_size_mb,
+        "payload_iterations": iterations,
+        "concurrent_sessions": concurrent,
+        "sandbox_exec_timeout_s": FLAGS.gke_payload_exec_timeout,
+    }
+
+    t0 = time.monotonic()  # monotonic clock: the interval cannot jump with wall-clock adjustments
+    result = utils.CallAgentApi("/benchmark/python/payload", payload)
+    wall_time = time.monotonic() - t0
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    agg = result.get("aggregate") or {}  # tolerate a missing or null aggregate
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "payload_size_mb": payload_size_mb,
+        "payload_iterations": iterations,
+        "concurrent_sessions": concurrent,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # Orchestrator-side transfer latency
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_mean_ms",
+        "orchestrator_transfer_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p50_ms",
+        "orchestrator_transfer_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p95_ms",
+        "orchestrator_transfer_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_p99_ms",
+        "orchestrator_transfer_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_min_ms",
+        "orchestrator_transfer_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "orchestrator_transfer_max_ms",
+        "orchestrator_transfer_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Payload metadata
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_size_bytes",
+        "sandbox_payload_size_bytes",
+        "bytes",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_encoded_size_bytes",
+        "sandbox_payload_encoded_size_bytes",
+        "bytes",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_payload_iterations",
+        "sandbox_payload_iterations",
+        "count",
+        ns,
+        extra,
+    )
+
+    # Generation time (os.urandom)
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_mean_ms",
+        "sandbox_generation_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p50_ms",
+        "sandbox_generation_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p95_ms",
+        "sandbox_generation_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_p99_ms",
+        "sandbox_generation_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_min_ms",
+        "sandbox_generation_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_generation_time_max_ms",
+        "sandbox_generation_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Serialization time (base64 encode)
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_mean_ms",
+        "sandbox_serialization_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p50_ms",
+        "sandbox_serialization_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p95_ms",
+        "sandbox_serialization_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_p99_ms",
+        "sandbox_serialization_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_min_ms",
+        "sandbox_serialization_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_serialization_time_max_ms",
+        "sandbox_serialization_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Stdout write time (gVisor Gofer write syscall)
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_mean_ms",
+        "sandbox_stdout_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p50_ms",
+        "sandbox_stdout_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p95_ms",
+        "sandbox_stdout_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_p99_ms",
+        "sandbox_stdout_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_min_ms",
+        "sandbox_stdout_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_stdout_time_max_ms",
+        "sandbox_stdout_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Transfer time (serialization + stdout write — threshold metric)
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_mean_ms",
+        "sandbox_transfer_time_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p50_ms",
+        "sandbox_transfer_time_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p95_ms",
+        "sandbox_transfer_time_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_p99_ms",
+        "sandbox_transfer_time_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_min_ms",
+        "sandbox_transfer_time_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_transfer_time_max_ms",
+        "sandbox_transfer_time_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Throughput
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_mean_mbps",
+        "sandbox_throughput_mean",
+        "MB/s",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_p50_mbps",
+        "sandbox_throughput_p50",
+        "MB/s",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_throughput_min_mbps",
+        "sandbox_throughput_min",
+        "MB/s",
+        ns,
+        extra,
+    )
+
+    # RSS
+    _emit(samples, agg, "sandbox_rss_start_mb", "sandbox_rss_start", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_end_mb", "sandbox_rss_end", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_growth_mb", "sandbox_rss_growth", "MB", ns, extra)
+
+    # Wall time (always emitted, measured around the API call above)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info(
+        "Emitted %d samples for payload_size_mb=%s.", len(samples), payload_size_mb
+    )
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool and stop the port-forward; the cluster is left running."""
+    ns = FLAGS.gke_namespace
+    logging.info("Cleanup: draining warm pool.")
+    try:
+        utils.DrainWarmPool(
+            namespace=ns, warmpool_name=_WARMPOOL_NAME, label=_WARMPOOL_LABEL
+        )
+    finally:
+        # Stop the port-forward even if draining raised, so a failed drain
+        # does not leave it running.
+        utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+def Teardown(benchmark_spec):
+    """Tear down the GKE cluster and its dependencies via gke_provision_utils."""
+    gke_provision_utils.Teardown()  # benchmark_spec is not used here
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append a sample for agg[agg_key] to samples; do nothing if it is absent or None.
+
+    The metric is named "<BENCHMARK_NAME>_<metric_suffix>".
+    """
+    value = agg.get(agg_key)
+    if value is None:
+        return
+    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
+    new_sample = utils.MakeSample(
+        metric_name, value, unit, namespace, extra
+    )
+    samples.append(new_sample)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
new file mode 100644
index 0000000000..70b9d95a4c
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
@@ -0,0 +1,516 @@
+#!/usr/bin/env python3
+"""Prerequisite Setup for GKE Agentic Benchmarking.
+
+Creates infrastructure that PKB's native container_cluster provisioner
+cannot manage: VPC, Subnet, Cloud Router, NAT, Firewall Rules, Artifact
+Registry, Cloud Build SA, IAM bindings, and container image builds.
+
+This script is run ONCE before PKB provisioning. PKB then references the
+pre-existing VPC/subnet via --gce_network_name and --gce_subnet_name flags.
+
+Usage:
+  # Full setup (including image builds):
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisite_setup \
+      --project_id=my-project \
+      --region=us-central1 --zone=us-central1-a \
+      --machine_type=c4-standard-8
+
+  # Setup without image builds:
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisite_setup \
+      --project_id=my-project \
+      --region=us-central1 --zone=us-central1-a \
+      --skip_image_build
+
+  # Teardown:
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisite_setup \
+      --project_id=my-project \
+      --region=us-central1 --zone=us-central1-a \
+      --teardown
+
+  # Teardown (keep images):
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisite_setup \
+      --project_id=my-project \
+      --region=us-central1 --zone=us-central1-a \
+      --teardown --keep_images
+"""
+
+import argparse
+import logging
+import os
+import subprocess
+import sys
+import time
+
+logging.basicConfig(  # module-level call: logging is configured as soon as this file is loaded
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(message)s",
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _run(cmd, check=True, timeout=300, capture=False):
+    """Log and execute a command given as an argv list or a whitespace-separated string.
+
+    Returns the CompletedProcess; raises RuntimeError on a non-zero exit when check is set.
+    """
+    is_list = isinstance(cmd, list)
+    printable = " ".join(cmd) if is_list else cmd
+    argv = cmd if is_list else cmd.split()
+    logging.info("CMD: %s", printable)
+    completed = subprocess.run(argv, capture_output=capture, text=True, timeout=timeout)
+    if completed.returncode == 0 or not check:
+        return completed
+    captured_err = completed.stderr if capture else ""  # stderr is only available when captured
+    logging.error("Command failed (rc=%d): %s", completed.returncode, captured_err)
+    raise RuntimeError(f"Command failed: {printable}")
+
+
+def _exists(cmd):
+    """Return True when the given describe/get command exits with status 0.
+
+    Output is captured and discarded; the probe is limited to 60 seconds.
+    """
+    argv = cmd if isinstance(cmd, list) else cmd.split()
+    probe = subprocess.run(argv, capture_output=True, text=True, timeout=60)
+    exit_ok = probe.returncode == 0
+    return exit_ok
+
+
+def _derive_config(args):
+    """Build the dict of resource names and settings derived from the CLI args.
+
+    Resource names are prefixed with $USER up to its first "." ("pkb" when
+    unset); disk type, architecture, cluster suffix and master CIDR follow
+    from the machine family (text before the first "-" of args.machine_type).
+    """
+    user_prefix = os.environ.get("USER", "pkb").split(".")[0]
+    machine_family = args.machine_type.split("-")[0]
+
+    # c3 is the only family given pd-balanced; c4a the only one built as arm64.
+    disk_type = "hyperdisk-balanced" if machine_family != "c3" else "pd-balanced"
+    target_arch = "amd64" if machine_family != "c4a" else "arm64"
+    # Any machine type containing "metal" uses the fixed "c3metal" suffix.
+    cluster_suffix = "c3metal" if "metal" in args.machine_type else machine_family
+
+    # Distinct /28 master CIDR per known suffix; unknown suffixes share one.
+    master_cidr = {
+        "c4": "172.16.0.0/28",
+        "c4d": "172.16.0.16/28",
+        "c4a": "172.16.0.32/28",
+        "c3metal": "172.16.0.48/28",
+    }.get(cluster_suffix, "172.16.0.64/28")
+
+    build_sa = "adk-cloud-build-sa"
+    registry = f"{args.region}-docker.pkg.dev/{args.project_id}"
+    return {
+        "user_prefix": user_prefix,
+        "machine_family": machine_family,
+        "disk_type": disk_type,
+        "target_arch": target_arch,
+        "cluster_suffix": cluster_suffix,
+        "master_cidr": master_cidr,
+        "vpc_name": f"{user_prefix}-agentic-vpc",
+        "subnet_name": f"{user_prefix}-agentic-subnet",
+        "subnet_cidr": args.subnet_cidr,
+        "router_name": f"{user_prefix}-agentic-nat-router",
+        "nat_name": f"{user_prefix}-agentic-nat-config",
+        "adk_repo_name": "adk-repo",
+        "sandbox_repo_name": "agent-sandbox",
+        "cloud_build_sa": build_sa,
+        "cloud_build_sa_email": f"{build_sa}@{args.project_id}.iam.gserviceaccount.com",
+        "adk_image": f"{registry}/adk-repo/adk-agent:{target_arch}",
+        "chromium_image": f"{registry}/agent-sandbox/chrome-sandbox:{target_arch}",
+        "router_image": f"{registry}/agent-sandbox/sandbox-router:{target_arch}",
+    }
+
+
+# ---------------------------------------------------------------------------
+# Setup Steps
+# ---------------------------------------------------------------------------
+
+
+def enable_apis(args):
+    """Enable, in a single gcloud call, the GCP service APIs listed below."""
+    logging.info("=== Enabling GCP APIs ===")
+    required_services = (
+        "container.googleapis.com",
+        "artifactregistry.googleapis.com",
+        "cloudbuild.googleapis.com",
+        "aiplatform.googleapis.com",
+        "storage.googleapis.com",
+        "iam.googleapis.com",
+        "connectgateway.googleapis.com",
+        "gkehub.googleapis.com",
+        "gkeconnect.googleapis.com",
+        "iap.googleapis.com",
+    )
+    enable_cmd = ["gcloud", "services", "enable"]
+    enable_cmd.extend(required_services)
+    enable_cmd.append(f"--project={args.project_id}")
+    _run(enable_cmd)
+    logging.info("APIs enabled.")
+
+
+def create_vpc(args, config):
+    """Create the custom-subnet-mode VPC config["vpc_name"] unless it already exists."""
+    logging.info("=== Creating VPC ===")
+    vpc = config["vpc_name"]
+    project_flag = f"--project={args.project_id}"
+
+    if _exists(["gcloud", "compute", "networks", "describe", vpc, project_flag]):
+        logging.info("VPC %s already exists.", vpc)
+        return
+
+    _run([
+        "gcloud", "compute", "networks", "create", vpc,
+        "--subnet-mode=custom",
+        project_flag,
+    ])
+    logging.info("VPC %s created.", vpc)
+
+
+def create_subnet(args, config):
+    """Create the subnet config["subnet_name"] in the VPC unless it already exists."""
+    logging.info("=== Creating Subnet ===")
+    subnet = config["subnet_name"]
+    subnets_cmd = ["gcloud", "compute", "networks", "subnets"]
+    # The describe probe is scoped by region and project only.
+    scope = [f"--region={args.region}", f"--project={args.project_id}"]
+
+    if _exists(subnets_cmd + ["describe", subnet] + scope):
+        logging.info("Subnet %s already exists.", subnet)
+        return
+
+    create_cmd = subnets_cmd + [
+        "create", subnet,
+        f"--network={config['vpc_name']}",
+        f"--region={args.region}",
+        f"--range={config['subnet_cidr']}",
+        f"--project={args.project_id}",
+    ]
+    _run(create_cmd)
+    logging.info("Subnet %s created.", subnet)
+
+
+def create_firewall_rules(args, config):
+    """Create the two ingress allow rules on the VPC, skipping any that exist.
+
+    Rules (both priority 1000):
+      <vpc>-allow-iap-ssh:  tcp:22 from 35.235.240.0/20
+      <vpc>-allow-internal: tcp, udp, icmp from config["subnet_cidr"]
+    """
+    logging.info("=== Creating Firewall Rules ===")
+    vpc = config["vpc_name"]
+    project_flag = f"--project={args.project_id}"
+
+    # Each spec: (rule name, allowed protocols/ports, source ranges, priority).
+    rule_specs = (
+        (f"{vpc}-allow-iap-ssh", "tcp:22", "35.235.240.0/20", "1000"),
+        (f"{vpc}-allow-internal", "tcp,udp,icmp", config["subnet_cidr"], "1000"),
+    )
+
+    for name, allowed, sources, priority in rule_specs:
+        already_present = _exists([
+            "gcloud", "compute", "firewall-rules", "describe", name,
+            project_flag,
+        ])
+        if already_present:
+            logging.info("Firewall rule %s already exists.", name)
+            continue
+
+        create_cmd = [
+            "gcloud", "compute", "firewall-rules", "create", name,
+            f"--network={vpc}",
+            "--direction=INGRESS",
+            "--action=ALLOW",
+            f"--rules={allowed}",
+            f"--source-ranges={sources}",
+            f"--priority={priority}",
+            project_flag,
+        ]
+        _run(create_cmd)
+        logging.info("Firewall rule %s created.", name)
+
+
+def create_router_and_nat(args, config):
+    """Ensure the Cloud Router and the NAT config on it exist, creating what is missing."""
+    logging.info("=== Creating Cloud Router + NAT ===")
+    router = config["router_name"]
+    region_flag = f"--region={args.region}"
+    project_flag = f"--project={args.project_id}"
+    # Router
+    router_exists = _exists([
+        "gcloud", "compute", "routers", "describe", router,
+        region_flag, project_flag,
+    ])
+    if router_exists:
+        logging.info("Router %s already exists.", router)
+    else:
+        _run([
+            "gcloud", "compute", "routers", "create", router,
+            f"--network={config['vpc_name']}",
+            region_flag, project_flag,
+        ])
+        logging.info("Router %s created.", router)
+
+    # NAT (covers all subnet IP ranges; external IPs are auto-allocated)
+    nat = config["nat_name"]
+    nat_exists = _exists([
+        "gcloud", "compute", "routers", "nats", "describe", nat,
+        f"--router={router}", region_flag, project_flag,
+    ])
+    if nat_exists:
+        logging.info("NAT %s already exists.", nat)
+    else:
+        _run([
+            "gcloud", "compute", "routers", "nats", "create", nat,
+            f"--router={router}", region_flag,
+            "--nat-all-subnet-ip-ranges",
+            "--auto-allocate-nat-external-ips",
+            project_flag,
+        ])
+        logging.info("NAT %s created.", nat)
+
+
+def create_artifact_registry(args, config):
+    """Create the two Docker-format Artifact Registry repos if they are missing.
+
+    The repo names come from config["adk_repo_name"] and
+    config["sandbox_repo_name"]; both are created in args.region.
+    """
+    logging.info("=== Creating Artifact Registry Repos ===")
+    for repo in [config["adk_repo_name"], config["sandbox_repo_name"]]:
+        # Probe with the shared _exists() helper, like the other create_* steps,
+        # instead of an inline subprocess call.
+        if _exists([
+            "gcloud", "artifacts", "repositories", "describe", repo,
+            f"--location={args.region}",
+            f"--project={args.project_id}",
+        ]):
+            logging.info("AR repo %s already exists.", repo)
+            continue
+        _run([
+            "gcloud", "artifacts", "repositories", "create", repo,
+            "--repository-format=docker",
+            f"--location={args.region}",
+            f"--project={args.project_id}",
+        ])
+        logging.info("AR repo %s created.", repo)
+
+
+def create_cloud_build_sa(args, config):
+    """Create the Cloud Build service account if missing, then bind its project roles."""
+    logging.info("=== Creating Cloud Build SA ===")
+    sa_email = config["cloud_build_sa_email"]
+    if not _exists([
+        "gcloud", "iam", "service-accounts", "describe", sa_email,
+        f"--project={args.project_id}",
+    ]):
+        _run([
+            "gcloud", "iam", "service-accounts", "create",
+            config["cloud_build_sa"],
+            f"--display-name={config['cloud_build_sa']}",
+            f"--project={args.project_id}",
+        ])
+        logging.info("SA %s created. Waiting for propagation...", sa_email)
+        time.sleep(10)  # fixed pause before the new account is used in bindings
+    else:
+        logging.info("SA %s already exists.", sa_email)
+
+    roles = [
+        "roles/logging.logWriter", "roles/storage.objectViewer",
+        "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer",
+    ]
+    # Binding stays best effort (check=False), but failures are collected and reported.
+    failed_roles = []
+    for role in roles:
+        result = _run([
+            "gcloud", "projects", "add-iam-policy-binding", args.project_id,
+            f"--member=serviceAccount:{sa_email}", f"--role={role}",
+            "--condition=None", "--quiet",
+        ], check=False)
+        if result.returncode != 0:
+            failed_roles.append(role)
+
+    if failed_roles:
+        logging.warning("Could not bind roles to %s: %s", sa_email, ", ".join(failed_roles))
+    else:
+        logging.info("Cloud Build SA roles bound.")
+
+
+def build_images(args, config):
+    """Build and push the container images unless --skip_image_build was given.
+
+    The Cloud Build work itself is done by
+    gke_image_build_utils.build_images_with_config().
+    """
+    if args.skip_image_build:
+        logging.info("=== Skipping Image Builds (--skip_image_build) ===")
+        return
+
+    logging.info("=== Building Container Images ===")
+    # Imported here, not at module top, so the module is only loaded when building.
+    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+        gke_image_build_utils as image_build,
+    )
+    build_kwargs = {
+        "project": args.project_id,
+        "region": args.region,
+        "machine_type": args.machine_type,
+        "cloud_build_sa": config["cloud_build_sa_email"],
+    }
+    image_build.build_images_with_config(**build_kwargs)
+    logging.info("=== Image builds complete ===")
+
+
+# ---------------------------------------------------------------------------
+# Teardown Steps
+# ---------------------------------------------------------------------------
+
+
+def teardown(args, config):
+    """Delete the prerequisite resources, continuing past individual failures.
+
+    Order: Artifact Registry repos (kept with --keep_images), Cloud Build SA
+    role bindings and account, NAT, router, firewall rules, subnet, VPC.
+    """
+    logging.info("=== Prerequisite Teardown ===")
+    project_flag = f"--project={args.project_id}"
+    region_flag = f"--region={args.region}"
+    def _best_effort(cmd):
+        # check=False: a non-zero exit from a delete does not raise.
+        return _run(cmd, check=False)
+
+    # AR repos
+    if args.keep_images:
+        logging.info("Keeping AR repos (--keep_images).")
+    else:
+        logging.info("Deleting Artifact Registry repos...")
+        for repo in (config["adk_repo_name"], config["sandbox_repo_name"]):
+            _best_effort([
+                "gcloud", "artifacts", "repositories", "delete", repo,
+                f"--location={args.region}", project_flag, "--quiet",
+            ])
+
+    # Cloud Build SA: remove the project-level role bindings, then the account.
+    logging.info("Deleting Cloud Build SA...")
+    sa_email = config["cloud_build_sa_email"]
+    member_flag = f"--member=serviceAccount:{sa_email}"
+    for role in (
+        "roles/logging.logWriter",
+        "roles/storage.objectViewer",
+        "roles/artifactregistry.writer",
+        "roles/serviceusage.serviceUsageConsumer",
+    ):
+        _best_effort([
+            "gcloud", "projects", "remove-iam-policy-binding", args.project_id,
+            member_flag, f"--role={role}", "--quiet",
+        ])
+    _best_effort([
+        "gcloud", "iam", "service-accounts", "delete", sa_email,
+        project_flag, "--quiet",
+    ])
+
+    # NAT + Router
+    logging.info("Deleting NAT + Router...")
+    router = config["router_name"]
+    _best_effort([
+        "gcloud", "compute", "routers", "nats", "delete", config["nat_name"],
+        f"--router={router}", region_flag, project_flag, "--quiet",
+    ])
+    _best_effort([
+        "gcloud", "compute", "routers", "delete", router,
+        region_flag, project_flag, "--quiet",
+    ])
+
+    # Firewall rules
+    logging.info("Deleting firewall rules...")
+    vpc = config["vpc_name"]
+    for suffix in ("allow-iap-ssh", "allow-internal"):
+        _best_effort([
+            "gcloud", "compute", "firewall-rules", "delete", f"{vpc}-{suffix}",
+            project_flag, "--quiet",
+        ])
+
+    # Subnet + VPC
+    logging.info("Deleting subnet + VPC...")
+    _best_effort([
+        "gcloud", "compute", "networks", "subnets", "delete", config["subnet_name"],
+        region_flag, project_flag, "--quiet",
+    ])
+    _best_effort(["gcloud", "compute", "networks", "delete", vpc, project_flag, "--quiet"])
+
+    logging.info("=== Prerequisite Teardown Complete ===")
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def parse_args():
+    """Parse the command-line options for prerequisite setup or teardown."""
+    parser = argparse.ArgumentParser(
+        description="Prerequisite Setup for GKE Agentic Benchmarking",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument("--project_id", required=True, help="GCP project ID")
+    parser.add_argument("--region", default="us-central1", help="GCP region (default: us-central1)")
+    parser.add_argument("--zone", default="us-central1-a", help="GCP zone (default: us-central1-a)")
+    parser.add_argument("--machine_type", default="c4-standard-8",
+                        help="Machine type for sandbox nodes (default: c4-standard-8)")
+    parser.add_argument("--subnet_cidr", default="10.134.20.0/24",
+                        help="Subnet CIDR range (default: 10.134.20.0/24)")
+    # Boolean switches: False unless the flag is passed.
+    for flag, text in (("--skip_image_build", "Skip container image builds"),
+                       ("--teardown", "Tear down prerequisite resources instead of creating them"),
+                       ("--keep_images", "Keep AR repos during teardown")):
+        parser.add_argument(flag, action="store_true", default=False, help=text)
+    return parser.parse_args()
+
+
+def main():
+    """Entry point: print the plan, then run either teardown or the full setup sequence."""
+    args = parse_args()
+    config = _derive_config(args)
+    rule = "=" * 60
+
+    print(f"\n{rule}")
+    print(f"Project:      {args.project_id}")
+    print(f"Region:       {args.region}")
+    print(f"Zone:         {args.zone}")
+    print(f"Machine Type: {args.machine_type}")
+    print(f"VPC:          {config['vpc_name']}")
+    print(f"Subnet:       {config['subnet_name']} ({config['subnet_cidr']})")
+    print(f"Mode:         {'TEARDOWN' if args.teardown else 'SETUP'}")
+    print(f"{rule}\n")
+
+    if args.teardown:
+        teardown(args, config)
+        return
+
+    enable_apis(args)
+    # The remaining steps all take (args, config) and run in this fixed order.
+    for step in (create_vpc, create_subnet, create_firewall_rules, create_router_and_nat,
+                 create_artifact_registry, create_cloud_build_sa, build_images):
+        step(args, config)
+
+    print(f"\n{rule}")
+    print("Prerequisite setup complete!")
+    print(f"{rule}")
+
+    print(f"\nPKB flags to reference this infrastructure:")
+    print(f"  --gce_network_name={config['vpc_name']}")
+    print(f"\nNext: Run PKB with container_cluster provisioning:")
+    print(f"  python pkb.py --benchmarks=gke_python_density \\")
+    print(f"      --gce_network_name={config['vpc_name']} \\")
+    print(f"      --zone={args.zone} \\")
+    print(f"      --gke_use_beta=true \\")
+    print(f"      --gke_additional_flags=\"--enable-pod-snapshots,...,--subnetwork={config['subnet_name']}\"")
+
+if __name__ == "__main__":
+    main()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
new file mode 100644
index 0000000000..4792f5a543
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
@@ -0,0 +1,698 @@
+"""Shared Provision/Teardown utilities for GKE Agent Sandbox benchmarks.
+
+Provides the full GKE infrastructure lifecycle (create and destroy) used
+by all seven UC benchmark scripts.  Each benchmark's Provision() and
+Teardown() functions delegate to the public functions in this module.
+
+Infrastructure created (in order):
+  1. VPC + Subnet
+  2. Firewall rules (IAP SSH, internal)
+  3. Cloud Router + NAT
+  4. GKE Cluster (DPv2, Workload Identity, optional Pod Snapshots)
+  5. Fleet registration / credential retrieval
+  6. gVisor sandbox node pool
+  7. Artifact Registry repositories
+  8. Cloud Build service account + IAM bindings
+  9. Container images (optional, gated by --gke_skip_image_build)
+
+Teardown respects two flags:
+  --gke_teardown_keep_images: skip AR repo deletion
+  --gke_teardown_keep_infra:  only delete K8s workloads, keep cluster/network
+"""
+
+import logging
+import subprocess
+import time
+
+from absl import flags
+
+FLAGS = flags.FLAGS
+
+# Image build utilities (Phase 3) are not imported at module level.
+# The import is deferred to Provision() to avoid a circular dependency
+# and to keep flag registration order under control.
+
+# ---------------------------------------------------------------------------
+# Provision/Teardown flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_string(
+    "gke_project_id",
+    "",
+    "GCP project ID for the benchmark cluster. Required for Provision/Teardown.",
+)
+
+flags.DEFINE_string(
+    "gke_region",
+    "us-central1",
+    "GCP region for networking and Artifact Registry.",
+)
+
+flags.DEFINE_string(
+    "gke_zone",
+    "us-central1-a",
+    "GCP zone for the GKE cluster and node pools.",
+)
+
+flags.DEFINE_string(
+    "gke_sandbox_machine_type",
+    "c4-standard-8",
+    "Machine type for the gVisor sandbox node pool.",
+)
+
+flags.DEFINE_string(
+    "gke_cluster_suffix",
+    "",
+    "Cluster name suffix. If empty, derived from machine family (e.g. 'c4').",
+)
+
+flags.DEFINE_string(
+    "gke_gke_version",
+    "1.35.3-gke.1389000",
+    "GKE cluster version.",
+)
+
+flags.DEFINE_bool(
+    "gke_use_connect_gateway",
+    True,
+    "Use Connect Gateway for kubectl access instead of direct public endpoint.",
+)
+
+flags.DEFINE_bool(
+    "gke_enable_pod_snapshots",
+    True,
+    "Enable GKE Pod Snapshots (Preview feature, uses gcloud beta).",
+)
+
+flags.DEFINE_bool(
+    "gke_skip_image_build",
+    True,
+    "Skip container image builds during Provision. Set to False on first run.",
+)
+
+flags.DEFINE_integer(
+    "gke_sandbox_node_count",
+    1,
+    "Number of nodes in the gVisor sandbox node pool.",
+)
+
+flags.DEFINE_integer(
+    "gke_sandbox_disk_size",
+    100,
+    "Disk size in GB for sandbox node pool nodes.",
+)
+
+flags.DEFINE_integer(
+    "gke_sandbox_max_pods_per_node",
+    250,
+    "Max pods per node on the sandbox node pool.",
+)
+
+flags.DEFINE_string(
+    "gke_subnet_cidr",
+    "10.134.20.0/24",
+    "CIDR range for the benchmark subnet.",
+)
+
+flags.DEFINE_bool(
+    "gke_teardown_keep_images",
+    False,
+    "If True, skip Artifact Registry repo deletion during Teardown.",
+)
+
+flags.DEFINE_bool(
+    "gke_teardown_keep_infra",
+    False,
+    "If True, only delete K8s workloads during Teardown (keep cluster/network).",
+)
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _run(cmd, timeout=300, check=True):
+    """Run a shell command and return CompletedProcess.
+
+    Args:
+        cmd: List of command arguments.
+        timeout: Max seconds to wait.
+        check: If True, raise on non-zero exit.
+
+    Returns:
+        subprocess.CompletedProcess
+    """
+    logging.info("CMD: %s", " ".join(cmd))
+    proc = subprocess.run(
+        cmd, capture_output=True, text=True, timeout=timeout,
+    )
+    if proc.returncode != 0:
+        logging.warning("CMD stderr: %s", proc.stderr[-500:] if proc.stderr else "")
+        if check:
+            raise RuntimeError(
+                f"Command failed (rc={proc.returncode}): {' '.join(cmd[:6])}\n"
+                f"{proc.stderr[-300:]}"
+            )
+    return proc
+
+
+def _run_quiet(cmd, timeout=300):
+    """Run a command, suppress errors (idempotent checks)."""
+    return _run(cmd, timeout=timeout, check=False)
+
+
+def _resource_exists(cmd):
+    """Return True if a gcloud describe/get command succeeds."""
+    proc = _run_quiet(cmd)
+    return proc.returncode == 0
+
+
+def _derive_config():
+    """Derive computed configuration values from flags.
+
+    Returns:
+        dict with all computed names and settings.
+    """
+    project = FLAGS.gke_project_id
+    if not project:
+        raise RuntimeError("--gke_project_id is required for Provision/Teardown.")
+
+    region = FLAGS.gke_region
+    zone = FLAGS.gke_zone
+    machine_type = FLAGS.gke_sandbox_machine_type
+
+    # Derive machine family (e.g. "c4" from "c4-standard-8")
+    machine_family = machine_type.split("-")[0]
+
+    # Derive cluster suffix
+    cluster_suffix = FLAGS.gke_cluster_suffix
+    if not cluster_suffix:
+        if machine_family == "c3" and "metal" in machine_type:
+            cluster_suffix = "c3metal"
+        else:
+            cluster_suffix = machine_family
+
+    # Derive disk type
+    if machine_family == "c3":
+        disk_type = "pd-balanced"
+    else:
+        disk_type = "hyperdisk-balanced"
+
+    # Derive architecture
+    if machine_family == "c4a":
+        target_arch = "arm64"
+    else:
+        target_arch = "amd64"
+
+    # Derive master CIDR
+    master_cidr_map = {
+        "c4": "172.16.0.0/28",
+        "c4d": "172.16.0.16/28",
+        "c4a": "172.16.0.32/28",
+        "c3metal": "172.16.0.48/28",
+    }
+    master_cidr = master_cidr_map.get(cluster_suffix, "172.16.0.64/28")
+
+    # Use a prefix derived from project for naming
+    name_prefix = "pkb"
+
+    cluster_name = f"{name_prefix}-agentic-{cluster_suffix}"
+    vpc_name = f"{name_prefix}-agentic-vpc"
+    subnet_name = f"{name_prefix}-agentic-subnet"
+    router_name = f"{name_prefix}-agentic-nat-router"
+    nat_name = f"{name_prefix}-agentic-nat-config"
+    sandbox_pool_name = "agentic-sandbox-pool"
+    adk_repo_name = "adk-repo"
+    sandbox_repo_name = "agent-sandbox"
+    cloud_build_sa = "adk-cloud-build-sa"
+    cloud_build_sa_email = f"{cloud_build_sa}@{project}.iam.gserviceaccount.com"
+    namespace = FLAGS.gke_namespace
+
+    return {
+        "project": project,
+        "region": region,
+        "zone": zone,
+        "machine_type": machine_type,
+        "machine_family": machine_family,
+        "cluster_suffix": cluster_suffix,
+        "disk_type": disk_type,
+        "target_arch": target_arch,
+        "master_cidr": master_cidr,
+        "cluster_name": cluster_name,
+        "vpc_name": vpc_name,
+        "subnet_name": subnet_name,
+        "subnet_cidr": FLAGS.gke_subnet_cidr,
+        "router_name": router_name,
+        "nat_name": nat_name,
+        "sandbox_pool_name": sandbox_pool_name,
+        "adk_repo_name": adk_repo_name,
+        "sandbox_repo_name": sandbox_repo_name,
+        "cloud_build_sa": cloud_build_sa,
+        "cloud_build_sa_email": cloud_build_sa_email,
+        "namespace": namespace,
+        "gke_version": FLAGS.gke_gke_version,
+        "sandbox_node_count": FLAGS.gke_sandbox_node_count,
+        "sandbox_disk_size": FLAGS.gke_sandbox_disk_size,
+        "sandbox_max_pods": FLAGS.gke_sandbox_max_pods_per_node,
+        "use_connect_gateway": FLAGS.gke_use_connect_gateway,
+        "enable_pod_snapshots": FLAGS.gke_enable_pod_snapshots,
+        "sandbox_version": FLAGS.gke_sandbox_version,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Provision steps
+# ---------------------------------------------------------------------------
+
+
+def _enable_apis(cfg):
+    """Enable required GCP services."""
+    logging.info("Enabling required GCP APIs...")
+    apis = [
+        "iap.googleapis.com",
+        "container.googleapis.com",
+        "artifactregistry.googleapis.com",
+        "cloudbuild.googleapis.com",
+        "aiplatform.googleapis.com",
+        "storage.googleapis.com",
+        "iam.googleapis.com",
+        "connectgateway.googleapis.com",
+        "gkehub.googleapis.com",
+        "gkeconnect.googleapis.com",
+    ]
+    _run(["gcloud", "services", "enable"] + apis + [f"--project={cfg['project']}"],
+         timeout=120)
+
+
+def _create_network(cfg):
+    """Create VPC, subnet, firewall rules, Cloud Router, and NAT."""
+    project = cfg["project"]
+    region = cfg["region"]
+    vpc = cfg["vpc_name"]
+    subnet = cfg["subnet_name"]
+    cidr = cfg["subnet_cidr"]
+    router = cfg["router_name"]
+    nat = cfg["nat_name"]
+
+    # VPC
+    if not _resource_exists(["gcloud", "compute", "networks", "describe", vpc,
+                             f"--project={project}"]):
+        logging.info("Creating VPC %s...", vpc)
+        _run(["gcloud", "compute", "networks", "create", vpc,
+              "--subnet-mode=custom", f"--project={project}"])
+
+    # Subnet
+    if not _resource_exists(["gcloud", "compute", "networks", "subnets", "describe",
+                             subnet, f"--region={region}", f"--project={project}"]):
+        logging.info("Creating subnet %s...", subnet)
+        _run(["gcloud", "compute", "networks", "subnets", "create", subnet,
+              f"--network={vpc}", f"--region={region}",
+              f"--range={cidr}", f"--project={project}"])
+
+    # Firewall: IAP SSH
+    fw_iap = f"{vpc}-allow-iap-ssh"
+    if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe",
+                             fw_iap, f"--project={project}"]):
+        logging.info("Creating firewall rule %s...", fw_iap)
+        _run(["gcloud", "compute", "firewall-rules", "create", fw_iap,
+              f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW",
+              "--rules=tcp:22", "--source-ranges=35.235.240.0/20",
+              "--priority=1000", f"--project={project}"])
+
+    # Firewall: internal
+    fw_int = f"{vpc}-allow-internal"
+    if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe",
+                             fw_int, f"--project={project}"]):
+        logging.info("Creating firewall rule %s...", fw_int)
+        _run(["gcloud", "compute", "firewall-rules", "create", fw_int,
+              f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW",
+              "--rules=tcp,udp,icmp", f"--source-ranges={cidr}",
+              "--priority=1000", f"--project={project}"])
+
+    # Cloud Router
+    if not _resource_exists(["gcloud", "compute", "routers", "describe", router,
+                             f"--region={region}", f"--project={project}"]):
+        logging.info("Creating Cloud Router %s...", router)
+        _run(["gcloud", "compute", "routers", "create", router,
+              f"--network={vpc}", f"--region={region}", f"--project={project}"])
+
+    # Cloud NAT
+    if not _resource_exists(["gcloud", "compute", "routers", "nats", "describe", nat,
+                             f"--router={router}", f"--region={region}",
+                             f"--project={project}"]):
+        logging.info("Creating Cloud NAT %s...", nat)
+        _run(["gcloud", "compute", "routers", "nats", "create", nat,
+              f"--router={router}", f"--region={region}",
+              "--nat-all-subnet-ip-ranges", "--auto-allocate-nat-external-ips",
+              f"--project={project}"])
+
+
+def _create_cluster(cfg):
+    """Create the GKE cluster with DPv2 and Workload Identity."""
+    project = cfg["project"]
+    zone = cfg["zone"]
+    cluster = cfg["cluster_name"]
+
+    if _resource_exists(["gcloud", "container", "clusters", "describe", cluster,
+                         f"--zone={zone}", f"--project={project}"]):
+        logging.info("GKE cluster %s already exists.", cluster)
+        return
+
+    logging.info("Creating GKE cluster %s...", cluster)
+
+    if cfg["enable_pod_snapshots"]:
+        snapshot_flag = ["--enable-pod-snapshots"]
+        logging.info("Pod Snapshots ENABLED (using gcloud beta).")
+        cmd = ["gcloud", "beta", "container", "clusters", "create", cluster]
+    else:
+        snapshot_flag = []
+        cmd = ["gcloud", "container", "clusters", "create", cluster]
+
+    cmd += [
+        f"--zone={zone}",
+        f"--network={cfg['vpc_name']}",
+        f"--subnetwork={cfg['subnet_name']}",
+        "--enable-private-nodes",
+        "--enable-ip-alias",
+        f"--master-ipv4-cidr={cfg['master_cidr']}",
+        f"--cluster-version={cfg['gke_version']}",
+        "--no-enable-shielded-nodes",
+        "--num-nodes=1",
+        f"--machine-type={cfg['machine_type']}",
+        f"--disk-type={cfg['disk_type']}",
+        "--disk-size=50",
+        "--enable-dataplane-v2",
+        f"--workload-pool={project}.svc.id.goog",
+        "--release-channel=None",
+        f"--project={project}",
+    ] + snapshot_flag
+
+    _run(cmd, timeout=600)
+    logging.info("GKE cluster %s created.", cluster)
+
+
+def _get_credentials(cfg):
+    """Register to fleet and get kubectl credentials."""
+    project = cfg["project"]
+    zone = cfg["zone"]
+    cluster = cfg["cluster_name"]
+
+    if cfg["use_connect_gateway"]:
+        # Register to fleet
+        if not _resource_exists(["gcloud", "container", "fleet", "memberships",
+                                 "describe", cluster, f"--project={project}"]):
+            logging.info("Registering cluster %s to fleet...", cluster)
+            _run(["gcloud", "container", "fleet", "memberships", "register", cluster,
+                  f"--gke-cluster={zone}/{cluster}",
+                  "--enable-workload-identity",
+                  f"--project={project}"], timeout=120)
+
+        logging.info("Getting credentials via Connect Gateway...")
+        _run(["gcloud", "container", "fleet", "memberships", "get-credentials",
+              cluster, f"--project={project}"], timeout=60)
+    else:
+        logging.info("Getting credentials (direct endpoint)...")
+        _run(["gcloud", "container", "clusters", "get-credentials", cluster,
+              f"--zone={zone}", f"--project={project}"], timeout=60)
+
+
+def _create_sandbox_node_pool(cfg):
+    """Create the gVisor-enabled sandbox node pool."""
+    project = cfg["project"]
+    zone = cfg["zone"]
+    cluster = cfg["cluster_name"]
+    pool_name = cfg["sandbox_pool_name"]
+
+    if _resource_exists(["gcloud", "container", "node-pools", "describe", pool_name,
+                         f"--cluster={cluster}", f"--zone={zone}",
+                         f"--project={project}"]):
+        logging.info("Sandbox node pool %s already exists.", pool_name)
+        return
+
+    logging.info("Creating sandbox node pool %s with gVisor...", pool_name)
+    cmd = [
+        "gcloud", "container", "node-pools", "create", pool_name,
+        f"--cluster={cluster}",
+        f"--zone={zone}",
+        f"--project={project}",
+        f"--machine-type={cfg['machine_type']}",
+        f"--num-nodes={cfg['sandbox_node_count']}",
+        f"--disk-type={cfg['disk_type']}",
+        f"--disk-size={cfg['sandbox_disk_size']}",
+        f"--max-pods-per-node={cfg['sandbox_max_pods']}",
+        "--node-labels=dedicated=agentic-sandbox",
+        "--node-taints=dedicated=agentic-sandbox:NoSchedule",
+        "--workload-metadata=GKE_METADATA",
+        "--sandbox", "type=gvisor",
+    ]
+    _run(cmd, timeout=600)
+    logging.info("Sandbox node pool %s created.", pool_name)
+
+
+def _create_artifact_registry(cfg):
+    """Create Artifact Registry repositories."""
+    project = cfg["project"]
+    region = cfg["region"]
+
+    for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]):
+        logging.info("Ensuring AR repo %s exists...", repo_name)
+        _run_quiet([
+            "gcloud", "artifacts", "repositories", "create", repo_name,
+            "--repository-format=docker",
+            f"--location={region}",
+            f"--project={project}",
+        ])
+
+
+def _create_cloud_build_sa(cfg):
+    """Create Cloud Build service account and bind IAM roles."""
+    project = cfg["project"]
+    sa_email = cfg["cloud_build_sa_email"]
+    sa_name = cfg["cloud_build_sa"]
+
+    # Create SA if not exists
+    if not _resource_exists(["gcloud", "iam", "service-accounts", "describe",
+                             sa_email, f"--project={project}"]):
+        logging.info("Creating Cloud Build SA %s...", sa_email)
+        _run(["gcloud", "iam", "service-accounts", "create", sa_name,
+              f"--display-name={sa_name}", f"--project={project}"])
+        # Wait for propagation
+        time.sleep(10)
+
+    roles = [
+        "roles/logging.logWriter",
+        "roles/storage.objectViewer",
+        "roles/artifactregistry.writer",
+        "roles/serviceusage.serviceUsageConsumer",
+    ]
+    for role in roles:
+        _run_quiet([
+            "gcloud", "projects", "add-iam-policy-binding", project,
+            f"--member=serviceAccount:{sa_email}",
+            f"--role={role}",
+            "--condition=None", "--quiet",
+        ])
+    logging.info("Cloud Build SA ready.")
+
+
+# ---------------------------------------------------------------------------
+# Teardown steps
+# ---------------------------------------------------------------------------
+
+
+def _teardown_workloads(cfg):
+    """Delete K8s workloads, CRDs, and namespace."""
+    ns = cfg["namespace"]
+    version = cfg["sandbox_version"]
+
+    logging.info("Deleting namespace %s...", ns)
+    _run_quiet(["kubectl", "delete", "namespace", ns,
+                "--ignore-not-found=true", "--timeout=120s"])
+
+    logging.info("Removing Agent Sandbox CRDs...")
+    _run_quiet(["kubectl", "delete", "-f",
+                f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/extensions.yaml",
+                "--ignore-not-found=true"])
+    _run_quiet(["kubectl", "delete", "-f",
+                f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/manifest.yaml",
+                "--ignore-not-found=true"])
+
+    logging.info("Removing cluster-scoped RBAC...")
+    _run_quiet(["kubectl", "delete", "clusterrolebinding",
+                "adk-agent-sandbox-binding", "--ignore-not-found=true"])
+    _run_quiet(["kubectl", "delete", "clusterrole",
+                "adk-agent-sandbox-role", "--ignore-not-found=true"])
+
+
+def _teardown_images(cfg):
+    """Delete Artifact Registry repositories."""
+    project = cfg["project"]
+    region = cfg["region"]
+
+    for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]):
+        logging.info("Deleting AR repo %s...", repo_name)
+        _run_quiet(["gcloud", "artifacts", "repositories", "delete", repo_name,
+                    f"--location={region}", f"--project={project}", "--quiet"])
+
+
+def _teardown_cloud_build_sa(cfg):
+    """Delete Cloud Build service account and IAM bindings."""
+    project = cfg["project"]
+    sa_email = cfg["cloud_build_sa_email"]
+
+    roles = [
+        "roles/logging.logWriter",
+        "roles/storage.objectViewer",
+        "roles/artifactregistry.writer",
+        "roles/serviceusage.serviceUsageConsumer",
+    ]
+    for role in roles:
+        _run_quiet([
+            "gcloud", "projects", "remove-iam-policy-binding", project,
+            f"--member=serviceAccount:{sa_email}",
+            f"--role={role}", "--quiet",
+        ])
+
+    _run_quiet(["gcloud", "iam", "service-accounts", "delete", sa_email,
+                f"--project={project}", "--quiet"])
+    logging.info("Cloud Build SA deleted.")
+
+
+def _teardown_cluster(cfg):
+    """Delete GKE node pools and cluster."""
+    project = cfg["project"]
+    zone = cfg["zone"]
+    cluster = cfg["cluster_name"]
+    pool_name = cfg["sandbox_pool_name"]
+
+    logging.info("Deleting sandbox node pool %s...", pool_name)
+    _run_quiet(["gcloud", "container", "node-pools", "delete", pool_name,
+                f"--cluster={cluster}", f"--zone={zone}",
+                f"--project={project}", "--quiet"])
+
+    logging.info("Deleting GKE cluster %s...", cluster)
+    _run_quiet(["gcloud", "container", "clusters", "delete", cluster,
+                f"--zone={zone}", f"--project={project}", "--quiet"])
+
+
+def _teardown_network(cfg):
+    """Delete network resources in reverse dependency order."""
+    project = cfg["project"]
+    region = cfg["region"]
+    vpc = cfg["vpc_name"]
+    router = cfg["router_name"]
+    nat = cfg["nat_name"]
+    subnet = cfg["subnet_name"]
+
+    logging.info("Deleting Cloud NAT and Router...")
+    _run_quiet(["gcloud", "compute", "routers", "nats", "delete", nat,
+                f"--router={router}", f"--region={region}",
+                f"--project={project}", "--quiet"])
+    _run_quiet(["gcloud", "compute", "routers", "delete", router,
+                f"--region={region}", f"--project={project}", "--quiet"])
+
+    logging.info("Deleting firewall rules...")
+    for suffix in ("allow-iap-ssh", "allow-internal"):
+        _run_quiet(["gcloud", "compute", "firewall-rules", "delete",
+                    f"{vpc}-{suffix}", f"--project={project}", "--quiet"])
+
+    logging.info("Deleting subnet and VPC...")
+    _run_quiet(["gcloud", "compute", "networks", "subnets", "delete", subnet,
+                f"--region={region}", f"--project={project}", "--quiet"])
+    _run_quiet(["gcloud", "compute", "networks", "delete", vpc,
+                f"--project={project}", "--quiet"])
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+flags.DEFINE_enum(
+    "gke_provision_mode",
+    "custom",
+    ["custom", "native"],
+    "Provisioning mode: 'custom' uses direct gcloud calls (Phase 1 logic), "
+    "'native' uses PKB's container_cluster with prerequisite_setup.py.",
+)
+
+def Provision():
+    """Provision GKE infrastructure.
+
+    Mode is controlled by --gke_provision_mode:
+      - custom: Direct gcloud calls (full control, no PKB cluster management)
+      - native: PKB manages cluster via container_cluster spec.
+                Requires prerequisite_setup.py to have been run first.
+    """
+    mode = FLAGS.gke_provision_mode
+    if mode == "native":
+        logging.info(
+            "Provision mode=native: PKB manages cluster via container_cluster. "
+            "Ensure prerequisite_setup.py was run first (VPC, NAT, AR, images)."
+        )
+        return  # PKB handles cluster creation via container_cluster spec
+
+    logging.info("Provision mode=custom: using direct gcloud calls.")
+    cfg = _derive_config()
+
+    logging.info("=== Provision: project=%s cluster=%s machine=%s ===",
+                 cfg["project"], cfg["cluster_name"], cfg["machine_type"])
+
+    _enable_apis(cfg)
+    _create_network(cfg)
+    _create_cluster(cfg)
+    _get_credentials(cfg)
+    _create_sandbox_node_pool(cfg)
+    _create_artifact_registry(cfg)
+    _create_cloud_build_sa(cfg)
+
+    # --- Phase 3: Build container images ---
+    if not FLAGS.gke_skip_image_build:
+        from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils
+        gke_image_build_utils.BuildImages()
+    else:
+        logging.info("Skipping image builds (--gke_skip_image_build=true)")
+
+    logging.info("=== Provision complete: %s ===", cfg["cluster_name"])
+
+
+def Teardown():
+    """Teardown GKE infrastructure.
+
+    Mode is controlled by --gke_provision_mode:
+      - custom: Direct gcloud calls to delete all resources.
+      - native: PKB manages cluster deletion. Run prerequisite_setup.py --teardown
+                separately to clean up VPC/NAT/AR.
+    """
+    mode = FLAGS.gke_provision_mode
+    if mode == "native":
+        logging.info(
+            "Teardown mode=native: PKB manages cluster deletion. "
+            "Run prerequisite_setup.py --teardown to clean up VPC/NAT/AR."
+        )
+        return  # PKB handles cluster deletion
+
+    logging.info("Teardown mode=custom: using direct gcloud calls.")
+    cfg = _derive_config()
+
+    logging.info("=== Teardown: project=%s cluster=%s ===",
+                 cfg["project"], cfg["cluster_name"])
+    logging.info("  keep_images=%s  keep_infra=%s",
+                 FLAGS.gke_teardown_keep_images,
+                 FLAGS.gke_teardown_keep_infra)
+
+    # Always delete workloads
+    _teardown_workloads(cfg)
+
+    # Conditionally delete images
+    if not FLAGS.gke_teardown_keep_images:
+        _teardown_images(cfg)
+
+    # Conditionally delete infrastructure
+    if not FLAGS.gke_teardown_keep_infra:
+        _teardown_cloud_build_sa(cfg)
+        _teardown_cluster(cfg)
+        _teardown_network(cfg)
+
+    logging.info("=== Teardown complete ===")
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
new file mode 100644
index 0000000000..157bd2559e
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
@@ -0,0 +1,362 @@
+"""PKB Benchmark: GKE Agent Python Sandbox Density (Use Case B).
+
+Atomic single-point measurement of Python sandbox density on a
+pre-provisioned GKE cluster with gVisor isolation. Measures Code Execution
+Latency (CEL), Time To First Execution (TTFE), RSS memory growth, and
+per-type latency breakdown (compute, syscall, import) at a given
+concurrent session count.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the density parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_python_density \\
+                --gke_python_density=16 \\
+                --gke_python_density_sample_count=20 \\
+                --gke_python_density_sample_warmup=0 \\
+                --gke_namespace=agentic \\
+                --gke_api_url=http://localhost:8080
+
+Samples emitted (per run):
+  - gke_python_density_orchestrator_cel_mean       (ms)
+  - gke_python_density_orchestrator_cel_p50        (ms)
+  - gke_python_density_orchestrator_cel_p95        (ms)
+  - gke_python_density_orchestrator_cel_p99        (ms)
+  - gke_python_density_orchestrator_cel_min        (ms)
+  - gke_python_density_orchestrator_cel_max        (ms)
+  - gke_python_density_sandbox_total_cel_mean      (ms)
+  - gke_python_density_sandbox_total_cel_p50       (ms)
+  - gke_python_density_sandbox_total_cel_p95       (ms)
+  - gke_python_density_sandbox_total_cel_p99       (ms)
+  - gke_python_density_sandbox_total_cel_min       (ms)
+  - gke_python_density_sandbox_total_cel_max       (ms)
+  - gke_python_density_sandbox_ttfe                (ms)
+  - gke_python_density_sandbox_rss_start           (MB)
+  - gke_python_density_sandbox_rss_end             (MB)
+  - gke_python_density_sandbox_rss_growth          (MB)
+  - gke_python_density_sandbox_compute_cel_mean    (ms)
+  - gke_python_density_sandbox_syscall_cel_mean    (ms)
+  - gke_python_density_sandbox_import_cel_mean     (ms)
+  - gke_python_density_wall_time                   (seconds)
+"""
+
+import logging
+import time
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_python_density"
+BENCHMARK_CONFIG = """
+gke_python_density:
+  description: >
+    Atomic single-point Python sandbox density measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "gke_python_density",
+    1,
+    "Number of concurrent sandbox sessions to run.",
+)
+
+flags.DEFINE_integer(
+    "gke_python_density_sample_count",
+    20,
+    "Number of sample iterations per sandbox session.",
+)
+
+flags.DEFINE_integer(
+    "gke_python_density_sample_warmup",
+    0,
+    "Number of warmup iterations per session (excluded from stats).",
+)
+
+flags.DEFINE_bool(
+    "gke_python_density_patch_warmpool",
+    True,
+    "Patch SandboxWarmPool replicas to match density before measurement.",
+)
+
+flags.DEFINE_integer(
+    "gke_python_density_exec_timeout",
+    600,
+    "Timeout in seconds for the API call.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision GKE cluster and all dependencies."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load and return benchmark config.
+
+    No vm_groups — PKB skips Provision() and Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads and verify agent API."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    utils.CheckAgentHealthz(required=False)
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Run one density measurement through the agent API and return its samples.
+
+    Reads all settings from FLAGS (benchmark_spec is not used). Aggregate keys
+    absent from the API response are skipped. Returns the list of emitted samples.
+    """
+    ns = FLAGS.gke_namespace
+    density = FLAGS.gke_python_density
+
+    logging.info("=== Run: density=%d ===", density)
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Patch warm pool to match density (moved from Prepare for sweep compatibility)
+    if FLAGS.gke_python_density_patch_warmpool:
+        utils.PatchWarmPool(
+            namespace=ns,
+            warmpool_name=_WARMPOOL_NAME,
+            replicas=density,
+            label=_WARMPOOL_LABEL,
+        )
+
+    # Request body for the agent's /benchmark/python/density endpoint
+    payload = {
+        "sample_count": FLAGS.gke_python_density_sample_count,
+        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
+        "concurrent_sessions": density,
+        "sandbox_exec_timeout_s": FLAGS.gke_python_density_exec_timeout,
+    }
+
+    t0 = time.time()
+    result = utils.CallAgentApi("/benchmark/python/density", payload)
+    wall_time = time.time() - t0
+
+    successful = result.get("successful_sessions", 0)
+    failed = result.get("failed_sessions", 0)
+    agg = result.get("aggregate", {})
+
+    logging.info(
+        "API response: %d successful, %d failed sessions (%.1fs)",
+        successful,
+        failed,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "density": density,
+        "successful_sessions": successful,
+        "failed_sessions": failed,
+        "sample_count": FLAGS.gke_python_density_sample_count,
+        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # Orchestrator-side CEL
+    _emit(
+        samples,
+        agg,
+        "orchestrator_cel_mean_ms",
+        "orchestrator_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p50_ms", "orchestrator_cel_p50", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p95_ms", "orchestrator_cel_p95", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_p99_ms", "orchestrator_cel_p99", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_min_ms", "orchestrator_cel_min", "ms", ns, extra
+    )
+    _emit(
+        samples, agg, "orchestrator_cel_max_ms", "orchestrator_cel_max", "ms", ns, extra
+    )
+
+    # Sandbox-side total CEL
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_mean_ms",
+        "sandbox_total_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p50_ms",
+        "sandbox_total_cel_p50",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p95_ms",
+        "sandbox_total_cel_p95",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_p99_ms",
+        "sandbox_total_cel_p99",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_min_ms",
+        "sandbox_total_cel_min",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_total_cel_max_ms",
+        "sandbox_total_cel_max",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # TTFE (Time To First Execution)
+    _emit(samples, agg, "sandbox_ttfe_ms", "sandbox_ttfe", "ms", ns, extra)
+
+    # Sandbox RSS at start and end, plus its growth (MB)
+    _emit(samples, agg, "sandbox_rss_start_mb", "sandbox_rss_start", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_end_mb", "sandbox_rss_end", "MB", ns, extra)
+    _emit(samples, agg, "sandbox_rss_growth_mb", "sandbox_rss_growth", "MB", ns, extra)
+
+    # Per-type CEL breakdown
+    _emit(
+        samples,
+        agg,
+        "sandbox_compute_cel_mean_ms",
+        "sandbox_compute_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_syscall_cel_mean_ms",
+        "sandbox_syscall_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+    _emit(
+        samples,
+        agg,
+        "sandbox_import_cel_mean_ms",
+        "sandbox_import_cel_mean",
+        "ms",
+        ns,
+        extra,
+    )
+
+    # Wall time of the API call, measured on the caller side (always emitted)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for density=%d.", len(samples), density)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool if patching is enabled, then stop the port-forward.
+
+    benchmark_spec is accepted but not read here.
+    """
+    namespace = FLAGS.gke_namespace
+    logging.info("Cleanup: draining warm pool.")
+    if FLAGS.gke_python_density_patch_warmpool:
+        utils.DrainWarmPool(
+            namespace=namespace, warmpool_name=_WARMPOOL_NAME, label=_WARMPOOL_LABEL
+        )
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete (cluster persists).")
+
+
+def Teardown(benchmark_spec):
+    """Delegate teardown to gke_provision_utils.Teardown(); benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
+    """Append one sample for agg[agg_key]; do nothing if it is missing or None.
+
+    The metric is named "<BENCHMARK_NAME>_<metric_suffix>". `unit`, `namespace`
+    and `extra` are passed through to utils.MakeSample unchanged.
+    """
+    measured = agg.get(agg_key)
+    if measured is None:
+        return
+
+    metric = f"{BENCHMARK_NAME}_{metric_suffix}"
+    sample_obj = utils.MakeSample(metric, measured, unit, namespace, extra)
+    samples.append(sample_obj)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
new file mode 100644
index 0000000000..f638494508
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
@@ -0,0 +1,802 @@
+"""PKB Benchmark: GKE Agent QPS Saturation (Use Case F).
+
+Atomic single-point measurement of scheduling throughput on a pre-provisioned
+GKE cluster.  Fires sandbox claim requests at a controlled QPS rate for a
+fixed duration and measures per-request TTFE (Time To First Execution).
+
+Supports two operating modes:
+  - **agent**: POST to the orchestrator /benchmark/python/qps endpoint
+  - **raw_claim**: Bypass the agent, create SandboxClaims directly via kubectl
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the target_qps parameter across iterations to find
+the QPS saturation point.
+
+Usage:
+  # Agent mode
+  python pkb.py --benchmarks=gke_qps \\
+                --gke_qps_target_qps=5.0 \\
+                --gke_qps_pool_size=70 \\
+                --gke_qps_step_duration_s=30.0 \\
+                --gke_qps_mode=agent \\
+                --gke_namespace=agentic \\
+                --gke_api_url=http://localhost:8080
+
+  # Raw claim mode
+  python pkb.py --benchmarks=gke_qps \\
+                --gke_qps_target_qps=5.0 \\
+                --gke_qps_pool_size=70 \\
+                --gke_qps_step_duration_s=30.0 \\
+                --gke_qps_mode=raw_claim \\
+                --gke_qps_claim_timeout_s=60.0 \\
+                --gke_namespace=agentic
+
+Samples emitted (per run; ttfe_*/claim_* stats are omitted when no latency data is available):
+  - gke_qps_ttfe_mean                (ms)
+  - gke_qps_ttfe_p50                 (ms)
+  - gke_qps_ttfe_p95                 (ms)
+  - gke_qps_ttfe_p99                 (ms)
+  - gke_qps_ttfe_min                 (ms)
+  - gke_qps_ttfe_max                 (ms)
+  - gke_qps_claim_mean               (ms)
+  - gke_qps_claim_p95                (ms)
+  - gke_qps_actual_qps               (requests/sec)
+  - gke_qps_duration                 (seconds)
+  - gke_qps_total_requests           (count)
+  - gke_qps_successful_requests      (count)
+  - gke_qps_failed_requests          (count)
+  - gke_qps_pool_before              (count)
+  - gke_qps_pool_after               (count)
+  - gke_qps_wall_time                (seconds)
+"""
+
+import json
+import logging
+import subprocess
+import threading
+import time
+import uuid
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_qps"  # prefix of every metric name emitted by this module
+BENCHMARK_CONFIG = """
+gke_qps:
+  description: >
+    Atomic single-point QPS saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+_WARMPOOL_NAME = "python-sandbox-warmpool"  # patched in Run, drained in Cleanup
+_WARMPOOL_LABEL = "sandbox=python-sandbox-example"  # selector passed to CountPods
+_SANDBOX_TEMPLATE = "python-sandbox-template"  # sandboxTemplateName for raw claims
+_QPS_CLAIM_LABEL = "created-by=pkb-qps-benchmark"  # must match labels in _CreateClaim
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags (gke_namespace is read below but not defined here)
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_float(
+    "gke_qps_target_qps",
+    5.0,
+    "Target requests per second (sandbox claims per second).",
+)
+
+flags.DEFINE_integer(
+    "gke_qps_pool_size",
+    70,
+    "Warm pool size maintained during the measurement.",
+)
+
+flags.DEFINE_float(
+    "gke_qps_step_duration_s",
+    30.0,
+    "Duration of the QPS burst in seconds.",
+)
+
+flags.DEFINE_integer(
+    "gke_qps_sandbox_exec_timeout_s",
+    30,
+    "Sandbox command execution timeout in seconds.",
+)
+
+flags.DEFINE_float(
+    "gke_qps_provision_timeout_s",  # truncated with int() before use in Run
+    180.0,
+    "Max seconds to wait for pool pods to reach Running.",
+)
+
+flags.DEFINE_string(  # NOTE(review): plain string flag; values are not restricted at parse time
+    "gke_qps_mode",
+    "agent",
+    "Operating mode: 'agent' (POST to orchestrator API) or "
+    "'raw_claim' (create SandboxClaims directly via kubectl).",
+)
+
+flags.DEFINE_float(
+    "gke_qps_claim_timeout_s",  # per-claim bind deadline; also sizes the thread join timeout
+    60.0,
+    "Max seconds to wait for a raw claim to bind " "(only used with mode=raw_claim).",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Delegate provisioning to gke_provision_utils.Provision(); benchmark_spec is unused."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load BENCHMARK_CONFIG together with user_config via configs.LoadConfig.
+
+    No vm_groups are declared. NOTE(review): confirm PKB skips Provision()/Teardown().
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, probe the agent (agent mode only), start port-forward."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    # The health probe applies to agent mode only and passes required=False.
+    if FLAGS.gke_qps_mode == "agent":
+        utils.CheckAgentHealthz(required=False)
+
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Scale the warm pool and run one QPS measurement in the selected mode.
+
+    Returns:
+      Samples from _RunRawClaim (mode "raw_claim") or _RunAgent (mode "agent").
+
+    Raises:
+      ValueError: if --gke_qps_mode is neither "agent" nor "raw_claim".
+    """
+    mode = FLAGS.gke_qps_mode
+    # Reject typos up front so they cannot silently run (and report) agent mode.
+    if mode not in ("agent", "raw_claim"):
+        raise ValueError(f"Invalid --gke_qps_mode {mode!r}: use agent or raw_claim")
+
+    # Scale warm pool (moved from Prepare for sweep compatibility)
+    utils.PatchWarmPool(
+        namespace=FLAGS.gke_namespace,
+        warmpool_name=_WARMPOOL_NAME,
+        replicas=FLAGS.gke_qps_pool_size,
+        label=_WARMPOOL_LABEL,
+        wait_timeout=int(FLAGS.gke_qps_provision_timeout_s),
+    )
+    runner = _RunRawClaim if mode == "raw_claim" else _RunAgent
+    return runner(benchmark_spec)
+
+
+def Cleanup(benchmark_spec):
+    """Remove leftover benchmark claims, drain the warm pool, stop port-forward.
+
+    Claims are matched by label inside _DeleteBenchmarkClaims. benchmark_spec
+    is accepted but not read here.
+    """
+    namespace = FLAGS.gke_namespace
+    logging.info("Cleanup: deleting benchmark claims and draining warm pool.")
+
+    # Claims are deleted before the warm pool is drained.
+    _DeleteBenchmarkClaims(namespace)
+    drain_args = {"warmpool_name": _WARMPOOL_NAME, "label": _WARMPOOL_LABEL}
+    utils.DrainWarmPool(namespace=namespace, **drain_args)
+
+    # Finally stop the local port-forward.
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+def Teardown(benchmark_spec):
+    """Delegate teardown to gke_provision_utils.Teardown(); benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Agent mode
+# ---------------------------------------------------------------------------
+
+
+def _RunAgent(benchmark_spec):
+    """POST one QPS burst to /benchmark/python/qps and turn the reply into samples."""
+    ns = FLAGS.gke_namespace
+    target_qps = FLAGS.gke_qps_target_qps
+    pool_size = FLAGS.gke_qps_pool_size
+    step_duration = FLAGS.gke_qps_step_duration_s
+
+    logging.info(
+        "=== Run (agent): target_qps=%s, pool_size=%d, duration=%ss ===",
+        target_qps,
+        pool_size,
+        step_duration,
+    )
+
+    # Ensure port-forward is active (needed when sweeps skip Prepare)
+    utils.EnsurePortForward()
+
+    # Record pool state before burst
+    pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Request body for the /benchmark/python/qps endpoint
+    payload = {
+        "target_qps": target_qps,
+        "duration_s": step_duration,
+        "sandbox_exec_timeout_s": FLAGS.gke_qps_sandbox_exec_timeout_s,
+    }
+
+    t0 = time.time()
+    api_timeout = int(step_duration + 300)  # burst duration plus 300 s of headroom
+    result = utils.CallAgentApi("/benchmark/python/qps", payload, timeout=api_timeout)
+    wall_time = time.time() - t0
+
+    # Record pool state after burst
+    pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Extract response fields (missing counters default to 0)
+    aggregate = result.get("aggregate", {})
+    successful = result.get("successful_requests", 0)
+    failed = result.get("failed_requests", 0)
+    total = result.get("total_requests", 0)
+    actual_qps = result.get("actual_qps", 0)
+    duration_s = result.get("duration_s", 0)
+
+    logging.info(
+        "API response: actual_qps=%s, %d/%d requests ok (%.1fs)",
+        actual_qps,
+        successful,
+        total,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "target_qps": target_qps,
+        "pool_size": pool_size,
+        "step_duration_s": step_duration,
+        "mode": "agent",
+        "actual_qps": actual_qps,
+        "total_requests": total,
+        "successful_requests": successful,
+        "failed_requests": failed,
+        "pool_before": pool_before,
+        "pool_after": pool_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # TTFE latency stats (each is skipped when its key is absent from aggregate)
+    _emit(samples, aggregate, "ttfe_mean_ms", "ttfe_mean", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p50_ms", "ttfe_p50", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p95_ms", "ttfe_p95", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_p99_ms", "ttfe_p99", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_min_ms", "ttfe_min", "ms", ns, extra)
+    _emit(samples, aggregate, "ttfe_max_ms", "ttfe_max", "ms", ns, extra)
+
+    # Claim latency stats (same skip-if-absent rule)
+    _emit(samples, aggregate, "claim_mean_ms", "claim_mean", "ms", ns, extra)
+    _emit(samples, aggregate, "claim_p95_ms", "claim_p95", "ms", ns, extra)
+
+    # Throughput and counts (always emitted, using the defaults above if missing)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_actual_qps",
+            actual_qps,
+            "requests/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_duration",
+            duration_s,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_requests",
+            float(total),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_successful_requests",
+            float(successful),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_failed_requests",
+            float(failed),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pool state: Running pods matching the warm pool label before/after the burst
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_before",
+            float(pool_before),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_after",
+            float(pool_after),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Wall time of the API call, measured on the caller side
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps)
+    return samples
+
+
+# ---------------------------------------------------------------------------
+# Raw claim mode
+# ---------------------------------------------------------------------------
+
+
+def _RunRawClaim(benchmark_spec):
+    """Fire SandboxClaims via kubectl at target_qps and time each claim's bind."""
+    ns = FLAGS.gke_namespace
+    target_qps = FLAGS.gke_qps_target_qps
+    pool_size = FLAGS.gke_qps_pool_size
+    step_duration = FLAGS.gke_qps_step_duration_s
+    claim_timeout = FLAGS.gke_qps_claim_timeout_s
+
+    logging.info(
+        "=== Run (raw_claim): target_qps=%s, pool_size=%d, duration=%ss ===",
+        target_qps,
+        pool_size,
+        step_duration,
+    )
+
+    # Record pool state before burst
+    pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Total claims to fire (at least one) and the pacing interval between starts
+    total_claims = max(1, int(target_qps * step_duration))
+    interval = 1.0 / target_qps if target_qps > 0 else 1.0
+
+    logging.info(
+        "Firing %d raw SandboxClaims at %s req/s",
+        total_claims,
+        target_qps,
+    )
+
+    # One thread per claim; results are appended to claim_results under lock
+    claim_results = []
+    lock = threading.Lock()
+
+    def _fire_and_wait(idx, fire_time):
+        claim_name = f"pkb-qps-0-{idx}-{uuid.uuid4().hex[:6]}"
+        result = {"request_id": idx, "fire_time_s": round(fire_time, 3)}
+        try:
+            t_create = _CreateClaim(ns, _SANDBOX_TEMPLATE, claim_name)
+            result["create_ts"] = t_create
+            t_bound = _WaitClaimBound(ns, claim_name, claim_timeout)
+            if t_bound is not None:
+                ttfe_ms = (t_bound - t_create) * 1000.0
+                result["ttfe_ms"] = round(ttfe_ms, 3)
+                result["claim_ms"] = round(ttfe_ms, 3)
+                result["error"] = None
+            else:
+                result["ttfe_ms"] = None
+                result["error"] = "Timeout waiting for claim to bind"
+        except Exception as e:
+            result["ttfe_ms"] = None
+            result["error"] = f"{type(e).__name__}: {e}"
+        with lock:
+            claim_results.append(result)
+
+    t0 = time.time()
+    threads = []
+    for i in range(total_claims):
+        fire_time = time.time() - t0
+        t = threading.Thread(target=_fire_and_wait, args=(i, fire_time), daemon=True)
+        threads.append(t)
+        t.start()
+        if i < total_claims - 1:
+            next_fire = t0 + (i + 1) * interval
+            sleep_time = next_fire - time.time()
+            if sleep_time > 0:
+                time.sleep(sleep_time)
+
+    for t in threads:
+        t.join(timeout=claim_timeout + 30)
+
+    wall_time = time.time() - t0
+    actual_qps = round(total_claims / wall_time, 2) if wall_time > 0 else 0
+
+    # Record pool state after burst
+    pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running")
+
+    # Aggregate results; a claim whose thread never reported counts as failed
+    successful = [r for r in claim_results if r.get("ttfe_ms") is not None]
+    failed_count = total_claims - len(successful)
+    ttfe_values = sorted(r["ttfe_ms"] for r in successful)
+
+    logging.info(
+        "Raw claim burst complete: %d/%d ok, actual_qps=%s (%.1fs)",
+        len(successful),
+        total_claims,
+        actual_qps,
+        wall_time,
+    )
+
+    # Metadata attached to every sample emitted below
+    extra = {
+        "target_qps": target_qps,
+        "pool_size": pool_size,
+        "step_duration_s": step_duration,
+        "mode": "raw_claim",
+        "actual_qps": actual_qps,
+        "total_requests": total_claims,
+        "successful_requests": len(successful),
+        "failed_requests": failed_count,
+        "pool_before": pool_before,
+        "pool_after": pool_after,
+        "wall_time_s": round(wall_time, 2),
+    }
+
+    samples = []
+
+    # TTFE latency stats (only emitted when at least one claim bound)
+    if ttfe_values:
+        n = len(ttfe_values)
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_mean",
+                round(sum(ttfe_values) / n, 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p50",
+                round(_percentile(ttfe_values, 50), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p95",
+                round(_percentile(ttfe_values, 95), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_p99",
+                round(_percentile(ttfe_values, 99), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_min",
+                round(ttfe_values[0], 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_ttfe_max",
+                round(ttfe_values[-1], 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+
+        # Claim latency (same as TTFE in raw_claim mode)
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_claim_mean",
+                round(sum(ttfe_values) / n, 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_claim_p95",
+                round(_percentile(ttfe_values, 95), 3),
+                "ms",
+                ns,
+                extra,
+            )
+        )
+
+    # Throughput and counts (actual_qps spans firing plus the wait for binds)
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_actual_qps",
+            actual_qps,
+            "requests/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_duration",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_requests",
+            float(total_claims),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_successful_requests",
+            float(len(successful)),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_failed_requests",
+            float(failed_count),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pool state
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_before",
+            float(pool_before),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_pool_after",
+            float(pool_after),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Wall time
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    # Cleanup benchmark claims
+    _DeleteBenchmarkClaims(ns)
+
+    logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps)
+    return samples
+
+
+# ---------------------------------------------------------------------------
+# Raw claim helpers
+# ---------------------------------------------------------------------------
+
+
+def _CreateClaim(namespace, template, claim_name):
+    """Apply a SandboxClaim manifest with kubectl; return the post-apply time.
+
+    The timestamp is time.time() taken right after `kubectl apply` returns, so
+    it does not include the apply call itself.
+
+    Raises:
+      RuntimeError: kubectl exited non-zero (stderr is included in the message).
+    """
+    metadata = {
+        "name": claim_name,
+        "namespace": namespace,
+        "labels": {"created-by": "pkb-qps-benchmark"},
+    }
+    claim = {
+        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+        "kind": "SandboxClaim",
+        "metadata": metadata,
+        "spec": {"sandboxTemplateName": template},
+    }
+    apply_cmd = ["kubectl", "apply", "-n", namespace, "-f", "-"]
+    # The manifest is fed on stdin ("-f -"); the call is capped at 30 seconds.
+    completed = subprocess.run(
+        apply_cmd, input=json.dumps(claim), capture_output=True, text=True, timeout=30
+    )
+    applied_at = time.time()
+    if completed.returncode == 0:
+        return applied_at
+    raise RuntimeError(f"Failed to create claim {claim_name}: {completed.stderr.strip()}")
+
+
+def _WaitClaimBound(namespace, claim_name, timeout_s):
+    """Poll a SandboxClaim's .status.phase until it reads bound or ready.
+
+    The phase is compared case-insensitively and re-read every 0.1 s.
+
+    Returns:
+      time.time() at the first matching poll, or None once timeout_s elapses.
+    """
+    phase_query = ("get", "sandboxclaim", claim_name, "-n", namespace)
+    phase_query += ("-o", "jsonpath={.status.phase}")
+    give_up_at = time.time() + timeout_s
+
+    while time.time() < give_up_at:
+        phase, _, exit_code = utils.RunKubectl(
+            list(phase_query), timeout=10, raise_on_failure=False
+        )
+        # Failed lookups (non-zero exit) are ignored and simply retried.
+        if exit_code == 0 and phase.lower() in ("bound", "ready"):
+            return time.time()
+        time.sleep(0.1)
+    return None
+
+
+def _DeleteBenchmarkClaims(namespace):
+    """Delete SandboxClaims labelled created-by=pkb-qps-benchmark.
+
+    Issues a non-blocking delete by label, then polls for up to 120 s until
+    no matching claim is listed. A warning (not the success message) is
+    logged when claims are still listed after the polling window.
+
+    Returns:
+      Number of claims found before deletion (0 when there were none).
+    """
+    stdout, _, _ = utils.RunKubectl(
+        [
+            "get",
+            "sandboxclaim",
+            "-l",
+            _QPS_CLAIM_LABEL,
+            "-n",
+            namespace,
+            "-o",
+            "jsonpath={.items[*].metadata.name}",
+        ],
+        timeout=30,
+        raise_on_failure=False,
+    )
+    names = stdout.split() if stdout else []
+    if not names:  # str.split() never yields empty strings
+        return 0
+
+    count = len(names)
+    logging.info("Deleting %d pkb-qps SandboxClaim(s)", count)
+    # Label selector shared by the delete call and the polling loop below.
+    selector = ["sandboxclaim", "-l", _QPS_CLAIM_LABEL, "-n", namespace]
+    utils.RunKubectl(
+        ["delete", *selector, "--wait=false"],
+        timeout=60,
+        raise_on_failure=False,
+    )
+
+    # Wait for claims to be fully removed
+    t0 = time.time()
+    remaining = count
+    while time.time() - t0 < 120:
+        stdout, _, _ = utils.RunKubectl(
+            ["get", *selector, "--no-headers", "--ignore-not-found"],
+            timeout=10,
+            raise_on_failure=False,
+        )
+        listed = stdout.splitlines() if stdout else []
+        remaining = len([line for line in listed if line])
+        if remaining == 0:
+            break
+        time.sleep(2)
+
+    elapsed = time.time() - t0
+    if remaining:
+        # Do not report success when the poll loop merely timed out.
+        logging.warning("%d claim(s) still present after %.1fs", remaining, elapsed)
+    else:
+        logging.info("Claims cleaned up in %.1fs", elapsed)
+    return count
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _percentile(sorted_values, pct):
+    """Linearly interpolate the pct-th (0-100) percentile of ascending values."""
+    if not sorted_values:
+        return 0.0
+    last = len(sorted_values) - 1
+    rank = (pct / 100) * last
+    below, above = int(rank), min(int(rank) + 1, last)
+    weight = rank - below
+    return sorted_values[below] * (1 - weight) + sorted_values[above] * weight
+
+
+def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra):
+    """Append a "<BENCHMARK_NAME>_<metric_suffix>" sample for data[data_key].
+
+    Nothing is appended when the key is missing or maps to None.
+    """
+    reading = data.get(data_key)
+    if reading is None:
+        return
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_{metric_suffix}", reading, unit, namespace, extra
+        )
+    )
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
new file mode 100644
index 0000000000..4cfba5d5d0
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
@@ -0,0 +1,1022 @@
+"""PKB Benchmark: GKE Agent Pod Snapshot Saturation (Use Case A).
+
+Atomic single-point measurement of GKE Pod Snapshot create/restore latency
+on a pre-provisioned GKE cluster with gVisor isolation.  Measures snapshot
+time, restore time, TTFE (Time To First Execution), and restore correctness
+at a given preload_mb and burst_size.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the preload_mb parameter across iterations to find
+the saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_snapshot \\
+                --gke_snapshot_preload_mb=50 \\
+                --gke_snapshot_burst_size=3 \\
+                --gke_namespace=agentic \\
+                --gke_snapshot_skip_snapshot=false
+
+Samples emitted (per run):
+  - gke_snapshot_snapshot_p50        (seconds)
+  - gke_snapshot_snapshot_p95        (seconds)
+  - gke_snapshot_snapshot_max        (seconds)
+  - gke_snapshot_restore_p50         (seconds)
+  - gke_snapshot_restore_p95         (seconds)
+  - gke_snapshot_restore_max         (seconds)
+  - gke_snapshot_ttfe_p50            (seconds)
+  - gke_snapshot_ttfe_p95            (seconds)
+  - gke_snapshot_ttfe_max            (seconds)
+  - gke_snapshot_startup_time        (seconds)
+  - gke_snapshot_restore_correct_count (count)
+  - gke_snapshot_wall_time           (seconds)
+"""
+
+import json
+import logging
+import os
+import re
+import subprocess
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+from absl import flags
+from perfkitbenchmarker import configs
+from perfkitbenchmarker import sample
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_snapshot"  # also the prefix of every metric name emitted by this module
+BENCHMARK_CONFIG = """
+gke_snapshot:
+  description: >
+    Atomic single-point Pod Snapshot saturation measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "gke_snapshot_preload_mb",
+    10,  # the rendered template requests max(512, preload_mb + 256) Mi of memory
+    "Megabytes of memory to pre-allocate in the sandbox before snapshot.",
+)
+
+flags.DEFINE_integer(
+    "gke_snapshot_burst_size",
+    1,  # worker threads per phase are capped at min(burst_size, 50)
+    "Number of concurrent source/snapshot/restore pods per measurement.",
+)
+
+flags.DEFINE_string(
+    "gke_snapshot_ksa_name",
+    "pod-snapshot-sa",  # Prepare() raises if this account is absent from the namespace
+    "Kubernetes service account for pod snapshots.",
+)
+
+flags.DEFINE_integer(
+    "gke_snapshot_pod_timeout",
+    180,  # deadline budget for each pod wait (Running, preload, restore/TTFE)
+    "Max seconds to wait for pod Running / preload.",
+)
+
+flags.DEFINE_boolean(
+    "gke_snapshot_skip_snapshot",
+    False,  # when True, _RunSnapshotCycle returns before triggering any snapshot
+    "Skip snapshot/restore phases — measure cold-start TTFE only.",
+)
+
+flags.DEFINE_string(
+    "gke_snapshot_preload_mode",
+    "synthetic",  # any value starting with "script:" selects the script-based template
+    "Preload mode: 'synthetic' (os.urandom fill) or "
+    "'script:<path>' to run a custom startup script.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision GKE cluster and all dependencies via gke_provision_utils; benchmark_spec is unused."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load BENCHMARK_CONFIG through configs.LoadConfig, passing user_config along.
+
+    No vm_groups — NOTE(review): the original note says PKB then skips Provision() and Teardown(); confirm.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads, snapshot infra, and validate readiness.
+
+    Order of work: deploy the sandbox workloads and the snapshot
+    infrastructure, probe the PodSnapshotStorageConfig and PodSnapshotPolicy
+    resource types, check the snapshot service account, check the template
+    file on disk, then start the port-forward.
+
+    Args:
+      benchmark_spec: Unused; part of the PKB lifecycle signature.
+
+    Raises:
+      RuntimeError: If the storage-config lookup fails, the service account is
+        missing, or the template file does not exist.
+    """
+    ns = FLAGS.gke_namespace
+    ksa = FLAGS.gke_snapshot_ksa_name
+
+    def _found(*args):
+        # True when `kubectl get <args>` exits 0; a failure is returned, not raised.
+        _, _, code = utils.RunKubectl(["get", *args], timeout=30, raise_on_failure=False)
+        return code == 0
+
+    logging.info(
+        "=== Prepare: preload_mb=%d, burst_size=%d ===",
+        FLAGS.gke_snapshot_preload_mb,
+        FLAGS.gke_snapshot_burst_size,
+    )
+
+    # Both deploy helpers are described as idempotent by the original author.
+    deploy_utils.DeployWorkloads()
+    deploy_utils.DeploySnapshots()
+
+    # 1. Cluster-scoped storage config: a hard requirement.
+    if not _found("podsnapshotstorageconfigs.podsnapshot.gke.io", "--no-headers"):
+        raise RuntimeError(
+            "PodSnapshotStorageConfig CRD not found. "
+            "Ensure pod snapshots are enabled on the cluster."
+        )
+    logging.info("PodSnapshotStorageConfig verified.")
+
+    # 2. Namespaced snapshot policy: only worth a warning.
+    if not _found("podsnapshotpolicies.podsnapshot.gke.io", "-n", ns, "--no-headers"):
+        logging.warning("PodSnapshotPolicy not found in namespace %s.", ns)
+
+    # 3. Service account referenced by the snapshot templates.
+    if not _found("serviceaccount", ksa, "-n", ns):
+        raise RuntimeError(
+            f"ServiceAccount {ksa} not found in namespace {ns}. "
+            "Run setup_snapshot_gke.sh or ensure DeploySnapshots() succeeded."
+        )
+    logging.info("ServiceAccount %s verified.", ksa)
+
+    # 4. SandboxTemplate manifest that Run() will render.
+    manifest_path = _GetTemplatePath()
+    if not os.path.isfile(manifest_path):
+        raise RuntimeError(f"Snapshot template not found: {manifest_path}")
+    logging.info("Template file verified: %s", manifest_path)
+
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Execute a single snapshot/restore measurement and return samples.
+
+    One _RunSnapshotCycle is executed with the gke_snapshot_* flag values and
+    its result dict is converted to samples: nine latency percentiles plus the
+    startup time (each skipped when None), the restore-correctness count, and
+    the wall time of the cycle.  A failed cycle does not raise here; its
+    message is copied into the sample metadata under "error".
+
+    Args:
+      benchmark_spec: Unused; part of the PKB lifecycle signature.
+
+    Returns:
+      List of sample.Sample objects.
+    """
+    ns = FLAGS.gke_namespace
+    cycle_kwargs = {
+        "namespace": ns,
+        "preload_mb": FLAGS.gke_snapshot_preload_mb,
+        "burst_size": FLAGS.gke_snapshot_burst_size,
+        "skip_snapshot": FLAGS.gke_snapshot_skip_snapshot,
+        "preload_mode": FLAGS.gke_snapshot_preload_mode,
+        "ksa_name": FLAGS.gke_snapshot_ksa_name,
+        "pod_timeout": FLAGS.gke_snapshot_pod_timeout,
+    }
+
+    logging.info(
+        "=== Run: preload_mb=%d, burst_size=%d, skip_snapshot=%s ===",
+        cycle_kwargs["preload_mb"],
+        cycle_kwargs["burst_size"],
+        cycle_kwargs["skip_snapshot"],
+    )
+
+    # Resolve the template first so the lookup is not part of the wall time.
+    cycle_kwargs["template_path"] = _GetTemplatePath()
+    started_at = time.time()
+    step_result = _RunSnapshotCycle(**cycle_kwargs)
+    wall_time = time.time() - started_at
+
+    # Metadata attached to every sample of this run.
+    extra = {
+        "preload_mb": cycle_kwargs["preload_mb"],
+        "burst_size": cycle_kwargs["burst_size"],
+        "skip_snapshot": cycle_kwargs["skip_snapshot"],
+        "preload_mode": cycle_kwargs["preload_mode"],
+        "restore_correct_count": step_result.get("restore_correct_count", 0),
+        "wall_time_s": round(wall_time, 2),
+    }
+    cycle_error = step_result.get("error")
+    if cycle_error:
+        extra["error"] = cycle_error
+
+    # (result-dict key, metric suffix) of each seconds-valued metric, in order.
+    timed_metrics = (
+        ("snapshot_p50_s", "snapshot_p50"),
+        ("snapshot_p95_s", "snapshot_p95"),
+        ("snapshot_max_s", "snapshot_max"),
+        ("restore_p50_s", "restore_p50"),
+        ("restore_p95_s", "restore_p95"),
+        ("restore_max_s", "restore_max"),
+        ("ttfe_p50_s", "ttfe_p50"),
+        ("ttfe_p95_s", "ttfe_p95"),
+        ("ttfe_max_s", "ttfe_max"),
+        ("startup_time_s", "startup_time"),
+    )
+    samples = []
+    for data_key, metric_suffix in timed_metrics:
+        _emit(samples, step_result, data_key, metric_suffix, "seconds", ns, extra)
+
+    # Restore correctness goes through the same helper, with a "count" unit.
+    _emit(
+        samples,
+        step_result,
+        "restore_correct_count",
+        "restore_correct_count",
+        "count",
+        ns,
+        extra,
+    )
+
+    # Wall time is always reported, even when the cycle recorded an error.
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            round(wall_time, 2),
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info(
+        "Emitted %d samples for preload_mb=%d.",
+        len(samples),
+        cycle_kwargs["preload_mb"],
+    )
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Delete leftover labelled benchmark resources and stop the port-forward.
+
+    Every kind below is deleted by the app=snapshot-benchmark-workload label
+    in the gke_namespace namespace; delete failures are not raised
+    (raise_on_failure=False).
+
+    Args:
+      benchmark_spec: Unused; part of the PKB lifecycle signature.
+    """
+    ns = FLAGS.gke_namespace
+    logging.info("Cleanup — deleting any leftover snapshot-benchmark resources.")
+
+    selector = "app=snapshot-benchmark-workload"
+    leftover_kinds = [
+        "sandboxclaim",
+        "sandboxtemplate",
+        "podsnapshotmanualtrigger",
+        "podsnapshots.podsnapshot.gke.io",
+    ]
+    for resource_kind in leftover_kinds:
+        delete_cmd = ["delete", resource_kind, "-l", selector, "-n", ns, "--ignore-not-found=true"]
+        utils.RunKubectl(delete_cmd, timeout=60, raise_on_failure=False)
+
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+def Teardown(benchmark_spec):
+    """Teardown GKE cluster and all dependencies via gke_provision_utils; benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Core snapshot/restore logic
+# ---------------------------------------------------------------------------
+
+
+def _RunSnapshotCycle(
+    namespace,
+    preload_mb,
+    burst_size,
+    skip_snapshot,
+    preload_mode,
+    ksa_name,
+    pod_timeout,
+    template_path,
+):
+    """Execute one full snapshot/restore cycle and return a result dict.
+
+    Handles source creation, snapshot, restore, TTFE measurement,
+    correctness verification, and cleanup.  An exception in any phase is
+    caught and recorded under result["error"]; cleanup always runs.
+
+    Args:
+      namespace: Namespace for every resource created by the cycle.
+      preload_mb: Preload size; also embedded in the SandboxTemplate name.
+      burst_size: Number of source/trigger/restore resources run in parallel.
+      skip_snapshot: If True, return once sources are ready (cold-start TTFE).
+      preload_mode, ksa_name, template_path: Template settings for the helpers.
+      pod_timeout: Seconds allowed for each pod wait (Running, preload, TTFE).
+
+    Returns:
+      Dict of aggregated timings (None when not measured), the per-pod
+      "burst_results", "restore_correct_count" and "error".
+    """
+    step_template = f"snap-bench-{preload_mb}mb"
+    source_names = [f"snap-src-0-{i}" for i in range(burst_size)]
+    restore_names = [f"snap-restore-0-{i}" for i in range(burst_size)]
+    trigger_names = [f"snap-trigger-0-{i}" for i in range(burst_size)]
+
+    result = {
+        "preload_mb": preload_mb,
+        "burst_size": burst_size,
+        "snapshot_p50_s": None,
+        "snapshot_p95_s": None,
+        "snapshot_max_s": None,
+        "restore_p50_s": None,
+        "restore_p95_s": None,
+        "restore_max_s": None,
+        "ttfe_p50_s": None,
+        "ttfe_p95_s": None,
+        "ttfe_max_s": None,
+        "startup_time_s": None,
+        "snapshot_counter": None,
+        "restore_correct_count": 0,
+        "burst_results": [],
+        "error": None,
+    }
+
+    try:
+        # 1. Create step-specific SandboxTemplate
+        logging.info(
+            "Creating SandboxTemplate '%s' (PRELOAD_MB=%d, memory=%dMi)",
+            step_template,
+            preload_mb,
+            max(512, preload_mb + 256),
+        )
+        template_ok = _RenderAndApplyTemplate(
+            template_path, step_template, namespace, ksa_name, preload_mb, preload_mode
+        )
+        if not template_ok:
+            raise RuntimeError("Failed to create SandboxTemplate")
+
+        time.sleep(2)
+
+        # 2. Create source claims and wait for Running + preload
+        logging.info("Creating %d source SandboxClaim(s)", burst_size)
+        t0_sources = time.time()
+        # Cap the kubectl fan-out at 50 threads regardless of burst size.
+        workers = min(burst_size, 50)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            apply_futs = [
+                pool.submit(_ApplyClaim, sname, namespace, step_template)
+                for sname in source_names
+            ]
+            # Surface claim-creation failures now instead of waiting out pod_timeout.
+            for f in apply_futs:
+                f.result()
+
+        logging.info("Waiting for %d source pod(s) Running + preload", burst_size)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            source_futs = [
+                pool.submit(
+                    _MeasureSingleSource,
+                    sname,
+                    namespace,
+                    t0_sources,
+                    pod_timeout,
+                    preload_mode,
+                )
+                for sname in source_names
+            ]
+            source_results = [f.result() for f in source_futs]
+
+        src_failed = [r for r in source_results if r.get("error")]
+        if src_failed:
+            fail_msgs = "; ".join(f"{r['pod']}: {r['error']}" for r in src_failed)
+            raise RuntimeError(
+                f"{len(src_failed)}/{burst_size} source pod(s) failed: {fail_msgs}"
+            )
+
+        startup_times = [r["startup_time_s"] for r in source_results]
+        startup_times = [s for s in startup_times if s is not None]
+        if startup_times:
+            result["startup_time_s"] = round(_Percentile(startup_times, 50), 3)
+
+        snapshot_counters = {r["pod"]: r["snapshot_counter"] for r in source_results}
+        min_counter = min(
+            (c for c in snapshot_counters.values() if c is not None), default=None
+        )
+        result["snapshot_counter"] = min_counter
+        logging.info("%d source pod(s) ready. Min counter: %s", burst_size, min_counter)
+
+        # --skip_snapshot: measure cold-start TTFE only
+        if skip_snapshot:
+            logging.info("skip_snapshot mode: measuring cold-start TTFE")
+            ttfe_times = []
+            burst_results = []
+            for i, sname in enumerate(source_names):
+                startup = source_results[i]["startup_time_s"]
+                counter = source_results[i]["snapshot_counter"]
+                preload_done = source_results[i].get("preload_complete_time_s")
+                # No restore happens here: TTFE is preload completion, else startup.
+                ttfe_s = preload_done if preload_done else startup
+                ttfe_times.append(ttfe_s)
+                burst_results.append(
+                    {
+                        "pod": sname,
+                        "source_pod": sname,
+                        "startup_time_s": startup,
+                        "snapshot_counter": None,
+                        "snapshot_time_s": None,
+                        "restore_time_s": None,
+                        "ttfe_s": ttfe_s,
+                        "restore_counter": counter,
+                        "restore_correct": True,
+                        "error": None,
+                    }
+                )
+
+            result["burst_results"] = burst_results
+            result["restore_correct_count"] = burst_size
+
+            if ttfe_times:
+                result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3)
+                result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3)
+                result["ttfe_max_s"] = round(max(ttfe_times), 3)
+
+            # Skip to cleanup
+            return result
+
+        # 3. Trigger snapshots concurrently
+        logging.info("Triggering %d snapshot(s)", burst_size)
+        t0_snap = time.time()
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            snap_futs = [
+                pool.submit(_TriggerAndWaitSnapshot, tname, sname, namespace, t0_snap)
+                for tname, sname in zip(trigger_names, source_names)
+            ]
+            snap_results = [f.result() for f in snap_futs]
+
+        snap_failed = [r for r in snap_results if r.get("error")]
+        snap_times = [
+            r["snapshot_time_s"]
+            for r in snap_results
+            if r["snapshot_time_s"] is not None
+        ]
+        if snap_times:
+            result["snapshot_p50_s"] = round(_Percentile(snap_times, 50), 3)
+            result["snapshot_p95_s"] = round(_Percentile(snap_times, 95), 3)
+            result["snapshot_max_s"] = round(max(snap_times), 3)
+
+        if snap_failed:
+            fail_msgs = "; ".join(f"{r['trigger']}: {r['error']}" for r in snap_failed)
+            raise RuntimeError(
+                f"{len(snap_failed)}/{burst_size} snapshot(s) failed: {fail_msgs}"
+            )
+
+        # 4. Create restore claims concurrently
+        logging.info("Creating %d restore SandboxClaim(s)", burst_size)
+        t0_burst = time.time()
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            create_futs = [
+                pool.submit(_ApplyClaim, rname, namespace, step_template)
+                for rname in restore_names
+            ]
+            for f in create_futs:
+                f.result()
+
+        # 5. Poll restore pods for Running + TTFE
+        logging.info("Measuring restore + TTFE across %d pod(s)", burst_size)
+        with ThreadPoolExecutor(max_workers=workers) as pool:
+            measure_futs = [
+                pool.submit(
+                    _MeasureSingleRestore,
+                    rname,
+                    namespace,
+                    t0_burst,
+                    min_counter,
+                    pod_timeout,
+                )
+                for rname in restore_names
+            ]
+            burst_results = [f.result() for f in measure_futs]
+
+        # Merge source + snapshot info
+        for i in range(burst_size):
+            burst_results[i]["source_pod"] = source_names[i]
+            burst_results[i]["startup_time_s"] = source_results[i]["startup_time_s"]
+            burst_results[i]["snapshot_counter"] = source_results[i]["snapshot_counter"]
+            burst_results[i]["snapshot_time_s"] = snap_results[i]["snapshot_time_s"]
+
+        result["burst_results"] = burst_results
+
+        # 6. Aggregate
+        restore_times = [
+            r["restore_time_s"]
+            for r in burst_results
+            if r["restore_time_s"] is not None
+        ]
+        ttfe_times = [r["ttfe_s"] for r in burst_results if r["ttfe_s"] is not None]
+        correct_count = sum(1 for r in burst_results if r["restore_correct"])
+
+        result["restore_correct_count"] = correct_count
+
+        if restore_times:
+            result["restore_p50_s"] = round(_Percentile(restore_times, 50), 3)
+            result["restore_p95_s"] = round(_Percentile(restore_times, 95), 3)
+            result["restore_max_s"] = round(max(restore_times), 3)
+
+        if ttfe_times:
+            result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3)
+            result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3)
+            result["ttfe_max_s"] = round(max(ttfe_times), 3)
+
+        logging.info("Counter correct: %d/%d", correct_count, burst_size)
+
+    except Exception as e:
+        result["error"] = str(e)
+        logging.error("Snapshot cycle failed: %s", e)
+
+    finally:
+        # Cleanup
+        logging.info("Cleaning up step resources")
+        _CleanupStep(source_names, restore_names, trigger_names, step_template, namespace)
+        time.sleep(5)
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Kubernetes interaction helpers
+# ---------------------------------------------------------------------------
+
+
+def _ApplyClaim(name, namespace, template_name):
+    """Create (or update) a SandboxClaim through `kubectl apply -f -`.
+
+    The claim is labelled app=snapshot-benchmark-workload and points at
+    template_name through spec.sandboxTemplateRef.
+
+    Raises:
+      RuntimeError: If kubectl exits non-zero; the message carries its stderr.
+    """
+    claim = {
+        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+        "kind": "SandboxClaim",
+        "metadata": {
+            "name": name,
+            "namespace": namespace,
+            "labels": {"app": "snapshot-benchmark-workload"},
+        },
+        "spec": {"sandboxTemplateRef": {"name": template_name}},
+    }
+    run_opts = {"capture_output": True, "text": True, "timeout": 30}
+    done = subprocess.run(["kubectl", "apply", "-f", "-"], input=json.dumps(claim), **run_opts)
+    if done.returncode != 0:
+        raise RuntimeError(f"Failed to create SandboxClaim {name}: {done.stderr}")
+
+
+def _RenderAndApplyTemplate(
+    template_path,
+    template_name,
+    namespace,
+    ksa_name,
+    preload_mb,
+    preload_mode,
+):
+    """Render the .yaml.template with step-specific values and kubectl apply.
+
+    A preload_mode starting with "script:" is handed to
+    _RenderAndApplyScriptTemplate and template_path is never opened.  Otherwise
+    the file is rendered by plain text replacement and piped to kubectl.
+
+    Args:
+      template_path: Manifest template file to read.
+      template_name: Replaces the "snapshot-benchmark-template" resource name.
+      namespace, ksa_name: Fill $AGENTIC_NAMESPACE and $SNAPSHOT_KSA_NAME.
+      preload_mb: Fills $SNAPSHOT_PRELOAD_MB and sizes the memory request.
+
+    Returns:
+      True if kubectl apply exited 0; otherwise False, with stderr logged.
+    """
+    if preload_mode.startswith("script:"):
+        return _RenderAndApplyScriptTemplate(
+            template_name, namespace, ksa_name, preload_mb, preload_mode
+        )
+
+    with open(template_path) as f:
+        rendered = f.read()
+
+    # (text in the file, replacement); applied strictly in this order.
+    substitutions = [
+        ("$AGENTIC_NAMESPACE", namespace),
+        ("$SNAPSHOT_KSA_NAME", ksa_name),
+        ("$SNAPSHOT_PRELOAD_MB", str(preload_mb)),
+        ("name: snapshot-benchmark-template", f"name: {template_name}"),
+        ('memory: "512Mi"', f'memory: "{max(512, preload_mb + 256)}Mi"'),
+    ]
+    for placeholder, replacement in substitutions:
+        rendered = rendered.replace(placeholder, replacement)
+
+    apply_cmd = ["kubectl", "apply", "-f", "-"]
+    proc = subprocess.run(apply_cmd, input=rendered, capture_output=True, text=True, timeout=30)
+    if proc.returncode != 0:
+        logging.warning("kubectl apply stderr: %s", proc.stderr)
+    return proc.returncode == 0
+
+
+def _get_sandbox_node_selector():
+    """Return the sandbox nodeSelector for the current gke_provision_mode.
+
+    An undefined flag is treated as "custom", which shares the non-native selector.
+    """
+    mode = getattr(FLAGS, "gke_provision_mode", "custom")
+    if mode != "native":
+        return {"dedicated": "agentic-sandbox"}
+    return {"pkb_nodepool": "sandbox"}
+
+
+def _get_sandbox_tolerations():
+    """Return the sandbox pod tolerations for the current gke_provision_mode.
+
+    The gVisor runtime toleration is always present.  Every mode other than
+    "native" (an undefined flag counts as "custom") also gets the
+    dedicated=agentic-sandbox toleration, placed first in the list.
+
+    Returns:
+      List of toleration dicts with key/operator/value/effect entries.
+    """
+    mode = getattr(FLAGS, "gke_provision_mode", "custom")
+
+    def _no_schedule(key, value):
+        # The two tolerations differ only in key and value.
+        return {
+            "key": key,
+            "operator": "Equal",
+            "value": value,
+            "effect": "NoSchedule",
+        }
+
+    gvisor = _no_schedule("sandbox.gke.io/runtime", "gvisor")
+    if mode == "native":
+        return [gvisor]
+    return [_no_schedule("dedicated", "agentic-sandbox"), gvisor]
+
+
+def _RenderAndApplyScriptTemplate(
+    template_name,
+    namespace,
+    ksa_name,
+    preload_mb,
+    preload_mode,
+):
+    """Render a SandboxTemplate that runs a user-provided startup script.
+
+    The script named after "script:" in preload_mode is embedded in a bash
+    entrypoint that prints SCRIPT_READY and then counts once per second.  The
+    template itself is labelled app=snapshot-benchmark-workload (like its
+    pods) so the label-based sweep in Cleanup() can find leftover templates.
+
+    Returns:
+      True if kubectl apply exited 0; False on a missing script or kubectl error.
+    """
+    script_path = preload_mode.split(":", 1)[1]
+    if not os.path.isfile(script_path):
+        logging.error("Script not found: %s", script_path)
+        return False
+
+    with open(script_path) as f:
+        user_script = f.read()
+
+    memory_mi = max(512, preload_mb + 256)
+    labels = {"app": "snapshot-benchmark-workload"}
+
+    entrypoint = (
+        "#!/bin/bash\n"
+        "set -e\n"
+        'echo "Running startup script..."\n'
+        "# --- User script start ---\n"
+        f"{user_script}\n"
+        "# --- User script end ---\n"
+        'echo "SCRIPT_READY"\n'
+        'echo "Starting counter."\n'
+        "i=0\n"
+        "while true; do\n"
+        '  echo "Count: $i"\n'
+        "  i=$((i + 1))\n"
+        "  sleep 1\n"
+        "done\n"
+    )
+
+    manifest = {
+        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
+        "kind": "SandboxTemplate",
+        "metadata": {"name": template_name, "namespace": namespace, "labels": labels},
+        "spec": {
+            "podTemplate": {
+                "metadata": {"labels": labels},
+                "spec": {
+                    "serviceAccountName": ksa_name,
+                    "runtimeClassName": "gvisor",
+                    "containers": [
+                        {
+                            "name": "preloader",
+                            "image": "python:3.11-slim",
+                            "command": ["bash", "-c"],
+                            "args": [entrypoint],
+                            "env": [{"name": "PRELOAD_MB", "value": str(preload_mb)}],
+                            "resources": {
+                                "requests": {
+                                    "cpu": "250m",
+                                    "memory": f"{memory_mi}Mi",
+                                    "ephemeral-storage": "512Mi",
+                                }
+                            },
+                        }
+                    ],
+                    "nodeSelector": _get_sandbox_node_selector(),
+                    "tolerations": _get_sandbox_tolerations(),
+                    "restartPolicy": "OnFailure",
+                },
+            }
+        },
+    }
+
+    run_opts = {"capture_output": True, "text": True, "timeout": 30}
+    proc = subprocess.run(["kubectl", "apply", "-f", "-"], input=json.dumps(manifest), **run_opts)
+    if proc.returncode != 0:
+        logging.warning("kubectl apply stderr: %s", proc.stderr)
+    return proc.returncode == 0
+
+
+def _MeasureSingleSource(name, namespace, t0, pod_timeout, preload_mode):
+    """Wait for a source pod to be Running and preloaded.
+
+    Args:
+      name, namespace: Identify the pod to poll.
+      t0: time.time() reference for the Running deadline and reported durations.
+      pod_timeout: Seconds allowed for Running, and again for the preload wait.
+      preload_mode: Forwarded to _WaitForPreload.
+
+    Returns:
+      Dict with "pod", "startup_time_s", "preload_complete_time_s",
+      "snapshot_counter" and "error"; a timeout sets "error" and returns early.
+    """
+    pending = ("startup_time_s", "preload_complete_time_s", "snapshot_counter", "error")
+    outcome = {"pod": name, **dict.fromkeys(pending)}
+    phase_cmd = ["get", "pod", name, "-n", namespace, "-o", "jsonpath={.status.phase}"]
+
+    # Phase 1: poll once per second until the pod phase reads Running.
+    running_deadline = t0 + pod_timeout
+    while True:
+        if time.time() >= running_deadline:
+            outcome["error"] = f"Pod {name} did not reach Running within {pod_timeout}s"
+            return outcome
+        phase, _, _ = utils.RunKubectl(phase_cmd, timeout=10, raise_on_failure=False)
+        if phase == "Running":
+            outcome["startup_time_s"] = round(time.time() - t0, 3)
+            break
+        time.sleep(1)
+
+    # Phase 2: wait for the preload marker, then give the counter 3s to tick.
+    if not _WaitForPreload(name, namespace, pod_timeout, preload_mode):
+        outcome["error"] = f"Preload did not complete within {pod_timeout}s"
+        return outcome
+    outcome["preload_complete_time_s"] = round(time.time() - t0, 3)
+    time.sleep(3)
+    outcome["snapshot_counter"] = _GetLastCounter(name, namespace)
+    return outcome
+
+
+def _WaitForPreload(name, namespace, timeout_s, preload_mode):
+    """Poll the pod's last 20 log lines every 2s for a readiness marker.
+
+    Markers: "SCRIPT_READY", "Starting counter" or "Count: <n>".  Returns False
+    once timeout_s elapses; preload_mode is accepted but not consulted.
+    """
+    logs_cmd = ["logs", name, "-n", namespace, "--tail=20"]
+    give_up_at = time.time() + timeout_s
+    while time.time() < give_up_at:
+        logs, _, _ = utils.RunKubectl(logs_cmd, timeout=10, raise_on_failure=False)
+        plain_hit = any(marker in logs for marker in ("SCRIPT_READY", "Starting counter"))
+        if plain_hit or re.search(r"Count:\s*\d+", logs):
+            return True
+        time.sleep(2)
+    return False
+
+
+def _GetLastCounter(name, namespace):
+    """Return the last "Count: N" value in the pod's final 10 log lines.
+
+    Returns None if kubectl logs exits non-zero or no counter line is present.
+    """
+    log_cmd = ["logs", name, "-n", namespace, "--tail=10"]
+    logs, _, exit_code = utils.RunKubectl(log_cmd, timeout=10, raise_on_failure=False)
+    if exit_code != 0:
+        return None
+    counts = re.findall(r"Count:\s*(\d+)", logs)
+    return int(counts[-1]) if counts else None
+
+
+def _TriggerAndWaitSnapshot(trigger_name, target_pod, namespace, t0, timeout_s=300):
+    """Create a snapshot trigger and wait for Complete.
+
+    The trigger carries the app=snapshot-benchmark-workload label so that the
+    label-based sweep in Cleanup() also matches leftover triggers.
+
+    t0 is the time.time() reference for both the polling deadline
+    (t0 + timeout_s) and the reported snapshot_time_s.
+
+    Returns:
+      Dict with "trigger", "pod", "snapshot_time_s" (None unless the trigger
+      reached Complete) and "error" (None on success).
+    """
+    result = {
+        "trigger": trigger_name,
+        "pod": target_pod,
+        "snapshot_time_s": None,
+        "error": None,
+    }
+    manifest = {
+        "apiVersion": "podsnapshot.gke.io/v1",
+        "kind": "PodSnapshotManualTrigger",
+        "metadata": {
+            "name": trigger_name,
+            "namespace": namespace,
+            "labels": {"app": "snapshot-benchmark-workload"},
+        },
+        "spec": {"targetPod": target_pod},
+    }
+    run_opts = {"capture_output": True, "text": True, "timeout": 30}
+    proc = subprocess.run(["kubectl", "apply", "-f", "-"], input=json.dumps(manifest), **run_opts)
+    if proc.returncode != 0:
+        result["error"] = f"Failed to create trigger: {proc.stderr}"
+        return result
+
+    trigger_kind = "podsnapshotmanualtriggers.podsnapshot.gke.io"
+    reason_path = "jsonpath={.status.conditions[0].reason}"
+    reason_cmd = ["get", trigger_kind, trigger_name, "-n", namespace, "-o", reason_path]
+    deadline = t0 + timeout_s
+    # Poll the first status condition every 2s until it reports Complete.
+    while time.time() < deadline:
+        stdout, _, _ = utils.RunKubectl(reason_cmd, timeout=10, raise_on_failure=False)
+        if stdout == "Complete":
+            result["snapshot_time_s"] = round(time.time() - t0, 3)
+            return result
+        time.sleep(2)
+    result["error"] = f"Snapshot {trigger_name} did not complete within {timeout_s}s"
+    return result
+
+
+def _MeasureSingleRestore(name, namespace, t0, snapshot_counter, pod_timeout):
+    """Measure restore_time and TTFE for a single pod.
+
+    restore_time_s is the delay from t0 until the pod phase reads Running;
+    ttfe_s is the delay from t0 until the pod log shows a "Count: N" line.
+    Both waits share the single deadline t0 + pod_timeout.  The restore counts
+    as correct when the first counter found in the last 50 log lines is at
+    least snapshot_counter (never when snapshot_counter is None).
+
+    Returns:
+      The result dict; "error" stays None when both stages succeed.
+    """
+    outcome = {
+        "pod": name,
+        "restore_time_s": None,
+        "ttfe_s": None,
+        "restore_counter": None,
+        "restore_correct": False,
+        "error": None,
+    }
+    deadline = t0 + pod_timeout
+    phase_cmd = ["get", "pod", name, "-n", namespace, "-o", "jsonpath={.status.phase}"]
+    logs_cmd = ["logs", name, "-n", namespace, "--tail=50"]
+
+    # Stage 1: poll the pod phase once per second until it is Running.
+    while True:
+        if time.time() >= deadline:
+            outcome["error"] = f"Pod {name} did not reach Running within {pod_timeout}s"
+            return outcome
+        phase, _, _ = utils.RunKubectl(phase_cmd, timeout=10, raise_on_failure=False)
+        if phase == "Running":
+            outcome["restore_time_s"] = round(time.time() - t0, 3)
+            break
+        time.sleep(1)
+
+    # Stage 2: poll the logs for the first counter line (TTFE).
+    while time.time() < deadline:
+        logs, _, exit_code = utils.RunKubectl(logs_cmd, timeout=10, raise_on_failure=False)
+        counts = re.findall(r"Count:\s*(\d+)", logs) if exit_code == 0 else []
+        if not counts:
+            time.sleep(1)
+            continue
+        outcome["ttfe_s"] = round(time.time() - t0, 3)
+        outcome["restore_counter"] = int(counts[0])
+        outcome["restore_correct"] = (
+            snapshot_counter is not None and outcome["restore_counter"] >= snapshot_counter
+        )
+        return outcome
+    outcome["error"] = f"Pod {name}: no Count output within timeout"
+    return outcome
+
+
+def _CleanupStep(source_names, restore_names, trigger_names, template_name, namespace):
+    """Delete source claims, restore claims, triggers, snapshots, and template.
+
+    Deletion order: the template, source claims, restore claims, triggers, and
+    finally every PodSnapshot in the namespace (--all, not just this step's).
+    Each delete uses --ignore-not-found=true and raise_on_failure=False.
+
+    Args:
+      source_names, restore_names: SandboxClaim names to delete.
+      trigger_names: PodSnapshotManualTrigger names to delete.
+      template_name: SandboxTemplate name to delete.
+      namespace: Namespace holding all of the above.
+    """
+    named_targets = (
+        [("sandboxtemplate", template_name)]
+        + [("sandboxclaim", claim) for claim in source_names]
+        + [("sandboxclaim", claim) for claim in restore_names]
+        + [("podsnapshotmanualtrigger", trigger) for trigger in trigger_names]
+    )
+    delete_cmds = [["delete", kind, name] for kind, name in named_targets]
+    # PodSnapshot objects are not tracked by name, so the whole namespace is swept.
+    delete_cmds.append(["delete", "podsnapshots.podsnapshot.gke.io", "--all"])
+
+    for cmd in delete_cmds:
+        utils.RunKubectl(
+            cmd + ["-n", namespace, "--ignore-not-found=true"],
+            timeout=60,
+            raise_on_failure=False,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _GetTemplatePath():
+    """Return the absolute path to the snapshot SandboxTemplate template.
+
+    This module sits four directories below the package root that holds data/.
+    """
+    pkg_dir = os.path.abspath(__file__)
+    for _ in range(4):
+        pkg_dir = os.path.dirname(pkg_dir)
+    manifest = "snapshot-sandbox-template.yaml.template"
+    return os.path.join(pkg_dir, "data", "k8s_agents", "manifests", manifest)
+
+
+def _Percentile(values, pct):
+    """Return the pct-th percentile (0-100) of values, sorted here first; 0.0 if empty."""
+    if not values:
+        return 0.0
+    ordered = sorted(values)
+    top = len(ordered) - 1
+    rank = (pct / 100) * top
+    lower = int(rank)
+    weight = rank - lower
+    return ordered[lower] * (1 - weight) + ordered[min(lower + 1, top)] * weight
+
+
+def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra):
+    """Append a sample built from ``data[data_key]``; do nothing if it is None/absent.
+
+    The sample is created by ``utils.MakeSample`` under the metric name
+    ``<BENCHMARK_NAME>_<metric_suffix>`` with the given unit, namespace and extra.
+    """
+    reading = data.get(data_key)
+    if reading is None:
+        return
+    full_metric = f"{BENCHMARK_NAME}_{metric_suffix}"
+    samples.append(
+        utils.MakeSample(full_metric, reading, unit, namespace, extra)
+    )
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
new file mode 100644
index 0000000000..1c00deca54
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
@@ -0,0 +1,487 @@
+"""PKB Benchmark: GKE Agent Warmpool Scale-Up (Use Case E).
+
+Atomic single-point measurement of warm pool provisioning speed on a
+pre-provisioned GKE cluster.  Measures how quickly N sandbox pods can be
+provisioned from zero via the SandboxWarmPool controller.  No agent API
+is needed; this benchmark interacts directly with the Kubernetes API.
+
+This benchmark is designed to be invoked repeatedly by an external sweep
+controller that varies the target_replicas parameter across iterations to
+find the provisioning saturation point.
+
+Usage:
+  python pkb.py --benchmarks=gke_warmpool \
+                --gke_warmpool_target_replicas=100 \
+                --gke_warmpool_name=python-sandbox-warmpool \
+                --gke_warmpool_pod_label=sandbox=python-sandbox-example \
+                --gke_warmpool_ready_threshold_s=300 \
+                --gke_warmpool_poll_interval_s=2.0 \
+                --gke_warmpool_drain_timeout_s=300 \
+                --gke_namespace=agentic \
+                --gke_machine_type=c4-standard-8
+
+Samples emitted (per run):
+  - gke_warmpool_total_time_to_ready         (seconds)
+  - gke_warmpool_refill_rate                 (pods/sec)
+  - gke_warmpool_drain_time                  (seconds)
+  - gke_warmpool_first_pod_running           (seconds)
+  - gke_warmpool_final_running_count         (count)
+  - gke_warmpool_final_pending_count         (count)
+  - gke_warmpool_time_to_created_p50         (seconds)
+  - gke_warmpool_time_to_created_p95         (seconds)
+  - gke_warmpool_time_to_created_max         (seconds)
+  - gke_warmpool_time_to_created_count       (count)
+  - gke_warmpool_time_to_scheduled_p50       (seconds)
+  - gke_warmpool_time_to_scheduled_p95       (seconds)
+  - gke_warmpool_time_to_scheduled_max       (seconds)
+  - gke_warmpool_time_to_scheduled_count     (count)
+  - gke_warmpool_time_to_running_p50         (seconds)
+  - gke_warmpool_time_to_running_p95         (seconds)
+  - gke_warmpool_time_to_running_max         (seconds)
+  - gke_warmpool_time_to_running_count       (count)
+  - gke_warmpool_wall_time                   (seconds)
+"""
+
+import json
+import logging
+import time
+
+from absl import flags
+from datetime import datetime, timezone
+from perfkitbenchmarker import configs
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_benchmark_utils as utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+    gke_deploy_utils as deploy_utils,
+)
+from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = "gke_warmpool"
+BENCHMARK_CONFIG = """
+gke_warmpool:
+  description: >
+    Atomic single-point warm pool scale-up measurement on a
+    pre-provisioned GKE cluster with gVisor isolation.
+"""
+
+# ---------------------------------------------------------------------------
+# Benchmark-specific flags
+# ---------------------------------------------------------------------------
+
+flags.DEFINE_integer(
+    "gke_warmpool_target_replicas",
+    100,
+    "Number of warm pool replicas to provision from zero.",
+)
+
+flags.DEFINE_string(
+    "gke_warmpool_name",
+    "python-sandbox-warmpool",
+    "SandboxWarmPool resource name.",
+)
+
+flags.DEFINE_string(
+    "gke_warmpool_pod_label",
+    "sandbox=python-sandbox-example",
+    "Label selector for warm pool pods.",
+)
+
+flags.DEFINE_float(
+    "gke_warmpool_ready_threshold_s",
+    300.0,
+    "Max seconds allowed for all pods to reach Running.",
+)
+
+flags.DEFINE_float(
+    "gke_warmpool_poll_interval_s",
+    2.0,
+    "Seconds between kubectl polls during provisioning.",
+)
+
+flags.DEFINE_float(
+    "gke_warmpool_drain_timeout_s",
+    300.0,
+    "Max seconds to wait for drain to 0.",
+)
+
+
+# ---------------------------------------------------------------------------
+# Lifecycle
+# ---------------------------------------------------------------------------
+
+
+def Provision(benchmark_spec):
+    """Provision via gke_provision_utils.Provision(); benchmark_spec is unused."""
+    gke_provision_utils.Provision()
+
+
+def GetConfig(user_config):
+    """Load and return the benchmark config from BENCHMARK_CONFIG.
+
+    No vm_groups declared; presumably PKB skips VM provisioning -- TODO confirm.
+    """
+    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+
+
+def Prepare(benchmark_spec):
+    """Deploy workloads and call utils.EnsurePortForward() before the run."""
+    logging.info("=== Prepare: deploying workloads ===")
+    deploy_utils.DeployWorkloads()
+    utils.EnsurePortForward()
+    logging.info("Prepare complete.")
+
+
+def Run(benchmark_spec):
+    """Drain the warm pool, scale it to the target, and time the scale-up.
+
+    Returns:
+      List of samples built with utils.MakeSample.
+    """
+    ns = FLAGS.gke_namespace
+    target = FLAGS.gke_warmpool_target_replicas
+    warmpool_name = FLAGS.gke_warmpool_name
+    label = FLAGS.gke_warmpool_pod_label
+    threshold_s = FLAGS.gke_warmpool_ready_threshold_s
+    poll_interval = FLAGS.gke_warmpool_poll_interval_s
+
+    # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
+    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    time.sleep(3)
+
+    logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target)
+
+    t_wall_start = time.time()
+
+    # 1. Measure drain time (should be near-zero: the pool was just drained above)
+    t0 = time.time()
+    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    drain_time_s = round(time.time() - t0, 2)
+
+    time.sleep(2)
+
+    # 2. Scale up
+    logging.info("Patching %s replicas -> %d", warmpool_name, target)
+    patch_json = json.dumps({"spec": {"replicas": target}})
+    utils.RunKubectl(
+        [
+            "patch",
+            "sandboxwarmpool",
+            warmpool_name,
+            "-n",
+            ns,
+            "--type=merge",
+            f"-p={patch_json}",
+        ]
+    )
+
+    # 3. Poll until ready or timeout
+    t_scale = time.time()
+    scale_start_epoch = t_scale
+    deadline = t_scale + threshold_s
+    first_pod_time = None
+
+    while time.time() < deadline:
+        elapsed = time.time() - t_scale
+        running = _CountPods(ns, label, "Running")
+        pending = _CountPods(ns, label, "Pending")
+
+        if first_pod_time is None and running > 0:
+            first_pod_time = elapsed
+
+        pct = (running / target * 100) if target > 0 else 0
+        logging.info(
+            "[%.1fs] Running: %d/%d (%.0f%%)  Pending: %d",
+            elapsed,
+            running,
+            target,
+            pct,
+            pending,
+        )
+
+        if running >= target:
+            break
+
+        time.sleep(poll_interval)
+
+    total_time = round(time.time() - t_scale, 2)
+    final_running = _CountPods(ns, label, "Running")
+    final_pending = _CountPods(ns, label, "Pending")
+    rate = round(final_running / total_time, 2) if total_time > 0 else 0
+
+    logging.info(
+        "Scale-up complete: %d/%d Running in %.1fs (%.1f pods/sec)",
+        final_running,
+        target,
+        total_time,
+        rate,
+    )
+
+    # 4. Scrape pod lifecycle timestamps
+    lifecycle = _ScrapeLifecycle(ns, label, scale_start_epoch)
+
+    wall_time = round(time.time() - t_wall_start, 2)
+
+    # 5. Build samples
+    extra = {
+        "target_replicas": target,
+        "final_running_count": final_running,
+        "final_pending_count": final_pending,
+        "wall_time_s": wall_time,
+    }
+
+    samples = []
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_total_time_to_ready",
+            total_time,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_refill_rate",
+            rate,
+            "pods/sec",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_drain_time",
+            drain_time_s,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    if first_pod_time is not None:
+        samples.append(
+            utils.MakeSample(
+                f"{BENCHMARK_NAME}_first_pod_running",
+                round(first_pod_time, 2),
+                "seconds",
+                ns,
+                extra,
+            )
+        )
+
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_final_running_count",
+            float(final_running),
+            "count",
+            ns,
+            extra,
+        )
+    )
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_final_pending_count",
+            float(final_pending),
+            "count",
+            ns,
+            extra,
+        )
+    )
+
+    # Pod lifecycle percentiles
+    _EmitLifecycleSamples(samples, lifecycle, ns, extra)
+
+    # Wall time
+    samples.append(
+        utils.MakeSample(
+            f"{BENCHMARK_NAME}_wall_time",
+            wall_time,
+            "seconds",
+            ns,
+            extra,
+        )
+    )
+
+    logging.info("Emitted %d samples for target_replicas=%d.", len(samples), target)
+    return samples
+
+
+def Cleanup(benchmark_spec):
+    """Drain the warm pool back to 0, then call utils.StopPortForward()."""
+    ns = FLAGS.gke_namespace
+    warmpool_name = FLAGS.gke_warmpool_name
+    label = FLAGS.gke_warmpool_pod_label
+
+    logging.info("Cleanup: draining warm pool to 0.")
+    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    utils.StopPortForward()
+    logging.info("Cleanup complete.")
+
+
+def Teardown(benchmark_spec):
+    """Tear down via gke_provision_utils.Teardown(); benchmark_spec is unused."""
+    gke_provision_utils.Teardown()
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _CountPods(namespace, label, phase=None):
+    """Return how many pods match label, optionally limited to one phase."""
+    args = ["get", "pods", "-n", namespace, "-l", label, "-o", "name"]
+    if phase:
+        args.append(f"--field-selector=status.phase={phase}")
+    out, _, exit_code = utils.RunKubectl(args, raise_on_failure=False)
+    if exit_code == 0 and out:
+        return len(out.strip().splitlines())
+    return 0
+
+
+def _DrainPool(namespace, warmpool_name, label, timeout_s):
+    """Set the warm pool's replicas to 0 and poll until no labelled pods remain.
+
+    All SandboxClaims in the namespace are deleted as well. Both kubectl
+    calls are best-effort (raise_on_failure=False). If pods are still
+    present once timeout_s seconds have passed, a warning is logged and the
+    function returns normally.
+    """
+    zero_replicas = json.dumps({"spec": {"replicas": 0}})
+    scale_down_args = [
+        "patch",
+        "sandboxwarmpool",
+        warmpool_name,
+        "-n",
+        namespace,
+        "--type=merge",
+        f"-p={zero_replicas}",
+    ]
+    utils.RunKubectl(scale_down_args, raise_on_failure=False)
+
+    # Remove leftover SandboxClaims in the namespace.
+    claim_args = [
+        "delete",
+        "sandboxclaims",
+        "--all",
+        "-n",
+        namespace,
+        "--ignore-not-found=true",
+    ]
+    utils.RunKubectl(claim_args, timeout=60, raise_on_failure=False)
+
+    started = time.time()
+    while time.time() - started < timeout_s:
+        if _CountPods(namespace, label) == 0:
+            logging.info("Pool drained in %.1fs", time.time() - started)
+            return
+        # Fixed 2-second pause between polls.
+        time.sleep(2)
+
+    logging.warning("Drain timed out after %.0fs", timeout_s)
+
+
+def _ScrapeLifecycle(namespace, label, scale_start_epoch):
+    """Scrape pod metadata to compute time-to-created/scheduled/running.
+
+    Deltas are seconds between scale_start_epoch and, per pod, the
+    creationTimestamp, the PodScheduled=True transition, and the Ready=True
+    transition (reported as "running").
+
+    Returns:
+      Dict keyed by time_to_created_s / time_to_scheduled_s /
+      time_to_running_s, each holding p50/p95/max/count (or {} when no pod
+      supplied that timestamp). Returns {} if kubectl fails or its output
+      is not valid JSON.
+    """
+    stdout, _, rc = utils.RunKubectl(
+        ["get", "pods", "-n", namespace, "-l", label, "-o", "json"],
+        timeout=60,
+        raise_on_failure=False,
+    )
+    if rc != 0 or not stdout:
+        return {}
+
+    try:
+        pods = json.loads(stdout).get("items", [])
+    except ValueError:
+        # Lifecycle stats are optional; never let bad output sink the run.
+        logging.warning("Could not parse pod JSON; skipping lifecycle stats.")
+        return {}
+
+    def _delta(ts_str):
+        # Kubernetes emits RFC 3339 with a trailing "Z"; make it an offset.
+        parsed = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+        return parsed.timestamp() - scale_start_epoch
+
+    created_deltas = []
+    # Condition type -> list collecting that transition's deltas.
+    by_condition = {"PodScheduled": [], "Ready": []}
+
+    for pod in pods:
+        created_str = pod.get("metadata", {}).get("creationTimestamp")
+        if created_str:
+            created_deltas.append(_delta(created_str))
+
+        for cond in pod.get("status", {}).get("conditions", []):
+            bucket = by_condition.get(cond.get("type"))
+            ts_str = cond.get("lastTransitionTime")
+            if bucket is not None and cond.get("status") == "True" and ts_str:
+                bucket.append(_delta(ts_str))
+
+    def _pcts(vals):
+        if not vals:
+            return {}
+        # Index-based picks (no interpolation); int(n * 0.95) < n for n >= 1.
+        vals = sorted(vals)
+        n = len(vals)
+        return {
+            "p50": round(vals[n // 2], 2),
+            "p95": round(vals[int(n * 0.95)], 2) if n > 1 else round(vals[-1], 2),
+            "max": round(vals[-1], 2),
+            "count": n,
+        }
+
+    return {
+        "time_to_created_s": _pcts(created_deltas),
+        "time_to_scheduled_s": _pcts(by_condition["PodScheduled"]),
+        "time_to_running_s": _pcts(by_condition["Ready"]),
+    }
+
+
+def _EmitLifecycleSamples(samples, lifecycle, namespace, extra):
+    """Append p50/p95/max and count samples for each pod lifecycle phase.
+
+    Covers time_to_created, time_to_scheduled and time_to_running. The
+    percentile and max values use unit "seconds"; count is emitted as a
+    float with unit "count". Phases missing from lifecycle, and statistics
+    missing from a phase, are skipped without error.
+    """
+    # (statistic key, unit, whether the value is cast to float first)
+    stat_specs = (
+        ("p50", "seconds", False),
+        ("p95", "seconds", False),
+        ("max", "seconds", False),
+        ("count", "count", True),
+    )
+    for phase in ("created", "scheduled", "running"):
+        metric_base = f"time_to_{phase}"
+        phase_data = lifecycle.get(f"{metric_base}_s", {})
+        # Emit in the fixed order p50, p95, max, count.
+        for stat, unit, as_float in stat_specs:
+            val = phase_data.get(stat)
+            if val is None:
+                continue
+            samples.append(
+                utils.MakeSample(
+                    f"{BENCHMARK_NAME}_{metric_base}_{stat}",
+                    float(val) if as_float else val,
+                    unit,
+                    namespace,
+                    extra,
+                )
+            )
diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py
index a56fe72b99..244ba5d774 100644
--- a/perfkitbenchmarker/providers/gcp/flags.py
+++ b/perfkitbenchmarker/providers/gcp/flags.py
@@ -580,6 +580,27 @@
     ' the size derived from max_vm_count. Use when the cluster will scale'
     ' beyond the default node pool (e.g. kubernetes_node_scale with 5k nodes).',
 )
+
+GKE_USE_BETA = flags.DEFINE_boolean(
+    'gke_use_beta',
+    False,
+    'Use gcloud beta for cluster creation (required for preview features '
+    'like pod snapshots).',
+)
+
+GKE_ADDITIONAL_FLAGS = flags.DEFINE_list(
+    'gke_additional_flags',
+    [],
+    'Additional flags to pass to gcloud container clusters create. '
+    'Example: --gke_additional_flags=--enable-pod-snapshots,--enable-dataplane-v2',
+)
+
+GKE_ADDITIONAL_NODEPOOL_FLAGS = flags.DEFINE_list(
+    'gke_additional_nodepool_flags',
+    [],
+    'Additional flags to pass to gcloud container node-pools create. '
+    'Example: --gke_additional_nodepool_flags=--max-pods-per-node=250',
+)
 GCE_PERFORMANCE_MONITORING_UNIT = flags.DEFINE_enum(
     'gce_performance_monitoring_unit',
     None,
diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
index f943a53ff1..3c24ad941c 100644
--- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
+++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
@@ -417,6 +417,12 @@ def _Create(self):
     if self.enable_aam:
       cmd.args.append('--auto-monitoring-scope=ALL')
 
+    # --- PKB Extension: beta gcloud and additional cluster create flags ---
+    if gcp_flags.GKE_USE_BETA.value:
+      cmd.use_beta_gcloud = True
+    for additional_flag in gcp_flags.GKE_ADDITIONAL_FLAGS.value:
+      cmd.args.append(additional_flag)
+
     self._RunClusterCreateCommand(cmd)
     self._GetKubeconfig()
     self._CreateCustomComputeClass(self.default_nodepool)
@@ -432,6 +438,10 @@ def _CreateNodePools(self):
           nodepool,
           cmd,
       )
+      # --- PKB Extension: additional node pool create flags ---
+      for additional_flag in gcp_flags.GKE_ADDITIONAL_NODEPOOL_FLAGS.value:
+        cmd.args.append(additional_flag)
+
       self._IssueResourceCreationCommand(cmd)
       self._CreateCustomComputeClass(nodepool)
 
diff --git a/requirements.txt b/requirements.txt
index 755f82737c..1313c628f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,3 +33,4 @@ setuptools>=40.3.0,<81
 six>=1.13.0
 timeout-decorator
 scipy
+matplotlib

From 0338d094f2e5095f0ed0578435f4b8f56520241d Mon Sep 17 00:00:00 2001
From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com>
Date: Thu, 18 Jun 2026 04:14:08 -0400
Subject: [PATCH 2/5] attend to comments, fixes, and improvements

---
 .../config/agentic_benchmark_config.yaml      | 324 +++++++
 .../data/k8s_agents/config/gke-benchmark.conf | 171 ----
 .../config/native_provision_config.yaml       |  70 --
 .../k8s_agents/manifests/adk-agent.yaml.j2    | 118 +++
 .../k8s_agents/manifests/psi-reader.yaml.j2   |  56 ++
 .../manifests/sandbox-router.yaml.j2          |  69 ++
 .../manifests/sandbox-templates.yaml.j2       | 103 +++
 .../manifests/snapshot-crds.yaml.j2           |  24 +
 .../snapshot-sandbox-template.yaml.j2         |  46 +
 .../adk_agent/generated.env.template          |   6 +-
 .../adk_agent/gke_performance_agent/agent.py  |  44 +-
 .../k8s_agents/workloads/adk_agent/main.py    |   2 +-
 .../kubernetes/agentic/gke_benchmark_utils.py | 135 +--
 .../agentic/gke_chromium_density_benchmark.py |  45 +-
 .../agentic/gke_deletion_benchmark.py         |  56 +-
 .../kubernetes/agentic/gke_deploy_utils.py    | 846 +++++-------------
 .../agentic/gke_image_build_utils.py          |  68 +-
 .../agentic/gke_payload_benchmark.py          |  39 +-
 .../kubernetes/agentic/gke_post_teardown.py   |  77 ++
 .../agentic/gke_prerequisite_setup.py         | 516 -----------
 .../kubernetes/agentic/gke_prerequisites.py   | 107 +++
 .../kubernetes/agentic/gke_provision_utils.py | 698 ---------------
 .../agentic/gke_python_density_benchmark.py   |  59 +-
 .../kubernetes/agentic/gke_qps_benchmark.py   |  62 +-
 .../agentic/gke_snapshot_benchmark.py         | 196 ++--
 .../agentic/gke_warmpool_benchmark.py         |  88 +-
 perfkitbenchmarker/providers/gcp/flags.py     |   6 -
 .../providers/gcp/google_kubernetes_engine.py |   4 +-
 requirements.txt                              |   1 -
 snapshot-sandbox-template.yaml.j2             |  46 +
 30 files changed, 1586 insertions(+), 2496 deletions(-)
 create mode 100644 perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
 delete mode 100644 perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
 delete mode 100644 perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2
 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
 delete mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
 delete mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
 create mode 100644 snapshot-sandbox-template.yaml.j2

diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
new file mode 100644
index 0000000000..95077b469c
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
@@ -0,0 +1,324 @@
+# Agentic Benchmark Configuration for GKE
+# Used with: --benchmark_config_file=perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
+#
+# User/environment-specific flags that MUST be passed on CLI:
+#   --project=<project>
+#   --owner=<owner>
+#   --gce_network_name=<user>-agentic-vpc
+#   --gke_additional_flags="--workload-pool=<project>.svc.id.goog,--subnetwork=<user>-agentic-subnet,--enable-master-authorized-networks,--master-authorized-networks=$(curl -s ifconfig.me)/32"
+#
+# Per-run flags:
+#   --run_stage=provision|prepare|run,cleanup|teardown
+#   --run_uri=<unique_id>
+#   --temp_dir=<path>
+#
+# Benchmark-specific sweep parameters (vary per run):
+#   --gke_python_density_concurrent_sandbox_count=N
+#   --gke_snapshot_preload_mb=N
+#   etc.
+
+# ===========================================================================
+# Shared cluster configuration (identical across all benchmarks)
+# ===========================================================================
+
+gke_python_density:
+  flags:
+    # --- Cluster creation flags ---
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    # --- Agentic workload flags ---
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_chromium_density:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_payload:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_qps:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_snapshot:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_warmpool:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
+
+
+gke_deletion:
+  flags:
+    gke_additional_flags:
+      - "--enable-pod-snapshots"
+      - "--enable-dataplane-v2"
+      - "--enable-private-nodes"
+      - "--enable-ip-alias"
+      - "--master-ipv4-cidr=172.16.0.0/28"
+    gke_additional_nodepool_flags:
+      - "--max-pods-per-node=250"
+    container_cluster_version: "1.35.3-gke.1389000"
+    gke_enable_shielded_nodes: false
+    gce_subnet_region: "us-central1"
+
+    k8s_namespace: "agentic"
+    agent_sandbox_version: "v0.4.6"
+    k8s_gvisor: true
+    k8s_agent_api_url: "http://localhost:8080"
+    skip_image_build: false
+
+  container_cluster:
+    cloud: GCP
+    type: Kubernetes
+    vm_count: 1
+    vm_spec:
+      GCP:
+        machine_type: c4-standard-8
+        zone: us-central1-a
+        boot_disk_type: hyperdisk-balanced
+        boot_disk_size: 50
+    nodepools:
+      sandbox:
+        vm_count: 1
+        vm_spec:
+          GCP:
+            machine_type: c4-standard-8
+            zone: us-central1-a
+            boot_disk_type: hyperdisk-balanced
+            boot_disk_size: 100
+        sandbox_config:
+          type: gvisor
diff --git a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
deleted file mode 100644
index 99e6411577..0000000000
--- a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/bin/bash
-#
-# Agentic Workload Benchmarking configuration file for GKE
-# Adapted from nginx DPv2 baseline for Python Sandbox & Chromium Simulation
-#
-# Override machine type and cluster suffix via environment variables:
-#   MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d bash setup_infrastructure_gke.sh
-#
-# Supported profiles:
-#   MACHINE_TYPE=c3-standard-192-metal  CLUSTER_SUFFIX=c3metal
-#   MACHINE_TYPE=c4-standard-8          CLUSTER_SUFFIX=c4       (default)
-#   MACHINE_TYPE=c4d-standard-8         CLUSTER_SUFFIX=c4d
-#   MACHINE_TYPE=c4a-standard-8         CLUSTER_SUFFIX=c4a      (ARM64)
-
-USER_NAME_PREFIX=${USER%%.*}
-
-# GCP Project (MUST be set before running any script)
-PROJECT_ID="your-project-id"
-REGION="us-central1"
-ZONE="us-central1-a"
-
-# Google/ADK aliases (derived from canonical names above)
-# These are used by envsubst for the K8s manifest and by the ADK agent.
-GOOGLE_CLOUD_PROJECT="${PROJECT_ID}"
-GOOGLE_CLOUD_LOCATION="${REGION}"
-
-# Network Configuration
-VPC_NAME="${USER_NAME_PREFIX}-agentic-vpc"
-SUBNET_NAME="${USER_NAME_PREFIX}-agentic-subnet"
-SUBNET_CIDR="10.134.20.0/24"
-LAPTOP_IP="$(curl -s ifconfig.me)/32"  # PUBLIC IP to access the target (dynamically detected)
-# Cloud Router and NAT Configuration
-ROUTER_NAME="${USER_NAME_PREFIX}-agentic-nat-router"
-NAT_NAME="${USER_NAME_PREFIX}-agentic-nat-config"
-
-# GKE Cluster Configuration
-CLUSTER_SUFFIX="${CLUSTER_SUFFIX:-c4}"
-CLUSTER_NAME="${USER_NAME_PREFIX}-agentic-${CLUSTER_SUFFIX}"
-GKE_VERSION="1.35.3-gke.1389000"
-USE_CONNECT_GATEWAY="${USE_CONNECT_GATEWAY:-true}"  # Use Connect Gateway for kubectl access
-                                                    # Set to "false" to use direct public endpoint
-
-# =========================================================================
-# Machine Type Configuration (overridable via MACHINE_TYPE env var)
-# =========================================================================
-MACHINE_TYPE="${MACHINE_TYPE:-c4-standard-8}"
-
-# Derive disk type from machine family:
-#   C3 → pd-balanced, C4/C4D/C4A → hyperdisk-balanced
-_MACHINE_FAMILY="${MACHINE_TYPE%%-*}"  # e.g. "c4" from "c4-standard-8"
-case "${_MACHINE_FAMILY}" in
-  c3)  _DISK_TYPE="pd-balanced" ;;
-  *)   _DISK_TYPE="hyperdisk-balanced" ;;
-esac
-
-# Derive target architecture from machine family:
-#   C4A → arm64, everything else → amd64
-case "${_MACHINE_FAMILY}" in
-  c4a) _TARGET_ARCH="arm64" ;;
-  *)   _TARGET_ARCH="amd64" ;;
-esac
-
-# Derive unique master CIDR per cluster (each private cluster needs its own /28):
-#   c4 → 172.16.0.0/28, c4d → 172.16.0.16/28, c4a → 172.16.0.32/28, c3metal → 172.16.0.48/28
-case "${CLUSTER_SUFFIX}" in
-  c4)      MASTER_IPV4_CIDR="172.16.0.0/28" ;;
-  c4d)     MASTER_IPV4_CIDR="172.16.0.16/28" ;;
-  c4a)     MASTER_IPV4_CIDR="172.16.0.32/28" ;;
-  c3metal) MASTER_IPV4_CIDR="172.16.0.48/28" ;;
-  *)       MASTER_IPV4_CIDR="172.16.0.64/28" ;;  # fallback for future clusters
-esac
-
-DEFAULT_POOL_MACHINE_TYPE="${MACHINE_TYPE}"
-DEFAULT_POOL_DISK_TYPE="${_DISK_TYPE}"
-DEFAULT_POOL_DISK_SIZE="50"                 # Disk size in GB
-DEFAULT_POOL_NODE_COUNT="1"                 # Number of nodes in the default pool
-
-# =========================================================================
-# Agentic Workload NodePools
-# =========================================================================
-
-# Sandbox NodePool (Python + Chromium workloads with gVisor)
-SANDBOX_NODE_POOL_NAME="agentic-sandbox-pool"
-SANDBOX_MACHINE_TYPE="${MACHINE_TYPE}"          # Same as default pool (overridable)
-SANDBOX_DISK_SIZE="100"
-SANDBOX_DISK_TYPE="${_DISK_TYPE}"               # Derived from machine family
-SANDBOX_NODE_COUNT="1"
-SANDBOX_MAX_PODS_PER_NODE="250"             # Raise from default 110 to avoid GKE pod limit as density ceiling
-SANDBOX_ENABLE_GVISOR="true"                # Enable GKE Sandbox (gVisor) on this pool
-
-AGENT_SANDBOX_VERSION="v0.4.6"
-
-# =========================================================================
-# Workload Configuration
-# =========================================================================
-AGENTIC_NAMESPACE="agentic"
-
-# Python Sandbox Workload
-PYTHON_IMAGE="python:3.11-slim"
-PYTHON_POD_NAME="python-sandbox"
-PYTHON_REPLICAS="1"                         # Start with 1; sweep for density tests
-PYTHON_CPU_REQUEST="1"
-PYTHON_CPU_LIMIT="2"
-PYTHON_MEMORY_REQUEST="1Gi"
-PYTHON_MEMORY_LIMIT="4Gi"
-
-# Chromium Browser Simulation Workload
-CHROMIUM_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/chrome-sandbox:${_TARGET_ARCH}"
-CHROMIUM_POD_NAME="chromium-sandbox"
-CHROMIUM_REPLICAS="1"                       # Start with 1; sweep for density tests
-
-# Mock LLM Coordinator
-MOCK_LLM_IMAGE="python:3.11-slim"
-MOCK_LLM_POD_NAME="mock-llm-coordinator"
-MOCK_LLM_PORT="8080"
-
-# =========================================================================
-# Benchmark Parameters
-# =========================================================================
-
-# Python Density Benchmark (UC-B)
-SAMPLE_COUNT="20"                                 # Samples per sandbox session
-SAMPLE_WARMUP="0"                                 # Warmup samples (excluded from stats)
-
-# Payload Transfer Benchmark (UC-D)
-PAYLOAD_SIZE_MB="1"                                # Default payload size in MB
-PAYLOAD_ITERATIONS="20"                            # Transfer iterations per session
-
-# Chromium Benchmark
-CHROMIUM_TASK_COUNT="10"                          # Number of browser tasks per run
-CHROMIUM_WARMUP_TASKS="2"
-
-# General
-BENCHMARK_DURATION="300"                          # Duration in seconds per test
-NOTE="agentic-V0-gVisor-DPv2-baseline"
-
-# =========================================================================
-# Logging
-# =========================================================================
-# Log directory — defaults to tmp/ inside the repo (gitignored).
-# Override by setting BASE_LOG_DIR before sourcing this file,
-# e.g. export BASE_LOG_DIR="$HOME/agentic-logs" to keep logs outside the repo.
-_REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." && pwd)"
-BASE_LOG_DIR="${BASE_LOG_DIR:-${_REPO_ROOT}/tmp/agentic-logs}"
-WRAPPER_LOG_DIR="${BASE_LOG_DIR}/wrapper_logs"
-
-LOG_PATH="logs"
-LOG_LEVEL="info"
-
-# =========================================================================
-# ADK Agent Deployment
-# =========================================================================
-ADK_REPO_NAME="adk-repo"                     # Artifact Registry repository name
-ADK_IMAGE_NAME="adk-agent"                   # Container image name
-GOOGLE_GENAI_USE_VERTEXAI="true"
-ADK_IMAGE_PATH="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ADK_REPO_NAME}/${ADK_IMAGE_NAME}:${_TARGET_ARCH}"
-ADK_K8S_SA="adk-agent-sa"                    # Kubernetes service account for the agent
-CLOUD_BUILD_SA="adk-cloud-build-sa"             # Service account for Cloud Build submissions
-
-# Sandbox Router & Warm Pool
-SANDBOX_ROUTER_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/sandbox-router:${_TARGET_ARCH}"
-WARMPOOL_REPLICAS="2"                         # Number of pre-warmed sandbox pods
-
-# =========================================================================
-# Pod Snapshot Configuration (UC-A: Cold Start & Snapshot Pressure Test)
-# =========================================================================
-ENABLE_POD_SNAPSHOTS="true"                    # Enable pod snapshots feature on cluster
-SNAPSHOTS_BUCKET_NAME="agent-sandbox-snapshots-${PROJECT_ID}"
-SNAPSHOT_KSA_NAME="pod-snapshot-sa"            # KSA for snapshot storage access
-SNAPSHOT_FOLDER="benchmark-snapshots"          # Managed folder inside the bucket
-SNAPSHOT_PRELOAD_MB="10"                       # Default memory preload for snapshot sizing
diff --git a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
deleted file mode 100644
index 765c7c4256..0000000000
--- a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-# Native PKB Provision Config for Agentic Benchmarks
-# Used with --gke_provision_mode=native
-#
-# Prerequisites (run once before PKB):
-#   python tools/agentic-benchmark/scripts/prerequisite_setup.py \
-#       --project_id=<project> --machine_type=<machine>
-#
-# IMPORTANT: Do NOT pass --gce_subnet_name on the command line.
-# PKB incorrectly resolves it as the --network value. Instead, pass the
-# subnet via --gke_additional_flags on the command line.
-#
-# Usage (provision):
-#   python pkb.py --benchmarks=gke_python_density \
-#       --gke_provision_mode=native \
-#       --benchmark_config_file=k8s_agents/config/native_provision_config.yaml \
-#       --gce_network_name=<user>-agentic-vpc \
-#       --gce_subnet_region=us-central1 \
-#       --zone=us-central1-a \
-#       --project=<project> \
-#       --owner=<owner> \
-#       --container_cluster_version=1.35.3-gke.1389000 \
-#       --gke_additional_flags="--subnetwork=<user>-agentic-subnet,--workload-pool=<project>.svc.id.goog"
-#
-# For sweeps (cluster pre-exists, PKB skips provision/teardown):
-#   The sweep bridge injects --run_stage=run,cleanup automatically.
-
-gke_python_density:
-  flags:
-    # Force gcloud beta for preview features (pod snapshots)
-    gke_use_beta: true
-
-    # Cluster-level additional flags (appended to gcloud [beta] container clusters create)
-    # NOTE: --subnetwork and --workload-pool are user/project-specific.
-    # Pass them on the command line via --gke_additional_flags=... (comma-separated).
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-
-    # Node-pool-level additional flags (appended to gcloud container node-pools create)
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-
-    # Standard PKB GKE flags
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-
-  container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2
new file mode 100644
index 0000000000..068b50be11
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2
@@ -0,0 +1,118 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: adk-agent-sa
+  namespace: {{ ns }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: adk-agent-sandbox-role
+rules:
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxes"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["agents.x-k8s.io"]
+    resources: ["sandboxwarmpool", "sandboxwarmpools"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: ["extensions.agents.x-k8s.io"]
+    resources: ["sandboxclaims"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: [""]
+    resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"]
+    verbs: ["get", "list", "watch"]
+  - apiGroups: [""]
+    resources: ["pods/portforward"]
+    verbs: ["create"]
+  - apiGroups: ["metrics.k8s.io"]
+    resources: ["pods"]
+    verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: adk-agent-sandbox-binding
+  namespace: {{ ns }}
+subjects:
+  - kind: ServiceAccount
+    name: adk-agent-sa
+    namespace: {{ ns }}
+roleRef:
+  kind: ClusterRole
+  name: adk-agent-sandbox-role
+  apiGroup: rbac.authorization.k8s.io
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: adk-agent
+  namespace: {{ ns }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: adk-agent
+  template:
+    metadata:
+      labels:
+        app: adk-agent
+    spec:
+      serviceAccountName: adk-agent-sa
+      containers:
+      - name: adk-agent
+        imagePullPolicy: Always
+        image: {{ adk_image }}
+        resources:
+          limits:
+            memory: "16384Mi"
+            cpu: "6000m"
+          requests:
+            memory: "512Mi"
+            cpu: "1000m"
+        ports:
+        - containerPort: 8080
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 15
+          periodSeconds: 30
+          timeoutSeconds: 10
+          failureThreshold: 6
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        env:
+          - name: PORT
+            value: "8080"
+          - name: GOOGLE_CLOUD_PROJECT
+            value: "{{ project }}"
+          - name: GOOGLE_CLOUD_LOCATION
+            value: "{{ region }}"
+          - name: GOOGLE_GENAI_USE_VERTEXAI
+            value: "true"
+          - name: CLUSTER_NAME
+            value: "{{ cluster }}"
+          - name: AGENTIC_NAMESPACE
+            value: "{{ ns }}"
+          - name: SANDBOX_ROUTER_URL
+            value: "http://sandbox-router-svc.{{ ns }}.svc.cluster.local:8080"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: adk-agent
+  namespace: {{ ns }}
+spec:
+  type: ClusterIP
+  ports:
+    - port: 80
+      targetPort: 8080
+  selector:
+    app: adk-agent
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2
new file mode 100644
index 0000000000..d76f851e95
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2
@@ -0,0 +1,56 @@
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: psi-reader
+  namespace: {{ ns }}
+  labels:
+    app: psi-reader
+spec:
+  selector:
+    matchLabels:
+      app: psi-reader
+  template:
+    metadata:
+      labels:
+        app: psi-reader
+    spec:
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      hostPID: true
+      containers:
+      - name: reader
+        image: busybox:1.36
+        command: ["sleep", "infinity"]
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: cgroup
+          mountPath: /host/sys/fs/cgroup
+          readOnly: true
+        - name: proc
+          mountPath: /host/proc
+          readOnly: true
+        resources:
+          requests:
+            cpu: "10m"
+            memory: "16Mi"
+          limits:
+            cpu: "50m"
+            memory: "32Mi"
+      volumes:
+      - name: cgroup
+        hostPath:
+          path: /sys/fs/cgroup
+      - name: proc
+        hostPath:
+          path: /proc
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2
new file mode 100644
index 0000000000..0d0541cfe7
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2
@@ -0,0 +1,69 @@
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: sandbox-router-svc
+  namespace: {{ ns }}
+spec:
+  type: ClusterIP
+  selector:
+    app: sandbox-router
+  ports:
+  - name: http
+    protocol: TCP
+    port: 8080
+    targetPort: 8080
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: sandbox-router-deployment
+  namespace: {{ ns }}
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: sandbox-router
+  template:
+    metadata:
+      labels:
+        app: sandbox-router
+    spec:
+      serviceAccountName: adk-agent-sa
+      topologySpreadConstraints:
+        - maxSkew: 1
+          topologyKey: topology.kubernetes.io/zone
+          whenUnsatisfiable: ScheduleAnyway
+          labelSelector:
+            matchLabels:
+              app: sandbox-router
+      containers:
+      - name: router
+        image: {{ router_image }}
+        ports:
+        - containerPort: 8080
+        env:
+        - name: ALLOW_UNAUTHENTICATED_ROUTER
+          value: "true"
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 5
+          periodSeconds: 5
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "512Mi"
+          limits:
+            cpu: "1000m"
+            memory: "1Gi"
+      securityContext:
+        runAsUser: 1000
+        runAsGroup: 1000
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2
new file mode 100644
index 0000000000..e9af43332d
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2
@@ -0,0 +1,103 @@
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: python-sandbox-template
+  namespace: {{ ns }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: python-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: python-runtime
+        image: {{ python_image }}
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: python-sandbox-warmpool
+  namespace: {{ ns }}
+spec:
+  replicas: {{ warmpool_replicas }}
+  sandboxTemplateRef:
+    name: python-sandbox-template
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: chromium-sandbox-template
+  namespace: {{ ns }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        sandbox: chromium-sandbox-example
+    spec:
+      runtimeClassName: gvisor
+      containers:
+      - name: chromium-runtime
+        image: {{ chromium_image }}
+        command: ["/bin/sh", "-c"]
+        args:
+          - |
+            socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 &
+            exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank
+        ports:
+          - containerPort: 9223
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+      - key: "sandbox.gke.io/runtime"
+        operator: "Equal"
+        value: "gvisor"
+        effect: "NoSchedule"
+      - key: "dedicated"
+        operator: "Equal"
+        value: "agentic-sandbox"
+        effect: "NoSchedule"
+      restartPolicy: "OnFailure"
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxWarmPool
+metadata:
+  name: chromium-sandbox-warmpool
+  namespace: {{ ns }}
+spec:
+  replicas: {{ chromium_replicas }}
+  sandboxTemplateRef:
+    name: chromium-sandbox-template
+---
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-orchestrator-to-chromium
+  namespace: {{ ns }}
+spec:
+  podSelector:
+    matchLabels:
+      sandbox: chromium-sandbox-example
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: adk-agent
+    ports:
+    - protocol: TCP
+      port: 9223
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2
new file mode 100644
index 0000000000..afc4e0ee4c
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2
@@ -0,0 +1,24 @@
+---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotStorageConfig
+metadata:
+  name: benchmark-pssc-gcs
+spec:
+  snapshotStorageConfig:
+    gcs:
+      bucket: "{{ bucket_name }}"
+      path: "{{ snapshot_folder }}"
+---
+apiVersion: podsnapshot.gke.io/v1
+kind: PodSnapshotPolicy
+metadata:
+  name: benchmark-psp
+  namespace: {{ ns }}
+spec:
+  storageConfigName: benchmark-pssc-gcs
+  selector:
+    matchLabels:
+      app: snapshot-benchmark-workload
+  triggerConfig:
+    type: manual
+    postCheckpoint: resume
diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2
new file mode 100644
index 0000000000..11850eb444
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2
@@ -0,0 +1,46 @@
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: {{ template_name }}
+  namespace: {{ namespace }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        app: snapshot-benchmark-workload
+    spec:
+      serviceAccountName: {{ ksa_name }}
+      runtimeClassName: gvisor
+      containers:
+      - name: preloader
+        image: python:3.11-slim
+        command: ["python3", "-c"]
+        args:
+          - |
+            import time, os
+            preload_mb = int(os.environ.get("PRELOAD_MB", "10"))
+            print(f"Preloading {preload_mb} MB of memory...", flush=True)
+            _ballast = bytearray(preload_mb * 1024 * 1024)
+            print(f"Preload complete. Starting counter.", flush=True)
+            i = 0
+            while True:
+                print(f"Count: {i}", flush=True)
+                i += 1
+                time.sleep(1)
+        env:
+          - name: PRELOAD_MB
+            value: "{{ preload_mb }}"
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "{{ memory_mi }}Mi"
+            ephemeral-storage: "512Mi"
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+        - key: "sandbox.gke.io/runtime"
+          operator: "Equal"
+          value: "gvisor"
+          effect: "NoSchedule"
+      restartPolicy: "OnFailure"
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
index 0828d0a5ff..3ec5f62d0b 100644
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
@@ -1,11 +1,9 @@
 # ==========================================================================
 # ADK Agent — Generated Environment File Template
 # ==========================================================================
-# This file is rendered into generated.env by deploy_gke.sh using envsubst.
-# The single source of truth is: tools/agentic-benchmark/config/gke-benchmark.conf
+# This template is rendered into generated.env by gke_image_build_utils._GenerateEnvFile from PKB flags.
 #
-# For local dev, run deploy_gke.sh to generate generated.env,
-# or manually create generated.env with your values.
+# For local dev, manually create generated.env with your values.
 # ==========================================================================
 
 # --- Required: GKE executor config ---
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
index 46094d244f..2aef3c153c 100644
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
@@ -1,3 +1,39 @@
+"""GKE Performance Agent -- ADK agent definition.
+
+This file runs INSIDE the GKE cluster as part of the adk-agent Deployment
+(see gke_deploy_utils.py for the K8s manifest). It is NOT run from the
+machine executing PKB. The ADK agent pod serves a FastAPI app (main.py)
+that PKB calls via HTTP through a kubectl port-forward tunnel.
+
+Execution flow:
+  PKB (your laptop/CI) -> kubectl port-forward -> adk-agent pod -> this file
+  -> GkeCodeExecutor -> SandboxClient -> gVisor sandbox pod
+"""
+
+"""GKE Performance Agent -- ADK agent definition for sandbox benchmarking.
+
+EXECUTION CONTEXT:
+    This file runs INSIDE the GKE cluster, NOT on the PKB orchestrator machine.
+    It is packaged into a container image (see ../Dockerfile) and deployed as
+    the 'adk-agent' Deployment in the benchmark namespace.
+
+    Execution flow:
+      PKB machine                          GKE Cluster
+      ----------                           -----------
+      benchmark.Run()
+        -> CallAgentApi("/benchmark/...")   -> main.py (FastAPI)
+                                              -> Runner(agent=root_agent)
+                                                -> MockLlm yields code
+                                                -> V3GkeCodeExecutor._execute_in_sandbox()
+                                                  -> SandboxClient.create_sandbox()
+                                                  -> sandbox.files.write("script.py", code)
+                                                  -> sandbox.commands.run("python3 script.py")
+                                                  -> SandboxClient.delete_sandbox()
+
+    The PKB machine communicates with this agent via HTTP (port-forwarded
+    through kubectl or via a LoadBalancer/ClusterIP service).
+"""
+
 from google.adk.agents import LlmAgent
 from google.adk.code_executors import GkeCodeExecutor
 from google.adk.code_executors.code_execution_utils import CodeExecutionResult
@@ -19,7 +55,7 @@
 basedir = os.path.abspath(os.path.dirname(__file__))
 agent_dir = os.path.join(basedir, "..")
 
-# Load generated.env (auto-generated from gke-benchmark.conf by deploy_gke.sh).
+# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags).
 # In GKE, K8s manifest env vars take precedence.
 load_dotenv(os.path.join(agent_dir, "generated.env"))
 
@@ -67,9 +103,9 @@ def _build_benchmark_code() -> str:
     """Build the benchmark script with current env values injected.
 
     Selects the script based on BENCHMARK_MODE env var:
-      - 'density'  → benchmark_density.py  (Use Case B)
-      - 'payload'  → benchmark_payload.py  (Use Case D)
-      - 'qps'      → benchmark_qps.py      (Use Case F)
+      - 'density'  → benchmark_density.py
+      - 'payload'  → benchmark_payload.py
+      - 'qps'      → benchmark_qps.py
     """
     mode = os.getenv("BENCHMARK_MODE", "density")
 
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
index fa13f11fd7..bcdb090188 100644
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
@@ -90,7 +90,7 @@
 
 basedir = os.path.abspath(os.path.dirname(__file__))
 
-# Load generated.env (auto-generated from gke-benchmark.conf by build_images_gke.sh).
+# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags).
 # In GKE, K8s manifest env vars take precedence.
 load_dotenv(os.path.join(basedir, "generated.env"))
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
index ee4603a4b3..02d2d40a81 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
@@ -14,51 +14,46 @@
 
 from absl import flags
 from perfkitbenchmarker import sample
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.resources.container_service import kubectl
 
 FLAGS = flags.FLAGS
 
+# Module-level benchmark_spec reference for metadata derivation.
+# Set by each benchmark's Run() via set_benchmark_spec().
+_current_benchmark_spec = None
+
+
 # ---------------------------------------------------------------------------
 # Shared flags (registered once; importable by benchmark modules)
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_string(
-    "gke_namespace",
+    "k8s_namespace",
     "agentic",
     "Kubernetes namespace where the agentic workloads are deployed.",
 )
 
-flags.DEFINE_string(
-    "gke_machine_type",
-    "",
-    "Machine type of the sandbox node pool. Recorded in sample metadata.",
-)
-
-flags.DEFINE_string(
-    "gke_kubeconfig",
-    "",
-    "Path to a kubeconfig file. If empty, the system default is used.",
-)
-
 flags.DEFINE_bool(
-    "gke_gvisor",
+    "k8s_gvisor",
     True,
     "Whether the sandbox node pool uses gVisor. Recorded in sample metadata.",
 )
 
 flags.DEFINE_string(
-    "gke_note",
+    "gke_benchmark_note",
     "",
     "Arbitrary note string attached to every sample for tagging runs.",
 )
 
 flags.DEFINE_string(
-    "gke_api_url",
+    "k8s_agent_api_url",
     "http://localhost:8080",
     "Base URL of the ADK Agent API.",
 )
 
 flags.DEFINE_integer(
-    "gke_api_timeout",
+    "k8s_agent_api_timeout",
     600,
     "HTTP timeout in seconds for agent API benchmark calls.",
 )
@@ -71,14 +66,14 @@
 
 def GetAgentApiUrl():
     """Return the base URL of the ADK agent API service."""
-    return FLAGS.gke_api_url.rstrip("/")
+    return FLAGS.k8s_agent_api_url.rstrip("/")
 
 
 def CheckAgentHealthz(api_url=None, required=True):
     """Verify the agent API is reachable via /healthz.
 
     Args:
-        api_url: Base URL to check. Defaults to FLAGS.gke_api_url.
+        api_url: Base URL to check. Defaults to FLAGS.k8s_agent_api_url.
         required: If True (default), raise on failure. If False, log warning.
     """
     if api_url is None:
@@ -102,7 +97,7 @@ def CheckAgentHealthz(api_url=None, required=True):
 def CallAgentApi(endpoint, payload, timeout=None):
     """POST JSON to an agent API endpoint and return the parsed response."""
     if timeout is None:
-        timeout = FLAGS.gke_api_timeout
+        timeout = FLAGS.k8s_agent_api_timeout
     base_url = GetAgentApiUrl()
     url = f"{base_url}{endpoint}"
     data = json.dumps(payload).encode("utf-8")
@@ -131,23 +126,17 @@ def CallAgentApi(endpoint, payload, timeout=None):
 # ---------------------------------------------------------------------------
 
 
-def _KubectlCmd(args):
-    """Build a kubectl command list, optionally injecting --kubeconfig."""
-    cmd = ["kubectl"]
-    if FLAGS.gke_kubeconfig:
-        cmd += ["--kubeconfig", FLAGS.gke_kubeconfig]
-    return cmd + list(args)
-
-
 def RunKubectl(args, timeout=120, raise_on_failure=True):
-    """Run a kubectl command and return (stdout, stderr, retcode)."""
-    cmd = _KubectlCmd(args)
-    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
-    if raise_on_failure and proc.returncode != 0:
-        raise RuntimeError(
-            f"kubectl failed (rc={proc.returncode}): {proc.stderr}"
-        )
-    return proc.stdout, proc.stderr, proc.returncode
+    """Run a kubectl command and return (stdout, stderr, retcode).
+
+    Delegates to PKB's native kubectl module which handles kubeconfig
+    and retries for transient connection errors automatically.
+    """
+    return kubectl.RunKubectlCommand(
+        list(args),
+        timeout=timeout,
+        raise_on_failure=raise_on_failure,
+    )
 
 
 def CountPods(namespace, label, phase=None):
@@ -190,6 +179,13 @@ def DrainWarmPool(namespace, warmpool_name, label, timeout=120):
         "patch", "sandboxwarmpool", warmpool_name,
         "-n", namespace, "--type=merge", f"-p={patch_json}",
     ], raise_on_failure=False)
+
+    # Delete lingering SandboxClaims that may prevent pod termination
+    RunKubectl([
+        "delete", "sandboxclaims", "--all",
+        "-n", namespace, "--ignore-not-found=true",
+    ], timeout=60, raise_on_failure=False)
+
     deadline = time.time() + timeout
     while time.time() < deadline:
         remaining = CountPods(namespace, label)
@@ -197,12 +193,20 @@ def DrainWarmPool(namespace, warmpool_name, label, timeout=120):
             logging.info("Warm pool drained successfully")
             return True
         logging.info("Draining... %d pods remaining", remaining)
-        time.sleep(3)
+        time.sleep(2)
     logging.warning("Drain timed out, %d pods still present",
                     CountPods(namespace, label))
     return False
 
 
+def set_benchmark_spec(benchmark_spec):
+    """Store benchmark_spec for metadata derivation (called by Run())."""
+    # BuildMetadata() reads this module-level global to derive machine_type.
+    # NOTE(review): confirm _current_benchmark_spec has a module-level default.
+    global _current_benchmark_spec
+    _current_benchmark_spec = benchmark_spec
+
+
 # ---------------------------------------------------------------------------
 # Sample construction
 # ---------------------------------------------------------------------------
@@ -212,12 +216,25 @@ def BuildMetadata(namespace, extra=None):
     """Construct the common metadata dict for all samples."""
     metadata = {
         "namespace": namespace,
-        "gvisor": FLAGS.gke_gvisor,
+        "gvisor": FLAGS.k8s_gvisor,
     }
-    if FLAGS.gke_machine_type:
-        metadata["machine_type"] = FLAGS.gke_machine_type
-    if FLAGS.gke_note:
-        metadata["note"] = FLAGS.gke_note
+    # Derive machine_type from benchmark_spec (set via set_benchmark_spec)
+    machine_type = None
+    if _current_benchmark_spec:
+        cluster = getattr(_current_benchmark_spec, 'container_cluster', None)
+        if cluster:
+            # Prefer sandbox nodepool machine_type over default pool
+            nodepools = getattr(cluster, 'nodepools', None)
+            if nodepools and isinstance(nodepools, dict):
+                sandbox_pool = nodepools.get('sandbox')
+                if sandbox_pool and hasattr(sandbox_pool, 'vm_spec'):
+                    machine_type = getattr(sandbox_pool.vm_spec, 'machine_type', None)
+            if not machine_type and hasattr(cluster, 'vm_spec'):
+                machine_type = getattr(cluster.vm_spec, 'machine_type', None)
+    if machine_type:
+        metadata["machine_type"] = machine_type
+    if FLAGS.gke_benchmark_note:
+        metadata["note"] = FLAGS.gke_benchmark_note
     if extra:
         metadata.update(extra)
     return metadata
@@ -238,37 +255,37 @@ def MakeSample(metric, value, unit, namespace, extra_metadata=None):
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_bool(
-    "gke_auto_portforward",
+    "k8s_auto_portforward",
     True,
     "Automatically manage kubectl port-forward to the agent service.",
 )
 
 flags.DEFINE_integer(
-    "gke_portforward_local_port",
+    "k8s_portforward_local_port",
     8080,
     "Local port for kubectl port-forward.",
 )
 
 flags.DEFINE_integer(
-    "gke_portforward_remote_port",
+    "k8s_portforward_remote_port",
     80,
     "Remote service port for kubectl port-forward.",
 )
 
 flags.DEFINE_string(
-    "gke_portforward_service",
+    "k8s_portforward_service",
     "svc/adk-agent",
     "Kubernetes service to port-forward to.",
 )
 
 flags.DEFINE_float(
-    "gke_portforward_reconnect_delay",
+    "k8s_portforward_reconnect_delay",
     1.0,
     "Seconds to wait before reconnecting after port-forward drops.",
 )
 
 flags.DEFINE_float(
-    "gke_portforward_health_timeout",
+    "k8s_portforward_health_timeout",
     30.0,
     "Seconds to wait for agent health check after starting port-forward.",
 )
@@ -339,15 +356,15 @@ def stop(self):
 
     def _loop(self):
         """Background reconnect loop."""
-        ns = FLAGS.gke_namespace
-        svc = FLAGS.gke_portforward_service
-        local_port = FLAGS.gke_portforward_local_port
-        remote_port = FLAGS.gke_portforward_remote_port
-        delay = FLAGS.gke_portforward_reconnect_delay
+        ns = FLAGS.k8s_namespace
+        svc = FLAGS.k8s_portforward_service
+        local_port = FLAGS.k8s_portforward_local_port
+        remote_port = FLAGS.k8s_portforward_remote_port
+        delay = FLAGS.k8s_portforward_reconnect_delay
 
         cmd = ["kubectl"]
-        if FLAGS.gke_kubeconfig:
-            cmd += ["--kubeconfig", FLAGS.gke_kubeconfig]
+        if FLAGS.kubeconfig:
+            cmd += ["--kubeconfig", FLAGS.kubeconfig]
         cmd += [
             "port-forward", svc,
             "-n", ns,
@@ -425,7 +442,7 @@ def _kill_orphan(self):
         except (OSError, ValueError):
             self._cleanup_pid_file()
 
-        local_port = FLAGS.gke_portforward_local_port
+        local_port = FLAGS.k8s_portforward_local_port
         try:
             result = subprocess.run(
                 ["lsof", "-ti", f":{local_port}"],
@@ -456,14 +473,14 @@ def EnsurePortForward():
     Blocks until the agent health check passes or timeout is reached.
     Safe to call multiple times - only starts one background loop.
     """
-    if not FLAGS.gke_auto_portforward:
-        logging.info("Auto port-forward disabled (--gke_auto_portforward=false)")
+    if not FLAGS.k8s_auto_portforward:
+        logging.info("Auto port-forward disabled (--k8s_auto_portforward=false)")
         return
 
     _port_forward_manager.start()
 
     import time as _time
-    timeout = FLAGS.gke_portforward_health_timeout
+    timeout = FLAGS.k8s_portforward_health_timeout
     deadline = _time.time() + timeout
     api_url = GetAgentApiUrl()
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
index 0da929cbbd..24d55350b5 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent Chromium Density Saturation (Use Case C).
+"""PKB Benchmark: GKE Agent Chromium Density Saturation.
 
 Atomic single-point measurement of Chromium browser sandbox density on a
 pre-provisioned GKE cluster with gVisor isolation. Measures interaction
@@ -11,11 +11,11 @@
 
 Usage:
   python pkb.py --benchmarks=gke_chromium_density \\
-                --gke_chromium_density=4 \\
+                --gke_chromium_density_concurrent_sessions=4 \\
                 --gke_chromium_density_task_count=10 \\
                 --gke_chromium_density_warmup_tasks=5 \\
-                --gke_namespace=agentic \\
-                --gke_api_url=http://localhost:8080
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
 
 Samples emitted (per run):
   - gke_chromium_density_interaction_mean      (ms)
@@ -48,7 +48,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -68,7 +67,7 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_chromium_density",
+    "gke_chromium_density_concurrent_sessions",
     1,
     "Number of concurrent Chromium browser sessions to run.",
 )
@@ -109,11 +108,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -125,7 +119,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
     utils.EnsurePortForward()
     logging.info("Prepare complete.")
@@ -137,8 +131,10 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
-    density = FLAGS.gke_chromium_density
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    density = FLAGS.gke_chromium_density_concurrent_sessions
 
     logging.info("=== Run: chromium_density=%d ===", density)
 
@@ -227,7 +223,7 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Clean up after measurement. Delete claims and drain warm pool."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("Cleanup: deleting SandboxClaims and draining warm pool.")
 
     # Delete any lingering SandboxClaims to release claimed pods
@@ -255,18 +251,25 @@ def Cleanup(benchmark_spec):
     logging.info("Cleanup complete (cluster persists).")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 
 def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
-    """Emit a sample if the key exists in the aggregate dict."""
+    """Emit a sample if the key exists in the aggregate dict.
+
+    Args:
+        samples: List to append the new sample.Sample to.
+        agg: Aggregate metrics dict returned by the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "orchestrator_cel_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB", "seconds").
+        namespace: Kubernetes namespace (included in sample metadata).
+        extra: Dict of additional metadata key-value pairs attached to
+            every sample (density, session counts, wall time, etc.).
+    """
     value = agg.get(agg_key)
     if value is not None:
         samples.append(
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
index cd12169fcd..92b360919d 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent Deletion & Cleanup (Use Case G).
+"""PKB Benchmark: GKE Agent Deletion & Cleanup.
 
 Atomic single-point measurement of bulk deletion efficiency and IP
 reclamation on a pre-provisioned GKE cluster with gVisor isolation.
@@ -18,7 +18,7 @@
                 --gke_deletion_poll_interval_s=1.0 \\
                 --gke_deletion_provision_timeout_s=120.0 \\
                 --gke_deletion_drain_timeout_s=300.0 \\
-                --gke_namespace=agentic \\
+                --k8s_namespace=agentic \\
                 --gke_machine_type=c4-standard-8
 
 Samples emitted (per run):
@@ -48,7 +48,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -106,11 +105,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -122,7 +116,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads onto the cluster."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
     utils.EnsurePortForward()
     logging.info("Prepare complete.")
 
@@ -133,7 +127,9 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
     batch_size = FLAGS.gke_deletion_batch_size
     warmpool_name = FLAGS.gke_deletion_warmpool_name
     label = FLAGS.gke_deletion_pod_label
@@ -144,7 +140,7 @@ def Run(benchmark_spec):
     logging.info("=== Run: batch_size=%d ===", batch_size)
 
     # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
-    _DrainPool(ns, warmpool_name, label, drain_timeout)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(drain_timeout))
     time.sleep(2)
 
     t_wall_start = time.time()
@@ -395,21 +391,16 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Best-effort drain of warm pool after measurement."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     warmpool_name = FLAGS.gke_deletion_warmpool_name
     label = FLAGS.gke_deletion_pod_label
 
     logging.info("Cleanup: draining warm pool to 0.")
-    _DrainPool(ns, warmpool_name, label, FLAGS.gke_deletion_drain_timeout_s)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_deletion_drain_timeout_s))
     utils.StopPortForward()
     logging.info("Cleanup complete.")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -432,35 +423,6 @@ def _PatchReplicas(namespace, warmpool_name, replicas):
     )
 
 
-def _DrainPool(namespace, warmpool_name, label, timeout_s):
-    """Scale pool to 0 and wait for all pods to terminate."""
-    _PatchReplicas(namespace, warmpool_name, 0)
-
-    # Delete any lingering SandboxClaims
-    utils.RunKubectl(
-        [
-            "delete",
-            "sandboxclaims",
-            "--all",
-            "-n",
-            namespace,
-            "--ignore-not-found=true",
-        ],
-        timeout=60,
-        raise_on_failure=False,
-    )
-
-    t0 = time.time()
-    while time.time() - t0 < timeout_s:
-        remaining = utils.CountPods(namespace, label)
-        if remaining == 0:
-            logging.info("Pool drained in %.1fs", time.time() - t0)
-            return
-        time.sleep(2)
-
-    logging.warning("Drain timed out after %.0fs", timeout_s)
-
-
 def _GetPodNames(namespace, label):
     """Return list of pod names matching the label selector."""
     stdout, _, rc = utils.RunKubectl(
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
index ff35f2e92e..9ff1684951 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
@@ -7,13 +7,14 @@
 All functions are idempotent -- safe to call repeatedly without side effects.
 """
 
-import json
 import logging
 import os
-import subprocess
-import time
 
 from absl import flags
+from jinja2 import Template
+from perfkitbenchmarker import data
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.resources.container_service import kubectl
 
 FLAGS = flags.FLAGS
 
@@ -22,220 +23,197 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_string(
-    "gke_sandbox_version",
+    "agent_sandbox_version",
     "v0.4.6",
     "Agent Sandbox controller version (GitHub release tag).",
 )
 
 flags.DEFINE_string(
-    "gke_sandbox_router_image",
+    "agent_sandbox_router_image",
     "",
     "Sandbox router container image. If empty, router deployment is skipped.",
 )
 
 flags.DEFINE_string(
-    "gke_adk_image",
+    "k8s_agent_image",
     "",
     "ADK agent container image. If empty, agent deployment is skipped.",
 )
 
 flags.DEFINE_string(
-    "gke_chromium_image",
+    "k8s_chromium_image",
     "",
     "Chromium sandbox container image. If empty, uses placeholder.",
 )
 
 flags.DEFINE_integer(
-    "gke_warmpool_replicas",
+    "agent_sandbox_warmpool_replicas",
     2,
     "Default warm pool replica count for SandboxWarmPool resources.",
 )
 
 flags.DEFINE_integer(
-    "gke_chromium_replicas",
+    "agent_sandbox_chromium_replicas",
     1,
     "Default Chromium warm pool replica count.",
 )
 
 flags.DEFINE_string(
-    "gke_python_image",
+    "k8s_python_image",
     "registry.k8s.io/agent-sandbox/python-runtime-sandbox:v0.1.0",
     "Python runtime sandbox container image.",
 )
 
 flags.DEFINE_integer(
-    "gke_deploy_timeout",
+    "k8s_deploy_timeout",
     120,
     "Timeout in seconds for workload deployment rollout.",
 )
 
-flags.DEFINE_string(
-    "gke_cluster_name",
-    "",
-    "GKE cluster name. Used in ADK agent env vars for Workload Identity.",
+flags.DEFINE_bool(
+    "skip_image_build",
+    False,
+    "Skip container image builds during Prepare.",
 )
 
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
+
+# Module-level derived images (set during DeployWorkloads)
+_derived_images = {}
 
 # ---------------------------------------------------------------------------
-# Image path auto-derivation and mode-aware scheduling
-# (Insert this block BEFORE the "def DeployWorkloads():" function)
+# Template loading
 # ---------------------------------------------------------------------------
 
+_MANIFESTS_DIR = "k8s_agents/manifests"
 
-def _DeriveImagePaths():
-    """Auto-derive container image paths from project/region/machine_type.
-
-    When --gke_adk_image or --gke_sandbox_router_image are empty,
-    derives them from --gke_project_id, --gke_region, and
-    --gke_sandbox_machine_type using the same convention as
-    gke_image_build_utils.py and the bash build scripts.
-    """
-    project = getattr(FLAGS, "gke_project_id", "") or ""
-    region = getattr(FLAGS, "gke_region", "") or ""
-    machine_type = getattr(FLAGS, "gke_sandbox_machine_type", "") or ""
 
-    if not project or not region:
-        logging.info("Cannot auto-derive images: project=%s region=%s", project, region)
-        return
+def _LoadTemplate(template_name):
+    """Load a Jinja2 template from the data directory."""
+    template_path = os.path.join(
+        data.ResourcePath(_MANIFESTS_DIR), template_name
+    )
+    with open(template_path, "r") as f:
+        return Template(f.read())
 
-    machine_family = machine_type.split("-")[0] if machine_type else "c4"
-    target_arch = "arm64" if machine_family == "c4a" else "amd64"
 
-    if not FLAGS.gke_adk_image:
-        FLAGS.gke_adk_image = "{}-docker.pkg.dev/{}/adk-repo/adk-agent:{}".format(
-            region, project, target_arch
-        )
-        logging.info("Auto-derived gke_adk_image: %s", FLAGS.gke_adk_image)
+def _RenderAndApply(template_name, **kwargs):
+    """Load a Jinja2 template, render it, write to file, and kubectl apply."""
+    template = _LoadTemplate(template_name)
+    rendered = template.render(**kwargs)
 
-    if not FLAGS.gke_sandbox_router_image:
-        FLAGS.gke_sandbox_router_image = (
-            "{}-docker.pkg.dev/{}/agent-sandbox/sandbox-router:{}".format(
-                region, project, target_arch
-            )
-        )
-        logging.info(
-            "Auto-derived gke_sandbox_router_image: %s",
-            FLAGS.gke_sandbox_router_image,
-        )
-
-    if not FLAGS.gke_chromium_image:
-        FLAGS.gke_chromium_image = (
-            "{}-docker.pkg.dev/{}/agent-sandbox/chrome-sandbox:{}".format(
-                region, project, target_arch
-            )
-        )
-        logging.info(
-            "Auto-derived gke_chromium_image: %s", FLAGS.gke_chromium_image
-        )
+    # Write rendered YAML to tmp dir (RunKubectlCommand does not support stdin)
+    tmp_dir = os.path.join(
+        data.ResourcePath(_MANIFESTS_DIR), "tmp"
+    )
+    os.makedirs(tmp_dir, exist_ok=True)
 
-    if not FLAGS.gke_cluster_name:
-        import os as _os
+    # Strip .j2 extension for the rendered file
+    rendered_name = template_name.replace(".j2", "")
+    rendered_path = os.path.join(tmp_dir, rendered_name)
+    with open(rendered_path, "w") as f:
+        f.write(rendered)
 
-        user_prefix = _os.environ.get("USER", "pkb").split(".")[0]
-        suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"}
-        cluster_suffix = suffix_map.get(machine_family, machine_family)
-        FLAGS.gke_cluster_name = "{}-agentic-{}".format(
-            user_prefix, cluster_suffix
-        )
-        logging.info(
-            "Auto-derived gke_cluster_name: %s", FLAGS.gke_cluster_name
+    stdout, stderr, retcode = kubectl.RunKubectlCommand(
+        ["apply", "-f", rendered_path],
+        raise_on_failure=False,
+    )
+    if retcode != 0:
+        logging.warning(
+            "kubectl apply failed for %s: %s", template_name, stderr[:500]
         )
+    return retcode == 0
 
 
-def _GetSandboxNodeSelector():
-    """Return the correct nodeSelector dict based on provisioning mode.
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
 
-    - native mode: PKB auto-labels nodes with pkb_nodepool=<pool_name>
-    - custom mode: bash scripts label nodes with dedicated=agentic-sandbox
-    """
-    try:
-        mode = FLAGS.gke_provision_mode
-    except (AttributeError, KeyError):
-        mode = "custom"
-    if mode == "native":
-        return {"pkb_nodepool": "sandbox"}
-    return {"dedicated": "agentic-sandbox"}
 
+def _DeriveImagePaths(project, region, arch):
+    """Derive container image paths from cluster config.
 
-def _GetSandboxTolerations():
-    """Return tolerations list based on provisioning mode.
+    Args:
+        project: GCP project ID.
+        region: GCP region (e.g. us-central1).
+        arch: Docker platform architecture (amd64 or arm64).
 
-    Both modes need the gVisor toleration (auto-applied by GKE to sandbox pools).
-    Custom mode additionally needs the dedicated=agentic-sandbox toleration
-    (manually applied by setup_infrastructure_gke.sh).
+    Returns:
+        Dict with keys: adk_agent, sandbox_router, chromium.
     """
-    try:
-        mode = FLAGS.gke_provision_mode
-    except (AttributeError, KeyError):
-        mode = "custom"
-    tolerations = [
-        {
-            "key": "sandbox.gke.io/runtime",
-            "operator": "Equal",
-            "value": "gvisor",
-            "effect": "NoSchedule",
-        },
-    ]
-    if mode != "native":
-        tolerations.insert(
-            0,
-            {
-                "key": "dedicated",
-                "operator": "Equal",
-                "value": "agentic-sandbox",
-                "effect": "NoSchedule",
-            },
-        )
-    return tolerations
-
-
-def _NodeSelectorYaml(indent=6):
-    """Generate nodeSelector YAML block for embedding in manifests."""
-    selector = _GetSandboxNodeSelector()
-    spaces = " " * indent
-    lines = ["{}nodeSelector:".format(spaces)]
-    for k, v in selector.items():
-        lines.append("{}  {}: {}".format(spaces, k, v))
-    return "\n".join(lines)
-
-
-def _TolerationsYaml(indent=6):
-    """Generate tolerations YAML block for embedding in manifests."""
-    tolerations = _GetSandboxTolerations()
-    spaces = " " * indent
-    lines = ["{}tolerations:".format(spaces)]
-    for t in tolerations:
-        lines.append('{}  - key: "{}"'.format(spaces, t["key"]))
-        lines.append('{}    operator: "{}"'.format(spaces, t["operator"]))
-        lines.append('{}    value: "{}"'.format(spaces, t["value"]))
-        lines.append('{}    effect: "{}"'.format(spaces, t["effect"]))
-    return "\n".join(lines)
+    return {
+        "adk_agent": f"{region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch}",
+        "sandbox_router": f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch}",
+        "chromium": f"{region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch}",
+    }
 
-
-def DeployWorkloads():
+def DeployWorkloads(benchmark_spec=None):
     """Deploy the full Agent Sandbox ecosystem onto the GKE cluster.
 
     Idempotent: safe to call repeatedly. Sequence:
-      1. Create namespace
-      2. Install Agent Sandbox CRDs
-      3. Deploy SandboxTemplates + WarmPools
-      4. Deploy Sandbox Router
-      5. Deploy ADK Agent (Deployment + Service + RBAC)
-      6. Deploy PSI Reader DaemonSet
-      7. Wait for ADK Agent rollout
+      1. Build images (if --skip_image_build=False)
+      2. Create namespace
+      3. Install Agent Sandbox CRDs
+      4. Deploy SandboxTemplates + WarmPools
+      5. Deploy Sandbox Router
+      6. Deploy ADK Agent (Deployment + Service + RBAC)
+      7. Deploy PSI Reader DaemonSet
+      8. Wait for ADK Agent rollout
     """
-    _DeriveImagePaths()
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("=== DeployWorkloads: namespace=%s ===", ns)
 
+    # Derive project, region, machine_type, cluster_name from benchmark_spec
+    project = ""
+    region = ""
+    machine_type = ""
+    cluster_name = ""
+    cluster = None  # read again below, even when no benchmark_spec is passed
+    if benchmark_spec:
+        cluster = getattr(benchmark_spec, 'container_cluster', None)
+        if cluster:
+            project = getattr(cluster, 'project', '') or ''
+            zone = getattr(cluster, 'zone', '') or ''
+            region = zone[:-2] if zone else ''
+            cluster_name = getattr(cluster, 'name', '') or ''
+            # Prefer sandbox nodepool machine_type
+            nodepools = getattr(cluster, 'nodepools', None)
+            if nodepools and isinstance(nodepools, dict):
+                sandbox_pool = nodepools.get('sandbox')
+                if sandbox_pool and hasattr(sandbox_pool, 'vm_spec'):
+                    machine_type = getattr(sandbox_pool.vm_spec, 'machine_type', '') or ''
+            if not machine_type and hasattr(cluster, 'vm_spec'):
+                machine_type = getattr(cluster.vm_spec, 'machine_type', '') or ''
+    # Fallback to global FLAGS if benchmark_spec not available
+    if not project:
+        project = getattr(FLAGS, 'project', '') or ''
+    if not region:
+        zone = getattr(FLAGS, 'zone', '') or ''
+        region = zone[:-2] if zone else ''
+
+    # Detect architecture and derive image paths
+    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
+        gke_image_build_utils,
+    )
+    zone = cluster.zone if cluster else FLAGS.zone
+    arch = gke_image_build_utils._DetectArchitecture(machine_type, zone, project)
+
+    global _derived_images
+    _derived_images = _DeriveImagePaths(project, region, arch)
+
+    if not FLAGS.skip_image_build:
+        gke_image_build_utils.build_images_with_config(
+            project=project,
+            region=region,
+            machine_type=machine_type,
+            zone=zone,
+            arch=arch,
+        )
+
     _CreateNamespace(ns)
     _InstallCRDs()
     _DeploySandboxTemplates(ns)
     _DeploySandboxRouter(ns)
-    _DeployADKAgent(ns)
+    _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name)
     _DeployPSIReader(ns)
     _WaitForAgentReady(ns)
 
@@ -243,7 +221,7 @@ def DeployWorkloads():
 
 
 def DeploySnapshots():
-    """Deploy Pod Snapshot infrastructure (UC-A only).
+    """Deploy Pod Snapshot infrastructure.
 
     Idempotent: safe to call repeatedly. Sequence:
       1. Create GCS bucket (hierarchical namespace)
@@ -252,27 +230,25 @@ def DeploySnapshots():
       4. Bind IAM roles
       5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy
     """
-    ns = FLAGS.gke_namespace
-    project = FLAGS.gke_project_id
-    region = FLAGS.gke_region
+    ns = FLAGS.k8s_namespace
+    project = getattr(FLAGS, 'project', '') or ''
+    zone = getattr(FLAGS, 'zone', '') or ''
+    region = zone[:-2] if zone else ''
 
     if not project:
-        logging.warning("DeploySnapshots: gke_project_id not set, skipping.")
+        logging.warning("DeploySnapshots: FLAGS.project not set, skipping.")
         return
 
     bucket_name = "agent-sandbox-snapshots-{}".format(project)
     snapshot_folder = "benchmark-snapshots"
-    ksa_name = "pod-snapshot-sa"
+    ksa_name = FLAGS.gke_snapshot_ksa_name
 
     logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name)
 
     # 1. Create GCS bucket
-    _RunCmd(
+    vm_util.IssueCommand(
         [
-            "gcloud",
-            "storage",
-            "buckets",
-            "create",
+            "gcloud", "storage", "buckets", "create",
             "gs://{}".format(bucket_name),
             "--uniform-bucket-level-access",
             "--enable-hierarchical-namespace",
@@ -280,32 +256,23 @@ def DeploySnapshots():
             "--location={}".format(region),
             "--project={}".format(project),
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
     # 2. Create managed folder
-    _RunCmd(
+    vm_util.IssueCommand(
         [
-            "gcloud",
-            "storage",
-            "managed-folders",
-            "create",
+            "gcloud", "storage", "managed-folders", "create",
             "gs://{}/{}/".format(bucket_name, snapshot_folder),
             "--project={}".format(project),
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
     # 3. Create KSA
-    _RunKubectl(
-        [
-            "create",
-            "serviceaccount",
-            ksa_name,
-            "--namespace",
-            ns,
-        ],
-        check=False,
+    kubectl.RunKubectlCommand(
+        ["create", "serviceaccount", ksa_name, "--namespace", ns],
+        raise_on_failure=False,
     )
 
     # 4. IAM bindings
@@ -314,7 +281,12 @@ def DeploySnapshots():
         _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name)
 
     # 5. Deploy PSSC + PSP
-    _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder)
+    _RenderAndApply(
+        "snapshot-crds.yaml.j2",
+        ns=ns,
+        bucket_name=bucket_name,
+        snapshot_folder=snapshot_folder,
+    )
 
     logging.info("DeploySnapshots complete.")
 
@@ -324,499 +296,125 @@ def DeploySnapshots():
 # ---------------------------------------------------------------------------
 
 
-def _RunCmd(cmd, check=True, timeout=120):
-    """Run a shell command and return (stdout, returncode)."""
-    logging.info("CMD: %s", " ".join(cmd))
-    proc = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        timeout=timeout,
-    )
-    if check and proc.returncode != 0:
-        logging.warning(
-            "Command failed (rc=%d): %s", proc.returncode, proc.stderr[:500]
-        )
-    return proc.stdout.strip(), proc.returncode
-
-
-def _RunKubectl(args, check=True, timeout=120):
-    """Run kubectl with optional kubeconfig."""
-    cmd = ["kubectl"]
-    if FLAGS.gke_kubeconfig:
-        cmd += ["--kubeconfig", FLAGS.gke_kubeconfig]
-    cmd += list(args)
-    return _RunCmd(cmd, check=check, timeout=timeout)
-
-
-def _KubectlApply(manifest_str):
-    """Apply a YAML manifest string via kubectl stdin."""
-    cmd = ["kubectl", "apply", "-f", "-"]
-    if FLAGS.gke_kubeconfig:
-        cmd = [
-            "kubectl",
-            "--kubeconfig",
-            FLAGS.gke_kubeconfig,
-            "apply",
-            "-f",
-            "-",
-        ]
-    proc = subprocess.run(
-        cmd,
-        input=manifest_str,
-        capture_output=True,
-        text=True,
-        timeout=60,
-    )
-    if proc.returncode != 0:
-        logging.warning("kubectl apply failed: %s", proc.stderr[:500])
-    return proc.returncode == 0
-
-
 def _CreateNamespace(ns):
     """Create namespace if it doesn't exist."""
-    _RunKubectl(["create", "namespace", ns], check=False)
+    kubectl.RunKubectlCommand(
+        ["create", "namespace", ns],
+        raise_on_failure=False,
+    )
 
 
 def _InstallCRDs():
     """Install Agent Sandbox CRDs from GitHub release."""
-    version = FLAGS.gke_sandbox_version
+    version = FLAGS.agent_sandbox_version
     base_url = (
         "https://github.com/kubernetes-sigs/agent-sandbox"
         "/releases/download/{}".format(version)
     )
     logging.info("Installing Agent Sandbox CRDs (%s)", version)
-    _RunKubectl(
+    kubectl.RunKubectlCommand(
         [
             "apply",
-            "-f",
-            "{}/manifest.yaml".format(base_url),
-            "-f",
-            "{}/extensions.yaml".format(base_url),
+            "-f", "{}/manifest.yaml".format(base_url),
+            "-f", "{}/extensions.yaml".format(base_url),
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
 
 def _DeploySandboxTemplates(ns):
     """Deploy SandboxTemplate + WarmPool for Python and Chromium."""
-    python_image = FLAGS.gke_python_image
-    chromium_image = FLAGS.gke_chromium_image or "chromium-placeholder:latest"
-    warmpool_replicas = FLAGS.gke_warmpool_replicas
-    chromium_replicas = FLAGS.gke_chromium_replicas
-
-    manifest = """---
-apiVersion: extensions.agents.x-k8s.io/v1alpha1
-kind: SandboxTemplate
-metadata:
-  name: python-sandbox-template
-  namespace: {ns}
-spec:
-  podTemplate:
-    metadata:
-      labels:
-        sandbox: python-sandbox-example
-    spec:
-      runtimeClassName: gvisor
-      containers:
-      - name: python-runtime
-        image: {python_image}
-{node_selector_yaml}
-{tolerations_yaml}
-      restartPolicy: "OnFailure"
----
-apiVersion: extensions.agents.x-k8s.io/v1alpha1
-kind: SandboxWarmPool
-metadata:
-  name: python-sandbox-warmpool
-  namespace: {ns}
-spec:
-  replicas: {warmpool_replicas}
-  sandboxTemplateRef:
-    name: python-sandbox-template
----
-apiVersion: extensions.agents.x-k8s.io/v1alpha1
-kind: SandboxTemplate
-metadata:
-  name: chromium-sandbox-template
-  namespace: {ns}
-spec:
-  podTemplate:
-    metadata:
-      labels:
-        sandbox: chromium-sandbox-example
-    spec:
-      runtimeClassName: gvisor
-      containers:
-      - name: chromium-runtime
-        image: {chromium_image}
-        command: ["/bin/sh", "-c"]
-        args:
-          - |
-            socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 &
-            exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank
-        ports:
-          - containerPort: 9223
-{node_selector_yaml}
-{tolerations_yaml}
-      restartPolicy: "OnFailure"
----
-apiVersion: extensions.agents.x-k8s.io/v1alpha1
-kind: SandboxWarmPool
-metadata:
-  name: chromium-sandbox-warmpool
-  namespace: {ns}
-spec:
-  replicas: {chromium_replicas}
-  sandboxTemplateRef:
-    name: chromium-sandbox-template
----
-apiVersion: networking.k8s.io/v1
-kind: NetworkPolicy
-metadata:
-  name: allow-orchestrator-to-chromium
-  namespace: {ns}
-spec:
-  podSelector:
-    matchLabels:
-      sandbox: chromium-sandbox-example
-  policyTypes:
-  - Ingress
-  ingress:
-  - from:
-    - podSelector:
-        matchLabels:
-          app: adk-agent
-    ports:
-    - protocol: TCP
-      port: 9223
-""".format(
+    python_image = FLAGS.k8s_python_image
+    chromium_image = FLAGS.k8s_chromium_image or _derived_images.get("chromium", "chromium-placeholder:latest")
+    warmpool_replicas = FLAGS.agent_sandbox_warmpool_replicas
+    chromium_replicas = FLAGS.agent_sandbox_chromium_replicas
+
+    _RenderAndApply(
+        "sandbox-templates.yaml.j2",
         ns=ns,
         python_image=python_image,
         chromium_image=chromium_image,
         warmpool_replicas=warmpool_replicas,
         chromium_replicas=chromium_replicas,
-        node_selector_yaml=_NodeSelectorYaml(),
-        tolerations_yaml=_TolerationsYaml(),
     )
-    _KubectlApply(manifest)
 
 
 def _DeploySandboxRouter(ns):
     """Deploy the Sandbox Router Deployment + Service."""
-    router_image = FLAGS.gke_sandbox_router_image
+    router_image = FLAGS.agent_sandbox_router_image or _derived_images.get("sandbox_router", "")
     if not router_image:
         logging.info("Sandbox router image not set, skipping router deployment.")
         return
 
-    manifest = """---
-apiVersion: v1
-kind: Service
-metadata:
-  name: sandbox-router-svc
-  namespace: {ns}
-spec:
-  type: ClusterIP
-  selector:
-    app: sandbox-router
-  ports:
-  - name: http
-    protocol: TCP
-    port: 8080
-    targetPort: 8080
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: sandbox-router-deployment
-  namespace: {ns}
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: sandbox-router
-  template:
-    metadata:
-      labels:
-        app: sandbox-router
-    spec:
-      serviceAccountName: adk-agent-sa
-      topologySpreadConstraints:
-        - maxSkew: 1
-          topologyKey: topology.kubernetes.io/zone
-          whenUnsatisfiable: ScheduleAnyway
-          labelSelector:
-            matchLabels:
-              app: sandbox-router
-      containers:
-      - name: router
-        image: {router_image}
-        ports:
-        - containerPort: 8080
-        env:
-        - name: ALLOW_UNAUTHENTICATED_ROUTER
-          value: "true"
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-          initialDelaySeconds: 5
-          periodSeconds: 5
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-          initialDelaySeconds: 10
-          periodSeconds: 10
-        resources:
-          requests:
-            cpu: "250m"
-            memory: "512Mi"
-          limits:
-            cpu: "1000m"
-            memory: "1Gi"
-      securityContext:
-        runAsUser: 1000
-        runAsGroup: 1000
-""".format(ns=ns, router_image=router_image)
-    _KubectlApply(manifest)
-
-
-def _DeployADKAgent(ns):
+    _RenderAndApply(
+        "sandbox-router.yaml.j2",
+        ns=ns,
+        router_image=router_image,
+    )
+
+
+def _DeployADKAgent(ns, project="", region="", cluster_name=""):
     """Deploy ADK Agent: SA, ClusterRole, RoleBinding, Deployment, Service."""
-    adk_image = FLAGS.gke_adk_image
+    adk_image = FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "")
     if not adk_image:
         logging.info("ADK agent image not set, skipping agent deployment.")
         return
 
-    project = FLAGS.gke_project_id or ""
-    region = FLAGS.gke_region or ""
-    cluster = FLAGS.gke_cluster_name or ""
-
-    manifest = """---
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: adk-agent-sa
-  namespace: {ns}
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: adk-agent-sandbox-role
-rules:
-  - apiGroups: ["agents.x-k8s.io"]
-    resources: ["sandboxes"]
-    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
-  - apiGroups: ["agents.x-k8s.io"]
-    resources: ["sandboxwarmpool", "sandboxwarmpools"]
-    verbs: ["get", "list", "watch"]
-  - apiGroups: ["extensions.agents.x-k8s.io"]
-    resources: ["sandboxclaims"]
-    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
-  - apiGroups: [""]
-    resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"]
-    verbs: ["get", "list", "watch"]
-  - apiGroups: [""]
-    resources: ["pods/portforward"]
-    verbs: ["create"]
-  - apiGroups: ["metrics.k8s.io"]
-    resources: ["pods"]
-    verbs: ["get", "list"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: adk-agent-sandbox-binding
-  namespace: {ns}
-subjects:
-  - kind: ServiceAccount
-    name: adk-agent-sa
-    namespace: {ns}
-roleRef:
-  kind: ClusterRole
-  name: adk-agent-sandbox-role
-  apiGroup: rbac.authorization.k8s.io
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: adk-agent
-  namespace: {ns}
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: adk-agent
-  template:
-    metadata:
-      labels:
-        app: adk-agent
-    spec:
-      serviceAccountName: adk-agent-sa
-      containers:
-      - name: adk-agent
-        imagePullPolicy: Always
-        image: {adk_image}
-        resources:
-          limits:
-            memory: "16384Mi"
-            cpu: "6000m"
-          requests:
-            memory: "512Mi"
-            cpu: "1000m"
-        ports:
-        - containerPort: 8080
-        livenessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-          initialDelaySeconds: 15
-          periodSeconds: 30
-          timeoutSeconds: 10
-          failureThreshold: 6
-        readinessProbe:
-          httpGet:
-            path: /healthz
-            port: 8080
-          initialDelaySeconds: 5
-          periodSeconds: 10
-          timeoutSeconds: 5
-          failureThreshold: 3
-        env:
-          - name: PORT
-            value: "8080"
-          - name: GOOGLE_CLOUD_PROJECT
-            value: "{project}"
-          - name: GOOGLE_CLOUD_LOCATION
-            value: "{region}"
-          - name: GOOGLE_GENAI_USE_VERTEXAI
-            value: "true"
-          - name: CLUSTER_NAME
-            value: "{cluster}"
-          - name: AGENTIC_NAMESPACE
-            value: "{ns}"
-          - name: SANDBOX_ROUTER_URL
-            value: "http://sandbox-router-svc.{ns}.svc.cluster.local:8080"
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: adk-agent
-  namespace: {ns}
-spec:
-  type: ClusterIP
-  ports:
-    - port: 80
-      targetPort: 8080
-  selector:
-    app: adk-agent
-""".format(ns=ns, adk_image=adk_image, project=project, region=region, cluster=cluster)
-    _KubectlApply(manifest)
+    project = project or ""
+    region = region or ""
+    cluster = cluster_name or ""
+
+    _RenderAndApply(
+        "adk-agent.yaml.j2",
+        ns=ns,
+        adk_image=adk_image,
+        project=project,
+        region=region,
+        cluster=cluster,
+    )
 
 
 def _DeployPSIReader(ns):
     """Deploy PSI Reader DaemonSet for cgroup pressure metrics."""
-    manifest = """---
-apiVersion: apps/v1
-kind: DaemonSet
-metadata:
-  name: psi-reader
-  namespace: {ns}
-  labels:
-    app: psi-reader
-spec:
-  selector:
-    matchLabels:
-      app: psi-reader
-  template:
-    metadata:
-      labels:
-        app: psi-reader
-    spec:
-{node_selector_yaml}
-{tolerations_yaml}
-      hostPID: true
-      containers:
-      - name: reader
-        image: busybox:1.36
-        command: ["sleep", "infinity"]
-        securityContext:
-          privileged: true
-        volumeMounts:
-        - name: cgroup
-          mountPath: /host/sys/fs/cgroup
-          readOnly: true
-        - name: proc
-          mountPath: /host/proc
-          readOnly: true
-        resources:
-          requests:
-            cpu: "10m"
-            memory: "16Mi"
-          limits:
-            cpu: "50m"
-            memory: "32Mi"
-      volumes:
-      - name: cgroup
-        hostPath:
-          path: /sys/fs/cgroup
-      - name: proc
-        hostPath:
-          path: /proc
-""".format(
-        ns=ns,
-        node_selector_yaml=_NodeSelectorYaml(),
-        tolerations_yaml=_TolerationsYaml(),
-    )
-    _KubectlApply(manifest)
+    _RenderAndApply("psi-reader.yaml.j2", ns=ns)
 
 
 def _WaitForAgentReady(ns):
     """Wait for ADK agent deployment to be ready."""
-    adk_image = FLAGS.gke_adk_image
+    adk_image = FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "")  # same fallback as _DeployADKAgent, so a derived image is also awaited
     if not adk_image:
         logging.info("ADK agent not deployed, skipping rollout wait.")
         return
-    timeout = FLAGS.gke_deploy_timeout
+    timeout = FLAGS.k8s_deploy_timeout
     logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout)
-    _RunKubectl(
+    kubectl.RunKubectlCommand(
         [
-            "rollout",
-            "status",
-            "deployment/adk-agent",
-            "-n",
-            ns,
+            "rollout", "status", "deployment/adk-agent",
+            "-n", ns,
             "--timeout={}s".format(timeout),
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
 
 def _GetProjectNumber(project):
     """Get GCP project number from project ID."""
-    stdout, rc = _RunCmd(
+    stdout, _, retcode = vm_util.IssueCommand(
         [
-            "gcloud",
-            "projects",
-            "describe",
-            project,
+            "gcloud", "projects", "describe", project,
             "--format=value(projectNumber)",
         ],
-        check=False,
+        raise_on_failure=False,
     )
-    return stdout if rc == 0 else None
+    return stdout.strip() if retcode == 0 else None
 
 
 def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name):
     """Bind IAM roles for pod snapshot access."""
     # bucketViewer to namespace
-    _RunCmd(
+    vm_util.IssueCommand(
         [
-            "gcloud",
-            "storage",
-            "buckets",
-            "add-iam-policy-binding",
+            "gcloud", "storage", "buckets", "add-iam-policy-binding",
             "gs://{}".format(bucket_name),
             "--member=principalSet://iam.googleapis.com/projects/{}"
             "/locations/global/workloadIdentityPools/{}.svc.id.goog"
@@ -824,16 +422,13 @@ def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name):
             "--role=roles/storage.bucketViewer",
             "--quiet",
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
     # objectAdmin to KSA
-    _RunCmd(
+    vm_util.IssueCommand(
         [
-            "gcloud",
-            "storage",
-            "buckets",
-            "add-iam-policy-binding",
+            "gcloud", "storage", "buckets", "add-iam-policy-binding",
             "gs://{}".format(bucket_name),
             "--member=principal://iam.googleapis.com/projects/{}"
             "/locations/global/workloadIdentityPools/{}.svc.id.goog"
@@ -841,51 +436,18 @@ def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name):
             "--role=roles/storage.objectAdmin",
             "--quiet",
         ],
-        check=False,
+        raise_on_failure=False,
     )
 
     # objectUser to GKE snapshot controller
-    _RunCmd(
+    vm_util.IssueCommand(
         [
-            "gcloud",
-            "storage",
-            "buckets",
-            "add-iam-policy-binding",
+            "gcloud", "storage", "buckets", "add-iam-policy-binding",
             "gs://{}".format(bucket_name),
             "--member=serviceAccount:service-{}"
             "@container-engine-robot.iam.gserviceaccount.com".format(project_number),
             "--role=roles/storage.objectUser",
             "--quiet",
         ],
-        check=False,
+        raise_on_failure=False,
     )
-
-
-def _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder):
-    """Deploy PodSnapshotStorageConfig + PodSnapshotPolicy."""
-    manifest = """---
-apiVersion: podsnapshot.gke.io/v1
-kind: PodSnapshotStorageConfig
-metadata:
-  name: benchmark-pssc-gcs
-spec:
-  snapshotStorageConfig:
-    gcs:
-      bucket: "{bucket_name}"
-      path: "{snapshot_folder}"
----
-apiVersion: podsnapshot.gke.io/v1
-kind: PodSnapshotPolicy
-metadata:
-  name: benchmark-psp
-  namespace: {ns}
-spec:
-  storageConfigName: benchmark-pssc-gcs
-  selector:
-    matchLabels:
-      app: snapshot-benchmark-workload
-  triggerConfig:
-    type: manual
-    postCheckpoint: resume
-""".format(ns=ns, bucket_name=bucket_name, snapshot_folder=snapshot_folder)
-    _KubectlApply(manifest)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
index 38b85b4e11..13340184bc 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -25,12 +25,57 @@
 logger = logging.getLogger(__name__)
 
 
+
 # ---------------------------------------------------------------------------
-# Public API
+# Architecture detection
 # ---------------------------------------------------------------------------
 
+_ARCH_MAP = {
+    "X86_64": "amd64",
+    "ARM64": "arm64",
+}
+
+
+def _DetectArchitecture(machine_type, zone, project):
+    """Detect CPU architecture for a GCP machine type.
+
+    Uses gcloud to query the machine type's architecture, then maps
+    GCP naming (X86_64/ARM64) to Docker platform naming (amd64/arm64).
 
-def build_images_with_config(project, region, machine_type, cloud_build_sa=None):
+    Falls back to amd64 if gcloud fails.
+    """
+    try:
+        stdout, _, retcode = vm_util.IssueCommand(
+            [
+                "gcloud", "compute", "machine-types", "describe",
+                machine_type,
+                f"--zone={zone}",
+                f"--project={project}",
+                "--format=value(architecture)",
+            ],
+            raise_on_failure=False,
+            timeout=30,
+        )
+        if retcode == 0 and stdout.strip():
+            gcp_arch = stdout.strip().upper()
+            docker_arch = _ARCH_MAP.get(gcp_arch)
+            if docker_arch:
+                logging.info(
+                    "Detected architecture for %s: %s -> %s",
+                    machine_type, gcp_arch, docker_arch,
+                )
+                return docker_arch
+            logging.warning(
+                "Unknown GCP architecture '%s' for %s. Falling back to amd64.",
+                gcp_arch, machine_type,
+            )
+    except Exception as e:
+        logging.warning("gcloud machine-type describe failed: %s. Falling back to amd64.", e)
+
+    return "amd64"
+
+
+def build_images_with_config(project, region, machine_type, zone, arch, cloud_build_sa=None):
     """Core image build logic — no FLAGS dependency.
 
     Callable from both PKB (via BuildImages()) and prerequisite_setup.py.
@@ -43,9 +88,8 @@ def build_images_with_config(project, region, machine_type, cloud_build_sa=None)
         cloud_build_sa: Cloud Build service account email.
             If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com".
     """
-    # Derive architecture from machine family
-    machine_family = machine_type.split("-")[0] if machine_type else "c4"
-    target_arch = "arm64" if machine_family == "c4a" else "amd64"
+    # Architecture passed in from caller (detected via gcloud)
+    target_arch = arch
 
     # Derive image paths
     adk_image = f"{region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{target_arch}"
@@ -101,15 +145,19 @@ def build_images_with_config(project, region, machine_type, cloud_build_sa=None)
 
 
 def BuildImages():
-    """FLAGS-based entry point (called from PKB Provision).
+    """FLAGS-based entry point.
 
-    Reads configuration from FLAGS (set in gke_provision_utils.py).
+    Reads configuration from native PKB FLAGS.
     Delegates to build_images_with_config() for the actual work.
     """
+    project = getattr(FLAGS, 'project', '') or ''
+    zones = getattr(FLAGS, 'zone', '') or ''
+    zone = zones if isinstance(zones, str) else zones[0]  # NOTE(review): --zone may be a list flag in PKB; confirm
+    region = zone[:-2] if zone else ''
+    machine_type = getattr(FLAGS, 'machine_type', '') or ''
     build_images_with_config(
-        project=FLAGS.gke_project_id,
-        region=FLAGS.gke_region,
-        machine_type=FLAGS.gke_sandbox_machine_type,
+        project=project, region=region, machine_type=machine_type, zone=zone,
+        arch=_DetectArchitecture(machine_type, zone, project),  # zone and arch are required by the callee
     )
 
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
index 9ddac86ea0..7d95d4bc82 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent Payload Transfer Saturation (Use Case D).
+"""PKB Benchmark: GKE Agent Payload Transfer Saturation.
 
 Atomic single-point measurement of payload transfer latency from a gVisor
 sandbox back to the orchestrator on a pre-provisioned GKE cluster.  Measures
@@ -14,8 +14,8 @@
                 --gke_payload_size_mb=50 \
                 --gke_payload_iterations=20 \
                 --gke_payload_concurrent_sessions=5 \
-                --gke_namespace=agentic \
-                --gke_api_url=http://localhost:8080
+                --k8s_namespace=agentic \
+                --k8s_agent_api_url=http://localhost:8080
 
 Samples emitted (per run):
   - gke_payload_orchestrator_transfer_mean       (ms)
@@ -71,7 +71,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -126,11 +125,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -142,7 +136,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
     utils.EnsurePortForward()
     logging.info("Prepare complete.")
@@ -154,7 +148,9 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
     payload_size_mb = FLAGS.gke_payload_size_mb
     iterations = FLAGS.gke_payload_iterations
     concurrent = FLAGS.gke_payload_concurrent_sessions
@@ -575,7 +571,7 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Clean up after measurement. Scale warm pool to 0."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("Cleanup: draining warm pool.")
 
     utils.DrainWarmPool(
@@ -588,18 +584,25 @@ def Cleanup(benchmark_spec):
     logging.info("Cleanup complete (cluster persists).")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 
 def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
-    """Emit a sample if the key exists in the aggregate dict."""
+    """Emit a sample if the key exists in the aggregate dict.
+
+    Args:
+        samples: List to append the new sample.Sample to.
+        agg: Aggregate metrics dict returned by the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "orchestrator_cel_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB", "seconds").
+        namespace: Kubernetes namespace (included in sample metadata).
+        extra: Dict of additional metadata key-value pairs attached to
+            every sample (density, session counts, wall time, etc.).
+    """
     value = agg.get(agg_key)
     if value is not None:
         samples.append(
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
new file mode 100644
index 0000000000..49e04bb83d
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+"""Post-Teardown Cleanup for GKE Agentic Benchmarking.
+
+Cleans up infrastructure created by gke_prerequisites.py and DeploySnapshots():
+  - Delete Cloud Build service account + IAM bindings
+  - Delete GCS snapshot bucket
+  - Delete Artifact Registry repositories
+
+Run ONCE after all benchmarks are complete (after PKB Teardown has deleted the cluster):
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_post_teardown \
+      --project_id=<project> --region=<region>
+"""
+
+import argparse
+import logging
+import subprocess
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def _run(cmd, check=False, timeout=300):  # best-effort runner: logs the command, never raises on non-zero exit
+    logger.info("CMD: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+    if result.returncode != 0:  # warn regardless of `check`: no caller sets it, so failures used to be silent
+        logger.warning("Command failed (rc=%d): %s", result.returncode, result.stderr[-300:])
+    return result
+
+
+def teardown_cloud_build_sa(project_id):
+    logger.info("=== Deleting Cloud Build SA ===")
+    sa_email = f"adk-cloud-build-sa@{project_id}.iam.gserviceaccount.com"
+    roles = ["roles/logging.logWriter", "roles/storage.objectViewer",
+             "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer"]
+    for role in roles:
+        _run(["gcloud", "projects", "remove-iam-policy-binding", project_id,
+              f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"])
+    _run(["gcloud", "iam", "service-accounts", "delete", sa_email,
+          f"--project={project_id}", "--quiet"])
+    logger.info("Cloud Build SA deleted.")
+
+
+def teardown_snapshot_bucket(project_id, region):
+    logger.info("=== Deleting Snapshot Bucket ===")
+    bucket_name = f"agent-sandbox-snapshots-{project_id}"
+    _run(["gcloud", "storage", "rm", f"gs://{bucket_name}/**",
+          f"--project={project_id}", "--quiet"])
+    _run(["gcloud", "storage", "buckets", "delete", f"gs://{bucket_name}",
+          f"--project={project_id}", "--quiet"])
+    logger.info("Snapshot bucket deleted.")
+
+
+def teardown_images(project_id, region):
+    logger.info("=== Deleting AR repos ===")
+    for repo in ["adk-repo", "agent-sandbox"]:
+        _run(["gcloud", "artifacts", "repositories", "delete", repo,
+              f"--location={region}", f"--project={project_id}", "--quiet"])
+    logger.info("AR repos deleted.")
+
+
+def main():
+    p = argparse.ArgumentParser(description="GKE Agentic Benchmark Post-Teardown")
+    p.add_argument("--project_id", required=True, help="GCP project ID")
+    p.add_argument("--region", default="us-central1", help="GCP region")
+    p.add_argument("--keep_images", action="store_true", help="Skip AR repo deletion")
+    p.add_argument("--keep_bucket", action="store_true", help="Skip snapshot bucket deletion")
+    args = p.parse_args()
+    teardown_cloud_build_sa(args.project_id)
+    if not args.keep_bucket:
+        teardown_snapshot_bucket(args.project_id, args.region)
+    if not args.keep_images:
+        teardown_images(args.project_id, args.region)
+    print("\nPost-teardown complete!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
deleted file mode 100644
index 70b9d95a4c..0000000000
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py
+++ /dev/null
@@ -1,516 +0,0 @@
-#!/usr/bin/env python3
-"""Prerequisite Setup for GKE Agentic Benchmarking.
-
-Creates infrastructure that PKB's native container_cluster provisioner
-cannot manage: VPC, Subnet, Cloud Router, NAT, Firewall Rules, Artifact
-Registry, Cloud Build SA, IAM bindings, and container image builds.
-
-This script is run ONCE before PKB provisioning. PKB then references the
-pre-existing VPC/subnet via --gce_network_name and --gce_subnet_name flags.
-
-Usage:
-  # Full setup (including image builds):
-  python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \
-      --project_id=my-project \
-      --region=us-central1 --zone=us-central1-a \
-      --machine_type=c4-standard-8
-
-  # Setup without image builds:
-  python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \
-      --project_id=my-project \
-      --region=us-central1 --zone=us-central1-a \
-      --skip_image_build
-
-  # Teardown:
-  python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \
-      --project_id=my-project \
-      --region=us-central1 --zone=us-central1-a \
-      --teardown
-
-  # Teardown (keep images):
-  python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \
-      --project_id=my-project \
-      --region=us-central1 --zone=us-central1-a \
-      --teardown --keep_images
-"""
-
-import argparse
-import logging
-import os
-import subprocess
-import sys
-import time
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s %(levelname)s %(message)s",
-)
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _run(cmd, check=True, timeout=300, capture=False):
-    """Run a shell command, logging it first."""
-    cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd
-    logging.info("CMD: %s", cmd_str)
-    result = subprocess.run(
-        cmd if isinstance(cmd, list) else cmd.split(),
-        capture_output=capture,
-        text=True,
-        timeout=timeout,
-    )
-    if check and result.returncode != 0:
-        stderr = result.stderr if capture else ""
-        logging.error("Command failed (rc=%d): %s", result.returncode, stderr)
-        raise RuntimeError(f"Command failed: {cmd_str}")
-    return result
-
-
-def _exists(cmd):
-    """Return True if a gcloud describe/get command succeeds."""
-    result = subprocess.run(
-        cmd if isinstance(cmd, list) else cmd.split(),
-        capture_output=True,
-        text=True,
-        timeout=60,
-    )
-    return result.returncode == 0
-
-
-def _derive_config(args):
-    """Derive configuration values from arguments."""
-    user_prefix = os.environ.get("USER", "pkb").split(".")[0]
-    machine_family = args.machine_type.split("-")[0]
-
-    # Disk type
-    disk_type = "pd-balanced" if machine_family == "c3" else "hyperdisk-balanced"
-
-    # Architecture
-    target_arch = "arm64" if machine_family == "c4a" else "amd64"
-
-    # Cluster suffix
-    if "metal" in args.machine_type:
-        cluster_suffix = "c3metal"
-    else:
-        cluster_suffix = machine_family
-
-    # Master CIDR (unique per cluster suffix)
-    master_cidrs = {
-        "c4": "172.16.0.0/28",
-        "c4d": "172.16.0.16/28",
-        "c4a": "172.16.0.32/28",
-        "c3metal": "172.16.0.48/28",
-    }
-    master_cidr = master_cidrs.get(cluster_suffix, "172.16.0.64/28")
-
-    return {
-        "user_prefix": user_prefix,
-        "machine_family": machine_family,
-        "disk_type": disk_type,
-        "target_arch": target_arch,
-        "cluster_suffix": cluster_suffix,
-        "master_cidr": master_cidr,
-        "vpc_name": f"{user_prefix}-agentic-vpc",
-        "subnet_name": f"{user_prefix}-agentic-subnet",
-        "subnet_cidr": args.subnet_cidr,
-        "router_name": f"{user_prefix}-agentic-nat-router",
-        "nat_name": f"{user_prefix}-agentic-nat-config",
-        "adk_repo_name": "adk-repo",
-        "sandbox_repo_name": "agent-sandbox",
-        "cloud_build_sa": "adk-cloud-build-sa",
-        "cloud_build_sa_email": f"adk-cloud-build-sa@{args.project_id}.iam.gserviceaccount.com",
-        "adk_image": f"{args.region}-docker.pkg.dev/{args.project_id}/adk-repo/adk-agent:{target_arch}",
-        "chromium_image": f"{args.region}-docker.pkg.dev/{args.project_id}/agent-sandbox/chrome-sandbox:{target_arch}",
-        "router_image": f"{args.region}-docker.pkg.dev/{args.project_id}/agent-sandbox/sandbox-router:{target_arch}",
-    }
-
-
-# ---------------------------------------------------------------------------
-# Setup Steps
-# ---------------------------------------------------------------------------
-
-
-def enable_apis(args):
-    """Enable required GCP APIs."""
-    logging.info("=== Enabling GCP APIs ===")
-    apis = [
-        "container.googleapis.com",
-        "artifactregistry.googleapis.com",
-        "cloudbuild.googleapis.com",
-        "aiplatform.googleapis.com",
-        "storage.googleapis.com",
-        "iam.googleapis.com",
-        "connectgateway.googleapis.com",
-        "gkehub.googleapis.com",
-        "gkeconnect.googleapis.com",
-        "iap.googleapis.com",
-    ]
-    _run([
-        "gcloud", "services", "enable", *apis,
-        f"--project={args.project_id}",
-    ])
-    logging.info("APIs enabled.")
-
-
-def create_vpc(args, config):
-    """Create custom VPC."""
-    logging.info("=== Creating VPC ===")
-    if _exists([
-        "gcloud", "compute", "networks", "describe", config["vpc_name"],
-        f"--project={args.project_id}",
-    ]):
-        logging.info("VPC %s already exists.", config["vpc_name"])
-        return
-
-    _run([
-        "gcloud", "compute", "networks", "create", config["vpc_name"],
-        "--subnet-mode=custom",
-        f"--project={args.project_id}",
-    ])
-    logging.info("VPC %s created.", config["vpc_name"])
-
-
-def create_subnet(args, config):
-    """Create subnet in the VPC."""
-    logging.info("=== Creating Subnet ===")
-    if _exists([
-        "gcloud", "compute", "networks", "subnets", "describe",
-        config["subnet_name"],
-        f"--region={args.region}",
-        f"--project={args.project_id}",
-    ]):
-        logging.info("Subnet %s already exists.", config["subnet_name"])
-        return
-
-    _run([
-        "gcloud", "compute", "networks", "subnets", "create",
-        config["subnet_name"],
-        f"--network={config['vpc_name']}",
-        f"--region={args.region}",
-        f"--range={config['subnet_cidr']}",
-        f"--project={args.project_id}",
-    ])
-    logging.info("Subnet %s created.", config["subnet_name"])
-
-
-def create_firewall_rules(args, config):
-    """Create firewall rules."""
-    logging.info("=== Creating Firewall Rules ===")
-
-    rules = [
-        {
-            "name": f"{config['vpc_name']}-allow-iap-ssh",
-            "rules": "tcp:22",
-            "source_ranges": "35.235.240.0/20",
-            "priority": "1000",
-        },
-        {
-            "name": f"{config['vpc_name']}-allow-internal",
-            "rules": "tcp,udp,icmp",
-            "source_ranges": config["subnet_cidr"],
-            "priority": "1000",
-        },
-    ]
-
-    for rule in rules:
-        if _exists([
-            "gcloud", "compute", "firewall-rules", "describe", rule["name"],
-            f"--project={args.project_id}",
-        ]):
-            logging.info("Firewall rule %s already exists.", rule["name"])
-            continue
-
-        _run([
-            "gcloud", "compute", "firewall-rules", "create", rule["name"],
-            f"--network={config['vpc_name']}",
-            "--direction=INGRESS",
-            "--action=ALLOW",
-            f"--rules={rule['rules']}",
-            f"--source-ranges={rule['source_ranges']}",
-            f"--priority={rule['priority']}",
-            f"--project={args.project_id}",
-        ])
-        logging.info("Firewall rule %s created.", rule["name"])
-
-
-def create_router_and_nat(args, config):
-    """Create Cloud Router and NAT for private node internet access."""
-    logging.info("=== Creating Cloud Router + NAT ===")
-
-    # Router
-    if not _exists([
-        "gcloud", "compute", "routers", "describe", config["router_name"],
-        f"--region={args.region}",
-        f"--project={args.project_id}",
-    ]):
-        _run([
-            "gcloud", "compute", "routers", "create", config["router_name"],
-            f"--network={config['vpc_name']}",
-            f"--region={args.region}",
-            f"--project={args.project_id}",
-        ])
-        logging.info("Router %s created.", config["router_name"])
-    else:
-        logging.info("Router %s already exists.", config["router_name"])
-
-    # NAT
-    if not _exists([
-        "gcloud", "compute", "routers", "nats", "describe", config["nat_name"],
-        f"--router={config['router_name']}",
-        f"--region={args.region}",
-        f"--project={args.project_id}",
-    ]):
-        _run([
-            "gcloud", "compute", "routers", "nats", "create", config["nat_name"],
-            f"--router={config['router_name']}",
-            f"--region={args.region}",
-            "--nat-all-subnet-ip-ranges",
-            "--auto-allocate-nat-external-ips",
-            f"--project={args.project_id}",
-        ])
-        logging.info("NAT %s created.", config["nat_name"])
-    else:
-        logging.info("NAT %s already exists.", config["nat_name"])
-
-
-def create_artifact_registry(args, config):
-    """Create Artifact Registry repositories."""
-    logging.info("=== Creating Artifact Registry Repos ===")
-
-    for repo in [config["adk_repo_name"], config["sandbox_repo_name"]]:
-        result = subprocess.run(
-            [
-                "gcloud", "artifacts", "repositories", "describe", repo,
-                f"--location={args.region}",
-                f"--project={args.project_id}",
-            ],
-            capture_output=True, text=True, timeout=30,
-        )
-        if result.returncode == 0:
-            logging.info("AR repo %s already exists.", repo)
-            continue
-
-        _run([
-            "gcloud", "artifacts", "repositories", "create", repo,
-            "--repository-format=docker",
-            f"--location={args.region}",
-            f"--project={args.project_id}",
-        ])
-        logging.info("AR repo %s created.", repo)
-
-
-def create_cloud_build_sa(args, config):
-    """Create Cloud Build service account and bind IAM roles."""
-    logging.info("=== Creating Cloud Build SA ===")
-
-    sa_email = config["cloud_build_sa_email"]
-
-    # Create SA
-    if not _exists([
-        "gcloud", "iam", "service-accounts", "describe", sa_email,
-        f"--project={args.project_id}",
-    ]):
-        _run([
-            "gcloud", "iam", "service-accounts", "create",
-            config["cloud_build_sa"],
-            f"--display-name={config['cloud_build_sa']}",
-            f"--project={args.project_id}",
-        ])
-        logging.info("SA %s created. Waiting for propagation...", sa_email)
-        time.sleep(10)
-    else:
-        logging.info("SA %s already exists.", sa_email)
-
-    # Bind roles
-    roles = [
-        "roles/logging.logWriter",
-        "roles/storage.objectViewer",
-        "roles/artifactregistry.writer",
-        "roles/serviceusage.serviceUsageConsumer",
-    ]
-    for role in roles:
-        _run([
-            "gcloud", "projects", "add-iam-policy-binding", args.project_id,
-            f"--member=serviceAccount:{sa_email}",
-            f"--role={role}",
-            "--condition=None", "--quiet",
-        ], check=False)
-
-    logging.info("Cloud Build SA roles bound.")
-
-
-def build_images(args, config):
-    """Build and push container images via Cloud Build.
-
-    Delegates to gke_image_build_utils.build_images_with_config()
-    to avoid duplicating Cloud Build logic.
-    """
-    if args.skip_image_build:
-        logging.info("=== Skipping Image Builds (--skip_image_build) ===")
-        return
-
-    logging.info("=== Building Container Images ===")
-
-    # Import the shared image build module (same package)
-    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils
-
-    gke_image_build_utils.build_images_with_config(
-        project=args.project_id,
-        region=args.region,
-        machine_type=args.machine_type,
-        cloud_build_sa=config["cloud_build_sa_email"],
-    )
-
-    logging.info("=== Image builds complete ===")
-
-
-# ---------------------------------------------------------------------------
-# Teardown Steps
-# ---------------------------------------------------------------------------
-
-
-def teardown(args, config):
-    """Tear down all prerequisite resources."""
-    logging.info("=== Prerequisite Teardown ===")
-
-    # AR repos
-    if not args.keep_images:
-        logging.info("Deleting Artifact Registry repos...")
-        for repo in [config["adk_repo_name"], config["sandbox_repo_name"]]:
-            _run([
-                "gcloud", "artifacts", "repositories", "delete", repo,
-                f"--location={args.region}",
-                f"--project={args.project_id}", "--quiet",
-            ], check=False)
-    else:
-        logging.info("Keeping AR repos (--keep_images).")
-
-    # Cloud Build SA
-    logging.info("Deleting Cloud Build SA...")
-    sa_email = config["cloud_build_sa_email"]
-    roles = [
-        "roles/logging.logWriter",
-        "roles/storage.objectViewer",
-        "roles/artifactregistry.writer",
-        "roles/serviceusage.serviceUsageConsumer",
-    ]
-    for role in roles:
-        _run([
-            "gcloud", "projects", "remove-iam-policy-binding", args.project_id,
-            f"--member=serviceAccount:{sa_email}",
-            f"--role={role}", "--quiet",
-        ], check=False)
-    _run([
-        "gcloud", "iam", "service-accounts", "delete", sa_email,
-        f"--project={args.project_id}", "--quiet",
-    ], check=False)
-
-    # NAT + Router
-    logging.info("Deleting NAT + Router...")
-    _run([
-        "gcloud", "compute", "routers", "nats", "delete", config["nat_name"],
-        f"--router={config['router_name']}",
-        f"--region={args.region}",
-        f"--project={args.project_id}", "--quiet",
-    ], check=False)
-    _run([
-        "gcloud", "compute", "routers", "delete", config["router_name"],
-        f"--region={args.region}",
-        f"--project={args.project_id}", "--quiet",
-    ], check=False)
-
-    # Firewall rules
-    logging.info("Deleting firewall rules...")
-    for suffix in ["allow-iap-ssh", "allow-internal"]:
-        _run([
-            "gcloud", "compute", "firewall-rules", "delete",
-            f"{config['vpc_name']}-{suffix}",
-            f"--project={args.project_id}", "--quiet",
-        ], check=False)
-
-    # Subnet + VPC
-    logging.info("Deleting subnet + VPC...")
-    _run([
-        "gcloud", "compute", "networks", "subnets", "delete",
-        config["subnet_name"],
-        f"--region={args.region}",
-        f"--project={args.project_id}", "--quiet",
-    ], check=False)
-    _run([
-        "gcloud", "compute", "networks", "delete", config["vpc_name"],
-        f"--project={args.project_id}", "--quiet",
-    ], check=False)
-
-    logging.info("=== Prerequisite Teardown Complete ===")
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-
-def parse_args():
-    p = argparse.ArgumentParser(
-        description="Prerequisite Setup for GKE Agentic Benchmarking",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-    )
-    p.add_argument("--project_id", required=True, help="GCP project ID")
-    p.add_argument("--region", default="us-central1", help="GCP region (default: us-central1)")
-    p.add_argument("--zone", default="us-central1-a", help="GCP zone (default: us-central1-a)")
-    p.add_argument("--machine_type", default="c4-standard-8",
-                   help="Machine type for sandbox nodes (default: c4-standard-8)")
-    p.add_argument("--subnet_cidr", default="10.134.20.0/24",
-                   help="Subnet CIDR range (default: 10.134.20.0/24)")
-    p.add_argument("--skip_image_build", action="store_true", default=False,
-                   help="Skip container image builds")
-    p.add_argument("--teardown", action="store_true", default=False,
-                   help="Tear down prerequisite resources instead of creating them")
-    p.add_argument("--keep_images", action="store_true", default=False,
-                   help="Keep AR repos during teardown")
-    return p.parse_args()
-
-
-def main():
-    args = parse_args()
-    config = _derive_config(args)
-
-    print(f"\n{'='*60}")
-    print(f"Project:      {args.project_id}")
-    print(f"Region:       {args.region}")
-    print(f"Zone:         {args.zone}")
-    print(f"Machine Type: {args.machine_type}")
-    print(f"VPC:          {config['vpc_name']}")
-    print(f"Subnet:       {config['subnet_name']} ({config['subnet_cidr']})")
-    print(f"Mode:         {'TEARDOWN' if args.teardown else 'SETUP'}")
-    print(f"{'='*60}\n")
-
-    if args.teardown:
-        teardown(args, config)
-    else:
-        enable_apis(args)
-        create_vpc(args, config)
-        create_subnet(args, config)
-        create_firewall_rules(args, config)
-        create_router_and_nat(args, config)
-        create_artifact_registry(args, config)
-        create_cloud_build_sa(args, config)
-        build_images(args, config)
-
-        print(f"\n{'='*60}")
-        print("Prerequisite setup complete!")
-        print(f"{'='*60}")
-        print(f"\nPKB flags to reference this infrastructure:")
-        print(f"  --gce_network_name={config['vpc_name']}")
-        print(f"\nNext: Run PKB with container_cluster provisioning:")
-        print(f"  python pkb.py --benchmarks=gke_python_density \\")
-        print(f"      --gce_network_name={config['vpc_name']} \\")
-        print(f"      --zone={args.zone} \\")
-        print(f"      --gke_use_beta=true \\")
-        print(f"      --gke_additional_flags=\"--enable-pod-snapshots,...,--subnetwork={config['subnet_name']}\"")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
new file mode 100644
index 0000000000..9c45f02449
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+"""Prerequisite Setup for GKE Agentic Benchmarking.
+
+Creates infrastructure that PKB cannot manage natively:
+  - Enable required GCP APIs
+  - Create Artifact Registry repositories
+  - Create Cloud Build service account + IAM bindings
+
+Run ONCE before PKB provisioning:
+  python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisites \
+      --project_id=<project> --region=<region>
+"""
+
+import argparse
+import logging
+import os
+import subprocess
+import time
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+
+def _run(cmd, check=True, timeout=300):  # cmd must be a list of str (it is " ".join-ed)
+    logger.info("CMD: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+    if check and result.returncode != 0:  # with check=False a failure is returned unlogged
+        logger.error("Command failed (rc=%d): %s", result.returncode, result.stderr[-500:])
+        raise RuntimeError(f"Command failed: {cmd}")
+    return result  # CompletedProcess with captured text stdout/stderr
+
+
+def _exists(cmd):
+    """Return True if running cmd exits with status 0 (60-second timeout)."""
+    return subprocess.run(cmd, capture_output=True, text=True, timeout=60).returncode == 0
+
+
+def enable_apis(project_id):
+    """Enable the required GCP service APIs with a single gcloud call.
+
+    Args:
+        project_id: GCP project in which the services are enabled.
+    """
+    logger.info("=== Enabling GCP APIs ===")
+    services = (
+        "container.googleapis.com", "artifactregistry.googleapis.com",
+        "cloudbuild.googleapis.com", "aiplatform.googleapis.com",
+        "storage.googleapis.com", "iam.googleapis.com",
+        "connectgateway.googleapis.com", "gkehub.googleapis.com",
+        "gkeconnect.googleapis.com", "iap.googleapis.com",
+    )
+    _run(["gcloud", "services", "enable", *services, f"--project={project_id}"])
+    logger.info("APIs enabled.")
+
+
+def create_artifact_registry(project_id, region):
+    """Create the adk-repo and agent-sandbox Docker repos unless they already exist."""
+    logger.info("=== Creating Artifact Registry Repos ===")
+    scope = [f"--location={region}", f"--project={project_id}"]
+    for repo in ("adk-repo", "agent-sandbox"):
+        if _exists(["gcloud", "artifacts", "repositories", "describe", repo] + scope):
+            logger.info("AR repo %s already exists.", repo)
+        else:
+            _run(["gcloud", "artifacts", "repositories", "create", repo,
+                  "--repository-format=docker"] + scope)
+            logger.info("AR repo %s created.", repo)
+
+
+def create_cloud_build_sa(project_id):
+    """Create the adk-cloud-build-sa service account if absent and grant its roles."""
+    logger.info("=== Creating Cloud Build SA ===")
+    sa_name = "adk-cloud-build-sa"
+    sa_email = f"{sa_name}@{project_id}.iam.gserviceaccount.com"
+    if not _exists(["gcloud", "iam", "service-accounts", "describe",
+                    sa_email, f"--project={project_id}"]):
+        _run(["gcloud", "iam", "service-accounts", "create", sa_name,
+              f"--display-name={sa_name}", f"--project={project_id}"])
+        logger.info("SA %s created. Waiting for propagation...", sa_email)
+        time.sleep(10)
+    else:
+        logger.info("SA %s already exists.", sa_email)
+    roles = ("roles/logging.logWriter", "roles/storage.objectViewer",
+             "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer")
+    for role in roles:
+        bound = _run(["gcloud", "projects", "add-iam-policy-binding", project_id,
+                      f"--member=serviceAccount:{sa_email}",
+                      f"--role={role}", "--condition=None", "--quiet"], check=False)
+        if bound.returncode != 0:
+            # Binding stays best-effort, but a failed grant must not pass silently.
+            logger.warning("Failed to bind %s: %s", role, bound.stderr[-500:])
+    logger.info("Cloud Build SA roles bound.")
+
+
+def main():  # CLI entry point: parse flags, then run the three setup steps in order
+    p = argparse.ArgumentParser(description="GKE Agentic Benchmark Prerequisites")
+    p.add_argument("--project_id", required=True, help="GCP project ID")
+    p.add_argument("--region", default="us-central1", help="GCP region")
+    args = p.parse_args()
+    enable_apis(args.project_id)  # APIs are enabled before any resource is created
+    create_artifact_registry(args.project_id, args.region)
+    create_cloud_build_sa(args.project_id)
+    print("\nPrerequisite setup complete!")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
deleted file mode 100644
index 4792f5a543..0000000000
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py
+++ /dev/null
@@ -1,698 +0,0 @@
-"""Shared Provision/Teardown utilities for GKE Agent Sandbox benchmarks.
-
-Provides the full GKE infrastructure lifecycle (create and destroy) used
-by all seven UC benchmark scripts.  Each benchmark's Provision() and
-Teardown() functions delegate to the public functions in this module.
-
-Infrastructure created (in order):
-  1. VPC + Subnet
-  2. Firewall rules (IAP SSH, internal, laptop IP)
-  3. Cloud Router + NAT
-  4. GKE Cluster (DPv2, Workload Identity, optional Pod Snapshots)
-  5. Fleet registration / credential retrieval
-  6. gVisor sandbox node pool
-  7. Artifact Registry repositories
-  8. Cloud Build service account + IAM bindings
-  9. Container images (optional, gated by --gke_skip_image_build)
-
-Teardown respects two flags:
-  --gke_teardown_keep_images: skip AR repo deletion
-  --gke_teardown_keep_infra:  only delete K8s workloads, keep cluster/network
-"""
-
-import logging
-import subprocess
-import time
-
-from absl import flags
-
-FLAGS = flags.FLAGS
-
-# Image build utilities (Phase 3)
-# Imported after FLAGS to avoid circular dependency
-# The actual import is deferred to Provision() to allow flag registration order
-
-# ---------------------------------------------------------------------------
-# Provision/Teardown flags
-# ---------------------------------------------------------------------------
-
-flags.DEFINE_string(
-    "gke_project_id",
-    "",
-    "GCP project ID for the benchmark cluster. Required for Provision/Teardown.",
-)
-
-flags.DEFINE_string(
-    "gke_region",
-    "us-central1",
-    "GCP region for networking and Artifact Registry.",
-)
-
-flags.DEFINE_string(
-    "gke_zone",
-    "us-central1-a",
-    "GCP zone for the GKE cluster and node pools.",
-)
-
-flags.DEFINE_string(
-    "gke_sandbox_machine_type",
-    "c4-standard-8",
-    "Machine type for the gVisor sandbox node pool.",
-)
-
-flags.DEFINE_string(
-    "gke_cluster_suffix",
-    "",
-    "Cluster name suffix. If empty, derived from machine family (e.g. 'c4').",
-)
-
-flags.DEFINE_string(
-    "gke_gke_version",
-    "1.35.3-gke.1389000",
-    "GKE cluster version.",
-)
-
-flags.DEFINE_bool(
-    "gke_use_connect_gateway",
-    True,
-    "Use Connect Gateway for kubectl access instead of direct public endpoint.",
-)
-
-flags.DEFINE_bool(
-    "gke_enable_pod_snapshots",
-    True,
-    "Enable GKE Pod Snapshots (Preview feature, uses gcloud beta).",
-)
-
-flags.DEFINE_bool(
-    "gke_skip_image_build",
-    True,
-    "Skip container image builds during Provision. Set to False on first run.",
-)
-
-flags.DEFINE_integer(
-    "gke_sandbox_node_count",
-    1,
-    "Number of nodes in the gVisor sandbox node pool.",
-)
-
-flags.DEFINE_integer(
-    "gke_sandbox_disk_size",
-    100,
-    "Disk size in GB for sandbox node pool nodes.",
-)
-
-flags.DEFINE_integer(
-    "gke_sandbox_max_pods_per_node",
-    250,
-    "Max pods per node on the sandbox node pool.",
-)
-
-flags.DEFINE_string(
-    "gke_subnet_cidr",
-    "10.134.20.0/24",
-    "CIDR range for the benchmark subnet.",
-)
-
-flags.DEFINE_bool(
-    "gke_teardown_keep_images",
-    False,
-    "If True, skip Artifact Registry repo deletion during Teardown.",
-)
-
-flags.DEFINE_bool(
-    "gke_teardown_keep_infra",
-    False,
-    "If True, only delete K8s workloads during Teardown (keep cluster/network).",
-)
-
-
-# ---------------------------------------------------------------------------
-# Internal helpers
-# ---------------------------------------------------------------------------
-
-
-def _run(cmd, timeout=300, check=True):
-    """Run a shell command and return CompletedProcess.
-
-    Args:
-        cmd: List of command arguments.
-        timeout: Max seconds to wait.
-        check: If True, raise on non-zero exit.
-
-    Returns:
-        subprocess.CompletedProcess
-    """
-    logging.info("CMD: %s", " ".join(cmd))
-    proc = subprocess.run(
-        cmd, capture_output=True, text=True, timeout=timeout,
-    )
-    if proc.returncode != 0:
-        logging.warning("CMD stderr: %s", proc.stderr[-500:] if proc.stderr else "")
-        if check:
-            raise RuntimeError(
-                f"Command failed (rc={proc.returncode}): {' '.join(cmd[:6])}\n"
-                f"{proc.stderr[-300:]}"
-            )
-    return proc
-
-
-def _run_quiet(cmd, timeout=300):
-    """Run a command, suppress errors (idempotent checks)."""
-    return _run(cmd, timeout=timeout, check=False)
-
-
-def _resource_exists(cmd):
-    """Return True if a gcloud describe/get command succeeds."""
-    proc = _run_quiet(cmd)
-    return proc.returncode == 0
-
-
-def _derive_config():
-    """Derive computed configuration values from flags.
-
-    Returns:
-        dict with all computed names and settings.
-    """
-    project = FLAGS.gke_project_id
-    if not project:
-        raise RuntimeError("--gke_project_id is required for Provision/Teardown.")
-
-    region = FLAGS.gke_region
-    zone = FLAGS.gke_zone
-    machine_type = FLAGS.gke_sandbox_machine_type
-
-    # Derive machine family (e.g. "c4" from "c4-standard-8")
-    machine_family = machine_type.split("-")[0]
-
-    # Derive cluster suffix
-    cluster_suffix = FLAGS.gke_cluster_suffix
-    if not cluster_suffix:
-        if machine_family == "c3" and "metal" in machine_type:
-            cluster_suffix = "c3metal"
-        else:
-            cluster_suffix = machine_family
-
-    # Derive disk type
-    if machine_family == "c3":
-        disk_type = "pd-balanced"
-    else:
-        disk_type = "hyperdisk-balanced"
-
-    # Derive architecture
-    if machine_family == "c4a":
-        target_arch = "arm64"
-    else:
-        target_arch = "amd64"
-
-    # Derive master CIDR
-    master_cidr_map = {
-        "c4": "172.16.0.0/28",
-        "c4d": "172.16.0.16/28",
-        "c4a": "172.16.0.32/28",
-        "c3metal": "172.16.0.48/28",
-    }
-    master_cidr = master_cidr_map.get(cluster_suffix, "172.16.0.64/28")
-
-    # Use a prefix derived from project for naming
-    name_prefix = "pkb"
-
-    cluster_name = f"{name_prefix}-agentic-{cluster_suffix}"
-    vpc_name = f"{name_prefix}-agentic-vpc"
-    subnet_name = f"{name_prefix}-agentic-subnet"
-    router_name = f"{name_prefix}-agentic-nat-router"
-    nat_name = f"{name_prefix}-agentic-nat-config"
-    sandbox_pool_name = "agentic-sandbox-pool"
-    adk_repo_name = "adk-repo"
-    sandbox_repo_name = "agent-sandbox"
-    cloud_build_sa = "adk-cloud-build-sa"
-    cloud_build_sa_email = f"{cloud_build_sa}@{project}.iam.gserviceaccount.com"
-    namespace = FLAGS.gke_namespace
-
-    return {
-        "project": project,
-        "region": region,
-        "zone": zone,
-        "machine_type": machine_type,
-        "machine_family": machine_family,
-        "cluster_suffix": cluster_suffix,
-        "disk_type": disk_type,
-        "target_arch": target_arch,
-        "master_cidr": master_cidr,
-        "cluster_name": cluster_name,
-        "vpc_name": vpc_name,
-        "subnet_name": subnet_name,
-        "subnet_cidr": FLAGS.gke_subnet_cidr,
-        "router_name": router_name,
-        "nat_name": nat_name,
-        "sandbox_pool_name": sandbox_pool_name,
-        "adk_repo_name": adk_repo_name,
-        "sandbox_repo_name": sandbox_repo_name,
-        "cloud_build_sa": cloud_build_sa,
-        "cloud_build_sa_email": cloud_build_sa_email,
-        "namespace": namespace,
-        "gke_version": FLAGS.gke_gke_version,
-        "sandbox_node_count": FLAGS.gke_sandbox_node_count,
-        "sandbox_disk_size": FLAGS.gke_sandbox_disk_size,
-        "sandbox_max_pods": FLAGS.gke_sandbox_max_pods_per_node,
-        "use_connect_gateway": FLAGS.gke_use_connect_gateway,
-        "enable_pod_snapshots": FLAGS.gke_enable_pod_snapshots,
-        "sandbox_version": FLAGS.gke_sandbox_version,
-    }
-
-
-# ---------------------------------------------------------------------------
-# Provision steps
-# ---------------------------------------------------------------------------
-
-
-def _enable_apis(cfg):
-    """Enable required GCP services."""
-    logging.info("Enabling required GCP APIs...")
-    apis = [
-        "iap.googleapis.com",
-        "container.googleapis.com",
-        "artifactregistry.googleapis.com",
-        "cloudbuild.googleapis.com",
-        "aiplatform.googleapis.com",
-        "storage.googleapis.com",
-        "iam.googleapis.com",
-        "connectgateway.googleapis.com",
-        "gkehub.googleapis.com",
-        "gkeconnect.googleapis.com",
-    ]
-    _run(["gcloud", "services", "enable"] + apis + [f"--project={cfg['project']}"],
-         timeout=120)
-
-
-def _create_network(cfg):
-    """Create VPC, subnet, firewall rules, Cloud Router, and NAT."""
-    project = cfg["project"]
-    region = cfg["region"]
-    vpc = cfg["vpc_name"]
-    subnet = cfg["subnet_name"]
-    cidr = cfg["subnet_cidr"]
-    router = cfg["router_name"]
-    nat = cfg["nat_name"]
-
-    # VPC
-    if not _resource_exists(["gcloud", "compute", "networks", "describe", vpc,
-                             f"--project={project}"]):
-        logging.info("Creating VPC %s...", vpc)
-        _run(["gcloud", "compute", "networks", "create", vpc,
-              "--subnet-mode=custom", f"--project={project}"])
-
-    # Subnet
-    if not _resource_exists(["gcloud", "compute", "networks", "subnets", "describe",
-                             subnet, f"--region={region}", f"--project={project}"]):
-        logging.info("Creating subnet %s...", subnet)
-        _run(["gcloud", "compute", "networks", "subnets", "create", subnet,
-              f"--network={vpc}", f"--region={region}",
-              f"--range={cidr}", f"--project={project}"])
-
-    # Firewall: IAP SSH
-    fw_iap = f"{vpc}-allow-iap-ssh"
-    if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe",
-                             fw_iap, f"--project={project}"]):
-        logging.info("Creating firewall rule %s...", fw_iap)
-        _run(["gcloud", "compute", "firewall-rules", "create", fw_iap,
-              f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW",
-              "--rules=tcp:22", "--source-ranges=35.235.240.0/20",
-              "--priority=1000", f"--project={project}"])
-
-    # Firewall: internal
-    fw_int = f"{vpc}-allow-internal"
-    if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe",
-                             fw_int, f"--project={project}"]):
-        logging.info("Creating firewall rule %s...", fw_int)
-        _run(["gcloud", "compute", "firewall-rules", "create", fw_int,
-              f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW",
-              "--rules=tcp,udp,icmp", f"--source-ranges={cidr}",
-              "--priority=1000", f"--project={project}"])
-
-    # Cloud Router
-    if not _resource_exists(["gcloud", "compute", "routers", "describe", router,
-                             f"--region={region}", f"--project={project}"]):
-        logging.info("Creating Cloud Router %s...", router)
-        _run(["gcloud", "compute", "routers", "create", router,
-              f"--network={vpc}", f"--region={region}", f"--project={project}"])
-
-    # Cloud NAT
-    if not _resource_exists(["gcloud", "compute", "routers", "nats", "describe", nat,
-                             f"--router={router}", f"--region={region}",
-                             f"--project={project}"]):
-        logging.info("Creating Cloud NAT %s...", nat)
-        _run(["gcloud", "compute", "routers", "nats", "create", nat,
-              f"--router={router}", f"--region={region}",
-              "--nat-all-subnet-ip-ranges", "--auto-allocate-nat-external-ips",
-              f"--project={project}"])
-
-
-def _create_cluster(cfg):
-    """Create the GKE cluster with DPv2 and Workload Identity."""
-    project = cfg["project"]
-    zone = cfg["zone"]
-    cluster = cfg["cluster_name"]
-
-    if _resource_exists(["gcloud", "container", "clusters", "describe", cluster,
-                         f"--zone={zone}", f"--project={project}"]):
-        logging.info("GKE cluster %s already exists.", cluster)
-        return
-
-    logging.info("Creating GKE cluster %s...", cluster)
-
-    if cfg["enable_pod_snapshots"]:
-        snapshot_flag = ["--enable-pod-snapshots"]
-        logging.info("Pod Snapshots ENABLED (using gcloud beta).")
-        cmd = ["gcloud", "beta", "container", "clusters", "create", cluster]
-    else:
-        snapshot_flag = []
-        cmd = ["gcloud", "container", "clusters", "create", cluster]
-
-    cmd += [
-        f"--zone={zone}",
-        f"--network={cfg['vpc_name']}",
-        f"--subnetwork={cfg['subnet_name']}",
-        "--enable-private-nodes",
-        "--enable-ip-alias",
-        f"--master-ipv4-cidr={cfg['master_cidr']}",
-        f"--cluster-version={cfg['gke_version']}",
-        "--no-enable-shielded-nodes",
-        "--num-nodes=1",
-        f"--machine-type={cfg['machine_type']}",
-        f"--disk-type={cfg['disk_type']}",
-        "--disk-size=50",
-        "--enable-dataplane-v2",
-        f"--workload-pool={project}.svc.id.goog",
-        "--release-channel=None",
-        f"--project={project}",
-    ] + snapshot_flag
-
-    _run(cmd, timeout=600)
-    logging.info("GKE cluster %s created.", cluster)
-
-
-def _get_credentials(cfg):
-    """Register to fleet and get kubectl credentials."""
-    project = cfg["project"]
-    zone = cfg["zone"]
-    cluster = cfg["cluster_name"]
-
-    if cfg["use_connect_gateway"]:
-        # Register to fleet
-        if not _resource_exists(["gcloud", "container", "fleet", "memberships",
-                                 "describe", cluster, f"--project={project}"]):
-            logging.info("Registering cluster %s to fleet...", cluster)
-            _run(["gcloud", "container", "fleet", "memberships", "register", cluster,
-                  f"--gke-cluster={zone}/{cluster}",
-                  "--enable-workload-identity",
-                  f"--project={project}"], timeout=120)
-
-        logging.info("Getting credentials via Connect Gateway...")
-        _run(["gcloud", "container", "fleet", "memberships", "get-credentials",
-              cluster, f"--project={project}"], timeout=60)
-    else:
-        logging.info("Getting credentials (direct endpoint)...")
-        _run(["gcloud", "container", "clusters", "get-credentials", cluster,
-              f"--zone={zone}", f"--project={project}"], timeout=60)
-
-
-def _create_sandbox_node_pool(cfg):
-    """Create the gVisor-enabled sandbox node pool."""
-    project = cfg["project"]
-    zone = cfg["zone"]
-    cluster = cfg["cluster_name"]
-    pool_name = cfg["sandbox_pool_name"]
-
-    if _resource_exists(["gcloud", "container", "node-pools", "describe", pool_name,
-                         f"--cluster={cluster}", f"--zone={zone}",
-                         f"--project={project}"]):
-        logging.info("Sandbox node pool %s already exists.", pool_name)
-        return
-
-    logging.info("Creating sandbox node pool %s with gVisor...", pool_name)
-    cmd = [
-        "gcloud", "container", "node-pools", "create", pool_name,
-        f"--cluster={cluster}",
-        f"--zone={zone}",
-        f"--project={project}",
-        f"--machine-type={cfg['machine_type']}",
-        f"--num-nodes={cfg['sandbox_node_count']}",
-        f"--disk-type={cfg['disk_type']}",
-        f"--disk-size={cfg['sandbox_disk_size']}",
-        f"--max-pods-per-node={cfg['sandbox_max_pods']}",
-        "--node-labels=dedicated=agentic-sandbox",
-        "--node-taints=dedicated=agentic-sandbox:NoSchedule",
-        "--workload-metadata=GKE_METADATA",
-        "--sandbox", "type=gvisor",
-    ]
-    _run(cmd, timeout=600)
-    logging.info("Sandbox node pool %s created.", pool_name)
-
-
-def _create_artifact_registry(cfg):
-    """Create Artifact Registry repositories."""
-    project = cfg["project"]
-    region = cfg["region"]
-
-    for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]):
-        logging.info("Ensuring AR repo %s exists...", repo_name)
-        _run_quiet([
-            "gcloud", "artifacts", "repositories", "create", repo_name,
-            "--repository-format=docker",
-            f"--location={region}",
-            f"--project={project}",
-        ])
-
-
-def _create_cloud_build_sa(cfg):
-    """Create Cloud Build service account and bind IAM roles."""
-    project = cfg["project"]
-    sa_email = cfg["cloud_build_sa_email"]
-    sa_name = cfg["cloud_build_sa"]
-
-    # Create SA if not exists
-    if not _resource_exists(["gcloud", "iam", "service-accounts", "describe",
-                             sa_email, f"--project={project}"]):
-        logging.info("Creating Cloud Build SA %s...", sa_email)
-        _run(["gcloud", "iam", "service-accounts", "create", sa_name,
-              f"--display-name={sa_name}", f"--project={project}"])
-        # Wait for propagation
-        time.sleep(10)
-
-    roles = [
-        "roles/logging.logWriter",
-        "roles/storage.objectViewer",
-        "roles/artifactregistry.writer",
-        "roles/serviceusage.serviceUsageConsumer",
-    ]
-    for role in roles:
-        _run_quiet([
-            "gcloud", "projects", "add-iam-policy-binding", project,
-            f"--member=serviceAccount:{sa_email}",
-            f"--role={role}",
-            "--condition=None", "--quiet",
-        ])
-    logging.info("Cloud Build SA ready.")
-
-
-# ---------------------------------------------------------------------------
-# Teardown steps
-# ---------------------------------------------------------------------------
-
-
-def _teardown_workloads(cfg):
-    """Delete K8s workloads, CRDs, and namespace."""
-    ns = cfg["namespace"]
-    version = cfg["sandbox_version"]
-
-    logging.info("Deleting namespace %s...", ns)
-    _run_quiet(["kubectl", "delete", "namespace", ns,
-                "--ignore-not-found=true", "--timeout=120s"])
-
-    logging.info("Removing Agent Sandbox CRDs...")
-    _run_quiet(["kubectl", "delete", "-f",
-                f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/extensions.yaml",
-                "--ignore-not-found=true"])
-    _run_quiet(["kubectl", "delete", "-f",
-                f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/manifest.yaml",
-                "--ignore-not-found=true"])
-
-    logging.info("Removing cluster-scoped RBAC...")
-    _run_quiet(["kubectl", "delete", "clusterrolebinding",
-                "adk-agent-sandbox-binding", "--ignore-not-found=true"])
-    _run_quiet(["kubectl", "delete", "clusterrole",
-                "adk-agent-sandbox-role", "--ignore-not-found=true"])
-
-
-def _teardown_images(cfg):
-    """Delete Artifact Registry repositories."""
-    project = cfg["project"]
-    region = cfg["region"]
-
-    for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]):
-        logging.info("Deleting AR repo %s...", repo_name)
-        _run_quiet(["gcloud", "artifacts", "repositories", "delete", repo_name,
-                    f"--location={region}", f"--project={project}", "--quiet"])
-
-
-def _teardown_cloud_build_sa(cfg):
-    """Delete Cloud Build service account and IAM bindings."""
-    project = cfg["project"]
-    sa_email = cfg["cloud_build_sa_email"]
-
-    roles = [
-        "roles/logging.logWriter",
-        "roles/storage.objectViewer",
-        "roles/artifactregistry.writer",
-        "roles/serviceusage.serviceUsageConsumer",
-    ]
-    for role in roles:
-        _run_quiet([
-            "gcloud", "projects", "remove-iam-policy-binding", project,
-            f"--member=serviceAccount:{sa_email}",
-            f"--role={role}", "--quiet",
-        ])
-
-    _run_quiet(["gcloud", "iam", "service-accounts", "delete", sa_email,
-                f"--project={project}", "--quiet"])
-    logging.info("Cloud Build SA deleted.")
-
-
-def _teardown_cluster(cfg):
-    """Delete GKE node pools and cluster."""
-    project = cfg["project"]
-    zone = cfg["zone"]
-    cluster = cfg["cluster_name"]
-    pool_name = cfg["sandbox_pool_name"]
-
-    logging.info("Deleting sandbox node pool %s...", pool_name)
-    _run_quiet(["gcloud", "container", "node-pools", "delete", pool_name,
-                f"--cluster={cluster}", f"--zone={zone}",
-                f"--project={project}", "--quiet"])
-
-    logging.info("Deleting GKE cluster %s...", cluster)
-    _run_quiet(["gcloud", "container", "clusters", "delete", cluster,
-                f"--zone={zone}", f"--project={project}", "--quiet"])
-
-
-def _teardown_network(cfg):
-    """Delete network resources in reverse dependency order."""
-    project = cfg["project"]
-    region = cfg["region"]
-    vpc = cfg["vpc_name"]
-    router = cfg["router_name"]
-    nat = cfg["nat_name"]
-    subnet = cfg["subnet_name"]
-
-    logging.info("Deleting Cloud NAT and Router...")
-    _run_quiet(["gcloud", "compute", "routers", "nats", "delete", nat,
-                f"--router={router}", f"--region={region}",
-                f"--project={project}", "--quiet"])
-    _run_quiet(["gcloud", "compute", "routers", "delete", router,
-                f"--region={region}", f"--project={project}", "--quiet"])
-
-    logging.info("Deleting firewall rules...")
-    for suffix in ("allow-iap-ssh", "allow-internal"):
-        _run_quiet(["gcloud", "compute", "firewall-rules", "delete",
-                    f"{vpc}-{suffix}", f"--project={project}", "--quiet"])
-
-    logging.info("Deleting subnet and VPC...")
-    _run_quiet(["gcloud", "compute", "networks", "subnets", "delete", subnet,
-                f"--region={region}", f"--project={project}", "--quiet"])
-    _run_quiet(["gcloud", "compute", "networks", "delete", vpc,
-                f"--project={project}", "--quiet"])
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-
-flags.DEFINE_enum(
-    "gke_provision_mode",
-    "custom",
-    ["custom", "native"],
-    "Provisioning mode: 'custom' uses direct gcloud calls (Phase 1 logic), "
-    "'native' uses PKB's container_cluster with prerequisite_setup.py.",
-)
-
-def Provision():
-    """Provision GKE infrastructure.
-
-    Mode is controlled by --gke_provision_mode:
-      - custom: Direct gcloud calls (full control, no PKB cluster management)
-      - native: PKB manages cluster via container_cluster spec.
-                Requires prerequisite_setup.py to have been run first.
-    """
-    mode = FLAGS.gke_provision_mode
-    if mode == "native":
-        logging.info(
-            "Provision mode=native: PKB manages cluster via container_cluster. "
-            "Ensure prerequisite_setup.py was run first (VPC, NAT, AR, images)."
-        )
-        return  # PKB handles cluster creation via container_cluster spec
-
-    logging.info("Provision mode=custom: using direct gcloud calls.")
-    cfg = _derive_config()
-
-    logging.info("=== Provision: project=%s cluster=%s machine=%s ===",
-                 cfg["project"], cfg["cluster_name"], cfg["machine_type"])
-
-    _enable_apis(cfg)
-    _create_network(cfg)
-    _create_cluster(cfg)
-    _get_credentials(cfg)
-    _create_sandbox_node_pool(cfg)
-    _create_artifact_registry(cfg)
-    _create_cloud_build_sa(cfg)
-
-    # --- Phase 3: Build container images ---
-    if not FLAGS.gke_skip_image_build:
-        from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils
-        gke_image_build_utils.BuildImages()
-    else:
-        logging.info("Skipping image builds (--gke_skip_image_build=true)")
-
-    logging.info("=== Provision complete: %s ===", cfg["cluster_name"])
-
-
-def Teardown():
-    """Teardown GKE infrastructure.
-
-    Mode is controlled by --gke_provision_mode:
-      - custom: Direct gcloud calls to delete all resources.
-      - native: PKB manages cluster deletion. Run prerequisite_setup.py --teardown
-                separately to clean up VPC/NAT/AR.
-    """
-    mode = FLAGS.gke_provision_mode
-    if mode == "native":
-        logging.info(
-            "Teardown mode=native: PKB manages cluster deletion. "
-            "Run prerequisite_setup.py --teardown to clean up VPC/NAT/AR."
-        )
-        return  # PKB handles cluster deletion
-
-    logging.info("Teardown mode=custom: using direct gcloud calls.")
-    cfg = _derive_config()
-
-    logging.info("=== Teardown: project=%s cluster=%s ===",
-                 cfg["project"], cfg["cluster_name"])
-    logging.info("  keep_images=%s  keep_infra=%s",
-                 FLAGS.gke_teardown_keep_images,
-                 FLAGS.gke_teardown_keep_infra)
-
-    # Always delete workloads
-    _teardown_workloads(cfg)
-
-    # Conditionally delete images
-    if not FLAGS.gke_teardown_keep_images:
-        _teardown_images(cfg)
-
-    # Conditionally delete infrastructure
-    if not FLAGS.gke_teardown_keep_infra:
-        _teardown_cloud_build_sa(cfg)
-        _teardown_cluster(cfg)
-        _teardown_network(cfg)
-
-    logging.info("=== Teardown complete ===")
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
index 157bd2559e..e323be4d31 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent Python Sandbox Density (Use Case B).
+"""PKB Benchmark: GKE Agent Python Sandbox Density.
 
 Atomic single-point measurement of Python sandbox density on a
 pre-provisioned GKE cluster with gVisor isolation. Measures Code Execution
@@ -6,17 +6,25 @@
 per-type latency breakdown (compute, syscall, import) at a given
 concurrent session count.
 
+Workflow per session:
+  1. Claim a pre-warmed sandbox pod from the SandboxWarmPool
+  2. Upload and execute the benchmark script inside the gVisor sandbox
+  3. Run `sample_warmup` iterations (results discarded - stabilizes caches)
+  4. Run `sample_count` measured iterations (results recorded)
+  5. Report TTFE, per-iteration CEL, RSS, and per-task-type breakdown
+  6. Release the sandbox claim
+
 This benchmark is designed to be invoked repeatedly by an external sweep
 controller that varies the density parameter across iterations to find
 the saturation point.
 
 Usage:
   python pkb.py --benchmarks=gke_python_density \\
-                --gke_python_density=16 \\
+                --gke_python_density_concurrent_sandbox_count=16 \\
                 --gke_python_density_sample_count=20 \\
                 --gke_python_density_sample_warmup=0 \\
-                --gke_namespace=agentic \\
-                --gke_api_url=http://localhost:8080
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
 
 Samples emitted (per run):
   - gke_python_density_orchestrator_cel_mean       (ms)
@@ -52,7 +60,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -72,7 +79,7 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_python_density",
+    "gke_python_density_concurrent_sandbox_count",
     1,
     "Number of concurrent sandbox sessions to run.",
 )
@@ -86,7 +93,11 @@
 flags.DEFINE_integer(
     "gke_python_density_sample_warmup",
     0,
-    "Number of warmup iterations per session (excluded from stats).",
+    "Number of warmup iterations per session (excluded from stats). "
+    "Warmup iterations execute the same benchmark tasks as measured "
+    "iterations but their latency results are discarded. This allows "
+    "JIT compilation, caches, and gVisor page faults to stabilize "
+    "before measurement begins.",
 )
 
 flags.DEFINE_bool(
@@ -107,11 +118,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -123,7 +129,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
     utils.EnsurePortForward()
     logging.info("Prepare complete.")
@@ -135,8 +141,10 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
-    density = FLAGS.gke_python_density
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
+    density = FLAGS.gke_python_density_concurrent_sandbox_count
 
     logging.info("=== Run: density=%d ===", density)
 
@@ -323,7 +331,7 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Clean up after measurement. Scale warm pool to 0."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("Cleanup: draining warm pool.")
 
     if FLAGS.gke_python_density_patch_warmpool:
@@ -337,18 +345,25 @@ def Cleanup(benchmark_spec):
     logging.info("Cleanup complete (cluster persists).")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 
 def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
-    """Emit a sample if the key exists in the aggregate dict."""
+    """Emit a sample if the key exists in the aggregate dict.
+
+    Args:
+        samples: List to append the new sample.Sample to.
+        agg: Aggregate metrics dict returned by the agent API response.
+        agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms").
+        metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric
+            name (e.g. "orchestrator_cel_mean").
+        unit: Unit string for the sample (e.g. "ms", "MB", "seconds").
+        namespace: Kubernetes namespace (included in sample metadata).
+        extra: Dict of additional metadata key-value pairs attached to
+            every sample (density, session counts, wall time, etc.).
+    """
     value = agg.get(agg_key)
     if value is not None:
         samples.append(
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
index f638494508..2146489752 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent QPS Saturation (Use Case F).
+"""PKB Benchmark: GKE Agent QPS Saturation.
 
 Atomic single-point measurement of scheduling throughput on a pre-provisioned
 GKE cluster.  Fires sandbox claim requests at a controlled QPS rate for a
@@ -19,8 +19,8 @@
                 --gke_qps_pool_size=70 \\
                 --gke_qps_step_duration_s=30.0 \\
                 --gke_qps_mode=agent \\
-                --gke_namespace=agentic \\
-                --gke_api_url=http://localhost:8080
+                --k8s_namespace=agentic \\
+                --k8s_agent_api_url=http://localhost:8080
 
   # Raw claim mode
   python pkb.py --benchmarks=gke_qps \\
@@ -29,7 +29,7 @@
                 --gke_qps_step_duration_s=30.0 \\
                 --gke_qps_mode=raw_claim \\
                 --gke_qps_claim_timeout_s=60.0 \\
-                --gke_namespace=agentic
+                --k8s_namespace=agentic
 
 Samples emitted (per run):
   - gke_qps_ttfe_mean                (ms)
@@ -51,21 +51,22 @@
 """
 
 import json
+import os
 import logging
-import subprocess
 import threading
 import time
 import uuid
 
 from absl import flags
 from perfkitbenchmarker import configs
+from perfkitbenchmarker import data
+from perfkitbenchmarker.resources.container_service import kubectl
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -135,11 +136,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -151,7 +147,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
 
     mode = FLAGS.gke_qps_mode
     if mode == "agent":
@@ -166,7 +162,9 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
     pool_size = FLAGS.gke_qps_pool_size
 
     # Scale warm pool (moved from Prepare for sweep compatibility)
@@ -188,7 +186,7 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Delete benchmark claims and drain warm pool."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("Cleanup: deleting benchmark claims and draining warm pool.")
 
     # Delete any lingering benchmark claims
@@ -205,11 +203,6 @@ def Cleanup(benchmark_spec):
     logging.info("Cleanup complete.")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Agent mode
 # ---------------------------------------------------------------------------
@@ -217,7 +210,7 @@ def Teardown(benchmark_spec):
 
 def _RunAgent(benchmark_spec):
     """Fire QPS burst via the orchestrator API."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     target_qps = FLAGS.gke_qps_target_qps
     pool_size = FLAGS.gke_qps_pool_size
     step_duration = FLAGS.gke_qps_step_duration_s
@@ -384,7 +377,7 @@ def _RunAgent(benchmark_spec):
 
 def _RunRawClaim(benchmark_spec):
     """Fire SandboxClaims directly at target_qps (no agent)."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     target_qps = FLAGS.gke_qps_target_qps
     pool_size = FLAGS.gke_qps_pool_size
     step_duration = FLAGS.gke_qps_step_duration_s
@@ -667,21 +660,30 @@ def _CreateClaim(namespace, template, claim_name):
                 "labels": {"created-by": "pkb-qps-benchmark"},
             },
             "spec": {
-                "sandboxTemplateName": template,
+                "sandboxTemplateRef": {"name": template},
             },
         }
     )
-    proc = subprocess.run(
-        ["kubectl", "apply", "-n", namespace, "-f", "-"],
-        input=manifest,
-        capture_output=True,
-        text=True,
-        timeout=30,
+    tmp_dir = os.path.join(
+        data.ResourcePath("k8s_agents/manifests"), "tmp"
     )
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"qps-claim-{claim_name}.json")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(manifest)
+        stdout, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
     t_create = time.time()
-    if proc.returncode != 0:
+    if retcode != 0:
         raise RuntimeError(
-            f"Failed to create claim {claim_name}: {proc.stderr.strip()}"
+            f"Failed to create claim {claim_name}: {stderr.strip()}"
         )
     return t_create
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
index 4cfba5d5d0..44d21fcc84 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
@@ -1,4 +1,4 @@
-"""PKB Benchmark: GKE Agent Pod Snapshot Saturation (Use Case A).
+"""PKB Benchmark: GKE Agent Pod Snapshot Saturation.
 
 Atomic single-point measurement of GKE Pod Snapshot create/restore latency
 on a pre-provisioned GKE cluster with gVisor isolation.  Measures snapshot
@@ -13,7 +13,7 @@
   python pkb.py --benchmarks=gke_snapshot \\
                 --gke_snapshot_preload_mb=50 \\
                 --gke_snapshot_burst_size=3 \\
-                --gke_namespace=agentic \\
+                --k8s_namespace=agentic \\
                 --gke_snapshot_skip_snapshot=false
 
 Samples emitted (per run):
@@ -35,12 +35,15 @@
 import logging
 import os
 import re
-import subprocess
 import time
 from concurrent.futures import ThreadPoolExecutor
 
+from jinja2 import Template
+
 from absl import flags
 from perfkitbenchmarker import configs
+from perfkitbenchmarker import data
+from perfkitbenchmarker.resources.container_service import kubectl
 from perfkitbenchmarker import sample
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_benchmark_utils as utils,
@@ -48,7 +51,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -107,11 +109,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -122,7 +119,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads, snapshot infra, and validate readiness."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     preload_mb = FLAGS.gke_snapshot_preload_mb
 
     logging.info(
@@ -132,7 +129,7 @@ def Prepare(benchmark_spec):
     )
 
     # Deploy Agent Sandbox ecosystem (idempotent)
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
 
     # Deploy Pod Snapshot infrastructure (idempotent)
     deploy_utils.DeploySnapshots()
@@ -189,7 +186,9 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
     preload_mb = FLAGS.gke_snapshot_preload_mb
     burst_size = FLAGS.gke_snapshot_burst_size
     skip_snapshot = FLAGS.gke_snapshot_skip_snapshot
@@ -284,7 +283,7 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Clean up any leftover benchmark resources."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     logging.info("Cleanup — deleting any leftover snapshot-benchmark resources.")
 
     for kind in (
@@ -310,11 +309,6 @@ def Cleanup(benchmark_spec):
     logging.info("Cleanup complete.")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Core snapshot/restore logic
 # ---------------------------------------------------------------------------
@@ -592,15 +586,24 @@ def _ApplyClaim(name, namespace, template_name):
             "spec": {"sandboxTemplateRef": {"name": template_name}},
         }
     )
-    proc = subprocess.run(
-        ["kubectl", "apply", "-f", "-"],
-        input=manifest,
-        capture_output=True,
-        text=True,
-        timeout=30,
+    tmp_dir = os.path.join(
+        data.ResourcePath("k8s_agents/manifests"), "tmp"
     )
-    if proc.returncode != 0:
-        raise RuntimeError(f"Failed to create SandboxClaim {name}: {proc.stderr}")
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"snap-claim-{name}.json")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(manifest)
+        stdout, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
+    if retcode != 0:
+        raise RuntimeError(f"Failed to create SandboxClaim {name}: {stderr}")
 
 
 def _RenderAndApplyTemplate(
@@ -611,7 +614,7 @@ def _RenderAndApplyTemplate(
     preload_mb,
     preload_mode,
 ):
-    """Render the .yaml.template with step-specific values and kubectl apply."""
+    """Render the Jinja2 template with step-specific values and kubectl apply."""
     if preload_mode.startswith("script:"):
         return _RenderAndApplyScriptTemplate(
             template_name,
@@ -626,50 +629,44 @@ def _RenderAndApplyTemplate(
 
     memory_mi = max(512, preload_mb + 256)
 
-    rendered = (
-        content.replace("$AGENTIC_NAMESPACE", namespace)
-        .replace("$SNAPSHOT_KSA_NAME", ksa_name)
-        .replace("$SNAPSHOT_PRELOAD_MB", str(preload_mb))
-    )
-    rendered = rendered.replace(
-        "name: snapshot-benchmark-template",
-        f"name: {template_name}",
-    )
-    rendered = rendered.replace(
-        'memory: "512Mi"',
-        f'memory: "{memory_mi}Mi"',
+    tmpl = Template(content)
+    rendered = tmpl.render(
+        template_name=template_name,
+        namespace=namespace,
+        ksa_name=ksa_name,
+        preload_mb=preload_mb,
+        memory_mi=memory_mi,
     )
 
-    proc = subprocess.run(
-        ["kubectl", "apply", "-f", "-"],
-        input=rendered,
-        capture_output=True,
-        text=True,
-        timeout=30,
+    tmp_dir = os.path.join(
+        data.ResourcePath("k8s_agents/manifests"), "tmp"
     )
-    if proc.returncode != 0:
-        logging.warning("kubectl apply stderr: %s", proc.stderr)
-    return proc.returncode == 0
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"snap-template-{template_name}.yaml")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(rendered)
+        stdout, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
+    if retcode != 0:
+        logging.warning("kubectl apply stderr: %s", stderr)
+    return retcode == 0
 
 
 def _get_sandbox_node_selector():
-    """Return the correct nodeSelector based on provisioning mode."""
-    try:
-        mode = FLAGS.gke_provision_mode
-    except AttributeError:
-        mode = "custom"
-    if mode == "native":
-        return {"pkb_nodepool": "sandbox"}
-    return {"dedicated": "agentic-sandbox"}
+    """Return the nodeSelector for sandbox pods."""
+    return {"pkb_nodepool": "sandbox"}
 
 
 def _get_sandbox_tolerations():
-    """Return the correct tolerations based on provisioning mode."""
-    try:
-        mode = FLAGS.gke_provision_mode
-    except AttributeError:
-        mode = "custom"
-    tolerations = [
+    """Return tolerations for sandbox pods."""
+    return [
         {
             "key": "sandbox.gke.io/runtime",
             "operator": "Equal",
@@ -677,17 +674,6 @@ def _get_sandbox_tolerations():
             "effect": "NoSchedule",
         },
     ]
-    if mode != "native":
-        tolerations.insert(
-            0,
-            {
-                "key": "dedicated",
-                "operator": "Equal",
-                "value": "agentic-sandbox",
-                "effect": "NoSchedule",
-            },
-        )
-    return tolerations
 
 
 def _RenderAndApplyScriptTemplate(
@@ -725,7 +711,7 @@ def _RenderAndApplyScriptTemplate(
         "done\n"
     )
 
-    manifest = {
+    manifest = json.dumps({
         "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
         "kind": "SandboxTemplate",
         "metadata": {
@@ -762,18 +748,27 @@ def _RenderAndApplyScriptTemplate(
                 },
             }
         },
-    }
+    })
 
-    proc = subprocess.run(
-        ["kubectl", "apply", "-f", "-"],
-        input=json.dumps(manifest),
-        capture_output=True,
-        text=True,
-        timeout=30,
+    tmp_dir = os.path.join(
+        data.ResourcePath("k8s_agents/manifests"), "tmp"
     )
-    if proc.returncode != 0:
-        logging.warning("kubectl apply stderr: %s", proc.stderr)
-    return proc.returncode == 0
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"snap-script-template-{template_name}.json")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(manifest)
+        stdout, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
+    if retcode != 0:
+        logging.warning("kubectl apply stderr: %s", stderr)
+    return retcode == 0
 
 
 def _MeasureSingleSource(name, namespace, t0, pod_timeout, preload_mode):
@@ -861,15 +856,24 @@ def _TriggerAndWaitSnapshot(trigger_name, target_pod, namespace, t0, timeout_s=3
             "spec": {"targetPod": target_pod},
         }
     )
-    proc = subprocess.run(
-        ["kubectl", "apply", "-f", "-"],
-        input=manifest,
-        capture_output=True,
-        text=True,
-        timeout=30,
+    tmp_dir = os.path.join(
+        data.ResourcePath("k8s_agents/manifests"), "tmp"
     )
-    if proc.returncode != 0:
-        result["error"] = f"Failed to create trigger: {proc.stderr}"
+    os.makedirs(tmp_dir, exist_ok=True)
+    tmp_path = os.path.join(tmp_dir, f"snap-trigger-{trigger_name}.json")
+    try:
+        with open(tmp_path, "w") as f:
+            f.write(manifest)
+        stdout, stderr, retcode = kubectl.RunKubectlCommand(
+            ["apply", "-f", tmp_path],
+            timeout=30,
+            raise_on_failure=False,
+        )
+    finally:
+        if os.path.isfile(tmp_path):
+            os.unlink(tmp_path)
+    if retcode != 0:
+        result["error"] = f"Failed to create trigger: {stderr}"
         return result
 
     deadline = t0 + timeout_s
@@ -985,13 +989,9 @@ def _CleanupStep(source_names, restore_names, trigger_names, template_name, name
 
 def _GetTemplatePath():
     """Return the absolute path to the snapshot SandboxTemplate template."""
-    pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
     return os.path.join(
-        pkg_dir,
-        "data",
-        "k8s_agents",
-        "manifests",
-        "snapshot-sandbox-template.yaml.template",
+        data.ResourcePath("k8s_agents/manifests"),
+        "snapshot-sandbox-template.yaml.j2",
     )
 
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
index 1c00deca54..e696b089db 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
@@ -17,7 +17,7 @@
                 --gke_warmpool_ready_threshold_s=300 \
                 --gke_warmpool_poll_interval_s=2.0 \
                 --gke_warmpool_drain_timeout_s=300 \
-                --gke_namespace=agentic \
+                --k8s_namespace=agentic \
                 --gke_machine_type=c4-standard-8
 
 Samples emitted (per run):
@@ -55,7 +55,6 @@
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
 )
-from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils
 
 FLAGS = flags.FLAGS
 
@@ -113,11 +112,6 @@
 # ---------------------------------------------------------------------------
 
 
-def Provision(benchmark_spec):
-    """Provision GKE cluster and all dependencies."""
-    gke_provision_utils.Provision()
-
-
 def GetConfig(user_config):
     """Load and return benchmark config.
 
@@ -129,7 +123,7 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads onto the cluster."""
     logging.info("=== Prepare: deploying workloads ===")
-    deploy_utils.DeployWorkloads()
+    deploy_utils.DeployWorkloads(benchmark_spec)
     utils.EnsurePortForward()
     logging.info("Prepare complete.")
 
@@ -140,7 +134,9 @@ def Run(benchmark_spec):
     Returns:
       List of sample.Sample objects.
     """
-    ns = FLAGS.gke_namespace
+    utils.set_benchmark_spec(benchmark_spec)
+
+    ns = FLAGS.k8s_namespace
     target = FLAGS.gke_warmpool_target_replicas
     warmpool_name = FLAGS.gke_warmpool_name
     label = FLAGS.gke_warmpool_pod_label
@@ -148,7 +144,7 @@ def Run(benchmark_spec):
     poll_interval = FLAGS.gke_warmpool_poll_interval_s
 
     # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
-    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
     time.sleep(3)
 
     logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target)
@@ -157,7 +153,7 @@ def Run(benchmark_spec):
 
     # 1. Measure drain time (should be near-zero since Prepare drained)
     t0 = time.time()
-    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
     drain_time_s = round(time.time() - t0, 2)
 
     time.sleep(2)
@@ -185,8 +181,8 @@ def Run(benchmark_spec):
 
     while time.time() < deadline:
         elapsed = time.time() - t_scale
-        running = _CountPods(ns, label, "Running")
-        pending = _CountPods(ns, label, "Pending")
+        running = utils.CountPods(ns, label, "Running")
+        pending = utils.CountPods(ns, label, "Pending")
 
         if first_pod_time is None and running > 0:
             first_pod_time = elapsed
@@ -207,8 +203,8 @@ def Run(benchmark_spec):
         time.sleep(poll_interval)
 
     total_time = round(time.time() - t_scale, 2)
-    final_running = _CountPods(ns, label, "Running")
-    final_pending = _CountPods(ns, label, "Pending")
+    final_running = utils.CountPods(ns, label, "Running")
+    final_pending = utils.CountPods(ns, label, "Pending")
     rate = round(final_running / total_time, 2) if total_time > 0 else 0
 
     logging.info(
@@ -312,79 +308,21 @@ def Run(benchmark_spec):
 
 def Cleanup(benchmark_spec):
     """Drain warm pool back to 0 after measurement."""
-    ns = FLAGS.gke_namespace
+    ns = FLAGS.k8s_namespace
     warmpool_name = FLAGS.gke_warmpool_name
     label = FLAGS.gke_warmpool_pod_label
 
     logging.info("Cleanup: draining warm pool to 0.")
-    _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s)
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
     utils.StopPortForward()
     logging.info("Cleanup complete.")
 
 
-def Teardown(benchmark_spec):
-    """Teardown GKE cluster and all dependencies."""
-    gke_provision_utils.Teardown()
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
 
-def _CountPods(namespace, label, phase=None):
-    """Count pods matching label (and optionally phase)."""
-    cmd = ["get", "pods", "-n", namespace, "-l", label, "-o", "name"]
-    if phase:
-        cmd += [f"--field-selector=status.phase={phase}"]
-    stdout, _, rc = utils.RunKubectl(cmd, raise_on_failure=False)
-    if rc != 0 or not stdout:
-        return 0
-    return len(stdout.strip().splitlines())
-
-
-def _DrainPool(namespace, warmpool_name, label, timeout_s):
-    """Scale pool to 0 and wait for all pods to terminate."""
-    patch_json = json.dumps({"spec": {"replicas": 0}})
-    utils.RunKubectl(
-        [
-            "patch",
-            "sandboxwarmpool",
-            warmpool_name,
-            "-n",
-            namespace,
-            "--type=merge",
-            f"-p={patch_json}",
-        ],
-        raise_on_failure=False,
-    )
-
-    # Delete any lingering SandboxClaims
-    utils.RunKubectl(
-        [
-            "delete",
-            "sandboxclaims",
-            "--all",
-            "-n",
-            namespace,
-            "--ignore-not-found=true",
-        ],
-        timeout=60,
-        raise_on_failure=False,
-    )
-
-    t0 = time.time()
-    while time.time() - t0 < timeout_s:
-        remaining = _CountPods(namespace, label)
-        if remaining == 0:
-            elapsed = time.time() - t0
-            logging.info("Pool drained in %.1fs", elapsed)
-            return
-        time.sleep(2)
-
-    logging.warning("Drain timed out after %.0fs", timeout_s)
-
-
 def _ScrapeLifecycle(namespace, label, scale_start_epoch):
     """Scrape pod metadata to compute time-to-created/scheduled/running.
 
diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py
index 244ba5d774..eeabaae0b3 100644
--- a/perfkitbenchmarker/providers/gcp/flags.py
+++ b/perfkitbenchmarker/providers/gcp/flags.py
@@ -581,12 +581,6 @@
     ' beyond the default node pool (e.g. kubernetes_node_scale with 5k nodes).',
 )
 
-GKE_USE_BETA = flags.DEFINE_boolean(
-    'gke_use_beta',
-    False,
-    'Use gcloud beta for cluster creation (required for preview features '
-    'like pod snapshots).',
-)
 
 GKE_ADDITIONAL_FLAGS = flags.DEFINE_list(
     'gke_additional_flags',
diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
index 3c24ad941c..c4012faf1a 100644
--- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
+++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
@@ -417,9 +417,7 @@ def _Create(self):
     if self.enable_aam:
       cmd.args.append('--auto-monitoring-scope=ALL')
 
-    # --- PKB Extension: beta gcloud and additional cluster create flags ---
-    if gcp_flags.GKE_USE_BETA.value:
-      cmd.use_beta_gcloud = True
+    # --- PKB Extension: additional cluster create flags ---
     for additional_flag in gcp_flags.GKE_ADDITIONAL_FLAGS.value:
       cmd.args.append(additional_flag)
 
diff --git a/requirements.txt b/requirements.txt
index 1313c628f5..755f82737c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,4 +33,3 @@ setuptools>=40.3.0,<81
 six>=1.13.0
 timeout-decorator
 scipy
-matplotlib
diff --git a/snapshot-sandbox-template.yaml.j2 b/snapshot-sandbox-template.yaml.j2
new file mode 100644
index 0000000000..4e25cb5833
--- /dev/null
+++ b/snapshot-sandbox-template.yaml.j2
@@ -0,0 +1,46 @@
+---
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: {{ template_name }}
+  namespace: {{ ns }}
+spec:
+  podTemplate:
+    metadata:
+      labels:
+        app: snapshot-benchmark-workload
+    spec:
+      serviceAccountName: {{ ksa_name }}
+      runtimeClassName: gvisor
+      containers:
+      - name: preloader
+        image: python:3.11-slim
+        command: ["python3", "-c"]
+        args:
+          - |
+            import time, os
+            preload_mb = int(os.environ.get("PRELOAD_MB", "10"))
+            print(f"Preloading {preload_mb} MB of memory...", flush=True)
+            _ballast = bytearray(preload_mb * 1024 * 1024)
+            print(f"Preload complete. Starting counter.", flush=True)
+            i = 0
+            while True:
+                print(f"Count: {i}", flush=True)
+                i += 1
+                time.sleep(1)
+        env:
+          - name: PRELOAD_MB
+            value: "{{ preload_mb }}"
+        resources:
+          requests:
+            cpu: "250m"
+            memory: "{{ memory_mi }}Mi"
+            ephemeral-storage: "512Mi"
+      nodeSelector:
+        pkb_nodepool: sandbox
+      tolerations:
+        - key: "sandbox.gke.io/runtime"
+          operator: "Equal"
+          value: "gvisor"
+          effect: "NoSchedule"
+      restartPolicy: "OnFailure"

From 8fa0c68d3ea867cc6a0ae9dac46dd62909f109c7 Mon Sep 17 00:00:00 2001
From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com>
Date: Mon, 29 Jun 2026 12:31:43 -0400
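Subject: [PATCH 3/5] Rename gke_* agentic benchmarks and flags to k8s_*

Rename the agentic benchmark modules, benchmark names, config keys and
flags from the gke_ prefix to k8s_. Also add the --skip_deploy_snapshots
and --target_arch flags, and remove the top-level
snapshot-sandbox-template.yaml.j2.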
---
 .../config/agentic_benchmark_config.yaml      | 16 ++--
 .../kubernetes/agentic/gke_deploy_utils.py    | 14 +++-
 .../agentic/gke_image_build_utils.py          | 20 +++++
 ...chmark_utils.py => k8s_benchmark_utils.py} |  6 +-
 ...k.py => k8s_chromium_density_benchmark.py} | 40 +++++-----
 ...benchmark.py => k8s_deletion_benchmark.py} | 48 ++++++------
 ..._benchmark.py => k8s_payload_benchmark.py} | 32 ++++----
 ...ark.py => k8s_python_density_benchmark.py} | 38 ++++-----
 ..._qps_benchmark.py => k8s_qps_benchmark.py} | 62 +++++++--------
 ...benchmark.py => k8s_snapshot_benchmark.py} | 77 +++++++++++--------
 ...benchmark.py => k8s_warmpool_benchmark.py} | 50 ++++++------
 snapshot-sandbox-template.yaml.j2             | 46 -----------
 12 files changed, 222 insertions(+), 227 deletions(-)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_benchmark_utils.py => k8s_benchmark_utils.py} (99%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_chromium_density_benchmark.py => k8s_chromium_density_benchmark.py} (89%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_deletion_benchmark.py => k8s_deletion_benchmark.py} (92%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_payload_benchmark.py => k8s_payload_benchmark.py} (95%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_python_density_benchmark.py => k8s_python_density_benchmark.py} (91%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_qps_benchmark.py => k8s_qps_benchmark.py} (94%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_snapshot_benchmark.py => k8s_snapshot_benchmark.py} (94%)
 rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_warmpool_benchmark.py => k8s_warmpool_benchmark.py} (91%)
 delete mode 100644 snapshot-sandbox-template.yaml.j2

diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
index 95077b469c..0098eff013 100644
--- a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
+++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
@@ -13,7 +13,7 @@
 #   --temp_dir=<path>
 #
 # Benchmark-specific sweep parameters (vary per run):
-#   --gke_python_density_concurrent_sandbox_count=N
+#   --k8s_python_density_concurrent_sandbox_count=N
 #   --gke_snapshot_preload_mb=N
 #   etc.
 
@@ -21,7 +21,7 @@
 # Shared cluster configuration (identical across all benchmarks)
 # ===========================================================================
 
-gke_python_density:
+k8s_python_density:
   flags:
     # --- Cluster creation flags ---
     gke_additional_flags:
@@ -66,7 +66,7 @@ gke_python_density:
           type: gvisor
 
 
-gke_chromium_density:
+k8s_chromium_density:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
@@ -109,7 +109,7 @@ gke_chromium_density:
           type: gvisor
 
 
-gke_payload:
+k8s_payload:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
@@ -152,7 +152,7 @@ gke_payload:
           type: gvisor
 
 
-gke_qps:
+k8s_qps:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
@@ -195,7 +195,7 @@ gke_qps:
           type: gvisor
 
 
-gke_snapshot:
+k8s_snapshot:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
@@ -238,7 +238,7 @@ gke_snapshot:
           type: gvisor
 
 
-gke_warmpool:
+k8s_warmpool:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
@@ -281,7 +281,7 @@ gke_warmpool:
           type: gvisor
 
 
-gke_deletion:
+k8s_deletion:
   flags:
     gke_additional_flags:
       - "--enable-pod-snapshots"
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
index 9ff1684951..297b06758f 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
@@ -124,6 +124,14 @@ def _RenderAndApply(template_name, **kwargs):
     return retcode == 0
 
 
+flags.DEFINE_bool(
+    "skip_deploy_snapshots",
+    False,
+    "Skip deployment of Pod Snapshot infrastructure. "
+    "Set to True on non-GKE clusters where pod snapshots are not supported.",
+)
+
+
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
@@ -230,6 +238,10 @@ def DeploySnapshots():
       4. Bind IAM roles
       5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy
     """
+    if FLAGS.skip_deploy_snapshots:
+        logging.info("Skipping snapshot infrastructure (--skip_deploy_snapshots=True).")
+        return
+
     ns = FLAGS.k8s_namespace
     project = getattr(FLAGS, 'project', '') or ''
     zone = getattr(FLAGS, 'zone', '') or ''
@@ -241,7 +253,7 @@ def DeploySnapshots():
 
     bucket_name = "agent-sandbox-snapshots-{}".format(project)
     snapshot_folder = "benchmark-snapshots"
-    ksa_name = FLAGS.gke_snapshot_ksa_name
+    ksa_name = FLAGS.k8s_snapshot_ksa_name
 
     logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name)
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
index 13340184bc..a339af8022 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -30,6 +30,14 @@
 # Architecture detection
 # ---------------------------------------------------------------------------
 
+flags.DEFINE_string(
+    "target_arch",
+    "",
+    "Target CPU architecture for container images (amd64 or arm64). "
+    "If set, skips gcloud machine-type detection. "
+    "Use this for non-GCP environments or when gcloud is unavailable.",
+)
+
 _ARCH_MAP = {
     "X86_64": "amd64",
     "ARM64": "arm64",
@@ -44,6 +52,18 @@ def _DetectArchitecture(machine_type, zone, project):
 
     Falls back to amd64 if gcloud fails.
     """
+    # Quick exit if user provided arch explicitly
+    if FLAGS.target_arch:
+        arch = FLAGS.target_arch.lower()
+        if arch in ("amd64", "arm64"):
+            logging.info("Using user-provided target_arch: %s", arch)
+            return arch
+        logging.warning(
+            "Invalid --target_arch='%s'. Must be amd64 or arm64. "
+            "Proceeding with gcloud detection.",
+            FLAGS.target_arch,
+        )
+
     try:
         stdout, _, retcode = vm_util.IssueCommand(
             [
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py
similarity index 99%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py
index 02d2d40a81..e23aa32a6d 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py
@@ -41,7 +41,7 @@
 )
 
 flags.DEFINE_string(
-    "gke_benchmark_note",
+    "k8s_benchmark_note",
     "",
     "Arbitrary note string attached to every sample for tagging runs.",
 )
@@ -233,8 +233,8 @@ def BuildMetadata(namespace, extra=None):
                 machine_type = getattr(cluster.vm_spec, 'machine_type', None)
     if machine_type:
         metadata["machine_type"] = machine_type
-    if FLAGS.gke_benchmark_note:
-        metadata["note"] = FLAGS.gke_benchmark_note
+    if FLAGS.k8s_benchmark_note:
+        metadata["note"] = FLAGS.k8s_benchmark_note
     if extra:
         metadata.update(extra)
     return metadata
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
similarity index 89%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
index 24d55350b5..346f59a8b0 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
@@ -11,9 +11,9 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_chromium_density \\
-                --gke_chromium_density_concurrent_sessions=4 \\
-                --gke_chromium_density_task_count=10 \\
-                --gke_chromium_density_warmup_tasks=5 \\
+  python pkb.py --benchmarks=k8s_chromium_density \\
+                --k8s_chromium_density_concurrent_sessions=4 \\
+                --k8s_chromium_density_task_count=10 \\
+                --k8s_chromium_density_warmup_tasks=5 \\
                 --k8s_namespace=agentic \\
                 --k8s_agent_api_url=http://localhost:8080
 
@@ -43,7 +43,7 @@
 from absl import flags
 from perfkitbenchmarker import configs
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -51,9 +51,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_chromium_density"
+BENCHMARK_NAME = "k8s_chromium_density"
 BENCHMARK_CONFIG = """
-gke_chromium_density:
+k8s_chromium_density:
   description: >
     Atomic single-point Chromium browser sandbox density measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -67,37 +67,37 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_chromium_density_concurrent_sessions",
+    "k8s_chromium_density_concurrent_sessions",
     1,
     "Number of concurrent Chromium browser sessions to run.",
 )
 
 flags.DEFINE_integer(
-    "gke_chromium_density_task_count",
+    "k8s_chromium_density_task_count",
     10,
     "Number of browser task iterations per Chromium session.",
 )
 
 flags.DEFINE_integer(
-    "gke_chromium_density_warmup_tasks",
+    "k8s_chromium_density_warmup_tasks",
     5,
     "Number of warmup iterations per session (excluded from stats).",
 )
 
 flags.DEFINE_bool(
-    "gke_chromium_density_patch_warmpool",
+    "k8s_chromium_density_patch_warmpool",
     True,
     "Patch SandboxWarmPool replicas to match density before measurement.",
 )
 
 flags.DEFINE_integer(
-    "gke_chromium_density_exec_timeout",
+    "k8s_chromium_density_exec_timeout",
     120,
     "Sandbox command execution timeout in seconds.",
 )
 
 flags.DEFINE_integer(
-    "gke_chromium_density_provision_timeout",
+    "k8s_chromium_density_provision_timeout",
     300,
     "Max seconds to wait for warm pool pods to reach Running.",
 )
@@ -134,7 +134,7 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    density = FLAGS.gke_chromium_density_concurrent_sessions
+    density = FLAGS.k8s_chromium_density_concurrent_sessions
 
     logging.info("=== Run: chromium_density=%d ===", density)
 
@@ -142,21 +142,21 @@ def Run(benchmark_spec):
     utils.EnsurePortForward()
 
     # Patch warm pool (moved from Prepare for sweep compatibility)
-    if FLAGS.gke_chromium_density_patch_warmpool:
+    if FLAGS.k8s_chromium_density_patch_warmpool:
         utils.PatchWarmPool(
             namespace=ns,
             warmpool_name=_WARMPOOL_NAME,
             replicas=density,
             label=_WARMPOOL_LABEL,
-            wait_timeout=FLAGS.gke_chromium_density_provision_timeout,
+            wait_timeout=FLAGS.k8s_chromium_density_provision_timeout,
         )
 
     # POST to agent API
     payload = {
-        "task_count": FLAGS.gke_chromium_density_task_count,
-        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
+        "task_count": FLAGS.k8s_chromium_density_task_count,
+        "warmup_tasks": FLAGS.k8s_chromium_density_warmup_tasks,
         "concurrent_sessions": density,
-        "sandbox_exec_timeout_s": FLAGS.gke_chromium_density_exec_timeout,
+        "sandbox_exec_timeout_s": FLAGS.k8s_chromium_density_exec_timeout,
     }
 
     t0 = time.time()
@@ -179,8 +179,8 @@ def Run(benchmark_spec):
         "density": density,
         "successful_sessions": successful,
         "failed_sessions": failed,
-        "task_count": FLAGS.gke_chromium_density_task_count,
-        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
+        "task_count": FLAGS.k8s_chromium_density_task_count,
+        "warmup_tasks": FLAGS.k8s_chromium_density_warmup_tasks,
         "wall_time_s": round(wall_time, 2),
     }
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
similarity index 92%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
index 92b360919d..ddeae29f9d 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
@@ -12,12 +12,12 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_deletion \\
-                --gke_deletion_batch_size=100 \\
-                --gke_deletion_warmpool_name=python-sandbox-warmpool \\
-                --gke_deletion_pod_label=sandbox=python-sandbox-example \\
-                --gke_deletion_poll_interval_s=1.0 \\
-                --gke_deletion_provision_timeout_s=120.0 \\
-                --gke_deletion_drain_timeout_s=300.0 \\
+  python pkb.py --benchmarks=k8s_deletion \\
+                --k8s_deletion_batch_size=100 \\
+                --k8s_deletion_warmpool_name=python-sandbox-warmpool \\
+                --k8s_deletion_pod_label=sandbox=python-sandbox-example \\
+                --k8s_deletion_poll_interval_s=1.0 \\
+                --k8s_deletion_provision_timeout_s=120.0 \\
+                --k8s_deletion_drain_timeout_s=300.0 \\
                 --k8s_namespace=agentic \\
                 --gke_machine_type=c4-standard-8
 
@@ -43,7 +43,7 @@
 from absl import flags
 from perfkitbenchmarker import configs
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -51,9 +51,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_deletion"
+BENCHMARK_NAME = "k8s_deletion"
 BENCHMARK_CONFIG = """
-gke_deletion:
+k8s_deletion:
   description: >
     Atomic single-point bulk deletion and IP reclamation measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -64,37 +64,37 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_deletion_batch_size",
+    "k8s_deletion_batch_size",
     100,
     "Number of sandbox pods to provision then bulk-delete.",
 )
 
 flags.DEFINE_string(
-    "gke_deletion_warmpool_name",
+    "k8s_deletion_warmpool_name",
     "python-sandbox-warmpool",
     "SandboxWarmPool resource name.",
 )
 
 flags.DEFINE_string(
-    "gke_deletion_pod_label",
+    "k8s_deletion_pod_label",
     "sandbox=python-sandbox-example",
     "Label selector for warm pool pods.",
 )
 
 flags.DEFINE_float(
-    "gke_deletion_poll_interval_s",
+    "k8s_deletion_poll_interval_s",
     1.0,
     "Seconds between kubectl polls during deletion.",
 )
 
 flags.DEFINE_float(
-    "gke_deletion_provision_timeout_s",
+    "k8s_deletion_provision_timeout_s",
     120.0,
     "Max seconds to wait for pods to reach Running before deletion.",
 )
 
 flags.DEFINE_float(
-    "gke_deletion_drain_timeout_s",
+    "k8s_deletion_drain_timeout_s",
     300.0,
     "Max seconds to wait for all pods to terminate after scale-to-0.",
 )
@@ -130,12 +130,12 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    batch_size = FLAGS.gke_deletion_batch_size
-    warmpool_name = FLAGS.gke_deletion_warmpool_name
-    label = FLAGS.gke_deletion_pod_label
-    poll_interval = FLAGS.gke_deletion_poll_interval_s
-    provision_timeout = FLAGS.gke_deletion_provision_timeout_s
-    drain_timeout = FLAGS.gke_deletion_drain_timeout_s
+    batch_size = FLAGS.k8s_deletion_batch_size
+    warmpool_name = FLAGS.k8s_deletion_warmpool_name
+    label = FLAGS.k8s_deletion_pod_label
+    poll_interval = FLAGS.k8s_deletion_poll_interval_s
+    provision_timeout = FLAGS.k8s_deletion_provision_timeout_s
+    drain_timeout = FLAGS.k8s_deletion_drain_timeout_s
 
     logging.info("=== Run: batch_size=%d ===", batch_size)
 
@@ -392,11 +392,11 @@ def Run(benchmark_spec):
 def Cleanup(benchmark_spec):
     """Best-effort drain of warm pool after measurement."""
     ns = FLAGS.k8s_namespace
-    warmpool_name = FLAGS.gke_deletion_warmpool_name
-    label = FLAGS.gke_deletion_pod_label
+    warmpool_name = FLAGS.k8s_deletion_warmpool_name
+    label = FLAGS.k8s_deletion_pod_label
 
     logging.info("Cleanup: draining warm pool to 0.")
-    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_deletion_drain_timeout_s))
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_deletion_drain_timeout_s))
     utils.StopPortForward()
     logging.info("Cleanup complete.")
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
similarity index 95%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
index 7d95d4bc82..9f31aee342 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
@@ -11,9 +11,9 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_payload \
+  python pkb.py --benchmarks=k8s_payload \
-                --gke_payload_size_mb=50 \
-                --gke_payload_iterations=20 \
-                --gke_payload_concurrent_sessions=5 \
+                --k8s_payload_size_mb=50 \
+                --k8s_payload_iterations=20 \
+                --k8s_payload_concurrent_sessions=5 \
                 --k8s_namespace=agentic \
                 --k8s_agent_api_url=http://localhost:8080
 
@@ -66,7 +66,7 @@
 from absl import flags
 from perfkitbenchmarker import configs
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -74,9 +74,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_payload"
+BENCHMARK_NAME = "k8s_payload"
 BENCHMARK_CONFIG = """
-gke_payload:
+k8s_payload:
   description: >
     Atomic single-point payload transfer saturation measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -90,31 +90,31 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_float(
-    "gke_payload_size_mb",
+    "k8s_payload_size_mb",
     1.0,
     "Payload size in megabytes to transfer from the sandbox.",
 )
 
 flags.DEFINE_integer(
-    "gke_payload_iterations",
+    "k8s_payload_iterations",
     20,
     "Number of transfer iterations per sandbox session.",
 )
 
 flags.DEFINE_integer(
-    "gke_payload_concurrent_sessions",
+    "k8s_payload_concurrent_sessions",
     5,
     "Number of parallel sandbox sessions.",
 )
 
 flags.DEFINE_integer(
-    "gke_payload_exec_timeout",
+    "k8s_payload_exec_timeout",
     300,
     "Sandbox command execution timeout in seconds.",
 )
 
 flags.DEFINE_bool(
-    "gke_payload_patch_warmpool",
+    "k8s_payload_patch_warmpool",
     True,
     "Patch SandboxWarmPool replicas to match concurrent_sessions before measurement.",
 )
@@ -151,9 +151,9 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    payload_size_mb = FLAGS.gke_payload_size_mb
-    iterations = FLAGS.gke_payload_iterations
-    concurrent = FLAGS.gke_payload_concurrent_sessions
+    payload_size_mb = FLAGS.k8s_payload_size_mb
+    iterations = FLAGS.k8s_payload_iterations
+    concurrent = FLAGS.k8s_payload_concurrent_sessions
 
     logging.info(
         "=== Run: payload_size_mb=%s, iterations=%d, concurrent=%d ===",
@@ -166,7 +166,7 @@ def Run(benchmark_spec):
     utils.EnsurePortForward()
 
     # Patch warm pool (moved from Prepare for sweep compatibility)
-    if FLAGS.gke_payload_patch_warmpool:
+    if FLAGS.k8s_payload_patch_warmpool:
         utils.PatchWarmPool(
             namespace=ns,
             warmpool_name=_WARMPOOL_NAME,
@@ -179,7 +179,7 @@ def Run(benchmark_spec):
         "payload_size_mb": payload_size_mb,
         "payload_iterations": iterations,
         "concurrent_sessions": concurrent,
-        "sandbox_exec_timeout_s": FLAGS.gke_payload_exec_timeout,
+        "sandbox_exec_timeout_s": FLAGS.k8s_payload_exec_timeout,
     }
 
     t0 = time.time()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
similarity index 91%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
index e323be4d31..207fd40a20 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
@@ -20,9 +20,9 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_python_density \\
+  python pkb.py --benchmarks=k8s_python_density \\
-                --gke_python_density_concurrent_sandbox_count=16 \\
-                --gke_python_density_sample_count=20 \\
-                --gke_python_density_sample_warmup=0 \\
+                --k8s_python_density_concurrent_sandbox_count=16 \\
+                --k8s_python_density_sample_count=20 \\
+                --k8s_python_density_sample_warmup=0 \\
                 --k8s_namespace=agentic \\
                 --k8s_agent_api_url=http://localhost:8080
 
@@ -55,7 +55,7 @@
 from absl import flags
 from perfkitbenchmarker import configs
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -63,9 +63,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_python_density"
+BENCHMARK_NAME = "k8s_python_density"
 BENCHMARK_CONFIG = """
-gke_python_density:
+k8s_python_density:
   description: >
     Atomic single-point Python sandbox density measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -79,19 +79,19 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_python_density_concurrent_sandbox_count",
+    "k8s_python_density_concurrent_sandbox_count",
     1,
     "Number of concurrent sandbox sessions to run.",
 )
 
 flags.DEFINE_integer(
-    "gke_python_density_sample_count",
+    "k8s_python_density_sample_count",
     20,
     "Number of sample iterations per sandbox session.",
 )
 
 flags.DEFINE_integer(
-    "gke_python_density_sample_warmup",
+    "k8s_python_density_sample_warmup",
     0,
     "Number of warmup iterations per session (excluded from stats). "
     "Warmup iterations execute the same benchmark tasks as measured "
@@ -101,13 +101,13 @@
 )
 
 flags.DEFINE_bool(
-    "gke_python_density_patch_warmpool",
+    "k8s_python_density_patch_warmpool",
     True,
     "Patch SandboxWarmPool replicas to match density before measurement.",
 )
 
 flags.DEFINE_integer(
-    "gke_python_density_exec_timeout",
+    "k8s_python_density_exec_timeout",
     600,
     "Timeout in seconds for the API call.",
 )
@@ -144,7 +144,7 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    density = FLAGS.gke_python_density_concurrent_sandbox_count
+    density = FLAGS.k8s_python_density_concurrent_sandbox_count
 
     logging.info("=== Run: density=%d ===", density)
 
@@ -152,7 +152,7 @@ def Run(benchmark_spec):
     utils.EnsurePortForward()
 
     # Patch warm pool to match density (moved from Prepare for sweep compatibility)
-    if FLAGS.gke_python_density_patch_warmpool:
+    if FLAGS.k8s_python_density_patch_warmpool:
         utils.PatchWarmPool(
             namespace=ns,
             warmpool_name=_WARMPOOL_NAME,
@@ -162,10 +162,10 @@ def Run(benchmark_spec):
 
     # POST to agent API
     payload = {
-        "sample_count": FLAGS.gke_python_density_sample_count,
-        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
+        "sample_count": FLAGS.k8s_python_density_sample_count,
+        "sample_warmup": FLAGS.k8s_python_density_sample_warmup,
         "concurrent_sessions": density,
-        "sandbox_exec_timeout_s": FLAGS.gke_python_density_exec_timeout,
+        "sandbox_exec_timeout_s": FLAGS.k8s_python_density_exec_timeout,
     }
 
     t0 = time.time()
@@ -188,8 +188,8 @@ def Run(benchmark_spec):
         "density": density,
         "successful_sessions": successful,
         "failed_sessions": failed,
-        "sample_count": FLAGS.gke_python_density_sample_count,
-        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
+        "sample_count": FLAGS.k8s_python_density_sample_count,
+        "sample_warmup": FLAGS.k8s_python_density_sample_warmup,
         "wall_time_s": round(wall_time, 2),
     }
 
@@ -334,7 +334,7 @@ def Cleanup(benchmark_spec):
     ns = FLAGS.k8s_namespace
     logging.info("Cleanup: draining warm pool.")
 
-    if FLAGS.gke_python_density_patch_warmpool:
+    if FLAGS.k8s_python_density_patch_warmpool:
         utils.DrainWarmPool(
             namespace=ns,
             warmpool_name=_WARMPOOL_NAME,
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
similarity index 94%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
index 2146489752..4528082ad5 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
@@ -15,20 +15,20 @@
 Usage:
   # Agent mode
-  python pkb.py --benchmarks=gke_qps \\
+  python pkb.py --benchmarks=k8s_qps \\
-                --gke_qps_target_qps=5.0 \\
-                --gke_qps_pool_size=70 \\
-                --gke_qps_step_duration_s=30.0 \\
-                --gke_qps_mode=agent \\
+                --k8s_qps_target_qps=5.0 \\
+                --k8s_qps_pool_size=70 \\
+                --k8s_qps_step_duration_s=30.0 \\
+                --k8s_qps_mode=agent \\
                 --k8s_namespace=agentic \\
                 --k8s_agent_api_url=http://localhost:8080
 
   # Raw claim mode
-  python pkb.py --benchmarks=gke_qps \\
+  python pkb.py --benchmarks=k8s_qps \\
-                --gke_qps_target_qps=5.0 \\
-                --gke_qps_pool_size=70 \\
-                --gke_qps_step_duration_s=30.0 \\
-                --gke_qps_mode=raw_claim \\
-                --gke_qps_claim_timeout_s=60.0 \\
+                --k8s_qps_target_qps=5.0 \\
+                --k8s_qps_pool_size=70 \\
+                --k8s_qps_step_duration_s=30.0 \\
+                --k8s_qps_mode=raw_claim \\
+                --k8s_qps_claim_timeout_s=60.0 \\
                 --k8s_namespace=agentic
 
 Samples emitted (per run):
@@ -62,7 +62,7 @@
 from perfkitbenchmarker import data
 from perfkitbenchmarker.resources.container_service import kubectl
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -70,9 +70,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_qps"
+BENCHMARK_NAME = "k8s_qps"
 BENCHMARK_CONFIG = """
-gke_qps:
+k8s_qps:
   description: >
     Atomic single-point QPS saturation measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -88,44 +88,44 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_float(
-    "gke_qps_target_qps",
+    "k8s_qps_target_qps",
     5.0,
     "Target requests per second (sandbox claims per second).",
 )
 
 flags.DEFINE_integer(
-    "gke_qps_pool_size",
+    "k8s_qps_pool_size",
     70,
     "Warm pool size maintained during the measurement.",
 )
 
 flags.DEFINE_float(
-    "gke_qps_step_duration_s",
+    "k8s_qps_step_duration_s",
     30.0,
     "Duration of the QPS burst in seconds.",
 )
 
 flags.DEFINE_integer(
-    "gke_qps_sandbox_exec_timeout_s",
+    "k8s_qps_sandbox_exec_timeout_s",
     30,
     "Sandbox command execution timeout in seconds.",
 )
 
 flags.DEFINE_float(
-    "gke_qps_provision_timeout_s",
+    "k8s_qps_provision_timeout_s",
     180.0,
     "Max seconds to wait for pool pods to reach Running.",
 )
 
 flags.DEFINE_string(
-    "gke_qps_mode",
+    "k8s_qps_mode",
     "agent",
     "Operating mode: 'agent' (POST to orchestrator API) or "
     "'raw_claim' (create SandboxClaims directly via kubectl).",
 )
 
 flags.DEFINE_float(
-    "gke_qps_claim_timeout_s",
+    "k8s_qps_claim_timeout_s",
     60.0,
     "Max seconds to wait for a raw claim to bind " "(only used with mode=raw_claim).",
 )
@@ -149,7 +149,7 @@ def Prepare(benchmark_spec):
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
 
-    mode = FLAGS.gke_qps_mode
+    mode = FLAGS.k8s_qps_mode
     if mode == "agent":
         utils.CheckAgentHealthz(required=False)
     utils.EnsurePortForward()
@@ -165,7 +165,7 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    pool_size = FLAGS.gke_qps_pool_size
+    pool_size = FLAGS.k8s_qps_pool_size
 
     # Scale warm pool (moved from Prepare for sweep compatibility)
     utils.PatchWarmPool(
@@ -173,10 +173,10 @@ def Run(benchmark_spec):
         warmpool_name=_WARMPOOL_NAME,
         replicas=pool_size,
         label=_WARMPOOL_LABEL,
-        wait_timeout=int(FLAGS.gke_qps_provision_timeout_s),
+        wait_timeout=int(FLAGS.k8s_qps_provision_timeout_s),
     )
 
-    mode = FLAGS.gke_qps_mode
+    mode = FLAGS.k8s_qps_mode
 
     if mode == "raw_claim":
         return _RunRawClaim(benchmark_spec)
@@ -211,9 +211,9 @@ def Cleanup(benchmark_spec):
 def _RunAgent(benchmark_spec):
     """Fire QPS burst via the orchestrator API."""
     ns = FLAGS.k8s_namespace
-    target_qps = FLAGS.gke_qps_target_qps
-    pool_size = FLAGS.gke_qps_pool_size
-    step_duration = FLAGS.gke_qps_step_duration_s
+    target_qps = FLAGS.k8s_qps_target_qps
+    pool_size = FLAGS.k8s_qps_pool_size
+    step_duration = FLAGS.k8s_qps_step_duration_s
 
     logging.info(
         "=== Run (agent): target_qps=%s, pool_size=%d, duration=%ss ===",
@@ -232,7 +232,7 @@ def _RunAgent(benchmark_spec):
     payload = {
         "target_qps": target_qps,
         "duration_s": step_duration,
-        "sandbox_exec_timeout_s": FLAGS.gke_qps_sandbox_exec_timeout_s,
+        "sandbox_exec_timeout_s": FLAGS.k8s_qps_sandbox_exec_timeout_s,
     }
 
     t0 = time.time()
@@ -378,10 +378,10 @@ def _RunAgent(benchmark_spec):
 def _RunRawClaim(benchmark_spec):
     """Fire SandboxClaims directly at target_qps (no agent)."""
     ns = FLAGS.k8s_namespace
-    target_qps = FLAGS.gke_qps_target_qps
-    pool_size = FLAGS.gke_qps_pool_size
-    step_duration = FLAGS.gke_qps_step_duration_s
-    claim_timeout = FLAGS.gke_qps_claim_timeout_s
+    target_qps = FLAGS.k8s_qps_target_qps
+    pool_size = FLAGS.k8s_qps_pool_size
+    step_duration = FLAGS.k8s_qps_step_duration_s
+    claim_timeout = FLAGS.k8s_qps_claim_timeout_s
 
     logging.info(
         "=== Run (raw_claim): target_qps=%s, pool_size=%d, duration=%ss ===",
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
similarity index 94%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
index 44d21fcc84..cb49011b08 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
@@ -11,24 +11,24 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_snapshot \\
+  python pkb.py --benchmarks=k8s_snapshot \\
-                --gke_snapshot_preload_mb=50 \\
-                --gke_snapshot_burst_size=3 \\
+                --k8s_snapshot_preload_mb=50 \\
+                --k8s_snapshot_burst_size=3 \\
                 --k8s_namespace=agentic \\
-                --gke_snapshot_skip_snapshot=false
+                --k8s_snapshot_skip_snapshot=false
 
 Samples emitted (per run):
-  - gke_snapshot_snapshot_p50        (seconds)
-  - gke_snapshot_snapshot_p95        (seconds)
-  - gke_snapshot_snapshot_max        (seconds)
-  - gke_snapshot_restore_p50         (seconds)
-  - gke_snapshot_restore_p95         (seconds)
-  - gke_snapshot_restore_max         (seconds)
-  - gke_snapshot_ttfe_p50            (seconds)
-  - gke_snapshot_ttfe_p95            (seconds)
-  - gke_snapshot_ttfe_max            (seconds)
-  - gke_snapshot_startup_time        (seconds)
-  - gke_snapshot_restore_correct_count (count)
-  - gke_snapshot_wall_time           (seconds)
+  - k8s_snapshot_snapshot_p50        (seconds)
+  - k8s_snapshot_snapshot_p95        (seconds)
+  - k8s_snapshot_snapshot_max        (seconds)
+  - k8s_snapshot_restore_p50         (seconds)
+  - k8s_snapshot_restore_p95         (seconds)
+  - k8s_snapshot_restore_max         (seconds)
+  - k8s_snapshot_ttfe_p50            (seconds)
+  - k8s_snapshot_ttfe_p95            (seconds)
+  - k8s_snapshot_ttfe_max            (seconds)
+  - k8s_snapshot_startup_time        (seconds)
+  - k8s_snapshot_restore_correct_count (count)
+  - k8s_snapshot_wall_time           (seconds)
 """
 
 import json
@@ -46,7 +46,7 @@
 from perfkitbenchmarker.resources.container_service import kubectl
 from perfkitbenchmarker import sample
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -54,9 +54,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_snapshot"
+BENCHMARK_NAME = "k8s_snapshot"
 BENCHMARK_CONFIG = """
-gke_snapshot:
+k8s_snapshot:
   description: >
     Atomic single-point Pod Snapshot saturation measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -67,37 +67,37 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_snapshot_preload_mb",
+    "k8s_snapshot_preload_mb",
     10,
     "Megabytes of memory to pre-allocate in the sandbox before snapshot.",
 )
 
 flags.DEFINE_integer(
-    "gke_snapshot_burst_size",
+    "k8s_snapshot_burst_size",
     1,
     "Number of concurrent source/snapshot/restore pods per measurement.",
 )
 
 flags.DEFINE_string(
-    "gke_snapshot_ksa_name",
+    "k8s_snapshot_ksa_name",
     "pod-snapshot-sa",
     "Kubernetes service account for pod snapshots.",
 )
 
 flags.DEFINE_integer(
-    "gke_snapshot_pod_timeout",
+    "k8s_snapshot_pod_timeout",
     180,
     "Max seconds to wait for pod Running / preload.",
 )
 
 flags.DEFINE_boolean(
-    "gke_snapshot_skip_snapshot",
+    "k8s_snapshot_skip_snapshot",
     False,
     "Skip snapshot/restore phases — measure cold-start TTFE only.",
 )
 
 flags.DEFINE_string(
-    "gke_snapshot_preload_mode",
+    "k8s_snapshot_preload_mode",
     "synthetic",
     "Preload mode: 'synthetic' (os.urandom fill) or "
     "'script:<path>' to run a custom startup script.",
@@ -120,19 +120,28 @@ def GetConfig(user_config):
 def Prepare(benchmark_spec):
     """Deploy workloads, snapshot infra, and validate readiness."""
     ns = FLAGS.k8s_namespace
-    preload_mb = FLAGS.gke_snapshot_preload_mb
+    preload_mb = FLAGS.k8s_snapshot_preload_mb
 
     logging.info(
         "=== Prepare: preload_mb=%d, burst_size=%d ===",
         preload_mb,
-        FLAGS.gke_snapshot_burst_size,
+        FLAGS.k8s_snapshot_burst_size,
     )
 
     # Deploy Agent Sandbox ecosystem (idempotent)
     deploy_utils.DeployWorkloads(benchmark_spec)
 
     # Deploy Pod Snapshot infrastructure (idempotent)
-    deploy_utils.DeploySnapshots()
+    # Pod Snapshots are GKE-specific; skip on other platforms.
+    cloud = getattr(
+        getattr(benchmark_spec, "container_cluster", None), "cloud", "GCP"
+    )
+    if cloud == "GCP" and not FLAGS.skip_deploy_snapshots:
+        deploy_utils.DeploySnapshots()
+    elif cloud != "GCP":
+        logging.info(
+            "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).", cloud
+        )
 
     # 1. Verify PodSnapshotStorageConfig exists (cluster-scoped).
     _, _, retcode = utils.RunKubectl(
@@ -157,7 +166,7 @@ def Prepare(benchmark_spec):
         logging.warning("PodSnapshotPolicy not found in namespace %s.", ns)
 
     # 3. Verify the service account exists.
-    ksa = FLAGS.gke_snapshot_ksa_name
+    ksa = FLAGS.k8s_snapshot_ksa_name
     _, _, retcode = utils.RunKubectl(
         ["get", "serviceaccount", ksa, "-n", ns],
         timeout=30,
@@ -189,12 +198,12 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    preload_mb = FLAGS.gke_snapshot_preload_mb
-    burst_size = FLAGS.gke_snapshot_burst_size
-    skip_snapshot = FLAGS.gke_snapshot_skip_snapshot
-    preload_mode = FLAGS.gke_snapshot_preload_mode
-    ksa_name = FLAGS.gke_snapshot_ksa_name
-    pod_timeout = FLAGS.gke_snapshot_pod_timeout
+    preload_mb = FLAGS.k8s_snapshot_preload_mb
+    burst_size = FLAGS.k8s_snapshot_burst_size
+    skip_snapshot = FLAGS.k8s_snapshot_skip_snapshot
+    preload_mode = FLAGS.k8s_snapshot_preload_mode
+    ksa_name = FLAGS.k8s_snapshot_ksa_name
+    pod_timeout = FLAGS.k8s_snapshot_pod_timeout
 
     logging.info(
         "=== Run: preload_mb=%d, burst_size=%d, skip_snapshot=%s ===",
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
similarity index 91%
rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
index e696b089db..62c6462351 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
@@ -11,12 +11,12 @@
 
 Usage:
-  python pkb.py --benchmarks=gke_warmpool \
+  python pkb.py --benchmarks=k8s_warmpool \
-                --gke_warmpool_target_replicas=100 \
-                --gke_warmpool_name=python-sandbox-warmpool \
-                --gke_warmpool_pod_label=sandbox=python-sandbox-example \
-                --gke_warmpool_ready_threshold_s=300 \
-                --gke_warmpool_poll_interval_s=2.0 \
-                --gke_warmpool_drain_timeout_s=300 \
+                --k8s_warmpool_target_replicas=100 \
+                --k8s_warmpool_name=python-sandbox-warmpool \
+                --k8s_warmpool_pod_label=sandbox=python-sandbox-example \
+                --k8s_warmpool_ready_threshold_s=300 \
+                --k8s_warmpool_poll_interval_s=2.0 \
+                --k8s_warmpool_drain_timeout_s=300 \
                 --k8s_namespace=agentic \
                 --gke_machine_type=c4-standard-8
 
@@ -50,7 +50,7 @@
 from datetime import datetime, timezone
 from perfkitbenchmarker import configs
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
-    gke_benchmark_utils as utils,
+    k8s_benchmark_utils as utils,
 )
 from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
     gke_deploy_utils as deploy_utils,
@@ -58,9 +58,9 @@
 
 FLAGS = flags.FLAGS
 
-BENCHMARK_NAME = "gke_warmpool"
+BENCHMARK_NAME = "k8s_warmpool"
 BENCHMARK_CONFIG = """
-gke_warmpool:
+k8s_warmpool:
   description: >
     Atomic single-point warm pool scale-up measurement on a
     pre-provisioned GKE cluster with gVisor isolation.
@@ -71,37 +71,37 @@
 # ---------------------------------------------------------------------------
 
 flags.DEFINE_integer(
-    "gke_warmpool_target_replicas",
+    "k8s_warmpool_target_replicas",
     100,
     "Number of warm pool replicas to provision from zero.",
 )
 
 flags.DEFINE_string(
-    "gke_warmpool_name",
+    "k8s_warmpool_name",
     "python-sandbox-warmpool",
     "SandboxWarmPool resource name.",
 )
 
 flags.DEFINE_string(
-    "gke_warmpool_pod_label",
+    "k8s_warmpool_pod_label",
     "sandbox=python-sandbox-example",
     "Label selector for warm pool pods.",
 )
 
 flags.DEFINE_float(
-    "gke_warmpool_ready_threshold_s",
+    "k8s_warmpool_ready_threshold_s",
     300.0,
     "Max seconds allowed for all pods to reach Running.",
 )
 
 flags.DEFINE_float(
-    "gke_warmpool_poll_interval_s",
+    "k8s_warmpool_poll_interval_s",
     2.0,
     "Seconds between kubectl polls during provisioning.",
 )
 
 flags.DEFINE_float(
-    "gke_warmpool_drain_timeout_s",
+    "k8s_warmpool_drain_timeout_s",
     300.0,
     "Max seconds to wait for drain to 0.",
 )
@@ -137,14 +137,14 @@ def Run(benchmark_spec):
     utils.set_benchmark_spec(benchmark_spec)
 
     ns = FLAGS.k8s_namespace
-    target = FLAGS.gke_warmpool_target_replicas
-    warmpool_name = FLAGS.gke_warmpool_name
-    label = FLAGS.gke_warmpool_pod_label
-    threshold_s = FLAGS.gke_warmpool_ready_threshold_s
-    poll_interval = FLAGS.gke_warmpool_poll_interval_s
+    target = FLAGS.k8s_warmpool_target_replicas
+    warmpool_name = FLAGS.k8s_warmpool_name
+    label = FLAGS.k8s_warmpool_pod_label
+    threshold_s = FLAGS.k8s_warmpool_ready_threshold_s
+    poll_interval = FLAGS.k8s_warmpool_poll_interval_s
 
     # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility)
-    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s))
     time.sleep(3)
 
     logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target)
@@ -153,7 +153,7 @@ def Run(benchmark_spec):
 
-    # 1. Measure drain time (should be near-zero since Prepare drained)
+    # 1. Measure drain time (should be near-zero: Run already drained above)
     t0 = time.time()
-    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s))
     drain_time_s = round(time.time() - t0, 2)
 
     time.sleep(2)
@@ -309,11 +309,11 @@ def Run(benchmark_spec):
 def Cleanup(benchmark_spec):
     """Drain warm pool back to 0 after measurement."""
     ns = FLAGS.k8s_namespace
-    warmpool_name = FLAGS.gke_warmpool_name
-    label = FLAGS.gke_warmpool_pod_label
+    warmpool_name = FLAGS.k8s_warmpool_name
+    label = FLAGS.k8s_warmpool_pod_label
 
     logging.info("Cleanup: draining warm pool to 0.")
-    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s))
+    utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s))
     utils.StopPortForward()
     logging.info("Cleanup complete.")
 
diff --git a/snapshot-sandbox-template.yaml.j2 b/snapshot-sandbox-template.yaml.j2
deleted file mode 100644
index 4e25cb5833..0000000000
--- a/snapshot-sandbox-template.yaml.j2
+++ /dev/null
@@ -1,46 +0,0 @@
----
-apiVersion: extensions.agents.x-k8s.io/v1alpha1
-kind: SandboxTemplate
-metadata:
-  name: {{ template_name }}
-  namespace: {{ ns }}
-spec:
-  podTemplate:
-    metadata:
-      labels:
-        app: snapshot-benchmark-workload
-    spec:
-      serviceAccountName: {{ ksa_name }}
-      runtimeClassName: gvisor
-      containers:
-      - name: preloader
-        image: python:3.11-slim
-        command: ["python3", "-c"]
-        args:
-          - |
-            import time, os
-            preload_mb = int(os.environ.get("PRELOAD_MB", "10"))
-            print(f"Preloading {preload_mb} MB of memory...", flush=True)
-            _ballast = bytearray(preload_mb * 1024 * 1024)
-            print(f"Preload complete. Starting counter.", flush=True)
-            i = 0
-            while True:
-                print(f"Count: {i}", flush=True)
-                i += 1
-                time.sleep(1)
-        env:
-          - name: PRELOAD_MB
-            value: "{{ preload_mb }}"
-        resources:
-          requests:
-            cpu: "250m"
-            memory: "{{ memory_mi }}Mi"
-            ephemeral-storage: "512Mi"
-      nodeSelector:
-        pkb_nodepool: sandbox
-      tolerations:
-        - key: "sandbox.gke.io/runtime"
-          operator: "Equal"
-          value: "gvisor"
-          effect: "NoSchedule"
-      restartPolicy: "OnFailure"

From 84ddaf5ed191952f3b403dff9bd4469cdbb41829 Mon Sep 17 00:00:00 2001
From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com>
Date: Tue, 30 Jun 2026 22:10:03 -0400
Subject: [PATCH 4/5] pkb-native image building, bug fixes, and optimizations

---
 .../agentic/adk-agent}/.dockerignore          |   0
 .../agentic/adk-agent}/.gcloudignore          |   0
 .../agentic/adk-agent}/Dockerfile             |   0
 .../agentic/adk-agent}/__init__.py            |   0
 .../gke_performance_agent/__init__.py         |   0
 .../adk-agent}/gke_performance_agent/agent.py |  16 +-
 .../agentic/adk-agent}/main.py                |  22 +-
 .../agentic/adk-agent}/requirements.txt       |   0
 .../python_test_app/benchmark_density.py      |   0
 .../python_test_app/benchmark_payload.py      |   0
 .../python_test_app/benchmark_qps.py          |   0
 .../config/agentic_benchmark_config.yaml      | 378 +++++-------------
 .../workloads/adk_agent/cloudbuild.yaml       |  13 -
 .../adk_agent/generated.env.template          |  26 --
 .../chromium_test_app/benchmark_density.js    | 177 --------
 .../kubernetes/agentic/gke_deploy_utils.py    |  88 ++--
 .../agentic/gke_image_build_utils.py          | 199 +--------
 .../kubernetes/agentic/gke_post_teardown.py   |  40 +-
 .../kubernetes/agentic/gke_prerequisites.py   | 116 +++++-
 .../agentic/k8s_snapshot_benchmark.py         |  31 +-
 20 files changed, 350 insertions(+), 756 deletions(-)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/.dockerignore (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/.gcloudignore (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/Dockerfile (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/__init__.py (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/gke_performance_agent/__init__.py (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/gke_performance_agent/agent.py (96%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/main.py (97%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/requirements.txt (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_density.py (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_payload.py (100%)
 rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_qps.py (100%)
 delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
 delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
 delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js

diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile b/perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py
similarity index 96%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py
index 2aef3c153c..6561942960 100644
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py
@@ -40,6 +40,7 @@
 from google.adk.models.base_llm import BaseLlm
 from google.adk.models.llm_response import LlmResponse
 from google.genai import types
+from concurrent.futures import ThreadPoolExecutor
 from dotenv import load_dotenv
 from google.adk.apps import App
 import logging
@@ -166,6 +167,12 @@ async def generate_content_async(self, llm_request, stream=False):
 # =========================================================================
 
 
+# Module-level thread pool for sandbox I/O operations.
+# Initialized once at import time to avoid thread-safety issues
+# with lazy initialization inside _execute_in_sandbox().
+_SANDBOX_POOL = ThreadPoolExecutor(max_workers=16)
+
+
 class V3GkeCodeExecutor(GkeCodeExecutor):
     def _execute_in_sandbox(self, code: str) -> CodeExecutionResult:
         """Executes code using the v0.4.6 compatible SandboxClient."""
@@ -173,17 +180,10 @@ def _execute_in_sandbox(self, code: str) -> CodeExecutionResult:
         from k8s_agent_sandbox.models import SandboxDirectConnectionConfig
         import logging
         import time
-        from concurrent.futures import ThreadPoolExecutor
 
         logging.info("Executing via V3 SandboxClient (v0.4.6 compatible).")
 
-        # Shared thread pool for sandbox operations to allow overlapping
-        # blocking I/O when sessions run on different threads.
-        global _SANDBOX_POOL
-        try:
-            _SANDBOX_POOL
-        except NameError:
-            _SANDBOX_POOL = ThreadPoolExecutor(max_workers=16)
+        # _SANDBOX_POOL is initialized at module level (thread-safe).
 
         # Use DirectConnection when SANDBOX_ROUTER_URL is set (in-cluster),
         # otherwise fall back to kubectl port-forward (dev mode).
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py
similarity index 97%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/main.py
index bcdb090188..473c2072c2 100644
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py
@@ -386,8 +386,13 @@ async def benchmark_python_density(req: BenchmarkRequest):
 
         prompt = "Please start the GKE performance benchmark workflow."
 
-        # Fire concurrent sessions. Run each session in its own thread so
-        # blocking ADK/Runner activity cannot serialize session start.
+        # Fire concurrent sessions.
+        # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread()
+        # with a nested asyncio.run() to create a per-thread event loop. This is
+        # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle
+        # via kubectl/HTTP) that would starve a shared event loop and serialize
+        # session starts. The per-thread event loop overhead (~0.1ms) is negligible
+        # compared to sandbox round-trip times (~200ms+).
         thread_tasks = [
             asyncio.create_task(
                 asyncio.to_thread(
@@ -472,8 +477,13 @@ async def benchmark_python_payload(req: PayloadBenchmarkRequest):
 
         prompt = "Please start the GKE performance benchmark workflow."
 
-        # Fire concurrent sessions. Run each session in its own thread so
-        # blocking ADK/Runner activity cannot serialize session start.
+        # Fire concurrent sessions.
+        # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread()
+        # with a nested asyncio.run() to create a per-thread event loop. This is
+        # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle
+        # via kubectl/HTTP) that would starve a shared event loop and serialize
+        # session starts. The per-thread event loop overhead (~0.1ms) is negligible
+        # compared to sandbox round-trip times (~200ms+).
         thread_tasks = [
             asyncio.create_task(
                 asyncio.to_thread(
@@ -544,7 +554,7 @@ async def benchmark_python_qps(req: QpsBenchmarkRequest):
         qps_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))"
 
     sandbox_template = os.getenv("SANDBOX_TEMPLATE", "python-sandbox-template")
-    sandbox_namespace = os.getenv("SANDBOX_NAMESPACE", "agentic")
+    sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic")
     exec_timeout = req.sandbox_exec_timeout_s
     qps_claim_label = {"created-by": "pkb-qps-benchmark"}
 
@@ -791,7 +801,7 @@ async def benchmark_chromium_density(req: ChromiumBenchmarkRequest):
             k8s_config.load_kube_config()
         core_v1 = k8s_client.CoreV1Api()
 
-        # Inline HTML test page (same as benchmark_density.js used)
+        # Inline HTML test page (data: URL avoids network dependencies)
         test_page = """data:text/html,
 <!DOCTYPE html>
 <html>
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt b/perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py
similarity index 100%
rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
rename to perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py
diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
index 0098eff013..69922efdb0 100644
--- a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
+++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml
@@ -14,311 +14,137 @@
 #
 # Benchmark-specific sweep parameters (vary per run):
 #   --k8s_python_density_concurrent_sandbox_count=N
-#   --gke_snapshot_preload_mb=N
+#   --k8s_snapshot_preload_mb=N
 #   etc.
 
 # ===========================================================================
-# Shared cluster configuration (identical across all benchmarks)
+# Shared configuration (defined once, referenced by all benchmarks via YAML
+# anchors). PKB ignores top-level keys that don't match a benchmark name.
 # ===========================================================================
 
-k8s_python_density:
-  flags:
-    # --- Cluster creation flags ---
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
+_shared_flags: &shared_flags
+  # --- Cluster creation flags ---
+  gke_additional_flags:
+    - "--enable-pod-snapshots"
+    - "--enable-dataplane-v2"
+    - "--enable-private-nodes"
+    - "--enable-ip-alias"
+    - "--master-ipv4-cidr=172.16.0.0/28"
+  gke_additional_nodepool_flags:
+    - "--max-pods-per-node=250"
+  container_cluster_version: "1.35.5-gke.1057002"
+  gke_enable_shielded_nodes: false
+  gce_subnet_region: "us-central1"
+
+  # --- Agentic workload flags ---
+  k8s_namespace: "agentic"
+  agent_sandbox_version: "v0.4.6"
+  k8s_gvisor: true
+  k8s_agent_api_url: "http://localhost:8080"
+
+_shared_cluster: &shared_cluster
+  cloud: GCP
+  type: Kubernetes
+  vm_count: 1
+  vm_spec:
+    GCP:
+      machine_type: c4-standard-8
+      zone: us-central1-a
+      boot_disk_type: hyperdisk-balanced
+      boot_disk_size: 50
+  nodepools:
+    sandbox:
+      vm_count: 1
+      vm_spec:
+        GCP:
+          machine_type: c4-standard-8
+          zone: us-central1-a
+          boot_disk_type: hyperdisk-balanced
+          boot_disk_size: 100
+      sandbox_config:
+        type: gvisor
+
+_shared_registry: &shared_registry
+  cloud: GCP
+  spec:
+    GCP:
+      zone: us-central1-a
+
+
+_shared_container_specs: &shared_container_specs
+  adk_agent:
+    image: agentic/adk-agent
 
-    # --- Agentic workload flags ---
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
+# ===========================================================================
+# Benchmark definitions (each references the shared anchors above)
+# ===========================================================================
 
+k8s_python_density:
+  flags:
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_chromium_density:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_payload:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_qps:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_snapshot:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_warmpool:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
-
+    <<: *shared_cluster
 
 k8s_deletion:
   flags:
-    gke_additional_flags:
-      - "--enable-pod-snapshots"
-      - "--enable-dataplane-v2"
-      - "--enable-private-nodes"
-      - "--enable-ip-alias"
-      - "--master-ipv4-cidr=172.16.0.0/28"
-    gke_additional_nodepool_flags:
-      - "--max-pods-per-node=250"
-    container_cluster_version: "1.35.3-gke.1389000"
-    gke_enable_shielded_nodes: false
-    gce_subnet_region: "us-central1"
-
-    k8s_namespace: "agentic"
-    agent_sandbox_version: "v0.4.6"
-    k8s_gvisor: true
-    k8s_agent_api_url: "http://localhost:8080"
-    skip_image_build: false
-
+    <<: *shared_flags
+  container_registry:
+    <<: *shared_registry
+  container_specs:
+    <<: *shared_container_specs
   container_cluster:
-    cloud: GCP
-    type: Kubernetes
-    vm_count: 1
-    vm_spec:
-      GCP:
-        machine_type: c4-standard-8
-        zone: us-central1-a
-        boot_disk_type: hyperdisk-balanced
-        boot_disk_size: 50
-    nodepools:
-      sandbox:
-        vm_count: 1
-        vm_spec:
-          GCP:
-            machine_type: c4-standard-8
-            zone: us-central1-a
-            boot_disk_type: hyperdisk-balanced
-            boot_disk_size: 100
-        sandbox_config:
-          type: gvisor
+    <<: *shared_cluster
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
deleted file mode 100644
index f3f3f4b810..0000000000
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-steps:
-  - name: 'gcr.io/cloud-builders/docker'
-    args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.']
-
-images:
-  - '${_IMAGE_PATH}'
-
-options:
-  logging: CLOUD_LOGGING_ONLY
-
-substitutions:
-  _IMAGE_PATH: ''
-  _PLATFORM: 'linux/amd64'
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
deleted file mode 100644
index 3ec5f62d0b..0000000000
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template
+++ /dev/null
@@ -1,26 +0,0 @@
-# ==========================================================================
-# ADK Agent — Generated Environment File Template
-# ==========================================================================
-# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags).
-#
-# For local dev, manually create generated.env with your values.
-# ==========================================================================
-
-# --- Required: GKE executor config ---
-CLUSTER_NAME="${CLUSTER_NAME}"
-GOOGLE_CLOUD_PROJECT="${GOOGLE_CLOUD_PROJECT}"
-GOOGLE_CLOUD_LOCATION="${GOOGLE_CLOUD_LOCATION}"
-AGENTIC_NAMESPACE="${AGENTIC_NAMESPACE}"
-GOOGLE_GENAI_USE_VERTEXAI="${GOOGLE_GENAI_USE_VERTEXAI}"
-
-# --- Sandbox connection (set in-cluster; leave blank for local dev mode) ---
-# When set, SandboxClient uses DirectConnection (bypasses kubectl port-forward).
-# For local dev, set to "" to use per-pod kubectl port-forward tunnels.
-SANDBOX_ROUTER_URL="http://sandbox-router-svc.${AGENTIC_NAMESPACE}.svc.cluster.local:8080"
-
-# --- Optional: benchmark defaults (overridden by HTTP request params) ---
-SAMPLE_COUNT="${SAMPLE_COUNT}"
-SAMPLE_WARMUP="${SAMPLE_WARMUP}"
-PAYLOAD_SIZE_MB="${PAYLOAD_SIZE_MB}"
-PAYLOAD_ITERATIONS="${PAYLOAD_ITERATIONS}"
-
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js
deleted file mode 100644
index 7638720691..0000000000
--- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js
+++ /dev/null
@@ -1,177 +0,0 @@
-// Agentic Chromium Sandbox Benchmark (UC-C)
-// Measures: Interaction Latency, Screenshot Generation, DOM Evaluation, RSS
-// Requires: Playwright (pre-installed in the container image)
-//
-// Self-contained — no external Mock LLM service needed.  Uses data: URLs
-// and inline HTML to avoid network dependencies so the benchmark measures
-// pure gVisor + Chromium overhead.
-//
-// Environment variables (injected by orchestrator):
-//   TASK_COUNT    — iterations per run (default: 10)
-//   WARMUP_TASKS  — warmup iterations excluded from stats (default: 2)
-
-const { chromium } = require('playwright');
-const os = require('os');
-
-const TASK_COUNT = parseInt(process.env.TASK_COUNT || '10');
-const WARMUP_TASKS = parseInt(process.env.WARMUP_TASKS || '2');
-
-// Inline HTML page — avoids network round-trips so we measure pure
-// browser engine + gVisor overhead.
-const TEST_PAGE = `data:text/html,
-<!DOCTYPE html>
-<html>
-<head><title>PKB Chromium Benchmark</title></head>
-<body>
-  <h1 id="heading">Hello Sandbox</h1>
-  <input id="search" type="text" placeholder="Search..." />
-  <button id="btn">Click Me</button>
-  <div id="output"></div>
-  <script>
-    document.getElementById('btn').addEventListener('click', () => {
-      document.getElementById('output').textContent = 'clicked';
-    });
-  </script>
-</body>
-</html>`;
-
-function percentile(sorted, p) {
-  if (!sorted.length) return null;
-  const idx = Math.min(Math.floor(sorted.length * p), sorted.length - 1);
-  return sorted[idx];
-}
-
-function getMemoryMB() {
-  try {
-    const usage = process.memoryUsage();
-    return {
-      rss_mb: Math.round(usage.rss / 1024 / 1024 * 100) / 100,
-      heap_used_mb: Math.round(usage.heapUsed / 1024 / 1024 * 100) / 100,
-      heap_total_mb: Math.round(usage.heapTotal / 1024 / 1024 * 100) / 100,
-    };
-  } catch (e) {
-    return { rss_mb: null, heap_used_mb: null, heap_total_mb: null };
-  }
-}
-
-async function runBenchmark() {
-  const memStart = getMemoryMB();
-
-  // ── Cold Start: browser launch ──
-  const coldStart = performance.now();
-  const browser = await chromium.launch({
-    headless: true,
-    args: [
-      '--no-sandbox',
-      '--disable-gpu',
-      '--disable-dev-shm-usage',
-      '--disable-async-dns',
-      '--single-process',
-    ],
-  });
-  const cold_start_ms = performance.now() - coldStart;
-
-  const context = await browser.newContext();
-  const page = await context.newPage();
-
-  // Navigate once before the loop — amortize first-navigation overhead
-  await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' });
-
-  // Per-task latency arrays (filled during measured runs only)
-  const navigate_ms = [];
-  const screenshot_ms = [];
-  const evaluate_ms = [];
-  const click_ms = [];
-  const fill_ms = [];
-  const interaction_ms = []; // all task types pooled
-
-  for (let run = 0; run < WARMUP_TASKS + TASK_COUNT; run++) {
-    const measuring = run >= WARMUP_TASKS;
-
-    // 1. Navigate (reload the data: page)
-    let t0 = performance.now();
-    await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' });
-    let elapsed = performance.now() - t0;
-    if (measuring) { navigate_ms.push(elapsed); interaction_ms.push(elapsed); }
-
-    // 2. DOM evaluate — read heading text
-    t0 = performance.now();
-    await page.evaluate(() => document.getElementById('heading').textContent);
-    elapsed = performance.now() - t0;
-    if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); }
-
-    // 3. Fill input
-    t0 = performance.now();
-    await page.fill('#search', `query-${run}`);
-    elapsed = performance.now() - t0;
-    if (measuring) { fill_ms.push(elapsed); interaction_ms.push(elapsed); }
-
-    // 4. Click button
-    t0 = performance.now();
-    await page.click('#btn');
-    elapsed = performance.now() - t0;
-    if (measuring) { click_ms.push(elapsed); interaction_ms.push(elapsed); }
-
-    // 5. Verify click effect (DOM mutation)
-    t0 = performance.now();
-    await page.evaluate(() => document.getElementById('output').textContent);
-    elapsed = performance.now() - t0;
-    if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); }
-
-    // 6. Screenshot (snapshot generation)
-    t0 = performance.now();
-    await page.screenshot({ path: '/tmp/snap.png' });
-    elapsed = performance.now() - t0;
-    if (measuring) { screenshot_ms.push(elapsed); interaction_ms.push(elapsed); }
-  }
-
-  await browser.close();
-  const memEnd = getMemoryMB();
-
-  // ── Compute stats ──
-  const computeStats = (arr) => {
-    if (!arr.length) return null;
-    const sorted = [...arr].sort((a, b) => a - b);
-    const sum = sorted.reduce((a, b) => a + b, 0);
-    return {
-      mean_ms: Math.round(sum / sorted.length * 1000) / 1000,
-      p50_ms: Math.round(percentile(sorted, 0.50) * 1000) / 1000,
-      p95_ms: Math.round(percentile(sorted, 0.95) * 1000) / 1000,
-      p99_ms: Math.round(percentile(sorted, 0.99) * 1000) / 1000,
-      min_ms: Math.round(sorted[0] * 1000) / 1000,
-      max_ms: Math.round(sorted[sorted.length - 1] * 1000) / 1000,
-    };
-  };
-
-  const summary = {
-    sandbox_status: 'ok',
-    cold_start_ms: Math.round(cold_start_ms * 1000) / 1000,
-    task_count: TASK_COUNT,
-    warmup_tasks: WARMUP_TASKS,
-    // Per-task-type latency stats
-    navigate: computeStats(navigate_ms),
-    evaluate: computeStats(evaluate_ms),
-    fill: computeStats(fill_ms),
-    click: computeStats(click_ms),
-    screenshot: computeStats(screenshot_ms),
-    // Pooled interaction latency (all types)
-    interaction: computeStats(interaction_ms),
-    // Memory
-    rss_start_mb: memStart.rss_mb,
-    rss_end_mb: memEnd.rss_mb,
-    rss_growth_mb: memEnd.rss_mb != null && memStart.rss_mb != null
-      ? Math.round((memEnd.rss_mb - memStart.rss_mb) * 100) / 100
-      : null,
-  };
-
-  // Print JSON to stdout — orchestrator parses this
-  console.log(JSON.stringify(summary));
-}
-
-runBenchmark().catch((e) => {
-  console.log(JSON.stringify({
-    sandbox_status: 'error',
-    error: `${e.name}: ${e.message}`,
-  }));
-  process.exit(1);
-});
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
index 297b06758f..b2d31e026b 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py
@@ -70,11 +70,7 @@
     "Timeout in seconds for workload deployment rollout.",
 )
 
-flags.DEFINE_bool(
-    "skip_image_build",
-    False,
-    "Skip container image builds during Prepare.",
-)
+
 
 
 # Module-level derived images (set during DeployWorkloads)
@@ -131,6 +127,12 @@ def _RenderAndApply(template_name, **kwargs):
     "Set to True on non-GKE clusters where pod snapshots are not supported.",
 )
 
+flags.DEFINE_string(
+    "k8s_snapshot_ksa_name",
+    "pod-snapshot-sa",
+    "Kubernetes service account for pod snapshots.",
+)
+
 
 # ---------------------------------------------------------------------------
 # Public API
@@ -175,6 +177,7 @@ def DeployWorkloads(benchmark_spec=None):
     region = ""
     machine_type = ""
     cluster_name = ""
+    cluster = None
     if benchmark_spec:
         cluster = getattr(benchmark_spec, 'container_cluster', None)
         if cluster:
@@ -197,31 +200,36 @@ def DeployWorkloads(benchmark_spec=None):
         zone = getattr(FLAGS, 'zone', '') or ''
         region = zone[:-2] if zone else ''
 
-    # Build images if requested
-    # Detect architecture and derive image paths
+    # Derive image paths for template rendering.
+    # Chrome and Router images are built during prerequisites
+    # (gke_prerequisites.py), not during Prepare.
+    # ADK agent image is built by PKB container_specs during Provision.
     from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
         gke_image_build_utils,
     )
-    zone = cluster.zone if cluster else FLAGS.zone
-    arch = gke_image_build_utils._DetectArchitecture(machine_type, zone, project)
-
+    arch = FLAGS.target_arch or "amd64"
     global _derived_images
     _derived_images = _DeriveImagePaths(project, region, arch)
-
-    if not FLAGS.skip_image_build:
-        gke_image_build_utils.build_images_with_config(
-            project=project,
-            region=region,
-            machine_type=machine_type,
-            zone=zone,
-            arch=arch,
-        )
+    logging.info(
+        "DeployWorkloads: project=%s region=%s arch=%s",
+        project, region, arch,
+    )
+    logging.info("_derived_images: %s", _derived_images)
 
     _CreateNamespace(ns)
     _InstallCRDs()
     _DeploySandboxTemplates(ns)
     _DeploySandboxRouter(ns)
-    _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name)
+    # Prefer ADK image from PKB-native container_specs (built during Provision).
+    # Falls back to FLAGS.k8s_agent_image or derived image path.
+    adk_image_from_specs = ""
+    if benchmark_spec:
+        specs = getattr(benchmark_spec, "container_specs", {})
+        adk_spec = specs.get("adk_agent")
+        if adk_spec and getattr(adk_spec, "image", None):
+            adk_image_from_specs = adk_spec.image
+            logging.info("Using ADK image from container_specs: %s", adk_image_from_specs)
+    _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name, adk_image_override=adk_image_from_specs)
     _DeployPSIReader(ns)
     _WaitForAgentReady(ns)
 
@@ -365,13 +373,29 @@ def _DeploySandboxRouter(ns):
     )
 
 
-def _DeployADKAgent(ns, project="", region="", cluster_name=""):
+def _DeployADKAgent(ns, project="", region="", cluster_name="", adk_image_override=""):
     """Deploy ADK Agent: SA, ClusterRole, RoleBinding, Deployment, Service."""
-    adk_image = FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "")
+    adk_image = adk_image_override or FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "")
+
+    # Accept only Artifact Registry paths (those containing "docker.pkg.dev").
+    # When Prepare runs separately from Provision, container_specs may still
+    # hold the config YAML default (agentic/adk-agent), which is a Dockerfile
+    # lookup path. Any other value is swapped for the derived image, if set.
+    if adk_image and "docker.pkg.dev" not in adk_image:
+        derived = _derived_images.get("adk_agent", "")
+        if derived:
+            logging.warning(
+                "ADK image %s is not a registry path. Using derived: %s",
+                adk_image, derived,
+            )
+            adk_image = derived
+
     if not adk_image:
         logging.info("ADK agent image not set, skipping agent deployment.")
         return
 
+    logging.info("Using ADK image: %s", adk_image)
+
     project = project or ""
     region = region or ""
     cluster = cluster_name or ""
@@ -392,14 +416,17 @@ def _DeployPSIReader(ns):
 
 
 def _WaitForAgentReady(ns):
-    """Wait for ADK agent deployment to be ready."""
-    adk_image = FLAGS.k8s_agent_image
-    if not adk_image:
-        logging.info("ADK agent not deployed, skipping rollout wait.")
-        return
+    """Wait for ADK agent deployment to be ready.
+
+    Always attempts the rollout wait regardless of how the image was
+    specified (FLAGS.k8s_agent_image, container_specs, or _derived_images).
+    kubectl rollout status returns non-zero harmlessly if the deployment
+    does not exist, and raise_on_failure=False prevents that from
+    propagating.
+    """
     timeout = FLAGS.k8s_deploy_timeout
     logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout)
-    kubectl.RunKubectlCommand(
+    _, stderr, retcode = kubectl.RunKubectlCommand(
         [
             "rollout", "status", "deployment/adk-agent",
             "-n", ns,
@@ -407,6 +434,11 @@ def _WaitForAgentReady(ns):
         ],
         raise_on_failure=False,
     )
+    if retcode != 0:
+        logging.warning(
+            "adk-agent rollout status returned %d: %s",
+            retcode, stderr.strip()[:200],
+        )
 
 
 def _GetProjectNumber(project):
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
index a339af8022..750ae05988 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -1,14 +1,15 @@
 """Shared image build utilities for GKE Agent Sandbox benchmarks.
 
-Builds and pushes container images (ADK agent, Chrome sandbox, Sandbox Router)
-via Google Cloud Build. Called from:
-  - Provision() when --gke_skip_image_build is False (via BuildImages())
-  - prerequisite_setup.py (via build_images_with_config())
+Builds and pushes container images (Chrome sandbox, Sandbox Router) via
+Google Cloud Build. Called from gke_deploy_utils.DeployWorkloads() during
+the Prepare stage and from gke_prerequisites.build_sandbox_images().
+
+NOTE: The ADK Agent image is built by the PKB native container_specs
+mechanism during the Provision stage, not by this module.
 
 Images built:
-  - ADK Agent: perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ -> {region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch}
-  - Chrome Sandbox: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch}
-  - Sandbox Router: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch}
+  - Chrome Sandbox: cloned from agent-sandbox repo
+  - Sandbox Router: cloned from agent-sandbox repo
 """
 
 import logging
@@ -18,6 +19,7 @@
 import tempfile
 
 from absl import flags
+from perfkitbenchmarker import vm_util
 
 FLAGS = flags.FLAGS
 
@@ -25,7 +27,6 @@
 logger = logging.getLogger(__name__)
 
 
-
 # ---------------------------------------------------------------------------
 # Architecture detection
 # ---------------------------------------------------------------------------
@@ -95,18 +96,17 @@ def _DetectArchitecture(machine_type, zone, project):
     return "amd64"
 
 
-def build_images_with_config(project, region, machine_type, zone, arch, cloud_build_sa=None):
+def build_images_with_config(project, region, machine_type, zone, arch):
     """Core image build logic — no FLAGS dependency.
 
     Callable from both PKB (via BuildImages()) and prerequisite_setup.py.
+    Uses the project's default Cloud Build SA (no custom SA needed).
 
     Args:
         project: GCP project ID.
         region: GCP region (e.g. "us-central1").
         machine_type: Machine type string (e.g. "c4-standard-8").
             Used to derive target architecture (arm64 for c4a, amd64 otherwise).
-        cloud_build_sa: Cloud Build service account email.
-            If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com".
     """
     # Architecture passed in from caller (detected via gcloud)
     target_arch = arch
@@ -120,33 +120,20 @@ def build_images_with_config(project, region, machine_type, zone, arch, cloud_bu
         f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{target_arch}"
     )
 
-    # Cloud Build SA
-    if cloud_build_sa is None:
-        cloud_build_sa = f"adk-cloud-build-sa@{project}.iam.gserviceaccount.com"
 
-    logger.info("=== Building Container Images ===")
+    logger.info("=== Building Container Images (Chrome + Router only) ===")
     logger.info("  Project: %s", project)
     logger.info("  Region: %s", region)
     logger.info("  Architecture: %s", target_arch)
-    logger.info("  Cloud Build SA: %s", cloud_build_sa)
-
-    # 1. Build ADK Agent
-    _BuildADKAgentImage(
-        project=project,
-        region=region,
-        target_arch=target_arch,
-        image_path=adk_image,
-        cloud_build_sa=cloud_build_sa,
-        machine_type=machine_type,
-    )
+    logger.info("  Cloud Build SA: default (project Cloud Build SA)")
+    logger.info("  NOTE: ADK Agent image is built by PKB via container_specs")
 
-    # 2. Build Chrome Sandbox
+    # 1. Build Chrome Sandbox
     _BuildChromeSandboxImage(
         project=project,
         region=region,
         target_arch=target_arch,
         image_path=chrome_image,
-        cloud_build_sa=cloud_build_sa,
     )
 
     # 3. Build Sandbox Router
@@ -155,86 +142,19 @@ def build_images_with_config(project, region, machine_type, zone, arch, cloud_bu
         region=region,
         target_arch=target_arch,
         image_path=router_image,
-        cloud_build_sa=cloud_build_sa,
     )
 
-    logger.info("=== All images built successfully ===")
-    logger.info("  ADK Agent:      %s", adk_image)
+    logger.info("=== Chrome + Router images built successfully ===")
     logger.info("  Chrome Sandbox: %s", chrome_image)
     logger.info("  Sandbox Router: %s", router_image)
-
-
-def BuildImages():
-    """FLAGS-based entry point.
-
-    Reads configuration from native PKB FLAGS.
-    Delegates to build_images_with_config() for the actual work.
-    """
-    project = getattr(FLAGS, 'project', '') or ''
-    zone = getattr(FLAGS, 'zone', '') or ''
-    region = zone[:-2] if zone else ''
-    machine_type = getattr(FLAGS, 'machine_type', '') or ''
-    build_images_with_config(
-        project=project,
-        region=region,
-        machine_type=machine_type,
-    )
+    logger.info("  (ADK Agent built by PKB via container_specs)")
 
 
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
 
-
-def _BuildADKAgentImage(
-    project, region, target_arch, image_path, cloud_build_sa, machine_type=None
-):
-    """Build and push the ADK Agent image.
-
-    Uses the existing perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml with --substitutions
-    rather than generating a new one (avoids overwriting the committed file).
-    """
-    logger.info("Building ADK Agent image: %s", image_path)
-
-    # Locate the agent source directory
-    # Expected layout: repo_root/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/
-    repo_root = _FindRepoRoot()
-    agent_dir = os.path.join(repo_root, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")
-
-    if not os.path.isdir(agent_dir):
-        raise RuntimeError(
-            f"ADK agent source not found at {agent_dir}. "
-            "Ensure you are running from the repository root."
-        )
-
-    # Generate generated.env from template
-    _GenerateEnvFile(agent_dir, project, region, machine_type=machine_type)
-
-    # Use the existing cloudbuild.yaml with substitutions (don't overwrite)
-    cloudbuild_path = os.path.join(agent_dir, "cloudbuild.yaml")
-    if not os.path.isfile(cloudbuild_path):
-        raise RuntimeError(
-            f"cloudbuild.yaml not found at {cloudbuild_path}. "
-            "Expected perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml to exist."
-        )
-
-    _RunCmd(
-        [
-            "gcloud",
-            "builds",
-            "submit",
-            agent_dir,
-            f"--config={cloudbuild_path}",
-            f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}",
-            f"--project={project}",
-            f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}",
-        ]
-    )
-
-    logger.info("ADK Agent image built successfully.")
-
-
-def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_build_sa):
+def _BuildChromeSandboxImage(project, region, target_arch, image_path):
     """Build and push the Chrome Sandbox image."""
     logger.info("Building Chrome Sandbox image: %s", image_path)
 
@@ -280,7 +200,6 @@ def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_bui
             image_path=image_path,
             target_arch=target_arch,
             project=project,
-            cloud_build_sa=cloud_build_sa,
         )
 
         logger.info("Chrome Sandbox image built successfully.")
@@ -288,7 +207,7 @@ def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_bui
         shutil.rmtree(tmp_dir, ignore_errors=True)
 
 
-def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_build_sa):
+def _BuildSandboxRouterImage(project, region, target_arch, image_path):
     """Build and push the Sandbox Router image."""
     logger.info("Building Sandbox Router image: %s", image_path)
 
@@ -330,7 +249,6 @@ def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_bui
             image_path=image_path,
             target_arch=target_arch,
             project=project,
-            cloud_build_sa=cloud_build_sa,
         )
 
         logger.info("Sandbox Router image built successfully.")
@@ -338,60 +256,11 @@ def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_bui
         shutil.rmtree(tmp_dir, ignore_errors=True)
 
 
-def _GenerateEnvFile(
-    agent_dir, project, region, machine_type=None, namespace="agentic"
-):
-    """Render generated.env from template with current config values."""
-    template_path = os.path.join(agent_dir, "generated.env.template")
-    output_path = os.path.join(agent_dir, "generated.env")
-
-    if not os.path.isfile(template_path):
-        logger.warning(
-            "generated.env.template not found at %s, skipping.", template_path
-        )
-        return
-
-    with open(template_path, "r") as f:
-        content = f.read()
-
-    # Derive cluster name
-    machine_family = machine_type.split("-")[0] if machine_type else "c4"
-    suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"}
-    cluster_suffix = suffix_map.get(machine_family, "c4")
-
-    # Get username prefix for cluster name
-    user = os.environ.get("USER", "benchmark")
-    user_prefix = user.split(".")[0] if "." in user else user
-    cluster_name = f"{user_prefix}-agentic-{cluster_suffix}"
-
-    # Substitute variables
-    replacements = {
-        "${CLUSTER_NAME}": cluster_name,
-        "${GOOGLE_CLOUD_PROJECT}": project,
-        "${GOOGLE_CLOUD_LOCATION}": region,
-        "${AGENTIC_NAMESPACE}": namespace,
-        "${GOOGLE_GENAI_USE_VERTEXAI}": "true",
-        "${SANDBOX_ROUTER_URL}": f"http://sandbox-router-svc.{namespace}.svc.cluster.local:8080",
-        "${SAMPLE_COUNT}": "20",
-        "${SAMPLE_WARMUP}": "0",
-        "${PAYLOAD_SIZE_MB}": "1",
-        "${PAYLOAD_ITERATIONS}": "20",
-    }
-
-    for key, value in replacements.items():
-        content = content.replace(key, value)
-
-    with open(output_path, "w") as f:
-        f.write(content)
-
-    logger.info("Generated %s", output_path)
-
-
-def _SubmitCloudBuild(source_dir, image_path, target_arch, project, cloud_build_sa):
+def _SubmitCloudBuild(source_dir, image_path, target_arch, project):
     """Generate a cloudbuild.yaml with substitutions and submit via Cloud Build.
 
     Used for Chrome and Router images (built in temp directories).
-    The ADK agent uses its own committed cloudbuild.yaml instead.
+    Uses the project's default Cloud Build SA.
     """
     cloudbuild_content = """steps:
   - name: 'gcr.io/cloud-builders/docker'
@@ -419,36 +288,10 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project, cloud_build_
             f"--config={cloudbuild_path}",
             f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}",
             f"--project={project}",
-            f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}",
         ]
     )
 
 
-def _FindRepoRoot():
-    """Find the repository root by looking for known markers."""
-    # Try relative to this file
-    this_dir = os.path.dirname(os.path.abspath(__file__))
-    # Expected: perfkitbenchmarker/linux_benchmarks/ -> go up 2 levels
-    candidate = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(this_dir))))
-    if os.path.isdir(os.path.join(candidate, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")):
-        return candidate
-
-    # Try CWD
-    cwd = os.getcwd()
-    if os.path.isdir(os.path.join(cwd, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")):
-        return cwd
-
-    # Try parent of CWD
-    parent = os.path.dirname(cwd)
-    if os.path.isdir(os.path.join(parent, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")):
-        return parent
-
-    raise RuntimeError(
-        "Cannot locate repository root (looking for perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/). "
-        "Run from the repository root directory."
-    )
-
-
 def _RunCmd(cmd, cwd=None):
     """Run a shell command, raising on failure."""
     logger.info("  CMD: %s", " ".join(cmd))
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
index 49e04bb83d..1bae7b41d4 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py
@@ -27,17 +27,31 @@ def _run(cmd, check=False, timeout=300):
     return result
 
 
-def teardown_cloud_build_sa(project_id):
-    logger.info("=== Deleting Cloud Build SA ===")
-    sa_email = f"adk-cloud-build-sa@{project_id}.iam.gserviceaccount.com"
+def revoke_cloudbuild_sa_permissions(project_id):
+    """Revoke extra IAM roles from Cloud Build SA(s).
+
+    Mirrors grant_cloudbuild_sa_permissions() from gke_prerequisites.py.
+    Removes the role bindings from both possible SAs unconditionally, even
+    bindings that pre-date setup. Does NOT delete the (project-managed) SAs.
+    """
+    logger.info("=== Revoking extra permissions from Cloud Build SA(s) ===")
+    result = _run(["gcloud", "projects", "describe", project_id,
+                   "--format=value(projectNumber)"])
+    project_number = result.stdout.strip()
+    if not project_number:
+        logger.warning("Could not determine project number, skipping SA cleanup")
+        return
+    sa_emails = [
+        f"{project_number}@cloudbuild.gserviceaccount.com",
+        f"{project_number}-compute@developer.gserviceaccount.com",
+    ]
     roles = ["roles/logging.logWriter", "roles/storage.objectViewer",
              "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer"]
-    for role in roles:
-        _run(["gcloud", "projects", "remove-iam-policy-binding", project_id,
-              f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"])
-    _run(["gcloud", "iam", "service-accounts", "delete", sa_email,
-          f"--project={project_id}", "--quiet"])
-    logger.info("Cloud Build SA deleted.")
+    for sa_email in sa_emails:
+        for role in roles:
+            _run(["gcloud", "projects", "remove-iam-policy-binding", project_id,
+                  f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"])
+    logger.info("Cloud Build SA extra permissions revoked.")
 
 
 def teardown_snapshot_bucket(project_id, region):
@@ -52,7 +66,11 @@ def teardown_snapshot_bucket(project_id, region):
 
 def teardown_images(project_id, region):
     logger.info("=== Deleting AR repos ===")
-    for repo in ["adk-repo", "agent-sandbox"]:
+    # "adk-repo" is created/deleted by PKB container_registry lifecycle
+    # (Provision creates it, Teardown deletes it). If you skip PKB Teardown,
+    # run: gcloud artifacts repositories delete adk-repo --location=<region>
+    # Only "agent-sandbox" (Chrome + Router images) needs manual cleanup here.
+    for repo in ["agent-sandbox"]:
         _run(["gcloud", "artifacts", "repositories", "delete", repo,
               f"--location={region}", f"--project={project_id}", "--quiet"])
     logger.info("AR repos deleted.")
@@ -65,7 +83,7 @@ def main():
     p.add_argument("--keep_images", action="store_true", help="Skip AR repo deletion")
     p.add_argument("--keep_bucket", action="store_true", help="Skip snapshot bucket deletion")
     args = p.parse_args()
-    teardown_cloud_build_sa(args.project_id)
+    revoke_cloudbuild_sa_permissions(args.project_id)
     if not args.keep_bucket:
         teardown_snapshot_bucket(args.project_id, args.region)
     if not args.keep_images:
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
index 9c45f02449..72c32d5b1f 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py
@@ -55,7 +55,10 @@ def enable_apis(project_id):
 
 def create_artifact_registry(project_id, region):
     logger.info("=== Creating Artifact Registry Repos ===")
-    for repo in ["adk-repo", "agent-sandbox"]:
+    # "adk-repo" is no longer needed here -- PKB creates its own AR repo
+    # via container_registry during the Provision stage.
+    # Only "agent-sandbox" is needed for Chrome/Router images.
+    for repo in ["agent-sandbox"]:
         if _exists(["gcloud", "artifacts", "repositories", "describe", repo,
                     f"--location={region}", f"--project={project_id}"]):
             logger.info("AR repo %s already exists.", repo)
@@ -66,39 +69,112 @@ def create_artifact_registry(project_id, region):
         logger.info("AR repo %s created.", repo)
 
 
-def create_cloud_build_sa(project_id):
-    logger.info("=== Creating Cloud Build SA ===")
-    sa_name = "adk-cloud-build-sa"
-    sa_email = f"{sa_name}@{project_id}.iam.gserviceaccount.com"
-    if not _exists(["gcloud", "iam", "service-accounts", "describe",
-                    sa_email, f"--project={project_id}"]):
-        _run(["gcloud", "iam", "service-accounts", "create", sa_name,
-              f"--display-name={sa_name}", f"--project={project_id}"])
-        logger.info("SA %s created. Waiting for propagation...", sa_email)
-        time.sleep(10)
-    else:
-        logger.info("SA %s already exists.", sa_email)
+def grant_cloudbuild_sa_permissions(project_id):
+    """Grant required IAM roles to the Cloud Build service account(s).
+
+    Checks which of the candidate Cloud Build SAs exist in this project:
+      - Legacy projects: {number}@cloudbuild.gserviceaccount.com
+      - Newer projects:  {number}-compute@developer.gserviceaccount.com
+
+    Grants the roles to every candidate SA that exists, so builds work
+    regardless of project configuration. This is idempotent and safe.
+    """
+    logger.info("=== Granting permissions to Cloud Build SA(s) ===")
+    result = _run(["gcloud", "projects", "describe", project_id,
+                   "--format=value(projectNumber)"])
+    project_number = result.stdout.strip()
+    if not project_number:
+        logger.error("Could not determine project number for %s", project_id)
+        return
+
+    # Both possible Cloud Build SAs
+    cloudbuild_sa = f"{project_number}@cloudbuild.gserviceaccount.com"
+    compute_sa = f"{project_number}-compute@developer.gserviceaccount.com"
+
+    # Detect which SA(s) exist
+    sa_emails = []
+    for sa in [cloudbuild_sa, compute_sa]:
+        if _exists(["gcloud", "iam", "service-accounts", "describe",
+                    sa, f"--project={project_id}"]):
+            sa_emails.append(sa)
+            logger.info("Found Cloud Build SA: %s", sa)
+        else:
+            logger.info("SA not found (skipping): %s", sa)
+
+    if not sa_emails:
+        logger.error("No Cloud Build SA found in project %s", project_id)
+        return
+
     roles = [
         "roles/logging.logWriter",
         "roles/storage.objectViewer",
         "roles/artifactregistry.writer",
         "roles/serviceusage.serviceUsageConsumer",
     ]
-    for role in roles:
-        _run(["gcloud", "projects", "add-iam-policy-binding", project_id,
-              f"--member=serviceAccount:{sa_email}",
-              f"--role={role}", "--condition=None", "--quiet"], check=False)
-    logger.info("Cloud Build SA roles bound.")
-
+    for sa_email in sa_emails:
+        logger.info("Granting roles to %s", sa_email)
+        for role in roles:
+            _run(["gcloud", "projects", "add-iam-policy-binding", project_id,
+                  f"--member=serviceAccount:{sa_email}",
+                  f"--role={role}", "--condition=None", "--quiet"], check=False)
+    logger.info("Cloud Build SA permissions granted.")
+
+
+
+
+def build_sandbox_images(project_id, region, target_arch):
+    """Build Chrome Sandbox and Sandbox Router images via Cloud Build."""
+    logger.info("=== Building Sandbox Images (arch=%s) ===", target_arch)
+    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils
+
+    chrome_image = (
+        f"{region}-docker.pkg.dev/{project_id}/agent-sandbox/chrome-sandbox:{target_arch}"
+    )
+    router_image = (
+        f"{region}-docker.pkg.dev/{project_id}/agent-sandbox/sandbox-router:{target_arch}"
+    )
+
+    gke_image_build_utils._BuildChromeSandboxImage(
+        project=project_id,
+        region=region,
+        target_arch=target_arch,
+        image_path=chrome_image,
+    )
+
+    gke_image_build_utils._BuildSandboxRouterImage(
+        project=project_id,
+        region=region,
+        target_arch=target_arch,
+        image_path=router_image,
+    )
+
+    logger.info("Sandbox images built successfully.")
+    logger.info("  Chrome: %s", chrome_image)
+    logger.info("  Router: %s", router_image)
 
 def main():
     p = argparse.ArgumentParser(description="GKE Agentic Benchmark Prerequisites")
     p.add_argument("--project_id", required=True, help="GCP project ID")
     p.add_argument("--region", default="us-central1", help="GCP region")
+    p.add_argument(
+        "--target_arch",
+        required=True,
+        choices=["amd64", "arm64"],
+        help="Target CPU architecture for container images (amd64 or arm64)",
+    )
+    p.add_argument(
+        "--skip_image_build",
+        action="store_true",
+        help="Skip Chrome and Router image builds (images already in registry)",
+    )
     args = p.parse_args()
     enable_apis(args.project_id)
     create_artifact_registry(args.project_id, args.region)
-    create_cloud_build_sa(args.project_id)
+    grant_cloudbuild_sa_permissions(args.project_id)
+    if not args.skip_image_build:
+        build_sandbox_images(args.project_id, args.region, args.target_arch)
+    else:
+        logger.info("Skipping image builds (--skip_image_build)")
     print("\nPrerequisite setup complete!")
 
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
index cb49011b08..c6fa3577bc 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
@@ -78,11 +78,9 @@
     "Number of concurrent source/snapshot/restore pods per measurement.",
 )
 
-flags.DEFINE_string(
-    "k8s_snapshot_ksa_name",
-    "pod-snapshot-sa",
-    "Kubernetes service account for pod snapshots.",
-)
+# k8s_snapshot_ksa_name is defined in gke_deploy_utils.py
+# (where DeploySnapshots() consumes it) and is available here
+# via the deploy_utils import.
 
 flags.DEFINE_integer(
     "k8s_snapshot_pod_timeout",
@@ -131,16 +129,23 @@ def Prepare(benchmark_spec):
     # Deploy Agent Sandbox ecosystem (idempotent)
     deploy_utils.DeployWorkloads(benchmark_spec)
 
-    # Deploy Pod Snapshot infrastructure (idempotent)
-        # Pod Snapshots are GKE-specific; skip on other platforms
-    cloud = getattr(
-        getattr(benchmark_spec, "container_cluster", None), "cloud", "GCP"
-    )
-    if cloud == "GCP" and not FLAGS.skip_deploy_snapshots:
+    # Deploy Pod Snapshot infrastructure (idempotent).
+    # Pod Snapshots are GKE-specific; skip on other platforms.
+    # Only attempt deployment when we have a confirmed GCP cluster
+    # (avoids surprise failures on pre-existing clusters where
+    # benchmark_spec.container_cluster may be None).
+    cluster = getattr(benchmark_spec, "container_cluster", None)
+    if cluster and getattr(cluster, "cloud", None) == "GCP" and not FLAGS.skip_deploy_snapshots:
         deploy_utils.DeploySnapshots()
-    elif cloud != "GCP":
+    elif not cluster:
+        logging.info(
+            "Pod Snapshot infrastructure skipped (no container_cluster in "
+            "benchmark_spec). Use --skip_deploy_snapshots=False to force."
+        )
+    elif getattr(cluster, "cloud", None) != "GCP":
         logging.info(
-            "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).", cloud
+            "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).",
+            getattr(cluster, "cloud", "unknown"),
         )
 
     # 1. Verify PodSnapshotStorageConfig exists (cluster-scoped).

From 0ca3cdd8e2d6c942021f6fa444a9252ebd285cef Mon Sep 17 00:00:00 2001
From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com>
Date: Fri, 3 Jul 2026 14:23:47 -0400
Subject: [PATCH 5/5] multiple fixes

---
 .../agentic/adk-agent/cloudbuild-arm64.yaml   | 20 +++++++
 .../agentic/gke_image_build_utils.py          | 55 ++++++++++++++++---
 .../agentic/k8s_chromium_density_benchmark.py |  1 +
 .../agentic/k8s_deletion_benchmark.py         |  1 +
 .../agentic/k8s_payload_benchmark.py          |  1 +
 .../agentic/k8s_python_density_benchmark.py   |  1 +
 .../kubernetes/agentic/k8s_qps_benchmark.py   |  1 +
 .../agentic/k8s_snapshot_benchmark.py         |  1 +
 .../agentic/k8s_warmpool_benchmark.py         |  1 +
 .../providers/gcp/google_kubernetes_engine.py | 25 ++++++---
 10 files changed, 93 insertions(+), 14 deletions(-)
 create mode 100644 perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml

diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml
new file mode 100644
index 0000000000..653f07fcf8
--- /dev/null
+++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml
@@ -0,0 +1,20 @@
+# Cloud Build config for cross-compiling to ARM64.
+# Used by PKB when --container_remote_build_config points to this file.
+# The _IMAGE substitution is passed by PKB RemoteBuild() automatically.
+steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes']
+    id: 'qemu-setup'
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'create', '--use', '--name', 'multiarch-builder']
+    id: 'create-builder'
+    waitFor: ['qemu-setup']
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'build', '--platform', 'linux/arm64', '-t', '${_IMAGE}', '--push', '.']
+    id: 'build-and-push'
+    waitFor: ['create-builder']
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_32
+substitutions:
+  _IMAGE: ''
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
index 750ae05988..2e976207f5 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py
@@ -68,7 +68,10 @@ def _DetectArchitecture(machine_type, zone, project):
     try:
         stdout, _, retcode = vm_util.IssueCommand(
             [
-                "gcloud", "compute", "machine-types", "describe",
+                "gcloud",
+                "compute",
+                "machine-types",
+                "describe",
                 machine_type,
                 f"--zone={zone}",
                 f"--project={project}",
@@ -83,15 +86,20 @@ def _DetectArchitecture(machine_type, zone, project):
             if docker_arch:
                 logging.info(
                     "Detected architecture for %s: %s -> %s",
-                    machine_type, gcp_arch, docker_arch,
+                    machine_type,
+                    gcp_arch,
+                    docker_arch,
                 )
                 return docker_arch
             logging.warning(
                 "Unknown GCP architecture '%s' for %s. Falling back to amd64.",
-                gcp_arch, machine_type,
+                gcp_arch,
+                machine_type,
             )
     except Exception as e:
-        logging.warning("gcloud machine-type describe failed: %s. Falling back to amd64.", e)
+        logging.warning(
+            "gcloud machine-type describe failed: %s. Falling back to amd64.", e
+        )
 
     return "amd64"
 
@@ -120,7 +128,6 @@ def build_images_with_config(project, region, machine_type, zone, arch):
         f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{target_arch}"
     )
 
-
     logger.info("=== Building Container Images (Chrome + Router only) ===")
     logger.info("  Project: %s", project)
     logger.info("  Region: %s", region)
@@ -154,6 +161,7 @@ def build_images_with_config(project, region, machine_type, zone, arch):
 # Internal helpers
 # ---------------------------------------------------------------------------
 
+
 def _BuildChromeSandboxImage(project, region, target_arch, image_path):
     """Build and push the Chrome Sandbox image."""
     logger.info("Building Chrome Sandbox image: %s", image_path)
@@ -261,8 +269,15 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project):
 
     Used for Chrome and Router images (built in temp directories).
     Uses the project's default Cloud Build SA.
+
+    For cross-architecture builds (e.g. arm64 on amd64 workers), uses
+    QEMU emulation + Docker Buildx to produce the target-arch image.
+    A high-CPU machine type (E2_HIGHCPU_32) is used to offset the
+    overhead of QEMU instruction translation.
     """
-    cloudbuild_content = """steps:
+    if target_arch == "amd64":
+        # Native build — no emulation needed
+        cloudbuild_content = """steps:
   - name: 'gcr.io/cloud-builders/docker'
     args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.']
     env:
@@ -274,6 +289,32 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project):
 substitutions:
   _IMAGE_PATH: ''
   _PLATFORM: 'linux/amd64'
+"""
+    else:
+        # Cross-arch build: Cloud Build workers are amd64, so the target
+        # image is built under emulation. Step 1 registers QEMU binfmt
+        # handlers, step 2 creates a Buildx builder, and step 3 builds
+        # and pushes with `buildx --push` (hence no top-level 'images:').
+        # E2_HIGHCPU_32 offsets emulation overhead. _PLATFORM defaults to
+        # arm64 so an un-overridden build never silently emits amd64.
+        cloudbuild_content = """steps:
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes']
+    id: 'qemu-setup'
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'create', '--use', '--name', 'multiarch-builder']
+    id: 'create-builder'
+    waitFor: ['qemu-setup']
+  - name: 'gcr.io/cloud-builders/docker'
+    args: ['buildx', 'build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '--push', '.']
+    id: 'build-and-push'
+    waitFor: ['create-builder']
+options:
+  logging: CLOUD_LOGGING_ONLY
+  machineType: E2_HIGHCPU_32
+substitutions:
+  _IMAGE_PATH: ''
+  _PLATFORM: 'linux/arm64'
 """
     cloudbuild_path = os.path.join(source_dir, "cloudbuild.yaml")
     with open(cloudbuild_path, "w") as f:
@@ -302,7 +343,7 @@ def _RunCmd(cmd, cwd=None):
         capture_output=True,
         text=True,
         cwd=cwd,
-        timeout=600,
+        timeout=2400,  # 40 min: allows for QEMU cross-arch builds
         env=env,
     )
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
index 346f59a8b0..bd9114877c 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py
@@ -118,6 +118,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
index ddeae29f9d..418b5c1ed9 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py
@@ -115,6 +115,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads onto the cluster."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
     utils.EnsurePortForward()
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
index 9f31aee342..109ab0efe6 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py
@@ -135,6 +135,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
index 207fd40a20..7760f23ff7 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py
@@ -128,6 +128,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
     utils.CheckAgentHealthz(required=False)
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
index 4528082ad5..feb82c8614 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py
@@ -146,6 +146,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads and verify agent API."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
index c6fa3577bc..8d78c6649b 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py
@@ -117,6 +117,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads, snapshot infra, and validate readiness."""
+    benchmark_spec.always_call_cleanup = True
     ns = FLAGS.k8s_namespace
     preload_mb = FLAGS.k8s_snapshot_preload_mb
 
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
index 62c6462351..9024f9f28e 100644
--- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py
@@ -122,6 +122,7 @@ def GetConfig(user_config):
 
 def Prepare(benchmark_spec):
     """Deploy workloads onto the cluster."""
+    benchmark_spec.always_call_cleanup = True
     logging.info("=== Prepare: deploying workloads ===")
     deploy_utils.DeployWorkloads(benchmark_spec)
     utils.EnsurePortForward()
diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
index c4012faf1a..06d4a295dc 100644
--- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
+++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py
@@ -102,14 +102,25 @@ def _Delete(self):
     ).Issue()
 
   def RemoteBuild(self, image: container.ContainerImage):
-    """Builds the image remotely."""
-    if not gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value:
-      full_tag = self.GetFullRegistryTag(image.name)
+    """Builds the image remotely via `gcloud builds submit`.
+
+    If --container_remote_build_config is set, its value is passed as
+    --config and the full registry tag as --substitutions _IMAGE=<tag>
+    (so the config is expected to reference ${_IMAGE}). Otherwise the
+    plain --tag <tag> form is used. Issued with timeout=None.
+    """
+    full_tag = self.GetFullRegistryTag(image.name)
+    if gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value:
+      build_cmd = util.GcloudCommand(
+          self, 'builds', 'submit',
+          '--config', gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value,
+          '--substitutions', f'_IMAGE={full_tag}',
+          image.directory,
+      )
     else:
-      full_tag = gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value
-    build_cmd = util.GcloudCommand(
-        self, 'builds', 'submit', '--tag', full_tag, image.directory
-    )
+      build_cmd = util.GcloudCommand(
+          self, 'builds', 'submit', '--tag', full_tag, image.directory,
+      )
     build_cmd.Issue(timeout=None)