From f614265ac60258fcfb7343747f43e7f398585c81 Mon Sep 17 00:00:00 2001 From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com> Date: Tue, 16 Jun 2026 04:02:04 -0400 Subject: [PATCH 1/5] add agentic benchmarking on gke --- .gitignore | 2 + .../data/k8s_agents/config/gke-benchmark.conf | 171 +++ .../config/native_provision_config.yaml | 70 ++ .../workloads/adk_agent/.dockerignore | 165 +++ .../workloads/adk_agent/.gcloudignore | 25 + .../k8s_agents/workloads/adk_agent/Dockerfile | 29 + .../workloads/adk_agent/__init__.py | 1 + .../workloads/adk_agent/cloudbuild.yaml | 13 + .../adk_agent/generated.env.template | 28 + .../gke_performance_agent/__init__.py | 2 + .../adk_agent/gke_performance_agent/agent.py | 240 ++++ .../k8s_agents/workloads/adk_agent/main.py | 1097 +++++++++++++++++ .../workloads/adk_agent/requirements.txt | 11 + .../chromium_test_app/benchmark_density.js | 177 +++ .../python_test_app/benchmark_density.py | 196 +++ .../python_test_app/benchmark_payload.py | 203 +++ .../python_test_app/benchmark_qps.py | 24 + .../workloads/vibe_coding/README.md | 64 + .../workloads/vibe_coding/startup_npm_vite.sh | 84 ++ .../vibe_coding/startup_pip_fastapi.sh | 65 + .../linux_benchmarks/kubernetes/__init__.py | 13 + .../kubernetes/agentic/__init__.py | 13 + .../kubernetes/agentic/gke_benchmark_utils.py | 489 ++++++++ .../agentic/gke_chromium_density_benchmark.py | 280 +++++ .../agentic/gke_deletion_benchmark.py | 518 ++++++++ .../kubernetes/agentic/gke_deploy_utils.py | 891 +++++++++++++ .../agentic/gke_image_build_utils.py | 403 ++++++ .../agentic/gke_payload_benchmark.py | 613 +++++++++ .../agentic/gke_prerequisite_setup.py | 516 ++++++++ .../kubernetes/agentic/gke_provision_utils.py | 698 +++++++++++ .../agentic/gke_python_density_benchmark.py | 362 ++++++ .../kubernetes/agentic/gke_qps_benchmark.py | 802 ++++++++++++ .../agentic/gke_snapshot_benchmark.py | 1022 +++++++++++++++ .../agentic/gke_warmpool_benchmark.py | 487 ++++++++ 
perfkitbenchmarker/providers/gcp/flags.py | 21 + .../providers/gcp/google_kubernetes_engine.py | 10 + requirements.txt | 1 + 37 files changed, 9806 insertions(+) create mode 100644 perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf create mode 100644 perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md create mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh create mode 100644 
perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py diff --git a/.gitignore b/.gitignore index 1e1c6fe077..6f0c9cb603 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,5 @@ /.idea /*git_ignore* .DS_Store +.adk +tmp/ diff --git a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf new file mode 100644 index 0000000000..99e6411577 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf @@ -0,0 +1,171 @@ +#!/bin/bash +# +# Agentic Workload Benchmarking configuration file for GKE +# Adapted from nginx DPv2 
baseline for Python Sandbox & Chromium Simulation +# +# Override machine type and cluster suffix via environment variables: +# MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d bash setup_infrastructure_gke.sh +# +# Supported profiles: +# MACHINE_TYPE=c3-standard-192-metal CLUSTER_SUFFIX=c3metal +# MACHINE_TYPE=c4-standard-8 CLUSTER_SUFFIX=c4 (default) +# MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d +# MACHINE_TYPE=c4a-standard-8 CLUSTER_SUFFIX=c4a (ARM64) + +USER_NAME_PREFIX=${USER%%.*} + +# GCP Project (MUST be set before running any script) +PROJECT_ID="your-project-id" +REGION="us-central1" +ZONE="us-central1-a" + +# Google/ADK aliases (derived from canonical names above) +# These are used by envsubst for the K8s manifest and by the ADK agent. +GOOGLE_CLOUD_PROJECT="${PROJECT_ID}" +GOOGLE_CLOUD_LOCATION="${REGION}" + +# Network Configuration +VPC_NAME="${USER_NAME_PREFIX}-agentic-vpc" +SUBNET_NAME="${USER_NAME_PREFIX}-agentic-subnet" +SUBNET_CIDR="10.134.20.0/24" +LAPTOP_IP="$(curl -s ifconfig.me)/32" # PUBLIC IP to access the target (dynamically detected) +# Cloud Router and NAT Configuration +ROUTER_NAME="${USER_NAME_PREFIX}-agentic-nat-router" +NAT_NAME="${USER_NAME_PREFIX}-agentic-nat-config" + +# GKE Cluster Configuration +CLUSTER_SUFFIX="${CLUSTER_SUFFIX:-c4}" +CLUSTER_NAME="${USER_NAME_PREFIX}-agentic-${CLUSTER_SUFFIX}" +GKE_VERSION="1.35.3-gke.1389000" +USE_CONNECT_GATEWAY="${USE_CONNECT_GATEWAY:-true}" # Use Connect Gateway for kubectl access + # Set to "false" to use direct public endpoint + +# ========================================================================= +# Machine Type Configuration (overridable via MACHINE_TYPE env var) +# ========================================================================= +MACHINE_TYPE="${MACHINE_TYPE:-c4-standard-8}" + +# Derive disk type from machine family: +# C3 → pd-balanced, C4/C4D/C4A → hyperdisk-balanced +_MACHINE_FAMILY="${MACHINE_TYPE%%-*}" # e.g. 
"c4" from "c4-standard-8" +case "${_MACHINE_FAMILY}" in + c3) _DISK_TYPE="pd-balanced" ;; + *) _DISK_TYPE="hyperdisk-balanced" ;; +esac + +# Derive target architecture from machine family: +# C4A → arm64, everything else → amd64 +case "${_MACHINE_FAMILY}" in + c4a) _TARGET_ARCH="arm64" ;; + *) _TARGET_ARCH="amd64" ;; +esac + +# Derive unique master CIDR per cluster (each private cluster needs its own /28): +# c4 → 172.16.0.0/28, c4d → 172.16.0.16/28, c4a → 172.16.0.32/28, c3metal → 172.16.0.48/28 +case "${CLUSTER_SUFFIX}" in + c4) MASTER_IPV4_CIDR="172.16.0.0/28" ;; + c4d) MASTER_IPV4_CIDR="172.16.0.16/28" ;; + c4a) MASTER_IPV4_CIDR="172.16.0.32/28" ;; + c3metal) MASTER_IPV4_CIDR="172.16.0.48/28" ;; + *) MASTER_IPV4_CIDR="172.16.0.64/28" ;; # fallback for future clusters +esac + +DEFAULT_POOL_MACHINE_TYPE="${MACHINE_TYPE}" +DEFAULT_POOL_DISK_TYPE="${_DISK_TYPE}" +DEFAULT_POOL_DISK_SIZE="50" # Disk size in GB +DEFAULT_POOL_NODE_COUNT="1" # Number of nodes in the default pool + +# ========================================================================= +# Agentic Workload NodePools +# ========================================================================= + +# Sandbox NodePool (Python + Chromium workloads with gVisor) +SANDBOX_NODE_POOL_NAME="agentic-sandbox-pool" +SANDBOX_MACHINE_TYPE="${MACHINE_TYPE}" # Same as default pool (overridable) +SANDBOX_DISK_SIZE="100" +SANDBOX_DISK_TYPE="${_DISK_TYPE}" # Derived from machine family +SANDBOX_NODE_COUNT="1" +SANDBOX_MAX_PODS_PER_NODE="250" # Raise from default 110 to avoid GKE pod limit as density ceiling +SANDBOX_ENABLE_GVISOR="true" # Enable GKE Sandbox (gVisor) on this pool + +AGENT_SANDBOX_VERSION="v0.4.6" + +# ========================================================================= +# Workload Configuration +# ========================================================================= +AGENTIC_NAMESPACE="agentic" + +# Python Sandbox Workload +PYTHON_IMAGE="python:3.11-slim" +PYTHON_POD_NAME="python-sandbox" 
+PYTHON_REPLICAS="1" # Start with 1; sweep for density tests +PYTHON_CPU_REQUEST="1" +PYTHON_CPU_LIMIT="2" +PYTHON_MEMORY_REQUEST="1Gi" +PYTHON_MEMORY_LIMIT="4Gi" + +# Chromium Browser Simulation Workload +CHROMIUM_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/chrome-sandbox:${_TARGET_ARCH}" +CHROMIUM_POD_NAME="chromium-sandbox" +CHROMIUM_REPLICAS="1" # Start with 1; sweep for density tests + +# Mock LLM Coordinator +MOCK_LLM_IMAGE="python:3.11-slim" +MOCK_LLM_POD_NAME="mock-llm-coordinator" +MOCK_LLM_PORT="8080" + +# ========================================================================= +# Benchmark Parameters +# ========================================================================= + +# Python Density Benchmark (UC-B) +SAMPLE_COUNT="20" # Samples per sandbox session +SAMPLE_WARMUP="0" # Warmup samples (excluded from stats) + +# Payload Transfer Benchmark (UC-D) +PAYLOAD_SIZE_MB="1" # Default payload size in MB +PAYLOAD_ITERATIONS="20" # Transfer iterations per session + +# Chromium Benchmark +CHROMIUM_TASK_COUNT="10" # Number of browser tasks per run +CHROMIUM_WARMUP_TASKS="2" + +# General +BENCHMARK_DURATION="300" # Duration in seconds per test +NOTE="agentic-V0-gVisor-DPv2-baseline" + +# ========================================================================= +# Logging +# ========================================================================= +# Log directory — defaults to tmp/ inside the repo (gitignored). +# Override by setting BASE_LOG_DIR before sourcing this file, +# e.g. export BASE_LOG_DIR="$HOME/agentic-logs" to keep logs outside the repo. +_REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." 
&& pwd)" +BASE_LOG_DIR="${BASE_LOG_DIR:-${_REPO_ROOT}/tmp/agentic-logs}" +WRAPPER_LOG_DIR="${BASE_LOG_DIR}/wrapper_logs" + +LOG_PATH="logs" +LOG_LEVEL="info" + +# ========================================================================= +# ADK Agent Deployment +# ========================================================================= +ADK_REPO_NAME="adk-repo" # Artifact Registry repository name +ADK_IMAGE_NAME="adk-agent" # Container image name +GOOGLE_GENAI_USE_VERTEXAI="true" +ADK_IMAGE_PATH="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ADK_REPO_NAME}/${ADK_IMAGE_NAME}:${_TARGET_ARCH}" +ADK_K8S_SA="adk-agent-sa" # Kubernetes service account for the agent +CLOUD_BUILD_SA="adk-cloud-build-sa" # Service account for Cloud Build submissions + +# Sandbox Router & Warm Pool +SANDBOX_ROUTER_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/sandbox-router:${_TARGET_ARCH}" +WARMPOOL_REPLICAS="2" # Number of pre-warmed sandbox pods + +# ========================================================================= +# Pod Snapshot Configuration (UC-A: Cold Start & Snapshot Pressure Test) +# ========================================================================= +ENABLE_POD_SNAPSHOTS="true" # Enable pod snapshots feature on cluster +SNAPSHOTS_BUCKET_NAME="agent-sandbox-snapshots-${PROJECT_ID}" +SNAPSHOT_KSA_NAME="pod-snapshot-sa" # KSA for snapshot storage access +SNAPSHOT_FOLDER="benchmark-snapshots" # Managed folder inside the bucket +SNAPSHOT_PRELOAD_MB="10" # Default memory preload for snapshot sizing diff --git a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml new file mode 100644 index 0000000000..765c7c4256 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml @@ -0,0 +1,70 @@ +# Native PKB Provision Config for Agentic Benchmarks +# Used with --gke_provision_mode=native +# +# Prerequisites (run once before PKB): +# python 
tools/agentic-benchmark/scripts/prerequisite_setup.py \ +# --project_id= --machine_type= +# +# IMPORTANT: Do NOT pass --gce_subnet_name on the command line. +# PKB incorrectly resolves it as the --network value. Instead, pass the +# subnet via --gke_additional_flags on the command line. +# +# Usage (provision): +# python pkb.py --benchmarks=gke_python_density \ +# --gke_provision_mode=native \ +# --benchmark_config_file=k8s_agents/config/native_provision_config.yaml \ +# --gce_network_name=-agentic-vpc \ +# --gce_subnet_region=us-central1 \ +# --zone=us-central1-a \ +# --project= \ +# --owner= \ +# --container_cluster_version=1.35.3-gke.1389000 \ +# --gke_additional_flags="--subnetwork=-agentic-subnet,--workload-pool=.svc.id.goog" +# +# For sweeps (cluster pre-exists, PKB skips provision/teardown): +# The sweep bridge injects --run_stage=run,cleanup automatically. + +gke_python_density: + flags: + # Force gcloud beta for preview features (pod snapshots) + gke_use_beta: true + + # Cluster-level additional flags (appended to gcloud [beta] container clusters create) + # NOTE: --subnetwork and --workload-pool are user/project-specific. + # Pass them on the command line via --gke_additional_flags=... (comma-separated). 
+ gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + + # Node-pool-level additional flags (appended to gcloud container node-pools create) + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + + # Standard PKB GKE flags + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore new file mode 100644 index 0000000000..78cf8c8595 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore @@ -0,0 +1,165 @@ + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# dotenv +.env + +# virtualenv +.venv/ +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + + +### Linux ### +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + + +### OSX ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon +# Thumbnails +._* +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + + +### Windows ### +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + + +### Vagrant ### +.vagrant/ +### Local rules, see .gitignore.tail to override! 
### +shippable +.git + +tmp/ +sessions.db +.adk/ diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore new file mode 100644 index 0000000000..fb34b7833c --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore @@ -0,0 +1,25 @@ +# This file tells gcloud builds submit which files to exclude from the upload. +# Without it, gcloud ignores .dockerignore and uploads everything (including .venv). + +.git +.venv/ +venv/ +ENV/ +__pycache__/ +*.py[cod] +*$py.class +*.so +*.egg-info/ +*.egg +dist/ +build/ +.tox/ +.cache/ +.coverage +htmlcov/ +*.log +.env +.adk/ +sessions.db +tmp/ +.DS_Store diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile new file mode 100644 index 0000000000..417ad58946 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.13-slim +WORKDIR /app + +# Install kubectl (required by k8s-agent-sandbox for port-forwarding to sandbox pods) +# Uses `dpkg --print-architecture` to download the matching kubectl binary (amd64 or arm64) +RUN apt-get update && \ + apt-get install -y --no-install-recommends curl ca-certificates && \ + ARCH=$(dpkg --print-architecture) && \ + curl -LO "https://dl.k8s.io/release/$(curl -sL https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl" && \ + install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \ + rm kubectl && \ + apt-get purge -y curl && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +RUN adduser --disabled-password --gecos "" myuser && \ + chown -R myuser:myuser /app + +COPY . . 
+ +USER myuser + +ENV PATH="/home/myuser/.local/bin:$PATH" + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] + diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py new file mode 100644 index 0000000000..5271a8ef60 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py @@ -0,0 +1 @@ +# ADK Agent package diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml new file mode 100644 index 0000000000..f3f3f4b810 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml @@ -0,0 +1,13 @@ +steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.'] + +images: + - '${_IMAGE_PATH}' + +options: + logging: CLOUD_LOGGING_ONLY + +substitutions: + _IMAGE_PATH: '' + _PLATFORM: 'linux/amd64' diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template new file mode 100644 index 0000000000..0828d0a5ff --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template @@ -0,0 +1,28 @@ +# ========================================================================== +# ADK Agent — Generated Environment File Template +# ========================================================================== +# This file is rendered into generated.env by deploy_gke.sh using envsubst. +# The single source of truth is: perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf +# +# For local dev, run deploy_gke.sh to generate generated.env, +# or manually create generated.env with your values. 
+# ========================================================================== + +# --- Required: GKE executor config --- +CLUSTER_NAME="${CLUSTER_NAME}" +GOOGLE_CLOUD_PROJECT="${GOOGLE_CLOUD_PROJECT}" +GOOGLE_CLOUD_LOCATION="${GOOGLE_CLOUD_LOCATION}" +AGENTIC_NAMESPACE="${AGENTIC_NAMESPACE}" +GOOGLE_GENAI_USE_VERTEXAI="${GOOGLE_GENAI_USE_VERTEXAI}" + +# --- Sandbox connection (set in-cluster; leave blank for local dev mode) --- +# When set, SandboxClient uses DirectConnection (bypasses kubectl port-forward). +# For local dev, set to "" to use per-pod kubectl port-forward tunnels. +SANDBOX_ROUTER_URL="http://sandbox-router-svc.${AGENTIC_NAMESPACE}.svc.cluster.local:8080" + +# --- Optional: benchmark defaults (overridden by HTTP request params) --- +SAMPLE_COUNT="${SAMPLE_COUNT}" +SAMPLE_WARMUP="${SAMPLE_WARMUP}" +PAYLOAD_SIZE_MB="${PAYLOAD_SIZE_MB}" +PAYLOAD_ITERATIONS="${PAYLOAD_ITERATIONS}" + diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py new file mode 100644 index 0000000000..c6df9a7a2a --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py @@ -0,0 +1,2 @@ +# GKE Performance Agent package +from . 
import agent \ No newline at end of file diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py new file mode 100644 index 0000000000..46094d244f --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py @@ -0,0 +1,240 @@ +from google.adk.agents import LlmAgent +from google.adk.code_executors import GkeCodeExecutor +from google.adk.code_executors.code_execution_utils import CodeExecutionResult +from google.adk.models.base_llm import BaseLlm +from google.adk.models.llm_response import LlmResponse +from google.genai import types +from dotenv import load_dotenv +from google.adk.apps import App +import logging +import os + +# --- Configure Logging --- +logging.basicConfig(level=logging.INFO) + +# ========================================================================= +# 1. Environment and Configuration +# ========================================================================= + +basedir = os.path.abspath(os.path.dirname(__file__)) +agent_dir = os.path.join(basedir, "..") + +# Load generated.env (auto-generated from gke-benchmark.conf by deploy_gke.sh). +# In GKE, K8s manifest env vars take precedence. +load_dotenv(os.path.join(agent_dir, "generated.env")) + +# ========================================================================= +# 2. 
Mock LLM Definition (Inheriting from BaseLlm for Pydantic) +# ========================================================================= + +# Load the benchmark scripts (a stub script is substituted if a file cannot be read) +density_script_path = os.path.join( + basedir, "../sandboxed_apps/python_test_app/benchmark_density.py" +) +try: + with open(density_script_path, "r", encoding="utf-8") as f: + density_benchmark_code = f.read() +except Exception: + density_benchmark_code = "import os; print(os.uname())" + +payload_script_path = os.path.join( + basedir, "../sandboxed_apps/python_test_app/benchmark_payload.py" +) +try: + with open(payload_script_path, "r", encoding="utf-8") as f: + payload_benchmark_code = f.read() +except Exception: + payload_benchmark_code = "import os; print(os.uname())" + +qps_script_path = os.path.join( + basedir, "../sandboxed_apps/python_test_app/benchmark_qps.py" +) +try: + with open(qps_script_path, "r", encoding="utf-8") as f: + qps_benchmark_code = f.read() +except Exception: + qps_benchmark_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))" + +# Keys that main.py sets in os.environ per-request. We inject them into +# the script (as repr()-quoted literals) so they reach the sandbox pod. +# If unset, the benchmark scripts use their own built-in defaults. +_DENSITY_ENV_KEYS = ["SAMPLE_COUNT", "SAMPLE_WARMUP"] +_PAYLOAD_ENV_KEYS = ["PAYLOAD_SIZE_MB", "PAYLOAD_ITERATIONS"] +_QPS_ENV_KEYS: list[str] = [] # QPS script needs no env config + + +def _build_benchmark_code() -> str: + """Build the benchmark script with current env values injected. 
+ + Selects the script based on BENCHMARK_MODE env var: + - 'density' → benchmark_density.py (Use Case B) + - 'payload' → benchmark_payload.py (Use Case D) + - 'qps' → benchmark_qps.py (Use Case F) + """ + mode = os.getenv("BENCHMARK_MODE", "density") + + if mode == "payload": + env_keys = _PAYLOAD_ENV_KEYS + script = payload_benchmark_code + elif mode == "qps": + env_keys = _QPS_ENV_KEYS + script = qps_benchmark_code + else: + env_keys = _DENSITY_ENV_KEYS + script = density_benchmark_code + + lines = ["import os"] + for k in env_keys: + v = os.getenv(k) + if v is not None: + lines.append(f"os.environ[{k!r}] = {v!r}") # repr() escapes quotes/backslashes/newlines in the value + return "\n".join(lines) + "\n\n" + script + + +class MockLlm(BaseLlm): + model: str = "mock-model" + + async def generate_content_async(self, llm_request, stream=False): + """Mock the ADK response loop. + + BaseLlm.generate_content_async is an AsyncGenerator — it must YIELD + LlmResponse objects, never return them. + """ + # ADK appends the code execution result to the conversation + # history before calling the LLM again. If the history has + # grown beyond the initial user prompt, code has already + # executed — return plain text to stop the loop. + has_execution_result = len(llm_request.contents) > 1 + + if has_execution_result: + part = types.Part(text="Execution Complete") + else: + # Create an ADK-compliant result with executable code. + # Build at request time so SAMPLE_COUNT/SAMPLE_WARMUP reflect + # the current os.environ values set by main.py per-request. + part = types.Part( + executable_code=types.ExecutableCode( + language="PYTHON", code=_build_benchmark_code() + ) + ) + + content = types.Content(role="model", parts=[part]) + response = LlmResponse(content=content, partial=False) + + # Yield exactly one final response (both streaming and non-streaming) + yield response + + +# ========================================================================= +# 3. 
Agent Initialization +# ========================================================================= + + +class V3GkeCodeExecutor(GkeCodeExecutor): + def _execute_in_sandbox(self, code: str) -> CodeExecutionResult: + """Run ``code`` as script.py in a newly created sandbox and return its stdout; the sandbox claim is always deleted.""" + from k8s_agent_sandbox.sandbox_client import SandboxClient + from k8s_agent_sandbox.models import SandboxDirectConnectionConfig + import logging + import time + from concurrent.futures import ThreadPoolExecutor + + logging.info("Executing via V3 SandboxClient (v0.4.6 compatible).") + + # Shared thread pool for sandbox operations to allow overlapping + # blocking I/O when sessions run on different threads. + global _SANDBOX_POOL + try: + _SANDBOX_POOL + except NameError: + _SANDBOX_POOL = ThreadPoolExecutor(max_workers=16) + + # Use DirectConnection when SANDBOX_ROUTER_URL is set (in-cluster), + # otherwise fall back to kubectl port-forward (dev mode). + router_url = os.getenv("SANDBOX_ROUTER_URL") + if router_url: + client = SandboxClient( + connection_config=SandboxDirectConnectionConfig(api_url=router_url) + ) + else: + client = SandboxClient() + # v0.4.6 create_sandbox uses 'template' and 'namespace' arguments + create_ms = upload_ms = run_ms = delete_ms = 0.0 + sandbox = None + # Time sandbox creation (perf_counter: monotonic, immune to wall-clock adjustments) + t0 = time.perf_counter() + create_future = _SANDBOX_POOL.submit( + client.create_sandbox, + template=self.sandbox_template, + namespace=self.namespace, + ) + sandbox = create_future.result() + create_ms = (time.perf_counter() - t0) * 1000.0 + try: + # v0.4.6 handles file I/O via the .files namespace + t0 = time.perf_counter() + upload_future = _SANDBOX_POOL.submit(sandbox.files.write, "script.py", code) + upload_future.result() + upload_ms = (time.perf_counter() - t0) * 1000.0 + + # SANDBOX_EXEC_TIMEOUT_S is set per-request by main.py. 
+ # Default 60 s keeps density/snapshot runs tight; payload + # sweeps raise it for large blobs. 
+ run_timeout = int(os.getenv("SANDBOX_EXEC_TIMEOUT_S", "60")) + + t0 = time.perf_counter() + run_future = _SANDBOX_POOL.submit( + sandbox.commands.run, "python3 script.py", timeout=run_timeout + ) + result = run_future.result() + run_ms = (time.perf_counter() - t0) * 1000.0 + + # ADK's build_code_execution_result_part discards stdout when + # stderr is non-empty (OUTCOME_FAILED path). Sandbox scripts + # produce benign stderr (C-extension reimport noise, gVisor + # warnings) that would cause all sandbox_* metrics to vanish. + # Log stderr for debugging, then clear it so ADK passes + # stdout through. + if result.stderr: + logging.warning("Sandbox stderr (ignored): %s", result.stderr[:500]) + + logging.info( + "SANDBOX_TIMINGS: create_ms=%.3f upload_ms=%.3f run_ms=%.3f", + create_ms, + upload_ms, + run_ms, + ) + return CodeExecutionResult(stdout=result.stdout, stderr="") + finally: + # Always delete the sandbox claim, even if upload or run raised + t0 = time.perf_counter() + if sandbox is not None: + delete_future = _SANDBOX_POOL.submit( + client.delete_sandbox, sandbox.claim_name, namespace=self.namespace + ) + delete_future.result() + delete_ms = (time.perf_counter() - t0) * 1000.0 + logging.info("SANDBOX_TIMINGS_DELETE: delete_ms=%.3f", delete_ms) + + +gke_executor = V3GkeCodeExecutor( + cluster_name=os.getenv("CLUSTER_NAME"), + location=os.getenv("GOOGLE_CLOUD_LOCATION"), + namespace=os.getenv("AGENTIC_NAMESPACE"), + executor_type="sandbox", + sandbox_template="python-sandbox-template", +) + +gke_performance_agent = LlmAgent( + name="gke_performance_agent", # Must be a valid identifier (no dashes) + model=MockLlm(model="mock-model"), + code_executor=gke_executor, +) + +root_agent = gke_performance_agent + +app = App( + name=root_agent.name, + root_agent=root_agent, + # enable_tracing=True, +) diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py new file mode 100644 index 0000000000..fa13f11fd7 --- /dev/null +++ 
b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py @@ -0,0 +1,1097 @@ +"""FastAPI service fronting the GKE Performance Agent. + +Exposes REST endpoints that PKB calls to trigger benchmarks. The agent +runs *inside* the GKE cluster so it can reach the Sandbox Controller and +create gVisor sandboxes natively. + +Endpoints: + GET /healthz → liveness probe + POST /benchmark/python/density → run the Python density benchmark (UC-B) + POST /benchmark/python/payload → run the payload transfer benchmark (UC-D) + POST /benchmark/python/qps → run the QPS saturation benchmark (UC-F) + POST /benchmark/chromium/density → run the Chromium density benchmark (UC-C) + POST /run → raw ADK agent interaction + +POST /benchmark/python/density — Request: + { + "sample_count": int — iterations per sandbox session (default: 100) + "sample_warmup": int — warmup iterations excluded from stats (default: 5) + "concurrent_sessions": int — parallel sandbox sessions (default: 1) + "sandbox_exec_timeout_s": int — sandbox command execution timeout in seconds (default: 60) + } + +POST /benchmark/python/density — Response: + { + "concurrent_sessions": int — requested session count + "successful_sessions": int — sessions completed without error + "failed_sessions": int — sessions that returned an error + "aggregate": { + --- Orchestrator-side (timed in _run_single_session, stats in benchmark_density) --- + "orchestrator_cel_mean_ms": mean round-trip across sessions + "orchestrator_cel_p50_ms": P50 round-trip + "orchestrator_cel_p99_ms": P99 round-trip + "orchestrator_cel_min_ms": min round-trip + "orchestrator_cel_max_ms": max round-trip + + --- Sandbox-side overall (from benchmark_density.py, mean across sessions) --- + "sandbox_ttfe_ms": Time To First Execution + "sandbox_total_cel_mean_ms": mean total CEL per iteration (sum of all task types) + "sandbox_total_cel_p50_ms": P50 total CEL per iteration + "sandbox_total_cel_p99_ms": P99 total CEL per iteration + 
"sandbox_total_cel_min_ms": min total CEL per iteration + "sandbox_total_cel_max_ms": max total CEL per iteration + + --- Sandbox RSS (from benchmark_density.py, mean across sessions) --- + "sandbox_rss_start_mb": RSS at benchmark start + "sandbox_rss_end_mb": RSS at benchmark end + "sandbox_rss_growth_mb": RSS growth during benchmark + + --- Per-type CEL breakdown (from benchmark_density.py, mean across sessions) --- + "sandbox_compute_cel_{mean,p50,p99,min,max}_ms": CPU-bound (math.factorial) + "sandbox_syscall_cel_{mean,p50,p99,min,max}_ms": gVisor Sentry (os.stat/listdir) + "sandbox_import_cel_{mean,p50,p99,min,max}_ms": Gofer FS I/O (importlib) + } + "sessions": [ per-session detail array + { + "session_id": int — zero-based session index + "orchestrator_total_ms": float — full round-trip for this session + "raw_output": str — raw code execution stdout + "sandbox_ttfe_ms": float — TTFE for this session + "sandbox_total_cel_mean_ms": float — total CEL mean for this session + ... all other sandbox_* metrics for this session + } + ] + } + +Data Flow: + benchmark_density.py (inside gVisor) → all sandbox_* metrics per session + main.py (this file) → orchestrator_* timing + cross-session aggregation +""" + +import json +import logging +import os +import re +import time +import asyncio +from typing import Optional +from concurrent.futures import ThreadPoolExecutor + +import uvicorn +from contextlib import asynccontextmanager +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel, Field +from google.genai import types +from google.adk.sessions import InMemorySessionService +from google.adk.artifacts import InMemoryArtifactService +from google.adk.runners import Runner + +from dotenv import load_dotenv + +basedir = os.path.abspath(os.path.dirname(__file__)) + +# Load generated.env (auto-generated from gke-benchmark.conf by build_images_gke.sh). +# In GKE, K8s manifest env vars take precedence. 
def _compute_thread_count() -> int:
    """Return the worker count to use for the default ThreadPoolExecutor.

    The pool is sized at twice the CPU count reported by ``os.cpu_count()``
    so that threads blocked on I/O (port-forward, file upload) can overlap
    without heavily oversubscribing the CPUs. When the CPU count cannot be
    determined, a single CPU is assumed.

    Returns:
        The doubled CPU count, clamped to the inclusive range [2, 64].
    """
    lower_bound, upper_bound = 2, 64

    detected = os.cpu_count()
    if not detected:
        # cpu_count() may return None on platforms where it is undetermined.
        detected = 1

    wanted = detected * 2
    if wanted > upper_bound:
        return upper_bound
    if wanted < lower_bound:
        return lower_bound
    return wanted
def _percentile_stats(sorted_values: list, prefix: str) -> dict:
    """Summarise an ascending list of numbers as latency statistics.

    Args:
        sorted_values: Samples already sorted in ascending order. The function
            indexes into the list directly and does not sort it.
        prefix: Metric name prefix; each key is ``f"{prefix}_<stat>_ms"``.

    Returns:
        An empty dict for empty input. Otherwise a dict with the keys
        ``mean``, ``p50``, ``p95``, ``p99``, ``min`` and ``max`` (in that
        order), each rounded to 6 decimal places.
    """
    count = len(sorted_values)
    if not count:
        return {}

    last = count - 1

    def _at_fraction(fraction: float):
        # Floor-rank lookup, clamped so the index never runs past the end.
        return sorted_values[min(int(count * fraction), last)]

    stats = (
        ("mean", sum(sorted_values) / count),
        ("p50", sorted_values[count // 2]),
        ("p95", _at_fraction(0.95)),
        ("p99", _at_fraction(0.99)),
        ("min", sorted_values[0]),
        ("max", sorted_values[last]),
    )
    return {f"{prefix}_{label}_ms": round(value, 6) for label, value in stats}
def _parse_sandbox_json(raw_output: Optional[str]) -> Optional[dict]:
    """Extract the sandbox JSON summary from code execution output.

    The sandbox script prints a JSON object to stdout among other log lines.
    This scans the text for JSON objects and returns the last one that has at
    least one top-level key starting with ``sandbox_``.

    Objects are decoded with ``json.JSONDecoder.raw_decode`` rather than a
    brace-matching regex, so summaries that contain nested objects or braces
    inside string values are returned whole instead of being missed or
    replaced by an inner fragment.

    Args:
        raw_output: Text captured from the code execution; may be empty or
            ``None`` when the agent produced no text.

    Returns:
        The decoded summary dict, or ``None`` when no matching object exists.
    """
    if not raw_output:
        return None

    decoder = json.JSONDecoder()
    summary = None
    pos = raw_output.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(raw_output, pos)
        except json.JSONDecodeError:
            # Not a JSON object at this brace (log noise); try the next one.
            pos = raw_output.find("{", pos + 1)
            continue

        if any(key.startswith("sandbox_") for key in obj):
            summary = obj
            # Skip past the whole object so its nested children cannot
            # override the enclosing summary.
            pos = raw_output.find("{", end)
        else:
            # A wrapper without sandbox_ keys may still contain a summary
            # object, so keep scanning inside it.
            pos = raw_output.find("{", pos + 1)
    return summary
@app.post("/benchmark/python/density")
async def benchmark_python_density(req: BenchmarkRequest):
    """Trigger the Python density benchmark (Use Case B).

    Fires `concurrent_sessions` parallel agent sessions. Each session
    claims its own sandbox, runs the benchmark script with the given
    iteration/warmup counts, and returns both orchestrator-side and
    sandbox-side metrics.

    Args:
        req: Sample count, warmup count, session count and sandbox exec
            timeout for this run.

    Returns:
        A dict with the requested/successful/failed session counts, an
        ``aggregate`` dict of cross-session statistics, and the per-session
        results under ``sessions``.
    """
    async with _benchmark_lock:
        # The benchmark parameters are handed over through process-wide env
        # vars, which is why requests are serialised by _benchmark_lock.
        os.environ["BENCHMARK_MODE"] = "density"
        os.environ["SAMPLE_COUNT"] = str(req.sample_count)
        os.environ["SAMPLE_WARMUP"] = str(req.sample_warmup)
        os.environ["SANDBOX_EXEC_TIMEOUT_S"] = str(req.sandbox_exec_timeout_s)

        logger.info(
            "Starting Python benchmark: sample_count=%d sample_warmup=%d concurrent_sessions=%d",
            req.sample_count,
            req.sample_warmup,
            req.concurrent_sessions,
        )

        prompt = "Please start the GKE performance benchmark workflow."

        # Fire concurrent sessions. Run each session in its own thread so
        # blocking ADK/Runner activity cannot serialize session start.
        thread_tasks = [
            asyncio.create_task(
                asyncio.to_thread(
                    # sid=i binds the loop value now (avoids late binding).
                    lambda sid=i: asyncio.run(_run_single_session(sid, prompt))
                )
            )
            for i in range(req.concurrent_sessions)
        ]
        session_results = await asyncio.gather(*thread_tasks)

        # Separate successful vs failed sessions
        successful = [r for r in session_results if "error" not in r]
        failed = [r for r in session_results if "error" in r]

        # Aggregate orchestrator-side metrics across all successful sessions
        aggregate = {}
        if successful:
            orch_times = sorted(r["orchestrator_total_ms"] for r in successful)
            aggregate.update(_percentile_stats(orch_times, "orchestrator_cel"))

            # Collect sandbox_* keys from EVERY successful session (first-seen
            # order). Looking only at the first session would drop all sandbox
            # metrics whenever that one session's output failed to parse.
            sandbox_keys = list(
                dict.fromkeys(
                    k for r in successful for k in r if k.startswith("sandbox_")
                )
            )
            for key in sandbox_keys:
                present = [r[key] for r in successful if r.get(key) is not None]
                if not present:
                    continue
                sample_val = present[0]
                if isinstance(sample_val, list):
                    # Pool raw latency arrays across sandboxes → true
                    # cross-sandbox stats. The list check runs before the
                    # value is iterated, so a stray scalar is skipped.
                    pooled = sorted(
                        v
                        for values in present
                        if isinstance(values, list)
                        for v in values
                    )
                    if pooled:
                        base = key[:-3] if key.endswith("_ms") else key
                        aggregate.update(_percentile_stats(pooled, base))
                elif isinstance(sample_val, (int, float)):
                    vals = [v for v in present if isinstance(v, (int, float))]
                    if vals:
                        if key.endswith("_cel_ms"):
                            # Latency scalars (e.g. import_cel_ms): compute
                            # cross-sandbox percentile stats, like array metrics.
                            base = key[:-3]
                            aggregate.update(_percentile_stats(sorted(vals), base))
                        else:
                            # Non-latency scalars (e.g. rss_mb, ttfe_ms): average
                            aggregate[key] = round(sum(vals) / len(vals), 6)

        return {
            "concurrent_sessions": req.concurrent_sessions,
            "successful_sessions": len(successful),
            "failed_sessions": len(failed),
            "aggregate": aggregate,
            "sessions": session_results,
        }
def _cleanup_lingering_qps_claims(namespace: str) -> None:
    """Delete SandboxClaims labelled created-by=pkb-qps-benchmark via kubectl.

    Blocking (spawns kubectl); call it from a worker thread, not the event
    loop. Only label-selected claims are touched, so claims created by other
    workloads in the namespace are left alone.
    """
    import subprocess

    listing = subprocess.run(
        [
            "kubectl",
            "get",
            "sandboxclaim",
            "-n",
            namespace,
            "-l",
            "created-by=pkb-qps-benchmark",
            "-o",
            "jsonpath={.items[*].metadata.name}",
        ],
        capture_output=True,
        text=True,
    )
    claim_names = listing.stdout.split()
    if not claim_names:
        return
    logger.info("Cleaning up %d lingering pkb-qps claims", len(claim_names))
    subprocess.run(
        [
            "kubectl",
            "delete",
            "sandboxclaim",
            "-l",
            "created-by=pkb-qps-benchmark",
            "-n",
            namespace,
            "--wait=false",
        ],
        capture_output=True,
        text=True,
    )


@app.post("/benchmark/python/qps")
async def benchmark_python_qps(req: QpsBenchmarkRequest):
    """Trigger the QPS saturation benchmark (Use Case F).

    Fires sandbox claim requests at a controlled rate (target_qps) for
    duration_s seconds. Each request claims a sandbox from the warm pool,
    runs a trivial script, and releases it. Returns per-request TTFE
    (claim + upload + execute; the delete happens after the measurement)
    and aggregate latency stats.

    Uses a lightweight path that calls SandboxClient directly — bypasses
    the full ADK Runner/MockLLM pipeline to avoid per-request overhead
    and accurately measure sandbox lifecycle latency at high QPS.

    When the warm pool drains faster than it refills, TTFE spikes from
    ~200ms to seconds — identifying the QPS saturation point.

    Args:
        req: Target rate, burst duration and per-command exec timeout.

    Returns:
        A dict with target/actual QPS, wall-clock duration, request counts,
        ``aggregate`` TTFE/claim latency statistics and per-request results
        under ``sessions``.
    """

    # Load the QPS script once
    qps_script_path = os.path.join(
        basedir, "sandboxed_apps/python_test_app/benchmark_qps.py"
    )
    try:
        with open(qps_script_path, "r", encoding="utf-8") as f:
            qps_code = f.read()
    except Exception:
        # Best effort: fall back to a trivial script, but say so, because the
        # run then no longer executes the shipped benchmark_qps.py.
        logger.warning(
            "Could not read %s; using built-in fallback script",
            qps_script_path,
            exc_info=True,
        )
        qps_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))"

    sandbox_template = os.getenv("SANDBOX_TEMPLATE", "python-sandbox-template")
    # SANDBOX_NAMESPACE keeps precedence; otherwise fall back to the
    # AGENTIC_NAMESPACE variable that the other sandbox code paths read, so
    # all benchmarks target the same namespace by default.
    sandbox_namespace = os.getenv("SANDBOX_NAMESPACE") or os.getenv(
        "AGENTIC_NAMESPACE", "agentic"
    )
    exec_timeout = req.sandbox_exec_timeout_s
    qps_claim_label = {"created-by": "pkb-qps-benchmark"}

    def _run_qps_request(request_id: int) -> dict:
        """Lightweight sandbox claim→execute→release cycle."""
        t_total = time.perf_counter()
        client = _make_sandbox_client()
        sandbox = None
        try:
            # Claim
            t0 = time.perf_counter()
            sandbox = client.create_sandbox(
                template=sandbox_template,
                namespace=sandbox_namespace,
                labels=qps_claim_label,
            )
            claim_ms = (time.perf_counter() - t0) * 1000

            # Upload
            t0 = time.perf_counter()
            sandbox.files.write("script.py", qps_code)
            upload_ms = (time.perf_counter() - t0) * 1000

            # Execute
            # NOTE(review): the command result is not inspected, so a script
            # that exits non-zero still counts as a success — confirm intended.
            t0 = time.perf_counter()
            sandbox.commands.run("python3 script.py", timeout=exec_timeout)
            exec_ms = (time.perf_counter() - t0) * 1000

            ttfe_ms = (time.perf_counter() - t_total) * 1000

            return {
                "request_id": request_id,
                "ttfe_ms": round(ttfe_ms, 3),
                "claim_ms": round(claim_ms, 3),
                "upload_ms": round(upload_ms, 3),
                "exec_ms": round(exec_ms, 3),
            }
        except Exception as e:
            ttfe_ms = (time.perf_counter() - t_total) * 1000
            return {
                "request_id": request_id,
                "ttfe_ms": round(ttfe_ms, 3),
                "error": f"{type(e).__name__}: {e}",
            }
        finally:
            if sandbox is not None:
                try:
                    client.delete_sandbox(
                        sandbox.claim_name, namespace=sandbox_namespace
                    )
                except Exception:
                    # Still best effort, but leave a trace: a failed delete
                    # leaks a claim.
                    logger.warning(
                        "Failed to delete sandbox for QPS request %d",
                        request_id,
                        exc_info=True,
                    )

    async with _benchmark_lock:
        logger.info(
            "Starting QPS benchmark: target_qps=%.1f duration_s=%.1f",
            req.target_qps,
            req.duration_s,
        )

        interval = 1.0 / req.target_qps

        # Use a scoped executor sized to the expected concurrency.
        # Each sandbox request takes ~0.5-5s depending on environment
        # (in-cluster vs port-forward). We need enough workers so the
        # thread pool itself is never the bottleneck — only real sandbox
        # contention should limit throughput.
        peak_concurrency = int(req.target_qps * req.duration_s)
        qps_workers = max(16, min(512, peak_concurrency))
        qps_executor = ThreadPoolExecutor(max_workers=qps_workers)
        loop = asyncio.get_running_loop()
        logger.info(
            "QPS executor: %d workers for ~%d expected requests",
            qps_workers,
            peak_concurrency,
        )

        # Schedule requests at the target QPS rate
        tasks: list[asyncio.Future] = []
        t_start = time.time()
        next_fire = t_start
        request_id = 0

        while True:
            now = time.time()
            elapsed = now - t_start
            if elapsed >= req.duration_s:
                break
            if now >= next_fire:
                rid = request_id
                request_id += 1
                fut = loop.run_in_executor(qps_executor, _run_qps_request, rid)
                tasks.append(fut)
                next_fire += interval
            else:
                # Short sleeps keep the firing times close to the schedule.
                await asyncio.sleep(min(0.001, next_fire - now))

        # Wait for in-flight requests with a drain timeout.
        drain_timeout = max(60.0, req.duration_s)
        done, pending = await asyncio.wait(tasks, timeout=drain_timeout)

        # Clean up the scoped executor
        qps_executor.shutdown(wait=False)

        # Collect completed results (guard against individual task exceptions)
        session_results = []
        for t in done:
            try:
                session_results.append(t.result())
            except Exception as exc:
                session_results.append(
                    {
                        "request_id": -1,
                        "error": str(exc),
                    }
                )

        # Cancel tasks still queued/running and mark as timed out
        for t in pending:
            t.cancel()
        if pending:
            logger.warning(
                "QPS drain timeout: %d/%d requests still pending after %.0fs",
                len(pending),
                len(tasks),
                drain_timeout,
            )
            session_results.extend(
                {
                    "request_id": -1,
                    "error": "drain_timeout",
                }
                for _ in pending
            )

        # Bulk-delete SandboxClaims left by cancelled tasks.
        # Only targets claims labelled created-by=pkb-qps-benchmark so
        # we never touch claims created by other workloads. Runs in a worker
        # thread so kubectl does not block the event loop (and /healthz).
        try:
            await asyncio.to_thread(_cleanup_lingering_qps_claims, sandbox_namespace)
        except Exception:
            logger.warning("Failed to clean up lingering claims", exc_info=True)

        wall_time = time.time() - t_start

        # Separate successful vs failed
        successful = [r for r in session_results if "error" not in r]
        failed = [r for r in session_results if "error" in r]

        # Compute TTFE stats
        aggregate = {}
        if successful:
            ttfe_values = sorted(r["ttfe_ms"] for r in successful)
            if ttfe_values:
                aggregate.update(_percentile_stats(ttfe_values, "ttfe"))

            # Also compute claim latency stats (the warm-pool-sensitive metric)
            claim_values = sorted(r["claim_ms"] for r in successful if "claim_ms" in r)
            if claim_values:
                aggregate.update(_percentile_stats(claim_values, "claim"))

        return {
            "target_qps": req.target_qps,
            "actual_qps": round(request_id / wall_time, 2) if wall_time > 0 else 0,
            "duration_s": round(wall_time, 2),
            "total_requests": request_id,
            "successful_requests": len(successful),
            "failed_requests": len(failed),
            "aggregate": aggregate,
            "sessions": session_results,
        }
+ """ + from playwright.async_api import async_playwright + from kubernetes import client as k8s_client, config as k8s_config + + async with _benchmark_lock: + + sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic") + sandbox_template = "chromium-sandbox-template" + + logger.info( + "Starting Chromium density benchmark (CDP): concurrent_sessions=%d " + "task_count=%d warmup_tasks=%d", + req.concurrent_sessions, + req.task_count, + req.warmup_tasks, + ) + + # Initialize K8s client for pod IP lookup + try: + k8s_config.load_incluster_config() + except k8s_config.ConfigException: + k8s_config.load_kube_config() + core_v1 = k8s_client.CoreV1Api() + + # Inline HTML test page (same as benchmark_density.js used) + test_page = """data:text/html, + + +PKB Chromium Benchmark + +

Hello Sandbox

+ + +
+ + +""" + + # Limit concurrent K8s Metrics API calls to avoid overwhelming metrics-server + _metrics_semaphore = asyncio.Semaphore(5) + + async def _run_chromium_session_cdp(session_id: int) -> dict: + """Run one Chromium benchmark session via CDP.""" + sb_client = _make_sandbox_client() + sandbox = None + t_start = time.time() + claim_ms = 0.0 + cold_start_ms = 0.0 + try: + # 1. Claim sandbox from warm pool + t0 = time.time() + sandbox = sb_client.create_sandbox( + template=sandbox_template, + namespace=sandbox_namespace, + ) + claim_ms = (time.time() - t0) * 1000.0 + + # 2. Resolve pod IP + pod_name = sandbox.get_pod_name() + pod = core_v1.read_namespaced_pod(pod_name, sandbox_namespace) + pod_ip = pod.status.pod_ip + if not pod_ip: + raise RuntimeError(f"Pod {pod_name} has no IP assigned") + + cdp_url = f"http://{pod_ip}:9223" + + # 3. Connect Playwright via CDP + async with async_playwright() as pw: + # Wait for Chrome to be ready (retry connection) + browser = None + for attempt in range(20): + try: + browser = await pw.chromium.connect_over_cdp(cdp_url) + break + except Exception: + if attempt >= 19: + raise + await asyncio.sleep(0.5) + + # Cold start = claim + CDP connect (time until browser ready) + cold_start_ms = (time.time() - t_start) * 1000.0 + + context = await browser.new_context() + page = await context.new_page() + + # Navigate once before measurement loop + await page.goto(test_page, wait_until="domcontentloaded") + + # Latency arrays (filled during measured runs only) + navigate_ms = [] + screenshot_ms = [] + evaluate_ms = [] + click_ms = [] + fill_ms = [] + interaction_ms = [] + + total_runs = req.warmup_tasks + req.task_count + for run_idx in range(total_runs): + measuring = run_idx >= req.warmup_tasks + + # 1. Navigate (reload page) + t0 = time.time() + await page.goto(test_page, wait_until="domcontentloaded") + elapsed = (time.time() - t0) * 1000.0 + if measuring: + navigate_ms.append(elapsed) + interaction_ms.append(elapsed) + + # 2. 
DOM evaluate — read heading text + t0 = time.time() + await page.evaluate( + "() => document.getElementById('heading').textContent" + ) + elapsed = (time.time() - t0) * 1000.0 + if measuring: + evaluate_ms.append(elapsed) + interaction_ms.append(elapsed) + + # 3. Fill input + t0 = time.time() + await page.fill("#search", f"query-{run_idx}") + elapsed = (time.time() - t0) * 1000.0 + if measuring: + fill_ms.append(elapsed) + interaction_ms.append(elapsed) + + # 4. Click button + t0 = time.time() + await page.click("#btn") + elapsed = (time.time() - t0) * 1000.0 + if measuring: + click_ms.append(elapsed) + interaction_ms.append(elapsed) + + # 5. Verify click effect (DOM mutation) + t0 = time.time() + await page.evaluate( + "() => document.getElementById('output').textContent" + ) + elapsed = (time.time() - t0) * 1000.0 + if measuring: + evaluate_ms.append(elapsed) + interaction_ms.append(elapsed) + + # 6. Screenshot + t0 = time.time() + await page.screenshot() + elapsed = (time.time() - t0) * 1000.0 + if measuring: + screenshot_ms.append(elapsed) + interaction_ms.append(elapsed) + + # Read pod memory usage from K8s Metrics API + rss_mb = None + try: + async with _metrics_semaphore: + custom_api = k8s_client.CustomObjectsApi() + pod_metrics = await asyncio.to_thread( + custom_api.get_namespaced_custom_object, + group="metrics.k8s.io", + version="v1beta1", + namespace=sandbox_namespace, + plural="pods", + name=pod_name, + ) + for c in pod_metrics.get("containers", []): + usage = c.get("usage", {}).get("memory", "") + if usage.endswith("Ki"): + rss_mb = round(int(usage[:-2]) / 1024, 1) + elif usage.endswith("Mi"): + rss_mb = round(float(usage[:-2]), 1) + elif usage.endswith("Gi"): + rss_mb = round(float(usage[:-2]) * 1024, 1) + break + except Exception: + logger.warning( + "Failed to read pod metrics for %s", + pod_name, + exc_info=True, + ) + + await browser.close() + + total_ms = (time.time() - t_start) * 1000.0 + + # Compute stats helper + def _compute_stats(arr): + 
if not arr: + return None + s = sorted(arr) + n = len(s) + return { + "mean_ms": round(sum(s) / n, 3), + "p50_ms": round(s[min(int(n * 0.50), n - 1)], 3), + "p95_ms": round(s[min(int(n * 0.95), n - 1)], 3), + "p99_ms": round(s[min(int(n * 0.99), n - 1)], 3), + "min_ms": round(s[0], 3), + "max_ms": round(s[-1], 3), + } + + return { + "session_id": session_id, + "sandbox_status": "ok", + "orchestrator_total_ms": round(total_ms, 3), + "claim_ms": round(claim_ms, 3), + "cold_start_ms": round(cold_start_ms, 3), + "rss_mb": rss_mb, + "navigate": _compute_stats(navigate_ms), + "evaluate": _compute_stats(evaluate_ms), + "fill": _compute_stats(fill_ms), + "click": _compute_stats(click_ms), + "screenshot": _compute_stats(screenshot_ms), + "interaction": _compute_stats(interaction_ms), + } + + except Exception as e: + total_ms = (time.time() - t_start) * 1000.0 + logger.exception("Chromium CDP session %d failed", session_id) + return { + "session_id": session_id, + "orchestrator_total_ms": round(total_ms, 3), + "claim_ms": round(claim_ms, 3), + "error": f"{type(e).__name__}: {e}", + } + finally: + if sandbox is not None: + try: + sb_client.delete_sandbox( + sandbox.claim_name, namespace=sandbox_namespace + ) + except Exception: + logger.warning( + "Failed to delete sandbox for session %d", + session_id, + exc_info=True, + ) + + # Fire concurrent sessions + tasks = [_run_chromium_session_cdp(i) for i in range(req.concurrent_sessions)] + session_results = await asyncio.gather(*tasks) + + # Separate successful vs failed + successful = [r for r in session_results if "error" not in r] + failed = [r for r in session_results if "error" in r] + + # Aggregate metrics + aggregate = {} + if successful: + orch_times = sorted(r["orchestrator_total_ms"] for r in successful) + aggregate.update(_percentile_stats(orch_times, "orchestrator_total")) + + claim_times = sorted(r["claim_ms"] for r in successful if "claim_ms" in r) + if claim_times: + aggregate.update(_percentile_stats(claim_times, 
@app.post("/run")
async def run_agent(req: RunRequest):
    """Raw agent interaction — send any prompt, get back the agent text.

    Args:
        req: Request body carrying the prompt to forward to the agent.

    Returns:
        ``{"response": <agent output>}``.

    Raises:
        HTTPException: Status 500 with the error text when the agent run
            fails; the original exception is chained as the cause.
    """
    try:
        output = await _run_agent(req.prompt)
    except Exception as e:
        logger.exception("Agent run failed")
        # Chain the cause so the original traceback is kept on the HTTP error.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"response": output}
a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt new file mode 100644 index 0000000000..4ca072323c --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt @@ -0,0 +1,11 @@ +# Requirements for GKE Performance Agent +google-adk[gke,extensions]==1.34.1 +k8s-agent-sandbox==0.4.6 +kubernetes>=36.0.1 # Fix: v36.0.0 has auth key mismatch bug (PR #2585) +google-cloud-aiplatform[adk]==1.153.1 +google-cloud-logging==3.15.0 +fastapi==0.135.3 +uvicorn[standard]==0.44.0 +python-dotenv==1.0.1 +playwright==1.59.0 + diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js new file mode 100644 index 0000000000..7638720691 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js @@ -0,0 +1,177 @@ +// Agentic Chromium Sandbox Benchmark (UC-C) +// Measures: Interaction Latency, Screenshot Generation, DOM Evaluation, RSS +// Requires: Playwright (pre-installed in the container image) +// +// Self-contained — no external Mock LLM service needed. Uses data: URLs +// and inline HTML to avoid network dependencies so the benchmark measures +// pure gVisor + Chromium overhead. +// +// Environment variables (injected by orchestrator): +// TASK_COUNT — iterations per run (default: 10) +// WARMUP_TASKS — warmup iterations excluded from stats (default: 2) + +const { chromium } = require('playwright'); +const os = require('os'); + +const TASK_COUNT = parseInt(process.env.TASK_COUNT || '10'); +const WARMUP_TASKS = parseInt(process.env.WARMUP_TASKS || '2'); + +// Inline HTML page — avoids network round-trips so we measure pure +// browser engine + gVisor overhead. 
+const TEST_PAGE = `data:text/html, + + +PKB Chromium Benchmark + +

Hello Sandbox

+ + +
+ + +`; + +function percentile(sorted, p) { + if (!sorted.length) return null; + const idx = Math.min(Math.floor(sorted.length * p), sorted.length - 1); + return sorted[idx]; +} + +function getMemoryMB() { + try { + const usage = process.memoryUsage(); + return { + rss_mb: Math.round(usage.rss / 1024 / 1024 * 100) / 100, + heap_used_mb: Math.round(usage.heapUsed / 1024 / 1024 * 100) / 100, + heap_total_mb: Math.round(usage.heapTotal / 1024 / 1024 * 100) / 100, + }; + } catch (e) { + return { rss_mb: null, heap_used_mb: null, heap_total_mb: null }; + } +} + +async function runBenchmark() { + const memStart = getMemoryMB(); + + // ── Cold Start: browser launch ── + const coldStart = performance.now(); + const browser = await chromium.launch({ + headless: true, + args: [ + '--no-sandbox', + '--disable-gpu', + '--disable-dev-shm-usage', + '--disable-async-dns', + '--single-process', + ], + }); + const cold_start_ms = performance.now() - coldStart; + + const context = await browser.newContext(); + const page = await context.newPage(); + + // Navigate once before the loop — amortize first-navigation overhead + await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' }); + + // Per-task latency arrays (filled during measured runs only) + const navigate_ms = []; + const screenshot_ms = []; + const evaluate_ms = []; + const click_ms = []; + const fill_ms = []; + const interaction_ms = []; // all task types pooled + + for (let run = 0; run < WARMUP_TASKS + TASK_COUNT; run++) { + const measuring = run >= WARMUP_TASKS; + + // 1. Navigate (reload the data: page) + let t0 = performance.now(); + await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' }); + let elapsed = performance.now() - t0; + if (measuring) { navigate_ms.push(elapsed); interaction_ms.push(elapsed); } + + // 2. 
DOM evaluate — read heading text + t0 = performance.now(); + await page.evaluate(() => document.getElementById('heading').textContent); + elapsed = performance.now() - t0; + if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); } + + // 3. Fill input + t0 = performance.now(); + await page.fill('#search', `query-${run}`); + elapsed = performance.now() - t0; + if (measuring) { fill_ms.push(elapsed); interaction_ms.push(elapsed); } + + // 4. Click button + t0 = performance.now(); + await page.click('#btn'); + elapsed = performance.now() - t0; + if (measuring) { click_ms.push(elapsed); interaction_ms.push(elapsed); } + + // 5. Verify click effect (DOM mutation) + t0 = performance.now(); + await page.evaluate(() => document.getElementById('output').textContent); + elapsed = performance.now() - t0; + if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); } + + // 6. Screenshot (snapshot generation) + t0 = performance.now(); + await page.screenshot({ path: '/tmp/snap.png' }); + elapsed = performance.now() - t0; + if (measuring) { screenshot_ms.push(elapsed); interaction_ms.push(elapsed); } + } + + await browser.close(); + const memEnd = getMemoryMB(); + + // ── Compute stats ── + const computeStats = (arr) => { + if (!arr.length) return null; + const sorted = [...arr].sort((a, b) => a - b); + const sum = sorted.reduce((a, b) => a + b, 0); + return { + mean_ms: Math.round(sum / sorted.length * 1000) / 1000, + p50_ms: Math.round(percentile(sorted, 0.50) * 1000) / 1000, + p95_ms: Math.round(percentile(sorted, 0.95) * 1000) / 1000, + p99_ms: Math.round(percentile(sorted, 0.99) * 1000) / 1000, + min_ms: Math.round(sorted[0] * 1000) / 1000, + max_ms: Math.round(sorted[sorted.length - 1] * 1000) / 1000, + }; + }; + + const summary = { + sandbox_status: 'ok', + cold_start_ms: Math.round(cold_start_ms * 1000) / 1000, + task_count: TASK_COUNT, + warmup_tasks: WARMUP_TASKS, + // Per-task-type latency stats + navigate: computeStats(navigate_ms), 
+ evaluate: computeStats(evaluate_ms), + fill: computeStats(fill_ms), + click: computeStats(click_ms), + screenshot: computeStats(screenshot_ms), + // Pooled interaction latency (all types) + interaction: computeStats(interaction_ms), + // Memory + rss_start_mb: memStart.rss_mb, + rss_end_mb: memEnd.rss_mb, + rss_growth_mb: memEnd.rss_mb != null && memStart.rss_mb != null + ? Math.round((memEnd.rss_mb - memStart.rss_mb) * 100) / 100 + : null, + }; + + // Print JSON to stdout — orchestrator parses this + console.log(JSON.stringify(summary)); +} + +runBenchmark().catch((e) => { + console.log(JSON.stringify({ + sandbox_status: 'error', + error: `${e.name}: ${e.message}`, + })); + process.exit(1); +}); diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py new file mode 100644 index 0000000000..c1d20ecbfb --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py @@ -0,0 +1,196 @@ +#!/usr/bin/env python3 +""" +Agentic Python Sandbox Benchmark +Measures: TTFE (Time to First Execution), CEL (Command Execution Latency), RSS Memory + +Three task categories: + - compute: CPU-bound (matrix multiply, sorting large lists) + - syscall: gVisor Sentry stress (large file I/O, many stat calls) + - import: Gofer FS I/O + memory (import heavy stdlib, build data) + +Metrics: all sandbox_* keys. 
+""" +import time +import json +import os +import resource +import sys +import math +import random +import warnings + +warnings.filterwarnings("ignore") + +SAMPLE_COUNT = int(os.environ.get("SAMPLE_COUNT") or "20") +SAMPLE_WARMUP = int(os.environ.get("SAMPLE_WARMUP") or "0") + +print(f"SAMPLE_COUNT: {SAMPLE_COUNT}") +print(f"SAMPLE_WARMUP: {SAMPLE_WARMUP}") + +# ── Persistent allocations (retained across iterations to grow RSS) ── +# ~20MB baseline allocation that stays resident +_RESIDENT_DATA = [bytearray(1024 * 1024) for _ in range(20)] # 20 × 1MB + + +def get_rss_mb(): + """Get current RSS memory in MB.""" + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + + +def get_static_tasks(): + """Return deterministic static tasks to measure execution latency. + + Three task categories enable decomposition of CEL degradation: + - compute: sort a 100k-element list + matrix-like multiply + - syscall: write/read 1MB temp files, 2000 stat calls + - import: import 15 heavy stdlib modules + build large dicts + """ + return [ + { + "id": 1, + "type": "compute", + "code": ( + "import math, random\n" + "random.seed(42)\n" + "data = [random.random() for _ in range(100_000)]\n" + "data.sort()\n" + "# Matrix-like multiply (flattened 200×200)\n" + "a = list(range(40_000))\n" + "b = [x * 0.001 for x in a]\n" + "_ = sum(x * y for x, y in zip(a, b))\n" + ), + }, + { + "id": 2, + "type": "syscall", + "code": ( + "import os, tempfile\n" + "d = tempfile.gettempdir()\n" + "# Write + read 1MB file through gVisor Gofer\n" + "path = os.path.join(d, 'bench_heavy.bin')\n" + "data = b'x' * (1024 * 1024)\n" + "with open(path, 'wb') as f:\n" + " f.write(data)\n" + "with open(path, 'rb') as f:\n" + " _ = f.read()\n" + "os.unlink(path)\n" + "# Heavy stat/listdir\n" + "[os.stat(d) for _ in range(1000)]\n" + "[os.listdir(d) for _ in range(1000)]\n" + ), + }, + { + "id": 3, + "type": "import", + "code": ( + "import importlib, sys\n" + "mods = [\n" + " 'json', 'csv', 'html', 'email', 
'unittest', 'logging',\n" + " 'xml.etree.ElementTree', 'http.client', 'urllib.request',\n" + " 'argparse', 'pprint', 'textwrap', 'difflib',\n" + "]\n" + "for _ in range(20):\n" + " for m in mods:\n" + " try:\n" + " sys.modules.pop(m, None)\n" + " importlib.import_module(m)\n" + " except Exception:\n" + " pass\n" + "# Build a large dict to add memory pressure\n" + "_ = {str(i): list(range(100)) for i in range(10_000)}\n" + ), + }, + ] + + +def _percentile(sorted_vals, pct): + """Return the value at the given percentile from a pre-sorted list.""" + idx = int(len(sorted_vals) * pct) + return sorted_vals[min(idx, len(sorted_vals) - 1)] + + +def run_benchmark(): + results = {"ttfe_ms": None, "cel_ms": [], "rss_mb_start": None, "rss_mb_end": None} + + # Measure TTFE + ttfe_start = time.perf_counter() + exec("x = 1 + 1", globals()) + results["ttfe_ms"] = round((time.perf_counter() - ttfe_start) * 1000, 6) + + results["rss_mb_start"] = get_rss_mb() + + tasks = get_static_tasks() + sampled_tasks = [t for t in tasks if t["type"] != "import"] + import_task = next((t for t in tasks if t["type"] == "import"), None) + + # Warmup — sampled tasks only (import uses C-extension modules that + # error on repeated reimport, so it runs once outside the loop) + for _ in range(SAMPLE_WARMUP): + for task in sampled_tasks: + exec(task["code"], globals()) + + # Benchmark iterations — compute + syscall only + for i in range(SAMPLE_COUNT): + # Grow resident memory slightly each iteration (~100KB) + _RESIDENT_DATA.append(bytearray(100 * 1024)) + + for task in sampled_tasks: + start = time.perf_counter() + exec(task["code"], globals()) + elapsed_ms = round((time.perf_counter() - start) * 1000, 6) + results["cel_ms"].append({ + "iteration": i, + "task_id": task["id"], + "type": task["type"], + "latency_ms": elapsed_ms, + }) + + # Import task — single run (C-extension modules break on repeated reimport) + import_elapsed_ms = 0.0 + if import_task: + import_start = time.perf_counter() + 
exec(import_task["code"], globals()) + import_elapsed_ms = round((time.perf_counter() - import_start) * 1000, 6) + + results["rss_mb_end"] = get_rss_mb() + + # --- Raw per-iteration totals (compute + syscall) --- + iteration_totals = [] + for i in range(SAMPLE_COUNT): + total = sum(r["latency_ms"] for r in results["cel_ms"] if r["iteration"] == i) + iteration_totals.append(round(total, 6)) + + # --- Raw per-type latencies --- + types_seen = sorted(set(r["type"] for r in results["cel_ms"])) + per_type_raw = {} + for t in types_seen: + per_type_raw[t] = [round(r["latency_ms"], 6) + for r in results["cel_ms"] if r["type"] == t] + + # Output raw arrays — cross-sandbox stats computed by main.py + summary = { + "hostname": os.environ.get("HOSTNAME", "unknown"), + "sandbox_ttfe_ms": results["ttfe_ms"], + "sandbox_total_cel_ms": iteration_totals, + "sandbox_import_cel_ms": import_elapsed_ms, + "sandbox_rss_start_mb": results["rss_mb_start"], + "sandbox_rss_end_mb": results["rss_mb_end"], + "sandbox_rss_growth_mb": round(results["rss_mb_end"] - results["rss_mb_start"], 6), + "sample_count": SAMPLE_COUNT, + "sample_warmup": SAMPLE_WARMUP, + "total_iterations": len(iteration_totals), + "task_types": len(types_seen) + (1 if import_task else 0), + } + + for t, raw in per_type_raw.items(): + summary[f"sandbox_{t}_cel_ms"] = raw + + print(json.dumps(summary)) + + with open("/tmp/benchmark_results.json", "w") as f: + json.dump(results, f) + + return summary + +if __name__ == "__main__": + run_benchmark() diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py new file mode 100644 index 0000000000..f92a3e694d --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +"""Agentic Payload Transfer 
Benchmark (Use Case D). + +Measures the cost of returning large "Observation" payloads from a gVisor +sandbox back to the Orchestrator via the real data path: + stdout → code_execution_result.output → orchestrator HTTP response. + +For a given PAYLOAD_SIZE_MB, the script: + 1. Generates a payload of that size (os.urandom + base64) + 2. Measures generation, serialization, and stdout-write times separately + 3. Repeats for PAYLOAD_ITERATIONS to compute stable percentiles + 4. On the final iteration, writes the actual payload to stdout (measuring + real end-to-end transfer); other iterations write to /dev/null to + measure write-syscall cost without flooding the return channel. + 5. Emits a JSON summary to stderr (parsed by main.py) + +Metrics are split so that pass/fail thresholds can exclude generation +time (os.urandom), which is not part of data transfer. + +Environment variables (injected by the agent): + PAYLOAD_SIZE_MB — target payload size in megabytes (default: 1) + PAYLOAD_ITERATIONS — number of transfer iterations (default: 20) +""" + +import base64 +import json +import os +import resource +import sys +import time + +PAYLOAD_SIZE_MB = float(os.environ.get("PAYLOAD_SIZE_MB") or "1") +PAYLOAD_ITERATIONS = int(os.environ.get("PAYLOAD_ITERATIONS") or "20") + + +# Use stderr for all diagnostic/metric output so stdout is reserved for +# the actual payload transfer (the measured data path). 
+def _log(msg):
+    """Print a diagnostic message to stderr."""
+    print(msg, file=sys.stderr, flush=True)
+
+
+_log(f"PAYLOAD_SIZE_MB: {PAYLOAD_SIZE_MB}")
+_log(f"PAYLOAD_ITERATIONS: {PAYLOAD_ITERATIONS}")
+
+
+def get_rss_mb():
+    """Return the peak resident set size of this process in MB.
+
+    ``ru_maxrss`` is a high-water mark, not the current RSS, so the
+    start/end delta reported by the benchmark is peak growth. The division
+    by 1024 assumes Linux, where ``ru_maxrss`` is reported in kilobytes.
+    """
+    return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024
+
+
+def _percentile(sorted_vals, pct):
+    """Return the value at the given percentile from a pre-sorted list.
+
+    Uses the nearest-rank index ``int(len * pct)`` clamped to the last
+    element. Returns 0.0 for an empty list.
+    """
+    if not sorted_vals:
+        return 0.0
+    idx = int(len(sorted_vals) * pct)
+    return sorted_vals[min(idx, len(sorted_vals) - 1)]
+
+
+_STAT_KEYS = ("mean", "p50", "p95", "p99", "min", "max")
+
+
+def _stats_for(latencies):
+    """Compute mean/p50/p95/p99/min/max for a list of samples.
+
+    The input list is left untouched (a sorted copy is used). An empty
+    input yields ``None`` for every statistic instead of raising
+    ``ZeroDivisionError``, so a zero-iteration run still produces a summary.
+    """
+    ordered = sorted(latencies)
+    if not ordered:
+        return dict.fromkeys(_STAT_KEYS)
+    return {
+        "mean": round(sum(ordered) / len(ordered), 6),
+        "p50": round(_percentile(ordered, 0.50), 6),
+        "p95": round(_percentile(ordered, 0.95), 6),
+        "p99": round(_percentile(ordered, 0.99), 6),
+        "min": round(ordered[0], 6),
+        "max": round(ordered[-1], 6),
+    }
+
+
+def run_benchmark():
+    """Execute the payload transfer benchmark and print JSON results.
+
+    Each measured iteration generates ``PAYLOAD_SIZE_MB`` of random bytes,
+    base64-encodes them and writes the encoded text to a sink. The final
+    iteration writes to the real stdout; earlier iterations write to
+    /dev/null. Generation, serialization and write times are recorded
+    separately; "transfer" is serialization + write.
+
+    Returns:
+        The summary dict that is also printed as JSON to stderr and stdout.
+    """
+    target_bytes = int(PAYLOAD_SIZE_MB * 1024 * 1024)
+    rss_start = get_rss_mb()
+
+    generation_times = []
+    serialization_times = []
+    stdout_times = []  # stdout write syscall time
+    transfer_times = []  # serialize + stdout write (the threshold metric)
+    throughputs = []  # MB/s based on stdout write time
+
+    # --- Warmup (2 iterations, not recorded) ---
+    for _ in range(2):
+        raw = os.urandom(target_bytes)
+        _ = base64.b64encode(raw).decode("ascii")
+
+    # --- Measured iterations ---
+    # /dev/null is opened once, outside the timed region, so open()/close()
+    # cost is not charged to the non-final iterations only. Both sinks are
+    # written and flushed the same way inside the timed region.
+    with open("/dev/null", "w") as devnull:
+        for i in range(PAYLOAD_ITERATIONS):
+            # 1. Generate payload (os.urandom — NOT data transfer)
+            t0 = time.perf_counter()
+            raw = os.urandom(target_bytes)
+            t_gen = time.perf_counter()
+
+            # 2. Serialize (base64 encode — mirrors real observation encoding)
+            encoded = base64.b64encode(raw).decode("ascii")
+            t_ser = time.perf_counter()
+
+            # 3. Transfer — write payload to stdout (the real
+            #    sandbox→orchestrator path). Only the final iteration writes
+            #    to actual stdout, to measure real end-to-end transfer without
+            #    flooding the return channel. Other iterations write to
+            #    /dev/null (same write-syscall path, data discarded).
+            sink = sys.stdout if i == PAYLOAD_ITERATIONS - 1 else devnull
+            t_xfer_start = time.perf_counter()
+            sink.write(encoded)
+            sink.flush()
+            t_xfer = time.perf_counter()
+
+            gen_ms = (t_gen - t0) * 1000
+            ser_ms = (t_ser - t_gen) * 1000
+            stdout_ms = (t_xfer - t_xfer_start) * 1000
+            transfer_ms = ser_ms + stdout_ms  # excludes generation
+
+            generation_times.append(gen_ms)
+            serialization_times.append(ser_ms)
+            stdout_times.append(stdout_ms)
+            transfer_times.append(transfer_ms)
+
+            # Throughput in MB/s (based on encoded size and stdout write time)
+            encoded_size_mb = len(encoded) / (1024 * 1024)
+            if stdout_ms > 0:
+                throughputs.append(encoded_size_mb / (stdout_ms / 1000))
+
+    rss_end = get_rss_mb()
+
+    # Payload metadata. Padded base64 emits 4 characters per 3 input bytes
+    # (rounded up), so the encoded length is computed directly instead of
+    # generating and encoding another full payload.
+    encoded_size_bytes = 4 * ((target_bytes + 2) // 3)
+
+    summary = {
+        "hostname": os.environ.get("HOSTNAME", "unknown"),
+        # Payload config
+        "sandbox_payload_size_bytes": target_bytes,
+        "sandbox_payload_encoded_size_bytes": encoded_size_bytes,
+        "sandbox_payload_iterations": PAYLOAD_ITERATIONS,
+    }
+
+    # Timing groups, emitted in this order as sandbox_<group>_time_<stat>_ms:
+    #   generation    — os.urandom; NOT data transfer, excluded from threshold
+    #   serialization — base64 encode; CPU bound
+    #   stdout        — the raw write syscall
+    #   transfer      — serialization + stdout write; the threshold metric
+    timing_groups = (
+        ("generation", generation_times),
+        ("serialization", serialization_times),
+        ("stdout", stdout_times),
+        ("transfer", transfer_times),
+    )
+    for group, samples in timing_groups:
+        stats = _stats_for(samples)
+        for stat in _STAT_KEYS:
+            summary[f"sandbox_{group}_time_{stat}_ms"] = stats[stat]
+
+    # Throughput (MB/s based on transfer write time); None when no sample
+    # had a positive write time.
+    throughput_stats = _stats_for(throughputs)
+    summary.update({
+        "sandbox_throughput_mean_mbps": throughput_stats["mean"],
+        "sandbox_throughput_p50_mbps": throughput_stats["p50"],
+        "sandbox_throughput_min_mbps": throughput_stats["min"],
+        # RSS
+        "sandbox_rss_start_mb": rss_start,
+        "sandbox_rss_end_mb": rss_end,
+        "sandbox_rss_growth_mb": rss_end - rss_start,
+    })
+
+    # Emit JSON summary to stderr for diagnostics.
+    _log("---BENCHMARK_RESULT_JSON---")
+    _log(json.dumps(summary, indent=2))
+
+    # Also emit to stdout (after the payload data) so that
+    # _parse_sandbox_json() can find it in code_execution_result.output.
+    # ADK only captures stdout, not stderr.
+    print("\n---BENCHMARK_RESULT_JSON---", flush=True)
+    print(json.dumps(summary), flush=True)
+
+    return summary
+
+
+if __name__ == "__main__":
+    try:
+        run_benchmark()
+    except Exception:
+        import traceback
+
+        traceback.print_exc()
+        # Exit non-zero so a failed run is not mistaken for a successful one.
+        sys.exit(1)
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
new file mode 100644
index 0000000000..07ef6309db
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Minimal QPS benchmark script for UC-F (Scheduling Throughput).
+
+Runs inside the GKE Agent Sandbox to validate claim readiness.
+Executes a trivial operation and reports status. The orchestrator-side
+timing (orchestrator_total_ms) serves as the primary TTFE measurement —
+when the warm pool drains, that metric spikes because fresh pods must be
+cold-started.
+"""
+import json
+import time
+
+t0 = time.perf_counter()
+
+# Trivial computation to prove the sandbox is functional
+result = sum(range(10_000))
+
+elapsed_ms = (time.perf_counter() - t0) * 1000
+
+print(json.dumps({
+    "sandbox_status": "ok",
+    "sandbox_qps_exec_ms": round(elapsed_ms, 3),
+    "sandbox_compute_result": result,
+}))
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
new file mode 100644
index 0000000000..86b33c8486
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/README.md
@@ -0,0 +1,64 @@
+# Vibe Coding Startup Scripts
+
+Pluggable startup scripts for the UC-A snapshot saturation harness (`sweeps/snapshot_saturation_search.py`). Each script simulates a realistic "vibe coding" sandbox cold-start — the kind of environment setup that happens when an AI coding agent provisions a new sandbox for a user.
+
+## How It Works
+
+When `--preload_mode=script:<path>` is passed to the sweep harness:
+
+1. The script is read from disk and embedded into the pod's container entrypoint
+2. The pod runs the script to completion (installs packages, starts services, etc.)
+3. After the script exits 0, the harness prints `SCRIPT_READY` and starts a counter loop
+4. **TTFE** (Time to First Execution) is measured as the total time from SandboxClaim creation to `SCRIPT_READY`
+
+This lets you compare cold-start TTFE (full script execution) against snapshot/restore TTFE (resuming from a pre-snapshotted state where the script already ran).
+
+## Scripts
+
+### startup_pip_fastapi.sh
+
+**Lightweight Python variant.** Runs natively in the `python:3.11-slim` base image.
+
+Steps: `pip install fastapi uvicorn` → create app → start uvicorn → wait for first HTTP response.
+
+Typical cold-start: ~5–8s on GKE with fast network.
+
+```bash
+# Cold-start only
+python sweeps/snapshot_saturation_search.py \
+  --skip_snapshot \
+  --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+  --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+  --ttfe_threshold_s=20
+
+# With snapshot/restore (shows restore speedup vs cold-start)
+python sweeps/snapshot_saturation_search.py \
+  --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+  --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+  --ttfe_threshold_s=20 --restore_threshold_s=10
+```
+
+### startup_npm_vite.sh
+
+**Heavier Node.js variant.** Installs Node.js + npm from apt, then npm-installs Vite and starts a dev server.
+
+Steps: `apt-get install nodejs npm` → `npm install vite` → start Vite dev server → wait for first page served.
+
+Typical cold-start: ~30–60s (apt + npm on cold cache).
+
+```bash
+python sweeps/snapshot_saturation_search.py \
+  --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \
+  --burst_size=3 --search_mode=binary --search_min=10 --search_max=30 \
+  --ttfe_threshold_s=120 --restore_threshold_s=10
+```
+
+## Writing Your Own Script
+
+Requirements:
+- Must be a bash script (runs via `bash -c` in a `python:3.11-slim` container)
+- Must exit 0 on success (use `set -e` for fail-fast; add `-o pipefail` if you pipe command output, since `set -e` alone ignores a failure on the left side of a pipe)
+- Should print progress to stdout (visible in pod logs for debugging)
+- The harness appends `SCRIPT_READY` + counter loop after your script — don't add your own
+
+The `PRELOAD_MB` env var is available but unused by these scripts. The sweep varies it to test different memory request levels on the pod.
diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
new file mode 100644
index 0000000000..f3e9c9c235
--- /dev/null
+++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_npm_vite.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Vibe Coding Startup Script — npm + Vite dev server
+#
+# Simulates a typical agentic sandbox "vibe coding" cold-start:
+#   1. Install Node.js and npm (via apt), then the project's npm dependencies
+#   2. Start a Vite dev server
+#   3. Wait for the server to be ready (first page served)
+#
+# This script is designed to run inside the sandbox container (python:3.11-slim).
+# It installs Node.js + npm + dependencies from scratch to measure realistic
+# cold-start latency including package installation.
+# +# Usage (cold-start only): +# python sweeps/snapshot_saturation_search.py \ +# --skip_snapshot \ +# --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \ +# --burst_size=3 \ +# --search_mode=binary --search_min=10 --search_max=30 \ +# --ttfe_threshold_s=120 +# +# Usage (with snapshot/restore): +# python sweeps/snapshot_saturation_search.py \ +# --preload_mode=script:workloads/vibe_coding/startup_npm_vite.sh \ +# --burst_size=3 \ +# --search_mode=binary --search_min=10 --search_max=30 \ +# --ttfe_threshold_s=120 --restore_threshold_s=10 +# +# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to +# the container; in script mode this is unused by the script itself but +# varies memory requests to test different resource pressure levels. + +set -e + +echo "[vibe-coding] Installing Node.js..." +apt-get update -qq && apt-get install -y -qq nodejs npm > /dev/null 2>&1 + +echo "[vibe-coding] Creating project scaffold..." +mkdir -p /tmp/vibe-project && cd /tmp/vibe-project + +# Create a minimal package.json with Vite +cat > package.json << 'EOF' +{ + "name": "vibe-sandbox", + "private": true, + "scripts": { + "dev": "vite --host 0.0.0.0 --port 5173" + }, + "dependencies": { + "vite": "^5.0.0" + } +} +EOF + +# Create minimal index.html for Vite to serve +cat > index.html << 'EOF' + +Vibe +

Ready

+ +EOF + +echo "[vibe-coding] Installing npm dependencies..." +npm install --prefer-offline 2>&1 | tail -5 + +echo "[vibe-coding] Starting Vite dev server..." +npx vite --host 0.0.0.0 --port 5173 & +VITE_PID=$! + +echo "[vibe-coding] Waiting for server to be ready..." +MAX_WAIT=60 +ELAPSED=0 +while ! curl -s http://localhost:5173 > /dev/null 2>&1; do + sleep 1 + ELAPSED=$((ELAPSED + 1)) + if [ $ELAPSED -ge $MAX_WAIT ]; then + echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s" + exit 1 + fi +done + +echo "[vibe-coding] First page served successfully (${ELAPSED}s)" + +# Kill the vite server — we only needed to measure startup time +kill $VITE_PID 2>/dev/null || true diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh new file mode 100644 index 0000000000..d54a851bda --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/workloads/vibe_coding/startup_pip_fastapi.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Lightweight Vibe Coding Startup Script — pip install + FastAPI +# +# Simulates a Python-based agentic sandbox cold-start: +# 1. Install Python packages (FastAPI + uvicorn) +# 2. Start a web server +# 3. Wait for the server to respond +# +# This is lighter weight than the npm/Vite variant and runs natively +# in the python:3.11-slim base image without needing to install Node.js. 
+#
+# Usage (cold-start only):
+#   python sweeps/snapshot_saturation_search.py \
+#     --skip_snapshot \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20
+#
+# Usage (with snapshot/restore):
+#   python sweeps/snapshot_saturation_search.py \
+#     --preload_mode=script:workloads/vibe_coding/startup_pip_fastapi.sh \
+#     --burst_size=3 \
+#     --search_mode=binary --search_min=10 --search_max=30 \
+#     --ttfe_threshold_s=20 --restore_threshold_s=10
+#
+# NOTE: --search_min/--search_max control the PRELOAD_MB env var passed to
+# the container; in script mode this is unused by the script itself but
+# varies memory requests to test different resource pressure levels.
+
+# pipefail is needed in addition to -e: without it a failed `pip install`
+# is masked by the exit status of `tail` at the end of the pipeline.
+set -eo pipefail
+
+echo "[vibe-coding] Installing Python packages..."
+pip install --quiet fastapi uvicorn 2>&1 | tail -3
+
+echo "[vibe-coding] Creating app..."
+cat > /tmp/app.py << 'EOF'
+from fastapi import FastAPI
+app = FastAPI()
+
+@app.get("/")
+def root():
+    return {"status": "ready"}
+EOF
+
+echo "[vibe-coding] Starting uvicorn server..."
+python -m uvicorn app:app --host 0.0.0.0 --port 8000 --app-dir /tmp &
+SERVER_PID=$!
+
+echo "[vibe-coding] Waiting for server to be ready..."
+MAX_WAIT=30
+ELAPSED=0
+# The probe carries its own timeout so a server that accepts the connection
+# but never answers cannot stall the loop indefinitely.
+while ! python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/', timeout=2)" 2>/dev/null; do
+  # Fail fast if uvicorn has already exited instead of waiting out MAX_WAIT.
+  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+    echo "[vibe-coding] ERROR: Server process exited before becoming ready"
+    exit 1
+  fi
+  sleep 1
+  ELAPSED=$((ELAPSED + 1))
+  if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
+    echo "[vibe-coding] ERROR: Server did not start within ${MAX_WAIT}s"
+    # Do not leave the background server running on the failure path.
+    kill "$SERVER_PID" 2>/dev/null || true
+    exit 1
+  fi
+done
+
+echo "[vibe-coding] First request served successfully (${ELAPSED}s)"
+
+# Kill the server — we only needed to measure startup time
+kill "$SERVER_PID" 2>/dev/null || true
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
new file mode 100644
index 0000000000..6dfb59b981
--- /dev/null
+++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2025 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py new file mode 100644 index 0000000000..ee4603a4b3 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py @@ -0,0 +1,489 @@ +"""Shared utilities for GKE Agent Sandbox benchmarks. + +Provides helpers for agent API interaction, kubectl commands, warm pool +management, and sample construction used by all GKE agent benchmark +definitions. +""" + +import json +import logging +import subprocess +import time +import urllib.request +import urllib.error + +from absl import flags +from perfkitbenchmarker import sample + +FLAGS = flags.FLAGS + +# --------------------------------------------------------------------------- +# Shared flags (registered once; importable by benchmark modules) +# --------------------------------------------------------------------------- + +flags.DEFINE_string( + "gke_namespace", + "agentic", + "Kubernetes namespace where the agentic workloads are deployed.", +) + +flags.DEFINE_string( + "gke_machine_type", + "", + "Machine type of the sandbox node pool. Recorded in sample metadata.", +) + +flags.DEFINE_string( + "gke_kubeconfig", + "", + "Path to a kubeconfig file. If empty, the system default is used.", +) + +flags.DEFINE_bool( + "gke_gvisor", + True, + "Whether the sandbox node pool uses gVisor. 
def CheckAgentHealthz(api_url=None, required=True):
    """Verify the agent API is reachable via GET <api_url>/healthz.

    Args:
        api_url: Base URL to check. Defaults to GetAgentApiUrl().
        required: If True (default), raise on failure. If False, only log a
            warning so callers can defer the check (e.g. before the
            port-forward has been started).

    Raises:
        RuntimeError: If the endpoint cannot be reached and `required` is True.
            The original network error is attached as the cause.
    """
    if api_url is None:
        api_url = GetAgentApiUrl()
    try:
        req = urllib.request.Request(f"{api_url}/healthz")
        with urllib.request.urlopen(req, timeout=15) as resp:
            logging.info("Agent healthz: %s", resp.read().decode())
    # URLError and HTTPError are OSError subclasses; catching OSError also
    # covers socket timeouts and connection resets raised while reading the
    # response body, which urllib does not wrap in URLError.
    except OSError as e:
        msg = (
            f"Agent API is not reachable at {api_url}/healthz: {e}\n"
            "Hint: ensure kubectl port-forward is running "
            "(kubectl port-forward svc/adk-agent -n <namespace> 8080:80)."
        )
        if required:
            raise RuntimeError(msg) from e
        logging.warning("Health check deferred (non-fatal): %s", msg)
def PatchWarmPool(namespace, warmpool_name, replicas, label, wait_timeout=180):
    """Set SandboxWarmPool replicas, then block until enough pods are Running.

    Args:
        namespace: Namespace that holds the SandboxWarmPool.
        warmpool_name: Name of the SandboxWarmPool resource to patch.
        replicas: Desired replica count.
        label: Label selector used to count the warm pool pods.
        wait_timeout: Seconds to wait for `replicas` pods in phase Running.

    Returns:
        True when replicas is 0 or the target count was reached in time,
        False when the wait timed out.
    """
    logging.info("Patching %s replicas -> %d", warmpool_name, replicas)
    merge_patch = json.dumps({"spec": {"replicas": replicas}})
    kubectl_args = [
        "patch", "sandboxwarmpool", warmpool_name,
        "-n", namespace, "--type=merge", f"-p={merge_patch}",
    ]
    RunKubectl(kubectl_args)

    # Scaling to zero needs no readiness wait.
    if replicas == 0:
        return True

    give_up_at = time.time() + wait_timeout
    while time.time() < give_up_at:
        ready = CountPods(namespace, label, phase="Running")
        logging.info("%d/%d warm pool pods Running", ready, replicas)
        if ready >= replicas:
            return True
        time.sleep(3)

    logging.warning("Timed out waiting for %d warm pool pods", replicas)
    return False
def BuildMetadata(namespace, extra=None):
    """Return the metadata dict shared by every emitted sample.

    Always contains "namespace" and "gvisor". "machine_type" and "note" are
    added only when the corresponding flag is non-empty. Entries from `extra`
    are merged last and therefore override the common keys.
    """
    metadata = {"namespace": namespace, "gvisor": FLAGS.gke_gvisor}
    optional_fields = (
        ("machine_type", FLAGS.gke_machine_type),
        ("note", FLAGS.gke_note),
    )
    metadata.update({key: val for key, val in optional_fields if val})
    if extra:
        metadata.update(extra)
    return metadata
class _PortForwardManager:
    """Manages a kubectl port-forward subprocess with auto-reconnect.

    Mimics the shell pattern:
        while true; do
          kubectl port-forward svc/adk-agent -n agentic 8080:80
          echo "Reconnecting..."
          sleep 1
        done

    start() and stop() are idempotent and serialized by an internal lock.
    Every start() creates its own stop event, so a loop thread left over from
    an earlier start/stop cycle can never be revived by a later start().
    Orphans from a previous PKB run are cleaned up via the PID file.
    """

    def __init__(self):
        self._proc = None
        self._thread = None
        self._stop_event = threading.Event()
        self._lock = threading.Lock()
        self._started = False

    @property
    def is_running(self):
        """True between a start() and the matching stop()."""
        return self._started and not self._stop_event.is_set()

    def start(self):
        """Start the port-forward loop (idempotent)."""
        with self._lock:
            thread_alive = self._thread is not None and self._thread.is_alive()
            if self.is_running and thread_alive:
                return

            self._kill_orphan()
            # A fresh event per run: the previous loop thread keeps a
            # reference to its own (already set) event and exits on its own.
            stop_event = threading.Event()
            self._stop_event = stop_event
            self._started = True
            self._thread = threading.Thread(
                target=self._loop,
                args=(stop_event,),
                daemon=True,
                name="pkb-portforward",
            )
            self._thread.start()

    def stop(self):
        """Stop the port-forward loop and kill the subprocess."""
        with self._lock:
            if not self._started:
                return
            self._stop_event.set()
            self._kill_proc()
            self._started = False
            self._cleanup_pid_file()

    def _loop(self, stop_event=None):
        """Background reconnect loop; runs until `stop_event` is set."""
        if stop_event is None:
            stop_event = self._stop_event

        ns = FLAGS.gke_namespace
        svc = FLAGS.gke_portforward_service
        local_port = FLAGS.gke_portforward_local_port
        remote_port = FLAGS.gke_portforward_remote_port
        delay = FLAGS.gke_portforward_reconnect_delay

        cmd = ["kubectl"]
        if FLAGS.gke_kubeconfig:
            cmd += ["--kubeconfig", FLAGS.gke_kubeconfig]
        cmd += [
            "port-forward", svc,
            "-n", ns,
            f"{local_port}:{remote_port}",
        ]

        while not stop_event.is_set():
            logging.info("Starting port-forward: %s", " ".join(cmd))
            try:
                # Output is discarded: nobody reads it, and an undrained PIPE
                # would block kubectl once the OS pipe buffer fills up.
                proc = subprocess.Popen(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                with self._lock:
                    if stop_event.is_set():
                        # stop() ran while kubectl was being spawned; do not
                        # leave this process behind.
                        self._terminate(proc)
                        break
                    self._proc = proc
                    self._write_pid_file(proc.pid)

                while not stop_event.is_set():
                    if proc.poll() is not None:
                        break
                    stop_event.wait(timeout=0.5)

            except Exception as e:  # keep the reconnect loop alive
                logging.warning("Port-forward error: %s", e)

            if not stop_event.is_set():
                logging.info(
                    "Port-forward disconnected. Reconnecting in %.1fs...", delay
                )
                stop_event.wait(timeout=delay)

    @staticmethod
    def _terminate(proc):
        """Terminate `proc`, escalating to kill if it does not exit in 5s."""
        if proc.poll() is not None:
            return
        try:
            proc.terminate()
            proc.wait(timeout=5)
        except Exception:
            try:
                proc.kill()
            except Exception:
                pass

    def _kill_proc(self):
        """Kill the current subprocess if alive."""
        if self._proc is not None:
            self._terminate(self._proc)
        self._proc = None

    def _write_pid_file(self, pid):
        """Write PID to file for orphan detection (best effort)."""
        try:
            with open(_PID_FILE, "w") as f:
                f.write(str(pid))
        except Exception:
            pass

    def _cleanup_pid_file(self):
        """Remove PID file."""
        try:
            _os.unlink(_PID_FILE)
        except OSError:
            pass

    @staticmethod
    def _looks_like_port_forward(pid):
        """Return True if `ps` reports a port-forward command line for pid."""
        try:
            result = subprocess.run(
                ["ps", "-p", str(pid), "-o", "args="],
                capture_output=True, text=True, timeout=5,
            )
        except (OSError, subprocess.TimeoutExpired):
            return False
        return result.returncode == 0 and "port-forward" in result.stdout

    def _kill_recorded_orphan(self):
        """Kill the process recorded in the PID file, if it is a port-forward."""
        try:
            with open(_PID_FILE, "r") as f:
                pid = int(f.read().strip())
        except (OSError, ValueError):
            # Missing, unreadable or corrupt PID file: nothing to kill.
            self._cleanup_pid_file()
            return

        try:
            # pid <= 1 would signal a process group / init; the PID may also
            # have been recycled by an unrelated process since it was written.
            if (
                pid > 1
                and pid != _os.getpid()
                and self._looks_like_port_forward(pid)
            ):
                logging.info("Killing orphan port-forward (PID %d)", pid)
                _os.kill(pid, signal.SIGTERM)
                time.sleep(0.5)
                try:
                    _os.kill(pid, signal.SIGKILL)
                except OSError:
                    pass  # already exited after SIGTERM
        except OSError:
            pass
        finally:
            self._cleanup_pid_file()

    def _free_local_port(self):
        """Terminate other processes listening on the local forward port."""
        local_port = FLAGS.gke_portforward_local_port
        try:
            # Listeners only: clients connected to the port (possibly this
            # very process) must not be killed.
            result = subprocess.run(
                ["lsof", "-t", f"-iTCP:{local_port}", "-sTCP:LISTEN"],
                capture_output=True, text=True, timeout=5,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired):
            return
        if result.returncode != 0:
            return
        for pid_str in result.stdout.split():
            try:
                pid = int(pid_str)
                if pid == _os.getpid():
                    continue
                _os.kill(pid, signal.SIGTERM)
                logging.info("Killed process %d on port %d", pid, local_port)
            except (OSError, ValueError):
                pass

    def _kill_orphan(self):
        """Kill a port-forward process left by a previous PKB run."""
        self._kill_recorded_orphan()
        self._free_local_port()
Measures interaction +latency, screenshot generation time, cold start, navigation, evaluation, +fill, click latencies, and RSS memory at a given concurrent session count. + +This benchmark is designed to be invoked repeatedly by an external sweep +controller that varies the density parameter across iterations to find +the saturation point. + +Usage: + python pkb.py --benchmarks=gke_chromium_density \\ + --gke_chromium_density=4 \\ + --gke_chromium_density_task_count=10 \\ + --gke_chromium_density_warmup_tasks=5 \\ + --gke_namespace=agentic \\ + --gke_api_url=http://localhost:8080 + +Samples emitted (per run): + - gke_chromium_density_interaction_mean (ms) + - gke_chromium_density_interaction_p95 (ms) + - gke_chromium_density_navigate_mean (ms) + - gke_chromium_density_navigate_p95 (ms) + - gke_chromium_density_evaluate_mean (ms) + - gke_chromium_density_evaluate_p95 (ms) + - gke_chromium_density_fill_mean (ms) + - gke_chromium_density_fill_p95 (ms) + - gke_chromium_density_click_mean (ms) + - gke_chromium_density_click_p95 (ms) + - gke_chromium_density_screenshot_mean (ms) + - gke_chromium_density_screenshot_p95 (ms) + - gke_chromium_density_cold_start_mean (ms) + - gke_chromium_density_cold_start_p95 (ms) + - gke_chromium_density_rss_end (MB) + - gke_chromium_density_rss_growth (MB) + - gke_chromium_density_wall_time (seconds) +""" + +import logging +import time + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_chromium_density" +BENCHMARK_CONFIG = """ +gke_chromium_density: + description: > + Atomic single-point Chromium browser sandbox density measurement on a + pre-provisioned GKE cluster with gVisor 
def Run(benchmark_spec):
    """Run one Chromium density measurement through the agent API.

    Optionally scales the Chromium warm pool to the requested density, POSTs
    the benchmark request, and converts the returned aggregate statistics into
    samples. Aggregate keys missing from the response are skipped.

    Returns:
        List of sample.Sample objects.
    """
    ns = FLAGS.gke_namespace
    density = FLAGS.gke_chromium_density

    logging.info("=== Run: chromium_density=%d ===", density)

    # Sweeps may call Run() without Prepare(), so make sure the tunnel is up.
    utils.EnsurePortForward()

    # Warm pool sizing lives here (not in Prepare) for the same reason.
    if FLAGS.gke_chromium_density_patch_warmpool:
        utils.PatchWarmPool(
            namespace=ns,
            warmpool_name=_WARMPOOL_NAME,
            replicas=density,
            label=_WARMPOOL_LABEL,
            wait_timeout=FLAGS.gke_chromium_density_provision_timeout,
        )

    request_body = {
        "task_count": FLAGS.gke_chromium_density_task_count,
        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
        "concurrent_sessions": density,
        "sandbox_exec_timeout_s": FLAGS.gke_chromium_density_exec_timeout,
    }

    started = time.time()
    result = utils.CallAgentApi("/benchmark/chromium/density", request_body)
    wall_time = time.time() - started

    successful = result.get("successful_sessions", 0)
    failed = result.get("failed_sessions", 0)
    agg = result.get("aggregate", {})

    logging.info(
        "API response: %d successful, %d failed sessions (%.1fs)",
        successful,
        failed,
        wall_time,
    )

    wall_time_rounded = round(wall_time, 2)
    extra = {
        "density": density,
        "successful_sessions": successful,
        "failed_sessions": failed,
        "task_count": FLAGS.gke_chromium_density_task_count,
        "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks,
        "wall_time_s": wall_time_rounded,
    }

    samples = []

    # Mean and P95 latency for every task type, in the documented order.
    task_types = (
        "interaction",
        "navigate",
        "evaluate",
        "fill",
        "click",
        "screenshot",
        "cold_start",
    )
    for task_type in task_types:
        for stat in ("mean", "p95"):
            metric = f"{task_type}_{stat}"
            _emit(samples, agg, f"{metric}_ms", metric, "ms", ns, extra)

    # RSS memory.
    for metric in ("rss_end", "rss_growth"):
        _emit(samples, agg, f"{metric}_mb", metric, "MB", ns, extra)

    # Wall time is measured locally, so it is always emitted.
    wall_sample = utils.MakeSample(
        f"{BENCHMARK_NAME}_wall_time",
        wall_time_rounded,
        "seconds",
        ns,
        extra,
    )
    samples.append(wall_sample)

    logging.info(
        "Emitted %d samples for chromium_density=%d.", len(samples), density
    )
    return samples
def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
    """Append a sample for agg[agg_key]; do nothing if it is absent or None.

    The metric name is BENCHMARK_NAME joined to `metric_suffix` with an
    underscore.
    """
    value = agg.get(agg_key)
    if value is None:
        return
    metric = f"{BENCHMARK_NAME}_{metric_suffix}"
    samples.append(utils.MakeSample(metric, value, unit, namespace, extra))
+ +This benchmark is designed to be invoked repeatedly by an external sweep +controller that varies the batch_size parameter across iterations to find +the deletion saturation point. + +Usage: + python pkb.py --benchmarks=gke_deletion \\ + --gke_deletion_batch_size=100 \\ + --gke_deletion_warmpool_name=python-sandbox-warmpool \\ + --gke_deletion_pod_label=sandbox=python-sandbox-example \\ + --gke_deletion_poll_interval_s=1.0 \\ + --gke_deletion_provision_timeout_s=120.0 \\ + --gke_deletion_drain_timeout_s=300.0 \\ + --gke_namespace=agentic \\ + --gke_machine_type=c4-standard-8 + +Samples emitted (per run): + - gke_deletion_provision_time (seconds) + - gke_deletion_total_drain_time (seconds) + - gke_deletion_latency_p50 (seconds) + - gke_deletion_latency_p95 (seconds) + - gke_deletion_latency_p99 (seconds) + - gke_deletion_latency_max (seconds) + - gke_deletion_rate (pods/sec) + - gke_deletion_ip_before (count) + - gke_deletion_ip_after (count) + - gke_deletion_ip_reclaim_time (seconds) + - gke_deletion_final_running_count (count) + - gke_deletion_wall_time (seconds) +""" + +import json +import logging +import time + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_deletion" +BENCHMARK_CONFIG = """ +gke_deletion: + description: > + Atomic single-point bulk deletion and IP reclamation measurement on a + pre-provisioned GKE cluster with gVisor isolation. 
+""" + +# --------------------------------------------------------------------------- +# Benchmark-specific flags +# --------------------------------------------------------------------------- + +flags.DEFINE_integer( + "gke_deletion_batch_size", + 100, + "Number of sandbox pods to provision then bulk-delete.", +) + +flags.DEFINE_string( + "gke_deletion_warmpool_name", + "python-sandbox-warmpool", + "SandboxWarmPool resource name.", +) + +flags.DEFINE_string( + "gke_deletion_pod_label", + "sandbox=python-sandbox-example", + "Label selector for warm pool pods.", +) + +flags.DEFINE_float( + "gke_deletion_poll_interval_s", + 1.0, + "Seconds between kubectl polls during deletion.", +) + +flags.DEFINE_float( + "gke_deletion_provision_timeout_s", + 120.0, + "Max seconds to wait for pods to reach Running before deletion.", +) + +flags.DEFINE_float( + "gke_deletion_drain_timeout_s", + 300.0, + "Max seconds to wait for all pods to terminate after scale-to-0.", +) + + +# --------------------------------------------------------------------------- +# Lifecycle +# --------------------------------------------------------------------------- + + +def Provision(benchmark_spec): + """Provision GKE cluster and all dependencies.""" + gke_provision_utils.Provision() + + +def GetConfig(user_config): + """Load and return benchmark config. + + No vm_groups — PKB skips Provision() and Teardown(). + """ + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def Prepare(benchmark_spec): + """Deploy workloads onto the cluster.""" + logging.info("=== Prepare: deploying workloads ===") + deploy_utils.DeployWorkloads() + utils.EnsurePortForward() + logging.info("Prepare complete.") + + +def Run(benchmark_spec): + """Provision N pods, bulk-delete, measure deletion latency and IP reclamation. + + Returns: + List of sample.Sample objects. 
+ """ + ns = FLAGS.gke_namespace + batch_size = FLAGS.gke_deletion_batch_size + warmpool_name = FLAGS.gke_deletion_warmpool_name + label = FLAGS.gke_deletion_pod_label + poll_interval = FLAGS.gke_deletion_poll_interval_s + provision_timeout = FLAGS.gke_deletion_provision_timeout_s + drain_timeout = FLAGS.gke_deletion_drain_timeout_s + + logging.info("=== Run: batch_size=%d ===", batch_size) + + # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility) + _DrainPool(ns, warmpool_name, label, drain_timeout) + time.sleep(2) + + t_wall_start = time.time() + + # 1. Provision N pods + logging.info("Provisioning %d pods...", batch_size) + provision_start = time.time() + _PatchReplicas(ns, warmpool_name, batch_size) + + deadline = time.time() + provision_timeout + while time.time() < deadline: + running = utils.CountPods(ns, label, phase="Running") + pct = (running / batch_size * 100) if batch_size > 0 else 0 + logging.info("Provisioning... %d/%d (%.0f%%)", running, batch_size, pct) + if running >= batch_size: + break + time.sleep(3) + + provision_time = time.time() - provision_start + final_running = utils.CountPods(ns, label, phase="Running") + + logging.info( + "Provisioned %d/%d pods in %.1fs", + final_running, + batch_size, + provision_time, + ) + + # If not all pods reached Running, this is a failure + if final_running < batch_size: + raise RuntimeError( + f"Provisioning failed: only {final_running}/{batch_size} pods " + f"reached Running within {provision_timeout}s" + ) + + # 2. Record pod names and IP count before deletion + pod_names_before = set(_GetPodNames(ns, label)) + ip_before = _CountAllocatedIPs(ns, label) + + logging.info( + "Recorded %d pods, %d IPs allocated", + len(pod_names_before), + ip_before, + ) + + # Brief settle + time.sleep(1) + + # 3. Bulk delete: scale to 0 + logging.info("Scaling to 0 (bulk delete of %d pods)...", len(pod_names_before)) + _PatchReplicas(ns, warmpool_name, 0) + + # 4. 
Poll: track pod disappearance and IP reclamation + t_delete = time.time() + deadline_drain = t_delete + drain_timeout + pod_gone_times = {} # pod_name -> elapsed_s when first absent + ip_reclaim_time = None + + while time.time() < deadline_drain: + elapsed = time.time() - t_delete + + # Current pod names still present + current_pods = set(_GetPodNames(ns, label)) + remaining = len(current_pods) + + # Track which pods have disappeared + gone_now = pod_names_before - current_pods + for pn in gone_now: + if pn not in pod_gone_times: + pod_gone_times[pn] = elapsed + + # IP count (scoped to warm pool label) + ips = _CountAllocatedIPs(ns, label) + if ip_reclaim_time is None and ips == 0: + ip_reclaim_time = elapsed + + deleted = len(pod_names_before) - remaining + pct = (deleted / len(pod_names_before) * 100) if pod_names_before else 0 + logging.info( + "[%.1fs] Deleted: %d/%d (%.0f%%) IPs: %d", + elapsed, + deleted, + len(pod_names_before), + pct, + ips, + ) + + if remaining == 0: + break + + time.sleep(poll_interval) + + total_drain_time = time.time() - t_delete + + # Pods we never saw disappear (stuck) get the full drain time + for pn in pod_names_before: + if pn not in pod_gone_times: + pod_gone_times[pn] = total_drain_time + + # 5. Compute per-pod deletion latencies + deletion_latencies = sorted(pod_gone_times.values()) + n = len(deletion_latencies) + + ip_after = _CountAllocatedIPs(ns, label) + deletion_rate = ( + (len(pod_names_before) / total_drain_time) if total_drain_time > 0 else 0 + ) + + logging.info( + "Drain complete: %.1fs, rate=%.1f pods/sec, IPs: %d->%d", + total_drain_time, + deletion_rate, + ip_before, + ip_after, + ) + + wall_time = time.time() - t_wall_start + + # 6. 
Build samples + extra = { + "batch_size": batch_size, + "final_running_count": final_running, + "ip_before": ip_before, + "ip_after": ip_after, + "wall_time_s": round(wall_time, 2), + } + + samples = [] + + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_provision_time", + round(provision_time, 2), + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_total_drain_time", + round(total_drain_time, 2), + "seconds", + ns, + extra, + ) + ) + + if n > 0: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_latency_p50", + round(_Percentile(deletion_latencies, 50), 3), + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_latency_p95", + round(_Percentile(deletion_latencies, 95), 3), + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_latency_p99", + round(_Percentile(deletion_latencies, 99), 3), + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_latency_max", + round(deletion_latencies[-1], 3), + "seconds", + ns, + extra, + ) + ) + + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_rate", + round(deletion_rate, 2), + "pods/sec", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ip_before", + float(ip_before), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ip_after", + float(ip_after), + "count", + ns, + extra, + ) + ) + + if ip_reclaim_time is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ip_reclaim_time", + round(ip_reclaim_time, 2), + "seconds", + ns, + extra, + ) + ) + + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_final_running_count", + float(final_running), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_wall_time", + round(wall_time, 2), + "seconds", + ns, + extra, + ) + ) + + logging.info("Emitted %d samples for 
def _PatchReplicas(namespace, warmpool_name, replicas):
    """Patch SandboxWarmPool to a specific replica count.

    The patch stays best effort (no exception on failure, so cleanup paths
    keep going), but a failure is now logged instead of being swallowed.
    Otherwise a rejected patch only surfaces later as an unexplained
    provisioning or drain timeout.

    Args:
        namespace: Namespace that holds the SandboxWarmPool.
        warmpool_name: Name of the SandboxWarmPool resource.
        replicas: Desired replica count.

    Returns:
        True if kubectl exited with status 0, False otherwise.
    """
    patch_json = json.dumps({"spec": {"replicas": replicas}})
    _, stderr, rc = utils.RunKubectl(
        [
            "patch",
            "sandboxwarmpool",
            warmpool_name,
            "-n",
            namespace,
            "--type=merge",
            f"-p={patch_json}",
        ],
        raise_on_failure=False,
    )
    if rc != 0:
        logging.warning(
            "Failed to patch sandboxwarmpool %s to %d replicas (rc=%d): %s",
            warmpool_name,
            replicas,
            rc,
            (stderr or "").strip(),
        )
    return rc == 0
def _Percentile(sorted_values, pct):
    """Return the pct-th percentile using linear interpolation.

    Args:
        sorted_values: Sequence of numbers sorted in ascending order.
        pct: Percentile to compute, in the closed range [0, 100].

    Returns:
        The interpolated percentile, or 0.0 when `sorted_values` is empty.

    Raises:
        ValueError: If `pct` is outside [0, 100]. Without this check a value
            above 100 indexes past the end of the list and a negative value
            silently reads from the wrong end.
    """
    if not sorted_values:
        return 0.0
    if not 0 <= pct <= 100:
        raise ValueError(f"pct must be within [0, 100], got {pct!r}")
    last = len(sorted_values) - 1
    idx = (pct / 100) * last
    lo = int(idx)
    hi = min(lo + 1, last)
    frac = idx - lo
    return sorted_values[lo] * (1 - frac) + sorted_values[hi] * frac
+""" + +import json +import logging +import os +import subprocess +import time + +from absl import flags + +FLAGS = flags.FLAGS + +# --------------------------------------------------------------------------- +# Flags (registered once; shared across all benchmarks) +# --------------------------------------------------------------------------- + +flags.DEFINE_string( + "gke_sandbox_version", + "v0.4.6", + "Agent Sandbox controller version (GitHub release tag).", +) + +flags.DEFINE_string( + "gke_sandbox_router_image", + "", + "Sandbox router container image. If empty, router deployment is skipped.", +) + +flags.DEFINE_string( + "gke_adk_image", + "", + "ADK agent container image. If empty, agent deployment is skipped.", +) + +flags.DEFINE_string( + "gke_chromium_image", + "", + "Chromium sandbox container image. If empty, uses placeholder.", +) + +flags.DEFINE_integer( + "gke_warmpool_replicas", + 2, + "Default warm pool replica count for SandboxWarmPool resources.", +) + +flags.DEFINE_integer( + "gke_chromium_replicas", + 1, + "Default Chromium warm pool replica count.", +) + +flags.DEFINE_string( + "gke_python_image", + "registry.k8s.io/agent-sandbox/python-runtime-sandbox:v0.1.0", + "Python runtime sandbox container image.", +) + +flags.DEFINE_integer( + "gke_deploy_timeout", + 120, + "Timeout in seconds for workload deployment rollout.", +) + +flags.DEFINE_string( + "gke_cluster_name", + "", + "GKE cluster name. 
def _DeriveImagePaths():
    """Fill in empty image and cluster-name flags from project/region/machine type.

    When --gke_adk_image, --gke_sandbox_router_image or --gke_chromium_image
    is empty, it is set to
    "{region}-docker.pkg.dev/{project}/{repo path}:{arch}", where arch is
    "arm64" for the c4a machine family and "amd64" otherwise. When
    --gke_cluster_name is empty it is set to "{user}-agentic-{suffix}".

    Nothing is derived (and a message is logged) when either
    --gke_project_id or --gke_region is unset.
    """
    project = getattr(FLAGS, "gke_project_id", "") or ""
    region = getattr(FLAGS, "gke_region", "") or ""
    machine_type = getattr(FLAGS, "gke_sandbox_machine_type", "") or ""

    if not project or not region:
        logging.info(
            "Cannot auto-derive images: project=%s region=%s", project, region
        )
        return

    machine_family = machine_type.split("-")[0] if machine_type else "c4"
    target_arch = "arm64" if machine_family == "c4a" else "amd64"

    # (flag name, Artifact Registry repository/image path)
    derived_images = (
        ("gke_adk_image", "adk-repo/adk-agent"),
        ("gke_sandbox_router_image", "agent-sandbox/sandbox-router"),
        ("gke_chromium_image", "agent-sandbox/chrome-sandbox"),
    )
    for flag_name, repo_path in derived_images:
        if getattr(FLAGS, flag_name):
            continue  # An explicit value always wins over the derived one.
        image = "{}-docker.pkg.dev/{}/{}:{}".format(
            region, project, repo_path, target_arch
        )
        setattr(FLAGS, flag_name, image)
        logging.info("Auto-derived %s: %s", flag_name, image)

    if not FLAGS.gke_cluster_name:
        # `or` also covers USER being set but empty, which would otherwise
        # produce a cluster name starting with "-".
        user_prefix = (os.environ.get("USER") or "pkb").split(".")[0]
        suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"}
        cluster_suffix = suffix_map.get(machine_family, machine_family)
        FLAGS.gke_cluster_name = "{}-agentic-{}".format(
            user_prefix, cluster_suffix
        )
        logging.info(
            "Auto-derived gke_cluster_name: %s", FLAGS.gke_cluster_name
        )


def _GetSandboxNodeSelector():
    """Return the nodeSelector dict for the current provisioning mode.

    - native mode: {"pkb_nodepool": "sandbox"}
    - any other mode (including when --gke_provision_mode cannot be read):
      {"dedicated": "agentic-sandbox"}
    """
    try:
        mode = FLAGS.gke_provision_mode
    except (AttributeError, KeyError):
        # Flag not registered: fall back to the custom-mode labels.
        mode = "custom"
    if mode == "native":
        return {"pkb_nodepool": "sandbox"}
    return {"dedicated": "agentic-sandbox"}
+ """ + try: + mode = FLAGS.gke_provision_mode + except (AttributeError, KeyError): + mode = "custom" + tolerations = [ + { + "key": "sandbox.gke.io/runtime", + "operator": "Equal", + "value": "gvisor", + "effect": "NoSchedule", + }, + ] + if mode != "native": + tolerations.insert( + 0, + { + "key": "dedicated", + "operator": "Equal", + "value": "agentic-sandbox", + "effect": "NoSchedule", + }, + ) + return tolerations + + +def _NodeSelectorYaml(indent=6): + """Generate nodeSelector YAML block for embedding in manifests.""" + selector = _GetSandboxNodeSelector() + spaces = " " * indent + lines = ["{}nodeSelector:".format(spaces)] + for k, v in selector.items(): + lines.append("{} {}: {}".format(spaces, k, v)) + return "\n".join(lines) + + +def _TolerationsYaml(indent=6): + """Generate tolerations YAML block for embedding in manifests.""" + tolerations = _GetSandboxTolerations() + spaces = " " * indent + lines = ["{}tolerations:".format(spaces)] + for t in tolerations: + lines.append('{} - key: "{}"'.format(spaces, t["key"])) + lines.append('{} operator: "{}"'.format(spaces, t["operator"])) + lines.append('{} value: "{}"'.format(spaces, t["value"])) + lines.append('{} effect: "{}"'.format(spaces, t["effect"])) + return "\n".join(lines) + + +def DeployWorkloads(): + """Deploy the full Agent Sandbox ecosystem onto the GKE cluster. + + Idempotent: safe to call repeatedly. Sequence: + 1. Create namespace + 2. Install Agent Sandbox CRDs + 3. Deploy SandboxTemplates + WarmPools + 4. Deploy Sandbox Router + 5. Deploy ADK Agent (Deployment + Service + RBAC) + 6. Deploy PSI Reader DaemonSet + 7. 
Wait for ADK Agent rollout + """ + _DeriveImagePaths() + ns = FLAGS.gke_namespace + logging.info("=== DeployWorkloads: namespace=%s ===", ns) + + _CreateNamespace(ns) + _InstallCRDs() + _DeploySandboxTemplates(ns) + _DeploySandboxRouter(ns) + _DeployADKAgent(ns) + _DeployPSIReader(ns) + _WaitForAgentReady(ns) + + logging.info("DeployWorkloads complete.") + + +def DeploySnapshots(): + """Deploy Pod Snapshot infrastructure (UC-A only). + + Idempotent: safe to call repeatedly. Sequence: + 1. Create GCS bucket (hierarchical namespace) + 2. Create managed folder + 3. Create KSA for snapshots + 4. Bind IAM roles + 5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy + """ + ns = FLAGS.gke_namespace + project = FLAGS.gke_project_id + region = FLAGS.gke_region + + if not project: + logging.warning("DeploySnapshots: gke_project_id not set, skipping.") + return + + bucket_name = "agent-sandbox-snapshots-{}".format(project) + snapshot_folder = "benchmark-snapshots" + ksa_name = "pod-snapshot-sa" + + logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name) + + # 1. Create GCS bucket + _RunCmd( + [ + "gcloud", + "storage", + "buckets", + "create", + "gs://{}".format(bucket_name), + "--uniform-bucket-level-access", + "--enable-hierarchical-namespace", + "--soft-delete-duration=0d", + "--location={}".format(region), + "--project={}".format(project), + ], + check=False, + ) + + # 2. Create managed folder + _RunCmd( + [ + "gcloud", + "storage", + "managed-folders", + "create", + "gs://{}/{}/".format(bucket_name, snapshot_folder), + "--project={}".format(project), + ], + check=False, + ) + + # 3. Create KSA + _RunKubectl( + [ + "create", + "serviceaccount", + ksa_name, + "--namespace", + ns, + ], + check=False, + ) + + # 4. IAM bindings + project_number = _GetProjectNumber(project) + if project_number: + _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name) + + # 5. 
Deploy PSSC + PSP + _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder) + + logging.info("DeploySnapshots complete.") + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + + +def _RunCmd(cmd, check=True, timeout=120): + """Run a shell command and return (stdout, returncode).""" + logging.info("CMD: %s", " ".join(cmd)) + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + ) + if check and proc.returncode != 0: + logging.warning( + "Command failed (rc=%d): %s", proc.returncode, proc.stderr[:500] + ) + return proc.stdout.strip(), proc.returncode + + +def _RunKubectl(args, check=True, timeout=120): + """Run kubectl with optional kubeconfig.""" + cmd = ["kubectl"] + if FLAGS.gke_kubeconfig: + cmd += ["--kubeconfig", FLAGS.gke_kubeconfig] + cmd += list(args) + return _RunCmd(cmd, check=check, timeout=timeout) + + +def _KubectlApply(manifest_str): + """Apply a YAML manifest string via kubectl stdin.""" + cmd = ["kubectl", "apply", "-f", "-"] + if FLAGS.gke_kubeconfig: + cmd = [ + "kubectl", + "--kubeconfig", + FLAGS.gke_kubeconfig, + "apply", + "-f", + "-", + ] + proc = subprocess.run( + cmd, + input=manifest_str, + capture_output=True, + text=True, + timeout=60, + ) + if proc.returncode != 0: + logging.warning("kubectl apply failed: %s", proc.stderr[:500]) + return proc.returncode == 0 + + +def _CreateNamespace(ns): + """Create namespace if it doesn't exist.""" + _RunKubectl(["create", "namespace", ns], check=False) + + +def _InstallCRDs(): + """Install Agent Sandbox CRDs from GitHub release.""" + version = FLAGS.gke_sandbox_version + base_url = ( + "https://github.com/kubernetes-sigs/agent-sandbox" + "/releases/download/{}".format(version) + ) + logging.info("Installing Agent Sandbox CRDs (%s)", version) + _RunKubectl( + [ + "apply", + "-f", + "{}/manifest.yaml".format(base_url), + "-f", + 
"{}/extensions.yaml".format(base_url), + ], + check=False, + ) + + +def _DeploySandboxTemplates(ns): + """Deploy SandboxTemplate + WarmPool for Python and Chromium.""" + python_image = FLAGS.gke_python_image + chromium_image = FLAGS.gke_chromium_image or "chromium-placeholder:latest" + warmpool_replicas = FLAGS.gke_warmpool_replicas + chromium_replicas = FLAGS.gke_chromium_replicas + + manifest = """--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: python-sandbox-template + namespace: {ns} +spec: + podTemplate: + metadata: + labels: + sandbox: python-sandbox-example + spec: + runtimeClassName: gvisor + containers: + - name: python-runtime + image: {python_image} +{node_selector_yaml} +{tolerations_yaml} + restartPolicy: "OnFailure" +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxWarmPool +metadata: + name: python-sandbox-warmpool + namespace: {ns} +spec: + replicas: {warmpool_replicas} + sandboxTemplateRef: + name: python-sandbox-template +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: chromium-sandbox-template + namespace: {ns} +spec: + podTemplate: + metadata: + labels: + sandbox: chromium-sandbox-example + spec: + runtimeClassName: gvisor + containers: + - name: chromium-runtime + image: {chromium_image} + command: ["/bin/sh", "-c"] + args: + - | + socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 & + exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank + ports: + - containerPort: 9223 +{node_selector_yaml} +{tolerations_yaml} + restartPolicy: "OnFailure" +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxWarmPool +metadata: + name: chromium-sandbox-warmpool + namespace: {ns} +spec: + replicas: {chromium_replicas} + sandboxTemplateRef: + name: chromium-sandbox-template +--- +apiVersion: 
networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-orchestrator-to-chromium + namespace: {ns} +spec: + podSelector: + matchLabels: + sandbox: chromium-sandbox-example + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: adk-agent + ports: + - protocol: TCP + port: 9223 +""".format( + ns=ns, + python_image=python_image, + chromium_image=chromium_image, + warmpool_replicas=warmpool_replicas, + chromium_replicas=chromium_replicas, + node_selector_yaml=_NodeSelectorYaml(), + tolerations_yaml=_TolerationsYaml(), + ) + _KubectlApply(manifest) + + +def _DeploySandboxRouter(ns): + """Deploy the Sandbox Router Deployment + Service.""" + router_image = FLAGS.gke_sandbox_router_image + if not router_image: + logging.info("Sandbox router image not set, skipping router deployment.") + return + + manifest = """--- +apiVersion: v1 +kind: Service +metadata: + name: sandbox-router-svc + namespace: {ns} +spec: + type: ClusterIP + selector: + app: sandbox-router + ports: + - name: http + protocol: TCP + port: 8080 + targetPort: 8080 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sandbox-router-deployment + namespace: {ns} +spec: + replicas: 2 + selector: + matchLabels: + app: sandbox-router + template: + metadata: + labels: + app: sandbox-router + spec: + serviceAccountName: adk-agent-sa + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: sandbox-router + containers: + - name: router + image: {router_image} + ports: + - containerPort: 8080 + env: + - name: ALLOW_UNAUTHENTICATED_ROUTER + value: "true" + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + 
securityContext: + runAsUser: 1000 + runAsGroup: 1000 +""".format(ns=ns, router_image=router_image) + _KubectlApply(manifest) + + +def _DeployADKAgent(ns): + """Deploy ADK Agent: SA, ClusterRole, RoleBinding, Deployment, Service.""" + adk_image = FLAGS.gke_adk_image + if not adk_image: + logging.info("ADK agent image not set, skipping agent deployment.") + return + + project = FLAGS.gke_project_id or "" + region = FLAGS.gke_region or "" + cluster = FLAGS.gke_cluster_name or "" + + manifest = """--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: adk-agent-sa + namespace: {ns} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: adk-agent-sandbox-role +rules: + - apiGroups: ["agents.x-k8s.io"] + resources: ["sandboxes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["agents.x-k8s.io"] + resources: ["sandboxwarmpool", "sandboxwarmpools"] + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions.agents.x-k8s.io"] + resources: ["sandboxclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/portforward"] + verbs: ["create"] + - apiGroups: ["metrics.k8s.io"] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: adk-agent-sandbox-binding + namespace: {ns} +subjects: + - kind: ServiceAccount + name: adk-agent-sa + namespace: {ns} +roleRef: + kind: ClusterRole + name: adk-agent-sandbox-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: adk-agent + namespace: {ns} +spec: + replicas: 1 + selector: + matchLabels: + app: adk-agent + template: + metadata: + labels: + app: adk-agent + spec: + serviceAccountName: adk-agent-sa + containers: + - name: adk-agent + 
imagePullPolicy: Always + image: {adk_image} + resources: + limits: + memory: "16384Mi" + cpu: "6000m" + requests: + memory: "512Mi" + cpu: "1000m" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 6 + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + env: + - name: PORT + value: "8080" + - name: GOOGLE_CLOUD_PROJECT + value: "{project}" + - name: GOOGLE_CLOUD_LOCATION + value: "{region}" + - name: GOOGLE_GENAI_USE_VERTEXAI + value: "true" + - name: CLUSTER_NAME + value: "{cluster}" + - name: AGENTIC_NAMESPACE + value: "{ns}" + - name: SANDBOX_ROUTER_URL + value: "http://sandbox-router-svc.{ns}.svc.cluster.local:8080" +--- +apiVersion: v1 +kind: Service +metadata: + name: adk-agent + namespace: {ns} +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 8080 + selector: + app: adk-agent +""".format(ns=ns, adk_image=adk_image, project=project, region=region, cluster=cluster) + _KubectlApply(manifest) + + +def _DeployPSIReader(ns): + """Deploy PSI Reader DaemonSet for cgroup pressure metrics.""" + manifest = """--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: psi-reader + namespace: {ns} + labels: + app: psi-reader +spec: + selector: + matchLabels: + app: psi-reader + template: + metadata: + labels: + app: psi-reader + spec: +{node_selector_yaml} +{tolerations_yaml} + hostPID: true + containers: + - name: reader + image: busybox:1.36 + command: ["sleep", "infinity"] + securityContext: + privileged: true + volumeMounts: + - name: cgroup + mountPath: /host/sys/fs/cgroup + readOnly: true + - name: proc + mountPath: /host/proc + readOnly: true + resources: + requests: + cpu: "10m" + memory: "16Mi" + limits: + cpu: "50m" + memory: "32Mi" + volumes: + - name: cgroup + hostPath: + path: /sys/fs/cgroup + - name: proc + hostPath: + path: 
def _WaitForAgentReady(ns):
    """Wait for the adk-agent Deployment rollout to finish.

    Skipped when --gke_adk_image is empty. A rollout that does not complete
    is logged as a warning and does not raise.

    Args:
        ns: Namespace that holds the adk-agent Deployment.
    """
    adk_image = FLAGS.gke_adk_image
    if not adk_image:
        logging.info("ADK agent not deployed, skipping rollout wait.")
        return
    timeout = FLAGS.gke_deploy_timeout
    logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout)
    # Give the subprocess more time than kubectl's own --timeout so kubectl
    # reports the rollout timeout itself instead of being killed mid-wait.
    _, rc = _RunKubectl(
        [
            "rollout",
            "status",
            "deployment/adk-agent",
            "-n",
            ns,
            "--timeout={}s".format(timeout),
        ],
        check=False,
        timeout=timeout + 30,
    )
    if rc != 0:
        logging.warning(
            "adk-agent rollout not confirmed within %ds (rc=%d).", timeout, rc
        )


def _GetProjectNumber(project):
    """Look up the GCP project number for a project ID via gcloud.

    Args:
        project: GCP project ID.

    Returns:
        The project number as printed by gcloud, or None when the command
        fails or prints nothing.
    """
    stdout, rc = _RunCmd(
        [
            "gcloud",
            "projects",
            "describe",
            project,
            "--format=value(projectNumber)",
        ],
        check=False,
    )
    return stdout if rc == 0 and stdout else None
"@container-engine-robot.iam.gserviceaccount.com".format(project_number), + "--role=roles/storage.objectUser", + "--quiet", + ], + check=False, + ) + + +def _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder): + """Deploy PodSnapshotStorageConfig + PodSnapshotPolicy.""" + manifest = """--- +apiVersion: podsnapshot.gke.io/v1 +kind: PodSnapshotStorageConfig +metadata: + name: benchmark-pssc-gcs +spec: + snapshotStorageConfig: + gcs: + bucket: "{bucket_name}" + path: "{snapshot_folder}" +--- +apiVersion: podsnapshot.gke.io/v1 +kind: PodSnapshotPolicy +metadata: + name: benchmark-psp + namespace: {ns} +spec: + storageConfigName: benchmark-pssc-gcs + selector: + matchLabels: + app: snapshot-benchmark-workload + triggerConfig: + type: manual + postCheckpoint: resume +""".format(ns=ns, bucket_name=bucket_name, snapshot_folder=snapshot_folder) + _KubectlApply(manifest) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py new file mode 100644 index 0000000000..38b85b4e11 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py @@ -0,0 +1,403 @@ +"""Shared image build utilities for GKE Agent Sandbox benchmarks. + +Builds and pushes container images (ADK agent, Chrome sandbox, Sandbox Router) +via Google Cloud Build. 
Called from: + - Provision() when --gke_skip_image_build is False (via BuildImages()) + - prerequisite_setup.py (via build_images_with_config()) + +Images built: + - ADK Agent: perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ -> {region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch} + - Chrome Sandbox: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch} + - Sandbox Router: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch} +""" + +import logging +import os +import shutil +import subprocess +import tempfile + +from absl import flags + +FLAGS = flags.FLAGS + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def build_images_with_config(project, region, machine_type, cloud_build_sa=None): + """Core image build logic — no FLAGS dependency. + + Callable from both PKB (via BuildImages()) and prerequisite_setup.py. + + Args: + project: GCP project ID. + region: GCP region (e.g. "us-central1"). + machine_type: Machine type string (e.g. "c4-standard-8"). + Used to derive target architecture (arm64 for c4a, amd64 otherwise). + cloud_build_sa: Cloud Build service account email. + If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com". 
+ """ + # Derive architecture from machine family + machine_family = machine_type.split("-")[0] if machine_type else "c4" + target_arch = "arm64" if machine_family == "c4a" else "amd64" + + # Derive image paths + adk_image = f"{region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{target_arch}" + chrome_image = ( + f"{region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{target_arch}" + ) + router_image = ( + f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{target_arch}" + ) + + # Cloud Build SA + if cloud_build_sa is None: + cloud_build_sa = f"adk-cloud-build-sa@{project}.iam.gserviceaccount.com" + + logger.info("=== Building Container Images ===") + logger.info(" Project: %s", project) + logger.info(" Region: %s", region) + logger.info(" Architecture: %s", target_arch) + logger.info(" Cloud Build SA: %s", cloud_build_sa) + + # 1. Build ADK Agent + _BuildADKAgentImage( + project=project, + region=region, + target_arch=target_arch, + image_path=adk_image, + cloud_build_sa=cloud_build_sa, + machine_type=machine_type, + ) + + # 2. Build Chrome Sandbox + _BuildChromeSandboxImage( + project=project, + region=region, + target_arch=target_arch, + image_path=chrome_image, + cloud_build_sa=cloud_build_sa, + ) + + # 3. Build Sandbox Router + _BuildSandboxRouterImage( + project=project, + region=region, + target_arch=target_arch, + image_path=router_image, + cloud_build_sa=cloud_build_sa, + ) + + logger.info("=== All images built successfully ===") + logger.info(" ADK Agent: %s", adk_image) + logger.info(" Chrome Sandbox: %s", chrome_image) + logger.info(" Sandbox Router: %s", router_image) + + +def BuildImages(): + """FLAGS-based entry point (called from PKB Provision). + + Reads configuration from FLAGS (set in gke_provision_utils.py). + Delegates to build_images_with_config() for the actual work. 
+ """ + build_images_with_config( + project=FLAGS.gke_project_id, + region=FLAGS.gke_region, + machine_type=FLAGS.gke_sandbox_machine_type, + ) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + + +def _BuildADKAgentImage( + project, region, target_arch, image_path, cloud_build_sa, machine_type=None +): + """Build and push the ADK Agent image. + + Uses the existing perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml with --substitutions + rather than generating a new one (avoids overwriting the committed file). + """ + logger.info("Building ADK Agent image: %s", image_path) + + # Locate the agent source directory + # Expected layout: repo_root/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ + repo_root = _FindRepoRoot() + agent_dir = os.path.join(repo_root, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent") + + if not os.path.isdir(agent_dir): + raise RuntimeError( + f"ADK agent source not found at {agent_dir}. " + "Ensure you are running from the repository root." + ) + + # Generate generated.env from template + _GenerateEnvFile(agent_dir, project, region, machine_type=machine_type) + + # Use the existing cloudbuild.yaml with substitutions (don't overwrite) + cloudbuild_path = os.path.join(agent_dir, "cloudbuild.yaml") + if not os.path.isfile(cloudbuild_path): + raise RuntimeError( + f"cloudbuild.yaml not found at {cloudbuild_path}. " + "Expected perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml to exist." 
+ ) + + _RunCmd( + [ + "gcloud", + "builds", + "submit", + agent_dir, + f"--config={cloudbuild_path}", + f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}", + f"--project={project}", + f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}", + ] + ) + + logger.info("ADK Agent image built successfully.") + + +def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_build_sa): + """Build and push the Chrome Sandbox image.""" + logger.info("Building Chrome Sandbox image: %s", image_path) + + tmp_dir = tempfile.mkdtemp(prefix="chrome-sandbox-") + try: + # Clone agent-sandbox repo (sparse checkout) + logger.info("Cloning agent-sandbox chrome-sandbox source...") + _RunCmd( + [ + "git", + "clone", + "--depth", + "1", + "--filter=blob:none", + "--sparse", + "https://github.com/kubernetes-sigs/agent-sandbox.git", + tmp_dir, + ] + ) + _RunCmd( + ["git", "sparse-checkout", "set", "examples/chrome-sandbox"], + cwd=tmp_dir, + ) + + build_dir = os.path.join(tmp_dir, "examples", "chrome-sandbox") + if not os.path.isfile(os.path.join(build_dir, "Dockerfile")): + raise RuntimeError(f"chrome-sandbox Dockerfile not found at {build_dir}") + + # Patch Dockerfile: add socat for CDP proxy + dockerfile_path = os.path.join(build_dir, "Dockerfile") + with open(dockerfile_path, "r") as f: + content = f.read() + content = content.replace( + "RUN apt-get update && apt-get install --yes --no-install-recommends chromium", + "RUN apt-get update && apt-get install --yes --no-install-recommends chromium socat", + ) + with open(dockerfile_path, "w") as f: + f.write(content) + + # Submit Cloud Build (generates cloudbuild.yaml in temp dir) + _SubmitCloudBuild( + source_dir=build_dir, + image_path=image_path, + target_arch=target_arch, + project=project, + cloud_build_sa=cloud_build_sa, + ) + + logger.info("Chrome Sandbox image built successfully.") + finally: + shutil.rmtree(tmp_dir, ignore_errors=True) + + +def 
def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_build_sa):
    """Build and push the Sandbox Router image.

    Sparse-clones the router source from the agent-sandbox repository into a
    temporary directory, submits it to Cloud Build, and removes the directory
    afterwards.

    Args:
        project: GCP project ID.
        region: GCP region; not read by this function.
        target_arch: Target architecture passed on to _SubmitCloudBuild.
        image_path: Full destination image path including tag.
        cloud_build_sa: Cloud Build service account email.

    Raises:
        RuntimeError: If the cloned source has no Dockerfile.
    """
    logger.info("Building Sandbox Router image: %s", image_path)

    router_subpath = ("clients", "python", "agentic-sandbox-client", "sandbox-router")
    tmp_dir = tempfile.mkdtemp(prefix="sandbox-router-")
    try:
        # Clone agent-sandbox repo (sparse checkout)
        logger.info("Cloning agent-sandbox router source...")
        _RunCmd(
            [
                "git",
                "clone",
                "--depth",
                "1",
                "--filter=blob:none",
                "--sparse",
                "https://github.com/kubernetes-sigs/agent-sandbox.git",
                tmp_dir,
            ]
        )
        _RunCmd(
            ["git", "sparse-checkout", "set", "/".join(router_subpath)],
            cwd=tmp_dir,
        )

        build_dir = os.path.join(tmp_dir, *router_subpath)
        if not os.path.isfile(os.path.join(build_dir, "Dockerfile")):
            raise RuntimeError(f"sandbox-router Dockerfile not found at {build_dir}")

        # Submit Cloud Build (generates cloudbuild.yaml in temp dir)
        _SubmitCloudBuild(
            source_dir=build_dir,
            image_path=image_path,
            target_arch=target_arch,
            project=project,
            cloud_build_sa=cloud_build_sa,
        )

        logger.info("Sandbox Router image built successfully.")
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


def _GenerateEnvFile(
    agent_dir, project, region, machine_type=None, namespace="agentic"
):
    """Render generated.env from generated.env.template in agent_dir.

    Each "${NAME}" placeholder listed below is replaced by its value; any
    other text in the template is copied unchanged. If the template is
    missing, a warning is logged and nothing is written.

    Args:
        agent_dir: Directory holding the template; the output is written here.
        project: GCP project ID.
        region: GCP region.
        machine_type: Machine type string; its family (text before the first
            "-") selects the cluster-name suffix. Defaults to the c4 family.
        namespace: Kubernetes namespace used for the agent and router URL.
    """
    template_path = os.path.join(agent_dir, "generated.env.template")
    output_path = os.path.join(agent_dir, "generated.env")

    if not os.path.isfile(template_path):
        logger.warning(
            "generated.env.template not found at %s, skipping.", template_path
        )
        return

    # Explicit encoding: the result must not depend on the host locale.
    with open(template_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Derive cluster name
    machine_family = machine_type.split("-")[0] if machine_type else "c4"
    suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"}
    cluster_suffix = suffix_map.get(machine_family, "c4")

    # Username prefix for the cluster name; `or` also covers an empty USER.
    user = os.environ.get("USER") or "benchmark"
    user_prefix = user.split(".")[0]
    cluster_name = f"{user_prefix}-agentic-{cluster_suffix}"

    # Substitute variables
    replacements = {
        "${CLUSTER_NAME}": cluster_name,
        "${GOOGLE_CLOUD_PROJECT}": project,
        "${GOOGLE_CLOUD_LOCATION}": region,
        "${AGENTIC_NAMESPACE}": namespace,
        "${GOOGLE_GENAI_USE_VERTEXAI}": "true",
        "${SANDBOX_ROUTER_URL}": f"http://sandbox-router-svc.{namespace}.svc.cluster.local:8080",
        "${SAMPLE_COUNT}": "20",
        "${SAMPLE_WARMUP}": "0",
        "${PAYLOAD_SIZE_MB}": "1",
        "${PAYLOAD_ITERATIONS}": "20",
    }

    for key, value in replacements.items():
        content = content.replace(key, value)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)

    logger.info("Generated %s", output_path)
+ """ + cloudbuild_content = """steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.'] + env: + - 'DOCKER_BUILDKIT=1' +images: + - '${_IMAGE_PATH}' +options: + logging: CLOUD_LOGGING_ONLY +substitutions: + _IMAGE_PATH: '' + _PLATFORM: 'linux/amd64' +""" + cloudbuild_path = os.path.join(source_dir, "cloudbuild.yaml") + with open(cloudbuild_path, "w") as f: + f.write(cloudbuild_content) + + _RunCmd( + [ + "gcloud", + "builds", + "submit", + source_dir, + f"--config={cloudbuild_path}", + f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}", + f"--project={project}", + f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}", + ] + ) + + +def _FindRepoRoot(): + """Find the repository root by looking for known markers.""" + # Try relative to this file + this_dir = os.path.dirname(os.path.abspath(__file__)) + # Expected: perfkitbenchmarker/linux_benchmarks/ -> go up 2 levels + candidate = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(this_dir)))) + if os.path.isdir(os.path.join(candidate, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): + return candidate + + # Try CWD + cwd = os.getcwd() + if os.path.isdir(os.path.join(cwd, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): + return cwd + + # Try parent of CWD + parent = os.path.dirname(cwd) + if os.path.isdir(os.path.join(parent, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): + return parent + + raise RuntimeError( + "Cannot locate repository root (looking for perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/). " + "Run from the repository root directory." 
def _RunCmd(cmd, cwd=None, timeout=600):
    """Run a command (no shell) and return its stdout, raising on failure.

    Args:
      cmd: Command as a list of argument strings.
      cwd: Optional working directory for the child process.
      timeout: Maximum seconds to wait for the command. Defaults to 600, the
        value that was previously hard-coded.

    Returns:
      The captured stdout of the command as text.

    Raises:
      RuntimeError: If the command exits non-zero. The message carries the
        return code, the command line and the last 500 characters of stderr.
      subprocess.TimeoutExpired: If the command runs longer than ``timeout``.
    """
    logger.info(" CMD: %s", " ".join(cmd))
    # The child inherits the caller's environment unchanged. TLS certificate
    # validation is deliberately NOT switched off here: forcing
    # CLOUDSDK_AUTH_DISABLE_SSL_VALIDATION=true would disable certificate
    # checks for every gcloud call made through this helper. Operators who
    # really need it (e.g. behind a TLS-inspecting proxy) can export the
    # variable themselves and it will be passed through.
    proc = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        cwd=cwd,
        timeout=timeout,
    )

    if proc.returncode != 0:
        stderr_tail = (proc.stderr or "")[-500:]
        raise RuntimeError(
            f"Command failed (rc={proc.returncode}): {' '.join(cmd)}\n"
            f"stderr: {stderr_tail}"
        )
    return proc.stdout
+ +Usage: + python pkb.py --benchmarks=gke_payload \ + --gke_payload_size_mb=50 \ + --gke_payload_iterations=20 \ + --gke_payload_concurrent_sessions=5 \ + --gke_namespace=agentic \ + --gke_api_url=http://localhost:8080 + +Samples emitted (per run): + - gke_payload_orchestrator_transfer_mean (ms) + - gke_payload_orchestrator_transfer_p50 (ms) + - gke_payload_orchestrator_transfer_p95 (ms) + - gke_payload_orchestrator_transfer_p99 (ms) + - gke_payload_orchestrator_transfer_min (ms) + - gke_payload_orchestrator_transfer_max (ms) + - gke_payload_sandbox_payload_size_bytes (bytes) + - gke_payload_sandbox_payload_encoded_size_bytes (bytes) + - gke_payload_sandbox_payload_iterations (count) + - gke_payload_sandbox_generation_time_mean (ms) + - gke_payload_sandbox_generation_time_p50 (ms) + - gke_payload_sandbox_generation_time_p95 (ms) + - gke_payload_sandbox_generation_time_p99 (ms) + - gke_payload_sandbox_generation_time_min (ms) + - gke_payload_sandbox_generation_time_max (ms) + - gke_payload_sandbox_serialization_time_mean (ms) + - gke_payload_sandbox_serialization_time_p50 (ms) + - gke_payload_sandbox_serialization_time_p95 (ms) + - gke_payload_sandbox_serialization_time_p99 (ms) + - gke_payload_sandbox_serialization_time_min (ms) + - gke_payload_sandbox_serialization_time_max (ms) + - gke_payload_sandbox_stdout_time_mean (ms) + - gke_payload_sandbox_stdout_time_p50 (ms) + - gke_payload_sandbox_stdout_time_p95 (ms) + - gke_payload_sandbox_stdout_time_p99 (ms) + - gke_payload_sandbox_stdout_time_min (ms) + - gke_payload_sandbox_stdout_time_max (ms) + - gke_payload_sandbox_transfer_time_mean (ms) + - gke_payload_sandbox_transfer_time_p50 (ms) + - gke_payload_sandbox_transfer_time_p95 (ms) + - gke_payload_sandbox_transfer_time_p99 (ms) + - gke_payload_sandbox_transfer_time_min (ms) + - gke_payload_sandbox_transfer_time_max (ms) + - gke_payload_sandbox_throughput_mean (MB/s) + - gke_payload_sandbox_throughput_p50 (MB/s) + - gke_payload_sandbox_throughput_min (MB/s) + - 
import logging
import time

from absl import flags
from perfkitbenchmarker import configs
from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
    gke_benchmark_utils as utils,
)
from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import (
    gke_deploy_utils as deploy_utils,
)
from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils

FLAGS = flags.FLAGS

# Benchmark identifier; also used as the prefix of every emitted metric name.
BENCHMARK_NAME = "gke_payload"
# YAML config handed to configs.LoadConfig() in GetConfig(); it only carries
# a description.
BENCHMARK_CONFIG = """
gke_payload:
  description: >
    Atomic single-point payload transfer saturation measurement on a
    pre-provisioned GKE cluster with gVisor isolation.
"""

# Warm pool name and label selector passed to the warm-pool helpers in
# gke_benchmark_utils (PatchWarmPool / DrainWarmPool).
_WARMPOOL_NAME = "python-sandbox-warmpool"
_WARMPOOL_LABEL = "sandbox=python-sandbox-example"

# ---------------------------------------------------------------------------
# Benchmark-specific flags
# ---------------------------------------------------------------------------

# Sent to the agent API as "payload_size_mb".
flags.DEFINE_float(
    "gke_payload_size_mb",
    1.0,
    "Payload size in megabytes to transfer from the sandbox.",
)

# Sent to the agent API as "payload_iterations".
flags.DEFINE_integer(
    "gke_payload_iterations",
    20,
    "Number of transfer iterations per sandbox session.",
)

# Sent to the agent API as "concurrent_sessions"; also the replica count the
# warm pool is patched to when gke_payload_patch_warmpool is set.
flags.DEFINE_integer(
    "gke_payload_concurrent_sessions",
    5,
    "Number of parallel sandbox sessions.",
)

# Sent to the agent API as "sandbox_exec_timeout_s".
flags.DEFINE_integer(
    "gke_payload_exec_timeout",
    300,
    "Sandbox command execution timeout in seconds.",
)

# When True, Run() patches the warm pool before calling the agent API.
flags.DEFINE_bool(
    "gke_payload_patch_warmpool",
    True,
    "Patch SandboxWarmPool replicas to match concurrent_sessions before measurement.",
)


# ---------------------------------------------------------------------------
# Lifecycle
# ---------------------------------------------------------------------------


def Provision(benchmark_spec):
    """Provision GKE cluster and all dependencies.

    Delegates entirely to gke_provision_utils.Provision().

    Args:
      benchmark_spec: The PKB benchmark spec (unused here).
    """
    gke_provision_utils.Provision()
def GetConfig(user_config):
    """Load and return benchmark config.

    BENCHMARK_CONFIG declares no vm_groups. The original author notes that PKB
    therefore skips Provision() and Teardown() (not verifiable from this file).

    Args:
      user_config: User-supplied config overrides.

    Returns:
      The merged benchmark config produced by configs.LoadConfig().
    """
    return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)


def Prepare(benchmark_spec):
    """Deploy workloads and verify agent API.

    Args:
      benchmark_spec: The PKB benchmark spec (unused here).
    """
    logging.info("=== Prepare: deploying workloads ===")
    deploy_utils.DeployWorkloads()
    utils.CheckAgentHealthz(required=False)
    utils.EnsurePortForward()
    logging.info("Prepare complete.")


def _PayloadMetricSpecs():
    """Return (aggregate_key, metric_suffix, unit) triples in emission order."""
    latency_stats = ("mean", "p50", "p95", "p99", "min", "max")

    # Orchestrator-side transfer latency
    specs = [
        (f"orchestrator_transfer_{stat}_ms", f"orchestrator_transfer_{stat}", "ms")
        for stat in latency_stats
    ]

    # Payload metadata (aggregate key and metric suffix are identical)
    specs += [
        ("sandbox_payload_size_bytes", "sandbox_payload_size_bytes", "bytes"),
        (
            "sandbox_payload_encoded_size_bytes",
            "sandbox_payload_encoded_size_bytes",
            "bytes",
        ),
        ("sandbox_payload_iterations", "sandbox_payload_iterations", "count"),
    ]

    # Sandbox-side phases: generation (os.urandom), serialization (base64
    # encode), stdout write, and transfer (serialization + stdout write).
    for phase in ("generation", "serialization", "stdout", "transfer"):
        specs += [
            (f"sandbox_{phase}_time_{stat}_ms", f"sandbox_{phase}_time_{stat}", "ms")
            for stat in latency_stats
        ]

    # Throughput
    specs += [
        (f"sandbox_throughput_{stat}_mbps", f"sandbox_throughput_{stat}", "MB/s")
        for stat in ("mean", "p50", "min")
    ]

    # RSS
    specs += [
        (f"sandbox_rss_{point}_mb", f"sandbox_rss_{point}", "MB")
        for point in ("start", "end", "growth")
    ]
    return specs


def Run(benchmark_spec):
    """Execute a single payload transfer measurement and return samples.

    Posts one request to the agent API endpoint ``/benchmark/python/payload``
    and converts the ``aggregate`` section of the response into samples. A
    metric is emitted only when its key is present in the aggregate; the
    ``wall_time`` sample is always emitted.

    Args:
      benchmark_spec: The PKB benchmark spec (unused here).

    Returns:
      List of sample.Sample objects.
    """
    ns = FLAGS.gke_namespace
    payload_size_mb = FLAGS.gke_payload_size_mb
    iterations = FLAGS.gke_payload_iterations
    concurrent = FLAGS.gke_payload_concurrent_sessions

    logging.info(
        "=== Run: payload_size_mb=%s, iterations=%d, concurrent=%d ===",
        payload_size_mb,
        iterations,
        concurrent,
    )

    # Ensure port-forward is active (needed when sweeps skip Prepare)
    utils.EnsurePortForward()

    # Patch warm pool (moved from Prepare for sweep compatibility)
    if FLAGS.gke_payload_patch_warmpool:
        utils.PatchWarmPool(
            namespace=ns,
            warmpool_name=_WARMPOOL_NAME,
            replicas=concurrent,
            label=_WARMPOOL_LABEL,
        )

    # POST to agent API
    payload = {
        "payload_size_mb": payload_size_mb,
        "payload_iterations": iterations,
        "concurrent_sessions": concurrent,
        "sandbox_exec_timeout_s": FLAGS.gke_payload_exec_timeout,
    }

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments (NTP steps) during a long request.
    t0 = time.perf_counter()
    result = utils.CallAgentApi("/benchmark/python/payload", payload)
    wall_time = time.perf_counter() - t0

    successful = result.get("successful_sessions", 0)
    failed = result.get("failed_sessions", 0)
    # A response carrying "aggregate": null must not crash sample emission.
    agg = result.get("aggregate") or {}

    logging.info(
        "API response: %d successful, %d failed sessions (%.1fs)",
        successful,
        failed,
        wall_time,
    )

    # Metadata attached to every sample
    extra = {
        "payload_size_mb": payload_size_mb,
        "payload_iterations": iterations,
        "concurrent_sessions": concurrent,
        "successful_sessions": successful,
        "failed_sessions": failed,
        "wall_time_s": round(wall_time, 2),
    }

    samples = []
    for agg_key, metric_suffix, unit in _PayloadMetricSpecs():
        _emit(samples, agg, agg_key, metric_suffix, unit, ns, extra)

    # Wall time
    samples.append(
        utils.MakeSample(
            f"{BENCHMARK_NAME}_wall_time",
            round(wall_time, 2),
            "seconds",
            ns,
            extra,
        )
    )

    logging.info(
        "Emitted %d samples for payload_size_mb=%s.", len(samples), payload_size_mb
    )
    return samples
def Cleanup(benchmark_spec):
    """Clean up after measurement. Scale warm pool to 0.

    The port-forward is stopped even when draining the warm pool raises, so a
    failed drain does not leave the port-forward running.

    Args:
      benchmark_spec: The PKB benchmark spec (unused here).
    """
    ns = FLAGS.gke_namespace
    logging.info("Cleanup: draining warm pool.")

    try:
        utils.DrainWarmPool(
            namespace=ns,
            warmpool_name=_WARMPOOL_NAME,
            label=_WARMPOOL_LABEL,
        )
    finally:
        utils.StopPortForward()
    logging.info("Cleanup complete (cluster persists).")


def Teardown(benchmark_spec):
    """Teardown GKE cluster and all dependencies.

    Delegates entirely to gke_provision_utils.Teardown().

    Args:
      benchmark_spec: The PKB benchmark spec (unused here).
    """
    gke_provision_utils.Teardown()


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
    """Emit a sample if the key exists in the aggregate dict.

    Args:
      samples: List the new sample is appended to (mutated in place).
      agg: Aggregate dict from the agent API response.
      agg_key: Key to look up in ``agg``.
      metric_suffix: Appended to ``BENCHMARK_NAME + "_"`` to form the metric.
      unit: Unit string for the sample.
      namespace: Passed through to utils.MakeSample().
      extra: Metadata dict passed through to utils.MakeSample().
    """
    value = agg.get(agg_key)
    # A missing key and an explicit null are both skipped.
    if value is not None:
        samples.append(
            utils.MakeSample(
                f"{BENCHMARK_NAME}_{metric_suffix}",
                value,
                unit,
                namespace,
                extra,
            )
        )
import argparse
import logging
import os
import shlex
import subprocess
import sys
import time

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _argv(cmd):
    """Return cmd as an argument list, tokenizing strings with shell quoting rules."""
    # shlex.split keeps quoted arguments together; a plain str.split() would
    # tear 'a "b c"' into three tokens and leave the quote characters in.
    return cmd if isinstance(cmd, list) else shlex.split(cmd)


def _run(cmd, check=True, timeout=300, capture=False):
    """Run a command (no shell), logging it first.

    Args:
      cmd: Argument list, or a single command string that is tokenized with
        shell-style quoting rules.
      check: If True, raise when the command exits non-zero.
      timeout: Maximum seconds to wait for the command.
      capture: If True, capture stdout/stderr on the result; otherwise the
        child writes straight to this process's stdout/stderr.

    Returns:
      The subprocess.CompletedProcess for the command.

    Raises:
      RuntimeError: If ``check`` is True and the command exits non-zero.
      subprocess.TimeoutExpired: If the command runs longer than ``timeout``.
    """
    cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd
    logging.info("CMD: %s", cmd_str)
    result = subprocess.run(
        _argv(cmd),
        capture_output=capture,
        text=True,
        timeout=timeout,
    )
    if check and result.returncode != 0:
        # Without capture the child's stderr was already written to the
        # terminal, so there is nothing to repeat here.
        stderr = result.stderr if capture else ""
        logging.error("Command failed (rc=%d): %s", result.returncode, stderr)
        raise RuntimeError(f"Command failed: {cmd_str}")
    return result


def _exists(cmd):
    """Return True if a gcloud describe/get command succeeds.

    Args:
      cmd: Argument list, or a command string tokenized like in _run().

    Returns:
      True when the command exits with status 0, False otherwise.

    Raises:
      subprocess.TimeoutExpired: If the command runs longer than 60 seconds.
    """
    result = subprocess.run(
        _argv(cmd),
        capture_output=True,
        text=True,
        timeout=60,
    )
    return result.returncode == 0
def _derive_config(args):
    """Derive configuration values from arguments.

    Args:
      args: Parsed CLI namespace; reads ``machine_type``, ``subnet_cidr``,
        ``project_id`` and ``region``.

    Returns:
      Dict of derived names and settings: user prefix, machine family, disk
      type, target architecture, cluster suffix, master CIDR, network resource
      names, Artifact Registry repo names, Cloud Build service account and the
      three container image references.
    """
    # Text before the first "." of $USER. Fall back to "pkb" when that is
    # empty (USER unset, empty, or starting with "."), otherwise resource
    # names would start with "-".
    user_prefix = os.environ.get("USER", "pkb").split(".")[0] or "pkb"
    machine_family = args.machine_type.split("-")[0]

    # Disk type
    disk_type = "pd-balanced" if machine_family == "c3" else "hyperdisk-balanced"

    # Architecture
    target_arch = "arm64" if machine_family == "c4a" else "amd64"

    # Cluster suffix
    cluster_suffix = "c3metal" if "metal" in args.machine_type else machine_family

    # Master CIDR (unique per cluster suffix)
    master_cidrs = {
        "c4": "172.16.0.0/28",
        "c4d": "172.16.0.16/28",
        "c4a": "172.16.0.32/28",
        "c3metal": "172.16.0.48/28",
    }
    master_cidr = master_cidrs.get(cluster_suffix, "172.16.0.64/28")

    adk_repo_name = "adk-repo"
    sandbox_repo_name = "agent-sandbox"
    cloud_build_sa = "adk-cloud-build-sa"
    # Image references are built from the repo names above so the two cannot
    # drift apart.
    registry = f"{args.region}-docker.pkg.dev/{args.project_id}"

    return {
        "user_prefix": user_prefix,
        "machine_family": machine_family,
        "disk_type": disk_type,
        "target_arch": target_arch,
        "cluster_suffix": cluster_suffix,
        "master_cidr": master_cidr,
        "vpc_name": f"{user_prefix}-agentic-vpc",
        "subnet_name": f"{user_prefix}-agentic-subnet",
        "subnet_cidr": args.subnet_cidr,
        "router_name": f"{user_prefix}-agentic-nat-router",
        "nat_name": f"{user_prefix}-agentic-nat-config",
        "adk_repo_name": adk_repo_name,
        "sandbox_repo_name": sandbox_repo_name,
        "cloud_build_sa": cloud_build_sa,
        "cloud_build_sa_email": f"{cloud_build_sa}@{args.project_id}.iam.gserviceaccount.com",
        "adk_image": f"{registry}/{adk_repo_name}/adk-agent:{target_arch}",
        "chromium_image": f"{registry}/{sandbox_repo_name}/chrome-sandbox:{target_arch}",
        "router_image": f"{registry}/{sandbox_repo_name}/sandbox-router:{target_arch}",
    }
def enable_apis(args):
    """Enable the GCP service APIs listed below in a single gcloud call.

    Args:
      args: Parsed CLI namespace; reads ``project_id``.
    """
    logging.info("=== Enabling GCP APIs ===")
    required_services = (
        "container.googleapis.com",
        "artifactregistry.googleapis.com",
        "cloudbuild.googleapis.com",
        "aiplatform.googleapis.com",
        "storage.googleapis.com",
        "iam.googleapis.com",
        "connectgateway.googleapis.com",
        "gkehub.googleapis.com",
        "gkeconnect.googleapis.com",
        "iap.googleapis.com",
    )
    enable_cmd = ["gcloud", "services", "enable"]
    enable_cmd.extend(required_services)
    enable_cmd.append(f"--project={args.project_id}")
    _run(enable_cmd)
    logging.info("APIs enabled.")


def create_vpc(args, config):
    """Create the custom-mode VPC unless it already exists.

    Args:
      args: Parsed CLI namespace; reads ``project_id``.
      config: Derived config dict; reads ``vpc_name``.
    """
    logging.info("=== Creating VPC ===")
    vpc = config["vpc_name"]
    project_flag = f"--project={args.project_id}"

    already_there = _exists(
        ["gcloud", "compute", "networks", "describe", vpc, project_flag]
    )
    if already_there:
        logging.info("VPC %s already exists.", vpc)
        return

    _run(
        [
            "gcloud", "compute", "networks", "create", vpc,
            "--subnet-mode=custom",
            project_flag,
        ]
    )
    logging.info("VPC %s created.", vpc)


def create_subnet(args, config):
    """Create the benchmark subnet inside the VPC unless it already exists.

    Args:
      args: Parsed CLI namespace; reads ``region`` and ``project_id``.
      config: Derived config dict; reads ``subnet_name``, ``vpc_name`` and
        ``subnet_cidr``.
    """
    logging.info("=== Creating Subnet ===")
    subnet = config["subnet_name"]
    region_flag = f"--region={args.region}"
    project_flag = f"--project={args.project_id}"
    subnets_cmd = ["gcloud", "compute", "networks", "subnets"]

    if _exists(subnets_cmd + ["describe", subnet, region_flag, project_flag]):
        logging.info("Subnet %s already exists.", subnet)
        return

    _run(
        subnets_cmd
        + [
            "create",
            subnet,
            f"--network={config['vpc_name']}",
            region_flag,
            f"--range={config['subnet_cidr']}",
            project_flag,
        ]
    )
    logging.info("Subnet %s created.", subnet)
def create_firewall_rules(args, config):
    """Create the two ingress firewall rules, skipping any that already exist.

    The rules allow tcp:22 from 35.235.240.0/20 (named "...-allow-iap-ssh")
    and tcp/udp/icmp from the benchmark subnet's own CIDR
    ("...-allow-internal"), both at priority 1000.

    Args:
      args: Parsed CLI namespace; reads ``project_id``.
      config: Derived config dict; reads ``vpc_name`` and ``subnet_cidr``.
    """
    logging.info("=== Creating Firewall Rules ===")

    vpc = config["vpc_name"]
    project_flag = f"--project={args.project_id}"
    # (rule name, allowed protocols/ports, source ranges, priority)
    rule_specs = (
        (f"{vpc}-allow-iap-ssh", "tcp:22", "35.235.240.0/20", "1000"),
        (f"{vpc}-allow-internal", "tcp,udp,icmp", config["subnet_cidr"], "1000"),
    )

    for rule_name, allowed, sources, priority in rule_specs:
        describe_cmd = [
            "gcloud", "compute", "firewall-rules", "describe", rule_name,
            project_flag,
        ]
        if _exists(describe_cmd):
            logging.info("Firewall rule %s already exists.", rule_name)
            continue

        _run(
            [
                "gcloud", "compute", "firewall-rules", "create", rule_name,
                f"--network={vpc}",
                "--direction=INGRESS",
                "--action=ALLOW",
                f"--rules={allowed}",
                f"--source-ranges={sources}",
                f"--priority={priority}",
                project_flag,
            ]
        )
        logging.info("Firewall rule %s created.", rule_name)


def create_router_and_nat(args, config):
    """Create Cloud Router and NAT for private node internet access.

    Each resource is created only when its describe command fails.

    Args:
      args: Parsed CLI namespace; reads ``region`` and ``project_id``.
      config: Derived config dict; reads ``router_name``, ``nat_name`` and
        ``vpc_name``.
    """
    logging.info("=== Creating Cloud Router + NAT ===")

    router = config["router_name"]
    nat = config["nat_name"]
    region_flag = f"--region={args.region}"
    project_flag = f"--project={args.project_id}"
    router_flag = f"--router={router}"

    # Router
    router_present = _exists(
        ["gcloud", "compute", "routers", "describe", router, region_flag, project_flag]
    )
    if router_present:
        logging.info("Router %s already exists.", router)
    else:
        _run(
            [
                "gcloud", "compute", "routers", "create", router,
                f"--network={config['vpc_name']}",
                region_flag,
                project_flag,
            ]
        )
        logging.info("Router %s created.", router)

    # NAT
    nat_present = _exists(
        [
            "gcloud", "compute", "routers", "nats", "describe", nat,
            router_flag,
            region_flag,
            project_flag,
        ]
    )
    if nat_present:
        logging.info("NAT %s already exists.", nat)
    else:
        _run(
            [
                "gcloud", "compute", "routers", "nats", "create", nat,
                router_flag,
                region_flag,
                "--nat-all-subnet-ip-ranges",
                "--auto-allocate-nat-external-ips",
                project_flag,
            ]
        )
        logging.info("NAT %s created.", nat)
def create_artifact_registry(args, config):
    """Create Artifact Registry repositories.

    Creates the Docker-format repos named by ``adk_repo_name`` and
    ``sandbox_repo_name``, skipping any that already exist.

    Args:
      args: Parsed CLI namespace; reads ``region`` and ``project_id``.
      config: Derived config dict; reads the two repo names.
    """
    logging.info("=== Creating Artifact Registry Repos ===")

    for repo in [config["adk_repo_name"], config["sandbox_repo_name"]]:
        # Use the shared _exists() probe like every other setup step instead
        # of a hand-rolled subprocess call.
        if _exists([
            "gcloud", "artifacts", "repositories", "describe", repo,
            f"--location={args.region}",
            f"--project={args.project_id}",
        ]):
            logging.info("AR repo %s already exists.", repo)
            continue

        _run([
            "gcloud", "artifacts", "repositories", "create", repo,
            "--repository-format=docker",
            f"--location={args.region}",
            f"--project={args.project_id}",
        ])
        logging.info("AR repo %s created.", repo)


def create_cloud_build_sa(args, config):
    """Create Cloud Build service account and bind IAM roles.

    Role binding stays best-effort (a failed binding does not abort setup),
    but failures are now reported instead of being logged as success.

    Args:
      args: Parsed CLI namespace; reads ``project_id``.
      config: Derived config dict; reads ``cloud_build_sa`` and
        ``cloud_build_sa_email``.
    """
    logging.info("=== Creating Cloud Build SA ===")

    sa_email = config["cloud_build_sa_email"]

    # Create SA
    if not _exists([
        "gcloud", "iam", "service-accounts", "describe", sa_email,
        f"--project={args.project_id}",
    ]):
        _run([
            "gcloud", "iam", "service-accounts", "create",
            config["cloud_build_sa"],
            f"--display-name={config['cloud_build_sa']}",
            f"--project={args.project_id}",
        ])
        logging.info("SA %s created. Waiting for propagation...", sa_email)
        # Fixed pause before binding roles to the freshly created account.
        time.sleep(10)
    else:
        logging.info("SA %s already exists.", sa_email)

    # Bind roles
    roles = [
        "roles/logging.logWriter",
        "roles/storage.objectViewer",
        "roles/artifactregistry.writer",
        "roles/serviceusage.serviceUsageConsumer",
    ]
    failed_roles = []
    for role in roles:
        result = _run([
            "gcloud", "projects", "add-iam-policy-binding", args.project_id,
            f"--member=serviceAccount:{sa_email}",
            f"--role={role}",
            "--condition=None", "--quiet",
        ], check=False)
        if result.returncode != 0:
            failed_roles.append(role)

    if failed_roles:
        logging.warning(
            "Failed to bind roles to %s: %s", sa_email, ", ".join(failed_roles)
        )
    else:
        logging.info("Cloud Build SA roles bound.")
def build_images(args, config):
    """Build and push the container images through the shared build module.

    Does nothing except log when ``--skip_image_build`` was given. Otherwise
    hands off to gke_image_build_utils.build_images_with_config() so the
    Cloud Build logic lives in one place.

    Args:
      args: Parsed CLI namespace; reads ``skip_image_build``, ``project_id``,
        ``region`` and ``machine_type``.
      config: Derived config dict; reads ``cloud_build_sa_email``.
    """
    if args.skip_image_build:
        logging.info("=== Skipping Image Builds (--skip_image_build) ===")
        return

    logging.info("=== Building Container Images ===")

    # Imported here rather than at module top, as in the original.
    from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils

    build_kwargs = {
        "project": args.project_id,
        "region": args.region,
        "machine_type": args.machine_type,
        "cloud_build_sa": config["cloud_build_sa_email"],
    }
    gke_image_build_utils.build_images_with_config(**build_kwargs)

    logging.info("=== Image builds complete ===")


# ---------------------------------------------------------------------------
# Teardown Steps
# ---------------------------------------------------------------------------


def teardown(args, config):
    """Delete the prerequisite resources, ignoring individual failures.

    Order: Artifact Registry repos (unless ``--keep_images``), Cloud Build SA
    role bindings and the SA itself, NAT, router, firewall rules, subnet, VPC.
    Every gcloud call runs with ``--quiet`` and ``check=False``.

    Args:
      args: Parsed CLI namespace; reads ``keep_images``, ``region`` and
        ``project_id``.
      config: Derived config dict with the resource names to delete.
    """
    logging.info("=== Prerequisite Teardown ===")

    project_flag = f"--project={args.project_id}"
    region_flag = f"--region={args.region}"

    def _best_effort(*argv):
        # Every teardown command ends with --quiet and never aborts the run.
        _run([*argv, "--quiet"], check=False)

    # AR repos
    if args.keep_images:
        logging.info("Keeping AR repos (--keep_images).")
    else:
        logging.info("Deleting Artifact Registry repos...")
        for repo in (config["adk_repo_name"], config["sandbox_repo_name"]):
            _best_effort(
                "gcloud", "artifacts", "repositories", "delete", repo,
                f"--location={args.region}",
                project_flag,
            )

    # Cloud Build SA
    logging.info("Deleting Cloud Build SA...")
    sa_email = config["cloud_build_sa_email"]
    member_flag = f"--member=serviceAccount:{sa_email}"
    for role in (
        "roles/logging.logWriter",
        "roles/storage.objectViewer",
        "roles/artifactregistry.writer",
        "roles/serviceusage.serviceUsageConsumer",
    ):
        _best_effort(
            "gcloud", "projects", "remove-iam-policy-binding", args.project_id,
            member_flag,
            f"--role={role}",
        )
    _best_effort(
        "gcloud", "iam", "service-accounts", "delete", sa_email,
        project_flag,
    )

    # NAT + Router
    logging.info("Deleting NAT + Router...")
    _best_effort(
        "gcloud", "compute", "routers", "nats", "delete", config["nat_name"],
        f"--router={config['router_name']}",
        region_flag,
        project_flag,
    )
    _best_effort(
        "gcloud", "compute", "routers", "delete", config["router_name"],
        region_flag,
        project_flag,
    )

    # Firewall rules
    logging.info("Deleting firewall rules...")
    for suffix in ("allow-iap-ssh", "allow-internal"):
        _best_effort(
            "gcloud", "compute", "firewall-rules", "delete",
            f"{config['vpc_name']}-{suffix}",
            project_flag,
        )

    # Subnet + VPC
    logging.info("Deleting subnet + VPC...")
    _best_effort(
        "gcloud", "compute", "networks", "subnets", "delete",
        config["subnet_name"],
        region_flag,
        project_flag,
    )
    _best_effort(
        "gcloud", "compute", "networks", "delete", config["vpc_name"],
        project_flag,
    )

    logging.info("=== Prerequisite Teardown Complete ===")
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def parse_args(argv=None):
    """Parse the command-line arguments of the prerequisite setup script.

    Args:
      argv: Optional list of argument strings. When None (the default),
        argparse reads ``sys.argv[1:]`` exactly as before.

    Returns:
      argparse.Namespace with ``project_id``, ``region``, ``zone``,
      ``machine_type``, ``subnet_cidr``, ``skip_image_build``, ``teardown``
      and ``keep_images``.
    """
    p = argparse.ArgumentParser(
        description="Prerequisite Setup for GKE Agentic Benchmarking",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--project_id", required=True, help="GCP project ID")
    p.add_argument("--region", default="us-central1", help="GCP region (default: us-central1)")
    p.add_argument("--zone", default="us-central1-a", help="GCP zone (default: us-central1-a)")
    p.add_argument("--machine_type", default="c4-standard-8",
                   help="Machine type for sandbox nodes (default: c4-standard-8)")
    p.add_argument("--subnet_cidr", default="10.134.20.0/24",
                   help="Subnet CIDR range (default: 10.134.20.0/24)")
    p.add_argument("--skip_image_build", action="store_true", default=False,
                   help="Skip container image builds")
    p.add_argument("--teardown", action="store_true", default=False,
                   help="Tear down prerequisite resources instead of creating them")
    p.add_argument("--keep_images", action="store_true", default=False,
                   help="Keep AR repos during teardown")
    return p.parse_args(argv)


def main(argv=None):
    """Run prerequisite setup, or teardown when ``--teardown`` is given.

    Args:
      argv: Optional list of argument strings forwarded to parse_args();
        None means the process command line.
    """
    args = parse_args(argv)
    config = _derive_config(args)

    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Project: {args.project_id}")
    print(f"Region: {args.region}")
    print(f"Zone: {args.zone}")
    print(f"Machine Type: {args.machine_type}")
    print(f"VPC: {config['vpc_name']}")
    print(f"Subnet: {config['subnet_name']} ({config['subnet_cidr']})")
    print(f"Mode: {'TEARDOWN' if args.teardown else 'SETUP'}")
    print(f"{banner}\n")

    if args.teardown:
        teardown(args, config)
    else:
        enable_apis(args)
        create_vpc(args, config)
        create_subnet(args, config)
        create_firewall_rules(args, config)
        create_router_and_nat(args, config)
        create_artifact_registry(args, config)
        create_cloud_build_sa(args, config)
        build_images(args, config)

        print(f"\n{banner}")
        print("Prerequisite setup complete!")
        print(banner)
        # Plain strings where nothing is interpolated (no placeholder-less
        # f-strings).
        print("\nPKB flags to reference this infrastructure:")
        print(f" --gce_network_name={config['vpc_name']}")
        print("\nNext: Run PKB with container_cluster provisioning:")
        print(" python pkb.py --benchmarks=gke_python_density \\")
        print(f" --gce_network_name={config['vpc_name']} \\")
        print(f" --zone={args.zone} \\")
        print(" --gke_use_beta=true \\")
        print(f" --gke_additional_flags=\"--enable-pod-snapshots,...,--subnetwork={config['subnet_name']}\"")


if __name__ == "__main__":
    main()
GKE Agent Sandbox benchmarks. + +Provides the full GKE infrastructure lifecycle (create and destroy) used +by all seven UC benchmark scripts. Each benchmark's Provision() and +Teardown() functions delegate to the public functions in this module. + +Infrastructure created (in order): + 1. VPC + Subnet + 2. Firewall rules (IAP SSH, internal, laptop IP) + 3. Cloud Router + NAT + 4. GKE Cluster (DPv2, Workload Identity, optional Pod Snapshots) + 5. Fleet registration / credential retrieval + 6. gVisor sandbox node pool + 7. Artifact Registry repositories + 8. Cloud Build service account + IAM bindings + 9. Container images (optional, gated by --gke_skip_image_build) + +Teardown respects two flags: + --gke_teardown_keep_images: skip AR repo deletion + --gke_teardown_keep_infra: only delete K8s workloads, keep cluster/network +""" + +import logging +import subprocess +import time + +from absl import flags + +FLAGS = flags.FLAGS + +# Image build utilities (Phase 3) +# Imported after FLAGS to avoid circular dependency +# The actual import is deferred to Provision() to allow flag registration order + +# --------------------------------------------------------------------------- +# Provision/Teardown flags +# --------------------------------------------------------------------------- + +flags.DEFINE_string( + "gke_project_id", + "", + "GCP project ID for the benchmark cluster. Required for Provision/Teardown.", +) + +flags.DEFINE_string( + "gke_region", + "us-central1", + "GCP region for networking and Artifact Registry.", +) + +flags.DEFINE_string( + "gke_zone", + "us-central1-a", + "GCP zone for the GKE cluster and node pools.", +) + +flags.DEFINE_string( + "gke_sandbox_machine_type", + "c4-standard-8", + "Machine type for the gVisor sandbox node pool.", +) + +flags.DEFINE_string( + "gke_cluster_suffix", + "", + "Cluster name suffix. If empty, derived from machine family (e.g. 
def _run(cmd, timeout=300, check=True):
    """Run a command without a shell and return its CompletedProcess.

    Args:
        cmd: List of command arguments (executed directly, never via a shell).
        timeout: Max seconds to wait for the command to finish.
        check: If True, raise on non-zero exit, timeout, or launch failure.
            If False, every failure mode is reported through a non-zero
            ``returncode`` so best-effort callers (idempotency probes,
            teardown steps) never see an exception.

    Returns:
        subprocess.CompletedProcess. When ``check`` is False and the command
        timed out or could not be started, a synthetic result is returned
        with returncode 124 (timeout) or 127 (could not execute) and the
        exception text in ``stderr``.

    Raises:
        RuntimeError: ``check`` is True and the command exited non-zero.
        subprocess.TimeoutExpired: ``check`` is True and the command timed out.
        OSError: ``check`` is True and the executable could not be launched.
    """
    logging.info("CMD: %s", " ".join(cmd))
    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout,
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        if check:
            raise
        # Best-effort mode: one slow or missing binary must not abort the
        # remaining steps, so report the failure as an exit status instead.
        timed_out = isinstance(exc, subprocess.TimeoutExpired)
        logging.warning(
            "CMD %s: %s", "timed out" if timed_out else "could not start", exc
        )
        return subprocess.CompletedProcess(
            cmd, 124 if timed_out else 127, stdout="", stderr=str(exc),
        )

    if proc.returncode != 0:
        logging.warning("CMD stderr: %s", proc.stderr[-500:] if proc.stderr else "")
        if check:
            # Only the head of the command and the tail of stderr are included
            # to keep the exception message readable.
            raise RuntimeError(
                f"Command failed (rc={proc.returncode}): {' '.join(cmd[:6])}\n"
                f"{proc.stderr[-300:]}"
            )
    return proc
"c4" from "c4-standard-8") + machine_family = machine_type.split("-")[0] + + # Derive cluster suffix + cluster_suffix = FLAGS.gke_cluster_suffix + if not cluster_suffix: + if machine_family == "c3" and "metal" in machine_type: + cluster_suffix = "c3metal" + else: + cluster_suffix = machine_family + + # Derive disk type + if machine_family == "c3": + disk_type = "pd-balanced" + else: + disk_type = "hyperdisk-balanced" + + # Derive architecture + if machine_family == "c4a": + target_arch = "arm64" + else: + target_arch = "amd64" + + # Derive master CIDR + master_cidr_map = { + "c4": "172.16.0.0/28", + "c4d": "172.16.0.16/28", + "c4a": "172.16.0.32/28", + "c3metal": "172.16.0.48/28", + } + master_cidr = master_cidr_map.get(cluster_suffix, "172.16.0.64/28") + + # Use a prefix derived from project for naming + name_prefix = "pkb" + + cluster_name = f"{name_prefix}-agentic-{cluster_suffix}" + vpc_name = f"{name_prefix}-agentic-vpc" + subnet_name = f"{name_prefix}-agentic-subnet" + router_name = f"{name_prefix}-agentic-nat-router" + nat_name = f"{name_prefix}-agentic-nat-config" + sandbox_pool_name = "agentic-sandbox-pool" + adk_repo_name = "adk-repo" + sandbox_repo_name = "agent-sandbox" + cloud_build_sa = "adk-cloud-build-sa" + cloud_build_sa_email = f"{cloud_build_sa}@{project}.iam.gserviceaccount.com" + namespace = FLAGS.gke_namespace + + return { + "project": project, + "region": region, + "zone": zone, + "machine_type": machine_type, + "machine_family": machine_family, + "cluster_suffix": cluster_suffix, + "disk_type": disk_type, + "target_arch": target_arch, + "master_cidr": master_cidr, + "cluster_name": cluster_name, + "vpc_name": vpc_name, + "subnet_name": subnet_name, + "subnet_cidr": FLAGS.gke_subnet_cidr, + "router_name": router_name, + "nat_name": nat_name, + "sandbox_pool_name": sandbox_pool_name, + "adk_repo_name": adk_repo_name, + "sandbox_repo_name": sandbox_repo_name, + "cloud_build_sa": cloud_build_sa, + "cloud_build_sa_email": cloud_build_sa_email, + 
"namespace": namespace, + "gke_version": FLAGS.gke_gke_version, + "sandbox_node_count": FLAGS.gke_sandbox_node_count, + "sandbox_disk_size": FLAGS.gke_sandbox_disk_size, + "sandbox_max_pods": FLAGS.gke_sandbox_max_pods_per_node, + "use_connect_gateway": FLAGS.gke_use_connect_gateway, + "enable_pod_snapshots": FLAGS.gke_enable_pod_snapshots, + "sandbox_version": FLAGS.gke_sandbox_version, + } + + +# --------------------------------------------------------------------------- +# Provision steps +# --------------------------------------------------------------------------- + + +def _enable_apis(cfg): + """Enable required GCP services.""" + logging.info("Enabling required GCP APIs...") + apis = [ + "iap.googleapis.com", + "container.googleapis.com", + "artifactregistry.googleapis.com", + "cloudbuild.googleapis.com", + "aiplatform.googleapis.com", + "storage.googleapis.com", + "iam.googleapis.com", + "connectgateway.googleapis.com", + "gkehub.googleapis.com", + "gkeconnect.googleapis.com", + ] + _run(["gcloud", "services", "enable"] + apis + [f"--project={cfg['project']}"], + timeout=120) + + +def _create_network(cfg): + """Create VPC, subnet, firewall rules, Cloud Router, and NAT.""" + project = cfg["project"] + region = cfg["region"] + vpc = cfg["vpc_name"] + subnet = cfg["subnet_name"] + cidr = cfg["subnet_cidr"] + router = cfg["router_name"] + nat = cfg["nat_name"] + + # VPC + if not _resource_exists(["gcloud", "compute", "networks", "describe", vpc, + f"--project={project}"]): + logging.info("Creating VPC %s...", vpc) + _run(["gcloud", "compute", "networks", "create", vpc, + "--subnet-mode=custom", f"--project={project}"]) + + # Subnet + if not _resource_exists(["gcloud", "compute", "networks", "subnets", "describe", + subnet, f"--region={region}", f"--project={project}"]): + logging.info("Creating subnet %s...", subnet) + _run(["gcloud", "compute", "networks", "subnets", "create", subnet, + f"--network={vpc}", f"--region={region}", + f"--range={cidr}", 
def _create_cluster(cfg):
    """Create the benchmark GKE cluster unless it already exists.

    The cluster is zonal, uses private nodes with IP aliasing, Dataplane V2
    and a Workload Identity pool, and is pinned to ``cfg['gke_version']``
    with no release channel. When ``cfg['enable_pod_snapshots']`` is truthy
    the ``gcloud beta`` surface is used and ``--enable-pod-snapshots`` is
    appended.

    Args:
        cfg: Configuration dict as produced by ``_derive_config()``.
    """
    project_id = cfg["project"]
    location = cfg["zone"]
    name = cfg["cluster_name"]

    describe_cmd = [
        "gcloud", "container", "clusters", "describe", name,
        f"--zone={location}", f"--project={project_id}",
    ]
    if _resource_exists(describe_cmd):
        logging.info("GKE cluster %s already exists.", name)
        return

    logging.info("Creating GKE cluster %s...", name)

    # Pod Snapshots is only available through the beta command group.
    create_cmd = ["gcloud"]
    if cfg["enable_pod_snapshots"]:
        logging.info("Pod Snapshots ENABLED (using gcloud beta).")
        create_cmd.append("beta")
        trailing_flags = ["--enable-pod-snapshots"]
    else:
        trailing_flags = []

    create_cmd.extend(["container", "clusters", "create", name])
    create_cmd.extend([
        f"--zone={location}",
        f"--network={cfg['vpc_name']}",
        f"--subnetwork={cfg['subnet_name']}",
        "--enable-private-nodes",
        "--enable-ip-alias",
        f"--master-ipv4-cidr={cfg['master_cidr']}",
        f"--cluster-version={cfg['gke_version']}",
        "--no-enable-shielded-nodes",
        "--num-nodes=1",
        f"--machine-type={cfg['machine_type']}",
        f"--disk-type={cfg['disk_type']}",
        "--disk-size=50",
        "--enable-dataplane-v2",
        f"--workload-pool={project_id}.svc.id.goog",
        "--release-channel=None",
        f"--project={project_id}",
    ])
    create_cmd.extend(trailing_flags)

    # Cluster creation gets a longer timeout than the 300s default.
    _run(create_cmd, timeout=600)
    logging.info("GKE cluster %s created.", name)
def _create_artifact_registry(cfg):
    """Create the Artifact Registry Docker repositories if they are missing.

    Follows the same describe-then-create pattern as the other provision
    steps: an existing repository is left untouched, while a genuine creation
    failure (missing permission, disabled API, invalid region) raises instead
    of being silently ignored and surfacing later as a failed image push.

    Args:
        cfg: Configuration dict as produced by ``_derive_config()``.

    Raises:
        RuntimeError: A repository is missing and could not be created.
    """
    project = cfg["project"]
    region = cfg["region"]

    for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]):
        if _resource_exists([
            "gcloud", "artifacts", "repositories", "describe", repo_name,
            f"--location={region}", f"--project={project}",
        ]):
            logging.info("AR repo %s already exists.", repo_name)
            continue

        logging.info("Creating AR repo %s...", repo_name)
        _run([
            "gcloud", "artifacts", "repositories", "create", repo_name,
            "--repository-format=docker",
            f"--location={region}",
            f"--project={project}",
        ])
def _teardown_workloads(cfg):
    """Remove the benchmark's Kubernetes objects via kubectl (best effort).

    Deletes, in order: the benchmark namespace, the Agent Sandbox release
    manifests (extensions first, then the core manifest) and the
    cluster-scoped RBAC objects. Every call goes through ``_run_quiet`` so a
    non-zero exit from kubectl does not stop the remaining deletions.

    Args:
        cfg: Configuration dict; reads ``namespace`` and ``sandbox_version``.
    """
    namespace = cfg["namespace"]
    release = cfg["sandbox_version"]
    ignore_missing = "--ignore-not-found=true"

    logging.info("Deleting namespace %s...", namespace)
    _run_quiet(["kubectl", "delete", "namespace", namespace,
                ignore_missing, "--timeout=120s"])

    logging.info("Removing Agent Sandbox CRDs...")
    for manifest in ("extensions.yaml", "manifest.yaml"):
        manifest_url = (
            "https://github.com/kubernetes-sigs/agent-sandbox/releases/"
            f"download/{release}/{manifest}"
        )
        _run_quiet(["kubectl", "delete", "-f", manifest_url, ignore_missing])

    logging.info("Removing cluster-scoped RBAC...")
    rbac_objects = (
        ("clusterrolebinding", "adk-agent-sandbox-binding"),
        ("clusterrole", "adk-agent-sandbox-role"),
    )
    for kind, object_name in rbac_objects:
        _run_quiet(["kubectl", "delete", kind, object_name, ignore_missing])
"artifacts", "repositories", "delete", repo_name, + f"--location={region}", f"--project={project}", "--quiet"]) + + +def _teardown_cloud_build_sa(cfg): + """Delete Cloud Build service account and IAM bindings.""" + project = cfg["project"] + sa_email = cfg["cloud_build_sa_email"] + + roles = [ + "roles/logging.logWriter", + "roles/storage.objectViewer", + "roles/artifactregistry.writer", + "roles/serviceusage.serviceUsageConsumer", + ] + for role in roles: + _run_quiet([ + "gcloud", "projects", "remove-iam-policy-binding", project, + f"--member=serviceAccount:{sa_email}", + f"--role={role}", "--quiet", + ]) + + _run_quiet(["gcloud", "iam", "service-accounts", "delete", sa_email, + f"--project={project}", "--quiet"]) + logging.info("Cloud Build SA deleted.") + + +def _teardown_cluster(cfg): + """Delete GKE node pools and cluster.""" + project = cfg["project"] + zone = cfg["zone"] + cluster = cfg["cluster_name"] + pool_name = cfg["sandbox_pool_name"] + + logging.info("Deleting sandbox node pool %s...", pool_name) + _run_quiet(["gcloud", "container", "node-pools", "delete", pool_name, + f"--cluster={cluster}", f"--zone={zone}", + f"--project={project}", "--quiet"]) + + logging.info("Deleting GKE cluster %s...", cluster) + _run_quiet(["gcloud", "container", "clusters", "delete", cluster, + f"--zone={zone}", f"--project={project}", "--quiet"]) + + +def _teardown_network(cfg): + """Delete network resources in reverse dependency order.""" + project = cfg["project"] + region = cfg["region"] + vpc = cfg["vpc_name"] + router = cfg["router_name"] + nat = cfg["nat_name"] + subnet = cfg["subnet_name"] + + logging.info("Deleting Cloud NAT and Router...") + _run_quiet(["gcloud", "compute", "routers", "nats", "delete", nat, + f"--router={router}", f"--region={region}", + f"--project={project}", "--quiet"]) + _run_quiet(["gcloud", "compute", "routers", "delete", router, + f"--region={region}", f"--project={project}", "--quiet"]) + + logging.info("Deleting firewall rules...") + for 
suffix in ("allow-iap-ssh", "allow-internal"): + _run_quiet(["gcloud", "compute", "firewall-rules", "delete", + f"{vpc}-{suffix}", f"--project={project}", "--quiet"]) + + logging.info("Deleting subnet and VPC...") + _run_quiet(["gcloud", "compute", "networks", "subnets", "delete", subnet, + f"--region={region}", f"--project={project}", "--quiet"]) + _run_quiet(["gcloud", "compute", "networks", "delete", vpc, + f"--project={project}", "--quiet"]) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +flags.DEFINE_enum( + "gke_provision_mode", + "custom", + ["custom", "native"], + "Provisioning mode: 'custom' uses direct gcloud calls (Phase 1 logic), " + "'native' uses PKB's container_cluster with prerequisite_setup.py.", +) + +def Provision(): + """Provision GKE infrastructure. + + Mode is controlled by --gke_provision_mode: + - custom: Direct gcloud calls (full control, no PKB cluster management) + - native: PKB manages cluster via container_cluster spec. + Requires prerequisite_setup.py to have been run first. + """ + mode = FLAGS.gke_provision_mode + if mode == "native": + logging.info( + "Provision mode=native: PKB manages cluster via container_cluster. " + "Ensure prerequisite_setup.py was run first (VPC, NAT, AR, images)." 
def Teardown():
    """Teardown GKE infrastructure.

    Mode is controlled by --gke_provision_mode:
      - custom: Direct gcloud calls to delete all resources.
      - native: PKB manages cluster deletion. Run prerequisite_setup.py
        --teardown separately to clean up VPC/NAT/AR.

    In custom mode the K8s workloads are always removed. Artifact Registry
    repos are removed unless --gke_teardown_keep_images is set, and the Cloud
    Build SA, fleet membership, cluster and network are removed unless
    --gke_teardown_keep_infra is set.

    Raises:
        RuntimeError: custom mode and --gke_project_id is not set.
    """
    mode = FLAGS.gke_provision_mode
    if mode == "native":
        logging.info(
            "Teardown mode=native: PKB manages cluster deletion. "
            "Run prerequisite_setup.py --teardown to clean up VPC/NAT/AR."
        )
        return  # PKB handles cluster deletion

    logging.info("Teardown mode=custom: using direct gcloud calls.")
    cfg = _derive_config()

    logging.info("=== Teardown: project=%s cluster=%s ===",
                 cfg["project"], cfg["cluster_name"])
    logging.info(" keep_images=%s keep_infra=%s",
                 FLAGS.gke_teardown_keep_images,
                 FLAGS.gke_teardown_keep_infra)

    # Always delete workloads.
    # NOTE(review): this uses whatever kubectl context is current; confirm the
    # kubeconfig still points at this cluster when teardown runs standalone.
    _teardown_workloads(cfg)

    # Conditionally delete images
    if not FLAGS.gke_teardown_keep_images:
        _teardown_images(cfg)

    # Conditionally delete infrastructure
    if not FLAGS.gke_teardown_keep_infra:
        _teardown_cloud_build_sa(cfg)

        if cfg["use_connect_gateway"]:
            # Provisioning registers the cluster as a fleet membership for
            # Connect Gateway access. Unregister it while the cluster still
            # exists, otherwise the membership is left orphaned in the fleet.
            logging.info("Unregistering fleet membership %s...",
                         cfg["cluster_name"])
            try:
                _run_quiet([
                    "gcloud", "container", "fleet", "memberships",
                    "unregister", cfg["cluster_name"],
                    f"--gke-cluster={cfg['zone']}/{cfg['cluster_name']}",
                    f"--project={cfg['project']}", "--quiet",
                ], timeout=120)
            except subprocess.TimeoutExpired:
                # Best effort: a slow unregister must not block the deletion
                # of the cluster and network below.
                logging.warning("Timed out unregistering fleet membership %s.",
                                cfg["cluster_name"])

        _teardown_cluster(cfg)
        _teardown_network(cfg)

    logging.info("=== Teardown complete ===")
+ +Usage: + python pkb.py --benchmarks=gke_python_density \\ + --gke_python_density=16 \\ + --gke_python_density_sample_count=20 \\ + --gke_python_density_sample_warmup=0 \\ + --gke_namespace=agentic \\ + --gke_api_url=http://localhost:8080 + +Samples emitted (per run): + - gke_python_density_orchestrator_cel_mean (ms) + - gke_python_density_orchestrator_cel_p50 (ms) + - gke_python_density_orchestrator_cel_p95 (ms) + - gke_python_density_orchestrator_cel_p99 (ms) + - gke_python_density_orchestrator_cel_min (ms) + - gke_python_density_orchestrator_cel_max (ms) + - gke_python_density_sandbox_total_cel_mean (ms) + - gke_python_density_sandbox_total_cel_p50 (ms) + - gke_python_density_sandbox_total_cel_p95 (ms) + - gke_python_density_sandbox_total_cel_p99 (ms) + - gke_python_density_sandbox_total_cel_min (ms) + - gke_python_density_sandbox_total_cel_max (ms) + - gke_python_density_sandbox_ttfe (ms) + - gke_python_density_sandbox_rss_start (MB) + - gke_python_density_sandbox_rss_end (MB) + - gke_python_density_sandbox_rss_growth (MB) + - gke_python_density_sandbox_compute_cel_mean (ms) + - gke_python_density_sandbox_syscall_cel_mean (ms) + - gke_python_density_sandbox_import_cel_mean (ms) + - gke_python_density_wall_time (seconds) +""" + +import logging +import time + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_python_density" +BENCHMARK_CONFIG = """ +gke_python_density: + description: > + Atomic single-point Python sandbox density measurement on a + pre-provisioned GKE cluster with gVisor isolation. 
def Run(benchmark_spec):
    """Run one density measurement against the agent API and build samples.

    Ensures the port-forward is up, optionally patches the warm pool to the
    requested density, POSTs the request to ``/benchmark/python/density`` and
    turns the response's ``aggregate`` dict into samples. Metrics missing
    from the aggregate are skipped; the wall-time sample is always emitted.

    Args:
        benchmark_spec: PKB benchmark spec (not read by this function).

    Returns:
        List of sample.Sample objects.
    """
    namespace = FLAGS.gke_namespace
    sessions = FLAGS.gke_python_density

    logging.info("=== Run: density=%d ===", sessions)

    # Sweep controllers may invoke Run without Prepare, so make sure the
    # port-forward exists before calling the API.
    utils.EnsurePortForward()

    # Warm-pool sizing lives here (not in Prepare) for the same reason.
    if FLAGS.gke_python_density_patch_warmpool:
        utils.PatchWarmPool(
            namespace=namespace,
            warmpool_name=_WARMPOOL_NAME,
            replicas=sessions,
            label=_WARMPOOL_LABEL,
        )

    request_body = {
        "sample_count": FLAGS.gke_python_density_sample_count,
        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
        "concurrent_sessions": sessions,
        "sandbox_exec_timeout_s": FLAGS.gke_python_density_exec_timeout,
    }

    started_at = time.time()
    response = utils.CallAgentApi("/benchmark/python/density", request_body)
    elapsed = time.time() - started_at

    ok_sessions = response.get("successful_sessions", 0)
    bad_sessions = response.get("failed_sessions", 0)
    aggregate = response.get("aggregate", {})

    logging.info(
        "API response: %d successful, %d failed sessions (%.1fs)",
        ok_sessions,
        bad_sessions,
        elapsed,
    )

    # Metadata attached to every sample.
    metadata = {
        "density": sessions,
        "successful_sessions": ok_sessions,
        "failed_sessions": bad_sessions,
        "sample_count": FLAGS.gke_python_density_sample_count,
        "sample_warmup": FLAGS.gke_python_density_sample_warmup,
        "wall_time_s": round(elapsed, 2),
    }

    # (metric suffix, unit) in emission order. The aggregate key is the
    # suffix plus the lower-cased unit, e.g. "sandbox_rss_end" -> "..._mb".
    stat_names = ("mean", "p50", "p95", "p99", "min", "max")
    metric_table = [(f"orchestrator_cel_{stat}", "ms") for stat in stat_names]
    metric_table += [(f"sandbox_total_cel_{stat}", "ms") for stat in stat_names]
    metric_table += [
        ("sandbox_ttfe", "ms"),
        ("sandbox_rss_start", "MB"),
        ("sandbox_rss_end", "MB"),
        ("sandbox_rss_growth", "MB"),
        ("sandbox_compute_cel_mean", "ms"),
        ("sandbox_syscall_cel_mean", "ms"),
        ("sandbox_import_cel_mean", "ms"),
    ]

    results = []
    for metric, unit in metric_table:
        _emit(
            results,
            aggregate,
            f"{metric}_{unit.lower()}",
            metric,
            unit,
            namespace,
            metadata,
        )

    # Wall time is measured locally, so it does not depend on the aggregate.
    results.append(
        utils.MakeSample(
            f"{BENCHMARK_NAME}_wall_time",
            round(elapsed, 2),
            "seconds",
            namespace,
            metadata,
        )
    )

    logging.info("Emitted %d samples for density=%d.", len(results), sessions)
    return results
def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra):
    """Append one sample for ``agg[agg_key]``; do nothing if it is absent.

    Args:
        samples: List that receives the new sample (mutated in place).
        agg: Aggregate statistics dict from the agent API response.
        agg_key: Key to look up in ``agg``.
        metric_suffix: Metric name appended to ``BENCHMARK_NAME``.
        unit: Unit string passed through to the sample.
        namespace: Kubernetes namespace passed through to the sample.
        extra: Metadata dict passed through to the sample.
    """
    metric_value = agg.get(agg_key)
    # A missing key and an explicit None are both treated as "not reported".
    if metric_value is None:
        return

    metric_name = f"{BENCHMARK_NAME}_{metric_suffix}"
    samples.append(
        utils.MakeSample(metric_name, metric_value, unit, namespace, extra)
    )
+ +Usage: + # Agent mode + python pkb.py --benchmarks=gke_qps \\ + --gke_qps_target_qps=5.0 \\ + --gke_qps_pool_size=70 \\ + --gke_qps_step_duration_s=30.0 \\ + --gke_qps_mode=agent \\ + --gke_namespace=agentic \\ + --gke_api_url=http://localhost:8080 + + # Raw claim mode + python pkb.py --benchmarks=gke_qps \\ + --gke_qps_target_qps=5.0 \\ + --gke_qps_pool_size=70 \\ + --gke_qps_step_duration_s=30.0 \\ + --gke_qps_mode=raw_claim \\ + --gke_qps_claim_timeout_s=60.0 \\ + --gke_namespace=agentic + +Samples emitted (per run): + - gke_qps_ttfe_mean (ms) + - gke_qps_ttfe_p50 (ms) + - gke_qps_ttfe_p95 (ms) + - gke_qps_ttfe_p99 (ms) + - gke_qps_ttfe_min (ms) + - gke_qps_ttfe_max (ms) + - gke_qps_claim_mean (ms) + - gke_qps_claim_p95 (ms) + - gke_qps_actual_qps (requests/sec) + - gke_qps_duration (seconds) + - gke_qps_total_requests (count) + - gke_qps_successful_requests (count) + - gke_qps_failed_requests (count) + - gke_qps_pool_before (count) + - gke_qps_pool_after (count) + - gke_qps_wall_time (seconds) +""" + +import json +import logging +import subprocess +import threading +import time +import uuid + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_qps" +BENCHMARK_CONFIG = """ +gke_qps: + description: > + Atomic single-point QPS saturation measurement on a + pre-provisioned GKE cluster with gVisor isolation. 
+""" + +_WARMPOOL_NAME = "python-sandbox-warmpool" +_WARMPOOL_LABEL = "sandbox=python-sandbox-example" +_SANDBOX_TEMPLATE = "python-sandbox-template" +_QPS_CLAIM_LABEL = "created-by=pkb-qps-benchmark" + +# --------------------------------------------------------------------------- +# Benchmark-specific flags +# --------------------------------------------------------------------------- + +flags.DEFINE_float( + "gke_qps_target_qps", + 5.0, + "Target requests per second (sandbox claims per second).", +) + +flags.DEFINE_integer( + "gke_qps_pool_size", + 70, + "Warm pool size maintained during the measurement.", +) + +flags.DEFINE_float( + "gke_qps_step_duration_s", + 30.0, + "Duration of the QPS burst in seconds.", +) + +flags.DEFINE_integer( + "gke_qps_sandbox_exec_timeout_s", + 30, + "Sandbox command execution timeout in seconds.", +) + +flags.DEFINE_float( + "gke_qps_provision_timeout_s", + 180.0, + "Max seconds to wait for pool pods to reach Running.", +) + +flags.DEFINE_string( + "gke_qps_mode", + "agent", + "Operating mode: 'agent' (POST to orchestrator API) or " + "'raw_claim' (create SandboxClaims directly via kubectl).", +) + +flags.DEFINE_float( + "gke_qps_claim_timeout_s", + 60.0, + "Max seconds to wait for a raw claim to bind " "(only used with mode=raw_claim).", +) + + +# --------------------------------------------------------------------------- +# Lifecycle +# --------------------------------------------------------------------------- + + +def Provision(benchmark_spec): + """Provision GKE cluster and all dependencies.""" + gke_provision_utils.Provision() + + +def GetConfig(user_config): + """Load and return benchmark config. + + No vm_groups — PKB skips Provision() and Teardown(). 
+ """ + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def Prepare(benchmark_spec): + """Deploy workloads and verify agent API.""" + logging.info("=== Prepare: deploying workloads ===") + deploy_utils.DeployWorkloads() + + mode = FLAGS.gke_qps_mode + if mode == "agent": + utils.CheckAgentHealthz(required=False) + utils.EnsurePortForward() + logging.info("Prepare complete.") + + +def Run(benchmark_spec): + """Execute a single QPS measurement and return samples. + + Returns: + List of sample.Sample objects. + """ + ns = FLAGS.gke_namespace + pool_size = FLAGS.gke_qps_pool_size + + # Scale warm pool (moved from Prepare for sweep compatibility) + utils.PatchWarmPool( + namespace=ns, + warmpool_name=_WARMPOOL_NAME, + replicas=pool_size, + label=_WARMPOOL_LABEL, + wait_timeout=int(FLAGS.gke_qps_provision_timeout_s), + ) + + mode = FLAGS.gke_qps_mode + + if mode == "raw_claim": + return _RunRawClaim(benchmark_spec) + else: + return _RunAgent(benchmark_spec) + + +def Cleanup(benchmark_spec): + """Delete benchmark claims and drain warm pool.""" + ns = FLAGS.gke_namespace + logging.info("Cleanup: deleting benchmark claims and draining warm pool.") + + # Delete any lingering benchmark claims + _DeleteBenchmarkClaims(ns) + + # Drain warm pool + utils.DrainWarmPool( + namespace=ns, + warmpool_name=_WARMPOOL_NAME, + label=_WARMPOOL_LABEL, + ) + + utils.StopPortForward() + logging.info("Cleanup complete.") + + +def Teardown(benchmark_spec): + """Teardown GKE cluster and all dependencies.""" + gke_provision_utils.Teardown() + + +# --------------------------------------------------------------------------- +# Agent mode +# --------------------------------------------------------------------------- + + +def _RunAgent(benchmark_spec): + """Fire QPS burst via the orchestrator API.""" + ns = FLAGS.gke_namespace + target_qps = FLAGS.gke_qps_target_qps + pool_size = FLAGS.gke_qps_pool_size + step_duration = FLAGS.gke_qps_step_duration_s + + logging.info( + 
"=== Run (agent): target_qps=%s, pool_size=%d, duration=%ss ===", + target_qps, + pool_size, + step_duration, + ) + + # Ensure port-forward is active (needed when sweeps skip Prepare) + utils.EnsurePortForward() + + # Record pool state before burst + pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running") + + # POST to agent API + payload = { + "target_qps": target_qps, + "duration_s": step_duration, + "sandbox_exec_timeout_s": FLAGS.gke_qps_sandbox_exec_timeout_s, + } + + t0 = time.time() + api_timeout = int(step_duration + 300) + result = utils.CallAgentApi("/benchmark/python/qps", payload, timeout=api_timeout) + wall_time = time.time() - t0 + + # Record pool state after burst + pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running") + + # Extract response fields + aggregate = result.get("aggregate", {}) + successful = result.get("successful_requests", 0) + failed = result.get("failed_requests", 0) + total = result.get("total_requests", 0) + actual_qps = result.get("actual_qps", 0) + duration_s = result.get("duration_s", 0) + + logging.info( + "API response: actual_qps=%s, %d/%d requests ok (%.1fs)", + actual_qps, + successful, + total, + wall_time, + ) + + # Build samples + extra = { + "target_qps": target_qps, + "pool_size": pool_size, + "step_duration_s": step_duration, + "mode": "agent", + "actual_qps": actual_qps, + "total_requests": total, + "successful_requests": successful, + "failed_requests": failed, + "pool_before": pool_before, + "pool_after": pool_after, + "wall_time_s": round(wall_time, 2), + } + + samples = [] + + # TTFE latency stats + _emit(samples, aggregate, "ttfe_mean_ms", "ttfe_mean", "ms", ns, extra) + _emit(samples, aggregate, "ttfe_p50_ms", "ttfe_p50", "ms", ns, extra) + _emit(samples, aggregate, "ttfe_p95_ms", "ttfe_p95", "ms", ns, extra) + _emit(samples, aggregate, "ttfe_p99_ms", "ttfe_p99", "ms", ns, extra) + _emit(samples, aggregate, "ttfe_min_ms", "ttfe_min", "ms", ns, extra) + _emit(samples, aggregate, 
"ttfe_max_ms", "ttfe_max", "ms", ns, extra) + + # Claim latency stats + _emit(samples, aggregate, "claim_mean_ms", "claim_mean", "ms", ns, extra) + _emit(samples, aggregate, "claim_p95_ms", "claim_p95", "ms", ns, extra) + + # Throughput and counts + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_actual_qps", + actual_qps, + "requests/sec", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_duration", + duration_s, + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_total_requests", + float(total), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_successful_requests", + float(successful), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_failed_requests", + float(failed), + "count", + ns, + extra, + ) + ) + + # Pool state + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_pool_before", + float(pool_before), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_pool_after", + float(pool_after), + "count", + ns, + extra, + ) + ) + + # Wall time + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_wall_time", + round(wall_time, 2), + "seconds", + ns, + extra, + ) + ) + + logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps) + return samples + + +# --------------------------------------------------------------------------- +# Raw claim mode +# --------------------------------------------------------------------------- + + +def _RunRawClaim(benchmark_spec): + """Fire SandboxClaims directly at target_qps (no agent).""" + ns = FLAGS.gke_namespace + target_qps = FLAGS.gke_qps_target_qps + pool_size = FLAGS.gke_qps_pool_size + step_duration = FLAGS.gke_qps_step_duration_s + claim_timeout = FLAGS.gke_qps_claim_timeout_s + + logging.info( + "=== Run (raw_claim): target_qps=%s, pool_size=%d, duration=%ss ===", + target_qps, + 
pool_size, + step_duration, + ) + + # Record pool state before burst + pool_before = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running") + + # Calculate total claims to fire + total_claims = max(1, int(target_qps * step_duration)) + interval = 1.0 / target_qps if target_qps > 0 else 1.0 + + logging.info( + "Firing %d raw SandboxClaims at %s req/s", + total_claims, + target_qps, + ) + + # Fire claims at target QPS in parallel threads + claim_results = [] + lock = threading.Lock() + + def _fire_and_wait(idx, fire_time): + claim_name = f"pkb-qps-0-{idx}-{uuid.uuid4().hex[:6]}" + result = {"request_id": idx, "fire_time_s": round(fire_time, 3)} + try: + t_create = _CreateClaim(ns, _SANDBOX_TEMPLATE, claim_name) + result["create_ts"] = t_create + t_bound = _WaitClaimBound(ns, claim_name, claim_timeout) + if t_bound is not None: + ttfe_ms = (t_bound - t_create) * 1000.0 + result["ttfe_ms"] = round(ttfe_ms, 3) + result["claim_ms"] = round(ttfe_ms, 3) + result["error"] = None + else: + result["ttfe_ms"] = None + result["error"] = "Timeout waiting for claim to bind" + except Exception as e: + result["ttfe_ms"] = None + result["error"] = f"{type(e).__name__}: {e}" + with lock: + claim_results.append(result) + + t0 = time.time() + threads = [] + for i in range(total_claims): + fire_time = time.time() - t0 + t = threading.Thread(target=_fire_and_wait, args=(i, fire_time), daemon=True) + threads.append(t) + t.start() + if i < total_claims - 1: + next_fire = t0 + (i + 1) * interval + sleep_time = next_fire - time.time() + if sleep_time > 0: + time.sleep(sleep_time) + + for t in threads: + t.join(timeout=claim_timeout + 30) + + wall_time = time.time() - t0 + actual_qps = round(total_claims / wall_time, 2) if wall_time > 0 else 0 + + # Record pool state after burst + pool_after = utils.CountPods(ns, _WARMPOOL_LABEL, phase="Running") + + # Aggregate results + successful = [r for r in claim_results if r.get("ttfe_ms") is not None] + failed = [r for r in claim_results if 
r.get("error")] + ttfe_values = sorted(r["ttfe_ms"] for r in successful) + + logging.info( + "Raw claim burst complete: %d/%d ok, actual_qps=%s (%.1fs)", + len(successful), + total_claims, + actual_qps, + wall_time, + ) + + # Build samples + extra = { + "target_qps": target_qps, + "pool_size": pool_size, + "step_duration_s": step_duration, + "mode": "raw_claim", + "actual_qps": actual_qps, + "total_requests": total_claims, + "successful_requests": len(successful), + "failed_requests": len(failed), + "pool_before": pool_before, + "pool_after": pool_after, + "wall_time_s": round(wall_time, 2), + } + + samples = [] + + # TTFE latency stats (computed from raw claim results) + if ttfe_values: + n = len(ttfe_values) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_mean", + round(sum(ttfe_values) / n, 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_p50", + round(_percentile(ttfe_values, 50), 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_p95", + round(_percentile(ttfe_values, 95), 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_p99", + round(_percentile(ttfe_values, 99), 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_min", + round(ttfe_values[0], 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_ttfe_max", + round(ttfe_values[-1], 3), + "ms", + ns, + extra, + ) + ) + + # Claim latency (same as TTFE in raw_claim mode) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_claim_mean", + round(sum(ttfe_values) / n, 3), + "ms", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_claim_p95", + round(_percentile(ttfe_values, 95), 3), + "ms", + ns, + extra, + ) + ) + + # Throughput and counts + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_actual_qps", + actual_qps, + 
"requests/sec", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_duration", + round(wall_time, 2), + "seconds", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_total_requests", + float(total_claims), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_successful_requests", + float(len(successful)), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_failed_requests", + float(len(failed)), + "count", + ns, + extra, + ) + ) + + # Pool state + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_pool_before", + float(pool_before), + "count", + ns, + extra, + ) + ) + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_pool_after", + float(pool_after), + "count", + ns, + extra, + ) + ) + + # Wall time + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_wall_time", + round(wall_time, 2), + "seconds", + ns, + extra, + ) + ) + + # Cleanup benchmark claims + _DeleteBenchmarkClaims(ns) + + logging.info("Emitted %d samples for target_qps=%s.", len(samples), target_qps) + return samples + + +# --------------------------------------------------------------------------- +# Raw claim helpers +# --------------------------------------------------------------------------- + + +def _CreateClaim(namespace, template, claim_name): + """Create a single SandboxClaim via kubectl and return creation timestamp.""" + manifest = json.dumps( + { + "apiVersion": "extensions.agents.x-k8s.io/v1alpha1", + "kind": "SandboxClaim", + "metadata": { + "name": claim_name, + "namespace": namespace, + "labels": {"created-by": "pkb-qps-benchmark"}, + }, + "spec": { + "sandboxTemplateName": template, + }, + } + ) + proc = subprocess.run( + ["kubectl", "apply", "-n", namespace, "-f", "-"], + input=manifest, + capture_output=True, + text=True, + timeout=30, + ) + t_create = time.time() + if proc.returncode != 0: + raise RuntimeError( + f"Failed to 
create claim {claim_name}: {proc.stderr.strip()}" + ) + return t_create + + +def _WaitClaimBound(namespace, claim_name, timeout_s): + """Wait for a SandboxClaim to reach Bound phase. Returns timestamp or None.""" + deadline = time.time() + timeout_s + while time.time() < deadline: + stdout, _, rc = utils.RunKubectl( + [ + "get", + "sandboxclaim", + claim_name, + "-n", + namespace, + "-o", + "jsonpath={.status.phase}", + ], + timeout=10, + raise_on_failure=False, + ) + if rc == 0 and stdout.lower() in ("bound", "ready"): + return time.time() + time.sleep(0.1) + return None + + +def _DeleteBenchmarkClaims(namespace): + """Delete SandboxClaims labelled created-by=pkb-qps-benchmark.""" + stdout, _, rc = utils.RunKubectl( + [ + "get", + "sandboxclaim", + "-l", + _QPS_CLAIM_LABEL, + "-n", + namespace, + "-o", + "jsonpath={.items[*].metadata.name}", + ], + timeout=30, + raise_on_failure=False, + ) + names = stdout.split() if stdout else [] + if not names or names == [""]: + return 0 + + count = len(names) + logging.info("Deleting %d pkb-qps SandboxClaim(s)", count) + utils.RunKubectl( + [ + "delete", + "sandboxclaim", + "-l", + _QPS_CLAIM_LABEL, + "-n", + namespace, + "--wait=false", + ], + timeout=60, + raise_on_failure=False, + ) + + # Wait for claims to be fully removed + t0 = time.time() + while time.time() - t0 < 120: + stdout, _, _ = utils.RunKubectl( + [ + "get", + "sandboxclaim", + "-l", + _QPS_CLAIM_LABEL, + "-n", + namespace, + "--no-headers", + "--ignore-not-found", + ], + timeout=10, + raise_on_failure=False, + ) + remaining = len([l for l in stdout.splitlines() if l]) if stdout else 0 + if remaining == 0: + break + time.sleep(2) + + logging.info("Claims cleaned up in %.1fs", time.time() - t0) + return count + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _percentile(sorted_values, pct): + """Calculate percentile (0-100) with 
linear interpolation.""" + if not sorted_values: + return 0.0 + idx = (pct / 100) * (len(sorted_values) - 1) + lo = int(idx) + hi = min(lo + 1, len(sorted_values) - 1) + frac = idx - lo + return sorted_values[lo] * (1 - frac) + sorted_values[hi] * frac + + +def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra): + """Emit a sample if the key exists in the data dict.""" + value = data.get(data_key) + if value is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_{metric_suffix}", + value, + unit, + namespace, + extra, + ) + ) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py new file mode 100644 index 0000000000..4cfba5d5d0 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py @@ -0,0 +1,1022 @@ +"""PKB Benchmark: GKE Agent Pod Snapshot Saturation (Use Case A). + +Atomic single-point measurement of GKE Pod Snapshot create/restore latency +on a pre-provisioned GKE cluster with gVisor isolation. Measures snapshot +time, restore time, TTFE (Time To First Execution), and restore correctness +at a given preload_mb and burst_size. + +This benchmark is designed to be invoked repeatedly by an external sweep +controller that varies the preload_mb parameter across iterations to find +the saturation point. 
+ +Usage: + python pkb.py --benchmarks=gke_snapshot \\ + --gke_snapshot_preload_mb=50 \\ + --gke_snapshot_burst_size=3 \\ + --gke_namespace=agentic \\ + --gke_snapshot_skip_snapshot=false + +Samples emitted (per run): + - gke_snapshot_snapshot_p50 (seconds) + - gke_snapshot_snapshot_p95 (seconds) + - gke_snapshot_snapshot_max (seconds) + - gke_snapshot_restore_p50 (seconds) + - gke_snapshot_restore_p95 (seconds) + - gke_snapshot_restore_max (seconds) + - gke_snapshot_ttfe_p50 (seconds) + - gke_snapshot_ttfe_p95 (seconds) + - gke_snapshot_ttfe_max (seconds) + - gke_snapshot_startup_time (seconds) + - gke_snapshot_restore_correct_count (count) + - gke_snapshot_wall_time (seconds) +""" + +import json +import logging +import os +import re +import subprocess +import time +from concurrent.futures import ThreadPoolExecutor + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker import sample +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_snapshot" +BENCHMARK_CONFIG = """ +gke_snapshot: + description: > + Atomic single-point Pod Snapshot saturation measurement on a + pre-provisioned GKE cluster with gVisor isolation. 
+""" + +# --------------------------------------------------------------------------- +# Benchmark-specific flags +# --------------------------------------------------------------------------- + +flags.DEFINE_integer( + "gke_snapshot_preload_mb", + 10, + "Megabytes of memory to pre-allocate in the sandbox before snapshot.", +) + +flags.DEFINE_integer( + "gke_snapshot_burst_size", + 1, + "Number of concurrent source/snapshot/restore pods per measurement.", +) + +flags.DEFINE_string( + "gke_snapshot_ksa_name", + "pod-snapshot-sa", + "Kubernetes service account for pod snapshots.", +) + +flags.DEFINE_integer( + "gke_snapshot_pod_timeout", + 180, + "Max seconds to wait for pod Running / preload.", +) + +flags.DEFINE_boolean( + "gke_snapshot_skip_snapshot", + False, + "Skip snapshot/restore phases — measure cold-start TTFE only.", +) + +flags.DEFINE_string( + "gke_snapshot_preload_mode", + "synthetic", + "Preload mode: 'synthetic' (os.urandom fill) or " + "'script:' to run a custom startup script.", +) + + +# --------------------------------------------------------------------------- +# Lifecycle +# --------------------------------------------------------------------------- + + +def Provision(benchmark_spec): + """Provision GKE cluster and all dependencies.""" + gke_provision_utils.Provision() + + +def GetConfig(user_config): + """Load and return benchmark config. + + No vm_groups — PKB skips Provision() and Teardown(). + """ + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def Prepare(benchmark_spec): + """Deploy workloads, snapshot infra, and validate readiness.""" + ns = FLAGS.gke_namespace + preload_mb = FLAGS.gke_snapshot_preload_mb + + logging.info( + "=== Prepare: preload_mb=%d, burst_size=%d ===", + preload_mb, + FLAGS.gke_snapshot_burst_size, + ) + + # Deploy Agent Sandbox ecosystem (idempotent) + deploy_utils.DeployWorkloads() + + # Deploy Pod Snapshot infrastructure (idempotent) + deploy_utils.DeploySnapshots() + + # 1. 
Verify PodSnapshotStorageConfig exists (cluster-scoped). + _, _, retcode = utils.RunKubectl( + ["get", "podsnapshotstorageconfigs.podsnapshot.gke.io", "--no-headers"], + timeout=30, + raise_on_failure=False, + ) + if retcode != 0: + raise RuntimeError( + "PodSnapshotStorageConfig CRD not found. " + "Ensure pod snapshots are enabled on the cluster." + ) + logging.info("PodSnapshotStorageConfig verified.") + + # 2. Verify PodSnapshotPolicy exists in the namespace. + _, _, retcode = utils.RunKubectl( + ["get", "podsnapshotpolicies.podsnapshot.gke.io", "-n", ns, "--no-headers"], + timeout=30, + raise_on_failure=False, + ) + if retcode != 0: + logging.warning("PodSnapshotPolicy not found in namespace %s.", ns) + + # 3. Verify the service account exists. + ksa = FLAGS.gke_snapshot_ksa_name + _, _, retcode = utils.RunKubectl( + ["get", "serviceaccount", ksa, "-n", ns], + timeout=30, + raise_on_failure=False, + ) + if retcode != 0: + raise RuntimeError( + f"ServiceAccount {ksa} not found in namespace {ns}. " + "Run setup_snapshot_gke.sh or ensure DeploySnapshots() succeeded." + ) + logging.info("ServiceAccount %s verified.", ksa) + + # 4. Verify the template file exists. + template_path = _GetTemplatePath() + if not os.path.isfile(template_path): + raise RuntimeError(f"Snapshot template not found: {template_path}") + logging.info("Template file verified: %s", template_path) + + utils.EnsurePortForward() + logging.info("Prepare complete.") + + +def Run(benchmark_spec): + """Execute a single snapshot/restore measurement and return samples. + + Returns: + List of sample.Sample objects. 
+ """ + ns = FLAGS.gke_namespace + preload_mb = FLAGS.gke_snapshot_preload_mb + burst_size = FLAGS.gke_snapshot_burst_size + skip_snapshot = FLAGS.gke_snapshot_skip_snapshot + preload_mode = FLAGS.gke_snapshot_preload_mode + ksa_name = FLAGS.gke_snapshot_ksa_name + pod_timeout = FLAGS.gke_snapshot_pod_timeout + + logging.info( + "=== Run: preload_mb=%d, burst_size=%d, skip_snapshot=%s ===", + preload_mb, + burst_size, + skip_snapshot, + ) + + template_path = _GetTemplatePath() + t0 = time.time() + + # Run the snapshot/restore cycle + step_result = _RunSnapshotCycle( + namespace=ns, + preload_mb=preload_mb, + burst_size=burst_size, + skip_snapshot=skip_snapshot, + preload_mode=preload_mode, + ksa_name=ksa_name, + pod_timeout=pod_timeout, + template_path=template_path, + ) + + wall_time = time.time() - t0 + + # Build samples + extra = { + "preload_mb": preload_mb, + "burst_size": burst_size, + "skip_snapshot": skip_snapshot, + "preload_mode": preload_mode, + "restore_correct_count": step_result.get("restore_correct_count", 0), + "wall_time_s": round(wall_time, 2), + } + + if step_result.get("error"): + extra["error"] = step_result["error"] + + samples = [] + + # Snapshot metrics + _emit(samples, step_result, "snapshot_p50_s", "snapshot_p50", "seconds", ns, extra) + _emit(samples, step_result, "snapshot_p95_s", "snapshot_p95", "seconds", ns, extra) + _emit(samples, step_result, "snapshot_max_s", "snapshot_max", "seconds", ns, extra) + + # Restore metrics + _emit(samples, step_result, "restore_p50_s", "restore_p50", "seconds", ns, extra) + _emit(samples, step_result, "restore_p95_s", "restore_p95", "seconds", ns, extra) + _emit(samples, step_result, "restore_max_s", "restore_max", "seconds", ns, extra) + + # TTFE metrics + _emit(samples, step_result, "ttfe_p50_s", "ttfe_p50", "seconds", ns, extra) + _emit(samples, step_result, "ttfe_p95_s", "ttfe_p95", "seconds", ns, extra) + _emit(samples, step_result, "ttfe_max_s", "ttfe_max", "seconds", ns, extra) + + # Startup time 
+ _emit(samples, step_result, "startup_time_s", "startup_time", "seconds", ns, extra) + + # Restore correctness + correct = step_result.get("restore_correct_count") + if correct is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_restore_correct_count", + correct, + "count", + ns, + extra, + ) + ) + + # Wall time + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_wall_time", + round(wall_time, 2), + "seconds", + ns, + extra, + ) + ) + + logging.info("Emitted %d samples for preload_mb=%d.", len(samples), preload_mb) + return samples + + +def Cleanup(benchmark_spec): + """Clean up any leftover benchmark resources.""" + ns = FLAGS.gke_namespace + logging.info("Cleanup — deleting any leftover snapshot-benchmark resources.") + + for kind in ( + "sandboxclaim", + "sandboxtemplate", + "podsnapshotmanualtrigger", + "podsnapshots.podsnapshot.gke.io", + ): + utils.RunKubectl( + [ + "delete", + kind, + "-l", + "app=snapshot-benchmark-workload", + "-n", + ns, + "--ignore-not-found=true", + ], + timeout=60, + raise_on_failure=False, + ) + utils.StopPortForward() + logging.info("Cleanup complete.") + + +def Teardown(benchmark_spec): + """Teardown GKE cluster and all dependencies.""" + gke_provision_utils.Teardown() + + +# --------------------------------------------------------------------------- +# Core snapshot/restore logic +# --------------------------------------------------------------------------- + + +def _RunSnapshotCycle( + namespace, + preload_mb, + burst_size, + skip_snapshot, + preload_mode, + ksa_name, + pod_timeout, + template_path, +): + """Execute one full snapshot/restore cycle and return a result dict. + + Handles source creation, snapshot, restore, TTFE measurement, + correctness verification, and cleanup. 
+ """ + step_template = f"snap-bench-{preload_mb}mb" + source_names = [f"snap-src-0-{i}" for i in range(burst_size)] + restore_names = [f"snap-restore-0-{i}" for i in range(burst_size)] + trigger_names = [f"snap-trigger-0-{i}" for i in range(burst_size)] + + result = { + "preload_mb": preload_mb, + "burst_size": burst_size, + "snapshot_p50_s": None, + "snapshot_p95_s": None, + "snapshot_max_s": None, + "restore_p50_s": None, + "restore_p95_s": None, + "restore_max_s": None, + "ttfe_p50_s": None, + "ttfe_p95_s": None, + "ttfe_max_s": None, + "startup_time_s": None, + "snapshot_counter": None, + "restore_correct_count": 0, + "burst_results": [], + "error": None, + } + + try: + # 1. Create step-specific SandboxTemplate + logging.info( + "Creating SandboxTemplate '%s' (PRELOAD_MB=%d, memory=%dMi)", + step_template, + preload_mb, + max(512, preload_mb + 256), + ) + if not _RenderAndApplyTemplate( + template_path, + step_template, + namespace, + ksa_name, + preload_mb, + preload_mode, + ): + raise RuntimeError("Failed to create SandboxTemplate") + + time.sleep(2) + + # 2. 
Create source claims and wait for Running + preload + logging.info("Creating %d source SandboxClaim(s)", burst_size) + t0_sources = time.time() + workers = min(burst_size, 50) + with ThreadPoolExecutor(max_workers=workers) as pool: + for sname in source_names: + pool.submit(_ApplyClaim, sname, namespace, step_template) + + logging.info("Waiting for %d source pod(s) Running + preload", burst_size) + with ThreadPoolExecutor(max_workers=workers) as pool: + source_futs = [ + pool.submit( + _MeasureSingleSource, + sname, + namespace, + t0_sources, + pod_timeout, + preload_mode, + ) + for sname in source_names + ] + source_results = [f.result() for f in source_futs] + + src_failed = [r for r in source_results if r.get("error")] + if src_failed: + fail_msgs = "; ".join(f"{r['pod']}: {r['error']}" for r in src_failed) + raise RuntimeError( + f"{len(src_failed)}/{burst_size} source pod(s) failed: {fail_msgs}" + ) + + startup_times = [ + r["startup_time_s"] + for r in source_results + if r["startup_time_s"] is not None + ] + result["startup_time_s"] = ( + round(_Percentile(startup_times, 50), 3) if startup_times else None + ) + + snapshot_counters = {r["pod"]: r["snapshot_counter"] for r in source_results} + min_counter = min( + (c for c in snapshot_counters.values() if c is not None), default=None + ) + result["snapshot_counter"] = min_counter + logging.info("%d source pod(s) ready. 
Min counter: %s", burst_size, min_counter) + + # --skip_snapshot: measure cold-start TTFE only + if skip_snapshot: + logging.info("skip_snapshot mode: measuring cold-start TTFE") + ttfe_times = [] + burst_results = [] + for i, sname in enumerate(source_names): + startup = source_results[i]["startup_time_s"] + counter = source_results[i]["snapshot_counter"] + preload_done = source_results[i].get("preload_complete_time_s") + ttfe_s = preload_done if preload_done else startup + ttfe_times.append(ttfe_s) + burst_results.append( + { + "pod": sname, + "source_pod": sname, + "startup_time_s": startup, + "snapshot_counter": None, + "snapshot_time_s": None, + "restore_time_s": None, + "ttfe_s": ttfe_s, + "restore_counter": counter, + "restore_correct": True, + "error": None, + } + ) + + result["burst_results"] = burst_results + result["restore_correct_count"] = burst_size + + if ttfe_times: + result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3) + result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3) + result["ttfe_max_s"] = round(max(ttfe_times), 3) + + # Skip to cleanup + return result + + # 3. 
Trigger snapshots concurrently + logging.info("Triggering %d snapshot(s)", burst_size) + t0_snap = time.time() + with ThreadPoolExecutor(max_workers=workers) as pool: + snap_futs = [ + pool.submit( + _TriggerAndWaitSnapshot, + tname, + sname, + namespace, + t0_snap, + ) + for tname, sname in zip(trigger_names, source_names) + ] + snap_results = [f.result() for f in snap_futs] + + snap_failed = [r for r in snap_results if r.get("error")] + snap_times = [ + r["snapshot_time_s"] + for r in snap_results + if r["snapshot_time_s"] is not None + ] + if snap_times: + result["snapshot_p50_s"] = round(_Percentile(snap_times, 50), 3) + result["snapshot_p95_s"] = round(_Percentile(snap_times, 95), 3) + result["snapshot_max_s"] = round(max(snap_times), 3) + + if snap_failed: + fail_msgs = "; ".join(f"{r['trigger']}: {r['error']}" for r in snap_failed) + raise RuntimeError( + f"{len(snap_failed)}/{burst_size} snapshot(s) failed: {fail_msgs}" + ) + + # 4. Create restore claims concurrently + logging.info("Creating %d restore SandboxClaim(s)", burst_size) + t0_burst = time.time() + with ThreadPoolExecutor(max_workers=workers) as pool: + create_futs = [ + pool.submit(_ApplyClaim, rname, namespace, step_template) + for rname in restore_names + ] + for f in create_futs: + f.result() + + # 5. 
Poll restore pods for Running + TTFE + logging.info("Measuring restore + TTFE across %d pod(s)", burst_size) + with ThreadPoolExecutor(max_workers=workers) as pool: + measure_futs = [ + pool.submit( + _MeasureSingleRestore, + rname, + namespace, + t0_burst, + min_counter, + pod_timeout, + ) + for rname in restore_names + ] + burst_results = [f.result() for f in measure_futs] + + # Merge source + snapshot info + for i in range(burst_size): + burst_results[i]["source_pod"] = source_names[i] + burst_results[i]["startup_time_s"] = source_results[i]["startup_time_s"] + burst_results[i]["snapshot_counter"] = source_results[i]["snapshot_counter"] + burst_results[i]["snapshot_time_s"] = snap_results[i]["snapshot_time_s"] + + result["burst_results"] = burst_results + + # 6. Aggregate + restore_times = [ + r["restore_time_s"] + for r in burst_results + if r["restore_time_s"] is not None + ] + ttfe_times = [r["ttfe_s"] for r in burst_results if r["ttfe_s"] is not None] + correct_count = sum(1 for r in burst_results if r["restore_correct"]) + + result["restore_correct_count"] = correct_count + + if restore_times: + result["restore_p50_s"] = round(_Percentile(restore_times, 50), 3) + result["restore_p95_s"] = round(_Percentile(restore_times, 95), 3) + result["restore_max_s"] = round(max(restore_times), 3) + + if ttfe_times: + result["ttfe_p50_s"] = round(_Percentile(ttfe_times, 50), 3) + result["ttfe_p95_s"] = round(_Percentile(ttfe_times, 95), 3) + result["ttfe_max_s"] = round(max(ttfe_times), 3) + + logging.info("Counter correct: %d/%d", correct_count, burst_size) + + except Exception as e: + result["error"] = str(e) + logging.error("Snapshot cycle failed: %s", e) + + finally: + # Cleanup + logging.info("Cleaning up step resources") + _CleanupStep( + source_names, + restore_names, + trigger_names, + step_template, + namespace, + ) + time.sleep(5) + + return result + + +# --------------------------------------------------------------------------- +# Kubernetes interaction 
def _ApplyClaim(name, namespace, template_name):
    """Create a SandboxClaim by piping a JSON manifest to ``kubectl apply``.

    Args:
        name: Name of the SandboxClaim to create.
        namespace: Namespace the claim is created in.
        template_name: Name of the SandboxTemplate the claim references.

    Raises:
        RuntimeError: If ``kubectl apply`` exits with a non-zero status.
    """
    claim_metadata = {
        "name": name,
        "namespace": namespace,
        "labels": {"app": "snapshot-benchmark-workload"},
    }
    claim = {
        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
        "kind": "SandboxClaim",
        "metadata": claim_metadata,
        "spec": {"sandboxTemplateRef": {"name": template_name}},
    }
    # The manifest is fed on stdin ("-f -") so no temporary file is needed.
    completed = subprocess.run(
        ["kubectl", "apply", "-f", "-"],
        input=json.dumps(claim),
        capture_output=True,
        text=True,
        timeout=30,
    )
    if completed.returncode == 0:
        return
    raise RuntimeError(f"Failed to create SandboxClaim {name}: {completed.stderr}")
def _RenderAndApplyScriptTemplate(
    template_name,
    namespace,
    ksa_name,
    preload_mb,
    preload_mode,
):
    """Apply a SandboxTemplate whose container runs a user-supplied script.

    The script path is the part of ``preload_mode`` after the first ``:``.
    Its contents are embedded in a bash entrypoint that prints
    ``SCRIPT_READY`` once the script has finished and then prints an
    incrementing ``Count: N`` line every second.

    Args:
        template_name: Name given to the SandboxTemplate.
        namespace: Namespace the template is created in.
        ksa_name: Service account name set on the pod spec.
        preload_mb: Exposed to the container as ``PRELOAD_MB``; also sizes the
            memory request (``max(512, preload_mb + 256)`` Mi).
        preload_mode: String of the form ``<prefix>:<script path>``.

    Returns:
        True if ``kubectl apply`` succeeded, False if the script file is
        missing or the apply failed.
    """
    script_path = preload_mode.split(":", 1)[1]
    if not os.path.isfile(script_path):
        logging.error("Script not found: %s", script_path)
        return False

    with open(script_path) as script_file:
        user_script = script_file.read()

    # Each entry is one line of the generated entrypoint; the user's script is
    # spliced in verbatim between the two marker comments.
    script_lines = [
        "#!/bin/bash",
        "set -e",
        'echo "Running startup script..."',
        "# --- User script start ---",
        user_script,
        "# --- User script end ---",
        'echo "SCRIPT_READY"',
        'echo "Starting counter."',
        "i=0",
        "while true; do",
        ' echo "Count: $i"',
        " i=$((i + 1))",
        " sleep 1",
        "done",
    ]
    entrypoint = "\n".join(script_lines) + "\n"

    memory_request = f"{max(512, preload_mb + 256)}Mi"
    container = {
        "name": "preloader",
        "image": "python:3.11-slim",
        "command": ["bash", "-c"],
        "args": [entrypoint],
        "env": [{"name": "PRELOAD_MB", "value": str(preload_mb)}],
        "resources": {
            "requests": {
                "cpu": "250m",
                "memory": memory_request,
                "ephemeral-storage": "512Mi",
            }
        },
    }
    pod_spec = {
        "serviceAccountName": ksa_name,
        "runtimeClassName": "gvisor",
        "containers": [container],
        "nodeSelector": _get_sandbox_node_selector(),
        "tolerations": _get_sandbox_tolerations(),
        "restartPolicy": "OnFailure",
    }
    manifest = {
        "apiVersion": "extensions.agents.x-k8s.io/v1alpha1",
        "kind": "SandboxTemplate",
        "metadata": {
            "name": template_name,
            "namespace": namespace,
        },
        "spec": {
            "podTemplate": {
                "metadata": {
                    "labels": {"app": "snapshot-benchmark-workload"},
                },
                "spec": pod_spec,
            }
        },
    }

    completed = subprocess.run(
        ["kubectl", "apply", "-f", "-"],
        input=json.dumps(manifest),
        capture_output=True,
        text=True,
        timeout=30,
    )
    applied = completed.returncode == 0
    if not applied:
        logging.warning("kubectl apply stderr: %s", completed.stderr)
    return applied
def _TriggerAndWaitSnapshot(trigger_name, target_pod, namespace, t0, timeout_s=300):
    """Create a PodSnapshotManualTrigger and poll until it reports Complete.

    Args:
        trigger_name: Name of the trigger resource to create.
        target_pod: Pod named in the trigger's ``spec.targetPod``.
        namespace: Namespace of the trigger.
        t0: Reference epoch time; both the deadline and the reported
            ``snapshot_time_s`` are measured from this instant.
        timeout_s: Seconds after ``t0`` at which polling gives up.

    Returns:
        Dict with keys ``trigger``, ``pod``, ``snapshot_time_s`` (seconds since
        ``t0``, or None) and ``error`` (message, or None on success).
    """
    outcome = {
        "trigger": trigger_name,
        "pod": target_pod,
        "snapshot_time_s": None,
        "error": None,
    }

    trigger = {
        "apiVersion": "podsnapshot.gke.io/v1",
        "kind": "PodSnapshotManualTrigger",
        "metadata": {"name": trigger_name, "namespace": namespace},
        "spec": {"targetPod": target_pod},
    }
    applied = subprocess.run(
        ["kubectl", "apply", "-f", "-"],
        input=json.dumps(trigger),
        capture_output=True,
        text=True,
        timeout=30,
    )
    if applied.returncode != 0:
        outcome["error"] = f"Failed to create trigger: {applied.stderr}"
        return outcome

    # Only the reason of the first status condition is inspected.
    status_query = [
        "get",
        "podsnapshotmanualtriggers.podsnapshot.gke.io",
        trigger_name,
        "-n",
        namespace,
        "-o",
        "jsonpath={.status.conditions[0].reason}",
    ]
    give_up_at = t0 + timeout_s
    while time.time() < give_up_at:
        reason, _, _ = utils.RunKubectl(
            status_query,
            timeout=10,
            raise_on_failure=False,
        )
        if reason == "Complete":
            outcome["snapshot_time_s"] = round(time.time() - t0, 3)
            break
        time.sleep(2)
    else:
        outcome["error"] = (
            f"Snapshot {trigger_name} did not complete within {timeout_s}s"
        )
    return outcome
def _CleanupStep(source_names, restore_names, trigger_names, template_name, namespace):
    """Delete every resource created for one benchmark step.

    Deletes, in order: the SandboxTemplate, the source SandboxClaims, the
    restore SandboxClaims and the snapshot triggers, then all PodSnapshot
    resources in the namespace. Failures are not raised
    (``raise_on_failure=False``).

    Args:
        source_names: Names of the source SandboxClaims.
        restore_names: Names of the restore SandboxClaims.
        trigger_names: Names of the PodSnapshotManualTriggers.
        template_name: Name of the step's SandboxTemplate.
        namespace: Namespace holding all of the above.
    """
    targets = [("sandboxtemplate", template_name)]
    targets += [("sandboxclaim", claim) for claim in source_names]
    targets += [("sandboxclaim", claim) for claim in restore_names]
    targets += [("podsnapshotmanualtrigger", trig) for trig in trigger_names]

    for kind, resource in targets:
        utils.RunKubectl(
            ["delete", kind, resource, "-n", namespace, "--ignore-not-found=true"],
            timeout=60,
            raise_on_failure=False,
        )

    # Snapshots are not tracked by name, so everything in the namespace goes.
    delete_all_snapshots = [
        "delete",
        "podsnapshots.podsnapshot.gke.io",
        "--all",
        "-n",
        namespace,
        "--ignore-not-found=true",
    ]
    utils.RunKubectl(
        delete_all_snapshots,
        timeout=60,
        raise_on_failure=False,
    )
_GetTemplatePath(): + """Return the absolute path to the snapshot SandboxTemplate template.""" + pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + return os.path.join( + pkg_dir, + "data", + "k8s_agents", + "manifests", + "snapshot-sandbox-template.yaml.template", + ) + + +def _Percentile(values, pct): + """Calculate percentile (0-100) from a list of values.""" + if not values: + return 0.0 + s = sorted(values) + idx = (pct / 100) * (len(s) - 1) + lo = int(idx) + hi = min(lo + 1, len(s) - 1) + frac = idx - lo + return s[lo] * (1 - frac) + s[hi] * frac + + +def _emit(samples, data, data_key, metric_suffix, unit, namespace, extra): + """Emit a sample if the key exists in the data dict.""" + value = data.get(data_key) + if value is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_{metric_suffix}", + value, + unit, + namespace, + extra, + ) + ) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py new file mode 100644 index 0000000000..1c00deca54 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py @@ -0,0 +1,487 @@ +"""PKB Benchmark: GKE Agent Warmpool Scale-Up (Use Case E). + +Atomic single-point measurement of warm pool provisioning speed on a +pre-provisioned GKE cluster. Measures how quickly N sandbox pods can be +provisioned from zero via the SandboxWarmPool controller. No agent API +is needed; this benchmark interacts directly with the Kubernetes API. + +This benchmark is designed to be invoked repeatedly by an external sweep +controller that varies the target_replicas parameter across iterations to +find the provisioning saturation point. 
+ +Usage: + python pkb.py --benchmarks=gke_warmpool \ + --gke_warmpool_target_replicas=100 \ + --gke_warmpool_name=python-sandbox-warmpool \ + --gke_warmpool_pod_label=sandbox=python-sandbox-example \ + --gke_warmpool_ready_threshold_s=300 \ + --gke_warmpool_poll_interval_s=2.0 \ + --gke_warmpool_drain_timeout_s=300 \ + --gke_namespace=agentic \ + --gke_machine_type=c4-standard-8 + +Samples emitted (per run): + - gke_warmpool_total_time_to_ready (seconds) + - gke_warmpool_refill_rate (pods/sec) + - gke_warmpool_drain_time (seconds) + - gke_warmpool_first_pod_running (seconds) + - gke_warmpool_final_running_count (count) + - gke_warmpool_final_pending_count (count) + - gke_warmpool_time_to_created_p50 (seconds) + - gke_warmpool_time_to_created_p95 (seconds) + - gke_warmpool_time_to_created_max (seconds) + - gke_warmpool_time_to_created_count (count) + - gke_warmpool_time_to_scheduled_p50 (seconds) + - gke_warmpool_time_to_scheduled_p95 (seconds) + - gke_warmpool_time_to_scheduled_max (seconds) + - gke_warmpool_time_to_scheduled_count (count) + - gke_warmpool_time_to_running_p50 (seconds) + - gke_warmpool_time_to_running_p95 (seconds) + - gke_warmpool_time_to_running_max (seconds) + - gke_warmpool_time_to_running_count (count) + - gke_warmpool_wall_time (seconds) +""" + +import json +import logging +import time + +from absl import flags +from datetime import datetime, timezone +from perfkitbenchmarker import configs +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_benchmark_utils as utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_deploy_utils as deploy_utils, +) +from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils + +FLAGS = flags.FLAGS + +BENCHMARK_NAME = "gke_warmpool" +BENCHMARK_CONFIG = """ +gke_warmpool: + description: > + Atomic single-point warm pool scale-up measurement on a + pre-provisioned GKE cluster with gVisor isolation. 
def Run(benchmark_spec):
    """Scale the warm pool from 0 to the target size and time the scale-up.

    The pool is drained, patched to ``gke_warmpool_target_replicas`` and then
    polled until that many pods are Running or ``gke_warmpool_ready_threshold_s``
    elapses. Pod lifecycle timestamps are scraped afterwards.

    Args:
        benchmark_spec: Unused; all settings are read from FLAGS.

    Returns:
        List of samples built with ``utils.MakeSample``.
    """
    ns = FLAGS.gke_namespace
    target = FLAGS.gke_warmpool_target_replicas
    warmpool_name = FLAGS.gke_warmpool_name
    label = FLAGS.gke_warmpool_pod_label
    threshold_s = FLAGS.gke_warmpool_ready_threshold_s
    poll_interval = FLAGS.gke_warmpool_poll_interval_s
    drain_timeout_s = FLAGS.gke_warmpool_drain_timeout_s

    # Start from an empty pool so the measurement is not skewed by leftovers.
    _DrainPool(ns, warmpool_name, label, drain_timeout_s)
    time.sleep(3)

    logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target)

    t_wall_start = time.time()

    # 1. Time a drain.
    # NOTE(review): the pool was already drained just above, so this second
    # drain times a no-op; confirm whether drain_time should instead cover the
    # first drain.
    drain_started = time.time()
    _DrainPool(ns, warmpool_name, label, drain_timeout_s)
    drain_time_s = round(time.time() - drain_started, 2)

    time.sleep(2)

    # 2. Request the target replica count.
    logging.info("Patching %s replicas -> %d", warmpool_name, target)
    patch_json = json.dumps({"spec": {"replicas": target}})
    patch_cmd = [
        "patch",
        "sandboxwarmpool",
        warmpool_name,
        "-n",
        ns,
        "--type=merge",
        f"-p={patch_json}",
    ]
    utils.RunKubectl(patch_cmd)

    # 3. Poll until every replica is Running or the threshold expires.
    t_scale = time.time()
    scale_start_epoch = t_scale
    deadline = t_scale + threshold_s
    first_pod_time = None

    while time.time() < deadline:
        elapsed = time.time() - t_scale
        running = _CountPods(ns, label, "Running")
        pending = _CountPods(ns, label, "Pending")

        if running > 0 and first_pod_time is None:
            first_pod_time = elapsed

        if target > 0:
            pct = running / target * 100
        else:
            pct = 0
        logging.info(
            "[%.1fs] Running: %d/%d (%.0f%%) Pending: %d",
            elapsed,
            running,
            target,
            pct,
            pending,
        )

        if running >= target:
            break

        time.sleep(poll_interval)

    total_time = round(time.time() - t_scale, 2)
    final_running = _CountPods(ns, label, "Running")
    final_pending = _CountPods(ns, label, "Pending")
    if total_time > 0:
        rate = round(final_running / total_time, 2)
    else:
        rate = 0

    logging.info(
        "Scale-up complete: %d/%d Running in %.1fs (%.1f pods/sec)",
        final_running,
        target,
        total_time,
        rate,
    )

    # 4. Per-pod lifecycle timestamps, relative to the moment of the patch.
    lifecycle = _ScrapeLifecycle(ns, label, scale_start_epoch)

    wall_time = round(time.time() - t_wall_start, 2)

    # 5. Build samples; every sample carries the same metadata dict.
    extra = {
        "target_replicas": target,
        "final_running_count": final_running,
        "final_pending_count": final_pending,
        "wall_time_s": wall_time,
    }

    samples = []

    def _add(metric_suffix, value, unit):
        samples.append(
            utils.MakeSample(
                f"{BENCHMARK_NAME}_{metric_suffix}",
                value,
                unit,
                ns,
                extra,
            )
        )

    _add("total_time_to_ready", total_time, "seconds")
    _add("refill_rate", rate, "pods/sec")
    _add("drain_time", drain_time_s, "seconds")

    # Only emitted when at least one pod was seen Running during polling.
    if first_pod_time is not None:
        _add("first_pod_running", round(first_pod_time, 2), "seconds")

    _add("final_running_count", float(final_running), "count")
    _add("final_pending_count", float(final_pending), "count")

    # Pod lifecycle percentiles
    _EmitLifecycleSamples(samples, lifecycle, ns, extra)

    _add("wall_time", wall_time, "seconds")

    logging.info("Emitted %d samples for target_replicas=%d.", len(samples), target)
    return samples
def _ParseK8sTimestamp(ts_str):
    """Convert a timestamp such as ``2026-06-16T08:02:04Z`` to epoch seconds."""
    # fromisoformat() on older Python versions rejects a trailing "Z".
    return datetime.fromisoformat(ts_str.replace("Z", "+00:00")).timestamp()


def _SummarizeDeltas(vals):
    """Return p50/p95/max/count for ``vals`` (rounded to 2 dp), or {} if empty."""
    if not vals:
        return {}
    ordered = sorted(vals)
    n = len(ordered)

    def _interp(pct):
        # Linear interpolation between the two closest ranks. Plain index
        # truncation would return the upper median for even n and would make
        # p95 equal to the maximum for any n <= 20.
        idx = (pct / 100) * (n - 1)
        lo = int(idx)
        hi = min(lo + 1, n - 1)
        frac = idx - lo
        return ordered[lo] * (1 - frac) + ordered[hi] * frac

    return {
        "p50": round(_interp(50), 2),
        "p95": round(_interp(95), 2),
        "max": round(ordered[-1], 2),
        "count": n,
    }


def _ScrapeLifecycle(namespace, label, scale_start_epoch):
    """Scrape pod metadata to compute time-to-created/scheduled/running.

    For every pod matching ``label`` three deltas relative to
    ``scale_start_epoch`` are collected:
      * created: ``metadata.creationTimestamp``
      * scheduled: ``lastTransitionTime`` of the ``PodScheduled=True`` condition
      * running: ``lastTransitionTime`` of the ``Ready=True`` condition

    Args:
        namespace: Namespace to list pods in.
        label: Label selector passed to ``kubectl get pods -l``.
        scale_start_epoch: Epoch seconds the deltas are measured from.

    Returns:
        Dict with keys ``time_to_created_s``, ``time_to_scheduled_s`` and
        ``time_to_running_s``, each mapping to a dict with ``p50``, ``p95``,
        ``max`` and ``count`` (or {} when no pod supplied that timestamp).
        Returns {} if the pod list could not be fetched or parsed.
    """
    stdout, _, rc = utils.RunKubectl(
        ["get", "pods", "-n", namespace, "-l", label, "-o", "json"],
        timeout=60,
        raise_on_failure=False,
    )
    if rc != 0 or not stdout:
        return {}

    try:
        pods = json.loads(stdout).get("items", [])
    except json.JSONDecodeError as e:
        # Lifecycle stats are supplementary; do not fail the whole run because
        # kubectl produced output that is not valid JSON.
        logging.warning("Could not parse pod list JSON: %s", e)
        return {}

    created_deltas = []
    scheduled_deltas = []
    running_deltas = []
    # Condition type -> list that receives its transition delta.
    condition_targets = {
        "PodScheduled": scheduled_deltas,
        "Ready": running_deltas,
    }

    for pod in pods:
        meta = pod.get("metadata", {})
        status = pod.get("status", {})

        created_str = meta.get("creationTimestamp")
        if created_str:
            created_deltas.append(_ParseK8sTimestamp(created_str) - scale_start_epoch)

        for cond in status.get("conditions", []):
            target_list = condition_targets.get(cond.get("type"))
            if target_list is None or cond.get("status") != "True":
                continue
            ts_str = cond.get("lastTransitionTime")
            if ts_str:
                target_list.append(_ParseK8sTimestamp(ts_str) - scale_start_epoch)

    return {
        "time_to_created_s": _SummarizeDeltas(created_deltas),
        "time_to_scheduled_s": _SummarizeDeltas(scheduled_deltas),
        "time_to_running_s": _SummarizeDeltas(running_deltas),
    }
metric_base in _PHASE_MAP: + phase_data = lifecycle.get(lifecycle_key, {}) + for stat in ("p50", "p95", "max"): + val = phase_data.get(stat) + if val is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_{metric_base}_{stat}", + val, + "seconds", + namespace, + extra, + ) + ) + count = phase_data.get("count") + if count is not None: + samples.append( + utils.MakeSample( + f"{BENCHMARK_NAME}_{metric_base}_count", + float(count), + "count", + namespace, + extra, + ) + ) diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py index a56fe72b99..244ba5d774 100644 --- a/perfkitbenchmarker/providers/gcp/flags.py +++ b/perfkitbenchmarker/providers/gcp/flags.py @@ -580,6 +580,27 @@ ' the size derived from max_vm_count. Use when the cluster will scale' ' beyond the default node pool (e.g. kubernetes_node_scale with 5k nodes).', ) + +GKE_USE_BETA = flags.DEFINE_boolean( + 'gke_use_beta', + False, + 'Use gcloud beta for cluster creation (required for preview features ' + 'like pod snapshots).', +) + +GKE_ADDITIONAL_FLAGS = flags.DEFINE_list( + 'gke_additional_flags', + [], + 'Additional flags to pass to gcloud container clusters create. ' + 'Example: --gke_additional_flags=--enable-pod-snapshots,--enable-dataplane-v2', +) + +GKE_ADDITIONAL_NODEPOOL_FLAGS = flags.DEFINE_list( + 'gke_additional_nodepool_flags', + [], + 'Additional flags to pass to gcloud container node-pools create. 
' + 'Example: --gke_additional_nodepool_flags=--max-pods-per-node=250', +) GCE_PERFORMANCE_MONITORING_UNIT = flags.DEFINE_enum( 'gce_performance_monitoring_unit', None, diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index f943a53ff1..3c24ad941c 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -417,6 +417,12 @@ def _Create(self): if self.enable_aam: cmd.args.append('--auto-monitoring-scope=ALL') + # --- PKB Extension: beta gcloud and additional cluster create flags --- + if gcp_flags.GKE_USE_BETA.value: + cmd.use_beta_gcloud = True + for additional_flag in gcp_flags.GKE_ADDITIONAL_FLAGS.value: + cmd.args.append(additional_flag) + self._RunClusterCreateCommand(cmd) self._GetKubeconfig() self._CreateCustomComputeClass(self.default_nodepool) @@ -432,6 +438,10 @@ def _CreateNodePools(self): nodepool, cmd, ) + # --- PKB Extension: additional node pool create flags --- + for additional_flag in gcp_flags.GKE_ADDITIONAL_NODEPOOL_FLAGS.value: + cmd.args.append(additional_flag) + self._IssueResourceCreationCommand(cmd) self._CreateCustomComputeClass(nodepool) diff --git a/requirements.txt b/requirements.txt index 755f82737c..1313c628f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,3 +33,4 @@ setuptools>=40.3.0,<81 six>=1.13.0 timeout-decorator scipy +matplotlib From 0338d094f2e5095f0ed0578435f4b8f56520241d Mon Sep 17 00:00:00 2001 From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com> Date: Thu, 18 Jun 2026 04:14:08 -0400 Subject: [PATCH 2/5] attend to comments, fixes, and improvements --- .../config/agentic_benchmark_config.yaml | 324 +++++++ .../data/k8s_agents/config/gke-benchmark.conf | 171 ---- .../config/native_provision_config.yaml | 70 -- .../k8s_agents/manifests/adk-agent.yaml.j2 | 118 +++ .../k8s_agents/manifests/psi-reader.yaml.j2 | 56 ++ 
.../manifests/sandbox-router.yaml.j2 | 69 ++ .../manifests/sandbox-templates.yaml.j2 | 103 +++ .../manifests/snapshot-crds.yaml.j2 | 24 + .../snapshot-sandbox-template.yaml.j2 | 46 + .../adk_agent/generated.env.template | 6 +- .../adk_agent/gke_performance_agent/agent.py | 44 +- .../k8s_agents/workloads/adk_agent/main.py | 2 +- .../kubernetes/agentic/gke_benchmark_utils.py | 135 +-- .../agentic/gke_chromium_density_benchmark.py | 45 +- .../agentic/gke_deletion_benchmark.py | 56 +- .../kubernetes/agentic/gke_deploy_utils.py | 846 +++++------------- .../agentic/gke_image_build_utils.py | 68 +- .../agentic/gke_payload_benchmark.py | 39 +- .../kubernetes/agentic/gke_post_teardown.py | 77 ++ .../agentic/gke_prerequisite_setup.py | 516 ----------- .../kubernetes/agentic/gke_prerequisites.py | 107 +++ .../kubernetes/agentic/gke_provision_utils.py | 698 --------------- .../agentic/gke_python_density_benchmark.py | 59 +- .../kubernetes/agentic/gke_qps_benchmark.py | 62 +- .../agentic/gke_snapshot_benchmark.py | 196 ++-- .../agentic/gke_warmpool_benchmark.py | 88 +- perfkitbenchmarker/providers/gcp/flags.py | 6 - .../providers/gcp/google_kubernetes_engine.py | 4 +- requirements.txt | 1 - snapshot-sandbox-template.yaml.j2 | 46 + 30 files changed, 1586 insertions(+), 2496 deletions(-) create mode 100644 perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml delete mode 100644 perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf delete mode 100644 perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 create mode 100644 perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 create mode 
100644 perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py delete mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py create mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py delete mode 100644 perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py create mode 100644 snapshot-sandbox-template.yaml.j2 diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml new file mode 100644 index 0000000000..95077b469c --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml @@ -0,0 +1,324 @@ +# Agentic Benchmark Configuration for GKE +# Used with: --benchmark_config_file=perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml +# +# User/environment-specific flags that MUST be passed on CLI: +# --project= +# --owner= +# --gce_network_name=-agentic-vpc +# --gke_additional_flags="--workload-pool=.svc.id.goog,--subnetwork=-agentic-subnet,--enable-master-authorized-networks,--master-authorized-networks=$(curl -s ifconfig.me)/32" +# +# Per-run flags: +# --run_stage=provision|prepare|run,cleanup|teardown +# --run_uri= +# --temp_dir= +# +# Benchmark-specific sweep parameters (vary per run): +# --gke_python_density_concurrent_sandbox_count=N +# --gke_snapshot_preload_mb=N +# etc. 
+ +# =========================================================================== +# Shared cluster configuration (identical across all benchmarks) +# =========================================================================== + +gke_python_density: + flags: + # --- Cluster creation flags --- + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + # --- Agentic workload flags --- + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_chromium_density: + flags: + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: 
+ vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_payload: + flags: + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_qps: + flags: + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + 
boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_snapshot: + flags: + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_warmpool: + flags: + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + + +gke_deletion: + flags: + gke_additional_flags: + - 
"--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.3-gke.1389000" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + skip_image_build: false + + container_cluster: + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor diff --git a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf b/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf deleted file mode 100644 index 99e6411577..0000000000 --- a/perfkitbenchmarker/data/k8s_agents/config/gke-benchmark.conf +++ /dev/null @@ -1,171 +0,0 @@ -#!/bin/bash -# -# Agentic Workload Benchmarking configuration file for GKE -# Adapted from nginx DPv2 baseline for Python Sandbox & Chromium Simulation -# -# Override machine type and cluster suffix via environment variables: -# MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d bash setup_infrastructure_gke.sh -# -# Supported profiles: -# MACHINE_TYPE=c3-standard-192-metal CLUSTER_SUFFIX=c3metal -# MACHINE_TYPE=c4-standard-8 CLUSTER_SUFFIX=c4 (default) -# MACHINE_TYPE=c4d-standard-8 CLUSTER_SUFFIX=c4d -# MACHINE_TYPE=c4a-standard-8 CLUSTER_SUFFIX=c4a (ARM64) - -USER_NAME_PREFIX=${USER%%.*} - -# GCP Project (MUST be set before running any script) -PROJECT_ID="your-project-id" -REGION="us-central1" -ZONE="us-central1-a" - -# Google/ADK aliases (derived from canonical names above) -# These are used by envsubst for the K8s 
manifest and by the ADK agent. -GOOGLE_CLOUD_PROJECT="${PROJECT_ID}" -GOOGLE_CLOUD_LOCATION="${REGION}" - -# Network Configuration -VPC_NAME="${USER_NAME_PREFIX}-agentic-vpc" -SUBNET_NAME="${USER_NAME_PREFIX}-agentic-subnet" -SUBNET_CIDR="10.134.20.0/24" -LAPTOP_IP="$(curl -s ifconfig.me)/32" # PUBLIC IP to access the target (dynamically detected) -# Cloud Router and NAT Configuration -ROUTER_NAME="${USER_NAME_PREFIX}-agentic-nat-router" -NAT_NAME="${USER_NAME_PREFIX}-agentic-nat-config" - -# GKE Cluster Configuration -CLUSTER_SUFFIX="${CLUSTER_SUFFIX:-c4}" -CLUSTER_NAME="${USER_NAME_PREFIX}-agentic-${CLUSTER_SUFFIX}" -GKE_VERSION="1.35.3-gke.1389000" -USE_CONNECT_GATEWAY="${USE_CONNECT_GATEWAY:-true}" # Use Connect Gateway for kubectl access - # Set to "false" to use direct public endpoint - -# ========================================================================= -# Machine Type Configuration (overridable via MACHINE_TYPE env var) -# ========================================================================= -MACHINE_TYPE="${MACHINE_TYPE:-c4-standard-8}" - -# Derive disk type from machine family: -# C3 → pd-balanced, C4/C4D/C4A → hyperdisk-balanced -_MACHINE_FAMILY="${MACHINE_TYPE%%-*}" # e.g. 
"c4" from "c4-standard-8" -case "${_MACHINE_FAMILY}" in - c3) _DISK_TYPE="pd-balanced" ;; - *) _DISK_TYPE="hyperdisk-balanced" ;; -esac - -# Derive target architecture from machine family: -# C4A → arm64, everything else → amd64 -case "${_MACHINE_FAMILY}" in - c4a) _TARGET_ARCH="arm64" ;; - *) _TARGET_ARCH="amd64" ;; -esac - -# Derive unique master CIDR per cluster (each private cluster needs its own /28): -# c4 → 172.16.0.0/28, c4d → 172.16.0.16/28, c4a → 172.16.0.32/28, c3metal → 172.16.0.48/28 -case "${CLUSTER_SUFFIX}" in - c4) MASTER_IPV4_CIDR="172.16.0.0/28" ;; - c4d) MASTER_IPV4_CIDR="172.16.0.16/28" ;; - c4a) MASTER_IPV4_CIDR="172.16.0.32/28" ;; - c3metal) MASTER_IPV4_CIDR="172.16.0.48/28" ;; - *) MASTER_IPV4_CIDR="172.16.0.64/28" ;; # fallback for future clusters -esac - -DEFAULT_POOL_MACHINE_TYPE="${MACHINE_TYPE}" -DEFAULT_POOL_DISK_TYPE="${_DISK_TYPE}" -DEFAULT_POOL_DISK_SIZE="50" # Disk size in GB -DEFAULT_POOL_NODE_COUNT="1" # Number of nodes in the default pool - -# ========================================================================= -# Agentic Workload NodePools -# ========================================================================= - -# Sandbox NodePool (Python + Chromium workloads with gVisor) -SANDBOX_NODE_POOL_NAME="agentic-sandbox-pool" -SANDBOX_MACHINE_TYPE="${MACHINE_TYPE}" # Same as default pool (overridable) -SANDBOX_DISK_SIZE="100" -SANDBOX_DISK_TYPE="${_DISK_TYPE}" # Derived from machine family -SANDBOX_NODE_COUNT="1" -SANDBOX_MAX_PODS_PER_NODE="250" # Raise from default 110 to avoid GKE pod limit as density ceiling -SANDBOX_ENABLE_GVISOR="true" # Enable GKE Sandbox (gVisor) on this pool - -AGENT_SANDBOX_VERSION="v0.4.6" - -# ========================================================================= -# Workload Configuration -# ========================================================================= -AGENTIC_NAMESPACE="agentic" - -# Python Sandbox Workload -PYTHON_IMAGE="python:3.11-slim" -PYTHON_POD_NAME="python-sandbox" 
-PYTHON_REPLICAS="1" # Start with 1; sweep for density tests -PYTHON_CPU_REQUEST="1" -PYTHON_CPU_LIMIT="2" -PYTHON_MEMORY_REQUEST="1Gi" -PYTHON_MEMORY_LIMIT="4Gi" - -# Chromium Browser Simulation Workload -CHROMIUM_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/chrome-sandbox:${_TARGET_ARCH}" -CHROMIUM_POD_NAME="chromium-sandbox" -CHROMIUM_REPLICAS="1" # Start with 1; sweep for density tests - -# Mock LLM Coordinator -MOCK_LLM_IMAGE="python:3.11-slim" -MOCK_LLM_POD_NAME="mock-llm-coordinator" -MOCK_LLM_PORT="8080" - -# ========================================================================= -# Benchmark Parameters -# ========================================================================= - -# Python Density Benchmark (UC-B) -SAMPLE_COUNT="20" # Samples per sandbox session -SAMPLE_WARMUP="0" # Warmup samples (excluded from stats) - -# Payload Transfer Benchmark (UC-D) -PAYLOAD_SIZE_MB="1" # Default payload size in MB -PAYLOAD_ITERATIONS="20" # Transfer iterations per session - -# Chromium Benchmark -CHROMIUM_TASK_COUNT="10" # Number of browser tasks per run -CHROMIUM_WARMUP_TASKS="2" - -# General -BENCHMARK_DURATION="300" # Duration in seconds per test -NOTE="agentic-V0-gVisor-DPv2-baseline" - -# ========================================================================= -# Logging -# ========================================================================= -# Log directory — defaults to tmp/ inside the repo (gitignored). -# Override by setting BASE_LOG_DIR before sourcing this file, -# e.g. export BASE_LOG_DIR="$HOME/agentic-logs" to keep logs outside the repo. -_REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." 
&& pwd)" -BASE_LOG_DIR="${BASE_LOG_DIR:-${_REPO_ROOT}/tmp/agentic-logs}" -WRAPPER_LOG_DIR="${BASE_LOG_DIR}/wrapper_logs" - -LOG_PATH="logs" -LOG_LEVEL="info" - -# ========================================================================= -# ADK Agent Deployment -# ========================================================================= -ADK_REPO_NAME="adk-repo" # Artifact Registry repository name -ADK_IMAGE_NAME="adk-agent" # Container image name -GOOGLE_GENAI_USE_VERTEXAI="true" -ADK_IMAGE_PATH="${REGION}-docker.pkg.dev/${PROJECT_ID}/${ADK_REPO_NAME}/${ADK_IMAGE_NAME}:${_TARGET_ARCH}" -ADK_K8S_SA="adk-agent-sa" # Kubernetes service account for the agent -CLOUD_BUILD_SA="adk-cloud-build-sa" # Service account for Cloud Build submissions - -# Sandbox Router & Warm Pool -SANDBOX_ROUTER_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/agent-sandbox/sandbox-router:${_TARGET_ARCH}" -WARMPOOL_REPLICAS="2" # Number of pre-warmed sandbox pods - -# ========================================================================= -# Pod Snapshot Configuration (UC-A: Cold Start & Snapshot Pressure Test) -# ========================================================================= -ENABLE_POD_SNAPSHOTS="true" # Enable pod snapshots feature on cluster -SNAPSHOTS_BUCKET_NAME="agent-sandbox-snapshots-${PROJECT_ID}" -SNAPSHOT_KSA_NAME="pod-snapshot-sa" # KSA for snapshot storage access -SNAPSHOT_FOLDER="benchmark-snapshots" # Managed folder inside the bucket -SNAPSHOT_PRELOAD_MB="10" # Default memory preload for snapshot sizing diff --git a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml deleted file mode 100644 index 765c7c4256..0000000000 --- a/perfkitbenchmarker/data/k8s_agents/config/native_provision_config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# Native PKB Provision Config for Agentic Benchmarks -# Used with --gke_provision_mode=native -# -# Prerequisites (run once before PKB): -# python 
tools/agentic-benchmark/scripts/prerequisite_setup.py \ -# --project_id= --machine_type= -# -# IMPORTANT: Do NOT pass --gce_subnet_name on the command line. -# PKB incorrectly resolves it as the --network value. Instead, pass the -# subnet via --gke_additional_flags on the command line. -# -# Usage (provision): -# python pkb.py --benchmarks=gke_python_density \ -# --gke_provision_mode=native \ -# --benchmark_config_file=k8s_agents/config/native_provision_config.yaml \ -# --gce_network_name=-agentic-vpc \ -# --gce_subnet_region=us-central1 \ -# --zone=us-central1-a \ -# --project= \ -# --owner= \ -# --container_cluster_version=1.35.3-gke.1389000 \ -# --gke_additional_flags="--subnetwork=-agentic-subnet,--workload-pool=.svc.id.goog" -# -# For sweeps (cluster pre-exists, PKB skips provision/teardown): -# The sweep bridge injects --run_stage=run,cleanup automatically. - -gke_python_density: - flags: - # Force gcloud beta for preview features (pod snapshots) - gke_use_beta: true - - # Cluster-level additional flags (appended to gcloud [beta] container clusters create) - # NOTE: --subnetwork and --workload-pool are user/project-specific. - # Pass them on the command line via --gke_additional_flags=... (comma-separated). 
- gke_additional_flags: - "--enable-pod-snapshots" - "--enable-dataplane-v2" - "--enable-private-nodes" - "--enable-ip-alias" - "--master-ipv4-cidr=172.16.0.0/28" - - # Node-pool-level additional flags (appended to gcloud container node-pools create) - gke_additional_nodepool_flags: - "--max-pods-per-node=250" - - # Standard PKB GKE flags - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - - container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 new file mode 100644 index 0000000000..068b50be11 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/adk-agent.yaml.j2 @@ -0,0 +1,118 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: adk-agent-sa + namespace: {{ ns }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: adk-agent-sandbox-role +rules: + - apiGroups: ["agents.x-k8s.io"] + resources: ["sandboxes"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: ["agents.x-k8s.io", "extensions.agents.x-k8s.io"] # SandboxWarmPool is served from the extensions group + resources: ["sandboxwarmpool", "sandboxwarmpools"] + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions.agents.x-k8s.io"] + resources: ["sandboxclaims"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] + - apiGroups: [""] + resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/portforward"] + verbs: ["create"] + - apiGroups: ["metrics.k8s.io"] + resources:
["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: adk-agent-sandbox-binding + namespace: {{ ns }} +subjects: + - kind: ServiceAccount + name: adk-agent-sa + namespace: {{ ns }} +roleRef: + kind: ClusterRole + name: adk-agent-sandbox-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: adk-agent + namespace: {{ ns }} +spec: + replicas: 1 + selector: + matchLabels: + app: adk-agent + template: + metadata: + labels: + app: adk-agent + spec: + serviceAccountName: adk-agent-sa + containers: + - name: adk-agent + imagePullPolicy: Always + image: {{ adk_image }} + resources: + limits: + memory: "16384Mi" + cpu: "6000m" + requests: + memory: "512Mi" + cpu: "1000m" + ports: + - containerPort: 8080 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 15 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 6 + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + env: + - name: PORT + value: "8080" + - name: GOOGLE_CLOUD_PROJECT + value: "{{ project }}" + - name: GOOGLE_CLOUD_LOCATION + value: "{{ region }}" + - name: GOOGLE_GENAI_USE_VERTEXAI + value: "true" + - name: CLUSTER_NAME + value: "{{ cluster }}" + - name: AGENTIC_NAMESPACE + value: "{{ ns }}" + - name: SANDBOX_ROUTER_URL + value: "http://sandbox-router-svc.{{ ns }}.svc.cluster.local:8080" +--- +apiVersion: v1 +kind: Service +metadata: + name: adk-agent + namespace: {{ ns }} +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: 8080 + selector: + app: adk-agent diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 new file mode 100644 index 0000000000..d76f851e95 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/psi-reader.yaml.j2 @@ -0,0 +1,56 @@ +--- 
+apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: psi-reader + namespace: {{ ns }} + labels: + app: psi-reader +spec: + selector: + matchLabels: + app: psi-reader + template: + metadata: + labels: + app: psi-reader + spec: + nodeSelector: + pkb_nodepool: sandbox + tolerations: + - key: "sandbox.gke.io/runtime" + operator: "Equal" + value: "gvisor" + effect: "NoSchedule" + - key: "dedicated" + operator: "Equal" + value: "agentic-sandbox" + effect: "NoSchedule" + hostPID: true + containers: + - name: reader + image: busybox:1.36 + command: ["sleep", "infinity"] + securityContext: + privileged: true + volumeMounts: + - name: cgroup + mountPath: /host/sys/fs/cgroup + readOnly: true + - name: proc + mountPath: /host/proc + readOnly: true + resources: + requests: + cpu: "10m" + memory: "16Mi" + limits: + cpu: "50m" + memory: "32Mi" + volumes: + - name: cgroup + hostPath: + path: /sys/fs/cgroup + - name: proc + hostPath: + path: /proc diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 new file mode 100644 index 0000000000..0d0541cfe7 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-router.yaml.j2 @@ -0,0 +1,69 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: sandbox-router-svc + namespace: {{ ns }} +spec: + type: ClusterIP + selector: + app: sandbox-router + ports: + - name: http + protocol: TCP + port: 8080 + targetPort: 8080 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sandbox-router-deployment + namespace: {{ ns }} +spec: + replicas: 2 + selector: + matchLabels: + app: sandbox-router + template: + metadata: + labels: + app: sandbox-router + spec: + serviceAccountName: adk-agent-sa + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: sandbox-router + containers: + - name: router + image: {{ router_image }} + 
ports: + - containerPort: 8080 + env: + - name: ALLOW_UNAUTHENTICATED_ROUTER + value: "true" + readinessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "1000m" + memory: "1Gi" + securityContext: + runAsUser: 1000 + runAsGroup: 1000 diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 new file mode 100644 index 0000000000..e9af43332d --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/sandbox-templates.yaml.j2 @@ -0,0 +1,103 @@ +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: python-sandbox-template + namespace: {{ ns }} +spec: + podTemplate: + metadata: + labels: + sandbox: python-sandbox-example + spec: + runtimeClassName: gvisor + containers: + - name: python-runtime + image: {{ python_image }} + nodeSelector: + pkb_nodepool: sandbox + tolerations: + - key: "sandbox.gke.io/runtime" + operator: "Equal" + value: "gvisor" + effect: "NoSchedule" + - key: "dedicated" + operator: "Equal" + value: "agentic-sandbox" + effect: "NoSchedule" + restartPolicy: "OnFailure" +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxWarmPool +metadata: + name: python-sandbox-warmpool + namespace: {{ ns }} +spec: + replicas: {{ warmpool_replicas }} + sandboxTemplateRef: + name: python-sandbox-template +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: chromium-sandbox-template + namespace: {{ ns }} +spec: + podTemplate: + metadata: + labels: + sandbox: chromium-sandbox-example + spec: + runtimeClassName: gvisor + containers: + - name: chromium-runtime + image: {{ chromium_image }} + command: ["/bin/sh", "-c"] + args: + - | + socat 
TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 & + exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank + ports: + - containerPort: 9223 + nodeSelector: + pkb_nodepool: sandbox + tolerations: + - key: "sandbox.gke.io/runtime" + operator: "Equal" + value: "gvisor" + effect: "NoSchedule" + - key: "dedicated" + operator: "Equal" + value: "agentic-sandbox" + effect: "NoSchedule" + restartPolicy: "OnFailure" +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxWarmPool +metadata: + name: chromium-sandbox-warmpool + namespace: {{ ns }} +spec: + replicas: {{ chromium_replicas }} + sandboxTemplateRef: + name: chromium-sandbox-template +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-orchestrator-to-chromium + namespace: {{ ns }} +spec: + podSelector: + matchLabels: + sandbox: chromium-sandbox-example + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: adk-agent + ports: + - protocol: TCP + port: 9223 diff --git a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 new file mode 100644 index 0000000000..afc4e0ee4c --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-crds.yaml.j2 @@ -0,0 +1,24 @@ +--- +apiVersion: podsnapshot.gke.io/v1 +kind: PodSnapshotStorageConfig +metadata: + name: benchmark-pssc-gcs +spec: + snapshotStorageConfig: + gcs: + bucket: "{{ bucket_name }}" + path: "{{ snapshot_folder }}" +--- +apiVersion: podsnapshot.gke.io/v1 +kind: PodSnapshotPolicy +metadata: + name: benchmark-psp + namespace: {{ ns }} +spec: + storageConfigName: benchmark-pssc-gcs + selector: + matchLabels: + app: snapshot-benchmark-workload + triggerConfig: + type: manual + postCheckpoint: resume diff --git 
a/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 new file mode 100644 index 0000000000..11850eb444 --- /dev/null +++ b/perfkitbenchmarker/data/k8s_agents/manifests/snapshot-sandbox-template.yaml.j2 @@ -0,0 +1,46 @@ +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: {{ template_name }} + namespace: {{ namespace }} +spec: + podTemplate: + metadata: + labels: + app: snapshot-benchmark-workload + spec: + serviceAccountName: {{ ksa_name }} + runtimeClassName: gvisor + containers: + - name: preloader + image: python:3.11-slim + command: ["python3", "-c"] + args: + - | + import time, os + preload_mb = int(os.environ.get("PRELOAD_MB", "10")) + print(f"Preloading {preload_mb} MB of memory...", flush=True) + _ballast = bytearray(preload_mb * 1024 * 1024) + print(f"Preload complete. Starting counter.", flush=True) + i = 0 + while True: + print(f"Count: {i}", flush=True) + i += 1 + time.sleep(1) + env: + - name: PRELOAD_MB + value: "{{ preload_mb }}" + resources: + requests: + cpu: "250m" + memory: "{{ memory_mi }}Mi" + ephemeral-storage: "512Mi" + nodeSelector: + pkb_nodepool: sandbox + tolerations: + - key: "sandbox.gke.io/runtime" + operator: "Equal" + value: "gvisor" + effect: "NoSchedule" + restartPolicy: "OnFailure" diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template index 0828d0a5ff..3ec5f62d0b 100644 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template @@ -1,11 +1,9 @@ # ========================================================================== # ADK Agent — Generated Environment File Template # ========================================================================== -# This file is 
rendered into generated.env by deploy_gke.sh using envsubst. -# The single source of truth is: tools/agentic-benchmark/config/gke-benchmark.conf +# This file is rendered into generated.env by gke_image_build_utils._GenerateEnvFile from PKB flags. # -# For local dev, run deploy_gke.sh to generate generated.env, -# or manually create generated.env with your values. +# For local dev, manually create generated.env with your values. # ========================================================================== # --- Required: GKE executor config --- diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py index 46094d244f..2aef3c153c 100644 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py @@ -1,3 +1,39 @@ +"""GKE Performance Agent -- ADK agent definition. + +This file runs INSIDE the GKE cluster as part of the adk-agent Deployment +(see gke_deploy_utils.py for the K8s manifest). It is NOT run from the +machine executing PKB. The ADK agent pod serves a FastAPI app (main.py) +that PKB calls via HTTP through a kubectl port-forward tunnel. + +Execution flow: + PKB (your laptop/CI) -> kubectl port-forward -> adk-agent pod -> this file + -> GkeCodeExecutor -> SandboxClient -> gVisor sandbox pod +""" + +"""GKE Performance Agent — ADK agent definition for sandbox benchmarking. + +EXECUTION CONTEXT: + This file runs INSIDE the GKE cluster, NOT on the PKB orchestrator machine. + It is packaged into a container image (see ../Dockerfile) and deployed as + the 'adk-agent' Deployment in the benchmark namespace.
+ + Execution flow: + PKB machine GKE Cluster + ---------- ----------- + benchmark.Run() + -> CallAgentApi("/benchmark/...") -> main.py (FastAPI) + -> Runner(agent=root_agent) + -> MockLlm yields code + -> V3GkeCodeExecutor._execute_in_sandbox() + -> SandboxClient.create_sandbox() + -> sandbox.files.write("script.py", code) + -> sandbox.commands.run("python3 script.py") + -> SandboxClient.delete_sandbox() + + The PKB machine communicates with this agent via HTTP (port-forwarded + through kubectl or via a LoadBalancer/ClusterIP service). +""" + from google.adk.agents import LlmAgent from google.adk.code_executors import GkeCodeExecutor from google.adk.code_executors.code_execution_utils import CodeExecutionResult @@ -19,7 +55,7 @@ basedir = os.path.abspath(os.path.dirname(__file__)) agent_dir = os.path.join(basedir, "..") -# Load generated.env (auto-generated from gke-benchmark.conf by deploy_gke.sh). +# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags). # In GKE, K8s manifest env vars take precedence. load_dotenv(os.path.join(agent_dir, "generated.env")) @@ -67,9 +103,9 @@ def _build_benchmark_code() -> str: """Build the benchmark script with current env values injected. 
Selects the script based on BENCHMARK_MODE env var: - - 'density' → benchmark_density.py (Use Case B) - - 'payload' → benchmark_payload.py (Use Case D) - - 'qps' → benchmark_qps.py (Use Case F) + - 'density' → benchmark_density.py + - 'payload' → benchmark_payload.py + - 'qps' → benchmark_qps.py """ mode = os.getenv("BENCHMARK_MODE", "density") diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py index fa13f11fd7..bcdb090188 100644 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py +++ b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py @@ -90,7 +90,7 @@ basedir = os.path.abspath(os.path.dirname(__file__)) -# Load generated.env (auto-generated from gke-benchmark.conf by build_images_gke.sh). +# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags). # In GKE, K8s manifest env vars take precedence. load_dotenv(os.path.join(basedir, "generated.env")) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py index ee4603a4b3..02d2d40a81 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py @@ -14,51 +14,46 @@ from absl import flags from perfkitbenchmarker import sample +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.resources.container_service import kubectl FLAGS = flags.FLAGS +# Module-level benchmark_spec reference for metadata derivation. +# Set by each benchmark's Run() via set_benchmark_spec(). 
+_current_benchmark_spec = None + + # --------------------------------------------------------------------------- # Shared flags (registered once; importable by benchmark modules) # --------------------------------------------------------------------------- flags.DEFINE_string( - "gke_namespace", + "k8s_namespace", "agentic", "Kubernetes namespace where the agentic workloads are deployed.", ) -flags.DEFINE_string( - "gke_machine_type", - "", - "Machine type of the sandbox node pool. Recorded in sample metadata.", -) - -flags.DEFINE_string( - "gke_kubeconfig", - "", - "Path to a kubeconfig file. If empty, the system default is used.", -) - flags.DEFINE_bool( - "gke_gvisor", + "k8s_gvisor", True, "Whether the sandbox node pool uses gVisor. Recorded in sample metadata.", ) flags.DEFINE_string( - "gke_note", + "gke_benchmark_note", "", "Arbitrary note string attached to every sample for tagging runs.", ) flags.DEFINE_string( - "gke_api_url", + "k8s_agent_api_url", "http://localhost:8080", "Base URL of the ADK Agent API.", ) flags.DEFINE_integer( - "gke_api_timeout", + "k8s_agent_api_timeout", 600, "HTTP timeout in seconds for agent API benchmark calls.", ) @@ -71,14 +66,14 @@ def GetAgentApiUrl(): """Return the base URL of the ADK agent API service.""" - return FLAGS.gke_api_url.rstrip("/") + return FLAGS.k8s_agent_api_url.rstrip("/") def CheckAgentHealthz(api_url=None, required=True): """Verify the agent API is reachable via /healthz. Args: - api_url: Base URL to check. Defaults to FLAGS.gke_api_url. + api_url: Base URL to check. Defaults to FLAGS.k8s_agent_api_url. required: If True (default), raise on failure. If False, log warning. 
""" if api_url is None: @@ -102,7 +97,7 @@ def CheckAgentHealthz(api_url=None, required=True): def CallAgentApi(endpoint, payload, timeout=None): """POST JSON to an agent API endpoint and return the parsed response.""" if timeout is None: - timeout = FLAGS.gke_api_timeout + timeout = FLAGS.k8s_agent_api_timeout base_url = GetAgentApiUrl() url = f"{base_url}{endpoint}" data = json.dumps(payload).encode("utf-8") @@ -131,23 +126,17 @@ def CallAgentApi(endpoint, payload, timeout=None): # --------------------------------------------------------------------------- -def _KubectlCmd(args): - """Build a kubectl command list, optionally injecting --kubeconfig.""" - cmd = ["kubectl"] - if FLAGS.gke_kubeconfig: - cmd += ["--kubeconfig", FLAGS.gke_kubeconfig] - return cmd + list(args) - - def RunKubectl(args, timeout=120, raise_on_failure=True): - """Run a kubectl command and return (stdout, stderr, retcode).""" - cmd = _KubectlCmd(args) - proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) - if raise_on_failure and proc.returncode != 0: - raise RuntimeError( - f"kubectl failed (rc={proc.returncode}): {proc.stderr}" - ) - return proc.stdout, proc.stderr, proc.returncode + """Run a kubectl command and return (stdout, stderr, retcode). + + Delegates to PKB's native kubectl module which handles kubeconfig + and retries for transient connection errors automatically. 
+ """ + return kubectl.RunKubectlCommand( + list(args), + timeout=timeout, + raise_on_failure=raise_on_failure, + ) def CountPods(namespace, label, phase=None): @@ -190,6 +179,13 @@ def DrainWarmPool(namespace, warmpool_name, label, timeout=120): "patch", "sandboxwarmpool", warmpool_name, "-n", namespace, "--type=merge", f"-p={patch_json}", ], raise_on_failure=False) + + # Delete lingering SandboxClaims that may prevent pod termination + RunKubectl([ + "delete", "sandboxclaims", "--all", + "-n", namespace, "--ignore-not-found=true", + ], timeout=60, raise_on_failure=False) + deadline = time.time() + timeout while time.time() < deadline: remaining = CountPods(namespace, label) @@ -197,12 +193,20 @@ def DrainWarmPool(namespace, warmpool_name, label, timeout=120): logging.info("Warm pool drained successfully") return True logging.info("Draining... %d pods remaining", remaining) - time.sleep(3) + time.sleep(2) logging.warning("Drain timed out, %d pods still present", CountPods(namespace, label)) return False +def set_benchmark_spec(benchmark_spec): + """Store benchmark_spec for metadata derivation (called by Run()).""" + global _current_benchmark_spec + _current_benchmark_spec = benchmark_spec + + + + # --------------------------------------------------------------------------- # Sample construction # --------------------------------------------------------------------------- @@ -212,12 +216,25 @@ def BuildMetadata(namespace, extra=None): """Construct the common metadata dict for all samples.""" metadata = { "namespace": namespace, - "gvisor": FLAGS.gke_gvisor, + "gvisor": FLAGS.k8s_gvisor, } - if FLAGS.gke_machine_type: - metadata["machine_type"] = FLAGS.gke_machine_type - if FLAGS.gke_note: - metadata["note"] = FLAGS.gke_note + # Derive machine_type from benchmark_spec (set via set_benchmark_spec) + machine_type = None + if _current_benchmark_spec: + cluster = getattr(_current_benchmark_spec, 'container_cluster', None) + if cluster: + # Prefer sandbox nodepool 
machine_type over default pool + nodepools = getattr(cluster, 'nodepools', None) + if nodepools and isinstance(nodepools, dict): + sandbox_pool = nodepools.get('sandbox') + if sandbox_pool and hasattr(sandbox_pool, 'vm_spec'): + machine_type = getattr(sandbox_pool.vm_spec, 'machine_type', None) + if not machine_type and hasattr(cluster, 'vm_spec'): + machine_type = getattr(cluster.vm_spec, 'machine_type', None) + if machine_type: + metadata["machine_type"] = machine_type + if FLAGS.gke_benchmark_note: + metadata["note"] = FLAGS.gke_benchmark_note if extra: metadata.update(extra) return metadata @@ -238,37 +255,37 @@ def MakeSample(metric, value, unit, namespace, extra_metadata=None): # --------------------------------------------------------------------------- flags.DEFINE_bool( - "gke_auto_portforward", + "k8s_auto_portforward", True, "Automatically manage kubectl port-forward to the agent service.", ) flags.DEFINE_integer( - "gke_portforward_local_port", + "k8s_portforward_local_port", 8080, "Local port for kubectl port-forward.", ) flags.DEFINE_integer( - "gke_portforward_remote_port", + "k8s_portforward_remote_port", 80, "Remote service port for kubectl port-forward.", ) flags.DEFINE_string( - "gke_portforward_service", + "k8s_portforward_service", "svc/adk-agent", "Kubernetes service to port-forward to.", ) flags.DEFINE_float( - "gke_portforward_reconnect_delay", + "k8s_portforward_reconnect_delay", 1.0, "Seconds to wait before reconnecting after port-forward drops.", ) flags.DEFINE_float( - "gke_portforward_health_timeout", + "k8s_portforward_health_timeout", 30.0, "Seconds to wait for agent health check after starting port-forward.", ) @@ -339,15 +356,15 @@ def stop(self): def _loop(self): """Background reconnect loop.""" - ns = FLAGS.gke_namespace - svc = FLAGS.gke_portforward_service - local_port = FLAGS.gke_portforward_local_port - remote_port = FLAGS.gke_portforward_remote_port - delay = FLAGS.gke_portforward_reconnect_delay + ns = FLAGS.k8s_namespace + 
svc = FLAGS.k8s_portforward_service + local_port = FLAGS.k8s_portforward_local_port + remote_port = FLAGS.k8s_portforward_remote_port + delay = FLAGS.k8s_portforward_reconnect_delay cmd = ["kubectl"] - if FLAGS.gke_kubeconfig: - cmd += ["--kubeconfig", FLAGS.gke_kubeconfig] + if FLAGS.kubeconfig: + cmd += ["--kubeconfig", FLAGS.kubeconfig] cmd += [ "port-forward", svc, "-n", ns, @@ -425,7 +442,7 @@ def _kill_orphan(self): except (OSError, ValueError): self._cleanup_pid_file() - local_port = FLAGS.gke_portforward_local_port + local_port = FLAGS.k8s_portforward_local_port try: result = subprocess.run( ["lsof", "-ti", f":{local_port}"], @@ -456,14 +473,14 @@ def EnsurePortForward(): Blocks until the agent health check passes or timeout is reached. Safe to call multiple times - only starts one background loop. """ - if not FLAGS.gke_auto_portforward: - logging.info("Auto port-forward disabled (--gke_auto_portforward=false)") + if not FLAGS.k8s_auto_portforward: + logging.info("Auto port-forward disabled (--k8s_auto_portforward=false)") return _port_forward_manager.start() import time as _time - timeout = FLAGS.gke_portforward_health_timeout + timeout = FLAGS.k8s_portforward_health_timeout deadline = _time.time() + timeout api_url = GetAgentApiUrl() diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py index 0da929cbbd..24d55350b5 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent Chromium Density Saturation (Use Case C). +"""PKB Benchmark: GKE Agent Chromium Density Saturation . Atomic single-point measurement of Chromium browser sandbox density on a pre-provisioned GKE cluster with gVisor isolation. 
Measures interaction @@ -11,11 +11,11 @@ Usage: python pkb.py --benchmarks=gke_chromium_density \\ - --gke_chromium_density=4 \\ + --gke_chromium_density_concurrent_sessions=4 \\ --gke_chromium_density_task_count=10 \\ --gke_chromium_density_warmup_tasks=5 \\ - --gke_namespace=agentic \\ - --gke_api_url=http://localhost:8080 + --k8s_namespace=agentic \\ + --k8s_agent_api_url=http://localhost:8080 Samples emitted (per run): - gke_chromium_density_interaction_mean (ms) @@ -48,7 +48,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -68,7 +67,7 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_chromium_density", + "gke_chromium_density_concurrent_sessions", 1, "Number of concurrent Chromium browser sessions to run.", ) @@ -109,11 +108,6 @@ # --------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -125,7 +119,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) utils.EnsurePortForward() logging.info("Prepare complete.") @@ -137,8 +131,10 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. 
""" - ns = FLAGS.gke_namespace - density = FLAGS.gke_chromium_density + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace + density = FLAGS.gke_chromium_density_concurrent_sessions logging.info("=== Run: chromium_density=%d ===", density) @@ -227,7 +223,7 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Clean up after measurement. Delete claims and drain warm pool.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("Cleanup: deleting SandboxClaims and draining warm pool.") # Delete any lingering SandboxClaims to release claimed pods @@ -255,18 +251,25 @@ def Cleanup(benchmark_spec): logging.info("Cleanup complete (cluster persists).") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra): - """Emit a sample if the key exists in the aggregate dict.""" + """Emit a sample if the key exists in the aggregate dict. + + Args: + samples: List to append the new sample.Sample to. + agg: Aggregate metrics dict returned by the agent API response. + agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms"). + metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric + name (e.g. "orchestrator_cel_mean"). + unit: Unit string for the sample (e.g. "ms", "MB", "seconds"). + namespace: Kubernetes namespace (included in sample metadata). + extra: Dict of additional metadata key-value pairs attached to + every sample (density, session counts, wall time, etc.). 
+ """ value = agg.get(agg_key) if value is not None: samples.append( diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py index cd12169fcd..92b360919d 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent Deletion & Cleanup (Use Case G). +"""PKB Benchmark: GKE Agent Deletion & Cleanup . Atomic single-point measurement of bulk deletion efficiency and IP reclamation on a pre-provisioned GKE cluster with gVisor isolation. @@ -18,7 +18,7 @@ --gke_deletion_poll_interval_s=1.0 \\ --gke_deletion_provision_timeout_s=120.0 \\ --gke_deletion_drain_timeout_s=300.0 \\ - --gke_namespace=agentic \\ + --k8s_namespace=agentic \\ --gke_machine_type=c4-standard-8 Samples emitted (per run): @@ -48,7 +48,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -106,11 +105,6 @@ # --------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -122,7 +116,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads onto the cluster.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) utils.EnsurePortForward() logging.info("Prepare complete.") @@ -133,7 +127,9 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. 
""" - ns = FLAGS.gke_namespace + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace batch_size = FLAGS.gke_deletion_batch_size warmpool_name = FLAGS.gke_deletion_warmpool_name label = FLAGS.gke_deletion_pod_label @@ -144,7 +140,7 @@ def Run(benchmark_spec): logging.info("=== Run: batch_size=%d ===", batch_size) # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility) - _DrainPool(ns, warmpool_name, label, drain_timeout) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(drain_timeout)) time.sleep(2) t_wall_start = time.time() @@ -395,21 +391,16 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Best-effort drain of warm pool after measurement.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace warmpool_name = FLAGS.gke_deletion_warmpool_name label = FLAGS.gke_deletion_pod_label logging.info("Cleanup: draining warm pool to 0.") - _DrainPool(ns, warmpool_name, label, FLAGS.gke_deletion_drain_timeout_s) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_deletion_drain_timeout_s)) utils.StopPortForward() logging.info("Cleanup complete.") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -432,35 +423,6 @@ def _PatchReplicas(namespace, warmpool_name, replicas): ) -def _DrainPool(namespace, warmpool_name, label, timeout_s): - """Scale pool to 0 and wait for all pods to terminate.""" - _PatchReplicas(namespace, warmpool_name, 0) - - # Delete any lingering SandboxClaims - utils.RunKubectl( - [ - "delete", - "sandboxclaims", - "--all", - "-n", - namespace, - "--ignore-not-found=true", - ], - timeout=60, - raise_on_failure=False, - ) - - t0 = time.time() - while time.time() - t0 < timeout_s: - remaining = utils.CountPods(namespace, label) - if remaining == 0: 
- logging.info("Pool drained in %.1fs", time.time() - t0) - return - time.sleep(2) - - logging.warning("Drain timed out after %.0fs", timeout_s) - - def _GetPodNames(namespace, label): """Return list of pod names matching the label selector.""" stdout, _, rc = utils.RunKubectl( diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py index ff35f2e92e..9ff1684951 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py @@ -7,13 +7,14 @@ All functions are idempotent -- safe to call repeatedly without side effects. """ -import json import logging import os -import subprocess -import time from absl import flags +from jinja2 import Template +from perfkitbenchmarker import data +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.resources.container_service import kubectl FLAGS = flags.FLAGS @@ -22,220 +23,197 @@ # --------------------------------------------------------------------------- flags.DEFINE_string( - "gke_sandbox_version", + "agent_sandbox_version", "v0.4.6", "Agent Sandbox controller version (GitHub release tag).", ) flags.DEFINE_string( - "gke_sandbox_router_image", + "agent_sandbox_router_image", "", "Sandbox router container image. If empty, router deployment is skipped.", ) flags.DEFINE_string( - "gke_adk_image", + "k8s_agent_image", "", "ADK agent container image. If empty, agent deployment is skipped.", ) flags.DEFINE_string( - "gke_chromium_image", + "k8s_chromium_image", "", "Chromium sandbox container image. 
If empty, uses placeholder.", ) flags.DEFINE_integer( - "gke_warmpool_replicas", + "agent_sandbox_warmpool_replicas", 2, "Default warm pool replica count for SandboxWarmPool resources.", ) flags.DEFINE_integer( - "gke_chromium_replicas", + "agent_sandbox_chromium_replicas", 1, "Default Chromium warm pool replica count.", ) flags.DEFINE_string( - "gke_python_image", + "k8s_python_image", "registry.k8s.io/agent-sandbox/python-runtime-sandbox:v0.1.0", "Python runtime sandbox container image.", ) flags.DEFINE_integer( - "gke_deploy_timeout", + "k8s_deploy_timeout", 120, "Timeout in seconds for workload deployment rollout.", ) -flags.DEFINE_string( - "gke_cluster_name", - "", - "GKE cluster name. Used in ADK agent env vars for Workload Identity.", +flags.DEFINE_bool( + "skip_image_build", + False, + "Skip container image builds during Prepare.", ) -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- + +# Module-level derived images (set during DeployWorkloads) +_derived_images = {} # --------------------------------------------------------------------------- -# Image path auto-derivation and mode-aware scheduling -# (Insert this block BEFORE the "def DeployWorkloads():" function) +# Template loading # --------------------------------------------------------------------------- +_MANIFESTS_DIR = "k8s_agents/manifests" -def _DeriveImagePaths(): - """Auto-derive container image paths from project/region/machine_type. - - When --gke_adk_image or --gke_sandbox_router_image are empty, - derives them from --gke_project_id, --gke_region, and - --gke_sandbox_machine_type using the same convention as - gke_image_build_utils.py and the bash build scripts. 
- """ - project = getattr(FLAGS, "gke_project_id", "") or "" - region = getattr(FLAGS, "gke_region", "") or "" - machine_type = getattr(FLAGS, "gke_sandbox_machine_type", "") or "" - if not project or not region: - logging.info("Cannot auto-derive images: project=%s region=%s", project, region) - return +def _LoadTemplate(template_name): + """Load a Jinja2 template from the data directory.""" + template_path = os.path.join( + data.ResourcePath(_MANIFESTS_DIR), template_name + ) + with open(template_path, "r") as f: + return Template(f.read()) - machine_family = machine_type.split("-")[0] if machine_type else "c4" - target_arch = "arm64" if machine_family == "c4a" else "amd64" - if not FLAGS.gke_adk_image: - FLAGS.gke_adk_image = "{}-docker.pkg.dev/{}/adk-repo/adk-agent:{}".format( - region, project, target_arch - ) - logging.info("Auto-derived gke_adk_image: %s", FLAGS.gke_adk_image) +def _RenderAndApply(template_name, **kwargs): + """Load a Jinja2 template, render it, write to file, and kubectl apply.""" + template = _LoadTemplate(template_name) + rendered = template.render(**kwargs) - if not FLAGS.gke_sandbox_router_image: - FLAGS.gke_sandbox_router_image = ( - "{}-docker.pkg.dev/{}/agent-sandbox/sandbox-router:{}".format( - region, project, target_arch - ) - ) - logging.info( - "Auto-derived gke_sandbox_router_image: %s", - FLAGS.gke_sandbox_router_image, - ) - - if not FLAGS.gke_chromium_image: - FLAGS.gke_chromium_image = ( - "{}-docker.pkg.dev/{}/agent-sandbox/chrome-sandbox:{}".format( - region, project, target_arch - ) - ) - logging.info( - "Auto-derived gke_chromium_image: %s", FLAGS.gke_chromium_image - ) + # Write rendered YAML to tmp dir (RunKubectlCommand does not support stdin) + tmp_dir = os.path.join( + data.ResourcePath(_MANIFESTS_DIR), "tmp" + ) + os.makedirs(tmp_dir, exist_ok=True) - if not FLAGS.gke_cluster_name: - import os as _os + # Strip .j2 extension for the rendered file + rendered_name = template_name.replace(".j2", "") + rendered_path = 
os.path.join(tmp_dir, rendered_name) + with open(rendered_path, "w") as f: + f.write(rendered) - user_prefix = _os.environ.get("USER", "pkb").split(".")[0] - suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"} - cluster_suffix = suffix_map.get(machine_family, machine_family) - FLAGS.gke_cluster_name = "{}-agentic-{}".format( - user_prefix, cluster_suffix - ) - logging.info( - "Auto-derived gke_cluster_name: %s", FLAGS.gke_cluster_name + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", rendered_path], + raise_on_failure=False, + ) + if retcode != 0: + logging.warning( + "kubectl apply failed for %s: %s", template_name, stderr[:500] ) + return retcode == 0 -def _GetSandboxNodeSelector(): - """Return the correct nodeSelector dict based on provisioning mode. +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- - - native mode: PKB auto-labels nodes with pkb_nodepool= - - custom mode: bash scripts label nodes with dedicated=agentic-sandbox - """ - try: - mode = FLAGS.gke_provision_mode - except (AttributeError, KeyError): - mode = "custom" - if mode == "native": - return {"pkb_nodepool": "sandbox"} - return {"dedicated": "agentic-sandbox"} +def _DeriveImagePaths(project, region, arch): + """Derive container image paths from cluster config. -def _GetSandboxTolerations(): - """Return tolerations list based on provisioning mode. + Args: + project: GCP project ID. + region: GCP region (e.g. us-central1). + arch: Docker platform architecture (amd64 or arm64). - Both modes need the gVisor toleration (auto-applied by GKE to sandbox pools). - Custom mode additionally needs the dedicated=agentic-sandbox toleration - (manually applied by setup_infrastructure_gke.sh). + Returns: + Dict with keys: adk_agent, sandbox_router, chromium. 
""" - try: - mode = FLAGS.gke_provision_mode - except (AttributeError, KeyError): - mode = "custom" - tolerations = [ - { - "key": "sandbox.gke.io/runtime", - "operator": "Equal", - "value": "gvisor", - "effect": "NoSchedule", - }, - ] - if mode != "native": - tolerations.insert( - 0, - { - "key": "dedicated", - "operator": "Equal", - "value": "agentic-sandbox", - "effect": "NoSchedule", - }, - ) - return tolerations - - -def _NodeSelectorYaml(indent=6): - """Generate nodeSelector YAML block for embedding in manifests.""" - selector = _GetSandboxNodeSelector() - spaces = " " * indent - lines = ["{}nodeSelector:".format(spaces)] - for k, v in selector.items(): - lines.append("{} {}: {}".format(spaces, k, v)) - return "\n".join(lines) - - -def _TolerationsYaml(indent=6): - """Generate tolerations YAML block for embedding in manifests.""" - tolerations = _GetSandboxTolerations() - spaces = " " * indent - lines = ["{}tolerations:".format(spaces)] - for t in tolerations: - lines.append('{} - key: "{}"'.format(spaces, t["key"])) - lines.append('{} operator: "{}"'.format(spaces, t["operator"])) - lines.append('{} value: "{}"'.format(spaces, t["value"])) - lines.append('{} effect: "{}"'.format(spaces, t["effect"])) - return "\n".join(lines) + return { + "adk_agent": f"{region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch}", + "sandbox_router": f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch}", + "chromium": f"{region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch}", + } - -def DeployWorkloads(): +def DeployWorkloads(benchmark_spec=None): """Deploy the full Agent Sandbox ecosystem onto the GKE cluster. Idempotent: safe to call repeatedly. Sequence: - 1. Create namespace - 2. Install Agent Sandbox CRDs - 3. Deploy SandboxTemplates + WarmPools - 4. Deploy Sandbox Router - 5. Deploy ADK Agent (Deployment + Service + RBAC) - 6. Deploy PSI Reader DaemonSet - 7. Wait for ADK Agent rollout + 1. 
Build images (if --skip_image_build=False) + 2. Create namespace + 3. Install Agent Sandbox CRDs + 4. Deploy SandboxTemplates + WarmPools + 5. Deploy Sandbox Router + 6. Deploy ADK Agent (Deployment + Service + RBAC) + 7. Deploy PSI Reader DaemonSet + 8. Wait for ADK Agent rollout """ - _DeriveImagePaths() - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("=== DeployWorkloads: namespace=%s ===", ns) + # Derive project, region, machine_type, cluster_name from benchmark_spec + project = "" + region = "" + machine_type = "" + cluster_name = "" + if benchmark_spec: + cluster = getattr(benchmark_spec, 'container_cluster', None) + if cluster: + project = getattr(cluster, 'project', '') or '' + zone = getattr(cluster, 'zone', '') or '' + region = zone[:-2] if zone else '' + cluster_name = getattr(cluster, 'name', '') or '' + # Prefer sandbox nodepool machine_type + nodepools = getattr(cluster, 'nodepools', None) + if nodepools and isinstance(nodepools, dict): + sandbox_pool = nodepools.get('sandbox') + if sandbox_pool and hasattr(sandbox_pool, 'vm_spec'): + machine_type = getattr(sandbox_pool.vm_spec, 'machine_type', '') or '' + if not machine_type and hasattr(cluster, 'vm_spec'): + machine_type = getattr(cluster.vm_spec, 'machine_type', '') or '' + # Fallback to global FLAGS if benchmark_spec not available + if not project: + project = getattr(FLAGS, 'project', '') or '' + if not region: + zone = getattr(FLAGS, 'zone', '') or '' + region = zone[:-2] if zone else '' + + # Build images if requested + # Detect architecture and derive image paths + from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( + gke_image_build_utils, + ) + zone = cluster.zone if cluster else FLAGS.zone + arch = gke_image_build_utils._DetectArchitecture(machine_type, zone, project) + + global _derived_images + _derived_images = _DeriveImagePaths(project, region, arch) + + if not FLAGS.skip_image_build: + gke_image_build_utils.build_images_with_config( + 
project=project, + region=region, + machine_type=machine_type, + zone=zone, + arch=arch, + ) + _CreateNamespace(ns) _InstallCRDs() _DeploySandboxTemplates(ns) _DeploySandboxRouter(ns) - _DeployADKAgent(ns) + _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name) _DeployPSIReader(ns) _WaitForAgentReady(ns) @@ -243,7 +221,7 @@ def DeployWorkloads(): def DeploySnapshots(): - """Deploy Pod Snapshot infrastructure (UC-A only). + """Deploy Pod Snapshot infrastructure. Idempotent: safe to call repeatedly. Sequence: 1. Create GCS bucket (hierarchical namespace) @@ -252,27 +230,25 @@ def DeploySnapshots(): 4. Bind IAM roles 5. Deploy PodSnapshotStorageConfig + PodSnapshotPolicy """ - ns = FLAGS.gke_namespace - project = FLAGS.gke_project_id - region = FLAGS.gke_region + ns = FLAGS.k8s_namespace + project = getattr(FLAGS, 'project', '') or '' + zone = getattr(FLAGS, 'zone', '') or '' + region = zone[:-2] if zone else '' if not project: - logging.warning("DeploySnapshots: gke_project_id not set, skipping.") + logging.warning("DeploySnapshots: FLAGS.project not set, skipping.") return bucket_name = "agent-sandbox-snapshots-{}".format(project) snapshot_folder = "benchmark-snapshots" - ksa_name = "pod-snapshot-sa" + ksa_name = FLAGS.gke_snapshot_ksa_name logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name) # 1. Create GCS bucket - _RunCmd( + vm_util.IssueCommand( [ - "gcloud", - "storage", - "buckets", - "create", + "gcloud", "storage", "buckets", "create", "gs://{}".format(bucket_name), "--uniform-bucket-level-access", "--enable-hierarchical-namespace", @@ -280,32 +256,23 @@ def DeploySnapshots(): "--location={}".format(region), "--project={}".format(project), ], - check=False, + raise_on_failure=False, ) # 2. 
Create managed folder - _RunCmd( + vm_util.IssueCommand( [ - "gcloud", - "storage", - "managed-folders", - "create", + "gcloud", "storage", "managed-folders", "create", "gs://{}/{}/".format(bucket_name, snapshot_folder), "--project={}".format(project), ], - check=False, + raise_on_failure=False, ) # 3. Create KSA - _RunKubectl( - [ - "create", - "serviceaccount", - ksa_name, - "--namespace", - ns, - ], - check=False, + kubectl.RunKubectlCommand( + ["create", "serviceaccount", ksa_name, "--namespace", ns], + raise_on_failure=False, ) # 4. IAM bindings @@ -314,7 +281,12 @@ def DeploySnapshots(): _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name) # 5. Deploy PSSC + PSP - _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder) + _RenderAndApply( + "snapshot-crds.yaml.j2", + ns=ns, + bucket_name=bucket_name, + snapshot_folder=snapshot_folder, + ) logging.info("DeploySnapshots complete.") @@ -324,499 +296,125 @@ def DeploySnapshots(): # --------------------------------------------------------------------------- -def _RunCmd(cmd, check=True, timeout=120): - """Run a shell command and return (stdout, returncode).""" - logging.info("CMD: %s", " ".join(cmd)) - proc = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=timeout, - ) - if check and proc.returncode != 0: - logging.warning( - "Command failed (rc=%d): %s", proc.returncode, proc.stderr[:500] - ) - return proc.stdout.strip(), proc.returncode - - -def _RunKubectl(args, check=True, timeout=120): - """Run kubectl with optional kubeconfig.""" - cmd = ["kubectl"] - if FLAGS.gke_kubeconfig: - cmd += ["--kubeconfig", FLAGS.gke_kubeconfig] - cmd += list(args) - return _RunCmd(cmd, check=check, timeout=timeout) - - -def _KubectlApply(manifest_str): - """Apply a YAML manifest string via kubectl stdin.""" - cmd = ["kubectl", "apply", "-f", "-"] - if FLAGS.gke_kubeconfig: - cmd = [ - "kubectl", - "--kubeconfig", - FLAGS.gke_kubeconfig, - "apply", - "-f", - "-", - ] - proc = subprocess.run( - 
cmd, - input=manifest_str, - capture_output=True, - text=True, - timeout=60, - ) - if proc.returncode != 0: - logging.warning("kubectl apply failed: %s", proc.stderr[:500]) - return proc.returncode == 0 - - def _CreateNamespace(ns): """Create namespace if it doesn't exist.""" - _RunKubectl(["create", "namespace", ns], check=False) + kubectl.RunKubectlCommand( + ["create", "namespace", ns], + raise_on_failure=False, + ) def _InstallCRDs(): """Install Agent Sandbox CRDs from GitHub release.""" - version = FLAGS.gke_sandbox_version + version = FLAGS.agent_sandbox_version base_url = ( "https://github.com/kubernetes-sigs/agent-sandbox" "/releases/download/{}".format(version) ) logging.info("Installing Agent Sandbox CRDs (%s)", version) - _RunKubectl( + kubectl.RunKubectlCommand( [ "apply", - "-f", - "{}/manifest.yaml".format(base_url), - "-f", - "{}/extensions.yaml".format(base_url), + "-f", "{}/manifest.yaml".format(base_url), + "-f", "{}/extensions.yaml".format(base_url), ], - check=False, + raise_on_failure=False, ) def _DeploySandboxTemplates(ns): """Deploy SandboxTemplate + WarmPool for Python and Chromium.""" - python_image = FLAGS.gke_python_image - chromium_image = FLAGS.gke_chromium_image or "chromium-placeholder:latest" - warmpool_replicas = FLAGS.gke_warmpool_replicas - chromium_replicas = FLAGS.gke_chromium_replicas - - manifest = """--- -apiVersion: extensions.agents.x-k8s.io/v1alpha1 -kind: SandboxTemplate -metadata: - name: python-sandbox-template - namespace: {ns} -spec: - podTemplate: - metadata: - labels: - sandbox: python-sandbox-example - spec: - runtimeClassName: gvisor - containers: - - name: python-runtime - image: {python_image} -{node_selector_yaml} -{tolerations_yaml} - restartPolicy: "OnFailure" ---- -apiVersion: extensions.agents.x-k8s.io/v1alpha1 -kind: SandboxWarmPool -metadata: - name: python-sandbox-warmpool - namespace: {ns} -spec: - replicas: {warmpool_replicas} - sandboxTemplateRef: - name: python-sandbox-template ---- -apiVersion: 
extensions.agents.x-k8s.io/v1alpha1 -kind: SandboxTemplate -metadata: - name: chromium-sandbox-template - namespace: {ns} -spec: - podTemplate: - metadata: - labels: - sandbox: chromium-sandbox-example - spec: - runtimeClassName: gvisor - containers: - - name: chromium-runtime - image: {chromium_image} - command: ["/bin/sh", "-c"] - args: - - | - socat TCP-LISTEN:9223,fork,reuseaddr TCP:127.0.0.1:9222 & - exec chromium --headless --no-sandbox --disable-gpu --disable-dev-shm-usage --remote-debugging-port=9222 --no-first-run --disable-field-trial-config --user-data-dir=/tmp/chrome-data about:blank - ports: - - containerPort: 9223 -{node_selector_yaml} -{tolerations_yaml} - restartPolicy: "OnFailure" ---- -apiVersion: extensions.agents.x-k8s.io/v1alpha1 -kind: SandboxWarmPool -metadata: - name: chromium-sandbox-warmpool - namespace: {ns} -spec: - replicas: {chromium_replicas} - sandboxTemplateRef: - name: chromium-sandbox-template ---- -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: allow-orchestrator-to-chromium - namespace: {ns} -spec: - podSelector: - matchLabels: - sandbox: chromium-sandbox-example - policyTypes: - - Ingress - ingress: - - from: - - podSelector: - matchLabels: - app: adk-agent - ports: - - protocol: TCP - port: 9223 -""".format( + python_image = FLAGS.k8s_python_image + chromium_image = FLAGS.k8s_chromium_image or _derived_images.get("chromium", "chromium-placeholder:latest") + warmpool_replicas = FLAGS.agent_sandbox_warmpool_replicas + chromium_replicas = FLAGS.agent_sandbox_chromium_replicas + + _RenderAndApply( + "sandbox-templates.yaml.j2", ns=ns, python_image=python_image, chromium_image=chromium_image, warmpool_replicas=warmpool_replicas, chromium_replicas=chromium_replicas, - node_selector_yaml=_NodeSelectorYaml(), - tolerations_yaml=_TolerationsYaml(), ) - _KubectlApply(manifest) def _DeploySandboxRouter(ns): """Deploy the Sandbox Router Deployment + Service.""" - router_image = FLAGS.gke_sandbox_router_image + 
router_image = FLAGS.agent_sandbox_router_image or _derived_images.get("sandbox_router", "") if not router_image: logging.info("Sandbox router image not set, skipping router deployment.") return - manifest = """--- -apiVersion: v1 -kind: Service -metadata: - name: sandbox-router-svc - namespace: {ns} -spec: - type: ClusterIP - selector: - app: sandbox-router - ports: - - name: http - protocol: TCP - port: 8080 - targetPort: 8080 ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: sandbox-router-deployment - namespace: {ns} -spec: - replicas: 2 - selector: - matchLabels: - app: sandbox-router - template: - metadata: - labels: - app: sandbox-router - spec: - serviceAccountName: adk-agent-sa - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: topology.kubernetes.io/zone - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: sandbox-router - containers: - - name: router - image: {router_image} - ports: - - containerPort: 8080 - env: - - name: ALLOW_UNAUTHENTICATED_ROUTER - value: "true" - readinessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - periodSeconds: 5 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 10 - periodSeconds: 10 - resources: - requests: - cpu: "250m" - memory: "512Mi" - limits: - cpu: "1000m" - memory: "1Gi" - securityContext: - runAsUser: 1000 - runAsGroup: 1000 -""".format(ns=ns, router_image=router_image) - _KubectlApply(manifest) - - -def _DeployADKAgent(ns): + _RenderAndApply( + "sandbox-router.yaml.j2", + ns=ns, + router_image=router_image, + ) + + +def _DeployADKAgent(ns, project="", region="", cluster_name=""): """Deploy ADK Agent: SA, ClusterRole, RoleBinding, Deployment, Service.""" - adk_image = FLAGS.gke_adk_image + adk_image = FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "") if not adk_image: logging.info("ADK agent image not set, skipping agent deployment.") return - project = FLAGS.gke_project_id or "" - region = 
FLAGS.gke_region or "" - cluster = FLAGS.gke_cluster_name or "" - - manifest = """--- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: adk-agent-sa - namespace: {ns} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: adk-agent-sandbox-role -rules: - - apiGroups: ["agents.x-k8s.io"] - resources: ["sandboxes"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - - apiGroups: ["agents.x-k8s.io"] - resources: ["sandboxwarmpool", "sandboxwarmpools"] - verbs: ["get", "list", "watch"] - - apiGroups: ["extensions.agents.x-k8s.io"] - resources: ["sandboxclaims"] - verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - - apiGroups: [""] - resources: ["pods", "pods/log", "pods/exec", "services", "configmaps"] - verbs: ["get", "list", "watch"] - - apiGroups: [""] - resources: ["pods/portforward"] - verbs: ["create"] - - apiGroups: ["metrics.k8s.io"] - resources: ["pods"] - verbs: ["get", "list"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: adk-agent-sandbox-binding - namespace: {ns} -subjects: - - kind: ServiceAccount - name: adk-agent-sa - namespace: {ns} -roleRef: - kind: ClusterRole - name: adk-agent-sandbox-role - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: adk-agent - namespace: {ns} -spec: - replicas: 1 - selector: - matchLabels: - app: adk-agent - template: - metadata: - labels: - app: adk-agent - spec: - serviceAccountName: adk-agent-sa - containers: - - name: adk-agent - imagePullPolicy: Always - image: {adk_image} - resources: - limits: - memory: "16384Mi" - cpu: "6000m" - requests: - memory: "512Mi" - cpu: "1000m" - ports: - - containerPort: 8080 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 15 - periodSeconds: 30 - timeoutSeconds: 10 - failureThreshold: 6 - readinessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - periodSeconds: 10 
- timeoutSeconds: 5 - failureThreshold: 3 - env: - - name: PORT - value: "8080" - - name: GOOGLE_CLOUD_PROJECT - value: "{project}" - - name: GOOGLE_CLOUD_LOCATION - value: "{region}" - - name: GOOGLE_GENAI_USE_VERTEXAI - value: "true" - - name: CLUSTER_NAME - value: "{cluster}" - - name: AGENTIC_NAMESPACE - value: "{ns}" - - name: SANDBOX_ROUTER_URL - value: "http://sandbox-router-svc.{ns}.svc.cluster.local:8080" ---- -apiVersion: v1 -kind: Service -metadata: - name: adk-agent - namespace: {ns} -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: 8080 - selector: - app: adk-agent -""".format(ns=ns, adk_image=adk_image, project=project, region=region, cluster=cluster) - _KubectlApply(manifest) + project = project or "" + region = region or "" + cluster = cluster_name or "" + + _RenderAndApply( + "adk-agent.yaml.j2", + ns=ns, + adk_image=adk_image, + project=project, + region=region, + cluster=cluster, + ) def _DeployPSIReader(ns): """Deploy PSI Reader DaemonSet for cgroup pressure metrics.""" - manifest = """--- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: psi-reader - namespace: {ns} - labels: - app: psi-reader -spec: - selector: - matchLabels: - app: psi-reader - template: - metadata: - labels: - app: psi-reader - spec: -{node_selector_yaml} -{tolerations_yaml} - hostPID: true - containers: - - name: reader - image: busybox:1.36 - command: ["sleep", "infinity"] - securityContext: - privileged: true - volumeMounts: - - name: cgroup - mountPath: /host/sys/fs/cgroup - readOnly: true - - name: proc - mountPath: /host/proc - readOnly: true - resources: - requests: - cpu: "10m" - memory: "16Mi" - limits: - cpu: "50m" - memory: "32Mi" - volumes: - - name: cgroup - hostPath: - path: /sys/fs/cgroup - - name: proc - hostPath: - path: /proc -""".format( - ns=ns, - node_selector_yaml=_NodeSelectorYaml(), - tolerations_yaml=_TolerationsYaml(), - ) - _KubectlApply(manifest) + _RenderAndApply("psi-reader.yaml.j2", ns=ns) def _WaitForAgentReady(ns): """Wait 
for ADK agent deployment to be ready.""" - adk_image = FLAGS.gke_adk_image + adk_image = FLAGS.k8s_agent_image if not adk_image: logging.info("ADK agent not deployed, skipping rollout wait.") return - timeout = FLAGS.gke_deploy_timeout + timeout = FLAGS.k8s_deploy_timeout logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout) - _RunKubectl( + kubectl.RunKubectlCommand( [ - "rollout", - "status", - "deployment/adk-agent", - "-n", - ns, + "rollout", "status", "deployment/adk-agent", + "-n", ns, "--timeout={}s".format(timeout), ], - check=False, + raise_on_failure=False, ) def _GetProjectNumber(project): """Get GCP project number from project ID.""" - stdout, rc = _RunCmd( + stdout, _, retcode = vm_util.IssueCommand( [ - "gcloud", - "projects", - "describe", - project, + "gcloud", "projects", "describe", project, "--format=value(projectNumber)", ], - check=False, + raise_on_failure=False, ) - return stdout if rc == 0 else None + return stdout.strip() if retcode == 0 else None def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name): """Bind IAM roles for pod snapshot access.""" # bucketViewer to namespace - _RunCmd( + vm_util.IssueCommand( [ - "gcloud", - "storage", - "buckets", - "add-iam-policy-binding", + "gcloud", "storage", "buckets", "add-iam-policy-binding", "gs://{}".format(bucket_name), "--member=principalSet://iam.googleapis.com/projects/{}" "/locations/global/workloadIdentityPools/{}.svc.id.goog" @@ -824,16 +422,13 @@ def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name): "--role=roles/storage.bucketViewer", "--quiet", ], - check=False, + raise_on_failure=False, ) # objectAdmin to KSA - _RunCmd( + vm_util.IssueCommand( [ - "gcloud", - "storage", - "buckets", - "add-iam-policy-binding", + "gcloud", "storage", "buckets", "add-iam-policy-binding", "gs://{}".format(bucket_name), "--member=principal://iam.googleapis.com/projects/{}" "/locations/global/workloadIdentityPools/{}.svc.id.goog" @@ -841,51 +436,18 @@ 
def _BindSnapshotIAM(bucket_name, project, project_number, ns, ksa_name): "--role=roles/storage.objectAdmin", "--quiet", ], - check=False, + raise_on_failure=False, ) # objectUser to GKE snapshot controller - _RunCmd( + vm_util.IssueCommand( [ - "gcloud", - "storage", - "buckets", - "add-iam-policy-binding", + "gcloud", "storage", "buckets", "add-iam-policy-binding", "gs://{}".format(bucket_name), "--member=serviceAccount:service-{}" "@container-engine-robot.iam.gserviceaccount.com".format(project_number), "--role=roles/storage.objectUser", "--quiet", ], - check=False, + raise_on_failure=False, ) - - -def _DeploySnapshotCRDs(ns, bucket_name, snapshot_folder): - """Deploy PodSnapshotStorageConfig + PodSnapshotPolicy.""" - manifest = """--- -apiVersion: podsnapshot.gke.io/v1 -kind: PodSnapshotStorageConfig -metadata: - name: benchmark-pssc-gcs -spec: - snapshotStorageConfig: - gcs: - bucket: "{bucket_name}" - path: "{snapshot_folder}" ---- -apiVersion: podsnapshot.gke.io/v1 -kind: PodSnapshotPolicy -metadata: - name: benchmark-psp - namespace: {ns} -spec: - storageConfigName: benchmark-pssc-gcs - selector: - matchLabels: - app: snapshot-benchmark-workload - triggerConfig: - type: manual - postCheckpoint: resume -""".format(ns=ns, bucket_name=bucket_name, snapshot_folder=snapshot_folder) - _KubectlApply(manifest) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py index 38b85b4e11..13340184bc 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py @@ -25,12 +25,57 @@ logger = logging.getLogger(__name__) + # --------------------------------------------------------------------------- -# Public API +# Architecture detection # --------------------------------------------------------------------------- +_ARCH_MAP = { + "X86_64": 
"amd64", + "ARM64": "arm64", +} + + +def _DetectArchitecture(machine_type, zone, project): + """Detect CPU architecture for a GCP machine type. + + Uses gcloud to query the machine type's architecture, then maps + GCP naming (X86_64/ARM64) to Docker platform naming (amd64/arm64). -def build_images_with_config(project, region, machine_type, cloud_build_sa=None): + Falls back to amd64 if gcloud fails. + """ + try: + stdout, _, retcode = vm_util.IssueCommand( + [ + "gcloud", "compute", "machine-types", "describe", + machine_type, + f"--zone={zone}", + f"--project={project}", + "--format=value(architecture)", + ], + raise_on_failure=False, + timeout=30, + ) + if retcode == 0 and stdout.strip(): + gcp_arch = stdout.strip().upper() + docker_arch = _ARCH_MAP.get(gcp_arch) + if docker_arch: + logging.info( + "Detected architecture for %s: %s -> %s", + machine_type, gcp_arch, docker_arch, + ) + return docker_arch + logging.warning( + "Unknown GCP architecture '%s' for %s. Falling back to amd64.", + gcp_arch, machine_type, + ) + except Exception as e: + logging.warning("gcloud machine-type describe failed: %s. Falling back to amd64.", e) + + return "amd64" + + +def build_images_with_config(project, region, machine_type, zone, arch, cloud_build_sa=None): """Core image build logic — no FLAGS dependency. Callable from both PKB (via BuildImages()) and prerequisite_setup.py. @@ -43,9 +88,8 @@ def build_images_with_config(project, region, machine_type, cloud_build_sa=None) cloud_build_sa: Cloud Build service account email. If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com". 
""" - # Derive architecture from machine family - machine_family = machine_type.split("-")[0] if machine_type else "c4" - target_arch = "arm64" if machine_family == "c4a" else "amd64" + # Architecture passed in from caller (detected via gcloud) + target_arch = arch # Derive image paths adk_image = f"{region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{target_arch}" @@ -101,15 +145,19 @@ def build_images_with_config(project, region, machine_type, cloud_build_sa=None) def BuildImages(): - """FLAGS-based entry point (called from PKB Provision). + """FLAGS-based entry point. - Reads configuration from FLAGS (set in gke_provision_utils.py). + Reads configuration from native PKB FLAGS. Delegates to build_images_with_config() for the actual work. """ + project = getattr(FLAGS, 'project', '') or '' + zone = getattr(FLAGS, 'zone', '') or '' + region = zone[:-2] if zone else '' + machine_type = getattr(FLAGS, 'machine_type', '') or '' build_images_with_config( - project=FLAGS.gke_project_id, - region=FLAGS.gke_region, - machine_type=FLAGS.gke_sandbox_machine_type, + project=project, + region=region, + machine_type=machine_type, ) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py index 9ddac86ea0..7d95d4bc82 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent Payload Transfer Saturation (Use Case D). +"""PKB Benchmark: GKE Agent Payload Transfer Saturation . Atomic single-point measurement of payload transfer latency from a gVisor sandbox back to the orchestrator on a pre-provisioned GKE cluster. 
Measures @@ -14,8 +14,8 @@ --gke_payload_size_mb=50 \ --gke_payload_iterations=20 \ --gke_payload_concurrent_sessions=5 \ - --gke_namespace=agentic \ - --gke_api_url=http://localhost:8080 + --k8s_namespace=agentic \ + --k8s_agent_api_url=http://localhost:8080 Samples emitted (per run): - gke_payload_orchestrator_transfer_mean (ms) @@ -71,7 +71,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -126,11 +125,6 @@ # --------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -142,7 +136,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) utils.EnsurePortForward() logging.info("Prepare complete.") @@ -154,7 +148,9 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. """ - ns = FLAGS.gke_namespace + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace payload_size_mb = FLAGS.gke_payload_size_mb iterations = FLAGS.gke_payload_iterations concurrent = FLAGS.gke_payload_concurrent_sessions @@ -575,7 +571,7 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Clean up after measurement. 
Scale warm pool to 0.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("Cleanup: draining warm pool.") utils.DrainWarmPool( @@ -588,18 +584,25 @@ def Cleanup(benchmark_spec): logging.info("Cleanup complete (cluster persists).") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra): - """Emit a sample if the key exists in the aggregate dict.""" + """Emit a sample if the key exists in the aggregate dict. + + Args: + samples: List to append the new sample.Sample to. + agg: Aggregate metrics dict returned by the agent API response. + agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms"). + metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric + name (e.g. "orchestrator_cel_mean"). + unit: Unit string for the sample (e.g. "ms", "MB", "seconds"). + namespace: Kubernetes namespace (included in sample metadata). + extra: Dict of additional metadata key-value pairs attached to + every sample (density, session counts, wall time, etc.). + """ value = agg.get(agg_key) if value is not None: samples.append( diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py new file mode 100644 index 0000000000..49e04bb83d --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +"""Post-Teardown Cleanup for GKE Agentic Benchmarking. 
+ +Cleans up infrastructure created by gke_prerequisites.py and DeploySnapshots(): + - Delete Cloud Build service account + IAM bindings + - Delete GCS snapshot bucket + - Delete Artifact Registry repositories + +Run ONCE after all benchmarks are complete (after PKB Teardown has deleted the cluster): + python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_post_teardown \ + --project_id= --region= +""" + +import argparse +import logging +import subprocess + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) + + +def _run(cmd, check=False, timeout=300): + logger.info("CMD: %s", " ".join(cmd)) + result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + if check and result.returncode != 0: + logger.warning("Command failed (rc=%d): %s", result.returncode, result.stderr[-300:]) + return result + + +def teardown_cloud_build_sa(project_id): + logger.info("=== Deleting Cloud Build SA ===") + sa_email = f"adk-cloud-build-sa@{project_id}.iam.gserviceaccount.com" + roles = ["roles/logging.logWriter", "roles/storage.objectViewer", + "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer"] + for role in roles: + _run(["gcloud", "projects", "remove-iam-policy-binding", project_id, + f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"]) + _run(["gcloud", "iam", "service-accounts", "delete", sa_email, + f"--project={project_id}", "--quiet"]) + logger.info("Cloud Build SA deleted.") + + +def teardown_snapshot_bucket(project_id, region): + logger.info("=== Deleting Snapshot Bucket ===") + bucket_name = f"agent-sandbox-snapshots-{project_id}" + _run(["gcloud", "storage", "rm", f"gs://{bucket_name}/**", + f"--project={project_id}", "--quiet"]) + _run(["gcloud", "storage", "buckets", "delete", f"gs://{bucket_name}", + f"--project={project_id}", "--quiet"]) + logger.info("Snapshot bucket deleted.") + + +def teardown_images(project_id, region): 
+ logger.info("=== Deleting AR repos ===") + for repo in ["adk-repo", "agent-sandbox"]: + _run(["gcloud", "artifacts", "repositories", "delete", repo, + f"--location={region}", f"--project={project_id}", "--quiet"]) + logger.info("AR repos deleted.") + + +def main(): + p = argparse.ArgumentParser(description="GKE Agentic Benchmark Post-Teardown") + p.add_argument("--project_id", required=True, help="GCP project ID") + p.add_argument("--region", default="us-central1", help="GCP region") + p.add_argument("--keep_images", action="store_true", help="Skip AR repo deletion") + p.add_argument("--keep_bucket", action="store_true", help="Skip snapshot bucket deletion") + args = p.parse_args() + teardown_cloud_build_sa(args.project_id) + if not args.keep_bucket: + teardown_snapshot_bucket(args.project_id, args.region) + if not args.keep_images: + teardown_images(args.project_id, args.region) + print("\nPost-teardown complete!") + + +if __name__ == "__main__": + main() diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py deleted file mode 100644 index 70b9d95a4c..0000000000 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisite_setup.py +++ /dev/null @@ -1,516 +0,0 @@ -#!/usr/bin/env python3 -"""Prerequisite Setup for GKE Agentic Benchmarking. - -Creates infrastructure that PKB's native container_cluster provisioner -cannot manage: VPC, Subnet, Cloud Router, NAT, Firewall Rules, Artifact -Registry, Cloud Build SA, IAM bindings, and container image builds. - -This script is run ONCE before PKB provisioning. PKB then references the -pre-existing VPC/subnet via --gce_network_name and --gce_subnet_name flags. 
- -Usage: - # Full setup (including image builds): - python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \ - --project_id=my-project \ - --region=us-central1 --zone=us-central1-a \ - --machine_type=c4-standard-8 - - # Setup without image builds: - python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \ - --project_id=my-project \ - --region=us-central1 --zone=us-central1-a \ - --skip_image_build - - # Teardown: - python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \ - --project_id=my-project \ - --region=us-central1 --zone=us-central1-a \ - --teardown - - # Teardown (keep images): - python -m perfkitbenchmarker.linux_benchmarks.gke_prerequisite_setup \ - --project_id=my-project \ - --region=us-central1 --zone=us-central1-a \ - --teardown --keep_images -""" - -import argparse -import logging -import os -import subprocess -import sys -import time - -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s %(levelname)s %(message)s", -) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _run(cmd, check=True, timeout=300, capture=False): - """Run a shell command, logging it first.""" - cmd_str = " ".join(cmd) if isinstance(cmd, list) else cmd - logging.info("CMD: %s", cmd_str) - result = subprocess.run( - cmd if isinstance(cmd, list) else cmd.split(), - capture_output=capture, - text=True, - timeout=timeout, - ) - if check and result.returncode != 0: - stderr = result.stderr if capture else "" - logging.error("Command failed (rc=%d): %s", result.returncode, stderr) - raise RuntimeError(f"Command failed: {cmd_str}") - return result - - -def _exists(cmd): - """Return True if a gcloud describe/get command succeeds.""" - result = subprocess.run( - cmd if isinstance(cmd, list) else cmd.split(), - capture_output=True, - text=True, - timeout=60, - ) - return result.returncode == 0 - - -def 
_derive_config(args): - """Derive configuration values from arguments.""" - user_prefix = os.environ.get("USER", "pkb").split(".")[0] - machine_family = args.machine_type.split("-")[0] - - # Disk type - disk_type = "pd-balanced" if machine_family == "c3" else "hyperdisk-balanced" - - # Architecture - target_arch = "arm64" if machine_family == "c4a" else "amd64" - - # Cluster suffix - if "metal" in args.machine_type: - cluster_suffix = "c3metal" - else: - cluster_suffix = machine_family - - # Master CIDR (unique per cluster suffix) - master_cidrs = { - "c4": "172.16.0.0/28", - "c4d": "172.16.0.16/28", - "c4a": "172.16.0.32/28", - "c3metal": "172.16.0.48/28", - } - master_cidr = master_cidrs.get(cluster_suffix, "172.16.0.64/28") - - return { - "user_prefix": user_prefix, - "machine_family": machine_family, - "disk_type": disk_type, - "target_arch": target_arch, - "cluster_suffix": cluster_suffix, - "master_cidr": master_cidr, - "vpc_name": f"{user_prefix}-agentic-vpc", - "subnet_name": f"{user_prefix}-agentic-subnet", - "subnet_cidr": args.subnet_cidr, - "router_name": f"{user_prefix}-agentic-nat-router", - "nat_name": f"{user_prefix}-agentic-nat-config", - "adk_repo_name": "adk-repo", - "sandbox_repo_name": "agent-sandbox", - "cloud_build_sa": "adk-cloud-build-sa", - "cloud_build_sa_email": f"adk-cloud-build-sa@{args.project_id}.iam.gserviceaccount.com", - "adk_image": f"{args.region}-docker.pkg.dev/{args.project_id}/adk-repo/adk-agent:{target_arch}", - "chromium_image": f"{args.region}-docker.pkg.dev/{args.project_id}/agent-sandbox/chrome-sandbox:{target_arch}", - "router_image": f"{args.region}-docker.pkg.dev/{args.project_id}/agent-sandbox/sandbox-router:{target_arch}", - } - - -# --------------------------------------------------------------------------- -# Setup Steps -# --------------------------------------------------------------------------- - - -def enable_apis(args): - """Enable required GCP APIs.""" - logging.info("=== Enabling GCP APIs ===") - apis = [ 
- "container.googleapis.com", - "artifactregistry.googleapis.com", - "cloudbuild.googleapis.com", - "aiplatform.googleapis.com", - "storage.googleapis.com", - "iam.googleapis.com", - "connectgateway.googleapis.com", - "gkehub.googleapis.com", - "gkeconnect.googleapis.com", - "iap.googleapis.com", - ] - _run([ - "gcloud", "services", "enable", *apis, - f"--project={args.project_id}", - ]) - logging.info("APIs enabled.") - - -def create_vpc(args, config): - """Create custom VPC.""" - logging.info("=== Creating VPC ===") - if _exists([ - "gcloud", "compute", "networks", "describe", config["vpc_name"], - f"--project={args.project_id}", - ]): - logging.info("VPC %s already exists.", config["vpc_name"]) - return - - _run([ - "gcloud", "compute", "networks", "create", config["vpc_name"], - "--subnet-mode=custom", - f"--project={args.project_id}", - ]) - logging.info("VPC %s created.", config["vpc_name"]) - - -def create_subnet(args, config): - """Create subnet in the VPC.""" - logging.info("=== Creating Subnet ===") - if _exists([ - "gcloud", "compute", "networks", "subnets", "describe", - config["subnet_name"], - f"--region={args.region}", - f"--project={args.project_id}", - ]): - logging.info("Subnet %s already exists.", config["subnet_name"]) - return - - _run([ - "gcloud", "compute", "networks", "subnets", "create", - config["subnet_name"], - f"--network={config['vpc_name']}", - f"--region={args.region}", - f"--range={config['subnet_cidr']}", - f"--project={args.project_id}", - ]) - logging.info("Subnet %s created.", config["subnet_name"]) - - -def create_firewall_rules(args, config): - """Create firewall rules.""" - logging.info("=== Creating Firewall Rules ===") - - rules = [ - { - "name": f"{config['vpc_name']}-allow-iap-ssh", - "rules": "tcp:22", - "source_ranges": "35.235.240.0/20", - "priority": "1000", - }, - { - "name": f"{config['vpc_name']}-allow-internal", - "rules": "tcp,udp,icmp", - "source_ranges": config["subnet_cidr"], - "priority": "1000", - }, - ] - 
- for rule in rules: - if _exists([ - "gcloud", "compute", "firewall-rules", "describe", rule["name"], - f"--project={args.project_id}", - ]): - logging.info("Firewall rule %s already exists.", rule["name"]) - continue - - _run([ - "gcloud", "compute", "firewall-rules", "create", rule["name"], - f"--network={config['vpc_name']}", - "--direction=INGRESS", - "--action=ALLOW", - f"--rules={rule['rules']}", - f"--source-ranges={rule['source_ranges']}", - f"--priority={rule['priority']}", - f"--project={args.project_id}", - ]) - logging.info("Firewall rule %s created.", rule["name"]) - - -def create_router_and_nat(args, config): - """Create Cloud Router and NAT for private node internet access.""" - logging.info("=== Creating Cloud Router + NAT ===") - - # Router - if not _exists([ - "gcloud", "compute", "routers", "describe", config["router_name"], - f"--region={args.region}", - f"--project={args.project_id}", - ]): - _run([ - "gcloud", "compute", "routers", "create", config["router_name"], - f"--network={config['vpc_name']}", - f"--region={args.region}", - f"--project={args.project_id}", - ]) - logging.info("Router %s created.", config["router_name"]) - else: - logging.info("Router %s already exists.", config["router_name"]) - - # NAT - if not _exists([ - "gcloud", "compute", "routers", "nats", "describe", config["nat_name"], - f"--router={config['router_name']}", - f"--region={args.region}", - f"--project={args.project_id}", - ]): - _run([ - "gcloud", "compute", "routers", "nats", "create", config["nat_name"], - f"--router={config['router_name']}", - f"--region={args.region}", - "--nat-all-subnet-ip-ranges", - "--auto-allocate-nat-external-ips", - f"--project={args.project_id}", - ]) - logging.info("NAT %s created.", config["nat_name"]) - else: - logging.info("NAT %s already exists.", config["nat_name"]) - - -def create_artifact_registry(args, config): - """Create Artifact Registry repositories.""" - logging.info("=== Creating Artifact Registry Repos ===") - - for 
repo in [config["adk_repo_name"], config["sandbox_repo_name"]]: - result = subprocess.run( - [ - "gcloud", "artifacts", "repositories", "describe", repo, - f"--location={args.region}", - f"--project={args.project_id}", - ], - capture_output=True, text=True, timeout=30, - ) - if result.returncode == 0: - logging.info("AR repo %s already exists.", repo) - continue - - _run([ - "gcloud", "artifacts", "repositories", "create", repo, - "--repository-format=docker", - f"--location={args.region}", - f"--project={args.project_id}", - ]) - logging.info("AR repo %s created.", repo) - - -def create_cloud_build_sa(args, config): - """Create Cloud Build service account and bind IAM roles.""" - logging.info("=== Creating Cloud Build SA ===") - - sa_email = config["cloud_build_sa_email"] - - # Create SA - if not _exists([ - "gcloud", "iam", "service-accounts", "describe", sa_email, - f"--project={args.project_id}", - ]): - _run([ - "gcloud", "iam", "service-accounts", "create", - config["cloud_build_sa"], - f"--display-name={config['cloud_build_sa']}", - f"--project={args.project_id}", - ]) - logging.info("SA %s created. Waiting for propagation...", sa_email) - time.sleep(10) - else: - logging.info("SA %s already exists.", sa_email) - - # Bind roles - roles = [ - "roles/logging.logWriter", - "roles/storage.objectViewer", - "roles/artifactregistry.writer", - "roles/serviceusage.serviceUsageConsumer", - ] - for role in roles: - _run([ - "gcloud", "projects", "add-iam-policy-binding", args.project_id, - f"--member=serviceAccount:{sa_email}", - f"--role={role}", - "--condition=None", "--quiet", - ], check=False) - - logging.info("Cloud Build SA roles bound.") - - -def build_images(args, config): - """Build and push container images via Cloud Build. - - Delegates to gke_image_build_utils.build_images_with_config() - to avoid duplicating Cloud Build logic. 
- """ - if args.skip_image_build: - logging.info("=== Skipping Image Builds (--skip_image_build) ===") - return - - logging.info("=== Building Container Images ===") - - # Import the shared image build module (same package) - from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils - - gke_image_build_utils.build_images_with_config( - project=args.project_id, - region=args.region, - machine_type=args.machine_type, - cloud_build_sa=config["cloud_build_sa_email"], - ) - - logging.info("=== Image builds complete ===") - - -# --------------------------------------------------------------------------- -# Teardown Steps -# --------------------------------------------------------------------------- - - -def teardown(args, config): - """Tear down all prerequisite resources.""" - logging.info("=== Prerequisite Teardown ===") - - # AR repos - if not args.keep_images: - logging.info("Deleting Artifact Registry repos...") - for repo in [config["adk_repo_name"], config["sandbox_repo_name"]]: - _run([ - "gcloud", "artifacts", "repositories", "delete", repo, - f"--location={args.region}", - f"--project={args.project_id}", "--quiet", - ], check=False) - else: - logging.info("Keeping AR repos (--keep_images).") - - # Cloud Build SA - logging.info("Deleting Cloud Build SA...") - sa_email = config["cloud_build_sa_email"] - roles = [ - "roles/logging.logWriter", - "roles/storage.objectViewer", - "roles/artifactregistry.writer", - "roles/serviceusage.serviceUsageConsumer", - ] - for role in roles: - _run([ - "gcloud", "projects", "remove-iam-policy-binding", args.project_id, - f"--member=serviceAccount:{sa_email}", - f"--role={role}", "--quiet", - ], check=False) - _run([ - "gcloud", "iam", "service-accounts", "delete", sa_email, - f"--project={args.project_id}", "--quiet", - ], check=False) - - # NAT + Router - logging.info("Deleting NAT + Router...") - _run([ - "gcloud", "compute", "routers", "nats", "delete", config["nat_name"], - 
f"--router={config['router_name']}", - f"--region={args.region}", - f"--project={args.project_id}", "--quiet", - ], check=False) - _run([ - "gcloud", "compute", "routers", "delete", config["router_name"], - f"--region={args.region}", - f"--project={args.project_id}", "--quiet", - ], check=False) - - # Firewall rules - logging.info("Deleting firewall rules...") - for suffix in ["allow-iap-ssh", "allow-internal"]: - _run([ - "gcloud", "compute", "firewall-rules", "delete", - f"{config['vpc_name']}-{suffix}", - f"--project={args.project_id}", "--quiet", - ], check=False) - - # Subnet + VPC - logging.info("Deleting subnet + VPC...") - _run([ - "gcloud", "compute", "networks", "subnets", "delete", - config["subnet_name"], - f"--region={args.region}", - f"--project={args.project_id}", "--quiet", - ], check=False) - _run([ - "gcloud", "compute", "networks", "delete", config["vpc_name"], - f"--project={args.project_id}", "--quiet", - ], check=False) - - logging.info("=== Prerequisite Teardown Complete ===") - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - - -def parse_args(): - p = argparse.ArgumentParser( - description="Prerequisite Setup for GKE Agentic Benchmarking", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - p.add_argument("--project_id", required=True, help="GCP project ID") - p.add_argument("--region", default="us-central1", help="GCP region (default: us-central1)") - p.add_argument("--zone", default="us-central1-a", help="GCP zone (default: us-central1-a)") - p.add_argument("--machine_type", default="c4-standard-8", - help="Machine type for sandbox nodes (default: c4-standard-8)") - p.add_argument("--subnet_cidr", default="10.134.20.0/24", - help="Subnet CIDR range (default: 10.134.20.0/24)") - p.add_argument("--skip_image_build", action="store_true", default=False, - help="Skip container image builds") - 
p.add_argument("--teardown", action="store_true", default=False, - help="Tear down prerequisite resources instead of creating them") - p.add_argument("--keep_images", action="store_true", default=False, - help="Keep AR repos during teardown") - return p.parse_args() - - -def main(): - args = parse_args() - config = _derive_config(args) - - print(f"\n{'='*60}") - print(f"Project: {args.project_id}") - print(f"Region: {args.region}") - print(f"Zone: {args.zone}") - print(f"Machine Type: {args.machine_type}") - print(f"VPC: {config['vpc_name']}") - print(f"Subnet: {config['subnet_name']} ({config['subnet_cidr']})") - print(f"Mode: {'TEARDOWN' if args.teardown else 'SETUP'}") - print(f"{'='*60}\n") - - if args.teardown: - teardown(args, config) - else: - enable_apis(args) - create_vpc(args, config) - create_subnet(args, config) - create_firewall_rules(args, config) - create_router_and_nat(args, config) - create_artifact_registry(args, config) - create_cloud_build_sa(args, config) - build_images(args, config) - - print(f"\n{'='*60}") - print("Prerequisite setup complete!") - print(f"{'='*60}") - print(f"\nPKB flags to reference this infrastructure:") - print(f" --gce_network_name={config['vpc_name']}") - print(f"\nNext: Run PKB with container_cluster provisioning:") - print(f" python pkb.py --benchmarks=gke_python_density \\") - print(f" --gce_network_name={config['vpc_name']} \\") - print(f" --zone={args.zone} \\") - print(f" --gke_use_beta=true \\") - print(f" --gke_additional_flags=\"--enable-pod-snapshots,...,--subnetwork={config['subnet_name']}\"") - - -if __name__ == "__main__": - main() diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py new file mode 100644 index 0000000000..9c45f02449 --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +"""Prerequisite Setup for 
GKE Agentic Benchmarking. + +Creates infrastructure that PKB cannot manage natively: + - Enable required GCP APIs + - Create Artifact Registry repositories + - Create Cloud Build service account + IAM bindings + +Run ONCE before PKB provisioning: + python -m perfkitbenchmarker.linux_benchmarks.kubernetes.agentic.gke_prerequisites \ + --project_id= --region= +""" + +import argparse +import logging +import os +import subprocess +import time + +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") +logger = logging.getLogger(__name__) + + +def _run(cmd, check=True, timeout=300): + logger.info("CMD: %s", " ".join(cmd)) + result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + if check and result.returncode != 0: + logger.error("Command failed (rc=%d): %s", result.returncode, result.stderr[-500:]) + raise RuntimeError(f"Command failed: {cmd}") + return result + + +def _exists(cmd): + result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + return result.returncode == 0 + + +def enable_apis(project_id): + logger.info("=== Enabling GCP APIs ===") + apis = [ + "container.googleapis.com", + "artifactregistry.googleapis.com", + "cloudbuild.googleapis.com", + "aiplatform.googleapis.com", + "storage.googleapis.com", + "iam.googleapis.com", + "connectgateway.googleapis.com", + "gkehub.googleapis.com", + "gkeconnect.googleapis.com", + "iap.googleapis.com", + ] + _run(["gcloud", "services", "enable"] + apis + [f"--project={project_id}"]) + logger.info("APIs enabled.") + + +def create_artifact_registry(project_id, region): + logger.info("=== Creating Artifact Registry Repos ===") + for repo in ["adk-repo", "agent-sandbox"]: + if _exists(["gcloud", "artifacts", "repositories", "describe", repo, + f"--location={region}", f"--project={project_id}"]): + logger.info("AR repo %s already exists.", repo) + continue + _run(["gcloud", "artifacts", "repositories", "create", repo, + "--repository-format=docker", + 
f"--location={region}", f"--project={project_id}"]) + logger.info("AR repo %s created.", repo) + + +def create_cloud_build_sa(project_id): + logger.info("=== Creating Cloud Build SA ===") + sa_name = "adk-cloud-build-sa" + sa_email = f"{sa_name}@{project_id}.iam.gserviceaccount.com" + if not _exists(["gcloud", "iam", "service-accounts", "describe", + sa_email, f"--project={project_id}"]): + _run(["gcloud", "iam", "service-accounts", "create", sa_name, + f"--display-name={sa_name}", f"--project={project_id}"]) + logger.info("SA %s created. Waiting for propagation...", sa_email) + time.sleep(10) + else: + logger.info("SA %s already exists.", sa_email) + roles = [ + "roles/logging.logWriter", + "roles/storage.objectViewer", + "roles/artifactregistry.writer", + "roles/serviceusage.serviceUsageConsumer", + ] + for role in roles: + _run(["gcloud", "projects", "add-iam-policy-binding", project_id, + f"--member=serviceAccount:{sa_email}", + f"--role={role}", "--condition=None", "--quiet"], check=False) + logger.info("Cloud Build SA roles bound.") + + +def main(): + p = argparse.ArgumentParser(description="GKE Agentic Benchmark Prerequisites") + p.add_argument("--project_id", required=True, help="GCP project ID") + p.add_argument("--region", default="us-central1", help="GCP region") + args = p.parse_args() + enable_apis(args.project_id) + create_artifact_registry(args.project_id, args.region) + create_cloud_build_sa(args.project_id) + print("\nPrerequisite setup complete!") + + +if __name__ == "__main__": + main() + diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py deleted file mode 100644 index 4792f5a543..0000000000 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_provision_utils.py +++ /dev/null @@ -1,698 +0,0 @@ -"""Shared Provision/Teardown utilities for GKE Agent Sandbox benchmarks. 
- -Provides the full GKE infrastructure lifecycle (create and destroy) used -by all seven UC benchmark scripts. Each benchmark's Provision() and -Teardown() functions delegate to the public functions in this module. - -Infrastructure created (in order): - 1. VPC + Subnet - 2. Firewall rules (IAP SSH, internal, laptop IP) - 3. Cloud Router + NAT - 4. GKE Cluster (DPv2, Workload Identity, optional Pod Snapshots) - 5. Fleet registration / credential retrieval - 6. gVisor sandbox node pool - 7. Artifact Registry repositories - 8. Cloud Build service account + IAM bindings - 9. Container images (optional, gated by --gke_skip_image_build) - -Teardown respects two flags: - --gke_teardown_keep_images: skip AR repo deletion - --gke_teardown_keep_infra: only delete K8s workloads, keep cluster/network -""" - -import logging -import subprocess -import time - -from absl import flags - -FLAGS = flags.FLAGS - -# Image build utilities (Phase 3) -# Imported after FLAGS to avoid circular dependency -# The actual import is deferred to Provision() to allow flag registration order - -# --------------------------------------------------------------------------- -# Provision/Teardown flags -# --------------------------------------------------------------------------- - -flags.DEFINE_string( - "gke_project_id", - "", - "GCP project ID for the benchmark cluster. Required for Provision/Teardown.", -) - -flags.DEFINE_string( - "gke_region", - "us-central1", - "GCP region for networking and Artifact Registry.", -) - -flags.DEFINE_string( - "gke_zone", - "us-central1-a", - "GCP zone for the GKE cluster and node pools.", -) - -flags.DEFINE_string( - "gke_sandbox_machine_type", - "c4-standard-8", - "Machine type for the gVisor sandbox node pool.", -) - -flags.DEFINE_string( - "gke_cluster_suffix", - "", - "Cluster name suffix. If empty, derived from machine family (e.g. 
'c4').", -) - -flags.DEFINE_string( - "gke_gke_version", - "1.35.3-gke.1389000", - "GKE cluster version.", -) - -flags.DEFINE_bool( - "gke_use_connect_gateway", - True, - "Use Connect Gateway for kubectl access instead of direct public endpoint.", -) - -flags.DEFINE_bool( - "gke_enable_pod_snapshots", - True, - "Enable GKE Pod Snapshots (Preview feature, uses gcloud beta).", -) - -flags.DEFINE_bool( - "gke_skip_image_build", - True, - "Skip container image builds during Provision. Set to False on first run.", -) - -flags.DEFINE_integer( - "gke_sandbox_node_count", - 1, - "Number of nodes in the gVisor sandbox node pool.", -) - -flags.DEFINE_integer( - "gke_sandbox_disk_size", - 100, - "Disk size in GB for sandbox node pool nodes.", -) - -flags.DEFINE_integer( - "gke_sandbox_max_pods_per_node", - 250, - "Max pods per node on the sandbox node pool.", -) - -flags.DEFINE_string( - "gke_subnet_cidr", - "10.134.20.0/24", - "CIDR range for the benchmark subnet.", -) - -flags.DEFINE_bool( - "gke_teardown_keep_images", - False, - "If True, skip Artifact Registry repo deletion during Teardown.", -) - -flags.DEFINE_bool( - "gke_teardown_keep_infra", - False, - "If True, only delete K8s workloads during Teardown (keep cluster/network).", -) - - -# --------------------------------------------------------------------------- -# Internal helpers -# --------------------------------------------------------------------------- - - -def _run(cmd, timeout=300, check=True): - """Run a shell command and return CompletedProcess. - - Args: - cmd: List of command arguments. - timeout: Max seconds to wait. - check: If True, raise on non-zero exit. 
- - Returns: - subprocess.CompletedProcess - """ - logging.info("CMD: %s", " ".join(cmd)) - proc = subprocess.run( - cmd, capture_output=True, text=True, timeout=timeout, - ) - if proc.returncode != 0: - logging.warning("CMD stderr: %s", proc.stderr[-500:] if proc.stderr else "") - if check: - raise RuntimeError( - f"Command failed (rc={proc.returncode}): {' '.join(cmd[:6])}\n" - f"{proc.stderr[-300:]}" - ) - return proc - - -def _run_quiet(cmd, timeout=300): - """Run a command, suppress errors (idempotent checks).""" - return _run(cmd, timeout=timeout, check=False) - - -def _resource_exists(cmd): - """Return True if a gcloud describe/get command succeeds.""" - proc = _run_quiet(cmd) - return proc.returncode == 0 - - -def _derive_config(): - """Derive computed configuration values from flags. - - Returns: - dict with all computed names and settings. - """ - project = FLAGS.gke_project_id - if not project: - raise RuntimeError("--gke_project_id is required for Provision/Teardown.") - - region = FLAGS.gke_region - zone = FLAGS.gke_zone - machine_type = FLAGS.gke_sandbox_machine_type - - # Derive machine family (e.g. 
"c4" from "c4-standard-8") - machine_family = machine_type.split("-")[0] - - # Derive cluster suffix - cluster_suffix = FLAGS.gke_cluster_suffix - if not cluster_suffix: - if machine_family == "c3" and "metal" in machine_type: - cluster_suffix = "c3metal" - else: - cluster_suffix = machine_family - - # Derive disk type - if machine_family == "c3": - disk_type = "pd-balanced" - else: - disk_type = "hyperdisk-balanced" - - # Derive architecture - if machine_family == "c4a": - target_arch = "arm64" - else: - target_arch = "amd64" - - # Derive master CIDR - master_cidr_map = { - "c4": "172.16.0.0/28", - "c4d": "172.16.0.16/28", - "c4a": "172.16.0.32/28", - "c3metal": "172.16.0.48/28", - } - master_cidr = master_cidr_map.get(cluster_suffix, "172.16.0.64/28") - - # Use a prefix derived from project for naming - name_prefix = "pkb" - - cluster_name = f"{name_prefix}-agentic-{cluster_suffix}" - vpc_name = f"{name_prefix}-agentic-vpc" - subnet_name = f"{name_prefix}-agentic-subnet" - router_name = f"{name_prefix}-agentic-nat-router" - nat_name = f"{name_prefix}-agentic-nat-config" - sandbox_pool_name = "agentic-sandbox-pool" - adk_repo_name = "adk-repo" - sandbox_repo_name = "agent-sandbox" - cloud_build_sa = "adk-cloud-build-sa" - cloud_build_sa_email = f"{cloud_build_sa}@{project}.iam.gserviceaccount.com" - namespace = FLAGS.gke_namespace - - return { - "project": project, - "region": region, - "zone": zone, - "machine_type": machine_type, - "machine_family": machine_family, - "cluster_suffix": cluster_suffix, - "disk_type": disk_type, - "target_arch": target_arch, - "master_cidr": master_cidr, - "cluster_name": cluster_name, - "vpc_name": vpc_name, - "subnet_name": subnet_name, - "subnet_cidr": FLAGS.gke_subnet_cidr, - "router_name": router_name, - "nat_name": nat_name, - "sandbox_pool_name": sandbox_pool_name, - "adk_repo_name": adk_repo_name, - "sandbox_repo_name": sandbox_repo_name, - "cloud_build_sa": cloud_build_sa, - "cloud_build_sa_email": cloud_build_sa_email, - 
"namespace": namespace, - "gke_version": FLAGS.gke_gke_version, - "sandbox_node_count": FLAGS.gke_sandbox_node_count, - "sandbox_disk_size": FLAGS.gke_sandbox_disk_size, - "sandbox_max_pods": FLAGS.gke_sandbox_max_pods_per_node, - "use_connect_gateway": FLAGS.gke_use_connect_gateway, - "enable_pod_snapshots": FLAGS.gke_enable_pod_snapshots, - "sandbox_version": FLAGS.gke_sandbox_version, - } - - -# --------------------------------------------------------------------------- -# Provision steps -# --------------------------------------------------------------------------- - - -def _enable_apis(cfg): - """Enable required GCP services.""" - logging.info("Enabling required GCP APIs...") - apis = [ - "iap.googleapis.com", - "container.googleapis.com", - "artifactregistry.googleapis.com", - "cloudbuild.googleapis.com", - "aiplatform.googleapis.com", - "storage.googleapis.com", - "iam.googleapis.com", - "connectgateway.googleapis.com", - "gkehub.googleapis.com", - "gkeconnect.googleapis.com", - ] - _run(["gcloud", "services", "enable"] + apis + [f"--project={cfg['project']}"], - timeout=120) - - -def _create_network(cfg): - """Create VPC, subnet, firewall rules, Cloud Router, and NAT.""" - project = cfg["project"] - region = cfg["region"] - vpc = cfg["vpc_name"] - subnet = cfg["subnet_name"] - cidr = cfg["subnet_cidr"] - router = cfg["router_name"] - nat = cfg["nat_name"] - - # VPC - if not _resource_exists(["gcloud", "compute", "networks", "describe", vpc, - f"--project={project}"]): - logging.info("Creating VPC %s...", vpc) - _run(["gcloud", "compute", "networks", "create", vpc, - "--subnet-mode=custom", f"--project={project}"]) - - # Subnet - if not _resource_exists(["gcloud", "compute", "networks", "subnets", "describe", - subnet, f"--region={region}", f"--project={project}"]): - logging.info("Creating subnet %s...", subnet) - _run(["gcloud", "compute", "networks", "subnets", "create", subnet, - f"--network={vpc}", f"--region={region}", - f"--range={cidr}", 
f"--project={project}"]) - - # Firewall: IAP SSH - fw_iap = f"{vpc}-allow-iap-ssh" - if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe", - fw_iap, f"--project={project}"]): - logging.info("Creating firewall rule %s...", fw_iap) - _run(["gcloud", "compute", "firewall-rules", "create", fw_iap, - f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW", - "--rules=tcp:22", "--source-ranges=35.235.240.0/20", - "--priority=1000", f"--project={project}"]) - - # Firewall: internal - fw_int = f"{vpc}-allow-internal" - if not _resource_exists(["gcloud", "compute", "firewall-rules", "describe", - fw_int, f"--project={project}"]): - logging.info("Creating firewall rule %s...", fw_int) - _run(["gcloud", "compute", "firewall-rules", "create", fw_int, - f"--network={vpc}", "--direction=INGRESS", "--action=ALLOW", - "--rules=tcp,udp,icmp", f"--source-ranges={cidr}", - "--priority=1000", f"--project={project}"]) - - # Cloud Router - if not _resource_exists(["gcloud", "compute", "routers", "describe", router, - f"--region={region}", f"--project={project}"]): - logging.info("Creating Cloud Router %s...", router) - _run(["gcloud", "compute", "routers", "create", router, - f"--network={vpc}", f"--region={region}", f"--project={project}"]) - - # Cloud NAT - if not _resource_exists(["gcloud", "compute", "routers", "nats", "describe", nat, - f"--router={router}", f"--region={region}", - f"--project={project}"]): - logging.info("Creating Cloud NAT %s...", nat) - _run(["gcloud", "compute", "routers", "nats", "create", nat, - f"--router={router}", f"--region={region}", - "--nat-all-subnet-ip-ranges", "--auto-allocate-nat-external-ips", - f"--project={project}"]) - - -def _create_cluster(cfg): - """Create the GKE cluster with DPv2 and Workload Identity.""" - project = cfg["project"] - zone = cfg["zone"] - cluster = cfg["cluster_name"] - - if _resource_exists(["gcloud", "container", "clusters", "describe", cluster, - f"--zone={zone}", f"--project={project}"]): - 
logging.info("GKE cluster %s already exists.", cluster) - return - - logging.info("Creating GKE cluster %s...", cluster) - - if cfg["enable_pod_snapshots"]: - snapshot_flag = ["--enable-pod-snapshots"] - logging.info("Pod Snapshots ENABLED (using gcloud beta).") - cmd = ["gcloud", "beta", "container", "clusters", "create", cluster] - else: - snapshot_flag = [] - cmd = ["gcloud", "container", "clusters", "create", cluster] - - cmd += [ - f"--zone={zone}", - f"--network={cfg['vpc_name']}", - f"--subnetwork={cfg['subnet_name']}", - "--enable-private-nodes", - "--enable-ip-alias", - f"--master-ipv4-cidr={cfg['master_cidr']}", - f"--cluster-version={cfg['gke_version']}", - "--no-enable-shielded-nodes", - "--num-nodes=1", - f"--machine-type={cfg['machine_type']}", - f"--disk-type={cfg['disk_type']}", - "--disk-size=50", - "--enable-dataplane-v2", - f"--workload-pool={project}.svc.id.goog", - "--release-channel=None", - f"--project={project}", - ] + snapshot_flag - - _run(cmd, timeout=600) - logging.info("GKE cluster %s created.", cluster) - - -def _get_credentials(cfg): - """Register to fleet and get kubectl credentials.""" - project = cfg["project"] - zone = cfg["zone"] - cluster = cfg["cluster_name"] - - if cfg["use_connect_gateway"]: - # Register to fleet - if not _resource_exists(["gcloud", "container", "fleet", "memberships", - "describe", cluster, f"--project={project}"]): - logging.info("Registering cluster %s to fleet...", cluster) - _run(["gcloud", "container", "fleet", "memberships", "register", cluster, - f"--gke-cluster={zone}/{cluster}", - "--enable-workload-identity", - f"--project={project}"], timeout=120) - - logging.info("Getting credentials via Connect Gateway...") - _run(["gcloud", "container", "fleet", "memberships", "get-credentials", - cluster, f"--project={project}"], timeout=60) - else: - logging.info("Getting credentials (direct endpoint)...") - _run(["gcloud", "container", "clusters", "get-credentials", cluster, - f"--zone={zone}", 
f"--project={project}"], timeout=60) - - -def _create_sandbox_node_pool(cfg): - """Create the gVisor-enabled sandbox node pool.""" - project = cfg["project"] - zone = cfg["zone"] - cluster = cfg["cluster_name"] - pool_name = cfg["sandbox_pool_name"] - - if _resource_exists(["gcloud", "container", "node-pools", "describe", pool_name, - f"--cluster={cluster}", f"--zone={zone}", - f"--project={project}"]): - logging.info("Sandbox node pool %s already exists.", pool_name) - return - - logging.info("Creating sandbox node pool %s with gVisor...", pool_name) - cmd = [ - "gcloud", "container", "node-pools", "create", pool_name, - f"--cluster={cluster}", - f"--zone={zone}", - f"--project={project}", - f"--machine-type={cfg['machine_type']}", - f"--num-nodes={cfg['sandbox_node_count']}", - f"--disk-type={cfg['disk_type']}", - f"--disk-size={cfg['sandbox_disk_size']}", - f"--max-pods-per-node={cfg['sandbox_max_pods']}", - "--node-labels=dedicated=agentic-sandbox", - "--node-taints=dedicated=agentic-sandbox:NoSchedule", - "--workload-metadata=GKE_METADATA", - "--sandbox", "type=gvisor", - ] - _run(cmd, timeout=600) - logging.info("Sandbox node pool %s created.", pool_name) - - -def _create_artifact_registry(cfg): - """Create Artifact Registry repositories.""" - project = cfg["project"] - region = cfg["region"] - - for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]): - logging.info("Ensuring AR repo %s exists...", repo_name) - _run_quiet([ - "gcloud", "artifacts", "repositories", "create", repo_name, - "--repository-format=docker", - f"--location={region}", - f"--project={project}", - ]) - - -def _create_cloud_build_sa(cfg): - """Create Cloud Build service account and bind IAM roles.""" - project = cfg["project"] - sa_email = cfg["cloud_build_sa_email"] - sa_name = cfg["cloud_build_sa"] - - # Create SA if not exists - if not _resource_exists(["gcloud", "iam", "service-accounts", "describe", - sa_email, f"--project={project}"]): - logging.info("Creating Cloud Build 
SA %s...", sa_email) - _run(["gcloud", "iam", "service-accounts", "create", sa_name, - f"--display-name={sa_name}", f"--project={project}"]) - # Wait for propagation - time.sleep(10) - - roles = [ - "roles/logging.logWriter", - "roles/storage.objectViewer", - "roles/artifactregistry.writer", - "roles/serviceusage.serviceUsageConsumer", - ] - for role in roles: - _run_quiet([ - "gcloud", "projects", "add-iam-policy-binding", project, - f"--member=serviceAccount:{sa_email}", - f"--role={role}", - "--condition=None", "--quiet", - ]) - logging.info("Cloud Build SA ready.") - - -# --------------------------------------------------------------------------- -# Teardown steps -# --------------------------------------------------------------------------- - - -def _teardown_workloads(cfg): - """Delete K8s workloads, CRDs, and namespace.""" - ns = cfg["namespace"] - version = cfg["sandbox_version"] - - logging.info("Deleting namespace %s...", ns) - _run_quiet(["kubectl", "delete", "namespace", ns, - "--ignore-not-found=true", "--timeout=120s"]) - - logging.info("Removing Agent Sandbox CRDs...") - _run_quiet(["kubectl", "delete", "-f", - f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/extensions.yaml", - "--ignore-not-found=true"]) - _run_quiet(["kubectl", "delete", "-f", - f"https://github.com/kubernetes-sigs/agent-sandbox/releases/download/{version}/manifest.yaml", - "--ignore-not-found=true"]) - - logging.info("Removing cluster-scoped RBAC...") - _run_quiet(["kubectl", "delete", "clusterrolebinding", - "adk-agent-sandbox-binding", "--ignore-not-found=true"]) - _run_quiet(["kubectl", "delete", "clusterrole", - "adk-agent-sandbox-role", "--ignore-not-found=true"]) - - -def _teardown_images(cfg): - """Delete Artifact Registry repositories.""" - project = cfg["project"] - region = cfg["region"] - - for repo_name in (cfg["adk_repo_name"], cfg["sandbox_repo_name"]): - logging.info("Deleting AR repo %s...", repo_name) - _run_quiet(["gcloud", 
"artifacts", "repositories", "delete", repo_name, - f"--location={region}", f"--project={project}", "--quiet"]) - - -def _teardown_cloud_build_sa(cfg): - """Delete Cloud Build service account and IAM bindings.""" - project = cfg["project"] - sa_email = cfg["cloud_build_sa_email"] - - roles = [ - "roles/logging.logWriter", - "roles/storage.objectViewer", - "roles/artifactregistry.writer", - "roles/serviceusage.serviceUsageConsumer", - ] - for role in roles: - _run_quiet([ - "gcloud", "projects", "remove-iam-policy-binding", project, - f"--member=serviceAccount:{sa_email}", - f"--role={role}", "--quiet", - ]) - - _run_quiet(["gcloud", "iam", "service-accounts", "delete", sa_email, - f"--project={project}", "--quiet"]) - logging.info("Cloud Build SA deleted.") - - -def _teardown_cluster(cfg): - """Delete GKE node pools and cluster.""" - project = cfg["project"] - zone = cfg["zone"] - cluster = cfg["cluster_name"] - pool_name = cfg["sandbox_pool_name"] - - logging.info("Deleting sandbox node pool %s...", pool_name) - _run_quiet(["gcloud", "container", "node-pools", "delete", pool_name, - f"--cluster={cluster}", f"--zone={zone}", - f"--project={project}", "--quiet"]) - - logging.info("Deleting GKE cluster %s...", cluster) - _run_quiet(["gcloud", "container", "clusters", "delete", cluster, - f"--zone={zone}", f"--project={project}", "--quiet"]) - - -def _teardown_network(cfg): - """Delete network resources in reverse dependency order.""" - project = cfg["project"] - region = cfg["region"] - vpc = cfg["vpc_name"] - router = cfg["router_name"] - nat = cfg["nat_name"] - subnet = cfg["subnet_name"] - - logging.info("Deleting Cloud NAT and Router...") - _run_quiet(["gcloud", "compute", "routers", "nats", "delete", nat, - f"--router={router}", f"--region={region}", - f"--project={project}", "--quiet"]) - _run_quiet(["gcloud", "compute", "routers", "delete", router, - f"--region={region}", f"--project={project}", "--quiet"]) - - logging.info("Deleting firewall rules...") - for 
suffix in ("allow-iap-ssh", "allow-internal"): - _run_quiet(["gcloud", "compute", "firewall-rules", "delete", - f"{vpc}-{suffix}", f"--project={project}", "--quiet"]) - - logging.info("Deleting subnet and VPC...") - _run_quiet(["gcloud", "compute", "networks", "subnets", "delete", subnet, - f"--region={region}", f"--project={project}", "--quiet"]) - _run_quiet(["gcloud", "compute", "networks", "delete", vpc, - f"--project={project}", "--quiet"]) - - -# --------------------------------------------------------------------------- -# Public API -# --------------------------------------------------------------------------- - - -flags.DEFINE_enum( - "gke_provision_mode", - "custom", - ["custom", "native"], - "Provisioning mode: 'custom' uses direct gcloud calls (Phase 1 logic), " - "'native' uses PKB's container_cluster with prerequisite_setup.py.", -) - -def Provision(): - """Provision GKE infrastructure. - - Mode is controlled by --gke_provision_mode: - - custom: Direct gcloud calls (full control, no PKB cluster management) - - native: PKB manages cluster via container_cluster spec. - Requires prerequisite_setup.py to have been run first. - """ - mode = FLAGS.gke_provision_mode - if mode == "native": - logging.info( - "Provision mode=native: PKB manages cluster via container_cluster. " - "Ensure prerequisite_setup.py was run first (VPC, NAT, AR, images)." 
- ) - return # PKB handles cluster creation via container_cluster spec - - logging.info("Provision mode=custom: using direct gcloud calls.") - cfg = _derive_config() - - logging.info("=== Provision: project=%s cluster=%s machine=%s ===", - cfg["project"], cfg["cluster_name"], cfg["machine_type"]) - - _enable_apis(cfg) - _create_network(cfg) - _create_cluster(cfg) - _get_credentials(cfg) - _create_sandbox_node_pool(cfg) - _create_artifact_registry(cfg) - _create_cloud_build_sa(cfg) - - # --- Phase 3: Build container images --- - if not FLAGS.gke_skip_image_build: - from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils - gke_image_build_utils.BuildImages() - else: - logging.info("Skipping image builds (--gke_skip_image_build=true)") - - logging.info("=== Provision complete: %s ===", cfg["cluster_name"]) - - -def Teardown(): - """Teardown GKE infrastructure. - - Mode is controlled by --gke_provision_mode: - - custom: Direct gcloud calls to delete all resources. - - native: PKB manages cluster deletion. Run prerequisite_setup.py --teardown - separately to clean up VPC/NAT/AR. - """ - mode = FLAGS.gke_provision_mode - if mode == "native": - logging.info( - "Teardown mode=native: PKB manages cluster deletion. " - "Run prerequisite_setup.py --teardown to clean up VPC/NAT/AR." 
- ) - return # PKB handles cluster deletion - - logging.info("Teardown mode=custom: using direct gcloud calls.") - cfg = _derive_config() - - logging.info("=== Teardown: project=%s cluster=%s ===", - cfg["project"], cfg["cluster_name"]) - logging.info(" keep_images=%s keep_infra=%s", - FLAGS.gke_teardown_keep_images, - FLAGS.gke_teardown_keep_infra) - - # Always delete workloads - _teardown_workloads(cfg) - - # Conditionally delete images - if not FLAGS.gke_teardown_keep_images: - _teardown_images(cfg) - - # Conditionally delete infrastructure - if not FLAGS.gke_teardown_keep_infra: - _teardown_cloud_build_sa(cfg) - _teardown_cluster(cfg) - _teardown_network(cfg) - - logging.info("=== Teardown complete ===") diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py index 157bd2559e..e323be4d31 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent Python Sandbox Density (Use Case B). +"""PKB Benchmark: GKE Agent Python Sandbox Density . Atomic single-point measurement of Python sandbox density on a pre-provisioned GKE cluster with gVisor isolation. Measures Code Execution @@ -6,17 +6,25 @@ per-type latency breakdown (compute, syscall, import) at a given concurrent session count. +Workflow per session: + 1. Claim a pre-warmed sandbox pod from the SandboxWarmPool + 2. Upload and execute the benchmark script inside the gVisor sandbox + 3. Run `sample_warmup` iterations (results discarded - stabilizes caches) + 4. Run `sample_count` measured iterations (results recorded) + 5. Report TTFE, per-iteration CEL, RSS, and per-task-type breakdown + 6. 
Release the sandbox claim + This benchmark is designed to be invoked repeatedly by an external sweep controller that varies the density parameter across iterations to find the saturation point. Usage: python pkb.py --benchmarks=gke_python_density \\ - --gke_python_density=16 \\ + --gke_python_density_concurrent_sandbox_count=16 \\ --gke_python_density_sample_count=20 \\ --gke_python_density_sample_warmup=0 \\ - --gke_namespace=agentic \\ - --gke_api_url=http://localhost:8080 + --k8s_namespace=agentic \\ + --k8s_agent_api_url=http://localhost:8080 Samples emitted (per run): - gke_python_density_orchestrator_cel_mean (ms) @@ -52,7 +60,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -72,7 +79,7 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_python_density", + "gke_python_density_concurrent_sandbox_count", 1, "Number of concurrent sandbox sessions to run.", ) @@ -86,7 +93,11 @@ flags.DEFINE_integer( "gke_python_density_sample_warmup", 0, - "Number of warmup iterations per session (excluded from stats).", + "Number of warmup iterations per session (excluded from stats). " + "Warmup iterations execute the same benchmark tasks as measured " + "iterations but their latency results are discarded. This allows " + "JIT compilation, caches, and gVisor page faults to stabilize " + "before measurement begins.", ) flags.DEFINE_bool( @@ -107,11 +118,6 @@ # --------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. 
@@ -123,7 +129,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) utils.EnsurePortForward() logging.info("Prepare complete.") @@ -135,8 +141,10 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. """ - ns = FLAGS.gke_namespace - density = FLAGS.gke_python_density + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace + density = FLAGS.gke_python_density_concurrent_sandbox_count logging.info("=== Run: density=%d ===", density) @@ -323,7 +331,7 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Clean up after measurement. Scale warm pool to 0.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("Cleanup: draining warm pool.") if FLAGS.gke_python_density_patch_warmpool: @@ -337,18 +345,25 @@ def Cleanup(benchmark_spec): logging.info("Cleanup complete (cluster persists).") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _emit(samples, agg, agg_key, metric_suffix, unit, namespace, extra): - """Emit a sample if the key exists in the aggregate dict.""" + """Emit a sample if the key exists in the aggregate dict. + + Args: + samples: List to append the new sample.Sample to. + agg: Aggregate metrics dict returned by the agent API response. + agg_key: Key to look up in `agg` (e.g. "orchestrator_cel_mean_ms"). + metric_suffix: Suffix appended to BENCHMARK_NAME to form the metric + name (e.g. "orchestrator_cel_mean"). + unit: Unit string for the sample (e.g. "ms", "MB", "seconds"). + namespace: Kubernetes namespace (included in sample metadata). 
+ extra: Dict of additional metadata key-value pairs attached to + every sample (density, session counts, wall time, etc.). + """ value = agg.get(agg_key) if value is not None: samples.append( diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py index f638494508..2146489752 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent QPS Saturation (Use Case F). +"""PKB Benchmark: GKE Agent QPS Saturation . Atomic single-point measurement of scheduling throughput on a pre-provisioned GKE cluster. Fires sandbox claim requests at a controlled QPS rate for a @@ -19,8 +19,8 @@ --gke_qps_pool_size=70 \\ --gke_qps_step_duration_s=30.0 \\ --gke_qps_mode=agent \\ - --gke_namespace=agentic \\ - --gke_api_url=http://localhost:8080 + --k8s_namespace=agentic \\ + --k8s_agent_api_url=http://localhost:8080 # Raw claim mode python pkb.py --benchmarks=gke_qps \\ @@ -29,7 +29,7 @@ --gke_qps_step_duration_s=30.0 \\ --gke_qps_mode=raw_claim \\ --gke_qps_claim_timeout_s=60.0 \\ - --gke_namespace=agentic + --k8s_namespace=agentic Samples emitted (per run): - gke_qps_ttfe_mean (ms) @@ -51,21 +51,22 @@ """ import json +import os import logging -import subprocess import threading import time import uuid from absl import flags from perfkitbenchmarker import configs +from perfkitbenchmarker import data +from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -135,11 +136,6 @@ # 
--------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -151,7 +147,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) mode = FLAGS.gke_qps_mode if mode == "agent": @@ -166,7 +162,9 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. """ - ns = FLAGS.gke_namespace + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace pool_size = FLAGS.gke_qps_pool_size # Scale warm pool (moved from Prepare for sweep compatibility) @@ -188,7 +186,7 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Delete benchmark claims and drain warm pool.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("Cleanup: deleting benchmark claims and draining warm pool.") # Delete any lingering benchmark claims @@ -205,11 +203,6 @@ def Cleanup(benchmark_spec): logging.info("Cleanup complete.") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Agent mode # --------------------------------------------------------------------------- @@ -217,7 +210,7 @@ def Teardown(benchmark_spec): def _RunAgent(benchmark_spec): """Fire QPS burst via the orchestrator API.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace target_qps = FLAGS.gke_qps_target_qps pool_size = FLAGS.gke_qps_pool_size step_duration = FLAGS.gke_qps_step_duration_s @@ -384,7 +377,7 @@ def _RunAgent(benchmark_spec): def _RunRawClaim(benchmark_spec): """Fire SandboxClaims directly at target_qps (no agent).""" - ns = FLAGS.gke_namespace + ns = 
FLAGS.k8s_namespace target_qps = FLAGS.gke_qps_target_qps pool_size = FLAGS.gke_qps_pool_size step_duration = FLAGS.gke_qps_step_duration_s @@ -667,21 +660,30 @@ def _CreateClaim(namespace, template, claim_name): "labels": {"created-by": "pkb-qps-benchmark"}, }, "spec": { - "sandboxTemplateName": template, + "sandboxTemplateRef": {"name": template}, }, } ) - proc = subprocess.run( - ["kubectl", "apply", "-n", namespace, "-f", "-"], - input=manifest, - capture_output=True, - text=True, - timeout=30, + tmp_dir = os.path.join( + data.ResourcePath("k8s_agents/manifests"), "tmp" ) + os.makedirs(tmp_dir, exist_ok=True) + tmp_path = os.path.join(tmp_dir, f"qps-claim-{claim_name}.json") + try: + with open(tmp_path, "w") as f: + f.write(manifest) + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", tmp_path], + timeout=30, + raise_on_failure=False, + ) + finally: + if os.path.isfile(tmp_path): + os.unlink(tmp_path) t_create = time.time() - if proc.returncode != 0: + if retcode != 0: raise RuntimeError( - f"Failed to create claim {claim_name}: {proc.stderr.strip()}" + f"Failed to create claim {claim_name}: {stderr.strip()}" ) return t_create diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py index 4cfba5d5d0..44d21fcc84 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py @@ -1,4 +1,4 @@ -"""PKB Benchmark: GKE Agent Pod Snapshot Saturation (Use Case A). +"""PKB Benchmark: GKE Agent Pod Snapshot Saturation . Atomic single-point measurement of GKE Pod Snapshot create/restore latency on a pre-provisioned GKE cluster with gVisor isolation. 
Measures snapshot @@ -13,7 +13,7 @@ python pkb.py --benchmarks=gke_snapshot \\ --gke_snapshot_preload_mb=50 \\ --gke_snapshot_burst_size=3 \\ - --gke_namespace=agentic \\ + --k8s_namespace=agentic \\ --gke_snapshot_skip_snapshot=false Samples emitted (per run): @@ -35,12 +35,15 @@ import logging import os import re -import subprocess import time from concurrent.futures import ThreadPoolExecutor +from jinja2 import Template + from absl import flags from perfkitbenchmarker import configs +from perfkitbenchmarker import data +from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker import sample from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_benchmark_utils as utils, @@ -48,7 +51,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -107,11 +109,6 @@ # --------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -122,7 +119,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads, snapshot infra, and validate readiness.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace preload_mb = FLAGS.gke_snapshot_preload_mb logging.info( @@ -132,7 +129,7 @@ def Prepare(benchmark_spec): ) # Deploy Agent Sandbox ecosystem (idempotent) - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) # Deploy Pod Snapshot infrastructure (idempotent) deploy_utils.DeploySnapshots() @@ -189,7 +186,9 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. 
""" - ns = FLAGS.gke_namespace + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace preload_mb = FLAGS.gke_snapshot_preload_mb burst_size = FLAGS.gke_snapshot_burst_size skip_snapshot = FLAGS.gke_snapshot_skip_snapshot @@ -284,7 +283,7 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Clean up any leftover benchmark resources.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace logging.info("Cleanup — deleting any leftover snapshot-benchmark resources.") for kind in ( @@ -310,11 +309,6 @@ def Cleanup(benchmark_spec): logging.info("Cleanup complete.") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Core snapshot/restore logic # --------------------------------------------------------------------------- @@ -592,15 +586,24 @@ def _ApplyClaim(name, namespace, template_name): "spec": {"sandboxTemplateRef": {"name": template_name}}, } ) - proc = subprocess.run( - ["kubectl", "apply", "-f", "-"], - input=manifest, - capture_output=True, - text=True, - timeout=30, + tmp_dir = os.path.join( + data.ResourcePath("k8s_agents/manifests"), "tmp" ) - if proc.returncode != 0: - raise RuntimeError(f"Failed to create SandboxClaim {name}: {proc.stderr}") + os.makedirs(tmp_dir, exist_ok=True) + tmp_path = os.path.join(tmp_dir, f"snap-claim-{name}.json") + try: + with open(tmp_path, "w") as f: + f.write(manifest) + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", tmp_path], + timeout=30, + raise_on_failure=False, + ) + finally: + if os.path.isfile(tmp_path): + os.unlink(tmp_path) + if retcode != 0: + raise RuntimeError(f"Failed to create SandboxClaim {name}: {stderr}") def _RenderAndApplyTemplate( @@ -611,7 +614,7 @@ def _RenderAndApplyTemplate( preload_mb, preload_mode, ): - """Render the .yaml.template with step-specific values and kubectl apply.""" + """Render the Jinja2 template 
with step-specific values and kubectl apply.""" if preload_mode.startswith("script:"): return _RenderAndApplyScriptTemplate( template_name, @@ -626,50 +629,44 @@ def _RenderAndApplyTemplate( memory_mi = max(512, preload_mb + 256) - rendered = ( - content.replace("$AGENTIC_NAMESPACE", namespace) - .replace("$SNAPSHOT_KSA_NAME", ksa_name) - .replace("$SNAPSHOT_PRELOAD_MB", str(preload_mb)) - ) - rendered = rendered.replace( - "name: snapshot-benchmark-template", - f"name: {template_name}", - ) - rendered = rendered.replace( - 'memory: "512Mi"', - f'memory: "{memory_mi}Mi"', + tmpl = Template(content) + rendered = tmpl.render( + template_name=template_name, + namespace=namespace, + ksa_name=ksa_name, + preload_mb=preload_mb, + memory_mi=memory_mi, ) - proc = subprocess.run( - ["kubectl", "apply", "-f", "-"], - input=rendered, - capture_output=True, - text=True, - timeout=30, + tmp_dir = os.path.join( + data.ResourcePath("k8s_agents/manifests"), "tmp" ) - if proc.returncode != 0: - logging.warning("kubectl apply stderr: %s", proc.stderr) - return proc.returncode == 0 + os.makedirs(tmp_dir, exist_ok=True) + tmp_path = os.path.join(tmp_dir, f"snap-template-{template_name}.yaml") + try: + with open(tmp_path, "w") as f: + f.write(rendered) + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", tmp_path], + timeout=30, + raise_on_failure=False, + ) + finally: + if os.path.isfile(tmp_path): + os.unlink(tmp_path) + if retcode != 0: + logging.warning("kubectl apply stderr: %s", stderr) + return retcode == 0 def _get_sandbox_node_selector(): - """Return the correct nodeSelector based on provisioning mode.""" - try: - mode = FLAGS.gke_provision_mode - except AttributeError: - mode = "custom" - if mode == "native": - return {"pkb_nodepool": "sandbox"} - return {"dedicated": "agentic-sandbox"} + """Return the nodeSelector for sandbox pods.""" + return {"pkb_nodepool": "sandbox"} def _get_sandbox_tolerations(): - """Return the correct tolerations based on 
provisioning mode.""" - try: - mode = FLAGS.gke_provision_mode - except AttributeError: - mode = "custom" - tolerations = [ + """Return tolerations for sandbox pods.""" + return [ { "key": "sandbox.gke.io/runtime", "operator": "Equal", @@ -677,17 +674,6 @@ def _get_sandbox_tolerations(): "effect": "NoSchedule", }, ] - if mode != "native": - tolerations.insert( - 0, - { - "key": "dedicated", - "operator": "Equal", - "value": "agentic-sandbox", - "effect": "NoSchedule", - }, - ) - return tolerations def _RenderAndApplyScriptTemplate( @@ -725,7 +711,7 @@ def _RenderAndApplyScriptTemplate( "done\n" ) - manifest = { + manifest = json.dumps({ "apiVersion": "extensions.agents.x-k8s.io/v1alpha1", "kind": "SandboxTemplate", "metadata": { @@ -762,18 +748,27 @@ def _RenderAndApplyScriptTemplate( }, } }, - } + }) - proc = subprocess.run( - ["kubectl", "apply", "-f", "-"], - input=json.dumps(manifest), - capture_output=True, - text=True, - timeout=30, + tmp_dir = os.path.join( + data.ResourcePath("k8s_agents/manifests"), "tmp" ) - if proc.returncode != 0: - logging.warning("kubectl apply stderr: %s", proc.stderr) - return proc.returncode == 0 + os.makedirs(tmp_dir, exist_ok=True) + tmp_path = os.path.join(tmp_dir, f"snap-script-template-{template_name}.json") + try: + with open(tmp_path, "w") as f: + f.write(manifest) + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", tmp_path], + timeout=30, + raise_on_failure=False, + ) + finally: + if os.path.isfile(tmp_path): + os.unlink(tmp_path) + if retcode != 0: + logging.warning("kubectl apply stderr: %s", stderr) + return retcode == 0 def _MeasureSingleSource(name, namespace, t0, pod_timeout, preload_mode): @@ -861,15 +856,24 @@ def _TriggerAndWaitSnapshot(trigger_name, target_pod, namespace, t0, timeout_s=3 "spec": {"targetPod": target_pod}, } ) - proc = subprocess.run( - ["kubectl", "apply", "-f", "-"], - input=manifest, - capture_output=True, - text=True, - timeout=30, + tmp_dir = os.path.join( + 
data.ResourcePath("k8s_agents/manifests"), "tmp" ) - if proc.returncode != 0: - result["error"] = f"Failed to create trigger: {proc.stderr}" + os.makedirs(tmp_dir, exist_ok=True) + tmp_path = os.path.join(tmp_dir, f"snap-trigger-{trigger_name}.json") + try: + with open(tmp_path, "w") as f: + f.write(manifest) + stdout, stderr, retcode = kubectl.RunKubectlCommand( + ["apply", "-f", tmp_path], + timeout=30, + raise_on_failure=False, + ) + finally: + if os.path.isfile(tmp_path): + os.unlink(tmp_path) + if retcode != 0: + result["error"] = f"Failed to create trigger: {stderr}" return result deadline = t0 + timeout_s @@ -985,13 +989,9 @@ def _CleanupStep(source_names, restore_names, trigger_names, template_name, name def _GetTemplatePath(): """Return the absolute path to the snapshot SandboxTemplate template.""" - pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) return os.path.join( - pkg_dir, - "data", - "k8s_agents", - "manifests", - "snapshot-sandbox-template.yaml.template", + data.ResourcePath("k8s_agents/manifests"), + "snapshot-sandbox-template.yaml.j2", ) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py index 1c00deca54..e696b089db 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py @@ -17,7 +17,7 @@ --gke_warmpool_ready_threshold_s=300 \ --gke_warmpool_poll_interval_s=2.0 \ --gke_warmpool_drain_timeout_s=300 \ - --gke_namespace=agentic \ + --k8s_namespace=agentic \ --gke_machine_type=c4-standard-8 Samples emitted (per run): @@ -55,7 +55,6 @@ from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, ) -from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_provision_utils FLAGS = flags.FLAGS @@ -113,11 +112,6 @@ # 
--------------------------------------------------------------------------- -def Provision(benchmark_spec): - """Provision GKE cluster and all dependencies.""" - gke_provision_utils.Provision() - - def GetConfig(user_config): """Load and return benchmark config. @@ -129,7 +123,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads onto the cluster.""" logging.info("=== Prepare: deploying workloads ===") - deploy_utils.DeployWorkloads() + deploy_utils.DeployWorkloads(benchmark_spec) utils.EnsurePortForward() logging.info("Prepare complete.") @@ -140,7 +134,9 @@ def Run(benchmark_spec): Returns: List of sample.Sample objects. """ - ns = FLAGS.gke_namespace + utils.set_benchmark_spec(benchmark_spec) + + ns = FLAGS.k8s_namespace target = FLAGS.gke_warmpool_target_replicas warmpool_name = FLAGS.gke_warmpool_name label = FLAGS.gke_warmpool_pod_label @@ -148,7 +144,7 @@ def Run(benchmark_spec): poll_interval = FLAGS.gke_warmpool_poll_interval_s # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility) - _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) time.sleep(3) logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target) @@ -157,7 +153,7 @@ def Run(benchmark_spec): # 1. 
Measure drain time (should be near-zero since Prepare drained) t0 = time.time() - _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) drain_time_s = round(time.time() - t0, 2) time.sleep(2) @@ -185,8 +181,8 @@ def Run(benchmark_spec): while time.time() < deadline: elapsed = time.time() - t_scale - running = _CountPods(ns, label, "Running") - pending = _CountPods(ns, label, "Pending") + running = utils.CountPods(ns, label, "Running") + pending = utils.CountPods(ns, label, "Pending") if first_pod_time is None and running > 0: first_pod_time = elapsed @@ -207,8 +203,8 @@ def Run(benchmark_spec): time.sleep(poll_interval) total_time = round(time.time() - t_scale, 2) - final_running = _CountPods(ns, label, "Running") - final_pending = _CountPods(ns, label, "Pending") + final_running = utils.CountPods(ns, label, "Running") + final_pending = utils.CountPods(ns, label, "Pending") rate = round(final_running / total_time, 2) if total_time > 0 else 0 logging.info( @@ -312,79 +308,21 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Drain warm pool back to 0 after measurement.""" - ns = FLAGS.gke_namespace + ns = FLAGS.k8s_namespace warmpool_name = FLAGS.gke_warmpool_name label = FLAGS.gke_warmpool_pod_label logging.info("Cleanup: draining warm pool to 0.") - _DrainPool(ns, warmpool_name, label, FLAGS.gke_warmpool_drain_timeout_s) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) utils.StopPortForward() logging.info("Cleanup complete.") -def Teardown(benchmark_spec): - """Teardown GKE cluster and all dependencies.""" - gke_provision_utils.Teardown() - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- -def _CountPods(namespace, label, phase=None): - """Count pods matching label (and 
optionally phase).""" - cmd = ["get", "pods", "-n", namespace, "-l", label, "-o", "name"] - if phase: - cmd += [f"--field-selector=status.phase={phase}"] - stdout, _, rc = utils.RunKubectl(cmd, raise_on_failure=False) - if rc != 0 or not stdout: - return 0 - return len(stdout.strip().splitlines()) - - -def _DrainPool(namespace, warmpool_name, label, timeout_s): - """Scale pool to 0 and wait for all pods to terminate.""" - patch_json = json.dumps({"spec": {"replicas": 0}}) - utils.RunKubectl( - [ - "patch", - "sandboxwarmpool", - warmpool_name, - "-n", - namespace, - "--type=merge", - f"-p={patch_json}", - ], - raise_on_failure=False, - ) - - # Delete any lingering SandboxClaims - utils.RunKubectl( - [ - "delete", - "sandboxclaims", - "--all", - "-n", - namespace, - "--ignore-not-found=true", - ], - timeout=60, - raise_on_failure=False, - ) - - t0 = time.time() - while time.time() - t0 < timeout_s: - remaining = _CountPods(namespace, label) - if remaining == 0: - elapsed = time.time() - t0 - logging.info("Pool drained in %.1fs", elapsed) - return - time.sleep(2) - - logging.warning("Drain timed out after %.0fs", timeout_s) - - def _ScrapeLifecycle(namespace, label, scale_start_epoch): """Scrape pod metadata to compute time-to-created/scheduled/running. diff --git a/perfkitbenchmarker/providers/gcp/flags.py b/perfkitbenchmarker/providers/gcp/flags.py index 244ba5d774..eeabaae0b3 100644 --- a/perfkitbenchmarker/providers/gcp/flags.py +++ b/perfkitbenchmarker/providers/gcp/flags.py @@ -581,12 +581,6 @@ ' beyond the default node pool (e.g. 
kubernetes_node_scale with 5k nodes).', ) -GKE_USE_BETA = flags.DEFINE_boolean( - 'gke_use_beta', - False, - 'Use gcloud beta for cluster creation (required for preview features ' - 'like pod snapshots).', -) GKE_ADDITIONAL_FLAGS = flags.DEFINE_list( 'gke_additional_flags', diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index 3c24ad941c..c4012faf1a 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -417,9 +417,7 @@ def _Create(self): if self.enable_aam: cmd.args.append('--auto-monitoring-scope=ALL') - # --- PKB Extension: beta gcloud and additional cluster create flags --- - if gcp_flags.GKE_USE_BETA.value: - cmd.use_beta_gcloud = True + # --- PKB Extension: additional cluster create flags --- for additional_flag in gcp_flags.GKE_ADDITIONAL_FLAGS.value: cmd.args.append(additional_flag) diff --git a/requirements.txt b/requirements.txt index 1313c628f5..755f82737c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,4 +33,3 @@ setuptools>=40.3.0,<81 six>=1.13.0 timeout-decorator scipy -matplotlib diff --git a/snapshot-sandbox-template.yaml.j2 b/snapshot-sandbox-template.yaml.j2 new file mode 100644 index 0000000000..4e25cb5833 --- /dev/null +++ b/snapshot-sandbox-template.yaml.j2 @@ -0,0 +1,46 @@ +--- +apiVersion: extensions.agents.x-k8s.io/v1alpha1 +kind: SandboxTemplate +metadata: + name: {{ template_name }} + namespace: {{ ns }} +spec: + podTemplate: + metadata: + labels: + app: snapshot-benchmark-workload + spec: + serviceAccountName: {{ ksa_name }} + runtimeClassName: gvisor + containers: + - name: preloader + image: python:3.11-slim + command: ["python3", "-c"] + args: + - | + import time, os + preload_mb = int(os.environ.get("PRELOAD_MB", "10")) + print(f"Preloading {preload_mb} MB of memory...", flush=True) + _ballast = bytearray(preload_mb * 1024 * 1024) + print(f"Preload 
complete. Starting counter.", flush=True) + i = 0 + while True: + print(f"Count: {i}", flush=True) + i += 1 + time.sleep(1) + env: + - name: PRELOAD_MB + value: "{{ preload_mb }}" + resources: + requests: + cpu: "250m" + memory: "{{ memory_mi }}Mi" + ephemeral-storage: "512Mi" + nodeSelector: + pkb_nodepool: sandbox + tolerations: + - key: "sandbox.gke.io/runtime" + operator: "Equal" + value: "gvisor" + effect: "NoSchedule" + restartPolicy: "OnFailure" From 8fa0c68d3ea867cc6a0ae9dac46dd62909f109c7 Mon Sep 17 00:00:00 2001 From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com> Date: Mon, 29 Jun 2026 12:31:43 -0400 Subject: [PATCH 3/5] renames --- .../config/agentic_benchmark_config.yaml | 16 ++-- .../kubernetes/agentic/gke_deploy_utils.py | 14 +++- .../agentic/gke_image_build_utils.py | 20 +++++ ...chmark_utils.py => k8s_benchmark_utils.py} | 6 +- ...k.py => k8s_chromium_density_benchmark.py} | 40 +++++----- ...benchmark.py => k8s_deletion_benchmark.py} | 48 ++++++------ ..._benchmark.py => k8s_payload_benchmark.py} | 32 ++++---- ...ark.py => k8s_python_density_benchmark.py} | 38 ++++----- ..._qps_benchmark.py => k8s_qps_benchmark.py} | 62 +++++++-------- ...benchmark.py => k8s_snapshot_benchmark.py} | 77 +++++++++++-------- ...benchmark.py => k8s_warmpool_benchmark.py} | 50 ++++++------ snapshot-sandbox-template.yaml.j2 | 46 ----------- 12 files changed, 222 insertions(+), 227 deletions(-) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_benchmark_utils.py => k8s_benchmark_utils.py} (99%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_chromium_density_benchmark.py => k8s_chromium_density_benchmark.py} (89%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_deletion_benchmark.py => k8s_deletion_benchmark.py} (92%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_payload_benchmark.py => k8s_payload_benchmark.py} (95%) rename 
perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_python_density_benchmark.py => k8s_python_density_benchmark.py} (91%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_qps_benchmark.py => k8s_qps_benchmark.py} (94%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_snapshot_benchmark.py => k8s_snapshot_benchmark.py} (94%) rename perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/{gke_warmpool_benchmark.py => k8s_warmpool_benchmark.py} (91%) delete mode 100644 snapshot-sandbox-template.yaml.j2 diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml index 95077b469c..0098eff013 100644 --- a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml +++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml @@ -13,7 +13,7 @@ # --temp_dir= # # Benchmark-specific sweep parameters (vary per run): -# --gke_python_density_concurrent_sandbox_count=N +# --k8s_python_density_concurrent_sandbox_count=N # --gke_snapshot_preload_mb=N # etc. 
@@ -21,7 +21,7 @@ # Shared cluster configuration (identical across all benchmarks) # =========================================================================== -gke_python_density: +k8s_python_density: flags: # --- Cluster creation flags --- gke_additional_flags: @@ -66,7 +66,7 @@ gke_python_density: type: gvisor -gke_chromium_density: +k8s_chromium_density: flags: gke_additional_flags: - "--enable-pod-snapshots" @@ -109,7 +109,7 @@ gke_chromium_density: type: gvisor -gke_payload: +k8s_payload: flags: gke_additional_flags: - "--enable-pod-snapshots" @@ -152,7 +152,7 @@ gke_payload: type: gvisor -gke_qps: +k8s_qps: flags: gke_additional_flags: - "--enable-pod-snapshots" @@ -195,7 +195,7 @@ gke_qps: type: gvisor -gke_snapshot: +k8s_snapshot: flags: gke_additional_flags: - "--enable-pod-snapshots" @@ -238,7 +238,7 @@ gke_snapshot: type: gvisor -gke_warmpool: +k8s_warmpool: flags: gke_additional_flags: - "--enable-pod-snapshots" @@ -281,7 +281,7 @@ gke_warmpool: type: gvisor -gke_deletion: +k8s_deletion: flags: gke_additional_flags: - "--enable-pod-snapshots" diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py index 9ff1684951..297b06758f 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py @@ -124,6 +124,14 @@ def _RenderAndApply(template_name, **kwargs): return retcode == 0 +flags.DEFINE_bool( + "skip_deploy_snapshots", + False, + "Skip deployment of Pod Snapshot infrastructure. " + "Set to True on non-GKE clusters where pod snapshots are not supported.", +) + + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @@ -230,6 +238,10 @@ def DeploySnapshots(): 4. Bind IAM roles 5. 
Deploy PodSnapshotStorageConfig + PodSnapshotPolicy """ + if FLAGS.skip_deploy_snapshots: + logging.info("Skipping snapshot infrastructure (--skip_deploy_snapshots=True).") + return + ns = FLAGS.k8s_namespace project = getattr(FLAGS, 'project', '') or '' zone = getattr(FLAGS, 'zone', '') or '' @@ -241,7 +253,7 @@ def DeploySnapshots(): bucket_name = "agent-sandbox-snapshots-{}".format(project) snapshot_folder = "benchmark-snapshots" - ksa_name = FLAGS.gke_snapshot_ksa_name + ksa_name = FLAGS.k8s_snapshot_ksa_name logging.info("=== DeploySnapshots: bucket=%s ===", bucket_name) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py index 13340184bc..a339af8022 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py @@ -30,6 +30,14 @@ # Architecture detection # --------------------------------------------------------------------------- +flags.DEFINE_string( + "target_arch", + "", + "Target CPU architecture for container images (amd64 or arm64). " + "If set, skips gcloud machine-type detection. " + "Use this for non-GCP environments or when gcloud is unavailable.", +) + _ARCH_MAP = { "X86_64": "amd64", "ARM64": "arm64", @@ -44,6 +52,18 @@ def _DetectArchitecture(machine_type, zone, project): Falls back to amd64 if gcloud fails. """ + # Quick exit if user provided arch explicitly + if FLAGS.target_arch: + arch = FLAGS.target_arch.lower() + if arch in ("amd64", "arm64"): + logging.info("Using user-provided target_arch: %s", arch) + return arch + logging.warning( + "Invalid --target_arch='%s'. Must be amd64 or arm64. 
" + "Proceeding with gcloud detection.", + FLAGS.target_arch, + ) + try: stdout, _, retcode = vm_util.IssueCommand( [ diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py similarity index 99% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py index 02d2d40a81..e23aa32a6d 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_benchmark_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_benchmark_utils.py @@ -41,7 +41,7 @@ ) flags.DEFINE_string( - "gke_benchmark_note", + "k8s_benchmark_note", "", "Arbitrary note string attached to every sample for tagging runs.", ) @@ -233,8 +233,8 @@ def BuildMetadata(namespace, extra=None): machine_type = getattr(cluster.vm_spec, 'machine_type', None) if machine_type: metadata["machine_type"] = machine_type - if FLAGS.gke_benchmark_note: - metadata["note"] = FLAGS.gke_benchmark_note + if FLAGS.k8s_benchmark_note: + metadata["note"] = FLAGS.k8s_benchmark_note if extra: metadata.update(extra) return metadata diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py similarity index 89% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py index 24d55350b5..346f59a8b0 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_chromium_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py @@ -11,9 +11,9 @@ Usage: python pkb.py --benchmarks=gke_chromium_density \\ - 
--gke_chromium_density_concurrent_sessions=4 \\ - --gke_chromium_density_task_count=10 \\ - --gke_chromium_density_warmup_tasks=5 \\ + --k8s_chromium_density_concurrent_sessions=4 \\ + --k8s_chromium_density_task_count=10 \\ + --k8s_chromium_density_warmup_tasks=5 \\ --k8s_namespace=agentic \\ --k8s_agent_api_url=http://localhost:8080 @@ -43,7 +43,7 @@ from absl import flags from perfkitbenchmarker import configs from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -51,9 +51,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_chromium_density" +BENCHMARK_NAME = "k8s_chromium_density" BENCHMARK_CONFIG = """ -gke_chromium_density: +k8s_chromium_density: description: > Atomic single-point Chromium browser sandbox density measurement on a pre-provisioned GKE cluster with gVisor isolation. @@ -67,37 +67,37 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_chromium_density_concurrent_sessions", + "k8s_chromium_density_concurrent_sessions", 1, "Number of concurrent Chromium browser sessions to run.", ) flags.DEFINE_integer( - "gke_chromium_density_task_count", + "k8s_chromium_density_task_count", 10, "Number of browser task iterations per Chromium session.", ) flags.DEFINE_integer( - "gke_chromium_density_warmup_tasks", + "k8s_chromium_density_warmup_tasks", 5, "Number of warmup iterations per session (excluded from stats).", ) flags.DEFINE_bool( - "gke_chromium_density_patch_warmpool", + "k8s_chromium_density_patch_warmpool", True, "Patch SandboxWarmPool replicas to match density before measurement.", ) flags.DEFINE_integer( - "gke_chromium_density_exec_timeout", + "k8s_chromium_density_exec_timeout", 120, "Sandbox command execution timeout in seconds.", ) flags.DEFINE_integer( - "gke_chromium_density_provision_timeout", + 
"k8s_chromium_density_provision_timeout", 300, "Max seconds to wait for warm pool pods to reach Running.", ) @@ -134,7 +134,7 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - density = FLAGS.gke_chromium_density_concurrent_sessions + density = FLAGS.k8s_chromium_density_concurrent_sessions logging.info("=== Run: chromium_density=%d ===", density) @@ -142,21 +142,21 @@ def Run(benchmark_spec): utils.EnsurePortForward() # Patch warm pool (moved from Prepare for sweep compatibility) - if FLAGS.gke_chromium_density_patch_warmpool: + if FLAGS.k8s_chromium_density_patch_warmpool: utils.PatchWarmPool( namespace=ns, warmpool_name=_WARMPOOL_NAME, replicas=density, label=_WARMPOOL_LABEL, - wait_timeout=FLAGS.gke_chromium_density_provision_timeout, + wait_timeout=FLAGS.k8s_chromium_density_provision_timeout, ) # POST to agent API payload = { - "task_count": FLAGS.gke_chromium_density_task_count, - "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks, + "task_count": FLAGS.k8s_chromium_density_task_count, + "warmup_tasks": FLAGS.k8s_chromium_density_warmup_tasks, "concurrent_sessions": density, - "sandbox_exec_timeout_s": FLAGS.gke_chromium_density_exec_timeout, + "sandbox_exec_timeout_s": FLAGS.k8s_chromium_density_exec_timeout, } t0 = time.time() @@ -179,8 +179,8 @@ def Run(benchmark_spec): "density": density, "successful_sessions": successful, "failed_sessions": failed, - "task_count": FLAGS.gke_chromium_density_task_count, - "warmup_tasks": FLAGS.gke_chromium_density_warmup_tasks, + "task_count": FLAGS.k8s_chromium_density_task_count, + "warmup_tasks": FLAGS.k8s_chromium_density_warmup_tasks, "wall_time_s": round(wall_time, 2), } diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py similarity index 92% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py rename 
to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py index 92b360919d..ddeae29f9d 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deletion_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py @@ -12,12 +12,12 @@ Usage: python pkb.py --benchmarks=gke_deletion \\ - --gke_deletion_batch_size=100 \\ - --gke_deletion_warmpool_name=python-sandbox-warmpool \\ - --gke_deletion_pod_label=sandbox=python-sandbox-example \\ - --gke_deletion_poll_interval_s=1.0 \\ - --gke_deletion_provision_timeout_s=120.0 \\ - --gke_deletion_drain_timeout_s=300.0 \\ + --k8s_deletion_batch_size=100 \\ + --k8s_deletion_warmpool_name=python-sandbox-warmpool \\ + --k8s_deletion_pod_label=sandbox=python-sandbox-example \\ + --k8s_deletion_poll_interval_s=1.0 \\ + --k8s_deletion_provision_timeout_s=120.0 \\ + --k8s_deletion_drain_timeout_s=300.0 \\ --k8s_namespace=agentic \\ --gke_machine_type=c4-standard-8 @@ -43,7 +43,7 @@ from absl import flags from perfkitbenchmarker import configs from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -51,9 +51,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_deletion" +BENCHMARK_NAME = "k8s_deletion" BENCHMARK_CONFIG = """ -gke_deletion: +k8s_deletion: description: > Atomic single-point bulk deletion and IP reclamation measurement on a pre-provisioned GKE cluster with gVisor isolation. 
@@ -64,37 +64,37 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_deletion_batch_size", + "k8s_deletion_batch_size", 100, "Number of sandbox pods to provision then bulk-delete.", ) flags.DEFINE_string( - "gke_deletion_warmpool_name", + "k8s_deletion_warmpool_name", "python-sandbox-warmpool", "SandboxWarmPool resource name.", ) flags.DEFINE_string( - "gke_deletion_pod_label", + "k8s_deletion_pod_label", "sandbox=python-sandbox-example", "Label selector for warm pool pods.", ) flags.DEFINE_float( - "gke_deletion_poll_interval_s", + "k8s_deletion_poll_interval_s", 1.0, "Seconds between kubectl polls during deletion.", ) flags.DEFINE_float( - "gke_deletion_provision_timeout_s", + "k8s_deletion_provision_timeout_s", 120.0, "Max seconds to wait for pods to reach Running before deletion.", ) flags.DEFINE_float( - "gke_deletion_drain_timeout_s", + "k8s_deletion_drain_timeout_s", 300.0, "Max seconds to wait for all pods to terminate after scale-to-0.", ) @@ -130,12 +130,12 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - batch_size = FLAGS.gke_deletion_batch_size - warmpool_name = FLAGS.gke_deletion_warmpool_name - label = FLAGS.gke_deletion_pod_label - poll_interval = FLAGS.gke_deletion_poll_interval_s - provision_timeout = FLAGS.gke_deletion_provision_timeout_s - drain_timeout = FLAGS.gke_deletion_drain_timeout_s + batch_size = FLAGS.k8s_deletion_batch_size + warmpool_name = FLAGS.k8s_deletion_warmpool_name + label = FLAGS.k8s_deletion_pod_label + poll_interval = FLAGS.k8s_deletion_poll_interval_s + provision_timeout = FLAGS.k8s_deletion_provision_timeout_s + drain_timeout = FLAGS.k8s_deletion_drain_timeout_s logging.info("=== Run: batch_size=%d ===", batch_size) @@ -392,11 +392,11 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Best-effort drain of warm pool after measurement.""" ns = FLAGS.k8s_namespace - warmpool_name = 
FLAGS.gke_deletion_warmpool_name - label = FLAGS.gke_deletion_pod_label + warmpool_name = FLAGS.k8s_deletion_warmpool_name + label = FLAGS.k8s_deletion_pod_label logging.info("Cleanup: draining warm pool to 0.") - utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_deletion_drain_timeout_s)) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_deletion_drain_timeout_s)) utils.StopPortForward() logging.info("Cleanup complete.") diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py similarity index 95% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py index 7d95d4bc82..9f31aee342 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_payload_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py @@ -11,9 +11,9 @@ Usage: python pkb.py --benchmarks=gke_payload \ - --gke_payload_size_mb=50 \ - --gke_payload_iterations=20 \ - --gke_payload_concurrent_sessions=5 \ + --k8s_payload_size_mb=50 \ + --k8s_payload_iterations=20 \ + --k8s_payload_concurrent_sessions=5 \ --k8s_namespace=agentic \ --k8s_agent_api_url=http://localhost:8080 @@ -66,7 +66,7 @@ from absl import flags from perfkitbenchmarker import configs from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -74,9 +74,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_payload" +BENCHMARK_NAME = "k8s_payload" BENCHMARK_CONFIG = """ -gke_payload: +k8s_payload: description: > Atomic single-point payload transfer saturation measurement on a pre-provisioned GKE cluster with gVisor isolation. 
@@ -90,31 +90,31 @@ # --------------------------------------------------------------------------- flags.DEFINE_float( - "gke_payload_size_mb", + "k8s_payload_size_mb", 1.0, "Payload size in megabytes to transfer from the sandbox.", ) flags.DEFINE_integer( - "gke_payload_iterations", + "k8s_payload_iterations", 20, "Number of transfer iterations per sandbox session.", ) flags.DEFINE_integer( - "gke_payload_concurrent_sessions", + "k8s_payload_concurrent_sessions", 5, "Number of parallel sandbox sessions.", ) flags.DEFINE_integer( - "gke_payload_exec_timeout", + "k8s_payload_exec_timeout", 300, "Sandbox command execution timeout in seconds.", ) flags.DEFINE_bool( - "gke_payload_patch_warmpool", + "k8s_payload_patch_warmpool", True, "Patch SandboxWarmPool replicas to match concurrent_sessions before measurement.", ) @@ -151,9 +151,9 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - payload_size_mb = FLAGS.gke_payload_size_mb - iterations = FLAGS.gke_payload_iterations - concurrent = FLAGS.gke_payload_concurrent_sessions + payload_size_mb = FLAGS.k8s_payload_size_mb + iterations = FLAGS.k8s_payload_iterations + concurrent = FLAGS.k8s_payload_concurrent_sessions logging.info( "=== Run: payload_size_mb=%s, iterations=%d, concurrent=%d ===", @@ -166,7 +166,7 @@ def Run(benchmark_spec): utils.EnsurePortForward() # Patch warm pool (moved from Prepare for sweep compatibility) - if FLAGS.gke_payload_patch_warmpool: + if FLAGS.k8s_payload_patch_warmpool: utils.PatchWarmPool( namespace=ns, warmpool_name=_WARMPOOL_NAME, @@ -179,7 +179,7 @@ def Run(benchmark_spec): "payload_size_mb": payload_size_mb, "payload_iterations": iterations, "concurrent_sessions": concurrent, - "sandbox_exec_timeout_s": FLAGS.gke_payload_exec_timeout, + "sandbox_exec_timeout_s": FLAGS.k8s_payload_exec_timeout, } t0 = time.time() diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py 
b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py similarity index 91% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py index e323be4d31..207fd40a20 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_python_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py @@ -20,9 +20,9 @@ Usage: python pkb.py --benchmarks=gke_python_density \\ - --gke_python_density_concurrent_sandbox_count=16 \\ - --gke_python_density_sample_count=20 \\ - --gke_python_density_sample_warmup=0 \\ + --k8s_python_density_concurrent_sandbox_count=16 \\ + --k8s_python_density_sample_count=20 \\ + --k8s_python_density_sample_warmup=0 \\ --k8s_namespace=agentic \\ --k8s_agent_api_url=http://localhost:8080 @@ -55,7 +55,7 @@ from absl import flags from perfkitbenchmarker import configs from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -63,9 +63,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_python_density" +BENCHMARK_NAME = "k8s_python_density" BENCHMARK_CONFIG = """ -gke_python_density: +k8s_python_density: description: > Atomic single-point Python sandbox density measurement on a pre-provisioned GKE cluster with gVisor isolation. 
@@ -79,19 +79,19 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_python_density_concurrent_sandbox_count", + "k8s_python_density_concurrent_sandbox_count", 1, "Number of concurrent sandbox sessions to run.", ) flags.DEFINE_integer( - "gke_python_density_sample_count", + "k8s_python_density_sample_count", 20, "Number of sample iterations per sandbox session.", ) flags.DEFINE_integer( - "gke_python_density_sample_warmup", + "k8s_python_density_sample_warmup", 0, "Number of warmup iterations per session (excluded from stats). " "Warmup iterations execute the same benchmark tasks as measured " @@ -101,13 +101,13 @@ ) flags.DEFINE_bool( - "gke_python_density_patch_warmpool", + "k8s_python_density_patch_warmpool", True, "Patch SandboxWarmPool replicas to match density before measurement.", ) flags.DEFINE_integer( - "gke_python_density_exec_timeout", + "k8s_python_density_exec_timeout", 600, "Timeout in seconds for the API call.", ) @@ -144,7 +144,7 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - density = FLAGS.gke_python_density_concurrent_sandbox_count + density = FLAGS.k8s_python_density_concurrent_sandbox_count logging.info("=== Run: density=%d ===", density) @@ -152,7 +152,7 @@ def Run(benchmark_spec): utils.EnsurePortForward() # Patch warm pool to match density (moved from Prepare for sweep compatibility) - if FLAGS.gke_python_density_patch_warmpool: + if FLAGS.k8s_python_density_patch_warmpool: utils.PatchWarmPool( namespace=ns, warmpool_name=_WARMPOOL_NAME, @@ -162,10 +162,10 @@ def Run(benchmark_spec): # POST to agent API payload = { - "sample_count": FLAGS.gke_python_density_sample_count, - "sample_warmup": FLAGS.gke_python_density_sample_warmup, + "sample_count": FLAGS.k8s_python_density_sample_count, + "sample_warmup": FLAGS.k8s_python_density_sample_warmup, "concurrent_sessions": density, - "sandbox_exec_timeout_s": 
FLAGS.gke_python_density_exec_timeout, + "sandbox_exec_timeout_s": FLAGS.k8s_python_density_exec_timeout, } t0 = time.time() @@ -188,8 +188,8 @@ def Run(benchmark_spec): "density": density, "successful_sessions": successful, "failed_sessions": failed, - "sample_count": FLAGS.gke_python_density_sample_count, - "sample_warmup": FLAGS.gke_python_density_sample_warmup, + "sample_count": FLAGS.k8s_python_density_sample_count, + "sample_warmup": FLAGS.k8s_python_density_sample_warmup, "wall_time_s": round(wall_time, 2), } @@ -334,7 +334,7 @@ def Cleanup(benchmark_spec): ns = FLAGS.k8s_namespace logging.info("Cleanup: draining warm pool.") - if FLAGS.gke_python_density_patch_warmpool: + if FLAGS.k8s_python_density_patch_warmpool: utils.DrainWarmPool( namespace=ns, warmpool_name=_WARMPOOL_NAME, diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py similarity index 94% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py index 2146489752..4528082ad5 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_qps_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py @@ -15,20 +15,20 @@ Usage: # Agent mode python pkb.py --benchmarks=gke_qps \\ - --gke_qps_target_qps=5.0 \\ - --gke_qps_pool_size=70 \\ - --gke_qps_step_duration_s=30.0 \\ - --gke_qps_mode=agent \\ + --k8s_qps_target_qps=5.0 \\ + --k8s_qps_pool_size=70 \\ + --k8s_qps_step_duration_s=30.0 \\ + --k8s_qps_mode=agent \\ --k8s_namespace=agentic \\ --k8s_agent_api_url=http://localhost:8080 # Raw claim mode python pkb.py --benchmarks=gke_qps \\ - --gke_qps_target_qps=5.0 \\ - --gke_qps_pool_size=70 \\ - --gke_qps_step_duration_s=30.0 \\ - --gke_qps_mode=raw_claim \\ - --gke_qps_claim_timeout_s=60.0 \\ + --k8s_qps_target_qps=5.0 \\ + 
--k8s_qps_pool_size=70 \\ + --k8s_qps_step_duration_s=30.0 \\ + --k8s_qps_mode=raw_claim \\ + --k8s_qps_claim_timeout_s=60.0 \\ --k8s_namespace=agentic Samples emitted (per run): @@ -62,7 +62,7 @@ from perfkitbenchmarker import data from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -70,9 +70,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_qps" +BENCHMARK_NAME = "k8s_qps" BENCHMARK_CONFIG = """ -gke_qps: +k8s_qps: description: > Atomic single-point QPS saturation measurement on a pre-provisioned GKE cluster with gVisor isolation. @@ -88,44 +88,44 @@ # --------------------------------------------------------------------------- flags.DEFINE_float( - "gke_qps_target_qps", + "k8s_qps_target_qps", 5.0, "Target requests per second (sandbox claims per second).", ) flags.DEFINE_integer( - "gke_qps_pool_size", + "k8s_qps_pool_size", 70, "Warm pool size maintained during the measurement.", ) flags.DEFINE_float( - "gke_qps_step_duration_s", + "k8s_qps_step_duration_s", 30.0, "Duration of the QPS burst in seconds.", ) flags.DEFINE_integer( - "gke_qps_sandbox_exec_timeout_s", + "k8s_qps_sandbox_exec_timeout_s", 30, "Sandbox command execution timeout in seconds.", ) flags.DEFINE_float( - "gke_qps_provision_timeout_s", + "k8s_qps_provision_timeout_s", 180.0, "Max seconds to wait for pool pods to reach Running.", ) flags.DEFINE_string( - "gke_qps_mode", + "k8s_qps_mode", "agent", "Operating mode: 'agent' (POST to orchestrator API) or " "'raw_claim' (create SandboxClaims directly via kubectl).", ) flags.DEFINE_float( - "gke_qps_claim_timeout_s", + "k8s_qps_claim_timeout_s", 60.0, "Max seconds to wait for a raw claim to bind " "(only used with mode=raw_claim).", ) @@ -149,7 +149,7 @@ def Prepare(benchmark_spec): logging.info("=== 
Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) - mode = FLAGS.gke_qps_mode + mode = FLAGS.k8s_qps_mode if mode == "agent": utils.CheckAgentHealthz(required=False) utils.EnsurePortForward() @@ -165,7 +165,7 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - pool_size = FLAGS.gke_qps_pool_size + pool_size = FLAGS.k8s_qps_pool_size # Scale warm pool (moved from Prepare for sweep compatibility) utils.PatchWarmPool( @@ -173,10 +173,10 @@ def Run(benchmark_spec): warmpool_name=_WARMPOOL_NAME, replicas=pool_size, label=_WARMPOOL_LABEL, - wait_timeout=int(FLAGS.gke_qps_provision_timeout_s), + wait_timeout=int(FLAGS.k8s_qps_provision_timeout_s), ) - mode = FLAGS.gke_qps_mode + mode = FLAGS.k8s_qps_mode if mode == "raw_claim": return _RunRawClaim(benchmark_spec) @@ -211,9 +211,9 @@ def Cleanup(benchmark_spec): def _RunAgent(benchmark_spec): """Fire QPS burst via the orchestrator API.""" ns = FLAGS.k8s_namespace - target_qps = FLAGS.gke_qps_target_qps - pool_size = FLAGS.gke_qps_pool_size - step_duration = FLAGS.gke_qps_step_duration_s + target_qps = FLAGS.k8s_qps_target_qps + pool_size = FLAGS.k8s_qps_pool_size + step_duration = FLAGS.k8s_qps_step_duration_s logging.info( "=== Run (agent): target_qps=%s, pool_size=%d, duration=%ss ===", @@ -232,7 +232,7 @@ def _RunAgent(benchmark_spec): payload = { "target_qps": target_qps, "duration_s": step_duration, - "sandbox_exec_timeout_s": FLAGS.gke_qps_sandbox_exec_timeout_s, + "sandbox_exec_timeout_s": FLAGS.k8s_qps_sandbox_exec_timeout_s, } t0 = time.time() @@ -378,10 +378,10 @@ def _RunAgent(benchmark_spec): def _RunRawClaim(benchmark_spec): """Fire SandboxClaims directly at target_qps (no agent).""" ns = FLAGS.k8s_namespace - target_qps = FLAGS.gke_qps_target_qps - pool_size = FLAGS.gke_qps_pool_size - step_duration = FLAGS.gke_qps_step_duration_s - claim_timeout = FLAGS.gke_qps_claim_timeout_s + target_qps = FLAGS.k8s_qps_target_qps + pool_size = 
FLAGS.k8s_qps_pool_size + step_duration = FLAGS.k8s_qps_step_duration_s + claim_timeout = FLAGS.k8s_qps_claim_timeout_s logging.info( "=== Run (raw_claim): target_qps=%s, pool_size=%d, duration=%ss ===", diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py similarity index 94% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py index 44d21fcc84..cb49011b08 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_snapshot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py @@ -11,24 +11,24 @@ Usage: python pkb.py --benchmarks=gke_snapshot \\ - --gke_snapshot_preload_mb=50 \\ - --gke_snapshot_burst_size=3 \\ + --k8s_snapshot_preload_mb=50 \\ + --k8s_snapshot_burst_size=3 \\ --k8s_namespace=agentic \\ - --gke_snapshot_skip_snapshot=false + --k8s_snapshot_skip_snapshot=false Samples emitted (per run): - - gke_snapshot_snapshot_p50 (seconds) - - gke_snapshot_snapshot_p95 (seconds) - - gke_snapshot_snapshot_max (seconds) - - gke_snapshot_restore_p50 (seconds) - - gke_snapshot_restore_p95 (seconds) - - gke_snapshot_restore_max (seconds) - - gke_snapshot_ttfe_p50 (seconds) - - gke_snapshot_ttfe_p95 (seconds) - - gke_snapshot_ttfe_max (seconds) - - gke_snapshot_startup_time (seconds) - - gke_snapshot_restore_correct_count (count) - - gke_snapshot_wall_time (seconds) + - k8s_snapshot_snapshot_p50 (seconds) + - k8s_snapshot_snapshot_p95 (seconds) + - k8s_snapshot_snapshot_max (seconds) + - k8s_snapshot_restore_p50 (seconds) + - k8s_snapshot_restore_p95 (seconds) + - k8s_snapshot_restore_max (seconds) + - k8s_snapshot_ttfe_p50 (seconds) + - k8s_snapshot_ttfe_p95 (seconds) + - k8s_snapshot_ttfe_max (seconds) + - k8s_snapshot_startup_time (seconds) + - 
k8s_snapshot_restore_correct_count (count) + - k8s_snapshot_wall_time (seconds) """ import json @@ -46,7 +46,7 @@ from perfkitbenchmarker.resources.container_service import kubectl from perfkitbenchmarker import sample from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -54,9 +54,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_snapshot" +BENCHMARK_NAME = "k8s_snapshot" BENCHMARK_CONFIG = """ -gke_snapshot: +k8s_snapshot: description: > Atomic single-point Pod Snapshot saturation measurement on a pre-provisioned GKE cluster with gVisor isolation. @@ -67,37 +67,37 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_snapshot_preload_mb", + "k8s_snapshot_preload_mb", 10, "Megabytes of memory to pre-allocate in the sandbox before snapshot.", ) flags.DEFINE_integer( - "gke_snapshot_burst_size", + "k8s_snapshot_burst_size", 1, "Number of concurrent source/snapshot/restore pods per measurement.", ) flags.DEFINE_string( - "gke_snapshot_ksa_name", + "k8s_snapshot_ksa_name", "pod-snapshot-sa", "Kubernetes service account for pod snapshots.", ) flags.DEFINE_integer( - "gke_snapshot_pod_timeout", + "k8s_snapshot_pod_timeout", 180, "Max seconds to wait for pod Running / preload.", ) flags.DEFINE_boolean( - "gke_snapshot_skip_snapshot", + "k8s_snapshot_skip_snapshot", False, "Skip snapshot/restore phases — measure cold-start TTFE only.", ) flags.DEFINE_string( - "gke_snapshot_preload_mode", + "k8s_snapshot_preload_mode", "synthetic", "Preload mode: 'synthetic' (os.urandom fill) or " "'script:' to run a custom startup script.", @@ -120,19 +120,28 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads, snapshot infra, and validate readiness.""" ns = FLAGS.k8s_namespace - preload_mb = FLAGS.gke_snapshot_preload_mb + 
preload_mb = FLAGS.k8s_snapshot_preload_mb logging.info( "=== Prepare: preload_mb=%d, burst_size=%d ===", preload_mb, - FLAGS.gke_snapshot_burst_size, + FLAGS.k8s_snapshot_burst_size, ) # Deploy Agent Sandbox ecosystem (idempotent) deploy_utils.DeployWorkloads(benchmark_spec) # Deploy Pod Snapshot infrastructure (idempotent) - deploy_utils.DeploySnapshots() + # Pod Snapshots are GKE-specific; skip on other platforms + cloud = getattr( + getattr(benchmark_spec, "container_cluster", None), "cloud", "GCP" + ) + if cloud == "GCP" and not FLAGS.skip_deploy_snapshots: + deploy_utils.DeploySnapshots() + elif cloud != "GCP": + logging.info( + "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).", cloud + ) # 1. Verify PodSnapshotStorageConfig exists (cluster-scoped). _, _, retcode = utils.RunKubectl( @@ -157,7 +166,7 @@ def Prepare(benchmark_spec): logging.warning("PodSnapshotPolicy not found in namespace %s.", ns) # 3. Verify the service account exists. - ksa = FLAGS.gke_snapshot_ksa_name + ksa = FLAGS.k8s_snapshot_ksa_name _, _, retcode = utils.RunKubectl( ["get", "serviceaccount", ksa, "-n", ns], timeout=30, @@ -189,12 +198,12 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - preload_mb = FLAGS.gke_snapshot_preload_mb - burst_size = FLAGS.gke_snapshot_burst_size - skip_snapshot = FLAGS.gke_snapshot_skip_snapshot - preload_mode = FLAGS.gke_snapshot_preload_mode - ksa_name = FLAGS.gke_snapshot_ksa_name - pod_timeout = FLAGS.gke_snapshot_pod_timeout + preload_mb = FLAGS.k8s_snapshot_preload_mb + burst_size = FLAGS.k8s_snapshot_burst_size + skip_snapshot = FLAGS.k8s_snapshot_skip_snapshot + preload_mode = FLAGS.k8s_snapshot_preload_mode + ksa_name = FLAGS.k8s_snapshot_ksa_name + pod_timeout = FLAGS.k8s_snapshot_pod_timeout logging.info( "=== Run: preload_mb=%d, burst_size=%d, skip_snapshot=%s ===", diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py 
b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py similarity index 91% rename from perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py rename to perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py index e696b089db..62c6462351 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_warmpool_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py @@ -11,12 +11,12 @@ Usage: python pkb.py --benchmarks=gke_warmpool \ - --gke_warmpool_target_replicas=100 \ - --gke_warmpool_name=python-sandbox-warmpool \ - --gke_warmpool_pod_label=sandbox=python-sandbox-example \ - --gke_warmpool_ready_threshold_s=300 \ - --gke_warmpool_poll_interval_s=2.0 \ - --gke_warmpool_drain_timeout_s=300 \ + --k8s_warmpool_target_replicas=100 \ + --k8s_warmpool_name=python-sandbox-warmpool \ + --k8s_warmpool_pod_label=sandbox=python-sandbox-example \ + --k8s_warmpool_ready_threshold_s=300 \ + --k8s_warmpool_poll_interval_s=2.0 \ + --k8s_warmpool_drain_timeout_s=300 \ --k8s_namespace=agentic \ --gke_machine_type=c4-standard-8 @@ -50,7 +50,7 @@ from datetime import datetime, timezone from perfkitbenchmarker import configs from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( - gke_benchmark_utils as utils, + k8s_benchmark_utils as utils, ) from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_deploy_utils as deploy_utils, @@ -58,9 +58,9 @@ FLAGS = flags.FLAGS -BENCHMARK_NAME = "gke_warmpool" +BENCHMARK_NAME = "k8s_warmpool" BENCHMARK_CONFIG = """ -gke_warmpool: +k8s_warmpool: description: > Atomic single-point warm pool scale-up measurement on a pre-provisioned GKE cluster with gVisor isolation. 
@@ -71,37 +71,37 @@ # --------------------------------------------------------------------------- flags.DEFINE_integer( - "gke_warmpool_target_replicas", + "k8s_warmpool_target_replicas", 100, "Number of warm pool replicas to provision from zero.", ) flags.DEFINE_string( - "gke_warmpool_name", + "k8s_warmpool_name", "python-sandbox-warmpool", "SandboxWarmPool resource name.", ) flags.DEFINE_string( - "gke_warmpool_pod_label", + "k8s_warmpool_pod_label", "sandbox=python-sandbox-example", "Label selector for warm pool pods.", ) flags.DEFINE_float( - "gke_warmpool_ready_threshold_s", + "k8s_warmpool_ready_threshold_s", 300.0, "Max seconds allowed for all pods to reach Running.", ) flags.DEFINE_float( - "gke_warmpool_poll_interval_s", + "k8s_warmpool_poll_interval_s", 2.0, "Seconds between kubectl polls during provisioning.", ) flags.DEFINE_float( - "gke_warmpool_drain_timeout_s", + "k8s_warmpool_drain_timeout_s", 300.0, "Max seconds to wait for drain to 0.", ) @@ -137,14 +137,14 @@ def Run(benchmark_spec): utils.set_benchmark_spec(benchmark_spec) ns = FLAGS.k8s_namespace - target = FLAGS.gke_warmpool_target_replicas - warmpool_name = FLAGS.gke_warmpool_name - label = FLAGS.gke_warmpool_pod_label - threshold_s = FLAGS.gke_warmpool_ready_threshold_s - poll_interval = FLAGS.gke_warmpool_poll_interval_s + target = FLAGS.k8s_warmpool_target_replicas + warmpool_name = FLAGS.k8s_warmpool_name + label = FLAGS.k8s_warmpool_pod_label + threshold_s = FLAGS.k8s_warmpool_ready_threshold_s + poll_interval = FLAGS.k8s_warmpool_poll_interval_s # Drain to 0 for clean measurement (moved from Prepare for sweep compatibility) - utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s)) time.sleep(3) logging.info("=== Run: scaling %s to %d replicas ===", warmpool_name, target) @@ -153,7 +153,7 @@ def Run(benchmark_spec): # 1. 
Measure drain time (should be near-zero since Prepare drained) t0 = time.time() - utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s)) drain_time_s = round(time.time() - t0, 2) time.sleep(2) @@ -309,11 +309,11 @@ def Run(benchmark_spec): def Cleanup(benchmark_spec): """Drain warm pool back to 0 after measurement.""" ns = FLAGS.k8s_namespace - warmpool_name = FLAGS.gke_warmpool_name - label = FLAGS.gke_warmpool_pod_label + warmpool_name = FLAGS.k8s_warmpool_name + label = FLAGS.k8s_warmpool_pod_label logging.info("Cleanup: draining warm pool to 0.") - utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.gke_warmpool_drain_timeout_s)) + utils.DrainWarmPool(ns, warmpool_name, label, timeout=int(FLAGS.k8s_warmpool_drain_timeout_s)) utils.StopPortForward() logging.info("Cleanup complete.") diff --git a/snapshot-sandbox-template.yaml.j2 b/snapshot-sandbox-template.yaml.j2 deleted file mode 100644 index 4e25cb5833..0000000000 --- a/snapshot-sandbox-template.yaml.j2 +++ /dev/null @@ -1,46 +0,0 @@ ---- -apiVersion: extensions.agents.x-k8s.io/v1alpha1 -kind: SandboxTemplate -metadata: - name: {{ template_name }} - namespace: {{ ns }} -spec: - podTemplate: - metadata: - labels: - app: snapshot-benchmark-workload - spec: - serviceAccountName: {{ ksa_name }} - runtimeClassName: gvisor - containers: - - name: preloader - image: python:3.11-slim - command: ["python3", "-c"] - args: - - | - import time, os - preload_mb = int(os.environ.get("PRELOAD_MB", "10")) - print(f"Preloading {preload_mb} MB of memory...", flush=True) - _ballast = bytearray(preload_mb * 1024 * 1024) - print(f"Preload complete. 
Starting counter.", flush=True) - i = 0 - while True: - print(f"Count: {i}", flush=True) - i += 1 - time.sleep(1) - env: - - name: PRELOAD_MB - value: "{{ preload_mb }}" - resources: - requests: - cpu: "250m" - memory: "{{ memory_mi }}Mi" - ephemeral-storage: "512Mi" - nodeSelector: - pkb_nodepool: sandbox - tolerations: - - key: "sandbox.gke.io/runtime" - operator: "Equal" - value: "gvisor" - effect: "NoSchedule" - restartPolicy: "OnFailure" From 84ddaf5ed191952f3b403dff9bd4469cdbb41829 Mon Sep 17 00:00:00 2001 From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com> Date: Tue, 30 Jun 2026 22:10:03 -0400 Subject: [PATCH 4/5] pkb-native image building, bug fixes, and optimizations --- .../agentic/adk-agent}/.dockerignore | 0 .../agentic/adk-agent}/.gcloudignore | 0 .../agentic/adk-agent}/Dockerfile | 0 .../agentic/adk-agent}/__init__.py | 0 .../gke_performance_agent/__init__.py | 0 .../adk-agent}/gke_performance_agent/agent.py | 16 +- .../agentic/adk-agent}/main.py | 22 +- .../agentic/adk-agent}/requirements.txt | 0 .../python_test_app/benchmark_density.py | 0 .../python_test_app/benchmark_payload.py | 0 .../python_test_app/benchmark_qps.py | 0 .../config/agentic_benchmark_config.yaml | 378 +++++------------- .../workloads/adk_agent/cloudbuild.yaml | 13 - .../adk_agent/generated.env.template | 26 -- .../chromium_test_app/benchmark_density.js | 177 -------- .../kubernetes/agentic/gke_deploy_utils.py | 88 ++-- .../agentic/gke_image_build_utils.py | 199 +-------- .../kubernetes/agentic/gke_post_teardown.py | 40 +- .../kubernetes/agentic/gke_prerequisites.py | 116 +++++- .../agentic/k8s_snapshot_benchmark.py | 31 +- 20 files changed, 350 insertions(+), 756 deletions(-) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/.dockerignore (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/.gcloudignore (100%) rename 
perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/Dockerfile (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/__init__.py (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/gke_performance_agent/__init__.py (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/gke_performance_agent/agent.py (96%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/main.py (97%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/requirements.txt (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_density.py (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_payload.py (100%) rename perfkitbenchmarker/data/{k8s_agents/workloads/adk_agent => docker/agentic/adk-agent}/sandboxed_apps/python_test_app/benchmark_qps.py (100%) delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template delete mode 100644 perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.dockerignore rename to perfkitbenchmarker/data/docker/agentic/adk-agent/.dockerignore diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore b/perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore similarity index 100% rename from 
perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/.gcloudignore rename to perfkitbenchmarker/data/docker/agentic/adk-agent/.gcloudignore diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile b/perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/Dockerfile rename to perfkitbenchmarker/data/docker/agentic/adk-agent/Dockerfile diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/__init__.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/__init__.py diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/__init__.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/__init__.py diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py similarity index 96% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py index 2aef3c153c..6561942960 100644 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/gke_performance_agent/agent.py +++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/gke_performance_agent/agent.py @@ -40,6 +40,7 @@ from google.adk.models.base_llm import BaseLlm from google.adk.models.llm_response import LlmResponse from google.genai import types +from concurrent.futures import ThreadPoolExecutor from dotenv import 
load_dotenv from google.adk.apps import App import logging @@ -166,6 +167,12 @@ async def generate_content_async(self, llm_request, stream=False): # ========================================================================= +# Module-level thread pool for sandbox I/O operations. +# Initialized once at import time to avoid thread-safety issues +# with lazy initialization inside _execute_in_sandbox(). +_SANDBOX_POOL = ThreadPoolExecutor(max_workers=16) + + class V3GkeCodeExecutor(GkeCodeExecutor): def _execute_in_sandbox(self, code: str) -> CodeExecutionResult: """Executes code using the v0.4.6 compatible SandboxClient.""" @@ -173,17 +180,10 @@ def _execute_in_sandbox(self, code: str) -> CodeExecutionResult: from k8s_agent_sandbox.models import SandboxDirectConnectionConfig import logging import time - from concurrent.futures import ThreadPoolExecutor logging.info("Executing via V3 SandboxClient (v0.4.6 compatible).") - # Shared thread pool for sandbox operations to allow overlapping - # blocking I/O when sessions run on different threads. - global _SANDBOX_POOL - try: - _SANDBOX_POOL - except NameError: - _SANDBOX_POOL = ThreadPoolExecutor(max_workers=16) + # _SANDBOX_POOL is initialized at module level (thread-safe). # Use DirectConnection when SANDBOX_ROUTER_URL is set (in-cluster), # otherwise fall back to kubectl port-forward (dev mode). diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py similarity index 97% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/main.py index bcdb090188..473c2072c2 100644 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/main.py +++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/main.py @@ -386,8 +386,13 @@ async def benchmark_python_density(req: BenchmarkRequest): prompt = "Please start the GKE performance benchmark workflow." 
- # Fire concurrent sessions. Run each session in its own thread so - # blocking ADK/Runner activity cannot serialize session start. + # Fire concurrent sessions. + # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread() + # with a nested asyncio.run() to create a per-thread event loop. This is + # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle + # via kubectl/HTTP) that would starve a shared event loop and serialize + # session starts. The per-thread event loop overhead (~0.1ms) is negligible + # compared to sandbox round-trip times (~200ms+). thread_tasks = [ asyncio.create_task( asyncio.to_thread( @@ -472,8 +477,13 @@ async def benchmark_python_payload(req: PayloadBenchmarkRequest): prompt = "Please start the GKE performance benchmark workflow." - # Fire concurrent sessions. Run each session in its own thread so - # blocking ADK/Runner activity cannot serialize session start. + # Fire concurrent sessions. + # DESIGN NOTE: Each session runs in its own thread via asyncio.to_thread() + # with a nested asyncio.run() to create a per-thread event loop. This is + # intentional -- the ADK Runner performs blocking I/O (sandbox lifecycle + # via kubectl/HTTP) that would starve a shared event loop and serialize + # session starts. The per-thread event loop overhead (~0.1ms) is negligible + # compared to sandbox round-trip times (~200ms+). 
thread_tasks = [ asyncio.create_task( asyncio.to_thread( @@ -544,7 +554,7 @@ async def benchmark_python_qps(req: QpsBenchmarkRequest): qps_code = "import json; print(json.dumps({'sandbox_status': 'ok'}))" sandbox_template = os.getenv("SANDBOX_TEMPLATE", "python-sandbox-template") - sandbox_namespace = os.getenv("SANDBOX_NAMESPACE", "agentic") + sandbox_namespace = os.getenv("AGENTIC_NAMESPACE", "agentic") exec_timeout = req.sandbox_exec_timeout_s qps_claim_label = {"created-by": "pkb-qps-benchmark"} @@ -791,7 +801,7 @@ async def benchmark_chromium_density(req: ChromiumBenchmarkRequest): k8s_config.load_kube_config() core_v1 = k8s_client.CoreV1Api() - # Inline HTML test page (same as benchmark_density.js used) + # Inline HTML test page (data: URL avoids network dependencies) test_page = """data:text/html, diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt b/perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/requirements.txt rename to perfkitbenchmarker/data/docker/agentic/adk-agent/requirements.txt diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_density.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_density.py diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_payload.py rename to 
perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_payload.py diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py b/perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py similarity index 100% rename from perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/python_test_app/benchmark_qps.py rename to perfkitbenchmarker/data/docker/agentic/adk-agent/sandboxed_apps/python_test_app/benchmark_qps.py diff --git a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml index 0098eff013..69922efdb0 100644 --- a/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml +++ b/perfkitbenchmarker/data/k8s_agents/config/agentic_benchmark_config.yaml @@ -14,311 +14,137 @@ # # Benchmark-specific sweep parameters (vary per run): # --k8s_python_density_concurrent_sandbox_count=N -# --gke_snapshot_preload_mb=N +# --k8s_snapshot_preload_mb=N # etc. # =========================================================================== -# Shared cluster configuration (identical across all benchmarks) +# Shared configuration (defined once, referenced by all benchmarks via YAML +# anchors). PKB ignores top-level keys that don't match a benchmark name. 
# =========================================================================== -k8s_python_density: - flags: - # --- Cluster creation flags --- - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" +_shared_flags: &shared_flags + # --- Cluster creation flags --- + gke_additional_flags: + - "--enable-pod-snapshots" + - "--enable-dataplane-v2" + - "--enable-private-nodes" + - "--enable-ip-alias" + - "--master-ipv4-cidr=172.16.0.0/28" + gke_additional_nodepool_flags: + - "--max-pods-per-node=250" + container_cluster_version: "1.35.5-gke.1057002" + gke_enable_shielded_nodes: false + gce_subnet_region: "us-central1" + + # --- Agentic workload flags --- + k8s_namespace: "agentic" + agent_sandbox_version: "v0.4.6" + k8s_gvisor: true + k8s_agent_api_url: "http://localhost:8080" + +_shared_cluster: &shared_cluster + cloud: GCP + type: Kubernetes + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 50 + nodepools: + sandbox: + vm_count: 1 + vm_spec: + GCP: + machine_type: c4-standard-8 + zone: us-central1-a + boot_disk_type: hyperdisk-balanced + boot_disk_size: 100 + sandbox_config: + type: gvisor + +_shared_registry: &shared_registry + cloud: GCP + spec: + GCP: + zone: us-central1-a + + +_shared_container_specs: &shared_container_specs + adk_agent: + image: agentic/adk-agent - # --- Agentic workload flags --- - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false +# =========================================================================== +# Benchmark definitions (each references the shared anchors 
above) +# =========================================================================== +k8s_python_density: + flags: + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster k8s_chromium_density: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster k8s_payload: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - 
container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster k8s_qps: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster k8s_snapshot: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - 
"--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster k8s_warmpool: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor - + <<: *shared_cluster 
k8s_deletion: flags: - gke_additional_flags: - - "--enable-pod-snapshots" - - "--enable-dataplane-v2" - - "--enable-private-nodes" - - "--enable-ip-alias" - - "--master-ipv4-cidr=172.16.0.0/28" - gke_additional_nodepool_flags: - - "--max-pods-per-node=250" - container_cluster_version: "1.35.3-gke.1389000" - gke_enable_shielded_nodes: false - gce_subnet_region: "us-central1" - - k8s_namespace: "agentic" - agent_sandbox_version: "v0.4.6" - k8s_gvisor: true - k8s_agent_api_url: "http://localhost:8080" - skip_image_build: false - + <<: *shared_flags + container_registry: + <<: *shared_registry + container_specs: + <<: *shared_container_specs container_cluster: - cloud: GCP - type: Kubernetes - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 50 - nodepools: - sandbox: - vm_count: 1 - vm_spec: - GCP: - machine_type: c4-standard-8 - zone: us-central1-a - boot_disk_type: hyperdisk-balanced - boot_disk_size: 100 - sandbox_config: - type: gvisor + <<: *shared_cluster diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml deleted file mode 100644 index f3f3f4b810..0000000000 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml +++ /dev/null @@ -1,13 +0,0 @@ -steps: - - name: 'gcr.io/cloud-builders/docker' - args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.'] - -images: - - '${_IMAGE_PATH}' - -options: - logging: CLOUD_LOGGING_ONLY - -substitutions: - _IMAGE_PATH: '' - _PLATFORM: 'linux/amd64' diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template deleted file mode 100644 index 3ec5f62d0b..0000000000 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/generated.env.template +++ /dev/null @@ -1,26 +0,0 @@ -# 
========================================================================== -# ADK Agent — Generated Environment File Template -# ========================================================================== -# Load generated.env (rendered by gke_image_build_utils._GenerateEnvFile from PKB flags). -# -# For local dev, manually create generated.env with your values. -# ========================================================================== - -# --- Required: GKE executor config --- -CLUSTER_NAME="${CLUSTER_NAME}" -GOOGLE_CLOUD_PROJECT="${GOOGLE_CLOUD_PROJECT}" -GOOGLE_CLOUD_LOCATION="${GOOGLE_CLOUD_LOCATION}" -AGENTIC_NAMESPACE="${AGENTIC_NAMESPACE}" -GOOGLE_GENAI_USE_VERTEXAI="${GOOGLE_GENAI_USE_VERTEXAI}" - -# --- Sandbox connection (set in-cluster; leave blank for local dev mode) --- -# When set, SandboxClient uses DirectConnection (bypasses kubectl port-forward). -# For local dev, set to "" to use per-pod kubectl port-forward tunnels. -SANDBOX_ROUTER_URL="http://sandbox-router-svc.${AGENTIC_NAMESPACE}.svc.cluster.local:8080" - -# --- Optional: benchmark defaults (overridden by HTTP request params) --- -SAMPLE_COUNT="${SAMPLE_COUNT}" -SAMPLE_WARMUP="${SAMPLE_WARMUP}" -PAYLOAD_SIZE_MB="${PAYLOAD_SIZE_MB}" -PAYLOAD_ITERATIONS="${PAYLOAD_ITERATIONS}" - diff --git a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js b/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js deleted file mode 100644 index 7638720691..0000000000 --- a/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/sandboxed_apps/chromium_test_app/benchmark_density.js +++ /dev/null @@ -1,177 +0,0 @@ -// Agentic Chromium Sandbox Benchmark (UC-C) -// Measures: Interaction Latency, Screenshot Generation, DOM Evaluation, RSS -// Requires: Playwright (pre-installed in the container image) -// -// Self-contained — no external Mock LLM service needed. 
Uses data: URLs -// and inline HTML to avoid network dependencies so the benchmark measures -// pure gVisor + Chromium overhead. -// -// Environment variables (injected by orchestrator): -// TASK_COUNT — iterations per run (default: 10) -// WARMUP_TASKS — warmup iterations excluded from stats (default: 2) - -const { chromium } = require('playwright'); -const os = require('os'); - -const TASK_COUNT = parseInt(process.env.TASK_COUNT || '10'); -const WARMUP_TASKS = parseInt(process.env.WARMUP_TASKS || '2'); - -// Inline HTML page — avoids network round-trips so we measure pure -// browser engine + gVisor overhead. -const TEST_PAGE = `data:text/html, - - -PKB Chromium Benchmark - -

Hello Sandbox

- - -
- - -`; - -function percentile(sorted, p) { - if (!sorted.length) return null; - const idx = Math.min(Math.floor(sorted.length * p), sorted.length - 1); - return sorted[idx]; -} - -function getMemoryMB() { - try { - const usage = process.memoryUsage(); - return { - rss_mb: Math.round(usage.rss / 1024 / 1024 * 100) / 100, - heap_used_mb: Math.round(usage.heapUsed / 1024 / 1024 * 100) / 100, - heap_total_mb: Math.round(usage.heapTotal / 1024 / 1024 * 100) / 100, - }; - } catch (e) { - return { rss_mb: null, heap_used_mb: null, heap_total_mb: null }; - } -} - -async function runBenchmark() { - const memStart = getMemoryMB(); - - // ── Cold Start: browser launch ── - const coldStart = performance.now(); - const browser = await chromium.launch({ - headless: true, - args: [ - '--no-sandbox', - '--disable-gpu', - '--disable-dev-shm-usage', - '--disable-async-dns', - '--single-process', - ], - }); - const cold_start_ms = performance.now() - coldStart; - - const context = await browser.newContext(); - const page = await context.newPage(); - - // Navigate once before the loop — amortize first-navigation overhead - await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' }); - - // Per-task latency arrays (filled during measured runs only) - const navigate_ms = []; - const screenshot_ms = []; - const evaluate_ms = []; - const click_ms = []; - const fill_ms = []; - const interaction_ms = []; // all task types pooled - - for (let run = 0; run < WARMUP_TASKS + TASK_COUNT; run++) { - const measuring = run >= WARMUP_TASKS; - - // 1. Navigate (reload the data: page) - let t0 = performance.now(); - await page.goto(TEST_PAGE, { waitUntil: 'domcontentloaded' }); - let elapsed = performance.now() - t0; - if (measuring) { navigate_ms.push(elapsed); interaction_ms.push(elapsed); } - - // 2. 
DOM evaluate — read heading text - t0 = performance.now(); - await page.evaluate(() => document.getElementById('heading').textContent); - elapsed = performance.now() - t0; - if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); } - - // 3. Fill input - t0 = performance.now(); - await page.fill('#search', `query-${run}`); - elapsed = performance.now() - t0; - if (measuring) { fill_ms.push(elapsed); interaction_ms.push(elapsed); } - - // 4. Click button - t0 = performance.now(); - await page.click('#btn'); - elapsed = performance.now() - t0; - if (measuring) { click_ms.push(elapsed); interaction_ms.push(elapsed); } - - // 5. Verify click effect (DOM mutation) - t0 = performance.now(); - await page.evaluate(() => document.getElementById('output').textContent); - elapsed = performance.now() - t0; - if (measuring) { evaluate_ms.push(elapsed); interaction_ms.push(elapsed); } - - // 6. Screenshot (snapshot generation) - t0 = performance.now(); - await page.screenshot({ path: '/tmp/snap.png' }); - elapsed = performance.now() - t0; - if (measuring) { screenshot_ms.push(elapsed); interaction_ms.push(elapsed); } - } - - await browser.close(); - const memEnd = getMemoryMB(); - - // ── Compute stats ── - const computeStats = (arr) => { - if (!arr.length) return null; - const sorted = [...arr].sort((a, b) => a - b); - const sum = sorted.reduce((a, b) => a + b, 0); - return { - mean_ms: Math.round(sum / sorted.length * 1000) / 1000, - p50_ms: Math.round(percentile(sorted, 0.50) * 1000) / 1000, - p95_ms: Math.round(percentile(sorted, 0.95) * 1000) / 1000, - p99_ms: Math.round(percentile(sorted, 0.99) * 1000) / 1000, - min_ms: Math.round(sorted[0] * 1000) / 1000, - max_ms: Math.round(sorted[sorted.length - 1] * 1000) / 1000, - }; - }; - - const summary = { - sandbox_status: 'ok', - cold_start_ms: Math.round(cold_start_ms * 1000) / 1000, - task_count: TASK_COUNT, - warmup_tasks: WARMUP_TASKS, - // Per-task-type latency stats - navigate: computeStats(navigate_ms), 
- evaluate: computeStats(evaluate_ms), - fill: computeStats(fill_ms), - click: computeStats(click_ms), - screenshot: computeStats(screenshot_ms), - // Pooled interaction latency (all types) - interaction: computeStats(interaction_ms), - // Memory - rss_start_mb: memStart.rss_mb, - rss_end_mb: memEnd.rss_mb, - rss_growth_mb: memEnd.rss_mb != null && memStart.rss_mb != null - ? Math.round((memEnd.rss_mb - memStart.rss_mb) * 100) / 100 - : null, - }; - - // Print JSON to stdout — orchestrator parses this - console.log(JSON.stringify(summary)); -} - -runBenchmark().catch((e) => { - console.log(JSON.stringify({ - sandbox_status: 'error', - error: `${e.name}: ${e.message}`, - })); - process.exit(1); -}); diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py index 297b06758f..b2d31e026b 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_deploy_utils.py @@ -70,11 +70,7 @@ "Timeout in seconds for workload deployment rollout.", ) -flags.DEFINE_bool( - "skip_image_build", - False, - "Skip container image builds during Prepare.", -) + # Module-level derived images (set during DeployWorkloads) @@ -131,6 +127,12 @@ def _RenderAndApply(template_name, **kwargs): "Set to True on non-GKE clusters where pod snapshots are not supported.", ) +flags.DEFINE_string( + "k8s_snapshot_ksa_name", + "pod-snapshot-sa", + "Kubernetes service account for pod snapshots.", +) + # --------------------------------------------------------------------------- # Public API @@ -175,6 +177,7 @@ def DeployWorkloads(benchmark_spec=None): region = "" machine_type = "" cluster_name = "" + cluster = None if benchmark_spec: cluster = getattr(benchmark_spec, 'container_cluster', None) if cluster: @@ -197,31 +200,36 @@ def DeployWorkloads(benchmark_spec=None): zone = getattr(FLAGS, 'zone', '') or '' 
region = zone[:-2] if zone else '' - # Build images if requested - # Detect architecture and derive image paths + # Derive image paths for template rendering. + # Chrome and Router images are built during prerequisites + # (gke_prerequisites.py), not during Prepare. + # ADK agent image is built by PKB container_specs during Provision. from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import ( gke_image_build_utils, ) - zone = cluster.zone if cluster else FLAGS.zone - arch = gke_image_build_utils._DetectArchitecture(machine_type, zone, project) - + arch = FLAGS.target_arch or "amd64" global _derived_images _derived_images = _DeriveImagePaths(project, region, arch) - - if not FLAGS.skip_image_build: - gke_image_build_utils.build_images_with_config( - project=project, - region=region, - machine_type=machine_type, - zone=zone, - arch=arch, - ) + logging.info( + "DeployWorkloads: project=%s region=%s arch=%s", + project, region, arch, + ) + logging.info("_derived_images: %s", _derived_images) _CreateNamespace(ns) _InstallCRDs() _DeploySandboxTemplates(ns) _DeploySandboxRouter(ns) - _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name) + # Prefer ADK image from PKB-native container_specs (built during Provision). + # Falls back to FLAGS.k8s_agent_image or derived image path. 
+ adk_image_from_specs = "" + if benchmark_spec: + specs = getattr(benchmark_spec, "container_specs", {}) + adk_spec = specs.get("adk_agent") + if adk_spec and getattr(adk_spec, "image", None): + adk_image_from_specs = adk_spec.image + logging.info("Using ADK image from container_specs: %s", adk_image_from_specs) + _DeployADKAgent(ns, project=project, region=region, cluster_name=cluster_name, adk_image_override=adk_image_from_specs) _DeployPSIReader(ns) _WaitForAgentReady(ns) @@ -365,13 +373,29 @@ def _DeploySandboxRouter(ns): ) -def _DeployADKAgent(ns, project="", region="", cluster_name=""): +def _DeployADKAgent(ns, project="", region="", cluster_name="", adk_image_override=""): """Deploy ADK Agent: SA, ClusterRole, RoleBinding, Deployment, Service.""" - adk_image = FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "") + adk_image = adk_image_override or FLAGS.k8s_agent_image or _derived_images.get("adk_agent", "") + + # Validate the image looks like a registry path, not a Dockerfile path. + # When Prepare runs separately from Provision, container_specs may not + # have the built image path. The config YAML default (agentic/adk-agent) + # is the Dockerfile lookup path, not a valid registry reference. + if adk_image and "docker.pkg.dev" not in adk_image: + derived = _derived_images.get("adk_agent", "") + if derived: + logging.warning( + "ADK image %s is not a registry path. Using derived: %s", + adk_image, derived, + ) + adk_image = derived + if not adk_image: logging.info("ADK agent image not set, skipping agent deployment.") return + logging.info("Using ADK image: %s", adk_image) + project = project or "" region = region or "" cluster = cluster_name or "" @@ -392,14 +416,17 @@ def _DeployPSIReader(ns): def _WaitForAgentReady(ns): - """Wait for ADK agent deployment to be ready.""" - adk_image = FLAGS.k8s_agent_image - if not adk_image: - logging.info("ADK agent not deployed, skipping rollout wait.") - return + """Wait for ADK agent deployment to be ready. 
+ + Always attempts the rollout wait regardless of how the image was + specified (FLAGS.k8s_agent_image, container_specs, or _derived_images). + kubectl rollout status returns non-zero harmlessly if the deployment + does not exist, and raise_on_failure=False prevents that from + propagating. + """ timeout = FLAGS.k8s_deploy_timeout logging.info("Waiting for adk-agent rollout (timeout=%ds)...", timeout) - kubectl.RunKubectlCommand( + _, stderr, retcode = kubectl.RunKubectlCommand( [ "rollout", "status", "deployment/adk-agent", "-n", ns, @@ -407,6 +434,11 @@ def _WaitForAgentReady(ns): ], raise_on_failure=False, ) + if retcode != 0: + logging.warning( + "adk-agent rollout status returned %d: %s", + retcode, stderr.strip()[:200], + ) def _GetProjectNumber(project): diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py index a339af8022..750ae05988 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py @@ -1,14 +1,15 @@ """Shared image build utilities for GKE Agent Sandbox benchmarks. -Builds and pushes container images (ADK agent, Chrome sandbox, Sandbox Router) -via Google Cloud Build. Called from: - - Provision() when --gke_skip_image_build is False (via BuildImages()) - - prerequisite_setup.py (via build_images_with_config()) +Builds and pushes container images (Chrome sandbox, Sandbox Router) via +Google Cloud Build. Called from gke_prerequisites.build_sandbox_images() +during prerequisite setup, not from DeployWorkloads() in the Prepare stage. + +NOTE: The ADK Agent image is built by the PKB native container_specs +mechanism during the Provision stage, not by this module.
Images built: - - ADK Agent: perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ -> {region}-docker.pkg.dev/{project}/adk-repo/adk-agent:{arch} - - Chrome Sandbox: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/chrome-sandbox:{arch} - - Sandbox Router: cloned from agent-sandbox repo -> {region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{arch} + - Chrome Sandbox: cloned from agent-sandbox repo + - Sandbox Router: cloned from agent-sandbox repo """ import logging @@ -18,6 +19,7 @@ import tempfile from absl import flags +from perfkitbenchmarker import vm_util FLAGS = flags.FLAGS @@ -25,7 +27,6 @@ logger = logging.getLogger(__name__) - # --------------------------------------------------------------------------- # Architecture detection # --------------------------------------------------------------------------- @@ -95,18 +96,17 @@ def _DetectArchitecture(machine_type, zone, project): return "amd64" -def build_images_with_config(project, region, machine_type, zone, arch, cloud_build_sa=None): +def build_images_with_config(project, region, machine_type, zone, arch): """Core image build logic — no FLAGS dependency. Callable from both PKB (via BuildImages()) and prerequisite_setup.py. + Uses the project's default Cloud Build SA (no custom SA needed). Args: project: GCP project ID. region: GCP region (e.g. "us-central1"). machine_type: Machine type string (e.g. "c4-standard-8"). Used to derive target architecture (arm64 for c4a, amd64 otherwise). - cloud_build_sa: Cloud Build service account email. - If None, defaults to "adk-cloud-build-sa@{project}.iam.gserviceaccount.com". 
""" # Architecture passed in from caller (detected via gcloud) target_arch = arch @@ -120,33 +120,20 @@ def build_images_with_config(project, region, machine_type, zone, arch, cloud_bu f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{target_arch}" ) - # Cloud Build SA - if cloud_build_sa is None: - cloud_build_sa = f"adk-cloud-build-sa@{project}.iam.gserviceaccount.com" - logger.info("=== Building Container Images ===") + logger.info("=== Building Container Images (Chrome + Router only) ===") logger.info(" Project: %s", project) logger.info(" Region: %s", region) logger.info(" Architecture: %s", target_arch) - logger.info(" Cloud Build SA: %s", cloud_build_sa) - - # 1. Build ADK Agent - _BuildADKAgentImage( - project=project, - region=region, - target_arch=target_arch, - image_path=adk_image, - cloud_build_sa=cloud_build_sa, - machine_type=machine_type, - ) + logger.info(" Cloud Build SA: default (project Cloud Build SA)") + logger.info(" NOTE: ADK Agent image is built by PKB via container_specs") - # 2. Build Chrome Sandbox + # 1. Build Chrome Sandbox _BuildChromeSandboxImage( project=project, region=region, target_arch=target_arch, image_path=chrome_image, - cloud_build_sa=cloud_build_sa, ) # 3. Build Sandbox Router @@ -155,86 +142,19 @@ def build_images_with_config(project, region, machine_type, zone, arch, cloud_bu region=region, target_arch=target_arch, image_path=router_image, - cloud_build_sa=cloud_build_sa, ) - logger.info("=== All images built successfully ===") - logger.info(" ADK Agent: %s", adk_image) + logger.info("=== Chrome + Router images built successfully ===") logger.info(" Chrome Sandbox: %s", chrome_image) logger.info(" Sandbox Router: %s", router_image) - - -def BuildImages(): - """FLAGS-based entry point. - - Reads configuration from native PKB FLAGS. - Delegates to build_images_with_config() for the actual work. 
- """ - project = getattr(FLAGS, 'project', '') or '' - zone = getattr(FLAGS, 'zone', '') or '' - region = zone[:-2] if zone else '' - machine_type = getattr(FLAGS, 'machine_type', '') or '' - build_images_with_config( - project=project, - region=region, - machine_type=machine_type, - ) + logger.info(" (ADK Agent built by PKB via container_specs)") # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- - -def _BuildADKAgentImage( - project, region, target_arch, image_path, cloud_build_sa, machine_type=None -): - """Build and push the ADK Agent image. - - Uses the existing perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml with --substitutions - rather than generating a new one (avoids overwriting the committed file). - """ - logger.info("Building ADK Agent image: %s", image_path) - - # Locate the agent source directory - # Expected layout: repo_root/perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/ - repo_root = _FindRepoRoot() - agent_dir = os.path.join(repo_root, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent") - - if not os.path.isdir(agent_dir): - raise RuntimeError( - f"ADK agent source not found at {agent_dir}. " - "Ensure you are running from the repository root." - ) - - # Generate generated.env from template - _GenerateEnvFile(agent_dir, project, region, machine_type=machine_type) - - # Use the existing cloudbuild.yaml with substitutions (don't overwrite) - cloudbuild_path = os.path.join(agent_dir, "cloudbuild.yaml") - if not os.path.isfile(cloudbuild_path): - raise RuntimeError( - f"cloudbuild.yaml not found at {cloudbuild_path}. " - "Expected perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/cloudbuild.yaml to exist." 
- ) - - _RunCmd( - [ - "gcloud", - "builds", - "submit", - agent_dir, - f"--config={cloudbuild_path}", - f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}", - f"--project={project}", - f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}", - ] - ) - - logger.info("ADK Agent image built successfully.") - - -def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_build_sa): +def _BuildChromeSandboxImage(project, region, target_arch, image_path): """Build and push the Chrome Sandbox image.""" logger.info("Building Chrome Sandbox image: %s", image_path) @@ -280,7 +200,6 @@ def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_bui image_path=image_path, target_arch=target_arch, project=project, - cloud_build_sa=cloud_build_sa, ) logger.info("Chrome Sandbox image built successfully.") @@ -288,7 +207,7 @@ def _BuildChromeSandboxImage(project, region, target_arch, image_path, cloud_bui shutil.rmtree(tmp_dir, ignore_errors=True) -def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_build_sa): +def _BuildSandboxRouterImage(project, region, target_arch, image_path): """Build and push the Sandbox Router image.""" logger.info("Building Sandbox Router image: %s", image_path) @@ -330,7 +249,6 @@ def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_bui image_path=image_path, target_arch=target_arch, project=project, - cloud_build_sa=cloud_build_sa, ) logger.info("Sandbox Router image built successfully.") @@ -338,60 +256,11 @@ def _BuildSandboxRouterImage(project, region, target_arch, image_path, cloud_bui shutil.rmtree(tmp_dir, ignore_errors=True) -def _GenerateEnvFile( - agent_dir, project, region, machine_type=None, namespace="agentic" -): - """Render generated.env from template with current config values.""" - template_path = os.path.join(agent_dir, "generated.env.template") - output_path = os.path.join(agent_dir, "generated.env") - - if not 
os.path.isfile(template_path): - logger.warning( - "generated.env.template not found at %s, skipping.", template_path - ) - return - - with open(template_path, "r") as f: - content = f.read() - - # Derive cluster name - machine_family = machine_type.split("-")[0] if machine_type else "c4" - suffix_map = {"c3": "c3metal", "c4": "c4", "c4d": "c4d", "c4a": "c4a"} - cluster_suffix = suffix_map.get(machine_family, "c4") - - # Get username prefix for cluster name - user = os.environ.get("USER", "benchmark") - user_prefix = user.split(".")[0] if "." in user else user - cluster_name = f"{user_prefix}-agentic-{cluster_suffix}" - - # Substitute variables - replacements = { - "${CLUSTER_NAME}": cluster_name, - "${GOOGLE_CLOUD_PROJECT}": project, - "${GOOGLE_CLOUD_LOCATION}": region, - "${AGENTIC_NAMESPACE}": namespace, - "${GOOGLE_GENAI_USE_VERTEXAI}": "true", - "${SANDBOX_ROUTER_URL}": f"http://sandbox-router-svc.{namespace}.svc.cluster.local:8080", - "${SAMPLE_COUNT}": "20", - "${SAMPLE_WARMUP}": "0", - "${PAYLOAD_SIZE_MB}": "1", - "${PAYLOAD_ITERATIONS}": "20", - } - - for key, value in replacements.items(): - content = content.replace(key, value) - - with open(output_path, "w") as f: - f.write(content) - - logger.info("Generated %s", output_path) - - -def _SubmitCloudBuild(source_dir, image_path, target_arch, project, cloud_build_sa): +def _SubmitCloudBuild(source_dir, image_path, target_arch, project): """Generate a cloudbuild.yaml with substitutions and submit via Cloud Build. Used for Chrome and Router images (built in temp directories). - The ADK agent uses its own committed cloudbuild.yaml instead. + Uses the project's default Cloud Build SA. 
""" cloudbuild_content = """steps: - name: 'gcr.io/cloud-builders/docker' @@ -419,36 +288,10 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project, cloud_build_ f"--config={cloudbuild_path}", f"--substitutions=_IMAGE_PATH={image_path},_PLATFORM=linux/{target_arch}", f"--project={project}", - f"--service-account=projects/{project}/serviceAccounts/{cloud_build_sa}", ] ) -def _FindRepoRoot(): - """Find the repository root by looking for known markers.""" - # Try relative to this file - this_dir = os.path.dirname(os.path.abspath(__file__)) - # Expected: perfkitbenchmarker/linux_benchmarks/ -> go up 2 levels - candidate = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(this_dir)))) - if os.path.isdir(os.path.join(candidate, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): - return candidate - - # Try CWD - cwd = os.getcwd() - if os.path.isdir(os.path.join(cwd, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): - return cwd - - # Try parent of CWD - parent = os.path.dirname(cwd) - if os.path.isdir(os.path.join(parent, "perfkitbenchmarker", "data", "k8s_agents", "workloads", "adk_agent")): - return parent - - raise RuntimeError( - "Cannot locate repository root (looking for perfkitbenchmarker/data/k8s_agents/workloads/adk_agent/). " - "Run from the repository root directory." 
- ) - - def _RunCmd(cmd, cwd=None): """Run a shell command, raising on failure.""" logger.info(" CMD: %s", " ".join(cmd)) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py index 49e04bb83d..1bae7b41d4 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_post_teardown.py @@ -27,17 +27,31 @@ def _run(cmd, check=False, timeout=300): return result -def teardown_cloud_build_sa(project_id): - logger.info("=== Deleting Cloud Build SA ===") - sa_email = f"adk-cloud-build-sa@{project_id}.iam.gserviceaccount.com" +def revoke_cloudbuild_sa_permissions(project_id): + """Revoke extra IAM roles from Cloud Build SA(s). + + Mirrors grant_cloudbuild_sa_permissions() from gke_prerequisites.py. + Revokes roles from both possible SAs. Does NOT delete them + (they are project-managed). + """ + logger.info("=== Revoking extra permissions from Cloud Build SA(s) ===") + result = _run(["gcloud", "projects", "describe", project_id, + "--format=value(projectNumber)"]) + project_number = result.stdout.strip() + if not project_number: + logger.warning("Could not determine project number, skipping SA cleanup") + return + sa_emails = [ + f"{project_number}@cloudbuild.gserviceaccount.com", + f"{project_number}-compute@developer.gserviceaccount.com", + ] roles = ["roles/logging.logWriter", "roles/storage.objectViewer", "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer"] - for role in roles: - _run(["gcloud", "projects", "remove-iam-policy-binding", project_id, - f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"]) - _run(["gcloud", "iam", "service-accounts", "delete", sa_email, - f"--project={project_id}", "--quiet"]) - logger.info("Cloud Build SA deleted.") + for sa_email in sa_emails: + for role in roles: + _run(["gcloud", "projects", 
"remove-iam-policy-binding", project_id, + f"--member=serviceAccount:{sa_email}", f"--role={role}", "--quiet"]) + logger.info("Cloud Build SA extra permissions revoked.") def teardown_snapshot_bucket(project_id, region): @@ -52,7 +66,11 @@ def teardown_snapshot_bucket(project_id, region): def teardown_images(project_id, region): logger.info("=== Deleting AR repos ===") - for repo in ["adk-repo", "agent-sandbox"]: + # "adk-repo" is created/deleted by PKB container_registry lifecycle + # (Provision creates it, Teardown deletes it). If you skip PKB Teardown, + # run: gcloud artifacts repositories delete adk-repo --location= + # Only "agent-sandbox" (Chrome + Router images) needs manual cleanup here. + for repo in ["agent-sandbox"]: _run(["gcloud", "artifacts", "repositories", "delete", repo, f"--location={region}", f"--project={project_id}", "--quiet"]) logger.info("AR repos deleted.") @@ -65,7 +83,7 @@ def main(): p.add_argument("--keep_images", action="store_true", help="Skip AR repo deletion") p.add_argument("--keep_bucket", action="store_true", help="Skip snapshot bucket deletion") args = p.parse_args() - teardown_cloud_build_sa(args.project_id) + revoke_cloudbuild_sa_permissions(args.project_id) if not args.keep_bucket: teardown_snapshot_bucket(args.project_id, args.region) if not args.keep_images: diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py index 9c45f02449..72c32d5b1f 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_prerequisites.py @@ -55,7 +55,10 @@ def enable_apis(project_id): def create_artifact_registry(project_id, region): logger.info("=== Creating Artifact Registry Repos ===") - for repo in ["adk-repo", "agent-sandbox"]: + # "adk-repo" is no longer needed here -- PKB creates its own AR repo + # via container_registry during the 
Provision stage. + # Only "agent-sandbox" is needed for Chrome/Router images. + for repo in ["agent-sandbox"]: if _exists(["gcloud", "artifacts", "repositories", "describe", repo, f"--location={region}", f"--project={project_id}"]): logger.info("AR repo %s already exists.", repo) @@ -66,39 +69,112 @@ def create_artifact_registry(project_id, region): logger.info("AR repo %s created.", repo) -def create_cloud_build_sa(project_id): - logger.info("=== Creating Cloud Build SA ===") - sa_name = "adk-cloud-build-sa" - sa_email = f"{sa_name}@{project_id}.iam.gserviceaccount.com" - if not _exists(["gcloud", "iam", "service-accounts", "describe", - sa_email, f"--project={project_id}"]): - _run(["gcloud", "iam", "service-accounts", "create", sa_name, - f"--display-name={sa_name}", f"--project={project_id}"]) - logger.info("SA %s created. Waiting for propagation...", sa_email) - time.sleep(10) - else: - logger.info("SA %s already exists.", sa_email) +def grant_cloudbuild_sa_permissions(project_id): + """Grant required IAM roles to the Cloud Build service account(s). + + Auto-detects which SA Cloud Build uses in this project: + - Legacy projects: {number}@cloudbuild.gserviceaccount.com + - Newer projects: {number}-compute@developer.gserviceaccount.com + + Grants permissions to both SAs to ensure compatibility regardless + of project configuration. This is idempotent and safe. 
+ """ + logger.info("=== Granting permissions to Cloud Build SA(s) ===") + result = _run(["gcloud", "projects", "describe", project_id, + "--format=value(projectNumber)"]) + project_number = result.stdout.strip() + if not project_number: + logger.error("Could not determine project number for %s", project_id) + return + + # Both possible Cloud Build SAs + cloudbuild_sa = f"{project_number}@cloudbuild.gserviceaccount.com" + compute_sa = f"{project_number}-compute@developer.gserviceaccount.com" + + # Detect which SA(s) exist + sa_emails = [] + for sa in [cloudbuild_sa, compute_sa]: + if _exists(["gcloud", "iam", "service-accounts", "describe", + sa, f"--project={project_id}"]): + sa_emails.append(sa) + logger.info("Found Cloud Build SA: %s", sa) + else: + logger.info("SA not found (skipping): %s", sa) + + if not sa_emails: + logger.error("No Cloud Build SA found in project %s", project_id) + return + roles = [ "roles/logging.logWriter", "roles/storage.objectViewer", "roles/artifactregistry.writer", "roles/serviceusage.serviceUsageConsumer", ] - for role in roles: - _run(["gcloud", "projects", "add-iam-policy-binding", project_id, - f"--member=serviceAccount:{sa_email}", - f"--role={role}", "--condition=None", "--quiet"], check=False) - logger.info("Cloud Build SA roles bound.") - + for sa_email in sa_emails: + logger.info("Granting roles to %s", sa_email) + for role in roles: + _run(["gcloud", "projects", "add-iam-policy-binding", project_id, + f"--member=serviceAccount:{sa_email}", + f"--role={role}", "--condition=None", "--quiet"], check=False) + logger.info("Cloud Build SA permissions granted.") + + + + +def build_sandbox_images(project_id, region, target_arch): + """Build Chrome Sandbox and Sandbox Router images via Cloud Build.""" + logger.info("=== Building Sandbox Images (arch=%s) ===", target_arch) + from perfkitbenchmarker.linux_benchmarks.kubernetes.agentic import gke_image_build_utils + + chrome_image = ( + 
f"{region}-docker.pkg.dev/{project_id}/agent-sandbox/chrome-sandbox:{target_arch}" + ) + router_image = ( + f"{region}-docker.pkg.dev/{project_id}/agent-sandbox/sandbox-router:{target_arch}" + ) + + gke_image_build_utils._BuildChromeSandboxImage( + project=project_id, + region=region, + target_arch=target_arch, + image_path=chrome_image, + ) + + gke_image_build_utils._BuildSandboxRouterImage( + project=project_id, + region=region, + target_arch=target_arch, + image_path=router_image, + ) + + logger.info("Sandbox images built successfully.") + logger.info(" Chrome: %s", chrome_image) + logger.info(" Router: %s", router_image) def main(): p = argparse.ArgumentParser(description="GKE Agentic Benchmark Prerequisites") p.add_argument("--project_id", required=True, help="GCP project ID") p.add_argument("--region", default="us-central1", help="GCP region") + p.add_argument( + "--target_arch", + required=True, + choices=["amd64", "arm64"], + help="Target CPU architecture for container images (amd64 or arm64)", + ) + p.add_argument( + "--skip_image_build", + action="store_true", + help="Skip Chrome and Router image builds (images already in registry)", + ) args = p.parse_args() enable_apis(args.project_id) create_artifact_registry(args.project_id, args.region) - create_cloud_build_sa(args.project_id) + grant_cloudbuild_sa_permissions(args.project_id) + if not args.skip_image_build: + build_sandbox_images(args.project_id, args.region, args.target_arch) + else: + logger.info("Skipping image builds (--skip_image_build)") print("\nPrerequisite setup complete!") diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py index cb49011b08..c6fa3577bc 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py @@ -78,11 +78,9 @@ "Number of 
concurrent source/snapshot/restore pods per measurement.", ) -flags.DEFINE_string( - "k8s_snapshot_ksa_name", - "pod-snapshot-sa", - "Kubernetes service account for pod snapshots.", -) +# k8s_snapshot_ksa_name is defined in gke_deploy_utils.py +# (where DeploySnapshots() consumes it) and is available here +# via the deploy_utils import. flags.DEFINE_integer( "k8s_snapshot_pod_timeout", @@ -131,16 +129,23 @@ def Prepare(benchmark_spec): # Deploy Agent Sandbox ecosystem (idempotent) deploy_utils.DeployWorkloads(benchmark_spec) - # Deploy Pod Snapshot infrastructure (idempotent) - # Pod Snapshots are GKE-specific; skip on other platforms - cloud = getattr( - getattr(benchmark_spec, "container_cluster", None), "cloud", "GCP" - ) - if cloud == "GCP" and not FLAGS.skip_deploy_snapshots: + # Deploy Pod Snapshot infrastructure (idempotent). + # Pod Snapshots are GKE-specific; skip on other platforms. + # Only attempt deployment when we have a confirmed GCP cluster + # (avoids surprise failures on pre-existing clusters where + # benchmark_spec.container_cluster may be None). + cluster = getattr(benchmark_spec, "container_cluster", None) + if cluster and getattr(cluster, "cloud", None) == "GCP" and not FLAGS.skip_deploy_snapshots: deploy_utils.DeploySnapshots() - elif cloud != "GCP": + elif not cluster: + logging.info( + "Pod Snapshot infrastructure skipped (no container_cluster in " + "benchmark_spec). Use --skip_deploy_snapshots=False to force." + ) + elif getattr(cluster, "cloud", None) != "GCP": logging.info( - "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).", cloud + "Pod Snapshot infrastructure skipped (cloud=%s, GKE required).", + getattr(cluster, "cloud", "unknown"), ) # 1. Verify PodSnapshotStorageConfig exists (cluster-scoped). 
From 0ca3cdd8e2d6c942021f6fa444a9252ebd285cef Mon Sep 17 00:00:00 2001 From: George Kalisse <20505232+george-kalisse-sada@users.noreply.github.com> Date: Fri, 3 Jul 2026 14:23:47 -0400 Subject: [PATCH 5/5] multiple fixes --- .../agentic/adk-agent/cloudbuild-arm64.yaml | 20 +++++++ .../agentic/gke_image_build_utils.py | 55 ++++++++++++++++--- .../agentic/k8s_chromium_density_benchmark.py | 1 + .../agentic/k8s_deletion_benchmark.py | 1 + .../agentic/k8s_payload_benchmark.py | 1 + .../agentic/k8s_python_density_benchmark.py | 1 + .../kubernetes/agentic/k8s_qps_benchmark.py | 1 + .../agentic/k8s_snapshot_benchmark.py | 1 + .../agentic/k8s_warmpool_benchmark.py | 1 + .../providers/gcp/google_kubernetes_engine.py | 25 ++++++--- 10 files changed, 93 insertions(+), 14 deletions(-) create mode 100644 perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml diff --git a/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml new file mode 100644 index 0000000000..653f07fcf8 --- /dev/null +++ b/perfkitbenchmarker/data/docker/agentic/adk-agent/cloudbuild-arm64.yaml @@ -0,0 +1,20 @@ +# Cloud Build config for cross-compiling to ARM64. +# Used by PKB when --container_remote_build_config points to this file. +# The _IMAGE substitution is passed by PKB RemoteBuild() automatically. 
+steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes'] + id: 'qemu-setup' + - name: 'gcr.io/cloud-builders/docker' + args: ['buildx', 'create', '--use', '--name', 'multiarch-builder'] + id: 'create-builder' + waitFor: ['qemu-setup'] + - name: 'gcr.io/cloud-builders/docker' + args: ['buildx', 'build', '--platform', 'linux/arm64', '-t', '${_IMAGE}', '--push', '.'] + id: 'build-and-push' + waitFor: ['create-builder'] +options: + logging: CLOUD_LOGGING_ONLY + machineType: E2_HIGHCPU_32 +substitutions: + _IMAGE: '' diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py index 750ae05988..2e976207f5 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/gke_image_build_utils.py @@ -68,7 +68,10 @@ def _DetectArchitecture(machine_type, zone, project): try: stdout, _, retcode = vm_util.IssueCommand( [ - "gcloud", "compute", "machine-types", "describe", + "gcloud", + "compute", + "machine-types", + "describe", machine_type, f"--zone={zone}", f"--project={project}", @@ -83,15 +86,20 @@ def _DetectArchitecture(machine_type, zone, project): if docker_arch: logging.info( "Detected architecture for %s: %s -> %s", - machine_type, gcp_arch, docker_arch, + machine_type, + gcp_arch, + docker_arch, ) return docker_arch logging.warning( "Unknown GCP architecture '%s' for %s. Falling back to amd64.", - gcp_arch, machine_type, + gcp_arch, + machine_type, ) except Exception as e: - logging.warning("gcloud machine-type describe failed: %s. Falling back to amd64.", e) + logging.warning( + "gcloud machine-type describe failed: %s. 
Falling back to amd64.", e + ) return "amd64" @@ -120,7 +128,6 @@ def build_images_with_config(project, region, machine_type, zone, arch): f"{region}-docker.pkg.dev/{project}/agent-sandbox/sandbox-router:{target_arch}" ) - logger.info("=== Building Container Images (Chrome + Router only) ===") logger.info(" Project: %s", project) logger.info(" Region: %s", region) @@ -154,6 +161,7 @@ def build_images_with_config(project, region, machine_type, zone, arch): # Internal helpers # --------------------------------------------------------------------------- + def _BuildChromeSandboxImage(project, region, target_arch, image_path): """Build and push the Chrome Sandbox image.""" logger.info("Building Chrome Sandbox image: %s", image_path) @@ -261,8 +269,15 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project): Used for Chrome and Router images (built in temp directories). Uses the project's default Cloud Build SA. + + For cross-architecture builds (e.g. arm64 on amd64 workers), uses + QEMU emulation + Docker Buildx to produce the target-arch image. + A high-CPU machine type (E2_HIGHCPU_32) is used to offset the + overhead of QEMU instruction translation. """ - cloudbuild_content = """steps: + if target_arch == "amd64": + # Native build — no emulation needed + cloudbuild_content = """steps: - name: 'gcr.io/cloud-builders/docker' args: ['build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '.'] env: @@ -274,6 +289,32 @@ def _SubmitCloudBuild(source_dir, image_path, target_arch, project): substitutions: _IMAGE_PATH: '' _PLATFORM: 'linux/amd64' +""" + else: + # Cross-arch build — QEMU + Buildx required. + # Cloud Build workers are amd64; QEMU registers binfmt handlers + # so the kernel can execute arm64 binaries transparently. + # E2_HIGHCPU_32 provides 32 vCPUs to offset emulation overhead. + # Buildx --push handles the registry push directly, so no + # top-level 'images:' key is needed. 
+ cloudbuild_content = """steps: + - name: 'gcr.io/cloud-builders/docker' + args: ['run', '--privileged', 'multiarch/qemu-user-static', '--reset', '-p', 'yes'] + id: 'qemu-setup' + - name: 'gcr.io/cloud-builders/docker' + args: ['buildx', 'create', '--use', '--name', 'multiarch-builder'] + id: 'create-builder' + waitFor: ['qemu-setup'] + - name: 'gcr.io/cloud-builders/docker' + args: ['buildx', 'build', '--platform', '${_PLATFORM}', '-t', '${_IMAGE_PATH}', '--push', '.'] + id: 'build-and-push' + waitFor: ['create-builder'] +options: + logging: CLOUD_LOGGING_ONLY + machineType: E2_HIGHCPU_32 +substitutions: + _IMAGE_PATH: '' + _PLATFORM: 'linux/amd64' """ cloudbuild_path = os.path.join(source_dir, "cloudbuild.yaml") with open(cloudbuild_path, "w") as f: @@ -302,7 +343,7 @@ def _RunCmd(cmd, cwd=None): capture_output=True, text=True, cwd=cwd, - timeout=600, + timeout=2400, # 40 min: allows for QEMU cross-arch builds env=env, ) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py index 346f59a8b0..bd9114877c 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_chromium_density_benchmark.py @@ -118,6 +118,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py index ddeae29f9d..418b5c1ed9 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py +++ 
b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_deletion_benchmark.py @@ -115,6 +115,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads onto the cluster.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) utils.EnsurePortForward() diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py index 9f31aee342..109ab0efe6 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_payload_benchmark.py @@ -135,6 +135,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py index 207fd40a20..7760f23ff7 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_python_density_benchmark.py @@ -128,6 +128,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) utils.CheckAgentHealthz(required=False) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py index 4528082ad5..feb82c8614 100644 --- 
a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_qps_benchmark.py @@ -146,6 +146,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads and verify agent API.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py index c6fa3577bc..8d78c6649b 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_snapshot_benchmark.py @@ -117,6 +117,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads, snapshot infra, and validate readiness.""" + benchmark_spec.always_call_cleanup = True ns = FLAGS.k8s_namespace preload_mb = FLAGS.k8s_snapshot_preload_mb diff --git a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py index 62c6462351..9024f9f28e 100644 --- a/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py +++ b/perfkitbenchmarker/linux_benchmarks/kubernetes/agentic/k8s_warmpool_benchmark.py @@ -122,6 +122,7 @@ def GetConfig(user_config): def Prepare(benchmark_spec): """Deploy workloads onto the cluster.""" + benchmark_spec.always_call_cleanup = True logging.info("=== Prepare: deploying workloads ===") deploy_utils.DeployWorkloads(benchmark_spec) utils.EnsurePortForward() diff --git a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py index c4012faf1a..06d4a295dc 100644 --- a/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py +++ 
b/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -102,14 +102,25 @@ def _Delete(self): ).Issue() def RemoteBuild(self, image: container.ContainerImage): - """Builds the image remotely.""" - if not gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value: - full_tag = self.GetFullRegistryTag(image.name) + """Builds the image remotely. + + If --container_remote_build_config is set, uses it as the + --config argument to `gcloud builds submit` and passes the + image tag via --substitutions _IMAGE=. + Otherwise uses the simple --tag shorthand. + """ + full_tag = self.GetFullRegistryTag(image.name) + if gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value: + build_cmd = util.GcloudCommand( + self, 'builds', 'submit', + '--config', gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value, + '--substitutions', f'_IMAGE={full_tag}', + image.directory, + ) else: - full_tag = gcp_flags.CONTAINER_REMOTE_BUILD_CONFIG.value - build_cmd = util.GcloudCommand( - self, 'builds', 'submit', '--tag', full_tag, image.directory - ) + build_cmd = util.GcloudCommand( + self, 'builds', 'submit', '--tag', full_tag, image.directory, + ) build_cmd.Issue(timeout=None)