Move benchmark make target

rgarcia · rgarcia · commit 1c6432119616 · 2026-05-18T15:15:10.000Z
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 SHELL := /bin/bash
-.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux test test-linux test-darwin test-guestmemory-linux test-guestmemory-vz install-tools gen-jwt download-ch-binaries download-firecracker-binaries download-ch-spec ensure-ch-binaries ensure-firecracker-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded bench-activity-ramp
+.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build build-linux test test-linux test-darwin test-guestmemory-linux test-guestmemory-vz install-tools gen-jwt download-ch-binaries download-firecracker-binaries download-ch-spec ensure-ch-binaries ensure-firecracker-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean build-embedded
 
 # Directory where local binaries will be installed
 BIN_DIR ?= $(CURDIR)/bin
@@ -15,17 +15,6 @@ AIR ?= $(BIN_DIR)/air
 WIRE ?= $(BIN_DIR)/wire
 XCADDY ?= $(BIN_DIR)/xcaddy
 TEST_TIMEOUT ?= $(GO_TEST_TIMEOUT)
-K6 ?= k6
-K6_OUT_DIR ?= .bench/k6
-HYPEMAN_BASE_URL ?= http://127.0.0.1:8080
-HYPEMAN_IMAGE ?= docker.io/library/nginx:alpine
-HYPEMAN_BENCH_MAX_VUS ?= 16
-HYPEMAN_BENCH_VU_STEP ?= 1
-HYPEMAN_BENCH_STAGE_DURATION ?= 2m
-HYPEMAN_BENCH_DASHBOARD_PERIOD ?= 120s
-HYPEMAN_HYPERVISOR ?= cloud-hypervisor
-HYPEMAN_INGRESS_HOST_PORT ?= 80
-HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS ?= 1
 
 # Install oapi-codegen (pinned to match committed generated code)
 $(OAPI_CODEGEN): | $(BIN_DIR)
@@ -300,35 +289,6 @@ test-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binaries
 			go test -tags containers_image_openpgp $$VERBOSE_FLAG -timeout=$(TEST_TIMEOUT) ./...; \
 	fi
 
-bench-activity-ramp:
-	@if ! command -v $(K6) >/dev/null 2>&1; then \
-		echo "k6 not found; install k6 or run with K6=/path/to/k6"; \
-		exit 1; \
-	fi
-	@if [ -z "$$HYPEMAN_API_KEY" ]; then \
-		echo "HYPEMAN_API_KEY is required"; \
-		exit 1; \
-	fi
-	@mkdir -p $(K6_OUT_DIR)
-	K6_WEB_DASHBOARD=true \
-	K6_WEB_DASHBOARD_PORT=-1 \
-	K6_WEB_DASHBOARD_PERIOD=$(HYPEMAN_BENCH_DASHBOARD_PERIOD) \
-	K6_WEB_DASHBOARD_EXPORT=$(K6_OUT_DIR)/activity-ramp.html \
-	$(K6) run \
-		--summary-mode=full \
-		--summary-trend-stats="avg,med,p(90),p(95),p(99),min,max" \
-		--summary-export=$(K6_OUT_DIR)/activity-ramp-summary.json \
-		-e HYPEMAN_BASE_URL="$(HYPEMAN_BASE_URL)" \
-		-e HYPEMAN_API_KEY="$$HYPEMAN_API_KEY" \
-		-e HYPEMAN_IMAGE="$(HYPEMAN_IMAGE)" \
-		-e HYPEMAN_HYPERVISOR="$(HYPEMAN_HYPERVISOR)" \
-		-e HYPEMAN_BENCH_MAX_VUS="$(HYPEMAN_BENCH_MAX_VUS)" \
-		-e HYPEMAN_BENCH_VU_STEP="$(HYPEMAN_BENCH_VU_STEP)" \
-		-e HYPEMAN_BENCH_STAGE_DURATION="$(HYPEMAN_BENCH_STAGE_DURATION)" \
-		-e HYPEMAN_INGRESS_HOST_PORT="$(HYPEMAN_INGRESS_HOST_PORT)" \
-		-e HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS="$(HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS)" \
-		benchmarks/k6/activity-ramp.ts
-
 # macOS tests (no sudo needed, adds e2fsprogs to PATH)
 # Uses 'go list' to discover compilable packages, then filters out packages
 # whose test files reference Linux-only symbols (network, devices, system/init).
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
@@ -0,0 +1,53 @@
+SHELL := /bin/bash
+
+.PHONY: bench-activity-ramp
+
+# Resolve paths from this Makefile's own location (not the caller's cwd) so the
+# target works with both `make -C benchmarks` and `make -f benchmarks/Makefile`.
+BENCH_DIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+REPO_ROOT := $(abspath $(dir $(lastword $(MAKEFILE_LIST)))..)
+
+# Tunables; override on the command line (e.g. `make ... K6=/path/to/k6`).
+K6 ?= k6
+K6_OUT_DIR ?= $(REPO_ROOT)/.bench/k6
+HYPEMAN_BASE_URL ?= http://127.0.0.1:8080
+HYPEMAN_IMAGE ?= docker.io/library/nginx:alpine
+HYPEMAN_BENCH_MAX_VUS ?= 16
+HYPEMAN_BENCH_VU_STEP ?= 1
+HYPEMAN_BENCH_STAGE_DURATION ?= 2m
+HYPEMAN_BENCH_DASHBOARD_PERIOD ?= 120s
+HYPEMAN_HYPERVISOR ?= cloud-hypervisor
+HYPEMAN_INGRESS_HOST_PORT ?= 80
+HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS ?= 1
+
+# Run the k6 activity-ramp script, passing the HYPEMAN_* settings via `-e`.
+# Requires k6 (or K6=/path/to/k6) and HYPEMAN_API_KEY in the environment.
+# Writes activity-ramp.html and activity-ramp-summary.json into K6_OUT_DIR.
+bench-activity-ramp:
+	@if ! command -v $(K6) >/dev/null 2>&1; then \
+		echo "k6 not found; install k6 or run with K6=/path/to/k6"; \
+		exit 1; \
+	fi
+	@if [ -z "$$HYPEMAN_API_KEY" ]; then \
+		echo "HYPEMAN_API_KEY is required"; \
+		exit 1; \
+	fi
+	@mkdir -p "$(K6_OUT_DIR)"
+	K6_WEB_DASHBOARD=true \
+	K6_WEB_DASHBOARD_PORT=-1 \
+	K6_WEB_DASHBOARD_PERIOD=$(HYPEMAN_BENCH_DASHBOARD_PERIOD) \
+	K6_WEB_DASHBOARD_EXPORT="$(K6_OUT_DIR)/activity-ramp.html" \
+	$(K6) run \
+		--summary-mode=full \
+		--summary-trend-stats="avg,med,p(90),p(95),p(99),min,max" \
+		--summary-export="$(K6_OUT_DIR)/activity-ramp-summary.json" \
+		-e HYPEMAN_BASE_URL="$(HYPEMAN_BASE_URL)" \
+		-e HYPEMAN_API_KEY="$$HYPEMAN_API_KEY" \
+		-e HYPEMAN_IMAGE="$(HYPEMAN_IMAGE)" \
+		-e HYPEMAN_HYPERVISOR="$(HYPEMAN_HYPERVISOR)" \
+		-e HYPEMAN_BENCH_MAX_VUS="$(HYPEMAN_BENCH_MAX_VUS)" \
+		-e HYPEMAN_BENCH_VU_STEP="$(HYPEMAN_BENCH_VU_STEP)" \
+		-e HYPEMAN_BENCH_STAGE_DURATION="$(HYPEMAN_BENCH_STAGE_DURATION)" \
+		-e HYPEMAN_INGRESS_HOST_PORT="$(HYPEMAN_INGRESS_HOST_PORT)" \
+		-e HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS="$(HYPEMAN_CREATE_REJECTED_BACKOFF_SECONDS)" \
+		"$(BENCH_DIR)/k6/activity-ramp.ts"
diff --git a/benchmarks/k6/README.md b/benchmarks/k6/README.md
@@ -15,7 +15,7 @@ The default ramp increases concurrency by one virtual user every two minutes up
 
 ```sh
 export HYPEMAN_API_KEY=...
-make bench-activity-ramp \
+make -C benchmarks bench-activity-ramp \
   HYPEMAN_BASE_URL=http://127.0.0.1:8080 \
   HYPEMAN_IMAGE=docker.io/library/nginx:alpine \
   HYPEMAN_HYPERVISOR=cloud-hypervisor \
diff --git a/benchmarks/k6/activity-ramp.ts b/benchmarks/k6/activity-ramp.ts
@@ -5,6 +5,16 @@ import { Counter, Rate, Trend } from 'k6/metrics';
 
 type Tags = Record<string, string>;
 
+// k6 runs this file in a few phases:
+//
+// 1. The module top level runs once per virtual user (VU). Put metrics,
+//    options, and helper definitions here.
+// 2. setup() runs once before load starts. We use it to verify the Hypeman API,
+//    ensure the image exists, and create the shared pattern ingress.
+// 3. The default function is the workload. k6 calls it repeatedly in every VU
+//    while the ramping-vus scenario is active.
+// 4. teardown() runs once after load stops. It removes any instances tagged
+//    with this benchmark run ID.
 interface Config {
   baseUrl: string;
   apiKey: string;
@@ -33,12 +43,17 @@ interface Config {
   imageReadyTimeoutSeconds: number;
 }
 
+// Trend metrics store latency distributions. Passing true tells k6 these values
+// are durations in milliseconds, so summaries and dashboards format them as time.
 const createMs = new Trend('hypeman_create_instance_ms', true);
 const waitRunningMs = new Trend('hypeman_wait_running_ms', true);
 const probeReadyMs = new Trend('hypeman_probe_ready_ms', true);
 const probeHTTPMs = new Trend('hypeman_probe_http_ms', true);
 const deleteMs = new Trend('hypeman_delete_instance_ms', true);
 const activityMs = new Trend('hypeman_activity_total_ms', true);
+
+// Rate metrics track the fraction of samples that were true. Counter metrics
+// track raw counts. These give us capacity signals alongside latency.
 const activityOk = new Rate('hypeman_activity_ok');
 const cleanupOk = new Rate('hypeman_cleanup_ok');
 const createRejected = new Rate('hypeman_create_rejected');
@@ -51,6 +66,8 @@ export const options = {
   setupTimeout: '15m',
   teardownTimeout: '10m',
   scenarios: {
+    // ramping-vus changes the number of concurrent virtual users over time.
+    // Each active VU loops through the activity until k6 lowers concurrency.
     activity_ramp: {
       executor: 'ramping-vus',
       startVUs: config.startVUs,
@@ -59,12 +76,17 @@ export const options = {
     },
   },
   thresholds: {
+    // Thresholds mark the run failed if cleanup or probe success gets too low.
+    // Create rejections are measured separately because they are the capacity
+    // signal we are trying to find, not a script bug by themselves.
     hypeman_cleanup_ok: ['rate>0.95'],
     hypeman_probe_ok: ['rate>0.80'],
   },
 };
 
 export function setup() {
+  // setup() returns data that k6 passes into every default() iteration.
+  // The run ID is shared so all VUs use the same cleanup tag.
   checkRequiredConfig(config);
   ensureHealthy();
   ensureImageReady(config.image);
@@ -76,6 +98,9 @@ export function setup() {
 }
 
 export default function (data: { runId: string }) {
+  // One iteration is one full user-facing activity:
+  // create -> wait for Running -> send one HTTP probe -> delete.
+  // k6 repeats this loop in each VU for as long as that VU is scheduled.
   const iterationStart = Date.now();
   const instanceName = instanceNameFor(data.runId);
   const tags: Tags = {
@@ -90,6 +115,8 @@ export default function (data: { runId: string }) {
   try {
     created = createInstance(instanceName, tags);
     if (!created) {
+      // A false return means Hypeman rejected the create due to capacity. The
+      // rejection was already counted, so this VU ends the iteration quietly.
       return;
     }
     waitForRunning(instanceName, tags);
@@ -106,6 +133,7 @@ export default function (data: { runId: string }) {
 }
 
 export function teardown(data: { runId: string }) {
+  // Best-effort cleanup handles interrupted iterations or a failed test run.
   cleanupRunInstances(data.runId);
 }
 
@@ -143,6 +171,8 @@ function loadConfig(): Config {
 }
 
 function rampStages(cfg: Config): Array<{ duration: string; target: number }> {
+  // Stages are the k6 ramp plan. With the defaults this produces a 1 VU start,
+  // then targets of 2, 3, 4, ... 16 VUs, with one 2-minute stage per target.
   const stages: Array<{ duration: string; target: number }> = [];
   for (let target = cfg.startVUs + cfg.vuStep; target <= cfg.maxVUs; target += cfg.vuStep) {
     stages.push({ duration: cfg.stageDuration, target });
@@ -172,6 +202,8 @@ function ensureHealthy() {
 }
 
 function ensureImageReady(image: string) {
+  // Hypeman imports images asynchronously. The benchmark should measure
+  // instance lifecycle under load, not image import time, so setup waits here.
   let imageBody = findImage(image);
   if (!imageBody) {
     const res = apiPost('/images', { name: image }, { kind: 'setup', step: 'image-create' });
@@ -211,6 +243,9 @@ function findImage(image: string): { status?: string; error?: string } | null {
 }
 
 function ensurePatternIngress() {
+  // The ingress uses a hostname pattern where {instance} is replaced by each
+  // instance name. That lets all iterations share one ingress instead of
+  // creating and deleting ingress resources inside the hot loop.
   const encoded = encodeURIComponent(config.ingressName);
   const existing = apiGet(`/ingresses/${encoded}`, { kind: 'setup', step: 'ingress-get' });
   if (existing.status === 200) {
@@ -281,6 +316,8 @@ function createInstance(name: string, tags: Tags): boolean {
     return true;
   }
   if (res.status === 409) {
+    // 409 is useful data: it means the server admitted that this concurrency
+    // level is beyond current capacity. Count it without failing the script.
     createRejected.add(true, tags);
     createRejections.add(1, tags);
     sleep(config.createRejectedBackoffSeconds);
@@ -291,6 +328,7 @@ function createInstance(name: string, tags: Tags): boolean {
 }
 
 function waitForRunning(name: string, tags: Tags) {
+  // This measures control-plane latency from accepted create to Running state.
   const started = Date.now();
   const path = `/instances/${encodeURIComponent(name)}`;
   const deadline = started + config.waitTimeoutSeconds * 1000;
@@ -315,6 +353,9 @@ function waitForRunning(name: string, tags: Tags) {
 }
 
 function probeInstance(name: string, tags: Tags) {
+  // The probe goes through the shared ingress URL. The Host header selects the
+  // instance via the pattern ingress, so latency here reflects the data path
+  // through Hypeman into the guest workload.
   const started = Date.now();
   const probeURL = `${config.probeUrl}${config.probePath.startsWith('/') ? config.probePath : `/${config.probePath}`}`;
   const host = `${name}${config.probeHostSuffix}`;
@@ -350,6 +391,7 @@ function deleteInstance(name: string, tags: Tags): boolean {
 }
 
 function cleanupRunInstances(runId: string) {
+  // Query by benchmark tags so teardown only touches instances from this run.
   const query = `tags%5Bbenchmark%5D=activity-ramp&tags%5Brun_id%5D=${encodeURIComponent(runId)}`;
   const res = apiGet(`/instances?${query}`, { kind: 'teardown', step: 'list-run-instances', run_id: runId });
   if (res.status !== 200) {
@@ -417,10 +459,14 @@ function assertStatus(res: RefinedResponse<ResponseType | undefined>, allowed: n
 }
 
 function tagStep(tags: Tags, step: string): Tags {
+  // Tags are attached to k6 metric samples. They make it possible to filter
+  // results by step, hypervisor, run ID, or instance in JSON outputs.
   return { ...tags, step };
 }
 
 function instanceNameFor(runId: string): string {
+  // k6 exposes the current virtual user and iteration through k6/execution.
+  // Including both values keeps names unique even when many VUs run at once.
   const vu = exec.vu.idInTest;
   const iter = exec.scenario.iterationInTest;
   const suffix = `-${vu}-${iter}`;