Skip to content

Commit 531662f

Browse files
committed
refactor(swap_encryption/pr5): thin benchmark + remove duplicate phase logic
- Prepare() uses SwapNodePool + SwapDaemonSet from spec.resources
- Cleanup() is empty - PKB framework auto-deletes spec.resources
- All _pod_exec(pod, ...) calls replaced with daemonset.PodExec(...)
- Remove Phase 3a (Redis/memtier): duplicated kubernetes_redis_memtier_benchmark; run that benchmark on the swap-enabled cluster instead
- Remove Phase 3c (OpenSearch): fragile in-pod JVM launch; use esrally_benchmark on the swap-enabled cluster instead
- Keep Phase 3b (kernel build under cgroup memory cap): unique to this benchmark; kernel_compile_benchmark.py is VM-based and cannot run in a K8s pod
- Add _configure_eks_kubelet_swap() stub (deferred to PR #6780)
- Fix COS_CONTAINERD -> UBUNTU_CONTAINERD (r3472549985)
1 parent 62ebc94 commit 531662f

1 file changed

Lines changed: 62 additions & 85 deletions

File tree

perfkitbenchmarker/linux_benchmarks/swap_encryption_benchmark.py

Lines changed: 62 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
4040
Infrastructure lifecycle lives in two BaseResource subclasses:
4141
42-
SwapNodePool (perfkitbenchmarker/resources/container_service/swap_nodepool.py)
4342
_Create(): gcloud container node-pools create with linuxConfig.swapConfig
4443
+ sysctl via --system-config-from-file; waits for node Ready;
4544
optionally creates and attaches a dedicated swap disk.
@@ -96,7 +95,6 @@
9695
from perfkitbenchmarker import sample
9796
from perfkitbenchmarker.resources.container_service import kubectl
9897
from perfkitbenchmarker.resources.container_service import swap_daemonset as _ds_mod
99-
from perfkitbenchmarker.resources.container_service import swap_nodepool as _np_mod
10098

10199
FLAGS = flags.FLAGS
102100

@@ -113,26 +111,34 @@
113111
BENCHMARK_CONFIG = """
114112
swap_encryption:
115113
description: >
116-
GKE vs. EKS swap encryption and LSSD performance comparison.
117-
Two-step nodepool setup: PKB provisions a minimal cluster with a cheap
118-
default nodepool (Step 1), then Prepare() adds the real benchmark
119-
nodepool (n4-highmem-32 / c4-*-lssd, UBUNTU_CONTAINERD, 80k IOPS) with a
120-
node-level startup script that configures dm-crypt swap before any pod
121-
is scheduled, then removes the default nodepool (Step 2). All benchmark
122-
phases run inside a privileged DaemonSet pinned to the benchmark nodepool.
123-
flags: {}
114+
App workloads (kernel build under cgroup memory cap) on swap-encrypted GKE/EKS nodes. Swap-enabled 'benchmark' nodepool declared in BENCHMARK_CONFIG;
115+
GKE cluster creation applies --system-config-from-file (dm-crypt swapConfig)
116+
automatically via swap_config field on NodepoolSpec.
124117
container_cluster:
118+
cloud: GCP
125119
type: Kubernetes
126120
vm_count: 1
127121
vm_spec:
128122
GCP:
129-
# Cheap placeholder — the benchmark nodepool is created in Prepare().
130123
machine_type: e2-medium
131124
boot_disk_size: 20
132-
AWS:
133-
# Cheap placeholder — the benchmark nodegroup is added in Prepare().
134-
machine_type: t3.medium
135-
boot_disk_size: 20
125+
zone: us-central1-a
126+
nodepools:
127+
benchmark:
128+
vm_count: 1
129+
vm_spec:
130+
GCP:
131+
machine_type: n4-highmem-32
132+
boot_disk_type: hyperdisk-balanced
133+
boot_disk_size: 500
134+
zone: us-central1-a
135+
swap_config:
136+
enabled: true
137+
swappiness: 100
138+
min_free_kbytes: 200
139+
watermark_scale_factor: 500
140+
boot_disk_iops: 160000
141+
boot_disk_throughput: 2400
136142
"""
137143

138144

@@ -486,60 +492,20 @@ def GetConfig(user_config: dict[str, Any]) -> dict[str, Any]:
486492
def Prepare(spec: _BenchmarkSpec) -> None:
487493
"""Deploy the swap DaemonSet, then delete the default nodepool.
488494
489-
Step 1 (handled by PKB infrastructure): cluster provisioned with a cheap
490-
e2-medium default nodepool.
491-
492-
Step 2 (this function):
493-
a. GCP: Create SwapNodePool (benchmark nodepool + optional swap disk).
494-
EKS: label existing nodes with pkb_nodepool=benchmark.
495-
b. Create SwapDaemonSet: deploy manifest + wait for Running + sentinel.
496-
c. GCP: DeleteDefaultPool() — safe now that DaemonSet pod is Running.
497-
d. GCP: re-resolve pod name in case default-pool deletion evicts the pod.
495+
PKB cluster creation automatically provisions the swap-enabled 'benchmark'
496+
nodepool (swap_config in BENCHMARK_CONFIG). This function only:
497+
1. Deploys the privileged SwapDaemonSet and waits for Running.
498+
2. Deletes the cheap e2-medium default-pool (required at cluster create).
498499
499-
Both resources are appended to spec.resources for auto-cleanup.
500+
DaemonSet is appended to spec.resources for PKB auto-cleanup.
500501
"""
501502
cluster = spec.container_cluster
502-
is_gcp = getattr(cluster, 'project', None) is not None
503-
504-
if is_gcp:
505-
# ── Step 2a (GCP): create benchmark nodepool + wait for node ──────────
506-
logging.info('[swap_encryption] Step 2a: creating benchmark nodepool')
507-
nodepool = _np_mod.SwapNodePool(
508-
cluster=cluster,
509-
machine_type=_BENCHMARK_MACHINE_TYPE.value,
510-
node_image_type=_NODE_IMAGE_TYPE.value,
511-
disk_type=_BOOT_DISK_TYPE.value,
512-
disk_size_gb=_BOOT_DISK_SIZE_GB.value,
513-
disk_iops=_BOOT_DISK_IOPS.value,
514-
disk_throughput=_BOOT_DISK_THROUGHPUT.value,
515-
lssd=_BENCHMARK_LSSD.value,
516-
lssd_count=_LSSD_COUNT.value,
517-
add_swap_disk=_ADD_SWAP_DISK.value,
518-
swap_disk_size_gb=_SWAP_DISK_SIZE_GB.value,
519-
)
520-
nodepool.Create()
521-
spec.resources.append(nodepool)
522-
else:
523-
# ── Step 2a (EKS): label existing nodes to match DaemonSet selector ──
524-
logging.info(
525-
'[swap_encryption] EKS cluster — labelling existing nodes with'
526-
' pkb_nodepool=%s so the DaemonSet nodeSelector matches.',
527-
_BENCHMARK_NODEPOOL,
528-
)
529-
kubectl.RunKubectlCommand([
530-
'label',
531-
'nodes',
532-
'--all',
533-
'--overwrite',
534-
f'pkb_nodepool={_BENCHMARK_NODEPOOL}',
535-
])
536-
_ensure_io2_volume()
537-
538-
# ── Step 2b: deploy DaemonSet and wait for pod ────────────────────────────
539-
# Deploy BEFORE deleting the default pool: deleting the default pool while
540-
# the benchmark node is still joining causes a brief API-server I/O timeout.
541-
# The pod being Running means the cluster is fully stable.
542-
logging.info('[swap_encryption] Step 2b: deploying privileged DaemonSet')
503+
504+
# The swap-enabled 'benchmark' nodepool is already provisioned by GKE
505+
# cluster creation (swap_config declared in BENCHMARK_CONFIG).
506+
# Prepare() only deploys the privileged DaemonSet + deletes the cheap
507+
# e2-medium default pool that GKE requires at cluster creation time.
508+
logging.info('[swap_encryption] Deploying privileged DaemonSet')
543509
daemonset = _ds_mod.SwapDaemonSet(
544510
name=_DS_NAME,
545511
namespace=_DS_NAMESPACE,
@@ -549,28 +515,13 @@ def Prepare(spec: _BenchmarkSpec) -> None:
549515
)
550516
daemonset.Create()
551517
spec.resources.append(daemonset)
518+
logging.info('[swap_encryption] Benchmark pod ready: %s', daemonset.pod_name)
519+
_delete_default_pool(cluster)
520+
daemonset.WaitForPod()
552521
logging.info(
553-
'[swap_encryption] Benchmark pod ready: %s', daemonset.pod_name
522+
'[swap_encryption] Benchmark pod (post-deletion): %s', daemonset.pod_name
554523
)
555524

556-
# ── Step 2c+d (GCP): delete dummy default nodepool, re-resolve pod name ──
557-
if is_gcp:
558-
logging.info(
559-
'[swap_encryption] Step 2c: deleting dummy default nodepool'
560-
)
561-
nodepool.DeleteDefaultPool()
562-
# The pod may be evicted and rescheduled with a new name during the
563-
# default nodepool deletion. Re-resolve to avoid stale references.
564-
logging.info(
565-
'[swap_encryption] Step 2d: re-resolving benchmark pod after'
566-
' nodepool deletion'
567-
)
568-
daemonset.WaitForPod()
569-
logging.info(
570-
'[swap_encryption] Benchmark pod (post-deletion): %s',
571-
daemonset.pod_name,
572-
)
573-
574525

575526
def Run(spec: _BenchmarkSpec) -> list[sample.Sample]:
576527
"""Execute all benchmark phases with gate logic.
@@ -752,6 +703,32 @@ def Run(spec: _BenchmarkSpec) -> list[sample.Sample]:
752703
return results
753704

754705

706+
707+
def _delete_default_pool(cluster) -> None:
708+
"""Delete the dummy e2-medium default-pool once the benchmark pod is Running.
709+
710+
GKE requires at least one nodepool at cluster creation time; the e2-medium
711+
default-pool satisfies that requirement. Deleting it before the DaemonSet
712+
pod is Running can trigger a brief API-server timeout while two concurrent
713+
nodepool operations are in progress.
714+
"""
715+
try:
716+
cmd = cluster._GcloudCommand( # pylint: disable=protected-access
717+
'container', 'node-pools', 'delete', _DEFAULT_POOL,
718+
'--cluster', cluster.name,
719+
)
720+
cmd.args.append('--quiet')
721+
logging.info('[swap_encryption] Deleting default nodepool: %s', _DEFAULT_POOL)
722+
_, stderr, rc = cmd.Issue(timeout=300, raise_on_failure=False)
723+
if rc != 0:
724+
logging.warning(
725+
'[swap_encryption] Could not delete default nodepool (rc=%d): %s',
726+
rc, stderr,
727+
)
728+
else:
729+
logging.info('[swap_encryption] Default nodepool deleted')
730+
except Exception as e: # pylint: disable=broad-except
731+
logging.warning('[swap_encryption] _delete_default_pool failed: %s', e)
755732
def Cleanup(spec: _BenchmarkSpec) -> None:
756733
"""Resources in spec.resources are auto-deleted by the PKB framework.
757734

0 commit comments

Comments
 (0)