@@ -166,53 +166,15 @@ spec:
166166 tar -xf "$PKB_KTARBALL" -C "$PKB_KROOT" 2>&1 || \
167167 echo "[pkb] WARNING: kernel source extraction failed" >&2 # non-fatal: warn only when tar fails
168168 fi
169- echo "[pkb] Unlocking container cgroup swap limits..."
170- # GKE cgroup v2 sets memory.swap.max=0 per-container, which
171- # prevents swap usage even when the node has a swap device and
172- # vm.swappiness>0. Stress-ng gets OOM-killed in ~15s because
173- # the kernel can't page out to swap for this cgroup.
174- #
175- # NOTE: the old approach derived the cgroup path from
176- # /proc/self/cgroup, but inside a cgroup namespace that reports
177- # "0::/" — so the write targeted the host ROOT cgroup, silently
178- # no-op'd, and swap stayed locked (the OOM-in-15s symptom above).
179- # /sys is the host cgroup tree (hostPath mount) and this pod is
180- # privileged, so instead unlock swap across the entire kubepods
181- # hierarchy, which is guaranteed to contain our own container.
182- if [ -d /sys/fs/cgroup/kubepods.slice ] || \
183- [ -d /sys/fs/cgroup/kubepods ]; then
184- # cgroup v2: write 'max' to every memory.swap.max under kubepods*.
185- find /sys/fs/cgroup -path '*kubepods*' -name memory.swap.max \
186- 2>/dev/null | while read -r _f; do
187- echo max > "$_f" 2>/dev/null || true
188- done
189- fi
190- # Best-effort: our own namespaced path and the unified root.
191- PKB_CG=$(awk -F: '$2==""{print $3; exit}' /proc/self/cgroup \
192- 2>/dev/null)
193- for _cgf in "/sys/fs/cgroup${PKB_CG}/memory.swap.max" \
194- /sys/fs/cgroup/memory.swap.max; do
195- [ -f "$_cgf" ] && { echo max > "$_cgf" 2>/dev/null || true; }
196- done
197- # cgroup v1 fallback: lift the combined RAM+swap hard ceiling.
198- find /sys/fs/cgroup/memory -path '*kubepods*' \
199- -name memory.memsw.limit_in_bytes 2>/dev/null \
200- | while read -r _f; do
201- echo -1 > "$_f" 2>/dev/null || true
202- done
203- # Verify and surface the result in the pod log. grep -L lists
204- # files that do NOT contain 'max' on their first line, i.e. ones
205- # still capping swap.
206- PKB_STILL_CAPPED=$(find /sys/fs/cgroup -path '*kubepods*' \
207- -name memory.swap.max 2>/dev/null \
208- | xargs -r grep -L '^max' 2>/dev/null | head -1)
209- if [ -n "$PKB_STILL_CAPPED" ]; then
210- echo "[pkb] WARNING: cgroup swap still capped at \
211- $PKB_STILL_CAPPED=$(cat "$PKB_STILL_CAPPED" 2>/dev/null) — stress-ng may be \
212- OOM-killed before swap is exercised" >&2
213- else
214- echo "[pkb] cgroup swap unlocked (memory.swap.max=max across kubepods)"
215- fi
169+ # Container cgroup swap limits are managed by the kubelet when
170+ # kubeletConfig.memorySwapBehavior=LimitedSwap is set via
171+ # --system-config-from-file (GKE) or kubelet-config.json (EKS),
172+ # so manually writing memory.swap.max=max across kubepods is not
173+ # required; the kubelet swap config supersedes it.
174+ # Reference: Ajay's review comment (go/pkb-swap-encryption-pr1,
175+ # #r3457928855):
176+ # https://github.com/GoogleCloudPlatform/PerfKitBenchmarker/pull/6776#discussion_r3457928855
177+ echo "[pkb] Swap limits managed by kubelet (LimitedSwap config)."
216178 echo "[pkb] Tools installed. Writing ready sentinel."
217179 touch /tmp/pkb_ready
218180 sleep infinity
@@ -264,3 +226,4 @@ spec:
264226 hostPath:
265227 path: /lib/modules
266228 type: Directory
229+