Skip to content

Commit 18d533e

Browse files
ShadowCurseJackThomson2
authored and committed
devtool: more robust logic for tweaking kvm module
There are several issues with the current implementation which result in spurious failures/timeouts in CI. Additionally, the KVM update logic was duplicated in a couple of places. To resolve these issues, refactor the code to move the KVM loading/checking logic into one place and implement a more robust locking mechanism. Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>
1 parent ba56cd2 commit 18d533e

1 file changed

Lines changed: 113 additions & 74 deletions

File tree

tools/devtool

Lines changed: 113 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ TARGET_PREFIX="$(uname -m)-unknown-linux-"
133133
# Container path to directory where we store built CI artifacts.
134134
CTR_CI_ARTIFACTS_PATH="${CTR_FC_ROOT_DIR}/resources/$(uname -m)"
135135

136+
# Lock file held (via flock) while KVM kernel modules are being loaded, unloaded or reconfigured.
137+
KVM_MODULE_LOCKFILE="/tmp/.kvm_module_lock"
138+
136139
# Check if Docker is available and exit if it's not.
137140
# Upon returning from this call, the caller can be certain Docker is available.
138141
#
@@ -583,89 +586,127 @@ ensure_ci_artifacts() {
583586
fi
584587
}
585588

586-
# Attempt to load the appropriate KVM module for the current platform.
587-
# Returns 0 on success, non-zero on failure.
588-
#
589-
load_kvm() {
590-
local arch
591-
arch=$(uname -m)
592-
593-
case "$arch" in
594-
x86_64|i*86)
595-
if grep -q "vmx" /proc/cpuinfo; then
596-
modprobe kvm_intel || return 1
597-
elif grep -q "svm" /proc/cpuinfo; then
598-
modprobe kvm_amd avic=1 || return 1
599-
else
600-
return 1
601-
fi
602-
;;
603-
aarch64|arm*)
604-
modprobe kvm || return 1
605-
;;
606-
*)
589+
# Run a command while holding an exclusive flock on $KVM_MODULE_LOCKFILE.
#
# The lock is taken on file descriptor 9, which is opened on the lock file for
# the duration of a subshell; the lock is released when that subshell ends.
# Waits up to 120 seconds for the lock, then warns and fails with status 1.
#
# Note: the command runs inside the subshell, so an `exit` issued by it only
# terminates the subshell. Callers must check this function's return status.
#
# Usage:   with_kvm_module_lock <command> [args...]
# Returns: the exit status of <command>, or 1 if the lock could not be taken.
with_kvm_module_lock() {
    local max_wait_secs=120
    (
        flock -w "$max_wait_secs" 9 || {
            say_warn "Timed out waiting for KVM module lock after: ${max_wait_secs}s"
            exit 1
        }
        echo "Successfully acquired lock"
        "$@"
    ) 9>"$KVM_MODULE_LOCKFILE"
}
603+
604+
# Reload the KVM kernel modules with the given vendor module and kvm params.
#
# If any part of the KVM stack (kvm, kvm_amd or kvm_intel) is currently listed
# by lsmod, it is unloaded first. The core `kvm` module is checked as well as
# the vendor modules: if `kvm` stayed loaded, the following `modprobe kvm ...`
# would do nothing and the requested parameters would silently not be applied.
# `kvm_amd` is always loaded with avic=1.
#
# Usage:     reload_kvm_modules <vendor_mod> [kvm_param...]
#   e.g.     reload_kvm_modules kvm_intel nx_huge_pages=never
# Arguments: $1   - vendor module name (kvm_intel, kvm_amd, or kvm)
#            $2.. - parameters passed to `modprobe kvm`
# Returns:   0 on success, 1 if the vendor module name is missing or any
#            unload/load step fails (a warning is printed via say_warn).
reload_kvm_modules() {
    local vendor_mod=${1:-}
    if [[ -z $vendor_mod ]]; then
        say_warn "reload_kvm_modules: missing vendor module name"
        return 1
    fi
    shift

    # Unload if already loaded. The trailing whitespace match keeps unrelated
    # modules whose names merely start with "kvm" from triggering an unload.
    if lsmod | grep -qE "^kvm(_amd|_intel)?[[:space:]]"; then
        if ! sudo modprobe -r "$vendor_mod" kvm; then
            say_warn "Failed to unload KVM modules (${vendor_mod}, kvm) (may be in use)"
            return 1
        fi
    fi

    if ! sudo modprobe kvm "$@"; then
        say_warn "Failed to load kvm module"
        return 1
    fi

    # AMD is always loaded with AVIC enabled; other vendors take no params.
    local vendor_params=()
    if [[ $vendor_mod == "kvm_amd" ]]; then
        vendor_params=(avic=1)
    fi
    if ! sudo modprobe "$vendor_mod" "${vendor_params[@]}"; then
        say_warn "Failed to load $vendor_mod module"
        return 1
    fi
}
614635

615-
# Check if /dev/kvm exists. Attempt to load the module if it doesn't.
616-
# Exit if KVM is unavailable. Upon returning from this call, the caller
617-
# can be certain /dev/kvm is available.
618-
#
619-
ensure_kvm() {
620-
[[ -c /dev/kvm ]] || load_kvm || die "/dev/kvm not found. Aborting."
636+
# Print the name of the KVM vendor module matching the host CPU.
#
# Looks for the x86 virtualization flags in /proc/cpuinfo:
#   "vmx" -> kvm_intel
#   "svm" -> kvm_amd
# When neither is found the host is assumed to be aarch64 and plain "kvm"
# is printed.
kvm_vendor_mod() {
    local cpuinfo=/proc/cpuinfo

    if grep -q "vmx" "$cpuinfo"; then
        echo kvm_intel
        return
    fi

    if grep -q "svm" "$cpuinfo"; then
        echo kvm_amd
        return
    fi

    # No x86 virtualization flag present (aarch64)
    echo kvm
}
622647

623-
apply_linux_61_tweaks() {
624-
KV=$(uname -r)
625-
if [[ $KV != 6.1.* ]] || [ $(uname -m) != x86_64 ]; then
626-
return
648+
# Ensure /dev/kvm is available and apply platform-specific KVM tweaks.
#  - Requests a (re)load of the KVM modules if /dev/kvm is not present.
#  - On Linux 6.1 x86_64: uses kvm.nx_huge_pages=never when the CPU reports
#    "Not affected" for iTLB multihit; otherwise only checks whether the
#    cgroup mount has the favordynmods option and warns if it does not.
#  - On AMD: requests a reload if the kvm_amd avic parameter is not enabled.
#
# All conditions are evaluated first and at most one module reload is done.
# Dies (via ok_or_die / die) if the reload fails or /dev/kvm is still missing.
setup_kvm() {
    # Declared separately from the assignments so `local` does not mask the
    # exit status of the command substitutions.
    local kernel_version arch vendor_mod
    kernel_version=$(uname -r)
    arch=$(uname -m)
    vendor_mod=$(kvm_vendor_mod)

    local need_kvm_reload=0
    local kvm_extra_params=()

    local itlb_multihit=/sys/devices/system/cpu/vulnerabilities/itlb_multihit
    local nx_huge_pages=/sys/module/kvm/parameters/nx_huge_pages
    local avic_param=/sys/module/kvm_amd/parameters/avic

    # Load KVM if not already available
    if [[ ! -c /dev/kvm ]]; then
        need_kvm_reload=1
    fi

    # Linux 6.1 x86_64: mitigate boot-time regression
    if [[ $kernel_version == 6.1.* && $arch == x86_64 ]]; then
        say "Applying Linux 6.1 boot-time regression mitigations"
        if grep -q "Not affected" "$itlb_multihit"; then
            echo "CPU not vulnerable to iTLB multihit, using kvm.nx_huge_pages=never mitigation"
            # Always pass the parameter to any reload, even when it is already
            # in effect: a reload triggered for another reason (AVIC, missing
            # /dev/kvm) would otherwise load kvm without it and drop the
            # mitigation.
            kvm_extra_params+=(nx_huge_pages=never)
            # A missing parameter file (kvm not loaded) also counts as "not set".
            if ! grep -q "never" "$nx_huge_pages" 2>/dev/null; then
                need_kvm_reload=1
            fi
        else
            echo "CPU vulnerable to iTLB_multihit, checking if favordynmods is enabled"
            if mount | grep cgroup | grep -q favordynmods; then
                echo "favordynmods is enabled"
            else
                say_warn "cgroups' favordynmods option not enabled; VM creation performance may be impacted"
            fi
        fi
    fi

    # AMD: ensure AVIC is enabled. The parameter file is absent when kvm_amd
    # is not loaded; that case is treated as "not enabled" without noise.
    if [[ $vendor_mod == "kvm_amd" ]]; then
        if ! grep -qE "Y|1" "$avic_param" 2>/dev/null; then
            echo "AVIC not enabled, will reload kvm_amd with avic=1"
            need_kvm_reload=1
        fi
    fi

    if [[ $need_kvm_reload -eq 1 ]]; then
        echo "Reloading KVM modules"
        reload_kvm_modules "$vendor_mod" "${kvm_extra_params[@]}"
        ok_or_die "Could not reload kvm modules"
    fi

    # Report the resulting settings, skipping files this platform lacks.
    local report_files=() candidate
    for candidate in "$itlb_multihit" "$nx_huge_pages"; do
        if [[ -r $candidate ]]; then
            report_files+=("$candidate")
        fi
    done
    if [[ ${#report_files[@]} -gt 0 ]]; then
        tail -v "${report_files[@]}"
    fi
    if [[ $vendor_mod == "kvm_amd" && -r $avic_param ]]; then
        tail -v "$avic_param"
    fi

    [[ -c /dev/kvm ]] || die "/dev/kvm not found. Aborting."
}
669710

670711
# Modifies the processors CPU governor and P-state configuration (x86_64 only) for consistent performance. This means
671712
# - Disable turbo boost (Intel only) by writing 1 to /sys/devices/system/cpu/intel_pstate/no_turbo
@@ -759,7 +800,7 @@ cmd_test() {
759800
done
760801

761802
# Check prerequisites.
762-
[ $do_kvm_check != 0 ] && ensure_kvm
803+
[ $do_kvm_check != 0 ] && with_kvm_module_lock setup_kvm
763804
ensure_devctr
764805
ensure_build_dir
765806
ensure_ci_artifacts
@@ -771,8 +812,6 @@ cmd_test() {
771812
fi
772813
fi
773814

774-
apply_linux_61_tweaks
775-
776815
# If we got to here, we've got all we need to continue.
777816
say "Kernel version: $(uname -r)"
778817
say "$(sed '/^processor.*: 0$/,/^processor.*: 1$/!d; /^processor.*: 1$/d' /proc/cpuinfo)"

0 commit comments

Comments
 (0)