Skip to content

Commit adb7233

Browse files
committed
devtool: more robust logic for tweaking kvm module
There are several issues with the current implementation which result in spurious failures/timeouts in CI. Additionally, the kvm update logic was duplicated in a couple of places. To resolve these issues, refactor the code a bit to move the kvm loading/checking logic into one place and implement a more robust locking mechanism. Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>
1 parent 92fbd9f commit adb7233

1 file changed

Lines changed: 103 additions & 103 deletions

File tree

tools/devtool

Lines changed: 103 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -692,123 +692,126 @@ download_ci_artifacts() {
692692
LOCAL_ARTIFACTS_PATH=$local_artifacts_path
693693
}
694694

695-
# Attempt to load the appropriate KVM module for the current platform.
696-
# Returns 0 on success, non-zero on failure.
697-
#
698-
load_kvm() {
699-
local arch
700-
arch=$(uname -m)
701-
702-
case "$arch" in
703-
x86_64|i*86)
704-
if grep -q "vmx" /proc/cpuinfo; then
705-
modprobe kvm_intel || return 1
706-
elif grep -q "svm" /proc/cpuinfo; then
707-
modprobe kvm_amd avic=1 || return 1
708-
else
709-
return 1
710-
fi
711-
;;
712-
aarch64|arm*)
713-
modprobe kvm || return 1
714-
;;
715-
*)
695+
# Acquire the KVM module lock and run the given command.
696+
# Uses flock with a timeout for safe, automatic lock management.
697+
# Usage: with_kvm_module_lock <command> [args...]
698+
with_kvm_module_lock() {
699+
local LOCK_TIMEOUT=120
700+
(
701+
if ! flock -w "$LOCK_TIMEOUT" 9; then
702+
say_warn "Timed out waiting for KVM module lock after: ${LOCK_TIMEOUT}s"
703+
exit 1
704+
fi
705+
echo "Successfully acquired lock"
706+
"$@"
707+
) 9>"$KVM_MODULE_LOCKFILE"
708+
}
709+
710+
# Reload KVM modules with the given vendor module and kvm params.
711+
# Always enables avic=1 on AMD. Unloads first if already loaded.
712+
# Usage: reload_kvm_modules <vendor_mod> [kvm_param...]
713+
# e.g. reload_kvm_modules kvm_intel nx_huge_pages=never
714+
reload_kvm_modules() {
715+
local vendor_mod=$1; shift
716+
717+
# Unload if already loaded
718+
if lsmod | grep -qP "^kvm_(amd|intel)"; then
719+
if ! sudo modprobe -r $vendor_mod kvm; then
720+
say_warn "Failed to unload KVM modules (${vendor_mod}, kvm) (may be in use)"
716721
return 1
717-
;;
718-
esac
722+
fi
723+
fi
719724

720-
# Check /dev/kvm now exists
721-
[[ -c /dev/kvm ]]
725+
if ! sudo modprobe kvm "$@"; then
726+
say_warn "Failed to load kvm module"
727+
return 1
728+
fi
729+
if [[ $vendor_mod == "kvm_amd" ]]; then
730+
if ! sudo modprobe kvm_amd avic=1; then
731+
say_warn "Failed to load kvm_amd module"
732+
return 1
733+
fi
734+
else
735+
if ! sudo modprobe $vendor_mod; then
736+
say_warn "Failed to load $vendor_mod module"
737+
return 1
738+
fi
739+
fi
722740
}
723741

724-
# Check if /dev/kvm exists. Attempt to load the module if it doesn't.
725-
# Exit if KVM is unavailable. Upon returning from this call, the caller
726-
# can be certain /dev/kvm is available.
727-
#
728-
ensure_kvm() {
729-
[[ -c /dev/kvm ]] || load_kvm || die "/dev/kvm not found. Aborting."
742+
# Determine the KVM vendor module for the current CPU.
743+
kvm_vendor_mod() {
744+
if grep -q "vmx" /proc/cpuinfo; then
745+
echo kvm_intel
746+
elif grep -q "svm" /proc/cpuinfo; then
747+
echo kvm_amd
748+
else
749+
# aarch64
750+
echo kvm
751+
fi
730752
}
731753

732-
apply_linux_61_tweaks() {
733-
KV=$(uname -r)
734-
if [[ $KV != 6.1.* ]] || [ $(uname -m) != x86_64 ]; then
735-
return
754+
# Ensure /dev/kvm is available and apply platform-specific KVM tweaks.
755+
# - Loads KVM modules if not present
756+
# - On Linux 6.1 x86_64: applies nx_huge_pages=never for non-vulnerable CPUs,
757+
# checks favordynmods for vulnerable ones
758+
# - On AMD: ensures AVIC is enabled
759+
setup_kvm() {
760+
local kernel_version=$(uname -r)
761+
local arch=$(uname -m)
762+
local vendor_mod=$(kvm_vendor_mod)
763+
764+
local need_kvm_reload=0
765+
local kvm_extra_params=()
766+
767+
# Load KVM if not already available
768+
if [[ ! -c /dev/kvm ]]; then
769+
need_kvm_reload=1
736770
fi
737-
say "Applying Linux 6.1 boot-time regression mitigations"
738-
739-
KVM_VENDOR_MOD=$(lsmod |grep -P "^kvm_(amd|intel)" | awk '{print $1}')
740-
ITLB_MULTIHIT=/sys/devices/system/cpu/vulnerabilities/itlb_multihit
741-
NX_HUGEPAGES=/sys/module/kvm/parameters/nx_huge_pages
742-
743-
# If m6a/m6i
744-
if grep -q "Not affected" $ITLB_MULTIHIT; then
745-
echo -e "CPU not vulnerable to iTLB multihit, using kvm.nx_huge_pages=never mitigation"
746-
# we need a lock so another process is not running the same thing and to
747-
# avoid race conditions.
748-
set -C # noclobber
749-
while true; do
750-
if echo "$$" > "$KVM_MODULE_LOCKFILE"; then
751-
echo "Successfully acquired lock"
752-
if ! grep -q "never" $NX_HUGEPAGES; then
753-
echo "Reloading KVM modules with nx_huge_pages=never"
754-
sudo modprobe -r $KVM_VENDOR_MOD kvm
755-
sudo modprobe kvm nx_huge_pages=never
756-
if [[ $KVM_VENDOR_MOD == "kvm_amd" ]]; then
757-
sudo modprobe kvm_amd avic=1
758-
else
759-
sudo modprobe $KVM_VENDOR_MOD
760-
fi
761-
fi
762-
rm "$KVM_MODULE_LOCKFILE"
763-
break
764-
else
765-
sleep 5s
771+
772+
local itlb_multihit=/sys/devices/system/cpu/vulnerabilities/itlb_multihit
773+
local nx_huge_pages=/sys/module/kvm/parameters/nx_huge_pages
774+
# Linux 6.1 x86_64: mitigate boot-time regression
775+
if [[ $kernel_version == 6.1.* ]] && [[ $arch == x86_64 ]]; then
776+
777+
say "Applying Linux 6.1 boot-time regression mitigations"
778+
if grep -q "Not affected" $itlb_multihit; then
779+
echo "CPU not vulnerable to iTLB multihit, using kvm.nx_huge_pages=never mitigation"
780+
if ! grep -q "never" $nx_huge_pages 2>/dev/null; then
781+
kvm_extra_params+=(nx_huge_pages=never)
782+
need_kvm_reload=1
766783
fi
767-
done
768-
tail -v $ITLB_MULTIHIT $NX_HUGEPAGES
769-
# else (m5d Skylake and CascadeLake)
770-
else
771-
echo "CPU vulnerable to iTLB_multihit, checking if favordynmods is enabled"
772-
mount |grep cgroup |grep -q favordynmods
773-
if [ $? -ne 0 ]; then
774-
say_warn "cgroups' favordynmods option not enabled; VM creation performance may be impacted"
775784
else
776-
echo "favordynmods is enabled"
785+
echo "CPU vulnerable to iTLB_multihit, checking if favordynmods is enabled"
786+
if mount | grep cgroup | grep -q favordynmods; then
787+
echo "favordynmods is enabled"
788+
else
789+
say_warn "cgroups' favordynmods option not enabled; VM creation performance may be impacted"
790+
fi
777791
fi
778792
fi
779-
}
780793

781-
apply_amd_avic_tweaks() {
782-
if ! grep -q "svm" /proc/cpuinfo; then
783-
return
794+
# AMD: ensure AVIC is enabled
795+
local avic_param=/sys/module/kvm_amd/parameters/avic
796+
if [[ $vendor_mod == "kvm_amd" ]]; then
797+
if ! grep -q "Y\|1" $avic_param; then
798+
echo "AVIC not enabled, will reload kvm_amd with avic=1"
799+
need_kvm_reload=1
800+
fi
784801
fi
785802

786-
AVIC_PARAM=/sys/module/kvm_amd/parameters/avic
787-
if [[ ! -f $AVIC_PARAM ]]; then
788-
return
803+
if [[ $need_kvm_reload -eq 1 ]]; then
804+
echo "Reloading KVM modules"
805+
reload_kvm_modules "$vendor_mod" "${kvm_extra_params[@]}"
806+
ok_or_die "Could not reload kvm modules"
789807
fi
790808

791-
if grep -q "Y\|1" $AVIC_PARAM; then
792-
echo "AVIC already enabled"
793-
return
809+
tail -v $itlb_multihit $nx_huge_pages
810+
if [[ $vendor_mod == "kvm_amd" ]]; then
811+
tail -v $avic_param
794812
fi
795813

796-
set -C # noclobber
797-
while true; do
798-
if echo "$$" > "$KVM_MODULE_LOCKFILE"; then
799-
echo "Successfully acquired lock"
800-
if ! grep -q "Y\|1" $AVIC_PARAM; then
801-
echo "Reloading kvm_amd with avic=1"
802-
sudo modprobe -r kvm_amd
803-
sudo modprobe kvm_amd avic=1
804-
fi
805-
rm "$KVM_MODULE_LOCKFILE"
806-
break
807-
else
808-
sleep 5s
809-
fi
810-
done
811-
tail -v $AVIC_PARAM
814+
[[ -c /dev/kvm ]] || die "/dev/kvm not found. Aborting."
812815
}
813816

814817
# Modifies the processor's CPU governor and P-state configuration (x86_64 only) for consistent performance. This means
@@ -945,7 +948,7 @@ cmd_test() {
945948
done
946949

947950
# Check prerequisites.
948-
[ $do_kvm_check != 0 ] && ensure_kvm
951+
[ $do_kvm_check != 0 ] && with_kvm_module_lock setup_kvm
949952
ensure_devctr
950953
[ $do_build_dir_check != 0 ] && ensure_build_dir
951954
if [ $do_artifacts_check != 0 ]; then
@@ -964,9 +967,6 @@ cmd_test() {
964967
fi
965968
fi
966969

967-
apply_linux_61_tweaks
968-
apply_amd_avic_tweaks
969-
970970
# If we got to here, we've got all we need to continue.
971971
say "Kernel version: $(uname -r)"
972972
say "$(sed '/^processor.*: 0$/,/^processor.*: 1$/!d; /^processor.*: 1$/d' /proc/cpuinfo)"

0 commit comments

Comments
 (0)