From 6f9f292d63bf0d90fb39eb31b22a1a4a76ae6a65 Mon Sep 17 00:00:00 2001 From: Inigo Date: Sat, 30 May 2026 09:09:12 -0700 Subject: [PATCH] fix: correct sku-cpu label for Harvest VMs using actual core count Harvest VMs expose more physical cores to the guest OS than the nominal vCPU count reported by the CRP SKU API (e.g., harvest_e8s_v3 reports 8 but the guest sees 16). This causes the kubernetes.azure.com/sku-cpu label to be incorrect, leading to scheduling mismatches. Linux: Add fixSkuCpuLabel() to cse_helpers.sh that compares the RP-provided sku-cpu label against nproc output and corrects it before kubelet starts. Windows: Add Fix-SkuCpuLabel to kubeletfunc.ps1 that compares against Win32_ComputerSystem.NumberOfLogicalProcessors and corrects the label in kubeletstart.ps1 before kubelet args are built. Both approaches are generic and work for any SKU where the CRP-reported vCPU count doesn't match the actual cores visible to the OS. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../linux/cloud-init/artifacts/cse_helpers.sh | 27 +++++++++++++++++++ parts/linux/cloud-init/artifacts/cse_main.sh | 4 +++ .../cloud-init/artifacts/cse_helpers_spec.sh | 24 +++++++++++++++++ staging/cse/windows/kubeletfunc.ps1 | 26 ++++++++++++++++++ staging/cse/windows/kubeletfunc.tests.ps1 | 25 +++++++++++++++++ .../provisioningscripts/kubeletstart.ps1 | 3 +++ 6 files changed, 109 insertions(+) diff --git a/parts/linux/cloud-init/artifacts/cse_helpers.sh b/parts/linux/cloud-init/artifacts/cse_helpers.sh index 2e3c4e31ba8..68c2de7210b 100755 --- a/parts/linux/cloud-init/artifacts/cse_helpers.sh +++ b/parts/linux/cloud-init/artifacts/cse_helpers.sh @@ -1124,6 +1124,33 @@ removeKubeletNodeLabel() { fi } +# For Harvest VMs (and potentially other oversubscribed SKUs), the sku-cpu label set by the RP +# reflects the nominal vCPU count from the CRP SKU API, but the guest OS may see more cores. 
# fixSkuCpuLabel reconciles the kubernetes.azure.com/sku-cpu entry of KUBELET_NODE_LABELS
# with the CPU count the guest OS reports through nproc. The RP-provided value is the
# nominal SKU vCPU count, which can be lower than what the guest sees (e.g. Harvest VMs).
# nodePrep in cse_main.sh calls this right before ensureKubelet, so the corrected label is
# in place before kubelet is started.
#
# Globals:
#   KUBELET_NODE_LABELS - comma-separated key=value list; read, and rewritten only when a
#                         sku-cpu value is actually corrected.
# Outputs:
#   stdout - "Correcting sku-cpu label ..." for each corrected entry, or a WARNING line
#            when nproc yields no usable count.
# Returns:
#   0 always; the correction is best effort and must not fail provisioning.
fixSkuCpuLabel() {
    local label_key="kubernetes.azure.com/sku-cpu"

    local actual_cores
    # "|| actual_cores=" keeps a failing nproc from aborting callers that run under set -e.
    actual_cores=$(nproc 2>/dev/null) || actual_cores=""
    # Accept only a positive integer; anything else (empty, 0, text) must never be written
    # into the label.
    if ! [[ "$actual_cores" =~ ^[1-9][0-9]*$ ]]; then
        echo "WARNING: could not detect actual CPU cores via nproc, skipping sku-cpu fix"
        return 0
    fi

    # Walk the list one label at a time so that only an entry whose key is exactly
    # ${label_key} is touched; keys that merely end in the same text are left alone.
    local remaining="${KUBELET_NODE_LABELS:-}"
    local rebuilt=""
    local changed=0
    local label current_value
    while :; do
        label="${remaining%%,*}"
        if [[ "$label" == "${label_key}="* ]]; then
            current_value="${label#"${label_key}="}"
            # An empty value is treated like a missing label and left untouched.
            if [[ -n "$current_value" && "$current_value" != "$actual_cores" ]]; then
                echo "Correcting sku-cpu label from ${current_value} to ${actual_cores} (actual cores from nproc)"
                label="${label_key}=${actual_cores}"
                changed=1
            fi
        fi
        rebuilt+="$label"
        # No comma left means this was the last label.
        [[ "$remaining" == *,* ]] || break
        remaining="${remaining#*,}"
        rebuilt+=","
    done

    # Only assign when something changed so an unset or untouched variable stays as it was.
    if [[ "$changed" -eq 1 ]]; then
        KUBELET_NODE_LABELS="$rebuilt"
    fi
    return 0
}
# ShellSpec examples for fixSkuCpuLabel (cse_helpers.sh). nproc is replaced by a shell
# function: a Describe-level stub reporting 16 cores, overridden inside individual examples.
# Every example that triggers output asserts on stdout.
Describe 'fixSkuCpuLabel'
    nproc() { echo "16"; }

    It 'should correct sku-cpu when nproc reports more cores than the label'
        KUBELET_NODE_LABELS="agentpool=harvest,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=harvest"
        When call fixSkuCpuLabel
        The stdout should include 'Correcting sku-cpu label from 8 to 16'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=harvest,kubernetes.azure.com/sku-cpu=16,kubernetes.azure.com/agentpool=harvest'
    End

    # The helper compares for inequality, so a label that is too large is corrected as well.
    It 'should correct sku-cpu when nproc reports fewer cores than the label'
        nproc() { echo "4"; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The stdout should include 'Correcting sku-cpu label from 8 to 4'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=4,kubernetes.azure.com/agentpool=pool1'
    End

    It 'should not modify sku-cpu when it already matches nproc'
        nproc() { echo "8"; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1'
    End

    It 'should do nothing when sku-cpu label is not present'
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/agentpool=pool1'
    End

    # Covers the WARNING branch: nproc fails and prints nothing.
    It 'should warn and leave the labels untouched when nproc reports nothing'
        nproc() { return 1; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The stdout should include 'WARNING: could not detect actual CPU cores via nproc'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1'
    End
End
# Fix-SkuCpuLabel reconciles the kubernetes.azure.com/sku-cpu entry of
# $global:KubeletNodeLabels with the logical processor count reported by
# Win32_ComputerSystem. The RP-provided value is the nominal SKU vCPU count, which can be
# lower than what the guest OS sees (e.g. Harvest VMs).
# kubeletstart.ps1 calls this right after assigning $global:KubeletNodeLabels from
# $Global:ClusterConfiguration.Kubernetes.Kubelet.NodeLabels.
#
# Globals:  $global:KubeletNodeLabels - comma-separated key=value list; read, and rewritten
#           only when a sku-cpu value is actually corrected. Label order is preserved.
# Output:   log lines via Write-Log (a correction message or a WARNING).
# Returns:  nothing. The fix is best effort: a failed processor query logs the warning
#           instead of propagating an error to the caller.
function Fix-SkuCpuLabel {
    $skuCpuPattern = '^kubernetes\.azure\.com/sku-cpu='

    # -ErrorAction Stop turns a CIM failure into a catchable exception so the outcome does
    # not depend on the caller's $ErrorActionPreference.
    $actualCores = 0
    try {
        $computerSystem = Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction Stop
        $actualCores = [int]$computerSystem.NumberOfLogicalProcessors
    } catch {
        $actualCores = 0
    }
    if ($actualCores -le 0) {
        Write-Log "WARNING: could not detect actual CPU cores, skipping sku-cpu fix"
        return
    }

    # Rebuild the list label by label and swap the value in place so the position of the
    # sku-cpu entry does not change.
    $updatedLabels = New-Object 'System.Collections.Generic.List[string]'
    $changed = $false
    foreach ($label in ($global:KubeletNodeLabels -split ",")) {
        if ($label -match $skuCpuPattern) {
            $currentValue = ($label -split '=', 2)[1]
            if ($currentValue -ne "$actualCores") {
                Write-Log "Correcting sku-cpu label from $currentValue to $actualCores (actual logical processors)"
                $label = "kubernetes.azure.com/sku-cpu=$actualCores"
                $changed = $true
            }
        }
        $updatedLabels.Add($label)
    }

    # Leave the global untouched when nothing had to be corrected.
    if ($changed) {
        $global:KubeletNodeLabels = $updatedLabels.ToArray() -join ","
    }
}

# Pester coverage for Fix-SkuCpuLabel (kubeletfunc.tests.ps1). Get-CimInstance is mocked so
# the examples do not depend on the host's processor count.
Describe 'Fix-SkuCpuLabel' {
    It "Should correct sku-cpu when actual cores differ from label value" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 16 } }
        $global:KubeletNodeLabels = "agentpool=harvest,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=harvest"
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Match 'kubernetes\.azure\.com/sku-cpu=16'
        $global:KubeletNodeLabels | Should -Not -Match 'sku-cpu=8'
    }

    It "Should keep the label position when correcting the value" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 16 } }
        $global:KubeletNodeLabels = "agentpool=harvest,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=harvest"
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be "agentpool=harvest,kubernetes.azure.com/sku-cpu=16,kubernetes.azure.com/agentpool=harvest"
    }

    It "Should not modify sku-cpu when it already matches actual cores" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 8 } }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be $expected
    }

    It "Should do nothing when sku-cpu label is not present" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 16 } }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be $expected
    }

    It "Should leave the labels untouched when the processor query fails" {
        Mock Get-CimInstance -MockWith { throw "CIM unavailable" }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        { Fix-SkuCpuLabel } | Should -Not -Throw
        $global:KubeletNodeLabels | Should -Be $expected
    }
}