# Reconciles the kubernetes.azure.com/sku-cpu node label with the CPU count seen by the guest OS.
#
# For Harvest VMs (and potentially other oversubscribed SKUs), the sku-cpu label set by the RP
# reflects the nominal vCPU count from the CRP SKU API, but the guest OS may see more cores.
# This function compares the label value against nproc output and, on a mismatch, rewrites the
# label so that it carries the core count visible to the OS.
#
# Globals:
#   KUBELET_NODE_LABELS (read, written) - comma-separated list of key=value node labels.
# Outputs:
#   stdout: a WARNING line when nproc yields no usable count, or a "Correcting ..." line when
#           the label is rewritten. Nothing is printed otherwise.
# Returns:
#   0 in every case; an undetectable core count or a missing label is treated as "nothing to do".
fixSkuCpuLabel() {
    local actual_cores
    actual_cores=$(nproc 2>/dev/null) || actual_cores=""

    # Anything that is not a plain positive integer is unusable. Normalising it to 0 up front
    # keeps the numeric test below from erroring out on non-numeric text.
    case "$actual_cores" in
        '' | *[!0-9]*) actual_cores=0 ;;
    esac
    if [ "$actual_cores" -eq 0 ]; then
        echo "WARNING: could not detect actual CPU cores via nproc, skipping sku-cpu fix"
        return 0
    fi

    # Wrap the list in commas so the label can be matched on exact key boundaries. A key that
    # merely ends with the same text (e.g. foo.kubernetes.azure.com/sku-cpu) must not match.
    local label_prefix=",kubernetes.azure.com/sku-cpu="
    local padded=",${KUBELET_NODE_LABELS:-},"
    case "$padded" in
        *"$label_prefix"*) ;;
        *)
            # Label not present — nothing to fix
            return 0
            ;;
    esac

    # Split around the first occurrence of the label: text before it, and "value,rest" after it.
    local before after current_value
    before=${padded%%"$label_prefix"*}
    after=${padded#*"$label_prefix"}
    current_value=${after%%,*}
    if [ -z "$current_value" ]; then
        # A label with an empty value is left alone, as before.
        return 0
    fi

    if [ "$current_value" != "$actual_cores" ]; then
        echo "Correcting sku-cpu label from ${current_value} to ${actual_cores} (actual cores from nproc)"
        padded="${before}${label_prefix}${actual_cores},${after#*,}"
        # Drop the two sentinel commas added above.
        padded=${padded#,}
        KUBELET_NODE_LABELS=${padded%,}
    fi
}
# Specs for fixSkuCpuLabel: the sku-cpu entry in KUBELET_NODE_LABELS must end up equal to the
# core count reported by nproc, and every other label must be left exactly as it was.
Describe 'fixSkuCpuLabel'
    # Default mock: the guest OS reports 16 cores. Individual examples override it as needed.
    nproc() { echo "16"; }

    It 'should correct sku-cpu when nproc reports more cores than the label'
        KUBELET_NODE_LABELS="agentpool=harvest,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=harvest"
        When call fixSkuCpuLabel
        The stdout should include 'Correcting sku-cpu label from 8 to 16'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=harvest,kubernetes.azure.com/sku-cpu=16,kubernetes.azure.com/agentpool=harvest'
    End

    It 'should correct sku-cpu when nproc reports fewer cores than the label'
        nproc() { echo "4"; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The stdout should include 'Correcting sku-cpu label from 8 to 4'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=4,kubernetes.azure.com/agentpool=pool1'
    End

    It 'should correct sku-cpu when it is the last label in the list'
        KUBELET_NODE_LABELS="agentpool=harvest,kubernetes.azure.com/sku-cpu=8"
        When call fixSkuCpuLabel
        The stdout should include 'Correcting sku-cpu label from 8 to 16'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=harvest,kubernetes.azure.com/sku-cpu=16'
    End

    It 'should not modify sku-cpu when it already matches nproc'
        nproc() { echo "8"; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1'
    End

    It 'should do nothing when sku-cpu label is not present'
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/agentpool=pool1'
    End

    # Guard branch: a zero core count is unusable, so the label must be left alone.
    It 'should warn and leave labels untouched when nproc reports no usable core count'
        nproc() { echo "0"; }
        KUBELET_NODE_LABELS="agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        When call fixSkuCpuLabel
        The status should be success
        The stdout should include 'WARNING: could not detect actual CPU cores via nproc'
        The variable KUBELET_NODE_LABELS should equal 'agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1'
    End
End
# Reconciles the kubernetes.azure.com/sku-cpu node label with the logical processor count
# seen by the guest OS.
#
# For Harvest VMs (and potentially other oversubscribed SKUs), the sku-cpu label set by the RP
# reflects the nominal vCPU count from the CRP SKU API, but the guest OS may see more cores.
# This function compares the label value against Win32_ComputerSystem.NumberOfLogicalProcessors
# and, on a mismatch, rewrites the label in place so the order of the other labels is kept.
#
# Reads and writes $global:KubeletNodeLabels (comma-separated key=value list). Takes no
# parameters and returns nothing. When the processor count cannot be determined, or the label
# is absent, the label list is left untouched.
function Fix-SkuCpuLabel {
    $actualCores = $null
    try {
        $actualCores = (Get-CimInstance -ClassName Win32_ComputerSystem -ErrorAction Stop).NumberOfLogicalProcessors
    }
    catch {
        # Best effort: a failed CIM query falls through to the "could not detect" path below
        # instead of terminating the calling script.
        $actualCores = $null
    }
    if (-not $actualCores -or $actualCores -eq 0) {
        Write-Log "WARNING: could not detect actual CPU cores, skipping sku-cpu fix"
        return
    }

    $skuCpuPattern = '^kubernetes\.azure\.com/sku-cpu='
    # @(...) keeps these as arrays even when only one element comes back from the pipeline.
    $labelList = @($global:KubeletNodeLabels -split ",")
    $skuCpuLabel = @($labelList | Where-Object { $_ -match $skuCpuPattern })
    if ($skuCpuLabel.Count -eq 0) {
        return
    }

    $currentValue = ($skuCpuLabel[0] -split '=', 2)[1]
    if ($currentValue -ne "$actualCores") {
        Write-Log "Correcting sku-cpu label from $currentValue to $actualCores (actual logical processors)"
        # Substitute the entry where it stands. Filtering it out and appending with += would
        # turn into string concatenation whenever the filter leaves a single (scalar) label.
        $updatedLabels = foreach ($label in $labelList) {
            if ($label -match $skuCpuPattern) {
                "kubernetes.azure.com/sku-cpu=$actualCores"
            }
            else {
                $label
            }
        }
        $global:KubeletNodeLabels = @($updatedLabels) -join ","
    }
}
# Tests for Fix-SkuCpuLabel: the sku-cpu entry in $global:KubeletNodeLabels must end up equal
# to the mocked logical processor count, and every other label must survive unchanged.
# Assertions on the corrected list are order-independent.
Describe 'Fix-SkuCpuLabel' {
    It "Should correct sku-cpu when actual cores differ from label value" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 16 } }
        $global:KubeletNodeLabels = "agentpool=harvest,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=harvest"
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Match 'kubernetes\.azure\.com/sku-cpu=16'
        $global:KubeletNodeLabels | Should -Not -Match 'sku-cpu=8'

        # The remaining labels must still be present as separate, intact entries.
        $labels = @($global:KubeletNodeLabels -split ',')
        $labels.Count | Should -Be 3
        $labels | Should -Contain 'kubernetes.azure.com/sku-cpu=16'
        $labels | Should -Contain 'agentpool=harvest'
        $labels | Should -Contain 'kubernetes.azure.com/agentpool=harvest'
    }

    It "Should not modify sku-cpu when it already matches actual cores" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 8 } }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be $expected
    }

    It "Should do nothing when sku-cpu label is not present" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 16 } }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be $expected
    }

    # Guard branch: a processor count of 0 is unusable, so the label must be left alone.
    It "Should leave labels untouched when the processor count cannot be determined" {
        Mock Get-CimInstance -MockWith { [PSCustomObject]@{ NumberOfLogicalProcessors = 0 } }
        $global:KubeletNodeLabels = "agentpool=pool1,kubernetes.azure.com/sku-cpu=8,kubernetes.azure.com/agentpool=pool1"
        $expected = $global:KubeletNodeLabels
        Fix-SkuCpuLabel
        $global:KubeletNodeLabels | Should -Be $expected
    }
}
$global:EnableSecureTLSBootstrapping = [System.Convert]::ToBoolean($Global:ClusterConfiguration.Kubernetes.Kubelet.SecureTLSBootstrapArgs.Enabled)