Skip to content

Commit 89a8999

Browse files
authored
fix(windows): move add kubelet failure restart code (#8142)
1 parent 8b53e66 commit 89a8999

3 files changed

Lines changed: 68 additions & 2 deletions

File tree

e2e/scenario_win_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func Test_Windows2022_AzureNetwork(t *testing.T) {
6767
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
6868
ValidateCiliumIsNotRunningWindows(ctx, s)
6969
ValidateDotnetNotInstalledWindows(ctx, s)
70+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
7071
},
7172
},
7273
})
@@ -88,6 +89,7 @@ func Test_Windows2022AzureOverlayNetworkDualStack(t *testing.T) {
8889
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
8990
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
9091
ValidateCiliumIsNotRunningWindows(ctx, s)
92+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
9193
},
9294
},
9395
})
@@ -110,6 +112,7 @@ func Test_Windows2022Gen2AzureNetwork(t *testing.T) {
110112
ValidateCiliumIsNotRunningWindows(ctx, s)
111113
ValidateDotnetNotInstalledWindows(ctx, s)
112114
ValidateFileHasContent(ctx, s, "/AzureData/CustomDataSetupScript.log", "CSEScriptsPackageUrl used for provision is https://packages.aks.azure.com/aks/windows/cse/aks-windows-cse-scripts-current.zip")
115+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
113116
},
114117
},
115118
})
@@ -132,6 +135,7 @@ func Test_Windows2022Gen2AzureOverlayNetworkDualStack(t *testing.T) {
132135
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
133136
ValidateCiliumIsNotRunningWindows(ctx, s)
134137
ValidateFileHasContent(ctx, s, "/AzureData/CustomDataSetupScript.log", "CSEScriptsPackageUrl used for provision is https://packages.aks.azure.com/aks/windows/cse/aks-windows-cse-scripts-current.zip")
138+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
135139
},
136140
},
137141
})
@@ -152,6 +156,7 @@ func Test_Windows23H2AzureNetwork(t *testing.T) {
152156
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
153157
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
154158
ValidateCiliumIsNotRunningWindows(ctx, s)
159+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
155160
},
156161
},
157162
})
@@ -173,6 +178,7 @@ func Test_Windows23H2AzureOverlayNetworkDualStack(t *testing.T) {
173178
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
174179
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
175180
ValidateCiliumIsNotRunningWindows(ctx, s)
181+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
176182
},
177183
},
178184
})
@@ -194,6 +200,7 @@ func Test_Windows23H2Gen2AzureNetwork(t *testing.T) {
194200
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
195201
ValidateCiliumIsNotRunningWindows(ctx, s)
196202
ValidateFileHasContent(ctx, s, "/AzureData/CustomDataSetupScript.log", "CSEScriptsPackageUrl used for provision is https://packages.aks.azure.com/aks/windows/cse/aks-windows-cse-scripts-current.zip")
203+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
197204
},
198205
},
199206
})
@@ -216,6 +223,7 @@ func Test_Windows23H2Gen2AzureOverlayDualStack(t *testing.T) {
216223
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
217224
ValidateCiliumIsNotRunningWindows(ctx, s)
218225
ValidateFileHasContent(ctx, s, "/AzureData/CustomDataSetupScript.log", "CSEScriptsPackageUrl used for provision is https://packages.aks.azure.com/aks/windows/cse/aks-windows-cse-scripts-current.zip")
226+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
219227
},
220228
},
221229
})
@@ -277,6 +285,7 @@ func Test_Windows2025(t *testing.T) {
277285
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
278286
ValidateCiliumIsNotRunningWindows(ctx, s)
279287
ValidateDotnetNotInstalledWindows(ctx, s)
288+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
280289
},
281290
},
282291
})
@@ -300,6 +309,7 @@ func Test_Windows2025Gen2(t *testing.T) {
300309
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
301310
ValidateCiliumIsNotRunningWindows(ctx, s)
302311
ValidateDotnetNotInstalledWindows(ctx, s)
312+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
303313
},
304314
},
305315
})
@@ -329,6 +339,7 @@ func Test_Windows2022_SecureTLSBootstrapping_BootstrapToken_Fallback(t *testing.
329339
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
330340
ValidateCiliumIsNotRunningWindows(ctx, s)
331341
ValidateDotnetNotInstalledWindows(ctx, s)
342+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
332343
},
333344
},
334345
})
@@ -382,6 +393,7 @@ func Test_Windows2022_VHDCaching(t *testing.T) {
382393
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
383394
ValidateCiliumIsNotRunningWindows(ctx, s)
384395
ValidateDotnetNotInstalledWindows(ctx, s)
396+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
385397
},
386398
},
387399
})
@@ -406,6 +418,7 @@ func Test_Windows2022Gen2_k8s_133(t *testing.T) {
406418
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
407419
ValidateCiliumIsNotRunningWindows(ctx, s)
408420
ValidateDotnetNotInstalledWindows(ctx, s)
421+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
409422
},
410423
},
411424
})
@@ -427,6 +440,7 @@ func Test_Windows23H2_Cilium2(t *testing.T) {
427440
ValidateFileHasContent(ctx, s, "/k/kubeletstart.ps1", "--container-runtime=remote")
428441
ValidateWindowsProcessHasCliArguments(ctx, s, "kubelet.exe", []string{"--rotate-certificates=true", "--client-ca-file=c:\\k\\ca.crt"})
429442
ValidateCiliumIsRunningWindows(ctx, s)
443+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
430444
},
431445
},
432446
})
@@ -448,6 +462,7 @@ func Test_Windows23H2Gen2_WindowsCiliumNetworking(t *testing.T) {
448462
},
449463
Validator: func(ctx context.Context, s *Scenario) {
450464
ValidateWindowsCiliumIsRunning(ctx, s)
465+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
451466
},
452467
},
453468
})
@@ -474,6 +489,7 @@ func Test_Windows2022_McrChinaCloud_Windows(t *testing.T) {
474489
`C:\ProgramData\containerd\certs.d\mcr.azk8s.cn\hosts.toml`,
475490
`https://mcr.azk8s.cn`)
476491
ValidateDotnetNotInstalledWindows(ctx, s)
492+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
477493
},
478494
},
479495
})
@@ -508,6 +524,7 @@ func Test_Windows2025Gen2_McrChinaCloud_Windows(t *testing.T) {
508524
ValidateFileHasContent(ctx, s,
509525
`C:\ProgramData\containerd\certs.d\mcr.azk8s.cn\hosts.toml`,
510526
`https://mcr.azk8s.cn`)
527+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
511528
},
512529
},
513530
})
@@ -553,6 +570,7 @@ func Test_NetworkIsolatedCluster_Windows_WithEgress(t *testing.T) {
553570
ValidateFileExists(ctx, s, `C:\ProgramData\containerd\certs.d\mcr.microsoft.com\hosts.toml`)
554571
ValidateFileDoesNotExist(ctx, s, `C:\ProgramData\containerd\certs.d\mcr.azk8s.cn\hosts.toml`)
555572
ValidateDotnetNotInstalledWindows(ctx, s)
573+
ValidateWindowsSystemServicesRestartConfiguration(ctx, s)
556574
},
557575
},
558576
})

e2e/validators.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,55 @@ func ValidateDotnetNotInstalledWindows(ctx context.Context, s *Scenario) {
739739
".NET should not be installed on the Windows node")
740740
}
741741

742+
func ValidateWindowsSystemServiceRestartConfiguration(ctx context.Context, s *Scenario, serviceName string) {
743+
s.T.Helper()
744+
745+
command := []string{
746+
fmt.Sprintf("sc.exe qfailure %s", serviceName),
747+
}
748+
749+
execResult := execScriptOnVMForScenarioValidateExitCode(
750+
ctx,
751+
s,
752+
strings.Join(command, "\n"),
753+
0,
754+
fmt.Sprintf("failed to validate restart configuration for Windows service %s", serviceName),
755+
)
756+
757+
var RESET_PERIOD = "RESET_PERIOD"
758+
var FAILURE_ACTIONS = "FAILURE_ACTIONS"
759+
760+
fields := map[string]string{}
761+
sdtout := execResult.stdout
762+
lines := strings.Split(sdtout, "\n")
763+
for _, line := range lines {
764+
parts := strings.SplitN(line, ":", 2)
765+
if len(parts) != 2 {
766+
continue
767+
}
768+
key := strings.TrimSpace(parts[0])
769+
value := strings.TrimSpace(parts[1])
770+
if strings.Contains(key, RESET_PERIOD) {
771+
fields[RESET_PERIOD] = value
772+
}
773+
if strings.Contains(key, FAILURE_ACTIONS) {
774+
fields[FAILURE_ACTIONS] = value
775+
}
776+
}
777+
if fields[RESET_PERIOD] != "900" {
778+
s.T.Fatalf("Expected 'Reset fail counter after' to be set to 900 seconds for service %s, but got: %s", serviceName, sdtout)
779+
}
780+
if fields[FAILURE_ACTIONS] != "RESTART -- Delay = 60000 milliseconds." {
781+
s.T.Fatalf("Expected 'Failure actions' to be set to 'RESTART -- Delay = 60000 milliseconds.' for service %s, but got: %s", serviceName, sdtout)
782+
}
783+
}
784+
785+
func ValidateWindowsSystemServicesRestartConfiguration(ctx context.Context, s *Scenario) {
786+
ValidateWindowsSystemServiceRestartConfiguration(ctx, s, "kubelet")
787+
ValidateWindowsSystemServiceRestartConfiguration(ctx, s, "containerd")
788+
ValidateWindowsSystemServiceRestartConfiguration(ctx, s, "kubeproxy")
789+
}
790+
742791
func ValidateSystemdUnitIsNotFailed(ctx context.Context, s *Scenario, serviceName string) {
743792
s.T.Helper()
744793
command := []string{

parts/windows/kuberneteswindowssetup.ps1

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,6 @@ function BasePrep {
471471
Adjust-PageFileSize
472472
Logs-To-Event -TaskName "AKS.WindowsCSE.PreprovisionExtension" -TaskMessage "Start preProvisioning script"
473473
PREPROVISION_EXTENSION
474-
Update-ServiceFailureActions
475474
Adjust-DynamicPortRange
476475
Register-LogsCleanupScriptTask
477476
Register-NodeResetScriptTask
@@ -505,6 +504,7 @@ function BasePrep {
505504
# All operations that should only run when connecting to the actual cluster
506505
function NodePrep {
507506
Install-KubernetesServices -KubeDir $global:KubeDir
507+
Update-ServiceFailureActions
508508

509509
Write-Log "Starting NodePrep - Cluster integration"
510510
Logs-To-Event -TaskName "AKS.WindowsCSE.NodePrep" -TaskMessage "Starting NodePrep - Cluster integration"
@@ -598,7 +598,6 @@ function NodePrep {
598598
$timer.Stop()
599599
Write-Log -Message "We waited [$($timer.Elapsed.TotalSeconds)] seconds on NodeResetScriptTask"
600600
}
601-
602601
Write-Log "NodePrep completed successfully"
603602
Logs-To-Event -TaskName "AKS.WindowsCSE.NodePrep" -TaskMessage "NodePrep completed successfully"
604603
}

0 commit comments

Comments
 (0)