Skip to content

Commit 2c1bafc

Browse files
author
Ranjith Rajaram
committed
Add E2E test for MaxParallelUpgrades enforcement during userData changes
Enhances the existing testUserDataTamper test to verify that MaxParallelUpgrades=1 is enforced when userData template changes trigger machine deletions (the pub-key-hash mismatch deletion path).

The test now:
1. Deploys the parallel-upgrades-checker monitoring job before the userData change
2. Verifies the checker did not fail (a failure would indicate more than one simultaneous upgrade)
3. Cleans up the checker job after the test

This tests the specific scenario in which the bug bypassed MaxParallelUpgrades: userData template changes causing simultaneous deletion of machines across MachineSets.

The implementation leverages existing infrastructure:
- hack/e2e/resources/parallel-upgrade-checker-job.yaml (monitoring job)
- testUserDataTamper (already triggers userData changes)
- waitForNewMachineNodes (already waits for machine recreation)

Helper functions added:
- deployParallelUpgradesChecker() - Deploys the monitoring job
- verifyParallelUpgradesChecker() - Checks for violations
- cleanupParallelUpgradesChecker() - Removes the job

The test is skipped if fewer than 2 Machine nodes are present, as the parallel behavior cannot be verified with a single node.

Signed-off-by: Ranjith Rajaram <ranjith@redhat.com>
1 parent bfa7339 commit 2c1bafc

2 files changed

Lines changed: 93 additions & 0 deletions

File tree

test/e2e/main_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,18 @@ import (
44
"context"
55
"flag"
66
"fmt"
7+
"log"
78
"os"
9+
"os/exec"
10+
"strings"
811
"testing"
912
"time"
1013

1114
config "github.com/openshift/api/config/v1"
1215
imageClient "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1"
1316
"github.com/openshift/library-go/pkg/image/imageutil"
1417
core "k8s.io/api/core/v1"
18+
apierrors "k8s.io/apimachinery/pkg/api/errors"
1519
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
1620

1721
"github.com/openshift/windows-machine-config-operator/pkg/retry"
@@ -184,3 +188,76 @@ func TestMain(m *testing.M) {
184188

185189
os.Exit(m.Run())
186190
}
191+
192+
// deployParallelUpgradesChecker deploys the monitoring job that checks for MaxParallelUpgrades violations
193+
func (tc *testContext) deployParallelUpgradesChecker() error {
194+
// Read the job YAML template
195+
jobYAML, err := os.ReadFile("hack/e2e/resources/parallel-upgrade-checker-job.yaml")
196+
if err != nil {
197+
return fmt.Errorf("failed to read parallel upgrades checker job YAML: %w", err)
198+
}
199+
200+
// Get the tools image from the OpenShift imagestream
201+
toolsImage, err := getOpenShiftToolsImage(tc.client.Images)
202+
if err != nil {
203+
// Fall back to a default tools image if imagestream is not available
204+
log.Printf("Warning: could not get tools image from imagestream: %v. Using default.", err)
205+
toolsImage = "registry.ci.openshift.org/ocp/4.17:tools"
206+
}
207+
208+
// Replace placeholders
209+
jobYAMLStr := string(jobYAML)
210+
jobYAMLStr = strings.Replace(jobYAMLStr, "REPLACE_WITH_OPENSHIFT_TOOLS_IMAGE", toolsImage, 1)
211+
jobYAMLStr = strings.Replace(jobYAMLStr, "namespace: wmco-test",
212+
fmt.Sprintf("namespace: %s", tc.workloadNamespace), 1)
213+
214+
// Create the job using kubectl apply
215+
cmd := exec.Command("kubectl", "apply", "-f", "-")
216+
cmd.Stdin = strings.NewReader(jobYAMLStr)
217+
output, err := cmd.CombinedOutput()
218+
if err != nil {
219+
return fmt.Errorf("failed to create parallel upgrades checker job: %w\nOutput: %s", err, string(output))
220+
}
221+
222+
log.Printf("Deployed parallel upgrades checker job in namespace %s", tc.workloadNamespace)
223+
return nil
224+
}
225+
226+
// verifyParallelUpgradesChecker checks if the parallel upgrades checker job detected any violations
227+
func (tc *testContext) verifyParallelUpgradesChecker() error {
228+
// Check for failed pods
229+
failedPods, err := tc.client.K8s.CoreV1().Pods(tc.workloadNamespace).List(context.TODO(), meta.ListOptions{
230+
LabelSelector: "job-name=parallel-upgrades-checker",
231+
FieldSelector: "status.phase=Failed",
232+
})
233+
if err != nil {
234+
return fmt.Errorf("error checking parallel upgrades checker status: %w", err)
235+
}
236+
237+
// Gather logs for debugging
238+
_, err = tc.gatherPodLogs("job-name=parallel-upgrades-checker", false)
239+
if err != nil {
240+
log.Printf("warning: unable to gather parallel upgrades checker logs: %v", err)
241+
}
242+
243+
// If any pod failed, the check detected a violation
244+
if len(failedPods.Items) > 0 {
245+
return fmt.Errorf("parallel upgrades check failed: MaxParallelUpgrades was violated (%d pods failed)",
246+
len(failedPods.Items))
247+
}
248+
249+
log.Printf("Parallel upgrades checker passed: MaxParallelUpgrades=1 was enforced")
250+
return nil
251+
}
252+
253+
// cleanupParallelUpgradesChecker removes the parallel upgrades checker job
254+
func (tc *testContext) cleanupParallelUpgradesChecker() {
255+
propagationPolicy := meta.DeletePropagationBackground
256+
err := tc.client.K8s.BatchV1().Jobs(tc.workloadNamespace).Delete(context.TODO(),
257+
"parallel-upgrades-checker", meta.DeleteOptions{
258+
PropagationPolicy: &propagationPolicy,
259+
})
260+
if err != nil && !apierrors.IsNotFound(err) {
261+
log.Printf("warning: failed to cleanup parallel upgrades checker job: %v", err)
262+
}
263+
}

test/e2e/secrets_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,23 @@ func (tc *testContext) testUserData(t *testing.T) {
7878
}
7979

8080
// testUserDataTamper tests if userdata reverts to previous value if updated
81+
// AND verifies MaxParallelUpgrades=1 during userData-triggered deletions
8182
func (tc *testContext) testUserDataTamper(t *testing.T) {
8283
validUserDataSecret, err := tc.client.K8s.CoreV1().Secrets(clusterinfo.MachineAPINamespace).Get(context.TODO(),
8384
secrets.UserDataSecret, meta.GetOptions{})
8485
require.NoError(t, err, "could not find Windows userData secret in required namespace")
8586

87+
// Requires at least 2 Machine nodes to test parallel behavior
88+
require.NoError(t, tc.loadExistingNodes(), "error loading existing nodes")
89+
if len(gc.machineNodes) < 2 {
90+
t.Skip("Test requires at least 2 Machine nodes to verify MaxParallelUpgrades enforcement")
91+
}
92+
93+
// Deploy parallel upgrades checker before triggering userData change
94+
err = tc.deployParallelUpgradesChecker()
95+
require.NoError(t, err, "could not deploy parallel upgrades checker")
96+
defer tc.cleanupParallelUpgradesChecker()
97+
8698
updatedSecret := validUserDataSecret.DeepCopy()
8799
updatedSecret.Data["userData"] = []byte("invalid data")
88100
_, err = tc.client.K8s.CoreV1().Secrets(clusterinfo.MachineAPINamespace).Update(context.TODO(), updatedSecret,
@@ -93,6 +105,10 @@ func (tc *testContext) testUserDataTamper(t *testing.T) {
93105
// until the Machine is back up.
94106
assert.NoError(t, tc.waitForNewMachineNodes(), "error waiting for Machine nodes to be reconfigured")
95107
assert.NoError(t, tc.waitForValidUserData(validUserDataSecret), "error waiting for valid userdata")
108+
109+
// Verify that MaxParallelUpgrades=1 was enforced throughout the process
110+
err = tc.verifyParallelUpgradesChecker()
111+
assert.NoError(t, err, "MaxParallelUpgrades was violated during userData change")
96112
}
97113

98114
// waitForNewMachineNodes returns an error if waitForConfiguredWindowsNodes returns the same Machine backed nodes

0 commit comments

Comments
 (0)