Skip to content

Commit 4e5e616

Browse files
authored
Merge pull request #4844 from Azure/bragazzi/ARO-26990-isclusterupgrading-false-positive
ARO-26990: eliminate IsClusterUpgrading() false positive during non-upgrade MCO rollouts
2 parents 4f7815e + 7a3a53a commit 4e5e616

7 files changed

Lines changed: 252 additions & 15 deletions

File tree

pkg/operator/controllers/dnsmasq/cluster_controller_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,16 @@ func TestClusterReconciler(t *testing.T) {
278278
},
279279
Spec: configv1.ClusterVersionSpec{},
280280
Status: configv1.ClusterVersionStatus{
281+
History: []configv1.UpdateHistory{
282+
{
283+
State: configv1.PartialUpdate,
284+
Version: "4.19.0",
285+
},
286+
{
287+
State: configv1.CompletedUpdate,
288+
Version: "4.18.30",
289+
},
290+
},
281291
Conditions: []configv1.ClusterOperatorStatusCondition{
282292
{
283293
Type: configv1.OperatorProgressing,
@@ -320,6 +330,16 @@ func TestClusterReconciler(t *testing.T) {
320330
},
321331
Spec: configv1.ClusterVersionSpec{},
322332
Status: configv1.ClusterVersionStatus{
333+
History: []configv1.UpdateHistory{
334+
{
335+
State: configv1.PartialUpdate,
336+
Version: "4.19.0",
337+
},
338+
{
339+
State: configv1.CompletedUpdate,
340+
Version: "4.18.30",
341+
},
342+
},
323343
Conditions: []configv1.ClusterOperatorStatusCondition{
324344
{
325345
Type: configv1.OperatorProgressing,

pkg/operator/controllers/dnsmasq/machineconfig_controller_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,16 @@ func TestMachineConfigReconcilerClusterUpgrading(t *testing.T) {
494494
},
495495
Spec: configv1.ClusterVersionSpec{},
496496
Status: configv1.ClusterVersionStatus{
497+
History: []configv1.UpdateHistory{
498+
{
499+
State: configv1.PartialUpdate,
500+
Version: "4.19.0",
501+
},
502+
{
503+
State: configv1.CompletedUpdate,
504+
Version: "4.18.30",
505+
},
506+
},
497507
Conditions: []configv1.ClusterOperatorStatusCondition{
498508
{
499509
Type: configv1.OperatorProgressing,

pkg/operator/controllers/dnsmasq/machineconfigpool_controller_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,16 @@ func TestMachineConfigPoolReconcilerClusterUpgrading(t *testing.T) {
516516
},
517517
Spec: configv1.ClusterVersionSpec{},
518518
Status: configv1.ClusterVersionStatus{
519+
History: []configv1.UpdateHistory{
520+
{
521+
State: configv1.PartialUpdate,
522+
Version: "4.19.0",
523+
},
524+
{
525+
State: configv1.CompletedUpdate,
526+
Version: "4.18.30",
527+
},
528+
},
519529
Conditions: []configv1.ClusterOperatorStatusCondition{
520530
{
521531
Type: configv1.OperatorProgressing,

pkg/operator/controllers/etchosts/cluster_controller_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,16 @@ var (
138138
},
139139
Spec: configv1.ClusterVersionSpec{},
140140
Status: configv1.ClusterVersionStatus{
141+
History: []configv1.UpdateHistory{
142+
{
143+
State: configv1.PartialUpdate,
144+
Version: "4.19.0",
145+
},
146+
{
147+
State: configv1.CompletedUpdate,
148+
Version: "4.10.11",
149+
},
150+
},
141151
Conditions: []configv1.ClusterOperatorStatusCondition{
142152
{
143153
Type: configv1.OperatorProgressing,

pkg/operator/controllers/machinehealthcheck/machinehealthcheck_controller_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ func TestMachineHealthCheckReconciler(t *testing.T) {
4444
Name: "version",
4545
},
4646
Status: configv1.ClusterVersionStatus{
47+
History: []configv1.UpdateHistory{
48+
{
49+
State: configv1.CompletedUpdate,
50+
Version: "4.18.30",
51+
},
52+
},
4753
Conditions: []configv1.ClusterOperatorStatusCondition{
4854
{
4955
Type: configv1.OperatorProgressing,
@@ -53,6 +59,16 @@ func TestMachineHealthCheckReconciler(t *testing.T) {
5359
},
5460
}
5561
clusterversionUpgrading := clusterversionDefault.DeepCopy()
62+
clusterversionUpgrading.Status.History = []configv1.UpdateHistory{
63+
{
64+
State: configv1.PartialUpdate,
65+
Version: "4.19.0",
66+
},
67+
{
68+
State: configv1.CompletedUpdate,
69+
Version: "4.18.30",
70+
},
71+
}
5672
clusterversionUpgrading.Status.Conditions = []configv1.ClusterOperatorStatusCondition{
5773
{
5874
Type: configv1.OperatorProgressing,

pkg/util/version/clusterversion.go

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,16 @@ func ClusterVersionLessThan(ctx context.Context, configcli configclient.Interfac
5353
return clusterVersion.Lt(checkVersion), clusterVersion, nil
5454
}
5555

56+
// IsClusterUpgrading returns true when the CVO has initiated an OCP version upgrade.
57+
// It checks status.history[0].state rather than the Progressing condition because
58+
// Progressing=True is also set during node rollouts that are not real CVO-driven
59+
// OCP upgrades. Callers use this helper to gate reboot-causing reconciliation so
60+
// it only proceeds during actual cluster upgrades. See ARO-26990 for details.
5661
func IsClusterUpgrading(cv *configv1.ClusterVersion) bool {
57-
var isUpgrading bool
58-
if c := findClusterOperatorStatusCondition(cv.Status.Conditions, configv1.OperatorProgressing); c != nil && c.Status == configv1.ConditionTrue {
59-
isUpgrading = true
60-
} else {
61-
isUpgrading = false
62+
if cv == nil || len(cv.Status.History) == 0 {
63+
return false
6264
}
63-
return isUpgrading
64-
}
65-
66-
func findClusterOperatorStatusCondition(conditions []configv1.ClusterOperatorStatusCondition, name configv1.ClusterStatusConditionType) *configv1.ClusterOperatorStatusCondition {
67-
for i := range conditions {
68-
if conditions[i].Type == name {
69-
return &conditions[i]
70-
}
71-
}
72-
return nil
65+
// Return true only when the CVO has actually initiated an upgrade
66+
// (a new non-Completed entry exists at history[0])
67+
return cv.Status.History[0].State != configv1.CompletedUpdate
7368
}

pkg/util/version/clusterversion_test.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,179 @@ func TestGetClusterVersion(t *testing.T) {
111111
})
112112
}
113113
}
114+
115+
func TestIsClusterUpgrading(t *testing.T) {
116+
for _, tt := range []struct {
117+
name string
118+
mocks func(*configv1.ClusterVersion)
119+
nilCV bool
120+
want bool
121+
comment string
122+
}{
123+
{
124+
name: "nil ClusterVersion returns false",
125+
nilCV: true,
126+
want: false,
127+
comment: "Safety check for nil input",
128+
},
129+
{
130+
name: "empty history returns false",
131+
mocks: nil,
132+
want: false,
133+
comment: "Fresh cluster or no upgrade history",
134+
},
135+
{
136+
name: "steady state - Completed update, Progressing=False returns false",
137+
mocks: func(cv *configv1.ClusterVersion) {
138+
cv.Status.History = []configv1.UpdateHistory{
139+
{
140+
State: configv1.CompletedUpdate,
141+
Version: "4.18.30",
142+
},
143+
}
144+
cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{
145+
{
146+
Type: configv1.OperatorProgressing,
147+
Status: configv1.ConditionFalse,
148+
},
149+
}
150+
},
151+
want: false,
152+
comment: "Normal steady state cluster",
153+
},
154+
{
155+
name: "ARO-26990 bug case - Completed update but Progressing=True returns false",
156+
mocks: func(cv *configv1.ClusterVersion) {
157+
cv.Status.History = []configv1.UpdateHistory{
158+
{
159+
State: configv1.CompletedUpdate,
160+
Version: "4.18.30",
161+
},
162+
}
163+
cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{
164+
{
165+
Type: configv1.OperatorProgressing,
166+
Status: configv1.ConditionTrue,
167+
Reason: "ClusterOperatorProgressing",
168+
Message: "Working towards 4.18.30: some cluster operators are still updating",
169+
},
170+
}
171+
},
172+
want: false,
173+
comment: "MCO rollout from ARO MC deploy - should NOT trigger dnsmasq updates",
174+
},
175+
{
176+
name: "upgrade intent not initiated - desiredUpdate set but history[0] still Completed returns false",
177+
mocks: func(cv *configv1.ClusterVersion) {
178+
cv.Spec.DesiredUpdate = &configv1.Update{
179+
Version: "4.19.0",
180+
}
181+
cv.Status.History = []configv1.UpdateHistory{
182+
{
183+
State: configv1.CompletedUpdate,
184+
Version: "4.18.30",
185+
},
186+
}
187+
cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{
188+
{
189+
Type: configv1.OperatorProgressing,
190+
Status: configv1.ConditionFalse,
191+
},
192+
}
193+
},
194+
want: false,
195+
comment: "Customer expressed upgrade intent but CVO hasn't started (e.g., pending adminack)",
196+
},
197+
{
198+
name: "upgrade initiated - Partial update at history[0] returns true",
199+
mocks: func(cv *configv1.ClusterVersion) {
200+
cv.Spec.DesiredUpdate = &configv1.Update{
201+
Version: "4.19.0",
202+
}
203+
cv.Status.History = []configv1.UpdateHistory{
204+
{
205+
State: configv1.PartialUpdate,
206+
Version: "4.19.0",
207+
},
208+
{
209+
State: configv1.CompletedUpdate,
210+
Version: "4.18.30",
211+
},
212+
}
213+
cv.Status.Conditions = []configv1.ClusterOperatorStatusCondition{
214+
{
215+
Type: configv1.OperatorProgressing,
216+
Status: configv1.ConditionTrue,
217+
Reason: "ClusterOperatorProgressing",
218+
Message: "Working towards 4.19.0",
219+
},
220+
}
221+
},
222+
want: true,
223+
comment: "OCP upgrade in progress - dnsmasq updates allowed",
224+
},
225+
{
226+
name: "upgrade initiated - no State field (treated as non-Completed) returns true",
227+
mocks: func(cv *configv1.ClusterVersion) {
228+
cv.Status.History = []configv1.UpdateHistory{
229+
{
230+
Version: "4.19.0",
231+
// State field missing - older clusters or edge cases
232+
},
233+
{
234+
State: configv1.CompletedUpdate,
235+
Version: "4.18.30",
236+
},
237+
}
238+
},
239+
want: true,
240+
comment: "Missing State field treated as upgrade in progress",
241+
},
242+
{
243+
name: "multiple completed updates returns false",
244+
mocks: func(cv *configv1.ClusterVersion) {
245+
cv.Status.History = []configv1.UpdateHistory{
246+
{
247+
State: configv1.CompletedUpdate,
248+
Version: "4.18.30",
249+
},
250+
{
251+
State: configv1.CompletedUpdate,
252+
Version: "4.18.0",
253+
},
254+
{
255+
State: configv1.CompletedUpdate,
256+
Version: "4.17.38",
257+
},
258+
}
259+
},
260+
want: false,
261+
comment: "Cluster with upgrade history, currently at steady state",
262+
},
263+
} {
264+
t.Run(tt.name, func(t *testing.T) {
265+
var cv *configv1.ClusterVersion
266+
267+
if !tt.nilCV {
268+
cv = &configv1.ClusterVersion{
269+
ObjectMeta: metav1.ObjectMeta{
270+
Name: "version",
271+
},
272+
Status: configv1.ClusterVersionStatus{
273+
History: []configv1.UpdateHistory{},
274+
Conditions: []configv1.ClusterOperatorStatusCondition{},
275+
},
276+
}
277+
278+
if tt.mocks != nil {
279+
tt.mocks(cv)
280+
}
281+
}
282+
283+
got := IsClusterUpgrading(cv)
284+
if got != tt.want {
285+
t.Errorf("IsClusterUpgrading() = %v, want %v (%s)", got, tt.want, tt.comment)
286+
}
287+
})
288+
}
289+
}

0 commit comments

Comments
 (0)