Skip to content

Commit b59c793

Browse files
committed
fix(inplaceupgrader): OCPBUGS-75887: include node name in degraded upgrade error message
Include the node name in the error message when an in-place upgrade fails, making it easier to identify which node encountered the issue.
1 parent 9b4b004 commit b59c793

2 files changed

Lines changed: 159 additions & 1 deletion

File tree

control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ func (r *Reconciler) reconcileInPlaceUpgrade(ctx context.Context, nodePoolUpgrad
220220
log.Info("Reconciled MachineSet", "result", result)
221221
}
222222

223-
return fmt.Errorf("degraded node found, cannot progress in-place upgrade. Degraded reason: %v", node.Annotations[MachineConfigDaemonMessageAnnotationKey])
223+
return fmt.Errorf("degraded node %s found, cannot progress in-place upgrade. Degraded reason: %v", node.Name, node.Annotations[MachineConfigDaemonMessageAnnotationKey])
224224
}
225225

226226
if nodeNeedsUpgrade(node, currentConfigVersionHash, targetConfigVersionHash) {

control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader_test.go

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,161 @@ func TestReconcileInPlaceUpgradeAnnotatesMachineWithNodePoolVersion(t *testing.T
725725
g.Expect(err).ToNot(HaveOccurred())
726726
g.Expect(updatedMachine.Annotations[hyperv1.NodePoolReleaseVersionAnnotation]).To(Equal(nodePoolVersion))
727727
}
728+
729+
func TestReconcileInPlaceUpgradeDegradedNodeErrorMessage(t *testing.T) {
730+
_ = capiv1.AddToScheme(scheme.Scheme)
731+
732+
targetConfigVersion := "target-hash"
733+
currentConfigVersion := "current-hash"
734+
degradedReason := "disk validation failed: node disk usage exceeds threshold"
735+
736+
selector := map[string]string{"pool": "test"}
737+
738+
testCases := []struct {
739+
name string
740+
nodeName string
741+
mcdState string
742+
mcdMessage string
743+
expectError bool
744+
expectedErrorContainsNodeName bool
745+
expectedErrorContainsReason bool
746+
expectedAnnotationNodeName bool
747+
}{
748+
{
749+
name: "when a node is degraded it should include the node name in the error message",
750+
nodeName: "degraded-node-xyz",
751+
mcdState: MachineConfigDaemonStateDegraded,
752+
mcdMessage: degradedReason,
753+
expectError: true,
754+
expectedErrorContainsNodeName: true,
755+
expectedErrorContainsReason: true,
756+
expectedAnnotationNodeName: true,
757+
},
758+
{
759+
name: "when a node is not degraded it should not return a degraded error",
760+
nodeName: "healthy-node",
761+
mcdState: MachineConfigDaemonStateDone,
762+
mcdMessage: "",
763+
expectError: false,
764+
expectedErrorContainsNodeName: false,
765+
expectedErrorContainsReason: false,
766+
expectedAnnotationNodeName: false,
767+
},
768+
}
769+
770+
for _, tc := range testCases {
771+
t.Run(tc.name, func(t *testing.T) {
772+
g := NewWithT(t)
773+
774+
machineSet := &capiv1.MachineSet{
775+
ObjectMeta: metav1.ObjectMeta{
776+
Name: "test-ms",
777+
Namespace: "test-ns",
778+
UID: "ms-uid",
779+
Annotations: map[string]string{
780+
nodePoolAnnotationTargetConfigVersion: targetConfigVersion,
781+
nodePoolAnnotationCurrentConfigVersion: currentConfigVersion,
782+
},
783+
},
784+
Spec: capiv1.MachineSetSpec{
785+
Selector: metav1.LabelSelector{MatchLabels: selector},
786+
},
787+
}
788+
789+
machine := &capiv1.Machine{
790+
ObjectMeta: metav1.ObjectMeta{
791+
Name: "test-machine",
792+
Namespace: "test-ns",
793+
Labels: selector,
794+
OwnerReferences: []metav1.OwnerReference{
795+
{
796+
Kind: "MachineSet",
797+
Name: machineSet.Name,
798+
UID: machineSet.UID,
799+
Controller: ptr.To(true),
800+
},
801+
},
802+
},
803+
Status: capiv1.MachineStatus{
804+
NodeRef: &corev1.ObjectReference{Name: tc.nodeName},
805+
},
806+
}
807+
808+
node := &corev1.Node{
809+
ObjectMeta: metav1.ObjectMeta{
810+
Name: tc.nodeName,
811+
Annotations: map[string]string{
812+
CurrentMachineConfigAnnotationKey: currentConfigVersion,
813+
DesiredMachineConfigAnnotationKey: targetConfigVersion,
814+
MachineConfigDaemonStateAnnotationKey: tc.mcdState,
815+
MachineConfigDaemonMessageAnnotationKey: tc.mcdMessage,
816+
},
817+
},
818+
}
819+
820+
tokenSecret := &corev1.Secret{
821+
ObjectMeta: metav1.ObjectMeta{
822+
Name: "token-test",
823+
Namespace: "test-ns",
824+
},
825+
Data: map[string][]byte{
826+
TokenSecretPayloadKey: []byte("payload"),
827+
},
828+
}
829+
830+
mgmtClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).
831+
WithObjects(machineSet, machine).Build()
832+
guestClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).
833+
WithObjects(node).Build()
834+
835+
r := &Reconciler{
836+
client: mgmtClient,
837+
guestClusterClient: guestClient,
838+
CreateOrUpdateProvider: upsert.New(false),
839+
}
840+
841+
upgradeAPI := &nodePoolUpgradeAPI{
842+
spec: struct {
843+
targetConfigVersion string
844+
poolRef *capiv1.MachineSet
845+
}{
846+
targetConfigVersion: targetConfigVersion,
847+
poolRef: machineSet,
848+
},
849+
status: struct {
850+
currentConfigVersion string
851+
}{
852+
currentConfigVersion: currentConfigVersion,
853+
},
854+
}
855+
856+
err := r.reconcileInPlaceUpgrade(t.Context(), upgradeAPI, tokenSecret, "mco-image", "4.18.37")
857+
858+
if tc.expectError {
859+
g.Expect(err).To(HaveOccurred())
860+
861+
if tc.expectedErrorContainsNodeName {
862+
g.Expect(err.Error()).To(ContainSubstring(tc.nodeName),
863+
"error message should contain the degraded node name")
864+
}
865+
if tc.expectedErrorContainsReason {
866+
g.Expect(err.Error()).To(ContainSubstring(tc.mcdMessage),
867+
"error message should contain the MCD degraded reason")
868+
}
869+
if tc.expectedAnnotationNodeName {
870+
updatedMS := &capiv1.MachineSet{}
871+
err = mgmtClient.Get(t.Context(), client.ObjectKeyFromObject(machineSet), updatedMS)
872+
g.Expect(err).ToNot(HaveOccurred())
873+
g.Expect(updatedMS.Annotations[nodePoolAnnotationUpgradeInProgressFalse]).To(
874+
ContainSubstring(tc.nodeName),
875+
"MachineSet annotation should contain the degraded node name")
876+
_, hasTrue := updatedMS.Annotations[nodePoolAnnotationUpgradeInProgressTrue]
877+
g.Expect(hasTrue).To(BeFalse(),
878+
"upgradeInProgressTrue annotation should be deleted on degradation")
879+
}
880+
} else {
881+
g.Expect(err).ToNot(HaveOccurred())
882+
}
883+
})
884+
}
885+
}

0 commit comments

Comments
 (0)