@@ -147,7 +147,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
return ctrl.Result{}, fmt.Errorf("token secret %s/%s is missing %q key", tokenSecret.Namespace, tokenSecret.Name, TokenSecretReleaseVersionKey)
}

return ctrl.Result{}, r.reconcileInPlaceUpgrade(ctx, nodePoolUpgradeAPI, tokenSecret, mcoImage, releaseVersion)
if err := r.reconcileInPlaceUpgrade(ctx, nodePoolUpgradeAPI, tokenSecret, mcoImage, releaseVersion); err != nil {
return ctrl.Result{}, err
}
// Requeue periodically while an upgrade is in progress. The controller only
// watches Nodes and MachineSets, so if an upgrade pod is force-deleted the
// deletion event is missed and the replacement pod is never created. A
// periodic recheck closes that gap.
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}
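
Editorial aside, not part of the diff: the new comment hinges on the controller's watch configuration. The sketch below is purely illustrative Go (package name, types, and watch wiring are placeholders, assuming a recent controller-runtime); it shows the kind of setup in which Pod events are never delivered to this reconciler, so only the periodic RequeueAfter guarantees another pass after an upgrade pod is force-deleted.

package demo

import (
	"context"
	"time"

	corev1 "k8s.io/api/core/v1"
	capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/handler"
)

type demoReconciler struct{}

// Mirrors the shape of the change above: with no Pod watch registered, a
// force-deleted upgrade pod produces no event, so the RequeueAfter is the
// only guarantee of a later re-check.
func (r *demoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

func setup(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&capiv1.MachineSet{}).                                   // placeholder primary resource
		Watches(&corev1.Node{}, &handler.EnqueueRequestForObject{}). // placeholder Node watch; note: no Pod watch
		Complete(&demoReconciler{})
}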

type nodePoolUpgradeAPI struct {
@@ -277,7 +284,7 @@ func (r *Reconciler) reconcileInPlaceUpgrade(ctx context.Context, nodePoolUpgrad

err = r.reconcileUpgradePods(ctx, r.guestClusterClient, nodes, nodePoolUpgradeAPI.spec.poolRef.GetName(), mcoImage, nodePoolUpgradeAPI.proxy)
if err != nil {
return fmt.Errorf("failed to delete idle upgrade pods: %w", err)
return fmt.Errorf("failed to reconcile upgrade pods: %w", err)
}
return nil
}
@@ -312,7 +319,8 @@ func (r *Reconciler) reconcileUpgradePods(ctx context.Context, hostedClusterClie
pod := inPlaceUpgradePod(namespace.Name, node.Name)

if node.Annotations[CurrentMachineConfigAnnotationKey] == node.Annotations[DesiredMachineConfigAnnotationKey] &&
node.Annotations[DesiredDrainerAnnotationKey] == node.Annotations[LastAppliedDrainerAnnotationKey] {
node.Annotations[DesiredDrainerAnnotationKey] == node.Annotations[LastAppliedDrainerAnnotationKey] &&
node.Annotations[MachineConfigDaemonStateAnnotationKey] == MachineConfigDaemonStateDone {
// the node is updated and does not require an MCD running
if err := hostedClusterClient.Get(ctx, client.ObjectKeyFromObject(pod), pod); err != nil {
if apierrors.IsNotFound(err) {
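
Editorial aside, not part of the diff: the now three-part condition above reads as a single "node has finished its in-place upgrade" predicate. A hypothetical extraction (the PR itself adds no such helper; it assumes it lives in this package next to the annotation-key constants used in the diff):

func nodeUpgradeComplete(node *corev1.Node) bool {
	// Config applied, drain/uncordon acknowledged, and the MCD reports Done.
	return node.Annotations[CurrentMachineConfigAnnotationKey] == node.Annotations[DesiredMachineConfigAnnotationKey] &&
		node.Annotations[DesiredDrainerAnnotationKey] == node.Annotations[LastAppliedDrainerAnnotationKey] &&
		node.Annotations[MachineConfigDaemonStateAnnotationKey] == MachineConfigDaemonStateDone
}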
@@ -349,6 +357,18 @@ func (r *Reconciler) reconcileUpgradePods(ctx context.Context, hostedClusterClie
} else {
log.Info("create upgrade pod", "result", result)
}
} else if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
if pod.DeletionTimestamp != nil {
continue
}
log.Info("Detected terminated upgrade pod on node that still needs upgrade, deleting for retry",
"node", node.Name, "podPhase", pod.Status.Phase)
if err := hostedClusterClient.Delete(ctx, pod); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return fmt.Errorf("error deleting terminated upgrade MCD pod for node %s: %w", node.Name, err)
}
}
}
}
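
Editorial aside, not part of the diff: taken together with the Reconcile change, the retry path is delete-then-recreate across two reconciles. A hypothetical walk-through (not a test from this PR; pool name and image are placeholders), assuming the reconcileUpgradePods signature shown above:

func terminatedPodRetry(ctx context.Context, r *Reconciler, c client.Client, node *corev1.Node) error {
	// Pass 1: the Succeeded/Failed MCD pod on a node that still needs its
	// upgrade is deleted.
	if err := r.reconcileUpgradePods(ctx, c, []*corev1.Node{node}, "test-pool", "mco-image:latest", nil); err != nil {
		return err
	}
	// Pass 2, which the new 30-second RequeueAfter triggers in the real
	// controller: no pod exists any more, so a fresh upgrade pod is created.
	return r.reconcileUpgradePods(ctx, c, []*corev1.Node{node}, "test-pool", "mco-image:latest", nil)
}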
@@ -2,6 +2,7 @@ package inplaceupgrader

import (
"testing"
"time"

. "github.com/onsi/gomega"

@@ -11,6 +12,7 @@ import (
configv1 "github.com/openshift/api/config/v1"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/utils/ptr"
@@ -590,6 +592,220 @@ func TestCreateUpgradePod(t *testing.T) {
}
}

func TestReconcileUpgradePods(t *testing.T) {
_ = capiv1.AddToScheme(scheme.Scheme)

poolName := "test-pool"
mcoImage := "mco-image:latest"
namespace := inPlaceUpgradeNamespace(poolName)
targetConfig := "target-hash"

testCases := []struct {
name string
node *corev1.Node
existingPod *corev1.Pod
expectPodDeleted bool
expectPodCreated bool
expectPodRetained bool
expectPodSkipped bool
}{
{
name: "When MCD pod is in Succeeded phase on a node needing upgrade it should delete the terminated pod",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: "old-hash",
DesiredMachineConfigAnnotationKey: targetConfig,
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
},
Status: corev1.PodStatus{
Phase: corev1.PodSucceeded,
},
},
expectPodDeleted: true,
},
{
name: "When MCD pod is in Failed phase on a node needing upgrade it should delete the terminated pod",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: "old-hash",
DesiredMachineConfigAnnotationKey: targetConfig,
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
},
Status: corev1.PodStatus{
Phase: corev1.PodFailed,
},
},
expectPodDeleted: true,
},
{
name: "When MCD pod is in Running phase on a node needing upgrade it should leave the pod alone",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: "old-hash",
DesiredMachineConfigAnnotationKey: targetConfig,
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
},
Status: corev1.PodStatus{
Phase: corev1.PodRunning,
},
},
expectPodRetained: true,
},
{
name: "When no MCD pod exists on a node needing upgrade it should create a new pod",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: "old-hash",
DesiredMachineConfigAnnotationKey: targetConfig,
},
},
},
existingPod: nil,
expectPodCreated: true,
},
{
name: "When terminated MCD pod is already being deleted it should skip without error",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: "old-hash",
DesiredMachineConfigAnnotationKey: targetConfig,
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
DeletionTimestamp: &metav1.Time{Time: time.Now()},
Finalizers: []string{"test-finalizer"},
},
Status: corev1.PodStatus{
Phase: corev1.PodFailed,
},
},
expectPodSkipped: true,
},
{
name: "When MCD pod is in Succeeded phase on a fully updated node it should delete the idle pod",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: targetConfig,
DesiredMachineConfigAnnotationKey: targetConfig,
DesiredDrainerAnnotationKey: "uncordon-xxx",
LastAppliedDrainerAnnotationKey: "uncordon-xxx",
MachineConfigDaemonStateAnnotationKey: MachineConfigDaemonStateDone,
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
},
Status: corev1.PodStatus{
Phase: corev1.PodSucceeded,
},
},
expectPodDeleted: true,
},
{
name: "When node config matches but MCD state is not Done it should delete the terminated pod for retry",
node: &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node1",
Annotations: map[string]string{
CurrentMachineConfigAnnotationKey: targetConfig,
DesiredMachineConfigAnnotationKey: targetConfig,
DesiredDrainerAnnotationKey: "uncordon-xxx",
LastAppliedDrainerAnnotationKey: "uncordon-xxx",
MachineConfigDaemonStateAnnotationKey: "Working",
},
},
},
existingPod: &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace.Name,
Name: "machine-config-daemon-node1",
},
Status: corev1.PodStatus{
Phase: corev1.PodFailed,
},
},
expectPodDeleted: true,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)

var guestObjects []client.Object
if tc.existingPod != nil {
guestObjects = append(guestObjects, tc.existingPod)
}
guestClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).
WithObjects(guestObjects...).Build()

r := &Reconciler{
CreateOrUpdateProvider: upsert.New(false),
}

err := r.reconcileUpgradePods(t.Context(), guestClient, []*corev1.Node{tc.node}, poolName, mcoImage, nil)
g.Expect(err).ToNot(HaveOccurred())

pod := inPlaceUpgradePod(namespace.Name, tc.node.Name)
getErr := guestClient.Get(t.Context(), client.ObjectKeyFromObject(pod), pod)

if tc.expectPodDeleted {
g.Expect(apierrors.IsNotFound(getErr)).To(BeTrue(), "expected pod to be deleted, got: %v", getErr)
}
if tc.expectPodCreated {
g.Expect(getErr).ToNot(HaveOccurred(), "expected pod to be created")
g.Expect(pod.Spec.Containers).To(HaveLen(1))
g.Expect(pod.Spec.Containers[0].Image).To(Equal(mcoImage))
}
if tc.expectPodRetained {
g.Expect(getErr).ToNot(HaveOccurred(), "expected pod to be retained")
g.Expect(pod.Status.Phase).To(Equal(corev1.PodRunning))
}
if tc.expectPodSkipped {
g.Expect(getErr).ToNot(HaveOccurred(), "expected pod to still exist (skip due to DeletionTimestamp)")
g.Expect(pod.DeletionTimestamp).ToNot(BeNil(), "expected DeletionTimestamp to remain from original object")
}
})
}
}

func TestReconcileInPlaceUpgradeAnnotatesMachineWithNodePoolVersion(t *testing.T) {
g := NewWithT(t)
_ = capiv1.AddToScheme(scheme.Scheme)