diff --git a/pkg/apps/deployment/progressing.go b/pkg/apps/deployment/progressing.go new file mode 100644 index 0000000000..027c4cf1c5 --- /dev/null +++ b/pkg/apps/deployment/progressing.go @@ -0,0 +1,65 @@ +package deployment + +import ( + "fmt" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + + operatorv1 "github.com/openshift/api/operator/v1" +) + +// DeploymentProgressingCondition computes an operator Progressing condition from +// the deployment's own status conditions and replica counts. +func DeploymentProgressingCondition(deployment *appsv1.Deployment) operatorv1.OperatorCondition { + desiredReplicas := ptr.Deref(deployment.Spec.Replicas, 1) + timedOutMessage, timedOut := HasDeploymentTimedOutProgressing(deployment.Status) + + switch { + case !HasDeploymentProgressed(deployment.Status) && !timedOut: + return operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest revision and %d/%d pods are available", deployment.Name, deployment.Namespace, deployment.Status.UpdatedReplicas, desiredReplicas, deployment.Status.AvailableReplicas, desiredReplicas), + } + + case timedOut: + return operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: fmt.Sprintf("deployment/%s.%s has timed out progressing: %s", deployment.Name, deployment.Namespace, timedOutMessage), + } + + default: + return operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + } + } +} + +// HasDeploymentProgressed returns true if the deployment reports NewReplicaSetAvailable +// via the DeploymentProgressing condition. +func HasDeploymentProgressed(status appsv1.DeploymentStatus) bool { + for _, cond := range status.Conditions { + if cond.Type == appsv1.DeploymentProgressing { + return cond.Status == corev1.ConditionTrue && cond.Reason == "NewReplicaSetAvailable" + } + } + return false +} + +// HasDeploymentTimedOutProgressing returns true if the deployment reports ProgressDeadlineExceeded. +// The function returns the Progressing condition message as the first return value. +func HasDeploymentTimedOutProgressing(status appsv1.DeploymentStatus) (string, bool) { + for _, cond := range status.Conditions { + if cond.Type == appsv1.DeploymentProgressing { + return cond.Message, cond.Status == corev1.ConditionFalse && cond.Reason == "ProgressDeadlineExceeded" + } + } + return "", false +} diff --git a/pkg/apps/deployment/progressing_test.go b/pkg/apps/deployment/progressing_test.go new file mode 100644 index 0000000000..f118dcf5b3 --- /dev/null +++ b/pkg/apps/deployment/progressing_test.go @@ -0,0 +1,225 @@ +package deployment + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + operatorv1 "github.com/openshift/api/operator/v1" +) + +func TestDeploymentProgressingCondition(t *testing.T) { + tests := []struct { + name string + deploy *appsv1.Deployment + expected operatorv1.OperatorCondition + }{ + { + name: "active rollout, not all pods updated", + deploy: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ns"}, + Spec: appsv1.DeploymentSpec{Replicas: ptr.To[int32](3)}, + Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 1, + AvailableReplicas: 2, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "ReplicaSetUpdated"}, + }, + }, + }, + expected: operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/web.ns: 1/3 pods have been updated to the latest revision and 2/3 pods are available", + }, + }, + { + name: "rollout complete", + deploy: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ns"}, + Spec: appsv1.DeploymentSpec{Replicas: ptr.To[int32](3)}, + Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 3, + AvailableReplicas: 3, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + expected: operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + }, + { + name: "progress deadline exceeded", + deploy: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ns"}, + Spec: appsv1.DeploymentSpec{Replicas: ptr.To[int32](3)}, + Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 1, + AvailableReplicas: 2, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionFalse, Reason: "ProgressDeadlineExceeded", Message: "timed out waiting"}, + }, + }, + }, + expected: operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/web.ns has timed out progressing: timed out waiting", + }, + }, + { + name: "no progressing condition on deployment", + deploy: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ns"}, + Spec: appsv1.DeploymentSpec{Replicas: ptr.To[int32](1)}, + Status: appsv1.DeploymentStatus{}, + }, + expected: operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/web.ns: 0/1 pods have been updated to the latest revision and 0/1 pods are available", + }, + }, + { + name: "nil replicas defaults to 1", + deploy: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Name: "web", Namespace: "ns"}, + Spec: appsv1.DeploymentSpec{}, + Status: appsv1.DeploymentStatus{ + UpdatedReplicas: 1, + AvailableReplicas: 1, + Conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + }, + }, + expected: operatorv1.OperatorCondition{ + Type: operatorv1.OperatorStatusTypeProgressing, + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := DeploymentProgressingCondition(tt.deploy) + if d := cmp.Diff(tt.expected, got); d != "" { + t.Errorf("unexpected condition (-want +got):\n%s", d) + } + }) + } +} + +func TestHasDeploymentProgressed(t *testing.T) { + tests := []struct { + name string + conditions []appsv1.DeploymentCondition + want bool + }{ + { + name: "NewReplicaSetAvailable and True", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "NewReplicaSetAvailable"}, + }, + want: true, + }, + { + name: "ReplicaSetUpdated and True", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "ReplicaSetUpdated"}, + }, + want: false, + }, + { + name: "NewReplicaSetAvailable but False", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionFalse, Reason: "NewReplicaSetAvailable"}, + }, + want: false, + }, + { + name: "no conditions", + conditions: nil, + want: false, + }, + { + name: "unrelated condition only", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentAvailable, Status: corev1.ConditionTrue}, + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := HasDeploymentProgressed(appsv1.DeploymentStatus{Conditions: tt.conditions}) + if got != tt.want { + t.Errorf("got %v, want %v", got, tt.want) + } + }) + } +} + +func TestHasDeploymentTimedOutProgressing(t *testing.T) { + tests := []struct { + name string + conditions []appsv1.DeploymentCondition + wantMessage string + wantTimedOut bool + }{ + { + name: "ProgressDeadlineExceeded", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionFalse, Reason: "ProgressDeadlineExceeded", Message: "timed out"}, + }, + wantMessage: "timed out", + wantTimedOut: true, + }, + { + name: "ProgressDeadlineExceeded but True status", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "ProgressDeadlineExceeded", Message: "timed out"}, + }, + wantMessage: "timed out", + wantTimedOut: false, + }, + { + name: "normal progressing", + conditions: []appsv1.DeploymentCondition{ + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, Reason: "ReplicaSetUpdated", Message: "progressing"}, + }, + wantMessage: "progressing", + wantTimedOut: false, + }, + { + name: "no conditions", + conditions: nil, + wantMessage: "", + wantTimedOut: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotMsg, gotTimedOut := HasDeploymentTimedOutProgressing(appsv1.DeploymentStatus{Conditions: tt.conditions}) + if gotMsg != tt.wantMessage { + t.Errorf("message: got %q, want %q", gotMsg, tt.wantMessage) + } + if gotTimedOut != tt.wantTimedOut { + t.Errorf("timedOut: got %v, want %v", gotTimedOut, tt.wantTimedOut) + } + }) + } +} diff --git a/pkg/operator/apiserver/controller/workload/workload.go b/pkg/operator/apiserver/controller/workload/workload.go index 51f38dc97d..42f3b5c349 100644 --- a/pkg/operator/apiserver/controller/workload/workload.go +++ b/pkg/operator/apiserver/controller/workload/workload.go @@ -301,34 +301,16 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o desiredReplicas := ptr.Deref(workload.Spec.Replicas, 1) - // If the workload is up to date, then we are no longer progressing - workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration - // Update is done when all pods have been updated to the latest revision - // and the deployment controller has reported NewReplicaSetAvailable - workloadIsBeingUpdated := !workloadAtHighestGeneration || !hasDeploymentProgressed(workload.Status) - workloadIsBeingUpdatedTooLong := v1helpers.IsUpdatingTooLong(previousStatus, *deploymentProgressingCondition.Type) - if !workloadAtHighestGeneration { - deploymentProgressingCondition = deploymentProgressingCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("NewGeneration"). - WithMessage(fmt.Sprintf("deployment/%s.%s: observed generation is %d, desired generation is %d.", workload.Name, c.targetNamespace, workload.Status.ObservedGeneration, workload.ObjectMeta.Generation)) - } else if workloadIsBeingUpdated { - deploymentProgressingCondition = deploymentProgressingCondition. - WithStatus(operatorv1.ConditionTrue). - WithReason("PodsUpdating"). - WithMessage(fmt.Sprintf("deployment/%s.%s: %d/%d pods have been updated to the latest generation and %d/%d pods are available", workload.Name, c.targetNamespace, workload.Status.UpdatedReplicas, desiredReplicas, workload.Status.AvailableReplicas, desiredReplicas)) - } else { - // Terminating pods don't account for any of the other status fields but - // still can exist in a state when they are accepting connections and would - // contribute to unexpected behavior when we report Progressing=False. - // The case of too many pods might occur for example if `TerminationGracePeriodSeconds` is set - // - // The workload should ensure this does not happen by using for example EnsureAtMostOnePodPerNode - // so that the old pods terminate before the new ones are started. - deploymentProgressingCondition = deploymentProgressingCondition. - WithStatus(operatorv1.ConditionFalse). - WithReason("AsExpected") - } + // Update is done when the deployment controller has reported NewReplicaSetAvailable. + // Checking the current vs. observed generation here is not possible since we don't want to be Progressing on scaling. + progressingCond := deployment.DeploymentProgressingCondition(workload) + _, workloadIsBeingUpdatedTooLong := deployment.HasDeploymentTimedOutProgressing(workload.Status) + workloadIsBeingUpdated := !deployment.HasDeploymentProgressed(workload.Status) && !workloadIsBeingUpdatedTooLong + + deploymentProgressingCondition = deploymentProgressingCondition. + WithStatus(progressingCond.Status). + WithReason(progressingCond.Reason). + WithMessage(progressingCond.Message) // During a rollout the default maxSurge (25%) will allow the available // replicas to temporarily exceed the desired replica count. If this were @@ -354,6 +336,7 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o // if the deployment is all available and at the expected generation, then update the version to the latest // when we update, the image pull spec should immediately be different, which should immediately cause a deployment rollout // which should immediately result in a deployment generation diff, which should cause this block to be skipped until it is ready. + workloadAtHighestGeneration := workload.ObjectMeta.Generation == workload.Status.ObservedGeneration workloadHasAllPodsUpdated := workload.Status.UpdatedReplicas == desiredReplicas if workloadAtHighestGeneration && workloadHasAllPodsAvailable && workloadHasAllPodsUpdated && operatorConfigAtHighestGeneration { c.versionRecorder.SetVersion(c.constructOperandNameFor(workload.Name), c.targetOperandVersion) @@ -373,17 +356,6 @@ func (c *Controller) constructOperandNameFor(name string) string { return name } -// hasDeploymentProgressed returns true if the deployment reports NewReplicaSetAvailable -// via the DeploymentProgressing condition -func hasDeploymentProgressed(status appsv1.DeploymentStatus) bool { - for _, cond := range status.Conditions { - if cond.Type == appsv1.DeploymentProgressing { - return cond.Status == corev1.ConditionTrue && cond.Reason == "NewReplicaSetAvailable" - } - } - return false -} - // EnsureAtMostOnePodPerNode updates the deployment spec to prevent more than // one pod of a given replicaset from landing on a node. It accomplishes this // by adding a label on the template and updates the pod anti-affinity term to include that label. diff --git a/pkg/operator/apiserver/controller/workload/workload_test.go b/pkg/operator/apiserver/controller/workload/workload_test.go index 71abce9055..f91a3cf6fd 100644 --- a/pkg/operator/apiserver/controller/workload/workload_test.go +++ b/pkg/operator/apiserver/controller/workload/workload_test.go @@ -61,7 +61,6 @@ func TestUpdateOperatorStatus(t *testing.T) { operatorPreconditionsNotReady bool preconditionError error errors []error - previousConditions []operatorv1.OperatorCondition validateOperatorStatus func(*operatorv1.OperatorStatus) error }{ @@ -133,7 +132,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have an unavailable workload being updated for too long and no errors thus we are degraded", + name: "scenario: unavailable workload with progress deadline exceeded", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -169,14 +168,6 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, }, - previousConditions: []operatorv1.OperatorCondition{ - { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - LastTransitionTime: metav1.NewTime(time.Now().Add(-16 * time.Minute)), - }, - }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { @@ -197,16 +188,16 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: timed out", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an unavailable workload being updated for a short time and no errors so we are progressing", + name: "scenario: unavailable workload progressing normally", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -233,8 +224,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Ready: false, State: corev1.ContainerState{ Waiting: &corev1.ContainerStateWaiting{ - Reason: "ImagePull", - Message: "slow registry", + Reason: "ContainerCreating", }, }, }, @@ -242,14 +232,6 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, }, - previousConditions: []operatorv1.OperatorCondition{ - { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "PodsUpdating", - LastTransitionTime: metav1.NewTime(time.Now().Add(-4 * time.Minute)), - }, - }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { expectedConditions := []operatorv1.OperatorCondition{ { @@ -271,7 +253,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest generation and 0/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 0/3 pods have been updated to the latest revision and 0/3 pods are available", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) @@ -344,7 +326,7 @@ func TestUpdateOperatorStatus(t *testing.T) { }, }, { - name: "scenario: we have a complete workload and no errors thus we are available", + name: "scenario: zero available after successful rollout", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -354,7 +336,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 3, + AvailableReplicas: 0, UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, @@ -365,9 +347,9 @@ func TestUpdateOperatorStatus(t *testing.T) { expectedConditions := []operatorv1.OperatorCondition{ { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), - Status: operatorv1.ConditionTrue, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionFalse, + Reason: "NoPod", + Message: "no apiserver.openshift-apiserver pods available on any node.", }, { Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), @@ -375,37 +357,32 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: `3 of 3 requested instances are unavailable for apiserver.openshift-apiserver (no pods found with labels "")`, }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - Message: "", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "scenario: we have an outdated (generation) workload and no errors thus we are available and we are progressing", + name: "scenario: we have a complete workload and no errors thus we are available", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", - Generation: 100, + Name: "apiserver", + Namespace: "openshift-apiserver", }, Spec: appsv1.DeploymentSpec{ Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - Replicas: 3, - ReadyReplicas: 3, - AvailableReplicas: 3, - UpdatedReplicas: 3, - ObservedGeneration: 99, + AvailableReplicas: 3, + UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, }, @@ -431,17 +408,16 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) }, }, - { - name: "scenario: rare case when we have an outdated (generation) workload and one old replica failing is but it will be picked up soon by the new rollout thus we are available and we are progressing", + name: "scenario: generation mismatch does not trigger progressing", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -453,8 +429,8 @@ func TestUpdateOperatorStatus(t *testing.T) { }, Status: appsv1.DeploymentStatus{ Replicas: 3, - ReadyReplicas: 2, - AvailableReplicas: 2, + ReadyReplicas: 3, + AvailableReplicas: 3, UpdatedReplicas: 3, ObservedGeneration: 99, Conditions: []appsv1.DeploymentCondition{ @@ -482,9 +458,9 @@ func TestUpdateOperatorStatus(t *testing.T) { }, { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionTrue, - Reason: "NewGeneration", - Message: "deployment/apiserver.openshift-apiserver: observed generation is 99, desired generation is 100.", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) @@ -567,14 +543,14 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest generation and 2/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 1/3 pods have been updated to the latest revision and 2/3 pods are available", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "progressing==false for a longer time shouldn't make the otherwise fine workload degraded", + name: "scenario: all pods updated but not all available yet", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "apiserver", @@ -584,19 +560,67 @@ func TestUpdateOperatorStatus(t *testing.T) { Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 3, + AvailableReplicas: 2, + ReadyReplicas: 2, UpdatedReplicas: 3, Conditions: []appsv1.DeploymentCondition{ - {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "NewReplicaSetAvailable", Message: "has successfully progressed"}, + {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, }, }, }, - previousConditions: []operatorv1.OperatorCondition{ - { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", - LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), + validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { + expectedConditions := []operatorv1.OperatorCondition{ + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeAvailable), + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sWorkloadDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + }, + { + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "", + }, + { + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionTrue, + Reason: "PodsUpdating", + Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest revision and 2/3 pods are available", + }, + } + return areConditionsEqual(expectedConditions, actualStatus.Conditions) + }, + }, + { + name: "scenario: available workload with progress deadline exceeded", + workload: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 2, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: ptr.To[int32](3), + }, + Status: appsv1.DeploymentStatus{ + AvailableReplicas: 2, + UpdatedReplicas: 1, + ObservedGeneration: 2, + Conditions: []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentProgressing, + Status: corev1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment has timed out", + LastUpdateTime: metav1.Now(), + LastTransitionTime: metav1.Now(), + }, + }, }, }, validateOperatorStatus: func(actualStatus *operatorv1.OperatorStatus) error { @@ -611,33 +635,37 @@ func TestUpdateOperatorStatus(t *testing.T) { Status: operatorv1.ConditionFalse, }, { - Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeploymentDegraded", defaultControllerName), + Status: operatorv1.ConditionTrue, + Reason: "UnavailablePod", + Message: `1 of 3 requested instances are unavailable for apiserver.openshift-apiserver (no pods found with labels "")`, }, { - Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), - Status: operatorv1.ConditionFalse, - Reason: "AsExpected", + Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), + Status: operatorv1.ConditionFalse, + Reason: "ProgressDeadlineExceeded", + Message: "deployment/apiserver.openshift-apiserver has timed out progressing: deployment has timed out", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) }, }, { - name: "some pods rolled out and waiting for old terminating pod before we can progress further", + name: "scenario: workload rollout with maxSurge (4 of 3 replicas available)", workload: &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "apiserver", - Namespace: "openshift-apiserver", + Name: "apiserver", + Namespace: "openshift-apiserver", + Generation: 5, }, Spec: appsv1.DeploymentSpec{ Replicas: ptr.To[int32](3), }, Status: appsv1.DeploymentStatus{ - AvailableReplicas: 2, - ReadyReplicas: 2, - UpdatedReplicas: 3, + Replicas: 4, + AvailableReplicas: 4, + UpdatedReplicas: 2, + ObservedGeneration: 5, Conditions: []appsv1.DeploymentCondition{ {Type: appsv1.DeploymentProgressing, Status: corev1.ConditionTrue, LastUpdateTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "ReplicaSetUpdated", Message: "progressing"}, }, @@ -665,7 +693,7 @@ func TestUpdateOperatorStatus(t *testing.T) { Type: fmt.Sprintf("%sDeployment%s", defaultControllerName, operatorv1.OperatorStatusTypeProgressing), Status: operatorv1.ConditionTrue, Reason: "PodsUpdating", - Message: "deployment/apiserver.openshift-apiserver: 3/3 pods have been updated to the latest generation and 2/3 pods are available", + Message: "deployment/apiserver.openshift-apiserver: 2/3 pods have been updated to the latest revision and 4/3 pods are available", }, } return areConditionsEqual(expectedConditions, actualStatus.Conditions) @@ -680,9 +708,7 @@ func TestUpdateOperatorStatus(t *testing.T) { &operatorv1.OperatorSpec{ ManagementState: operatorv1.Managed, }, - &operatorv1.OperatorStatus{ - Conditions: scenario.previousConditions, - }, + &operatorv1.OperatorStatus{}, nil, ) targetNs := ""