44 "context"
55 "errors"
66 "fmt"
7- "strings"
87
98 appsv1 "k8s.io/api/apps/v1"
109 corev1 "k8s.io/api/core/v1"
@@ -19,7 +18,6 @@ import (
1918
2019 operatorv1 "github.com/openshift/api/operator/v1"
2120 applyoperatorv1 "github.com/openshift/client-go/operator/applyconfigurations/operator/v1"
22- "github.com/openshift/library-go/pkg/apps/deployment"
2321 "github.com/openshift/library-go/pkg/controller/factory"
2422 "github.com/openshift/library-go/pkg/operator/events"
2523 "github.com/openshift/library-go/pkg/operator/status"
@@ -291,7 +289,7 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
291289 deploymentAvailableCondition = deploymentAvailableCondition .
292290 WithStatus (operatorv1 .ConditionFalse ).
293291 WithReason ("NoPod" ).
294- WithMessage (fmt .Sprintf ("no %s.%s pods available on any node. " , workload .Name , c .targetNamespace ))
292+ WithMessage (fmt .Sprintf ("no %s.%s pods available on any node" , workload .Name , c .targetNamespace ))
295293 } else {
296294 deploymentAvailableCondition = deploymentAvailableCondition .
297295 WithStatus (operatorv1 .ConditionTrue ).
@@ -303,23 +301,24 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
303301 desiredReplicas = * (workload .Spec .Replicas )
304302 }
305303
306- // If the workload is up to date, then we are no longer progressing
307- workloadAtHighestGeneration := workload .ObjectMeta .Generation == workload .Status .ObservedGeneration
308- // Update is done when all pods have been updated to the latest revision
309- // and the deployment controller has reported NewReplicaSetAvailable
310- workloadIsBeingUpdated := ! workloadAtHighestGeneration || ! hasDeploymentProgressed (workload .Status )
311- workloadIsBeingUpdatedTooLong := v1helpers .IsUpdatingTooLong (previousStatus , * deploymentProgressingCondition .Type )
312- if ! workloadAtHighestGeneration {
313- deploymentProgressingCondition = deploymentProgressingCondition .
314- WithStatus (operatorv1 .ConditionTrue ).
315- WithReason ("NewGeneration" ).
316- WithMessage (fmt .Sprintf ("deployment/%s.%s: observed generation is %d, desired generation is %d." , workload .Name , c .targetNamespace , workload .Status .ObservedGeneration , workload .ObjectMeta .Generation ))
317- } else if workloadIsBeingUpdated {
304+ // Update is done when the deployment controller has reported NewReplicaSetAvailable.
305+ // Checking the current vs. observed generation here is not possible since we don't want to be Progressing on scaling.
306+ progressTimedOutMessage , workloadIsBeingUpdatedTooLong := hasDeploymentTimedOutProgressing (workload .Status )
307+ workloadIsBeingUpdated := ! hasDeploymentProgressed (workload .Status ) && ! workloadIsBeingUpdatedTooLong
308+ switch {
309+ case workloadIsBeingUpdated :
318310 deploymentProgressingCondition = deploymentProgressingCondition .
319311 WithStatus (operatorv1 .ConditionTrue ).
320312 WithReason ("PodsUpdating" ).
321- WithMessage (fmt .Sprintf ("deployment/%s.%s: %d/%d pods have been updated to the latest generation and %d/%d pods are available" , workload .Name , c .targetNamespace , workload .Status .UpdatedReplicas , desiredReplicas , workload .Status .AvailableReplicas , desiredReplicas ))
322- } else {
313+ WithMessage (fmt .Sprintf ("deployment/%s.%s: %d/%d pods have been updated to the latest revision and %d/%d pods are available" , workload .Name , c .targetNamespace , workload .Status .UpdatedReplicas , desiredReplicas , workload .Status .AvailableReplicas , desiredReplicas ))
314+
315+ case workloadIsBeingUpdatedTooLong :
316+ deploymentProgressingCondition = deploymentProgressingCondition .
317+ WithStatus (operatorv1 .ConditionFalse ).
318+ WithReason ("ProgressDeadlineExceeded" ).
319+ WithMessage (fmt .Sprintf ("deployment/%s.%s has timed out progressing: %s" , workload .Name , c .targetNamespace , progressTimedOutMessage ))
320+
321+ default :
323322 // Terminating pods don't account for any of the other status fields but
324323 // still can exist in a state when they are accepting connections and would
325324 // contribute to unexpected behavior when we report Progressing=False.
@@ -332,22 +331,22 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
332331 WithReason ("AsExpected" )
333332 }
334333
335- // During a rollout the default maxSurge (25%) will allow the available
336- // replicas to temporarily exceed the desired replica count. If this were
337- // to occur, the operator should not report degraded.
338- workloadHasAllPodsAvailable := workload .Status .AvailableReplicas >= desiredReplicas
339- if ! workloadHasAllPodsAvailable && (! workloadIsBeingUpdated || workloadIsBeingUpdatedTooLong ) {
340- numNonAvailablePods := desiredReplicas - workload .Status .AvailableReplicas
334+ // Degraded is set when the deployment is not Available or timed out progressing.
335+ // This will cause the operator to go Degraded during the initial rollout.
336+ switch {
337+ case workload .Status .AvailableReplicas == 0 :
341338 deploymentDegradedCondition = deploymentDegradedCondition .
342339 WithStatus (operatorv1 .ConditionTrue ).
343- WithReason ("UnavailablePod" )
344- podContainersStatus , err := deployment .PodContainersStatus (workload , c .podsLister )
345- if err != nil {
346- podContainersStatus = []string {fmt .Sprintf ("failed to get pod containers details: %v" , err )}
347- }
340+ WithReason ("Unavailable" ).
341+ WithMessage (fmt .Sprintf ("no %s.%s pods available on any node" , workload .Name , c .targetNamespace ))
342+
343+ case workloadIsBeingUpdatedTooLong :
348344 deploymentDegradedCondition = deploymentDegradedCondition .
349- WithMessage (fmt .Sprintf ("%v of %v requested instances are unavailable for %s.%s (%s)" , numNonAvailablePods , desiredReplicas , workload .Name , c .targetNamespace , strings .Join (podContainersStatus , ", " )))
350- } else {
345+ WithStatus (operatorv1 .ConditionTrue ).
346+ WithReason ("ProgressDeadlineExceeded" ).
347+ WithMessage (fmt .Sprintf ("deployment/%s.%s has timed out progressing: %s" , workload .Name , c .targetNamespace , progressTimedOutMessage ))
348+
349+ default :
351350 deploymentDegradedCondition = deploymentDegradedCondition .
352351 WithStatus (operatorv1 .ConditionFalse ).
353352 WithReason ("AsExpected" )
@@ -356,8 +355,11 @@ func (c *Controller) updateOperatorStatus(ctx context.Context, previousStatus *o
356355 // if the deployment is all available and at the expected generation, then update the version to the latest
357356 // when we update, the image pull spec should immediately be different, which should immediately cause a deployment rollout
358357 // which should immediately result in a deployment generation diff, which should cause this block to be skipped until it is ready.
359- workloadHasAllPodsUpdated := workload .Status .UpdatedReplicas == desiredReplicas
360- if workloadAtHighestGeneration && workloadHasAllPodsAvailable && workloadHasAllPodsUpdated && operatorConfigAtHighestGeneration {
358+ if operatorConfigAtHighestGeneration &&
359+ workload .ObjectMeta .Generation == workload .Status .ObservedGeneration &&
360+ workload .Status .AvailableReplicas == desiredReplicas &&
361+ workload .Status .UpdatedReplicas == desiredReplicas {
362+
361363 c .versionRecorder .SetVersion (c .constructOperandNameFor (workload .Name ), c .targetOperandVersion )
362364 }
363365
@@ -386,6 +388,17 @@ func hasDeploymentProgressed(status appsv1.DeploymentStatus) bool {
386388 return false
387389}
388390
391+ // hasDeploymentTimedOutProgressing returns true if the deployment reports ProgressDeadlineExceeded.
392+ // The function returns the Progressing condition message as the first return value.
393+ func hasDeploymentTimedOutProgressing (status appsv1.DeploymentStatus ) (string , bool ) {
394+ for _ , cond := range status .Conditions {
395+ if cond .Type == appsv1 .DeploymentProgressing {
396+ return cond .Message , cond .Status == corev1 .ConditionFalse && cond .Reason == "ProgressDeadlineExceeded"
397+ }
398+ }
399+ return "" , false
400+ }
401+
389402// EnsureAtMostOnePodPerNode updates the deployment spec to prevent more than
390403// one pod of a given replicaset from landing on a node. It accomplishes this
391404// by adding a label on the template and updates the pod anti-affinity term to include that label.
0 commit comments