diff --git a/api/hypershift/v1beta1/hostedcluster_conditions.go b/api/hypershift/v1beta1/hostedcluster_conditions.go index e9e01de0acc..204c012044d 100644 --- a/api/hypershift/v1beta1/hostedcluster_conditions.go +++ b/api/hypershift/v1beta1/hostedcluster_conditions.go @@ -252,6 +252,12 @@ const ( // **False / AutoNodeProgressing** means AutoNode is being enabled or disabled — the operation is in progress. // **False / AutoNodeNotConfigured** means AutoNode is not configured in the spec and all Karpenter components have been removed. AutoNodeEnabled ConditionType = "AutoNodeEnabled" + + // HostedClusterDeleting indicates whether the HostedCluster is being deleted and + // provides first-class visibility into which phase of deletion the cluster is in. + // **False / AsExpected** means the cluster is not being deleted. + // **True** means deletion is in progress; the Reason and Message indicate the current phase. + HostedClusterDeleting ConditionType = "HostedClusterDeleting" ) // Reasons. @@ -315,6 +321,8 @@ const ( CloudResourcesCleanupSkippedReason = "CloudResourcesCleanupSkipped" + CloudResourcesDeletionTimedOutReason = "CloudResourcesDeletionTimedOut" + DataPlaneConnectionNoKonnectivityAgentPodsNotFoundReason = "KonnectivityAgentPodsNotFound" DataPlaneConnectionLogsAccessFailedReason = "LogsAccessFailed" @@ -334,6 +342,15 @@ const ( AutoNodeNotConfiguredReason = "AutoNodeNotConfigured" AutoNodeProgressingReason = "AutoNodeProgressing" AutoNodeEvaluationFailedReason = "AutoNodeEvaluationFailed" + + // HostedClusterDeleting reasons. 
+ DeletionWaitingForNodePoolDeletionReason = "WaitingForNodePoolDeletion" + DeletionWaitingForCAPIClusterDeletionReason = "WaitingForCAPIClusterDeletion" + DeletionWaitingForEndpointServiceDeletionReason = "WaitingForEndpointServiceDeletion" + DeletionWaitingForPrivateConnectDeletionReason = "WaitingForPrivateConnectDeletion" + DeletionWaitingForControlPlaneDeletionReason = "WaitingForControlPlaneDeletion" + DeletionWaitingForNamespaceDeletionReason = "WaitingForNamespaceDeletion" + DeletionCompletedReason = "DeletionCompleted" ) // Messages. diff --git a/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go b/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go index 1dba7b85b32..7d7250380b6 100644 --- a/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go +++ b/control-plane-operator/controllers/awsprivatelink/awsprivatelink_controller.go @@ -129,7 +129,6 @@ func (r *PrivateServiceObserver) SetupWithManager(ctx context.Context, mgr ctrl. 
} func (r *PrivateServiceObserver) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - r.log.Info("reconciling") // Fetch the Service svc, err := r.clientset.CoreV1().Services(req.Namespace).Get(ctx, req.Name, metav1.GetOptions{}) @@ -181,7 +180,6 @@ func (r *PrivateServiceObserver) Reconcile(ctx context.Context, req ctrl.Request }); err != nil { return ctrl.Result{}, fmt.Errorf("failed to reconcile AWSEndpointService: %w", err) } - r.log.Info("reconcile complete", "request", req) return ctrl.Result{}, nil } @@ -412,8 +410,6 @@ func (r *AWSEndpointServiceReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, fmt.Errorf("logger not found: %w", err) } - log.Info("reconciling") - // Fetch the AWSEndpointService obj := &hyperv1.AWSEndpointService{ ObjectMeta: metav1.ObjectMeta{ @@ -548,7 +544,6 @@ func (r *AWSEndpointServiceReconciler) Reconcile(ctx context.Context, req ctrl.R } } - log.Info("reconciliation complete") // always requeue to catch and report out of band changes in AWS // NOTICE: if the RequeueAfter interval is short enough, it could result in hitting some AWS request limits. 
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil diff --git a/control-plane-operator/controllers/gcpprivateserviceconnect/observer.go b/control-plane-operator/controllers/gcpprivateserviceconnect/observer.go index 72c3db45da3..4fdfc1f8573 100644 --- a/control-plane-operator/controllers/gcpprivateserviceconnect/observer.go +++ b/control-plane-operator/controllers/gcpprivateserviceconnect/observer.go @@ -64,8 +64,6 @@ func (r *GCPPrivateServiceObserver) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } - r.log.Info("reconciling") - // Fetch the Service svc := &corev1.Service{} if err := r.Get(ctx, req.NamespacedName, svc); err != nil { @@ -114,7 +112,6 @@ func (r *GCPPrivateServiceObserver) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, fmt.Errorf("failed to reconcile GCPPrivateServiceConnect: %w", err) } - r.log.Info("reconcile complete", "request", req, "loadBalancerIP", loadBalancerIP) return ctrl.Result{}, nil } diff --git a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go index 7b7b40059fe..8ba034c1f0d 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go +++ b/control-plane-operator/controllers/hostedcontrolplane/hostedcontrolplane_controller.go @@ -2431,9 +2431,10 @@ func (r *HostedControlPlaneReconciler) removeCloudResources(ctx context.Context, return true, nil } - // check if cleanup has been skipped + // check if cleanup has been skipped or timed out if resourcesDestroyedCond != nil && resourcesDestroyedCond.Status == metav1.ConditionFalse && - resourcesDestroyedCond.Reason == string(hyperv1.CloudResourcesCleanupSkippedReason) { + (resourcesDestroyedCond.Reason == string(hyperv1.CloudResourcesCleanupSkippedReason) || + resourcesDestroyedCond.Reason == string(hyperv1.CloudResourcesDeletionTimedOutReason)) { log.Info("Cleanup has been 
skipped", "reason", resourcesDestroyedCond.Message) return true, nil } @@ -2453,6 +2454,19 @@ func (r *HostedControlPlaneReconciler) removeCloudResources(ctx context.Context, if timeElapsed > resourceDeletionTimeout { log.Info("Giving up on resource deletion after timeout", "timeElapsed", duration.ShortHumanDuration(timeElapsed)) + message := fmt.Sprintf("Giving up on cloud resource deletion after %s", duration.ShortHumanDuration(timeElapsed)) + if resourcesDestroyedCond != nil && resourcesDestroyedCond.Message != "" { + message = fmt.Sprintf("%s (last status: %s)", message, resourcesDestroyedCond.Message) + } + meta.SetStatusCondition(&hcp.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.CloudResourcesDestroyed), + Status: metav1.ConditionFalse, + Reason: string(hyperv1.CloudResourcesDeletionTimedOutReason), + Message: message, + }) + if err := r.Status().Update(ctx, hcp); err != nil { + return false, fmt.Errorf("failed to update cloud resources destroyed condition: %w", err) + } return true, nil } return false, nil diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/drainer/drainer.go b/control-plane-operator/hostedclusterconfigoperator/controllers/drainer/drainer.go index f7993e7d5f6..002da915061 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/drainer/drainer.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/drainer/drainer.go @@ -43,7 +43,6 @@ type Reconciler struct { func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") node := &corev1.Node{} err := r.guestClusterClient.Get(ctx, req.NamespacedName, node) diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/globalps/globalps.go b/control-plane-operator/hostedclusterconfigoperator/controllers/globalps/globalps.go index 1c4eb345857..f1e3fcd3861 100644 --- 
a/control-plane-operator/hostedclusterconfigoperator/controllers/globalps/globalps.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/globalps/globalps.go @@ -47,8 +47,6 @@ type Reconciler struct { } func (r *Reconciler) Reconcile(ctx context.Context, req crreconcile.Request) (crreconcile.Result, error) { - log := ctrl.LoggerFrom(ctx) - log.Info("reconciling global pull secret") // Reconcile GlobalPullSecret if err := r.reconcileGlobalPullSecret(ctx); err != nil { @@ -77,7 +75,6 @@ func (r *Reconciler) reconcileGlobalPullSecret(ctx context.Context) error { ok bool ) log := ctrl.LoggerFrom(ctx) - log.Info("reconciling global pull secret") // Create ServiceAccount for global-pull-secret-syncer serviceAccount := manifests.GlobalPullSecretServiceAccount() @@ -180,8 +177,6 @@ func (r *Reconciler) reconcileGlobalPullSecret(ctx context.Context) error { } func reconcileDaemonSet(ctx context.Context, daemonSet *appsv1.DaemonSet, globalPullSecretName string, originalPullSecretName string, configSeed string, c crclient.Client, createOrUpdate upsert.CreateOrUpdateFN, hccoImage string) error { - log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling global pull secret daemon set") if _, err := createOrUpdate(ctx, c, daemonSet, func() error { daemonSet.Spec = appsv1.DaemonSetSpec{ diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus.go b/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus.go index 173f26790e8..6ffe2579271 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/hcpstatus/hcpstatus.go @@ -99,7 +99,6 @@ func findClusterOperatorStatusCondition(conditions []configv1.ClusterOperatorSta func (h *hcpStatusReconciler) reconcile(ctx context.Context, hcp *hyperv1.HostedControlPlane) error { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling hosted cluster version 
conditions") var clusterVersion configv1.ClusterVersion err := h.hostedClusterClient.Get(ctx, crclient.ObjectKey{Name: "version"}, &clusterVersion) diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go b/control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go index 944a98117e0..d574f782ab0 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/inplaceupgrader/inplaceupgrader.go @@ -71,7 +71,6 @@ type Reconciler struct { func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") // Fetch the MachineSet. machineSet := &capiv1.MachineSet{} diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/machine/machine.go b/control-plane-operator/hostedclusterconfigoperator/controllers/machine/machine.go index 6f770c581c5..c60b926e4db 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/machine/machine.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/machine/machine.go @@ -32,7 +32,6 @@ const ( func (r *reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") hcp := &hyperv1.HostedControlPlane{} if err := r.client.Get(ctx, r.hcpKey, hcp); err != nil { diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/node/node.go b/control-plane-operator/hostedclusterconfigoperator/controllers/node/node.go index d43a6d48e7d..38f389c42bb 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/node/node.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/node/node.go @@ -36,7 +36,6 @@ type reconciler struct { func (r *reconciler) Reconcile(ctx context.Context, 
req reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") node := &corev1.Node{} if err := r.guestClusterClient.Get(ctx, req.NamespacedName, node); err != nil { diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/nodecount/controller.go b/control-plane-operator/hostedclusterconfigoperator/controllers/nodecount/controller.go index 5c46d71989a..fa48821df74 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/nodecount/controller.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/nodecount/controller.go @@ -27,7 +27,6 @@ type reconciler struct { func (r *reconciler) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") var hcp hypershiftv1beta1.HostedControlPlane if err := r.lister.Get(ctx, client.ObjectKey{ diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/kas/admissionpolicies.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/kas/admissionpolicies.go index fcee20b8d52..b7708238a77 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/kas/admissionpolicies.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/kas/admissionpolicies.go @@ -20,7 +20,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -61,9 +60,6 @@ var ( // ReconcileKASValidatingAdmissionPolicies will create ValidatingAdmissionPolicies which block certain resources // from being updated/deleted from the DataPlane side. 
func ReconcileKASValidatingAdmissionPolicies(ctx context.Context, hcp *hyperv1.HostedControlPlane, client client.Client, createOrUpdate upsert.CreateOrUpdateFN) error { - log := ctrl.LoggerFrom(ctx) - log.Info("reconciling validating admission policies") - if err := reconcileConfigValidatingAdmissionPolicy(ctx, hcp, client, createOrUpdate); err != nil { return fmt.Errorf("failed to reconcile Config Validating Admission Policy: %v", err) } diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/registry/admissionpolicies.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/registry/admissionpolicies.go index 85f3a774983..55bc7cc5d3d 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/registry/admissionpolicies.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/registry/admissionpolicies.go @@ -38,9 +38,6 @@ var ( ) func ReconcileRegistryConfigValidatingAdmissionPolicies(ctx context.Context, hcp *hyperv1.HostedControlPlane, client client.Client, createOrUpdate upsert.CreateOrUpdateFN) error { - log := ctrl.LoggerFrom(ctx) - log.Info("reconciling image registry config validating admission policies") - if err := reconcileRegistryConfigManagementStateValidatingAdmissionPolicy(ctx, hcp, client, createOrUpdate); err != nil { return fmt.Errorf("failed to reconcile ManagementState Validating Admission Policy: %v", err) } diff --git a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go index 9b91e284ccb..e7723a87450 100644 --- a/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go +++ b/control-plane-operator/hostedclusterconfigoperator/controllers/resources/resources.go @@ -318,7 +318,6 @@ func Setup(ctx context.Context, opts *operator.HostedClusterConfigOperatorConfig func (r *reconciler) Reconcile(ctx 
context.Context, _ ctrl.Request) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") hcp := manifests.HostedControlPlane(r.hcpNamespace, r.hcpName) if err := r.cpClient.Get(ctx, client.ObjectKeyFromObject(hcp), hcp); err != nil { diff --git a/docs/content/reference/aggregated-docs.md b/docs/content/reference/aggregated-docs.md index 9ef5da72720..f96aad438d4 100644 --- a/docs/content/reference/aggregated-docs.md +++ b/docs/content/reference/aggregated-docs.md @@ -37716,6 +37716,12 @@ When this is false for too long and there’s no clear indication in the &ld
HostedClusterDegraded indicates whether the HostedCluster is encountering an error that may require user intervention to resolve.
"HostedClusterDeleting"
HostedClusterDeleting indicates whether the HostedCluster is being deleted and +provides first-class visibility into which phase of deletion the cluster is in. +False / AsExpected means the cluster is not being deleted. +True means deletion is in progress; the Reason and Message indicate the current phase.
+"HostedClusterDestroyed"
HostedClusterDestroyed indicates that a hosted cluster has finished destroying and that it is waiting for a destroy grace period to elapse. The grace period is determined by the hypershift.openshift.io/destroy-grace-period annotation in the HostedCluster if present.
diff --git a/docs/content/reference/api.md b/docs/content/reference/api.md index a9ce510fc01..1ae4c88d8ac 100644 --- a/docs/content/reference/api.md +++ b/docs/content/reference/api.md @@ -5888,6 +5888,12 @@ When this is false for too long and there’s no clear indication in the &ldHostedClusterDegraded indicates whether the HostedCluster is encountering an error that may require user intervention to resolve.
"HostedClusterDeleting"
HostedClusterDeleting indicates whether the HostedCluster is being deleted and +provides first-class visibility into which phase of deletion the cluster is in. +False / AsExpected means the cluster is not being deleted. +True means deletion is in progress; the Reason and Message indicate the current phase.
+"HostedClusterDestroyed"
HostedClusterDestroyed indicates that a hosted cluster has finished destroying and that it is waiting for a destroy grace period to elapse. The grace period is determined by the hypershift.openshift.io/destroy-grace-period annotation in the HostedCluster if present.
diff --git a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go index d2ce1788ef4..693378a8428 100644 --- a/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go +++ b/hypershift-operator/controllers/hostedcluster/hostedcluster_controller.go @@ -338,7 +338,6 @@ func pauseHostedControlPlane(ctx context.Context, c client.Client, hcp *hyperv1. func (r *HostedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("reconciling") // Look up the HostedCluster instance to reconcile hcluster := &hyperv1.HostedCluster{} @@ -490,6 +489,20 @@ func (r *HostedClusterReconciler) reconcile(ctx context.Context, req ctrl.Reques } } + // Ensure HostedClusterDeleting condition always exists. + if meta.FindStatusCondition(hcluster.Status.Conditions, string(hyperv1.HostedClusterDeleting)) == nil { + meta.SetStatusCondition(&hcluster.Status.Conditions, metav1.Condition{ + Type: string(hyperv1.HostedClusterDeleting), + Status: metav1.ConditionFalse, + Reason: hyperv1.AsExpectedReason, + Message: "HostedCluster is not being deleted", + ObservedGeneration: hcluster.Generation, + }) + if err := r.Client.Status().Update(ctx, hcluster); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to initialize HostedClusterDeleting condition: %w", err) + } + } + // If deleted, clean up and return early. if !hcluster.DeletionTimestamp.IsZero() { // This new condition is necessary for OCM personnel to report any cloud dangling objects to the user. 
@@ -3337,6 +3350,22 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted controlPlaneNamespace := manifests.HostedControlPlaneNamespace(hc.Namespace, hc.Name) log := ctrl.LoggerFrom(ctx) + setDeletionProgress := func(reason, message string) error { + condition := metav1.Condition{ + Type: string(hyperv1.HostedClusterDeleting), + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + ObservedGeneration: hc.Generation, + } + old := meta.FindStatusCondition(hc.Status.Conditions, string(hyperv1.HostedClusterDeleting)) + if old != nil && old.Reason == condition.Reason && old.Message == condition.Message { + return nil + } + meta.SetStatusCondition(&hc.Status.Conditions, condition) + return r.Client.Status().Update(ctx, hc) + } + // Unpause CAPI cluster to allow deletion to proceed if err := pauseCAPICluster(ctx, r.Client, hc, false); err != nil { return false, err @@ -3393,6 +3422,10 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted if exists { log.Info("Waiting for cluster deletion", "clusterName", hc.Spec.InfraID, "controlPlaneNamespace", controlPlaneNamespace) + if err := setDeletionProgress(hyperv1.DeletionWaitingForCAPIClusterDeletionReason, + fmt.Sprintf("Waiting for CAPI cluster %s/%s to be deleted", controlPlaneNamespace, hc.Spec.InfraID)); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return false, nil } else { // once infra is deleted remove finalizers. 
@@ -3432,6 +3465,10 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted } if exists { log.Info("Waiting for awsendpointservice deletion", "controlPlaneNamespace", controlPlaneNamespace) + if err := setDeletionProgress(hyperv1.DeletionWaitingForEndpointServiceDeletionReason, + fmt.Sprintf("Waiting for AWS endpoint services in %s to be deleted", controlPlaneNamespace)); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return false, nil } } @@ -3443,6 +3480,10 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted } if exists { log.Info("Waiting for gcpprivateserviceconnect deletion", "controlPlaneNamespace", controlPlaneNamespace) + if err := setDeletionProgress(hyperv1.DeletionWaitingForPrivateConnectDeletionReason, + fmt.Sprintf("Waiting for GCP Private Service Connect resources in %s to be deleted", controlPlaneNamespace)); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return false, nil } } @@ -3474,6 +3515,10 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted } if exists { log.Info("Waiting for hostedcontrolplane deletion", "controlPlaneNamespace", controlPlaneNamespace) + if err := setDeletionProgress(hyperv1.DeletionWaitingForControlPlaneDeletionReason, + fmt.Sprintf("Waiting for HostedControlPlane %s/%s to be deleted", controlPlaneNamespace, hc.Name)); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return false, nil } @@ -3484,6 +3529,9 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted r.KubevirtInfraClients.Delete(hc.Spec.InfraID) if skipNSDeletion := hc.Annotations[hyperv1.SkipControlPlaneNamespaceDeletionAnnotation]; skipNSDeletion == "true" { + if err := setDeletionProgress(hyperv1.DeletionCompletedReason, "Deletion completed (namespace deletion skipped)"); err != nil { + return false, 
fmt.Errorf("failed to update deletion progress: %w", err) + } return true, nil } @@ -3496,10 +3544,45 @@ func (r *HostedClusterReconciler) delete(ctx context.Context, hc *hyperv1.Hosted return false, err } if exists { - log.Info("Waiting for namespace deletion", "controlPlaneNamespace", controlPlaneNamespace) + message := fmt.Sprintf("Waiting for namespace %s to be deleted", controlPlaneNamespace) + + // Fetch the namespace to inspect its phase and conditions + ns := &corev1.Namespace{} + if getErr := r.Client.Get(ctx, types.NamespacedName{Name: controlPlaneNamespace}, ns); getErr == nil { + message = fmt.Sprintf("Waiting for namespace %s to be deleted (phase: %s)", controlPlaneNamespace, ns.Status.Phase) + var details []string + for _, cond := range ns.Status.Conditions { + switch cond.Type { + case corev1.NamespaceContentRemaining, + corev1.NamespaceFinalizersRemaining, + corev1.NamespaceDeletionContentFailure: + if cond.Status == corev1.ConditionTrue { + details = append(details, fmt.Sprintf("%s: %s", cond.Type, cond.Message)) + log.Info("Namespace deletion blocked", + "controlPlaneNamespace", controlPlaneNamespace, + "conditionType", cond.Type, + "reason", cond.Reason, + "message", cond.Message, + ) + } + } + } + if len(details) > 0 { + message = fmt.Sprintf("Waiting for namespace %s to be deleted (phase: %s): %s", + controlPlaneNamespace, ns.Status.Phase, strings.Join(details, "; ")) + } + } + + log.Info(message, "controlPlaneNamespace", controlPlaneNamespace) + if err := setDeletionProgress(hyperv1.DeletionWaitingForNamespaceDeletionReason, message); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return false, nil } + if err := setDeletionProgress(hyperv1.DeletionCompletedReason, "Deletion completed"); err != nil { + return false, fmt.Errorf("failed to update deletion progress: %w", err) + } return true, nil } diff --git 
a/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_validation_controller.go b/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_validation_controller.go index 6186237b1b3..eb3143f9777 100644 --- a/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_validation_controller.go +++ b/hypershift-operator/controllers/hostedclustersizing/hostedclustersizing_validation_controller.go @@ -16,7 +16,6 @@ import ( "k8s.io/apimachinery/pkg/util/sets" metav1applyconfigurations "k8s.io/client-go/applyconfigurations/meta/v1" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -27,9 +26,6 @@ type validator struct { } func (r *validator) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { - log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") - config := schedulingv1alpha1.ClusterSizingConfiguration{} if err := r.lister.Get(ctx, request.NamespacedName, &config); err != nil { return reconcile.Result{}, fmt.Errorf("failed to get cluster sizing configuration %s: %w", request.NamespacedName.String(), err) diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go index c272c6162a2..a16da524a24 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go @@ -180,7 +180,6 @@ func (r *NodePoolReconciler) managedResources() []client.Object { func (r *NodePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") // Fetch the nodePool instance nodePool := &hyperv1.NodePool{} diff --git a/hypershift-operator/controllers/platform/aws/controller.go b/hypershift-operator/controllers/platform/aws/controller.go index a2aeb890b2c..f73b68286f1 100644 --- 
a/hypershift-operator/controllers/platform/aws/controller.go +++ b/hypershift-operator/controllers/platform/aws/controller.go @@ -264,8 +264,6 @@ func (r *AWSEndpointServiceReconciler) Reconcile(ctx context.Context, req ctrl.R if err != nil { return ctrl.Result{}, fmt.Errorf("no logger found: %w", err) } - log.Info("reconciling") - // Fetch the AWSEndpointService obj := &hyperv1.AWSEndpointService{ ObjectMeta: metav1.ObjectMeta{ @@ -372,7 +370,6 @@ func (r *AWSEndpointServiceReconciler) Reconcile(ctx context.Context, req ctrl.R } } - log.Info("reconciliation complete") // always requeue to catch and report out of band changes in AWS // NOTICE: if the RequeueAfter interval is short enough, it could result in hitting some AWS request limits. return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil diff --git a/hypershift-operator/controllers/scheduler/aws/autoscaler.go b/hypershift-operator/controllers/scheduler/aws/autoscaler.go index 3f6167e2274..dd9bbc76eca 100644 --- a/hypershift-operator/controllers/scheduler/aws/autoscaler.go +++ b/hypershift-operator/controllers/scheduler/aws/autoscaler.go @@ -342,8 +342,6 @@ func (r *RequestServingNodeAutoscaler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } log := ctrl.LoggerFrom(ctx) - log.Info("Reconciling") - podList := &corev1.PodList{} if err := r.List(ctx, podList, client.InNamespace(placeholderNamespace), client.HasLabels{PlaceholderLabel}); err != nil { return ctrl.Result{}, fmt.Errorf("failed to list placeholder pods: %w", err) diff --git a/hypershift-operator/controllers/scheduler/azure/controller.go b/hypershift-operator/controllers/scheduler/azure/controller.go index a96f4560694..09571c85263 100644 --- a/hypershift-operator/controllers/scheduler/azure/controller.go +++ b/hypershift-operator/controllers/scheduler/azure/controller.go @@ -67,7 +67,6 @@ func (r *Scheduler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resul } if !hc.DeletionTimestamp.IsZero() { - log.Info("hostedcluster 
is being deleted, aborting reconcile") return ctrl.Result{}, nil } diff --git a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go index e9e01de0acc..204c012044d 100644 --- a/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go +++ b/vendor/github.com/openshift/hypershift/api/hypershift/v1beta1/hostedcluster_conditions.go @@ -252,6 +252,12 @@ const ( // **False / AutoNodeProgressing** means AutoNode is being enabled or disabled — the operation is in progress. // **False / AutoNodeNotConfigured** means AutoNode is not configured in the spec and all Karpenter components have been removed. AutoNodeEnabled ConditionType = "AutoNodeEnabled" + + // HostedClusterDeleting indicates whether the HostedCluster is being deleted and + // provides first-class visibility into which phase of deletion the cluster is in. + // **False / AsExpected** means the cluster is not being deleted. + // **True** means deletion is in progress; the Reason and Message indicate the current phase. + HostedClusterDeleting ConditionType = "HostedClusterDeleting" ) // Reasons. @@ -315,6 +321,8 @@ const ( CloudResourcesCleanupSkippedReason = "CloudResourcesCleanupSkipped" + CloudResourcesDeletionTimedOutReason = "CloudResourcesDeletionTimedOut" + DataPlaneConnectionNoKonnectivityAgentPodsNotFoundReason = "KonnectivityAgentPodsNotFound" DataPlaneConnectionLogsAccessFailedReason = "LogsAccessFailed" @@ -334,6 +342,15 @@ const ( AutoNodeNotConfiguredReason = "AutoNodeNotConfigured" AutoNodeProgressingReason = "AutoNodeProgressing" AutoNodeEvaluationFailedReason = "AutoNodeEvaluationFailed" + + // HostedClusterDeleting reasons. 
+ DeletionWaitingForNodePoolDeletionReason = "WaitingForNodePoolDeletion" + DeletionWaitingForCAPIClusterDeletionReason = "WaitingForCAPIClusterDeletion" + DeletionWaitingForEndpointServiceDeletionReason = "WaitingForEndpointServiceDeletion" + DeletionWaitingForPrivateConnectDeletionReason = "WaitingForPrivateConnectDeletion" + DeletionWaitingForControlPlaneDeletionReason = "WaitingForControlPlaneDeletion" + DeletionWaitingForNamespaceDeletionReason = "WaitingForNamespaceDeletion" + DeletionCompletedReason = "DeletionCompleted" ) // Messages.