@@ -75,7 +75,6 @@ import (
7575 "context"
7676 "fmt"
7777 "sort"
78- "strings"
7978 "time"
8079
8180 "github.com/go-logr/logr"
@@ -184,33 +183,102 @@ func removeProtectionFinalizer(pvc *corev1.PersistentVolumeClaim, log logr.Logge
184183 return false
185184}
186185
187- // verifyPVCleanupComplete checks if all PVs associated with the RabbitMQ instance
188- // have been deleted. Returns true if cleanup is complete, false if still in progress.
186+ // removePVProtectionFinalizer removes the kubernetes.io/pv-protection finalizer from a PV.
187+ // Returns true if the finalizer was found and removed.
188+ //
189+ // Other finalizers (CSI drivers, storage backend) are preserved.
190+ //
191+ // The kubernetes.io/pv-protection finalizer prevents PV deletion while bound to a PVC.
192+ // When a PVC is deleted, the PV goes to Released state, but sometimes the finalizer
193+ // isn't automatically removed by Kubernetes. We remove it to allow the Delete reclaim
194+ // policy to take effect.
195+ func removePVProtectionFinalizer (pv * corev1.PersistentVolume , log logr.Logger ) bool {
196+ originalCount := len (pv .Finalizers )
197+ if originalCount == 0 {
198+ return false
199+ }
200+
201+ // Filter out only the kubernetes.io/pv-protection finalizer
202+ var filtered []string
203+ for _ , f := range pv .Finalizers {
204+ if f != "kubernetes.io/pv-protection" {
205+ filtered = append (filtered , f )
206+ }
207+ }
208+
209+ if len (filtered ) < originalCount {
210+ log .Info ("Removing PV protection finalizer to allow deletion during upgrade" ,
211+ "pvName" , pv .Name ,
212+ "removedFinalizer" , "kubernetes.io/pv-protection" ,
213+ "remainingFinalizers" , filtered )
214+ pv .Finalizers = filtered
215+ return true
216+ }
217+
218+ return false
219+ }
220+
221+ // verifyPVCleanupComplete checks if all PVs from the provided list have been deleted.
222+ // Returns true if cleanup is complete, false if still in progress.
189223//
190224// Verifies clean storage before recreating the cluster.
191225//
192226// Implements timeout protection (30 minutes) for PV deletion.
193- func (r * Reconciler ) verifyPVCleanupComplete (ctx context.Context , namespace , instanceName string , storageWipeStartedAt * time.Time , log logr.Logger ) (bool , error ) {
194- pvList := & corev1.PersistentVolumeList {}
195- if err := r .List (ctx , pvList ); err != nil {
196- return false , fmt .Errorf ("failed to list PVs during upgrade verification: %w. " +
197- "List PVs: kubectl get pv -A" , err )
227+ //
228+ // pvNames is the list of PV names that were captured during PVC deletion.
229+ // This optimization avoids scanning all PVs in the cluster.
230+ func (r * Reconciler ) verifyPVCleanupComplete (ctx context.Context , pvNames []string , storageWipeStartedAt * time.Time , log logr.Logger ) (bool , error ) {
231+ // If no PVs were tracked, cleanup is complete
232+ if len (pvNames ) == 0 {
233+ log .Info ("No PVs to clean up" )
234+ return true , nil
198235 }
199236
200- // Check if any PVs are still referencing our PVCs
201- pvcPrefix := fmt .Sprintf ("persistence-%s-" , instanceName )
237+ // Check which PVs still exist and remove protection finalizers if needed
202238 var stuckPVs []string
203- for _ , pv := range pvList .Items {
204- if pv .Spec .ClaimRef != nil &&
205- pv .Spec .ClaimRef .Namespace == namespace &&
206- strings .HasPrefix (pv .Spec .ClaimRef .Name , pvcPrefix ) {
207-
208- log .Info ("Waiting for PV cleanup to complete" ,
209- "pvName" , pv .Name ,
210- "phase" , pv .Status .Phase ,
211- "pvcName" , pv .Spec .ClaimRef .Name )
212- stuckPVs = append (stuckPVs , pv .Name )
239+ for _ , pvName := range pvNames {
240+ pv := & corev1.PersistentVolume {}
241+ err := r .Get (ctx , types.NamespacedName {Name : pvName }, pv )
242+
243+ // If PV doesn't exist, it's been cleaned up (expected)
244+ if k8s_errors .IsNotFound (err ) {
245+ continue
246+ }
247+
248+ // If we can't get the PV for other reasons, include it to be safe
249+ if err != nil {
250+ log .V (1 ).Info ("Unable to get PV, including in cleanup check" ,
251+ "pvName" , pvName ,
252+ "error" , err )
253+ stuckPVs = append (stuckPVs , pvName )
254+ continue
213255 }
256+
257+ // If PV is in Released phase (PVC deleted but PV still exists),
258+ // remove kubernetes.io/pv-protection finalizer to allow deletion
259+ if pv .Status .Phase == corev1 .VolumeReleased {
260+ if removePVProtectionFinalizer (pv , log ) {
261+ if err := r .Update (ctx , pv ); err != nil {
262+ log .Error (err , "Failed to remove pv-protection finalizer from PV" ,
263+ "pvName" , pvName )
264+ // Don't fail - just log and continue, will retry next reconcile
265+ } else {
266+ log .Info ("Removed pv-protection finalizer from Released PV" ,
267+ "pvName" , pvName )
268+ }
269+ }
270+ }
271+
272+ // PV still exists - track it
273+ pvcName := ""
274+ if pv .Spec .ClaimRef != nil {
275+ pvcName = pv .Spec .ClaimRef .Name
276+ }
277+ log .Info ("Waiting for PV cleanup to complete" ,
278+ "pvName" , pvName ,
279+ "phase" , pv .Status .Phase ,
280+ "pvcName" , pvcName )
281+ stuckPVs = append (stuckPVs , pvName )
214282 }
215283
216284 if len (stuckPVs ) > 0 {
@@ -250,10 +318,10 @@ func (r *Reconciler) verifyPVCleanupComplete(ctx context.Context, namespace, ins
250318// 1. PV deletion (due to Delete reclaim policy)
251319// 2. Storage backend cleanup
252320//
253- // Returns true if PVCs were deleted.
254- func (r * Reconciler ) deletePVCsForUpgrade (ctx context.Context , pvcList * corev1.PersistentVolumeClaimList , log logr.Logger ) (bool , error ) {
321+ // Returns true if PVCs were deleted and the list of PV names that should be deleted .
322+ func (r * Reconciler ) deletePVCsForUpgrade (ctx context.Context , pvcList * corev1.PersistentVolumeClaimList , log logr.Logger ) (bool , [] string , error ) {
255323 if len (pvcList .Items ) == 0 {
256- return false , nil
324+ return false , nil , nil
257325 }
258326
259327 // Sort PVCs by name to ensure deterministic deletion order
@@ -264,14 +332,17 @@ func (r *Reconciler) deletePVCsForUpgrade(ctx context.Context, pvcList *corev1.P
264332
265333 log .Info ("Deleting PVCs in deterministic order" , "pvcCount" , len (pvcList .Items ))
266334
335+ // Track which PVs should be deleted (for efficient cleanup verification)
336+ pvNames := make ([]string , 0 , len (pvcList .Items ))
337+
267338 for i := range pvcList .Items {
268339 pvc := & pvcList .Items [i ]
269340
270341 // Remove kubernetes.io/pvc-protection finalizer to allow deletion
271342 // Other finalizers are preserved for CSI driver cleanup
272343 if removeProtectionFinalizer (pvc , log ) {
273344 if err := r .Update (ctx , pvc ); err != nil {
274- return false , fmt .Errorf ("failed to remove finalizer from PVC %s: %w. " +
345+ return false , nil , fmt .Errorf ("failed to remove finalizer from PVC %s: %w. " +
275346 "Check PVC: kubectl get pvc %s -n %s -o yaml" ,
276347 pvc .Name , err , pvc .Name , pvc .Namespace )
277348 }
@@ -280,17 +351,22 @@ func (r *Reconciler) deletePVCsForUpgrade(ctx context.Context, pvcList *corev1.P
280351 // Delete the PVC
281352 // The Delete reclaim policy on the PV triggers automatic storage cleanup
282353 if err := r .Delete (ctx , pvc ); err != nil && ! k8s_errors .IsNotFound (err ) {
283- return false , fmt .Errorf ("failed to delete PVC %s: %w. " +
354+ return false , nil , fmt .Errorf ("failed to delete PVC %s: %w. " +
284355 "Check PVC: kubectl describe pvc %s -n %s" ,
285356 pvc .Name , err , pvc .Name , pvc .Namespace )
286357 }
287358
288359 log .Info ("Deleted PVC - storage will be automatically wiped by Delete reclaim policy" ,
289360 "pvcName" , pvc .Name ,
290361 "pvName" , pvc .Spec .VolumeName )
362+
363+ // Track the PV for cleanup verification
364+ if pvc .Spec .VolumeName != "" {
365+ pvNames = append (pvNames , pvc .Spec .VolumeName )
366+ }
291367 }
292368
293- return true , nil
369+ return true , pvNames , nil
294370}
295371
296372// waitForPodsTermination checks if all pods associated with the RabbitMQ instance
@@ -340,6 +416,8 @@ type StorageWipeParams struct {
340416 Reason string
341417 // Timestamp when storage wipe started (for timeout tracking)
342418 StorageWipeStartedAt * time.Time
419+ // PV names being deleted (populated during PVC deletion for efficient verification)
420+ PVsBeingDeleted []string
343421 // Function to delete the RabbitMQCluster
344422 DeleteCluster func (ctx context.Context ) error
345423 // Function to delete the ha-all mirrored policy (optional)
@@ -429,20 +507,24 @@ func (r *Reconciler) performStorageWipe(
429507
430508 // Step 4b: Delete all PVCs
431509 // The Delete reclaim policy will trigger automatic storage cleanup
432- pvcDeleted , err := r .deletePVCsForUpgrade (ctx , pvcList , log )
510+ pvcDeleted , pvNames , err := r .deletePVCsForUpgrade (ctx , pvcList , log )
433511 if err != nil {
434512 log .Error (err , "Failed to delete PVCs during storage wipe" )
435513 return ctrl.Result {}, err
436514 }
437515
438516 if pvcDeleted {
439- log .Info ("Deleted all PVCs, waiting for PV cleanup to complete" , "pvcCount" , len (pvcList .Items ))
517+ // Store the PV names for efficient cleanup verification
518+ params .PVsBeingDeleted = pvNames
519+ log .Info ("Deleted all PVCs, waiting for PV cleanup to complete" ,
520+ "pvcCount" , len (pvcList .Items ),
521+ "pvCount" , len (pvNames ))
440522 return ctrl.Result {RequeueAfter : UpgradeCheckInterval }, nil
441523 }
442524 }
443525
444526 // Step 5: Verify all PVs are completely cleaned up
445- cleanupComplete , err := r .verifyPVCleanupComplete (ctx , params .Namespace , params . InstanceName , params .StorageWipeStartedAt , log )
527+ cleanupComplete , err := r .verifyPVCleanupComplete (ctx , params .PVsBeingDeleted , params .StorageWipeStartedAt , log )
446528 if err != nil {
447529 log .Error (err , "Failed to verify PV cleanup" )
448530 return ctrl.Result {}, fmt .Errorf ("PV cleanup verification failed: %w. " +
0 commit comments