Skip to content

Commit 397e29b

Browse files
committed
fixup storage tracking
1 parent 9be78b8 commit 397e29b

8 files changed

Lines changed: 299 additions & 51 deletions

File tree

apis/bases/rabbitmq.openstack.org_rabbitmqs.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,6 +1969,14 @@ spec:
19691969
the openstack-operator in the top-level CR (e.g. the ContainerImage)
19701970
format: int64
19711971
type: integer
1972+
pvsBeingDeleted:
1973+
description: |-
1974+
PVsBeingDeleted - list of PV names that are expected to be deleted during storage wipe
1975+
Tracked to avoid scanning all PVs in the cluster during cleanup verification
1976+
items:
1977+
type: string
1978+
type: array
1979+
x-kubernetes-list-type: set
19721980
queueType:
19731981
description: QueueType - store whether default ha-all policy is present
19741982
or not

apis/rabbitmq/v1beta1/rabbitmq_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ type RabbitMqStatus struct {
151151
// StorageWipeStartedAt - timestamp when storage wipe process started
152152
// Used to implement timeout protection against stuck PV/PVC deletions
153153
StorageWipeStartedAt *metav1.Time `json:"storageWipeStartedAt,omitempty"`
154+
155+
// PVsBeingDeleted - list of PV names that are expected to be deleted during storage wipe
156+
// Tracked to avoid scanning all PVs in the cluster during cleanup verification
157+
// +listType=set
158+
PVsBeingDeleted []string `json:"pvsBeingDeleted,omitempty"`
154159
}
155160

156161
//+kubebuilder:object:root=true

apis/rabbitmq/v1beta1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/rabbitmq.openstack.org_rabbitmqs.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,6 +1969,14 @@ spec:
19691969
the openstack-operator in the top-level CR (e.g. the ContainerImage)
19701970
format: int64
19711971
type: integer
1972+
pvsBeingDeleted:
1973+
description: |-
1974+
PVsBeingDeleted - list of PV names that are expected to be deleted during storage wipe
1975+
Tracked to avoid scanning all PVs in the cluster during cleanup verification
1976+
items:
1977+
type: string
1978+
type: array
1979+
x-kubernetes-list-type: set
19721980
queueType:
19731981
description: QueueType - store whether default ha-all policy is present
19741982
or not

config/rbac/role.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,21 @@ rules:
2323
- ""
2424
resources:
2525
- persistentvolumeclaims
26+
verbs:
27+
- delete
28+
- get
29+
- list
30+
- update
31+
- watch
32+
- apiGroups:
33+
- ""
34+
resources:
2635
- persistentvolumes
2736
verbs:
2837
- delete
2938
- get
3039
- list
40+
- patch
3141
- update
3242
- watch
3343
- apiGroups:

internal/controller/rabbitmq/rabbitmq_controller.go

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ type Reconciler struct {
153153
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;update;delete
154154

155155
// Required to manage PersistentVolumes for version upgrades (ensuring clean storage)
156-
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;delete
156+
// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;update;patch;delete
157157

158158
// Required to manage PodDisruptionBudgets for multi-replica deployments
159159
// +kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets,verbs=get;list;watch;create;update;patch;delete
@@ -560,6 +560,29 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
560560
instance.Status.StorageWipeStartedAt = &now
561561
Log.Info("Starting storage wipe", "reason", wipeReason, "phase", "DeletingStorage")
562562

563+
// CRITICAL: Capture PV names NOW, before any deletion happens
564+
// This ensures we track the correct PVs from the current cluster
565+
// Always reset tracking when starting a new wipe to clear stale data
566+
pvcList, err := r.listPVCsForInstance(ctx, instance.Namespace, instance.Name)
567+
if err != nil {
568+
Log.Error(err, "Failed to list PVCs for PV tracking during wipe start")
569+
return ctrl.Result{}, err
570+
}
571+
572+
// Always update pvsBeingDeleted, even if empty - this clears stale data
573+
pvNames := make([]string, 0, len(pvcList.Items))
574+
for i := range pvcList.Items {
575+
if pvcList.Items[i].Spec.VolumeName != "" {
576+
pvNames = append(pvNames, pvcList.Items[i].Spec.VolumeName)
577+
}
578+
}
579+
instance.Status.PVsBeingDeleted = pvNames
580+
if len(pvNames) > 0 {
581+
Log.Info("Captured PV names for tracking before wipe", "pvCount", len(pvNames), "pvNames", pvNames)
582+
} else {
583+
Log.Info("No PVCs found to track - clearing stale PV tracking data")
584+
}
585+
563586
// Emit event for observability
564587
if r.Recorder != nil {
565588
if wipeReason == "version upgrade" && instance.Annotations != nil {
@@ -574,7 +597,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
574597
}
575598
}
576599

577-
// Persist the phase update and timestamp
600+
// Persist the phase update, timestamp, and PV tracking
578601
if err := helper.PatchInstance(ctx, instance); err != nil {
579602
return ctrl.Result{}, err
580603
}
@@ -598,6 +621,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
598621
CurrentQueueType: string(instance.Status.QueueType),
599622
Reason: wipeReason,
600623
StorageWipeStartedAt: storageWipeStartedAt,
624+
PVsBeingDeleted: instance.Status.PVsBeingDeleted,
601625
DeleteCluster: func(ctx context.Context) error {
602626
err := rmqCluster.Delete(ctx, helper)
603627
if err != nil && !k8s_errors.IsNotFound(err) {
@@ -612,12 +636,23 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
612636

613637
// Perform the storage wipe (handles all steps: delete cluster, wait for pods,
614638
// patch PV reclaim policy, delete PVCs, verify cleanup)
615-
result, err := r.performStorageWipe(ctx, wipeParams, Log)
639+
result, err := r.performStorageWipe(ctx, &wipeParams, Log)
616640
if err != nil {
617641
Log.Error(err, "Storage wipe failed")
618642
return result, err
619643
}
620644

645+
// Update status with PV tracking information (if changed during this reconcile)
646+
if len(wipeParams.PVsBeingDeleted) > 0 && len(instance.Status.PVsBeingDeleted) == 0 {
647+
instance.Status.PVsBeingDeleted = wipeParams.PVsBeingDeleted
648+
// Persist status immediately so PV tracking survives across reconciles
649+
if err := helper.PatchInstance(ctx, instance); err != nil {
650+
Log.Error(err, "Failed to update status with PV tracking")
651+
return ctrl.Result{}, err
652+
}
653+
Log.Info("Updated status with PV tracking", "pvCount", len(wipeParams.PVsBeingDeleted))
654+
}
655+
621656
// If result has Requeue set, we're still in progress
622657
if result.Requeue || result.RequeueAfter > 0 {
623658
return result, nil
@@ -628,9 +663,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
628663
if instance.Annotations != nil {
629664
if targetVersion, hasTarget := instance.Annotations[rabbitmqv1beta1.AnnotationTargetVersion]; hasTarget && targetVersion != "" {
630665
instance.Status.CurrentVersion = targetVersion
631-
// Clear the upgrade phase and timestamp
666+
// Clear the upgrade phase, timestamp, and PV tracking
632667
instance.Status.UpgradePhase = ""
633668
instance.Status.StorageWipeStartedAt = nil
669+
instance.Status.PVsBeingDeleted = nil
634670

635671
// If queue type changed during upgrade, update Status.QueueType to prevent
636672
// triggering another wipe for "queue type migration"
@@ -654,6 +690,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ct
654690
// Queue migration complete - update Status.QueueType
655691
instance.Status.UpgradePhase = ""
656692
instance.Status.StorageWipeStartedAt = nil
693+
instance.Status.PVsBeingDeleted = nil
657694
if instance.Spec.QueueType != nil {
658695
switch *instance.Spec.QueueType {
659696
case rabbitmqv1beta1.QueueTypeQuorum:

0 commit comments

Comments
 (0)