|
| 1 | +// SPDX-License-Identifier: AGPL-3.0-only |
| 2 | + |
| 3 | +package controller |
| 4 | + |
| 5 | +import ( |
| 6 | + "context" |
| 7 | + "encoding/json" |
| 8 | + "fmt" |
| 9 | + |
| 10 | + corev1 "k8s.io/api/core/v1" |
| 11 | + apierrors "k8s.io/apimachinery/pkg/api/errors" |
| 12 | + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| 13 | + "k8s.io/apimachinery/pkg/types" |
| 14 | + ctrl "sigs.k8s.io/controller-runtime" |
| 15 | + "sigs.k8s.io/controller-runtime/pkg/client" |
| 16 | + "sigs.k8s.io/controller-runtime/pkg/cluster" |
| 17 | + "sigs.k8s.io/controller-runtime/pkg/handler" |
| 18 | + "sigs.k8s.io/controller-runtime/pkg/log" |
| 19 | + "sigs.k8s.io/controller-runtime/pkg/reconcile" |
| 20 | + mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" |
| 21 | + mccontext "sigs.k8s.io/multicluster-runtime/pkg/context" |
| 22 | + mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" |
| 23 | + "sigs.k8s.io/multicluster-runtime/pkg/multicluster" |
| 24 | + mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" |
| 25 | + |
| 26 | + computev1alpha "go.datum.net/compute/api/v1alpha" |
| 27 | +) |
| 28 | + |
| 29 | +// CompanionGCReconciler runs on each cell cluster and deletes orphaned companion |
| 30 | +// ConfigMaps and Secrets whose WorkloadDeployment referrers are gone from that |
| 31 | +// cell. It is the authoritative GC path that does not depend on Karmada's |
| 32 | +// ResourceBinding cascade completing correctly. |
| 33 | +// |
| 34 | +// The reconciler is triggered by WorkloadDeployment events. On each trigger it |
| 35 | +// lists all companions in the WD's namespace (by the referenced-data=true |
| 36 | +// label), parses the referenced-by annotation on each companion, and deletes |
| 37 | +// any companion for which every listed WD name is absent from the local cell |
| 38 | +// namespace. |
| 39 | +// |
| 40 | +// Per-cell multi-referrer safety: the referenced-by annotation is written by |
| 41 | +// the hub-side ReferencedDataController using PROJECT-plane namespace keys |
| 42 | +// (e.g. "default/mount-pristine-default-dfw"). On the cell the WD lives in |
| 43 | +// ns-{project-uid}, not "default". hasLiveReferrer therefore looks up WDs by |
| 44 | +// name only, in the companion's own namespace. This means: |
| 45 | +// |
| 46 | +// - A WD on a different cell is never present locally → counted absent |
| 47 | +// → companion on that cell is correctly deleted when its own local WD is |
| 48 | +// also gone. |
| 49 | +// - A WD on this cell is present → companion preserved → correct. |
| 50 | +type CompanionGCReconciler struct { |
| 51 | + mgr mcmanager.Manager |
| 52 | +} |
| 53 | + |
| 54 | +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;delete |
| 55 | +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;delete |
| 56 | +// +kubebuilder:rbac:groups=compute.datumapis.com,resources=workloaddeployments,verbs=get;list;watch |
| 57 | + |
| 58 | +// Reconcile is invoked for each companion ConfigMap or Secret that carries the |
| 59 | +// referenced-data=true label. It deletes the companion when every WD listed in |
| 60 | +// its referenced-by annotation is absent from the same namespace on this cell. |
| 61 | +func (r *CompanionGCReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) { |
| 62 | + logger := log.FromContext(ctx).WithValues( |
| 63 | + "namespace", req.Namespace, |
| 64 | + "name", req.Name, |
| 65 | + ) |
| 66 | + |
| 67 | + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) |
| 68 | + if err != nil { |
| 69 | + return ctrl.Result{}, err |
| 70 | + } |
| 71 | + ctx = mccontext.WithCluster(ctx, req.ClusterName) |
| 72 | + cellClient := cl.GetClient() |
| 73 | + |
| 74 | + return ctrl.Result{}, r.reconcileCompanion(ctx, cellClient, req.NamespacedName, logger) |
| 75 | +} |
| 76 | + |
| 77 | +// reconcileCompanion checks one ConfigMap-or-Secret companion and deletes it |
| 78 | +// when safe. The function tries ConfigMap first; if not found it tries Secret. |
| 79 | +func (r *CompanionGCReconciler) reconcileCompanion( |
| 80 | + ctx context.Context, |
| 81 | + cellClient client.Client, |
| 82 | + key types.NamespacedName, |
| 83 | + logger interface{ Info(string, ...any) }, |
| 84 | +) error { |
| 85 | + // Try ConfigMap. |
| 86 | + var cm corev1.ConfigMap |
| 87 | + err := cellClient.Get(ctx, key, &cm) |
| 88 | + if err == nil { |
| 89 | + return r.maybeDeleteConfigMap(ctx, cellClient, &cm, logger) |
| 90 | + } |
| 91 | + if !apierrors.IsNotFound(err) { |
| 92 | + return fmt.Errorf("get ConfigMap %s: %w", key, err) |
| 93 | + } |
| 94 | + |
| 95 | + // Not a ConfigMap — try Secret. |
| 96 | + var secret corev1.Secret |
| 97 | + err = cellClient.Get(ctx, key, &secret) |
| 98 | + if err == nil { |
| 99 | + return r.maybeDeleteSecret(ctx, cellClient, &secret, logger) |
| 100 | + } |
| 101 | + if !apierrors.IsNotFound(err) { |
| 102 | + return fmt.Errorf("get Secret %s: %w", key, err) |
| 103 | + } |
| 104 | + |
| 105 | + // Object already gone — nothing to do. |
| 106 | + return nil |
| 107 | +} |
| 108 | + |
| 109 | +func (r *CompanionGCReconciler) maybeDeleteConfigMap( |
| 110 | + ctx context.Context, |
| 111 | + cellClient client.Client, |
| 112 | + cm *corev1.ConfigMap, |
| 113 | + logger interface{ Info(string, ...any) }, |
| 114 | +) error { |
| 115 | + if !isCompanion(cm) { |
| 116 | + return nil |
| 117 | + } |
| 118 | + alive, err := r.hasLiveReferrer(ctx, cellClient, cm.Namespace, cm.Annotations) |
| 119 | + if err != nil { |
| 120 | + return err |
| 121 | + } |
| 122 | + if alive { |
| 123 | + return nil |
| 124 | + } |
| 125 | + logger.Info("deleting orphaned companion ConfigMap", "name", cm.Name, "namespace", cm.Namespace) |
| 126 | + if err := cellClient.Delete(ctx, cm); client.IgnoreNotFound(err) != nil { |
| 127 | + return fmt.Errorf("delete companion ConfigMap %s/%s: %w", cm.Namespace, cm.Name, err) |
| 128 | + } |
| 129 | + return nil |
| 130 | +} |
| 131 | + |
| 132 | +func (r *CompanionGCReconciler) maybeDeleteSecret( |
| 133 | + ctx context.Context, |
| 134 | + cellClient client.Client, |
| 135 | + secret *corev1.Secret, |
| 136 | + logger interface{ Info(string, ...any) }, |
| 137 | +) error { |
| 138 | + if !isCompanion(secret) { |
| 139 | + return nil |
| 140 | + } |
| 141 | + alive, err := r.hasLiveReferrer(ctx, cellClient, secret.Namespace, secret.Annotations) |
| 142 | + if err != nil { |
| 143 | + return err |
| 144 | + } |
| 145 | + if alive { |
| 146 | + return nil |
| 147 | + } |
| 148 | + logger.Info("deleting orphaned companion Secret", "name", secret.Name, "namespace", secret.Namespace) |
| 149 | + if err := cellClient.Delete(ctx, secret); client.IgnoreNotFound(err) != nil { |
| 150 | + return fmt.Errorf("delete companion Secret %s/%s: %w", secret.Namespace, secret.Name, err) |
| 151 | + } |
| 152 | + return nil |
| 153 | +} |
| 154 | + |
| 155 | +// isCompanion reports whether the object carries the referenced-data=true label. |
| 156 | +// The label is the authoritative signal that the companion was created by the |
| 157 | +// referenced-data controller. Objects without it are not touched by GC. |
| 158 | +func isCompanion(obj metav1.Object) bool { |
| 159 | + labels := obj.GetLabels() |
| 160 | + return labels[computev1alpha.ReferencedDataLabel] == computev1alpha.ReferencedDataLabelValue |
| 161 | +} |
| 162 | + |
| 163 | +// hasLiveReferrer returns true when at least one WD listed in the referenced-by |
| 164 | +// annotation still exists in the companion's namespace on this cell. |
| 165 | +// |
| 166 | +// The annotation is written by the hub-side ReferencedDataController as |
| 167 | +// "projectNamespace/wdName" (e.g. "default/mount-pristine-default-dfw"). On |
| 168 | +// the cell the WD lives in ns-{project-uid}, not in the project namespace. To |
| 169 | +// find it we look up by NAME ONLY in the companion's own namespace — the cell |
| 170 | +// WD name is always equal to the project WD name (set by |
| 171 | +// upsertDownstreamDeployment). This also gives us the correct per-cell |
| 172 | +// semantics: a WD that runs on a different cell is never present locally, so it |
| 173 | +// contributes nothing to liveness on this cell. |
| 174 | +// |
| 175 | +// A companion is considered still needed if any referrer is present in any |
| 176 | +// state (including terminating) to avoid premature deletion during the WD |
| 177 | +// teardown window. |
| 178 | +// |
| 179 | +// Returns (false, nil) when the annotation is absent or empty. |
| 180 | +// Returns (true, nil) when at least one referrer is found. |
| 181 | +// Returns (true, err) when the annotation is corrupt or an API call fails so |
| 182 | +// that the caller does NOT delete the companion on transient faults. |
| 183 | +func (r *CompanionGCReconciler) hasLiveReferrer( |
| 184 | + ctx context.Context, |
| 185 | + cellClient client.Client, |
| 186 | + companionNamespace string, |
| 187 | + annotations map[string]string, |
| 188 | +) (bool, error) { |
| 189 | + wdKeys, err := decodeCompanionRefCount(annotations) |
| 190 | + if err != nil { |
| 191 | + // Corrupt annotation: treat as "has live referrer" to avoid accidental GC. |
| 192 | + return true, err |
| 193 | + } |
| 194 | + if len(wdKeys) == 0 { |
| 195 | + return false, nil |
| 196 | + } |
| 197 | + |
| 198 | + for _, key := range wdKeys { |
| 199 | + wdName := wdNameFromKey(key) |
| 200 | + if wdName == "" { |
| 201 | + // Malformed key: conservatively assume the referrer is alive. |
| 202 | + return true, nil |
| 203 | + } |
| 204 | + |
| 205 | + // Look up by name in the companion's namespace. The annotation carries the |
| 206 | + // PROJECT namespace as the key prefix, but the cell WD lives in |
| 207 | + // ns-{project-uid} — the same namespace the companion is in. |
| 208 | + var wd computev1alpha.WorkloadDeployment |
| 209 | + err := cellClient.Get(ctx, types.NamespacedName{Namespace: companionNamespace, Name: wdName}, &wd) |
| 210 | + if err == nil { |
| 211 | + // Referrer exists (any state — including terminating). Companion stays. |
| 212 | + return true, nil |
| 213 | + } |
| 214 | + if !apierrors.IsNotFound(err) { |
| 215 | + return true, fmt.Errorf("get WorkloadDeployment %s/%s: %w", companionNamespace, wdName, err) |
| 216 | + } |
| 217 | + // Not found — this referrer is gone; continue checking the rest. |
| 218 | + } |
| 219 | + |
| 220 | + // Every listed referrer is absent from this cell. |
| 221 | + return false, nil |
| 222 | +} |
| 223 | + |
| 224 | +// wdNameFromKey extracts the WD name from a "namespace/name" annotation key. |
| 225 | +// The referenced-by annotation always uses "projectNamespace/wdName" format; |
| 226 | +// we want only the name portion, which is the part after the last slash. |
| 227 | +// Returns "" for an empty key (caller should treat as malformed). |
| 228 | +func wdNameFromKey(key string) string { |
| 229 | + if key == "" { |
| 230 | + return "" |
| 231 | + } |
| 232 | + for i := len(key) - 1; i >= 0; i-- { |
| 233 | + if key[i] == '/' { |
| 234 | + return key[i+1:] |
| 235 | + } |
| 236 | + } |
| 237 | + // No slash — the whole string is the name. |
| 238 | + return key |
| 239 | +} |
| 240 | + |
| 241 | +// decodeCompanionRefCount parses the companionRefCountAnnotation value into a |
| 242 | +// slice of WD keys. It is a cell-local copy of the hub-side decodeRefCount so |
| 243 | +// the GC reconciler does not share internal state with the |
| 244 | +// ReferencedDataController. The annotation format is identical: a JSON array. |
| 245 | +// |
| 246 | +// Returns (nil, nil) when the annotation is absent or empty. |
| 247 | +// Returns an error when the annotation value is present but cannot be parsed. |
| 248 | +func decodeCompanionRefCount(annotations map[string]string) ([]string, error) { |
| 249 | + raw, ok := annotations[companionRefCountAnnotation] |
| 250 | + if !ok || raw == "" { |
| 251 | + return nil, nil |
| 252 | + } |
| 253 | + var keys []string |
| 254 | + if err := json.Unmarshal([]byte(raw), &keys); err != nil { |
| 255 | + return nil, fmt.Errorf("companion-gc: corrupt ref-count annotation %q: %w", raw, err) |
| 256 | + } |
| 257 | + return keys, nil |
| 258 | +} |
| 259 | + |
| 260 | +// SetupWithManager registers the CompanionGCReconciler with the multicluster |
| 261 | +// manager. It should only be called when cell controllers are enabled |
| 262 | +// (--enable-cell-controllers). The reconciler watches ConfigMaps (its primary |
| 263 | +// For type) and uses WorkloadDeployment events to enqueue companion objects in |
| 264 | +// the same namespace. Secret companions are discovered at reconcile time via |
| 265 | +// Get, not through a separate For/Watches registration, because both kinds map |
| 266 | +// to the same reconcile loop. |
| 267 | +func (r *CompanionGCReconciler) SetupWithManager(mgr mcmanager.Manager) error { |
| 268 | + r.mgr = mgr |
| 269 | + |
| 270 | + return mcbuilder.ControllerManagedBy(mgr). |
| 271 | + For(&corev1.ConfigMap{}, mcbuilder.WithEngageWithLocalCluster(false)). |
| 272 | + // When a WorkloadDeployment changes (including deletion), re-enqueue all |
| 273 | + // companion ConfigMaps and Secrets in the same namespace so the GC |
| 274 | + // reconciler can decide whether they are still referenced. |
| 275 | + Watches(&computev1alpha.WorkloadDeployment{}, |
| 276 | + func(_ multicluster.ClusterName, cl cluster.Cluster) handler.TypedEventHandler[client.Object, mcreconcile.Request] { |
| 277 | + return handler.TypedEnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []mcreconcile.Request { |
| 278 | + return enqueueCompanionsForNamespace(ctx, cl.GetClient(), obj.GetNamespace()) |
| 279 | + }) |
| 280 | + }, |
| 281 | + mcbuilder.WithEngageWithLocalCluster(false), |
| 282 | + ). |
| 283 | + Named("companion-gc"). |
| 284 | + Complete(r) |
| 285 | +} |
| 286 | + |
| 287 | +// enqueueCompanionsForNamespace returns mcreconcile.Requests for every companion |
| 288 | +// ConfigMap and Secret in the given namespace. It is called from the WD watch |
| 289 | +// handler so the GC reconciler is triggered whenever a WD changes in a namespace |
| 290 | +// that may contain companions. |
| 291 | +func enqueueCompanionsForNamespace(ctx context.Context, cellClient client.Client, namespace string) []mcreconcile.Request { |
| 292 | + companionLabel := client.MatchingLabels{ |
| 293 | + computev1alpha.ReferencedDataLabel: computev1alpha.ReferencedDataLabelValue, |
| 294 | + } |
| 295 | + nsSelector := client.InNamespace(namespace) |
| 296 | + |
| 297 | + var reqs []mcreconcile.Request |
| 298 | + |
| 299 | + var cms corev1.ConfigMapList |
| 300 | + if err := cellClient.List(ctx, &cms, nsSelector, companionLabel); err == nil { |
| 301 | + for i := range cms.Items { |
| 302 | + reqs = append(reqs, mcreconcile.Request{ |
| 303 | + Request: reconcile.Request{ |
| 304 | + NamespacedName: types.NamespacedName{ |
| 305 | + Namespace: cms.Items[i].Namespace, |
| 306 | + Name: cms.Items[i].Name, |
| 307 | + }, |
| 308 | + }, |
| 309 | + }) |
| 310 | + } |
| 311 | + } |
| 312 | + |
| 313 | + var secrets corev1.SecretList |
| 314 | + if err := cellClient.List(ctx, &secrets, nsSelector, companionLabel); err == nil { |
| 315 | + for i := range secrets.Items { |
| 316 | + reqs = append(reqs, mcreconcile.Request{ |
| 317 | + Request: reconcile.Request{ |
| 318 | + NamespacedName: types.NamespacedName{ |
| 319 | + Namespace: secrets.Items[i].Namespace, |
| 320 | + Name: secrets.Items[i].Name, |
| 321 | + }, |
| 322 | + }, |
| 323 | + }) |
| 324 | + } |
| 325 | + } |
| 326 | + |
| 327 | + return reqs |
| 328 | +} |
0 commit comments