From a03de42eda1bf43ff3b6c261845066fda6462d86 Mon Sep 17 00:00:00 2001 From: Andrey Kolkov Date: Thu, 4 Jun 2026 16:28:08 +0400 Subject: [PATCH] feat(main): add migration tool Signed-off-by: Andrey Kolkov --- .gitignore | 2 + Makefile | 4 + cmd/etcd-migrate/apply_adopt.go | 373 +++++++++++++ cmd/etcd-migrate/backup.go | 263 +++++++++ cmd/etcd-migrate/config.go | 137 +++++ cmd/etcd-migrate/inspect.go | 188 +++++++ cmd/etcd-migrate/main.go | 286 ++++++++++ cmd/etcd-migrate/main_test.go | 616 +++++++++++++++++++++ cmd/etcd-migrate/output.go | 96 ++++ cmd/etcd-migrate/run.go | 347 ++++++++++++ cmd/kubectl-etcd/helpers_test.go | 42 +- cmd/kubectl-etcd/main.go | 79 +-- controllers/etcdcluster_controller.go | 52 +- controllers/etcdcluster_controller_test.go | 55 ++ controllers/etcdmember_controller.go | 30 +- controllers/etcdmember_controller_test.go | 108 ++++ controllers/helpers.go | 145 ++++- controllers/helpers_test.go | 75 +++ docs/concepts.md | 11 + docs/migration.md | 254 ++++++++- go.mod | 2 +- internal/migrate/adopt.go | 320 +++++++++++ internal/migrate/adopt_test.go | 211 +++++++ internal/migrate/cronjob.go | 137 +++++ internal/migrate/cronjob_test.go | 109 ++++ internal/migrate/legacy/types.go | 168 ++++++ internal/migrate/plan.go | 89 +++ internal/migrate/snapshotjob.go | 201 +++++++ internal/migrate/snapshotjob_test.go | 121 ++++ internal/migrate/translate.go | 504 +++++++++++++++++ internal/migrate/translate_test.go | 428 ++++++++++++++ internal/portforward/portforward.go | 108 ++++ internal/portforward/portforward_test.go | 56 ++ 33 files changed, 5446 insertions(+), 171 deletions(-) create mode 100644 cmd/etcd-migrate/apply_adopt.go create mode 100644 cmd/etcd-migrate/backup.go create mode 100644 cmd/etcd-migrate/config.go create mode 100644 cmd/etcd-migrate/inspect.go create mode 100644 cmd/etcd-migrate/main.go create mode 100644 cmd/etcd-migrate/main_test.go create mode 100644 cmd/etcd-migrate/output.go create mode 100644 cmd/etcd-migrate/run.go create 
mode 100644 internal/migrate/adopt.go create mode 100644 internal/migrate/adopt_test.go create mode 100644 internal/migrate/cronjob.go create mode 100644 internal/migrate/cronjob_test.go create mode 100644 internal/migrate/legacy/types.go create mode 100644 internal/migrate/plan.go create mode 100644 internal/migrate/snapshotjob.go create mode 100644 internal/migrate/snapshotjob_test.go create mode 100644 internal/migrate/translate.go create mode 100644 internal/migrate/translate_test.go create mode 100644 internal/portforward/portforward.go create mode 100644 internal/portforward/portforward_test.go diff --git a/.gitignore b/.gitignore index d2706f46..ed60d564 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ bin # kubectl-etcd plugin: build to bin/ (see Makefile); never commit a root-level build artifact /kubectl-etcd +# etcd-migrate tool: same rule — build to bin/, never commit the root-level artifact +/etcd-migrate # Test binary, build with `go test -c` *.test diff --git a/Makefile b/Makefile index c6a45a40..64c47dd3 100644 --- a/Makefile +++ b/Makefile @@ -79,6 +79,10 @@ build: manifests generate fmt vet ## Build manager binary. kubectl-etcd: fmt vet ## Build the kubectl-etcd plugin binary. go build -o bin/kubectl-etcd ./cmd/kubectl-etcd +.PHONY: etcd-migrate +etcd-migrate: fmt vet ## Build the etcd-migrate (legacy v1alpha1 -> v1alpha2) CLI binary. + go build -o bin/etcd-migrate ./cmd/etcd-migrate + .PHONY: run run: manifests generate fmt vet ## Run a controller from your host. go run ./main.go diff --git a/cmd/etcd-migrate/apply_adopt.go b/cmd/etcd-migrate/apply_adopt.go new file mode 100644 index 00000000..26791d9f --- /dev/null +++ b/cmd/etcd-migrate/apply_adopt.go @@ -0,0 +1,373 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package main + +import ( + "context" + "fmt" + "io" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + "sigs.k8s.io/controller-runtime/pkg/client" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/controllers" + "github.com/cozystack/etcd-operator/internal/migrate" +) + +// applyAdoption executes one cluster's in-place adoption. The etcd pods are +// never restarted: only object ownership, labels, member annotations and CRs +// change. Every step is idempotent, so an interrupted run is completed by +// re-running the tool. +// +// Ordering is load-bearing in three places: +// +// - The new-API CRs are created with their status prefilled before the +// user scales the new operator up (the tool runs with both operators +// down), so the cluster controller's bootstrap branch never fires. +// - The legacy headless Service is owner-referenced to the adopted members +// BEFORE the legacy CRs are deleted — otherwise the Service is briefly +// sole-owned by a now-missing object and GC could reap it. +// - The legacy StatefulSet is orphan-deleted (and its deletion awaited) +// BEFORE pod owner references are rewritten — while it exists, its +// controller would adopt the pods right back. +func applyAdoption(ctx context.Context, c client.Client, dyn dynamic.Interface, p *migrate.ResourcePlan, out io.Writer) error { + a := p.Adoption + cluster := p.Target.(*lll.EtcdCluster) + ns := p.Namespace + + // 1. Create the new-API cluster (+ companion Secret) with prefilled + // status. 
Done first: the prefilled status.clusterID keeps the bootstrap + // branch from ever firing, and the live cluster UID owns the headless + // Service recreated in step 6. + for _, extra := range p.Extras { + if err := c.Create(ctx, extra); err != nil && !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create %s %s/%s: %w", + extra.GetObjectKind().GroupVersionKind().Kind, ns, extra.GetName(), err) + } + } + if err := c.Create(ctx, cluster); err != nil && !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create EtcdCluster: %w", err) + } + liveCluster := &lll.EtcdCluster{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: cluster.Name}, liveCluster); err != nil { + return fmt.Errorf("re-read EtcdCluster: %w", err) + } + // Fill-if-empty: a re-run must not clobber status once the new operator + // has taken over (both operators are down during a normal run, but stay + // safe against misuse). + if liveCluster.Status.ClusterID == "" { + liveCluster.Status = a.ClusterStatus + if err := c.Status().Update(ctx, liveCluster); err != nil { + return fmt.Errorf("prefill EtcdCluster status: %w", err) + } + fmt.Fprintf(out, " created EtcdCluster %q (clusterID=%s prefilled — bootstrap will not fire)\n", + cluster.Name, a.ClusterStatus.ClusterID) + } + + // 2. Create the per-pod EtcdMembers (+ status prefill) and capture their + // live UIDs — required before owner-referencing the legacy headless + // Service to them in step 3. 
+ liveMembers := make([]*lll.EtcdMember, len(a.Members)) + for i, ma := range a.Members { + if err := c.Create(ctx, ma.Member); err != nil && !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create EtcdMember %q: %w", ma.Member.Name, err) + } + liveMember := &lll.EtcdMember{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: ma.Member.Name}, liveMember); err != nil { + return fmt.Errorf("re-read EtcdMember %q: %w", ma.Member.Name, err) + } + if liveMember.Status.MemberID == "" { + liveMember.Status = ma.Status + if err := c.Status().Update(ctx, liveMember); err != nil { + return fmt.Errorf("prefill EtcdMember %q status: %w", ma.Member.Name, err) + } + } + liveMembers[i] = liveMember + } + + // 3. Point the legacy headless Service's ownerReferences at the adopted + // members (replacing the legacy controller owner). Kubernetes deletes a + // dependent once all its owners are gone, so with one owner ref per + // adopted member the Service survives while any adopted member remains + // and is auto-GC'd when the last one rolls away. Replacement (native) + // members are not owners, so they never keep it alive. Done BEFORE the + // legacy-CR deletion to avoid a premature-GC race. + if a.HeadlessServiceName != "" { + if err := pointServiceAtMembers(ctx, c, ns, a.HeadlessServiceName, liveMembers); err != nil { + return err + } + fmt.Fprintf(out, " owner-referenced legacy headless Service %q to %d adopted member(s) (auto-GCs as they roll)\n", + a.HeadlessServiceName, len(liveMembers)) + } + + // 4. Dismantle the legacy control plane, keeping the data plane. Orphan + // propagation everywhere so the pods/PVCs/Services survive. + if p.DeleteRef != nil { + orphan := metav1.DeletePropagationOrphan + err := dyn.Resource(p.DeleteRef.GVR).Namespace(ns). 
+ Delete(ctx, p.DeleteRef.Name, metav1.DeleteOptions{PropagationPolicy: &orphan}) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("orphan-delete legacy EtcdCluster: %w", err) + } + fmt.Fprintf(out, " orphan-deleted legacy EtcdCluster (children survive)\n") + } + + sts := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: a.StatefulSetName}} + orphan := metav1.DeletePropagationOrphan + if err := c.Delete(ctx, sts, &client.DeleteOptions{PropagationPolicy: &orphan}); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("orphan-delete legacy StatefulSet: %w", err) + } + if err := waitGone(ctx, c, types.NamespacedName{Namespace: ns, Name: a.StatefulSetName}, &appsv1.StatefulSet{}, 2*time.Minute); err != nil { + return fmt.Errorf("await StatefulSet deletion: %w", err) + } + fmt.Fprintf(out, " orphan-deleted legacy StatefulSet %q (pods survive)\n", a.StatefulSetName) + + cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: a.ConfigMapName}} + if err := c.Delete(ctx, cm); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete legacy cluster-state ConfigMap: %w", err) + } + // The new operator emits its own PDB under the same name; remove the + // legacy one so the two never select the same pods concurrently. + pdb := &policyv1.PodDisruptionBudget{ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: a.PDBName}} + if err := c.Delete(ctx, pdb); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete legacy PodDisruptionBudget: %w", err) + } + + // 5. Re-own the pods and PVCs to their EtcdMembers — only now that the + // StatefulSet is gone and its controller can no longer fight us. 
+ for i, ma := range a.Members { + if err := adoptPod(ctx, c, ns, ma.Member.Name, cluster.Name, liveMembers[i]); err != nil { + return err + } + if err := adoptPVC(ctx, c, ns, ma.PVCName, cluster.Name, liveMembers[i]); err != nil { + return err + } + fmt.Fprintf(out, " adopted member %q (pod + PVC re-owned, memberID=%s)\n", ma.Member.Name, ma.Status.MemberID) + } + + // 6. Client-Service cutover. The legacy client Service is named after the + // cluster, which collides with the operator's native headless Service. + // Delete it and immediately recreate a headless Service of the same name + // (owned by the new cluster) so the DNS name keeps resolving with the + // minimum possible gap, rather than leaving the window open until the + // operator's first reconcile. + if err := cutoverHeadlessService(ctx, c, ns, cluster.Name, liveCluster); err != nil { + return err + } + fmt.Fprintf(out, " cut over Service %q to the operator's native headless Service\n", cluster.Name) + + return nil +} + +// pointServiceAtMembers replaces a Service's ownerReferences with one +// non-controller, non-blocking entry per EtcdMember. A full Update (not a +// merge patch) is used deliberately: the legacy controller owner reference +// must be STRIPPED, and a strategic merge patch keyed on owner UID would +// merge the new refs in alongside the stale one rather than replacing the +// list. Idempotent — a re-run rewrites the same refs. +func pointServiceAtMembers(ctx context.Context, c client.Client, ns, name string, members []*lll.EtcdMember) error { + svc := &corev1.Service{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, svc); err != nil { + if apierrors.IsNotFound(err) { + // Already GC'd by a prior complete run (all adopted members + // rolled away) — nothing to keep alive. 
+ return nil + } + return fmt.Errorf("read legacy headless Service %q: %w", name, err) + } + gvk := lll.GroupVersion.WithKind("EtcdMember") + refs := make([]metav1.OwnerReference, 0, len(members)) + for _, m := range members { + refs = append(refs, metav1.OwnerReference{ + APIVersion: gvk.GroupVersion().String(), + Kind: gvk.Kind, + Name: m.Name, + UID: m.UID, + Controller: ptrTo(false), + BlockOwnerDeletion: ptrTo(false), + }) + } + svc.OwnerReferences = refs + if err := c.Update(ctx, svc); err != nil { + return fmt.Errorf("owner-reference legacy headless Service %q to members: %w", name, err) + } + return nil +} + +// cutoverHeadlessService ensures `name` is the operator's native headless +// Service, owned by the new EtcdCluster. If a ClusterIP Service already holds +// the name (the legacy client Service, whose name collides with the native +// headless), it is deleted and recreated headless — clusterIP is immutable, +// so an in-place flip is impossible. Idempotent: an already-headless Service +// at the name is left untouched. +func cutoverHeadlessService(ctx context.Context, c client.Client, ns, name string, owner *lll.EtcdCluster) error { + svc := &corev1.Service{} + err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: name}, svc) + switch { + case apierrors.IsNotFound(err): + // Nothing at the name — just create the headless Service below. + case err != nil: + return fmt.Errorf("read Service %q: %w", name, err) + case svc.Spec.ClusterIP == corev1.ClusterIPNone: + // Already headless (a prior run, or an override that never collided). + return nil + default: + // A ClusterIP Service (the legacy client) holds the name. Delete it so + // the headless Service can take the name. 
+ if err := c.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete legacy client Service %q: %w", name, err) + } + if err := waitGone(ctx, c, types.NamespacedName{Namespace: ns, Name: name}, &corev1.Service{}, time.Minute); err != nil { + return fmt.Errorf("await legacy client Service %q deletion: %w", name, err) + } + } + + gvk := lll.GroupVersion.WithKind("EtcdCluster") + headless := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: controllers.ClusterLabels(owner.Name), + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: gvk.GroupVersion().String(), + Kind: gvk.Kind, + Name: owner.Name, + UID: owner.UID, + Controller: ptrTo(true), + BlockOwnerDeletion: ptrTo(true), + }}, + }, + // Matches the operator's native headless Service (ensureServices), so + // its first reconcile finds no drift to reconcile. + Spec: corev1.ServiceSpec{ + ClusterIP: corev1.ClusterIPNone, + PublishNotReadyAddresses: true, + Selector: map[string]string{controllers.LabelCluster: owner.Name}, + Ports: []corev1.ServicePort{ + {Name: "client", Port: 2379}, + {Name: "peer", Port: 2380}, + }, + }, + } + if err := c.Create(ctx, headless); err != nil && !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("create native headless Service %q: %w", name, err) + } + return nil +} + +// adoptPod stamps the operator's member labels (incl. role=voter — every +// adopted member is a voter) and rewrites the controller owner reference to +// the EtcdMember. The pod itself is not restarted; labels and owner refs are +// mutable on live pods. 
+func adoptPod(ctx context.Context, c client.Client, ns, podName, clusterName string, owner *lll.EtcdMember) error { + pod := &corev1.Pod{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: podName}, pod); err != nil { + return fmt.Errorf("read pod %q: %w", podName, err) + } + orig := pod.DeepCopy() + if pod.Labels == nil { + pod.Labels = map[string]string{} + } + for k, v := range controllers.MemberLabels(clusterName, podName) { + pod.Labels[k] = v + } + pod.Labels[controllers.LabelRole] = controllers.RoleVoter + setControllerOwner(&pod.ObjectMeta, owner) + if err := c.Patch(ctx, pod, client.MergeFrom(orig)); err != nil { + return fmt.Errorf("re-own pod %q: %w", podName, err) + } + return nil +} + +// adoptPVC mirrors adoptPod for the member's data PVC. The new member +// controller refuses PVCs without its own controller owner reference +// (pvcOwnedBy), so this patch is what makes ensurePVC pass. +func adoptPVC(ctx context.Context, c client.Client, ns, pvcName, clusterName string, owner *lll.EtcdMember) error { + pvc := &corev1.PersistentVolumeClaim{} + if err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: pvcName}, pvc); err != nil { + return fmt.Errorf("read PVC %q: %w", pvcName, err) + } + orig := pvc.DeepCopy() + if pvc.Labels == nil { + pvc.Labels = map[string]string{} + } + for k, v := range controllers.MemberLabels(clusterName, owner.Name) { + pvc.Labels[k] = v + } + setControllerOwner(&pvc.ObjectMeta, owner) + if err := c.Patch(ctx, pvc, client.MergeFrom(orig)); err != nil { + return fmt.Errorf("re-own PVC %q: %w", pvcName, err) + } + return nil +} + +// setControllerOwner replaces any existing controller owner reference with +// one pointing at the EtcdMember, matching what the member controller's +// SetControllerReference would produce. 
+func setControllerOwner(meta *metav1.ObjectMeta, owner *lll.EtcdMember) { + gvk := lll.GroupVersion.WithKind("EtcdMember") + replaceControllerRef(meta, metav1.OwnerReference{ + APIVersion: gvk.GroupVersion().String(), + Kind: gvk.Kind, + Name: owner.Name, + UID: owner.UID, + Controller: ptrTo(true), + BlockOwnerDeletion: ptrTo(true), + }) +} + +// replaceControllerRef drops any previous controller=true reference (the +// orphaned StatefulSet's, a prior partial run's) and appends `ref`. +// Idempotent: a matching ref is left in place. +func replaceControllerRef(meta *metav1.ObjectMeta, ref metav1.OwnerReference) { + kept := meta.OwnerReferences[:0] + for _, o := range meta.OwnerReferences { + if o.UID == ref.UID && o.Kind == ref.Kind { + continue // re-added below in canonical form + } + if o.Controller != nil && *o.Controller { + continue // displaced by the new controller owner + } + kept = append(kept, o) + } + meta.OwnerReferences = append(kept, ref) +} + +// waitGone polls until the object disappears. +func waitGone(ctx context.Context, c client.Client, key types.NamespacedName, obj client.Object, timeout time.Duration) error { + deadline := time.After(timeout) + for { + err := c.Get(ctx, key, obj) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return err + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline: + return fmt.Errorf("%s/%s still present after %s", key.Namespace, key.Name, timeout) + case <-time.After(2 * time.Second): + } + } +} + +func ptrTo[T any](v T) *T { return &v } diff --git a/cmd/etcd-migrate/backup.go b/cmd/etcd-migrate/backup.go new file mode 100644 index 00000000..a7bf5092 --- /dev/null +++ b/cmd/etcd-migrate/backup.go @@ -0,0 +1,263 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package main + +import ( + "context" + "fmt" + "io" + "time" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/etcdclient" + "github.com/cozystack/etcd-operator/internal/migrate" + "github.com/cozystack/etcd-operator/internal/portforward" +) + +// jobPollInterval is how often waitForJob re-reads the snapshot Job. +const jobPollInterval = 5 * time.Second + +// runBackups executes the per-cluster safety-backup phase before anything is +// mutated: a one-off agent Job snapshots each to-be-adopted cluster to the +// configured destination. Nothing is restored from the artifact — the data +// stays in place — it exists so a botched adoption is recoverable. A failure +// flips that cluster's plan to ActionError so the adoption never starts for +// an unprotected cluster. 
+func runBackups(ctx context.Context, cfg *Config, restCfg *rest.Config, kube kubernetes.Interface, + c client.Client, plans []migrate.ResourcePlan, d discovered, out io.Writer) error { + + agentImage, err := resolveAgentImage(ctx, cfg, kube) + if err != nil { + return err + } + dest := backupDestination(cfg) + + specs := map[string]legacyCluster{} + for _, lc := range d.Clusters { + specs[lc.Namespace+"/"+lc.Name] = lc + } + + for i := range plans { + p := &plans[i] + if p.SourceKind != "EtcdCluster" || p.Action != migrate.ActionAdopt { + continue + } + lc, ok := specs[p.Namespace+"/"+p.SourceName] + if !ok { + continue + } + + fmt.Fprintf(out, "backing up legacy cluster %s/%s …\n", lc.Namespace, lc.Name) + if err := backupOne(ctx, cfg, kube, c, lc, dest, agentImage, out); err != nil { + p.Action = migrate.ActionError + p.Errors = append(p.Errors, fmt.Sprintf("backup failed: %v", err)) + p.DeleteRef = nil + fmt.Fprintf(out, " ERROR: %v — cluster left untouched\n", err) + continue + } + fmt.Fprintf(out, " backup stored\n") + } + return nil +} + +// backupOne handles a single legacy cluster's one-off agent Job. Idempotent: +// a completed Job from a previous run is reused (the agent's SNAPSHOT_UID +// overwrite guard recognizes its own artifact), a failed or stale one is +// replaced. Note the Job's etcdctl dial is anonymous — for auth-enabled +// clusters the auth-disable phase runs BEFORE this one. 
+func backupOne(ctx context.Context, cfg *Config, kube kubernetes.Interface, + c client.Client, lc legacyCluster, dest lll.SnapshotLocation, agentImage string, out io.Writer) error { + _ = kube // parity with backup destinations that may need lookups later + + job := migrate.BuildSnapshotJob(lc.Name, lc.Namespace, lc.UID, lc.Spec, dest, agentImage) + + existing := &batchv1.Job{} + getErr := c.Get(ctx, types.NamespacedName{Namespace: job.Namespace, Name: job.Name}, existing) + switch { + case getErr == nil && jobSucceeded(existing): + fmt.Fprintf(out, " snapshot Job already completed — reusing its artifact\n") + return nil + case getErr == nil: + // Leftover from a previous failed/interrupted attempt: replace it. + policy := metav1.DeletePropagationForeground + if err := c.Delete(ctx, existing, &client.DeleteOptions{PropagationPolicy: &policy}); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("delete stale snapshot Job: %w", err) + } + if err := waitJobGone(ctx, c, job.Namespace, job.Name, 2*time.Minute); err != nil { + return err + } + case !apierrors.IsNotFound(getErr): + return fmt.Errorf("read snapshot Job: %w", getErr) + } + + if err := c.Create(ctx, job); err != nil { + return fmt.Errorf("create snapshot Job: %w", err) + } + return waitForJob(ctx, c, job.Namespace, job.Name, cfg.BackupTimeout) +} + +// jobSucceeded reports a Complete=True condition. +func jobSucceeded(job *batchv1.Job) bool { + for _, cond := range job.Status.Conditions { + if cond.Type == batchv1.JobComplete && cond.Status == corev1.ConditionTrue { + return true + } + } + return false +} + +// waitJobGone polls until the Job (deleted with foreground propagation) +// disappears, so a same-name recreate cannot race its terminating pods. 
+func waitJobGone(ctx context.Context, c client.Client, namespace, name string, timeout time.Duration) error { + deadline := time.After(timeout) + for { + err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &batchv1.Job{}) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("await snapshot Job deletion: %w", err) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline: + return fmt.Errorf("stale snapshot Job %s/%s did not terminate within %s", namespace, name, timeout) + case <-time.After(jobPollInterval): + } + } +} + +// resolveAgentImage returns --agent-image, falling back to the image in the +// NEW controller Deployment's spec — readable even at spec.replicas=0. With +// neither available the snapshot phase cannot run. +func resolveAgentImage(ctx context.Context, cfg *Config, kube kubernetes.Interface) (string, error) { + if cfg.AgentImage != "" { + return cfg.AgentImage, nil + } + ns, name, _ := splitRef(cfg.NewController) + dep, err := kube.AppsV1().Deployments(ns).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return "", fmt.Errorf("the backup phase needs the agent image: the new controller Deployment %s/%s does not exist, pass --agent-image explicitly", ns, name) + } + return "", fmt.Errorf("read new controller Deployment %s/%s: %w", ns, name, err) + } + for _, ctr := range dep.Spec.Template.Spec.Containers { + if ctr.Name == "manager" { + return ctr.Image, nil + } + } + if len(dep.Spec.Template.Spec.Containers) > 0 { + return dep.Spec.Template.Spec.Containers[0].Image, nil + } + return "", fmt.Errorf("new controller Deployment %s/%s has no containers; pass --agent-image", ns, name) +} + +// disableLegacyAuth turns authentication off on the still-running legacy +// etcd so the snapshot carries no auth state. 
The legacy root user is +// NoPassword (cert-only), so the dial authenticates with the legacy +// operator's client certificate over a port-forward to a member Pod — the +// same identity the legacy operator itself used for auth management. +func disableLegacyAuth(ctx context.Context, restCfg *rest.Config, kube kubernetes.Interface, lc legacyCluster) error { + pod, err := findRunningEtcdPod(ctx, kube, lc) + if err != nil { + return err + } + localPort, stop, err := portforward.ForwardToPod(restCfg, lc.Namespace, pod, 2379) + if err != nil { + return fmt.Errorf("port-forward to %s: %w", pod, err) + } + defer stop() + + tlsCfg, err := legacyOperatorTLSConfig(ctx, kube, lc) + if err != nil { + return err + } + + cli, err := etcdclient.New([]string{fmt.Sprintf("localhost:%d", localPort)}, tlsCfg, "", "") + if err != nil { + return fmt.Errorf("dial legacy etcd: %w", err) + } + defer func() { _ = cli.Close() }() + + statusCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + status, err := cli.AuthStatus(statusCtx) + if err != nil { + return fmt.Errorf("read auth status: %w", err) + } + if !status.Enabled { + return nil // already off — nothing to do + } + disableCtx, cancel2 := context.WithTimeout(ctx, 10*time.Second) + defer cancel2() + if _, err := cli.AuthDisable(disableCtx); err != nil { + return fmt.Errorf("auth disable: %w", err) + } + return nil +} + +// findRunningEtcdPod picks one Running member Pod of the legacy cluster +// (label set: app.kubernetes.io/name=etcd + instance=). 
+func findRunningEtcdPod(ctx context.Context, kube kubernetes.Interface, lc legacyCluster) (string, error) { + pods, err := kube.CoreV1().Pods(lc.Namespace).List(ctx, metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=etcd,app.kubernetes.io/instance=" + lc.Name, + }) + if err != nil { + return "", fmt.Errorf("list etcd pods: %w", err) + } + for _, p := range pods.Items { + if p.Status.Phase == corev1.PodRunning { + return p.Name, nil + } + } + return "", fmt.Errorf("no Running etcd pod found for legacy cluster %s/%s", lc.Namespace, lc.Name) +} + +// waitForJob polls until the Job reports Complete or Failed, or the timeout +// elapses. +func waitForJob(ctx context.Context, c client.Client, namespace, name string, timeout time.Duration) error { + deadline := time.After(timeout) + for { + job := &batchv1.Job{} + if err := c.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, job); err != nil { + return fmt.Errorf("read snapshot Job: %w", err) + } + for _, cond := range job.Status.Conditions { + if cond.Status != corev1.ConditionTrue { + continue + } + switch cond.Type { + case batchv1.JobComplete: + return nil + case batchv1.JobFailed: + return fmt.Errorf("snapshot Job failed: %s: %s", cond.Reason, cond.Message) + } + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-deadline: + return fmt.Errorf("snapshot Job did not finish within %s", timeout) + case <-time.After(jobPollInterval): + } + } +} diff --git a/cmd/etcd-migrate/config.go b/cmd/etcd-migrate/config.go new file mode 100644 index 00000000..8b65d152 --- /dev/null +++ b/cmd/etcd-migrate/config.go @@ -0,0 +1,137 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// defaultControllerRef is where both this repo's kustomize config and the
// legacy repo's deploy the controller; the two generations share the name,
// so a single Deployment commonly answers both checks.
const defaultControllerRef = "etcd-operator-system/etcd-operator-controller-manager"

// Config holds every flag of the migrate CLI.
type Config struct {
	// Kubeconfig is the path to the kubeconfig file (--kubeconfig).
	Kubeconfig string
	Namespace  string // "" = all namespaces

	// Apply executes the adoption; without it the tool only prints the plan
	// (dry-run).
	Apply bool
	// Yes skips the interactive confirmation before --apply mutates the
	// cluster.
	Yes bool
	// SkipControllerCheck skips verifying that both operator Deployments are
	// scaled down.
	SkipControllerCheck bool
	LegacyController    string // ns/name
	NewController       string // ns/name

	Version    string // etcd version override for every cluster
	AuthSecret string // existing basic-auth Secret name for auth clusters

	// Backup phase: a safety snapshot of every cluster taken right before
	// adoption. Nothing is restored from it — the data stays in place — but
	// adoption rewires ownership of live storage, so "no backup" must be an
	// explicit choice (--skip-backup), not a forgotten flag.
	SkipBackup bool
	// AgentImage is the operator image carrying the snapshot agent; when
	// empty it is taken from the new controller Deployment's spec.
	AgentImage string
	// BackupTimeout is how long to wait for each backup Job.
	BackupTimeout time.Duration

	// S3 backup destination (mutually exclusive with the PVC destination).
	BackupS3Endpoint       string
	BackupS3Bucket         string
	BackupS3Key            string
	BackupS3Region         string
	BackupS3ForcePathStyle bool
	// BackupS3CredentialsSecret names the Secret holding
	// AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY; it is required in every
	// migrated cluster's namespace.
	BackupS3CredentialsSecret string

	// PVC backup destination: the claim (required in every migrated
	// cluster's namespace) and an optional subdirectory within it.
	BackupPVCClaim   string
	BackupPVCSubPath string
}
+func bindFlags(cmd *cobra.Command, cfg *Config) { + defaultKubeconfig := os.Getenv("KUBECONFIG") + if defaultKubeconfig == "" { + defaultKubeconfig = filepath.Join(homedir.HomeDir(), ".kube", "config") + } + + f := cmd.PersistentFlags() + f.StringVarP(&cfg.Kubeconfig, "kubeconfig", "k", defaultKubeconfig, "Path to the kubeconfig file") + f.StringVarP(&cfg.Namespace, "namespace", "n", "", "Namespace to migrate (default: all namespaces)") + f.BoolVar(&cfg.Apply, "apply", false, "Execute the adoption. Without it the tool only prints the plan (dry-run).") + f.BoolVarP(&cfg.Yes, "yes", "y", false, "Skip the interactive confirmation before --apply mutates the cluster") + f.BoolVar(&cfg.SkipControllerCheck, "skip-controller-check", false, "Skip verifying that both operator Deployments are scaled down") + f.StringVar(&cfg.LegacyController, "legacy-controller", defaultControllerRef, "Legacy operator Deployment as namespace/name") + f.StringVar(&cfg.NewController, "new-controller", defaultControllerRef, "New operator Deployment as namespace/name") + f.StringVar(&cfg.Version, "version", "", "etcd version (X.Y.Z) to set on every migrated cluster, overriding image-tag extraction") + f.StringVar(&cfg.AuthSecret, "auth-secret", "", "Existing kubernetes.io/basic-auth Secret (in each cluster's namespace) to reference for clusters with enableAuth; default generates one per cluster") + + f.BoolVar(&cfg.SkipBackup, "skip-backup", false, "Skip the pre-adoption safety snapshot (NOT recommended)") + f.StringVar(&cfg.AgentImage, "agent-image", "", "Operator image carrying the snapshot agent (default: taken from the new controller Deployment's spec)") + f.DurationVar(&cfg.BackupTimeout, "backup-timeout", 30*time.Minute, "How long to wait for each backup Job") + f.StringVar(&cfg.BackupS3Endpoint, "backup-s3-endpoint", "", "S3 endpoint for backup storage") + f.StringVar(&cfg.BackupS3Bucket, "backup-s3-bucket", "", "S3 bucket for backup storage") + f.StringVar(&cfg.BackupS3Key, 
"backup-s3-key", "", "S3 key prefix for backup storage") + f.StringVar(&cfg.BackupS3Region, "backup-s3-region", "", "S3 region") + f.BoolVar(&cfg.BackupS3ForcePathStyle, "backup-s3-force-path-style", false, "Use path-style S3 addressing (MinIO/Ceph)") + f.StringVar(&cfg.BackupS3CredentialsSecret, "backup-s3-credentials-secret", "", "Secret with AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY, required in EVERY migrated cluster's namespace") + f.StringVar(&cfg.BackupPVCClaim, "backup-pvc-claim", "", "PVC for backup storage, required in EVERY migrated cluster's namespace (mutually exclusive with the s3 flags)") + f.StringVar(&cfg.BackupPVCSubPath, "backup-pvc-subpath", "", "Subdirectory within the backup PVC") +} + +// validate cross-checks the flag set. +func (cfg *Config) validate() error { + if _, _, err := splitRef(cfg.LegacyController); err != nil { + return fmt.Errorf("--legacy-controller: %w", err) + } + if _, _, err := splitRef(cfg.NewController); err != nil { + return fmt.Errorf("--new-controller: %w", err) + } + + s3 := cfg.BackupS3Endpoint != "" || cfg.BackupS3Bucket != "" || cfg.BackupS3CredentialsSecret != "" + pvc := cfg.BackupPVCClaim != "" + switch { + case cfg.SkipBackup && (s3 || pvc): + return fmt.Errorf("--skip-backup contradicts the --backup-* destination flags; drop one side") + case cfg.SkipBackup: + return nil + case s3 && pvc: + return fmt.Errorf("--backup-s3-* and --backup-pvc-* are mutually exclusive") + case !s3 && !pvc: + // The dry-run is allowed to proceed without a destination so users + // can review the plan first; --apply is not. 
+ if cfg.Apply { + return fmt.Errorf("adoption rewires ownership of live etcd storage; provide a backup destination " + + "(--backup-s3-{endpoint,bucket,credentials-secret} or --backup-pvc-claim) or opt out explicitly with --skip-backup") + } + return nil + case s3 && (cfg.BackupS3Endpoint == "" || cfg.BackupS3Bucket == "" || cfg.BackupS3CredentialsSecret == ""): + return fmt.Errorf("S3 backup destination needs all of --backup-s3-endpoint, --backup-s3-bucket and --backup-s3-credentials-secret") + } + return nil +} + +// backupConfigured reports whether a backup destination is set. +func (cfg *Config) backupConfigured() bool { + return !cfg.SkipBackup && (cfg.BackupS3Endpoint != "" || cfg.BackupPVCClaim != "") +} + +// splitRef parses a "namespace/name" flag value. +func splitRef(ref string) (namespace, name string, err error) { + parts := strings.Split(ref, "/") + if len(parts) != 2 || parts[0] == "" || parts[1] == "" { + return "", "", fmt.Errorf("%q is not of the form namespace/name", ref) + } + return parts[0], parts[1], nil +} diff --git a/cmd/etcd-migrate/inspect.go b/cmd/etcd-migrate/inspect.go new file mode 100644 index 00000000..96dece40 --- /dev/null +++ b/cmd/etcd-migrate/inspect.go @@ -0,0 +1,188 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0
*/

package main

import (
	"context"
	"crypto/tls"
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	"github.com/cozystack/etcd-operator/internal/etcdclient"
	"github.com/cozystack/etcd-operator/internal/migrate"
	"github.com/cozystack/etcd-operator/internal/portforward"
)

// inspectCluster gathers everything BuildAdoption needs from one LIVE legacy
// cluster: the etcd member list + cluster ID + auth status (read-only RPCs
// over a port-forward, authenticated the same way the legacy operator
// dialed), and the matching pods/PVCs from the apiserver. Read-only — safe
// in dry-run, where it makes the rendered plan concrete instead of
// placeholder-ridden.
//
// On failure the returned error is wrapped with the step that failed; the
// returned facts may then be partially filled and should not be used.
func inspectCluster(ctx context.Context, restCfg *rest.Config, kube kubernetes.Interface, lc legacyCluster) (migrate.ClusterFacts, error) {
	var facts migrate.ClusterFacts

	// Legacy pods are found by the name/instance labels.
	pods, err := kube.CoreV1().Pods(lc.Namespace).List(ctx, metav1.ListOptions{
		LabelSelector: "app.kubernetes.io/name=etcd,app.kubernetes.io/instance=" + lc.Name,
	})
	if err != nil {
		return facts, fmt.Errorf("list etcd pods: %w", err)
	}
	// Index every pod by name for the member↔pod match below, and pick the
	// first pod in phase Running as the port-forward target.
	podByName := map[string]*corev1.Pod{}
	var dialPod string
	for i := range pods.Items {
		p := &pods.Items[i] // pointer into the slice, not a copy of the loop value
		podByName[p.Name] = p
		if dialPod == "" && p.Status.Phase == corev1.PodRunning {
			dialPod = p.Name
		}
	}
	if dialPod == "" {
		return facts, fmt.Errorf("no Running etcd pod found for legacy cluster %s/%s", lc.Namespace, lc.Name)
	}

	// Forward a local port to port 2379 of the chosen pod; stop tears the
	// forward down when this function returns.
	localPort, stop, err := portforward.ForwardToPod(restCfg, lc.Namespace, dialPod, 2379)
	if err != nil {
		return facts, fmt.Errorf("port-forward to %s: %w", dialPod, err)
	}
	defer stop()

	// A nil tlsCfg means the legacy cluster is plaintext.
	tlsCfg, err := legacyOperatorTLSConfig(ctx, kube, lc)
	if err != nil {
		return facts, err
	}
	// NOTE(review): the two empty strings are presumably username/password
	// (no user credentials) — confirm against etcdclient.New.
	cli, err := etcdclient.New([]string{fmt.Sprintf("localhost:%d", localPort)}, tlsCfg, "", "")
	if err != nil {
		return facts, fmt.Errorf("dial legacy etcd: %w", err)
	}
	defer func() { _ = cli.Close() }() // close error is irrelevant for a read-only probe

	// Each RPC gets its own 10-second deadline.
	listCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	resp, err := cli.MemberList(listCtx)
	if err != nil {
		return facts, fmt.Errorf("etcd MemberList: %w", err)
	}
	// IDs are rendered as 16-digit, zero-padded lowercase hex.
	facts.ClusterIDHex = fmt.Sprintf("%016x", resp.Header.ClusterId)

	for _, m := range resp.Members {
		fact := migrate.MemberFact{
			Name:      m.Name,
			IDHex:     fmt.Sprintf("%016x", m.ID),
			IsLearner: m.IsLearner,
		}
		// Only the first advertised peer URL is recorded.
		if len(m.PeerURLs) > 0 {
			fact.PeerURL = m.PeerURLs[0]
		}
		// Every member must be backed by a same-name pod: the legacy
		// operator ran members with --name=$(POD_NAME), and the adopted
		// EtcdMember CR name doubles as the pod lookup key. A missing pod
		// surfaces as PodUID=="" and BuildAdoption turns it into a plan
		// error. A pod that exists but is not Running is treated the same
		// way: PodUID stays empty.
		if pod, ok := podByName[m.Name]; ok && pod.Status.Phase == corev1.PodRunning {
			fact.PodUID = string(pod.UID)
		}
		facts.Members = append(facts.Members, fact)
	}
	if len(facts.Members) == 0 {
		return facts, fmt.Errorf("etcd reported an empty member list")
	}

	authCtx, cancel2 := context.WithTimeout(ctx, 10*time.Second)
	defer cancel2()
	status, err := cli.AuthStatus(authCtx)
	if err != nil {
		return facts, fmt.Errorf("etcd AuthStatus: %w", err)
	}
	facts.AuthEnabled = status.Enabled

	return facts, nil
}

// verifyAdoptionPVCs checks that every adopted member's PVC exists before
// anything is mutated. ensurePVC on the new member controller hard-fails on
// a missing or foreign-owned PVC, so catch it at plan time with a precise
// message instead.
+func verifyAdoptionPVCs(ctx context.Context, kube kubernetes.Interface, namespace string, plan *migrate.ResourcePlan) { + for _, ma := range plan.Adoption.Members { + pvc, err := kube.CoreV1().PersistentVolumeClaims(namespace).Get(ctx, ma.PVCName, metav1.GetOptions{}) + if err != nil { + plan.Errors = append(plan.Errors, fmt.Sprintf("PVC %q for member %q: %v", ma.PVCName, ma.Member.Name, err)) + continue + } + for _, o := range pvc.OwnerReferences { + if o.Controller != nil && *o.Controller && o.Kind != "EtcdMember" { + plan.Errors = append(plan.Errors, fmt.Sprintf( + "PVC %q is controller-owned by %s %q; refusing to re-own it", ma.PVCName, o.Kind, o.Name)) + } + } + } + if len(plan.Errors) > 0 { + plan.Action = migrate.ActionError + plan.DeleteRef = nil + } +} + +// legacyOperatorTLSConfig assembles the client TLS config the legacy +// operator dialed with: CA from serverTrustedCASecret (falling back to the +// server secret's ca.crt), identity from clientSecret. ServerName pins the +// expected SAN — the legacy client Service DNS — because the port-forward +// connects to localhost, which is never in the cert. 
+func legacyOperatorTLSConfig(ctx context.Context, kube kubernetes.Interface, lc legacyCluster) (*tls.Config, error) { + if lc.Spec.Security == nil || lc.Spec.Security.TLS.ServerSecret == "" { + return nil, nil // plaintext legacy cluster + } + t := lc.Spec.Security.TLS + + caSecret := t.ServerTrustedCASecret + if caSecret == "" { + caSecret = t.ServerSecret + } + caData, err := secretKey(ctx, kube, lc.Namespace, caSecret, "ca.crt") + if err != nil { + return nil, err + } + + var certPEM, keyPEM []byte + if t.ClientSecret != "" { + if certPEM, err = secretKey(ctx, kube, lc.Namespace, t.ClientSecret, "tls.crt"); err != nil { + return nil, err + } + if keyPEM, err = secretKey(ctx, kube, lc.Namespace, t.ClientSecret, "tls.key"); err != nil { + return nil, err + } + } + + tlsCfg, err := etcdclient.TLSConfig(caData, certPEM, keyPEM) + if err != nil { + return nil, fmt.Errorf("build TLS config from legacy secrets: %w", err) + } + tlsCfg.ServerName = fmt.Sprintf("%s.%s.svc", lc.Name, lc.Namespace) + return tlsCfg, nil +} + +// secretKey fetches one key of one Secret, with precise errors. +func secretKey(ctx context.Context, kube kubernetes.Interface, namespace, name, key string) ([]byte, error) { + sec, err := kube.CoreV1().Secrets(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("read secret %s/%s: %w", namespace, name, err) + } + data := sec.Data[key] + if len(data) == 0 { + return nil, fmt.Errorf("secret %s/%s has no %q key", namespace, name, key) + } + return data, nil +} diff --git a/cmd/etcd-migrate/main.go b/cmd/etcd-migrate/main.go new file mode 100644 index 00000000..d9a2eee4 --- /dev/null +++ b/cmd/etcd-migrate/main.go @@ -0,0 +1,286 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0
*/

// etcd-migrate adopts running legacy etcd.aenix.io/v1alpha1 clusters onto
// etcd-operator.cozystack.io/v1alpha2 IN PLACE. It runs in the window where
// BOTH operator Deployments are scaled to zero while the etcd Pods keep
// serving: it inspects each live cluster (member list, cluster ID), takes a
// safety backup, creates the new-API CRs with prefilled status, re-owns the
// existing Pods/PVCs/Services, and dismantles the legacy CR + StatefulSet
// with Orphan propagation. The etcd Pods are never restarted and no data is
// moved — the new operator simply takes over the running data plane.
package main

import (
	"context"
	"fmt"
	"io"
	"os"

	"github.com/spf13/cobra"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/kubernetes"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
	_ "k8s.io/client-go/plugin/pkg/client/auth" // Import all auth providers
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
	"sigs.k8s.io/controller-runtime/pkg/client"

	lll "github.com/cozystack/etcd-operator/api/v1alpha2"
	"github.com/cozystack/etcd-operator/internal/migrate"
)

// main builds the cobra root command, binds every flag into a Config and
// executes it. RunE validates the flag set and then runs the migration
// against os.Stdin/os.Stdout; any error exits the process with status 1.
func main() {
	cfg := &Config{}
	rootCmd := &cobra.Command{
		Use:   "etcd-migrate",
		Short: "Migrate etcd.aenix.io/v1alpha1 resources to etcd-operator.cozystack.io/v1alpha2",
		Long: `etcd-migrate adopts running legacy etcd-operator clusters (EtcdCluster,
EtcdBackup, EtcdBackupSchedule of group etcd.aenix.io/v1alpha1) onto
etcd-operator.cozystack.io/v1alpha2 IN PLACE: the etcd pods, their PVCs and
Services stay exactly as they are; only ownership, labels and CRs change.

Run it with BOTH operator Deployments scaled to zero (the etcd pods keep
serving traffic throughout). By default it is a dry-run that inspects each
live cluster and prints the planned manifests and steps; --apply executes
the adoption. Scale the NEW operator up afterwards.

Safety: before anything is mutated, each cluster is snapshotted to the
--backup-s3-*/--backup-pvc-* destination. Nothing is restored from the
artifact — it exists for disaster recovery. Skipping it requires an
explicit --skip-backup.`,
		// A failed migration is not a usage error; don't print the flag help.
		SilenceUsage: true,
		RunE: func(cmd *cobra.Command, args []string) error {
			if err := cfg.validate(); err != nil {
				return err
			}
			return runMigration(cmd.Context(), cfg, os.Stdin, os.Stdout)
		},
	}
	bindFlags(rootCmd, cfg)
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}

// newScheme registers everything the tool writes: the v1alpha2 types plus
// the core/batch/rbac kinds used for Secrets, Jobs and printed manifests.
// It returns the first registration error, if any.
func newScheme() (*runtime.Scheme, error) {
	scheme := runtime.NewScheme()
	if err := clientgoscheme.AddToScheme(scheme); err != nil {
		return nil, err
	}
	if err := lll.AddToScheme(scheme); err != nil {
		return nil, err
	}
	return scheme, nil
}

// runMigration is the top-level flow: clients → gate → discover → plan →
// render → (confirm → snapshot → apply).
//
// cfg must already have passed validate. stdin is only read for the
// interactive confirmation; all progress and the rendered plan go to stdout.
// Both the dry-run and the apply path end with errorIfPlanFailed, so the
// returned error is non-nil whenever any plan ended up as ActionError.
func runMigration(ctx context.Context, cfg *Config, stdin io.Reader, stdout io.Writer) error {
	// Three clients over the same rest config: typed (pods, PVCs, secrets),
	// dynamic (legacy CRs) and controller-runtime (new-API objects).
	restCfg, err := clientcmd.BuildConfigFromFlags("", cfg.Kubeconfig)
	if err != nil {
		return fmt.Errorf("error building kubeconfig: %w", err)
	}
	kube, err := kubernetes.NewForConfig(restCfg)
	if err != nil {
		return fmt.Errorf("error creating Kubernetes client: %w", err)
	}
	dyn, err := dynamic.NewForConfig(restCfg)
	if err != nil {
		return fmt.Errorf("error creating dynamic client: %w", err)
	}
	scheme, err := newScheme()
	if err != nil {
		return err
	}
	ctrlClient, err := client.New(restCfg, client.Options{Scheme: scheme})
	if err != nil {
		return fmt.Errorf("error creating controller-runtime client: %w", err)
	}

	// Safety gate: neither generation of the operator may be running.
	if !cfg.SkipControllerCheck {
		// splitRef errors are ignored here: validate already rejected
		// malformed refs.
		legacyNS, legacyName, _ := splitRef(cfg.LegacyController)
		newNS, newName, _ := splitRef(cfg.NewController)
		if err := checkControllersDown(ctx, kube, []deployRef{
			{Namespace: legacyNS, Name: legacyName},
			{Namespace: newNS, Name: newName},
		}); err != nil {
			return err
		}
		fmt.Fprintln(stdout, "✓ both operator Deployments are down")
	} else {
		fmt.Fprintln(stdout, "! controller check skipped (--skip-controller-check)")
	}

	d, err := discover(ctx, dyn, cfg.Namespace)
	if err != nil {
		return err
	}
	if len(d.Clusters)+len(d.Backups)+len(d.Schedules) == 0 {
		fmt.Fprintln(stdout, "no legacy etcd.aenix.io resources found — nothing to migrate")
		printCRDNotice(stdout)
		return nil
	}
	fmt.Fprintf(stdout, "discovered %d EtcdCluster, %d EtcdBackup, %d EtcdBackupSchedule (legacy)\n\n",
		len(d.Clusters), len(d.Backups), len(d.Schedules))

	// Inspect every live cluster (read-only: MemberList + AuthStatus over a
	// port-forward, pod/PVC reads). Runs in dry-run too, so the rendered
	// plan shows the real cluster ID and member IDs.
	//
	// Both maps are keyed "namespace/name". An inspection failure is kept
	// per cluster and handed to buildPlans instead of aborting the run.
	facts := map[string]migrate.ClusterFacts{}
	inspectErrs := map[string]error{}
	for _, lc := range d.Clusters {
		f, ierr := inspectCluster(ctx, restCfg, kube, lc)
		if ierr != nil {
			inspectErrs[lc.Namespace+"/"+lc.Name] = ierr
			continue
		}
		facts[lc.Namespace+"/"+lc.Name] = f
		fmt.Fprintf(stdout, "inspected %s/%s: clusterID=%s, %d members, auth=%v\n",
			lc.Namespace, lc.Name, f.ClusterIDHex, len(f.Members), f.AuthEnabled)
	}
	fmt.Fprintln(stdout)

	plans := buildPlans(d, facts, inspectErrs, migrate.TranslateOptions{
		VersionOverride: cfg.Version,
		AuthSecretName:  cfg.AuthSecret,
	})
	// PVC pre-flight for adoption plans only; it may flip a plan to
	// ActionError in place.
	for i := range plans {
		if plans[i].Action == migrate.ActionAdopt {
			verifyAdoptionPVCs(ctx, kube, plans[i].Namespace, &plans[i])
		}
	}
	if err := markExisting(ctx, ctrlClient, plans); err != nil {
		return err
	}
	// Neither a destination nor --skip-backup: validate lets a dry-run
	// through in this state, so surface it as a warning on each adoption.
	if !cfg.backupConfigured() && !cfg.SkipBackup {
		for i := range plans {
			if plans[i].Action == migrate.ActionAdopt {
				plans[i].Warnings = append(plans[i].Warnings,
					"no backup destination configured; --apply will require --backup-s3-*/--backup-pvc-* or an explicit --skip-backup")
			}
		}
	}

	render(stdout, plans)

	// Dry-run stops here; nothing below this point runs without --apply.
	if !cfg.Apply {
		fmt.Fprintln(stdout, "\nDry-run complete: nothing was changed. Re-run with --apply to execute the plan.")
		printCRDNotice(stdout)
		return errorIfPlanFailed(plans)
	}

	if !cfg.Yes && !confirm(stdin, stdout,
		"\nThis will ADOPT the clusters above in place (re-own pods/PVCs/Services, replace the legacy CRs; pods keep running). Proceed?") {
		return fmt.Errorf("aborted")
	}

	// A nil backup func tells runMutationPhases to skip the backup phase.
	var backup func() error
	if cfg.backupConfigured() {
		backup = func() error { return runBackups(ctx, cfg, restCfg, kube, ctrlClient, plans, d, stdout) }
	} else {
		fmt.Fprintln(stdout, "! pre-adoption backup skipped (--skip-backup)")
	}

	// The phases are passed as closures; runMutationPhases owns their order.
	stats, err := runMutationPhases(
		func() error { return disableAuthForAdoptions(ctx, restCfg, kube, plans, d, facts, stdout) },
		backup,
		func() (applyStats, error) { return applyPlans(ctx, ctrlClient, dyn, plans, stdout) },
	)
	if err != nil {
		return err
	}
	fmt.Fprintf(stdout, "\ndone: %d adopted, %d created, %d legacy CRs deleted, %d skipped (already migrated), %d print-only, %d errored\n",
		stats.Adopted, stats.Created, stats.Deleted, stats.Skipped, stats.Printed, stats.Errored)
	if stats.Adopted > 0 {
		fmt.Fprintln(stdout, "\nNEXT: scale the new operator up — it will take over the adopted clusters without touching the pods:\n kubectl -n "+
			mustNamespace(cfg.NewController)+" scale deploy "+mustName(cfg.NewController)+" --replicas=1")
	}
	printCRDNotice(stdout)
	return errorIfPlanFailed(plans)
}

// runMutationPhases runs the post-confirmation adoption phases in the order
// their inter-phase contracts REQUIRE, and returns the apply stats:
//
//  1. authDisable — switch auth off on every auth-enabled legacy etcd.
//  2. backup — snapshot each to-be-adopted cluster (nil ⇒ --skip-backup).
//  3. apply — re-own the data plane and create the new CRs.
//
// Auth-disable MUST precede backup: the snapshot Job dials etcd anonymously
// (cert-only, no user), and etcd gates the Maintenance Snapshot RPC behind
// auth when it is enabled — so for an auth-enabled cluster (the Cozystack/
// Kamaji case) the backup can only succeed once auth is off. Running them in
// the reverse order silently flips exactly those clusters to ActionError and
// excludes them from adoption. A cluster whose auth-disable fails is itself
// flipped to ActionError and then skipped by the backup and apply phases, so
// an unprotected cluster is never adopted.
+func runMutationPhases(authDisable func() error, backup func() error, apply func() (applyStats, error)) (applyStats, error) { + if err := authDisable(); err != nil { + return applyStats{}, err + } + if backup != nil { + if err := backup(); err != nil { + return applyStats{}, err + } + } + return apply() +} + +// mustNamespace/mustName split a pre-validated namespace/name ref. +func mustNamespace(ref string) string { ns, _, _ := splitRef(ref); return ns } +func mustName(ref string) string { _, n, _ := splitRef(ref); return n } + +// disableAuthForAdoptions runs `auth disable` on every to-be-adopted cluster +// whose live etcd reports auth enabled. Idempotent (already-off is a no-op). +func disableAuthForAdoptions(ctx context.Context, restCfg *rest.Config, kube kubernetes.Interface, + plans []migrate.ResourcePlan, d discovered, facts map[string]migrate.ClusterFacts, out io.Writer) error { + specs := map[string]legacyCluster{} + for _, lc := range d.Clusters { + specs[lc.Namespace+"/"+lc.Name] = lc + } + for i := range plans { + p := &plans[i] + if p.SourceKind != "EtcdCluster" || p.Action != migrate.ActionAdopt { + continue + } + key := p.Namespace + "/" + p.SourceName + if !facts[key].AuthEnabled { + continue + } + lc := specs[key] + fmt.Fprintf(out, "disabling auth on legacy etcd %s …\n", key) + if err := disableLegacyAuth(ctx, restCfg, kube, lc); err != nil { + p.Action = migrate.ActionError + p.Errors = append(p.Errors, fmt.Sprintf("auth disable failed: %v", err)) + fmt.Fprintf(out, " ERROR: %v — cluster left untouched\n", err) + } + } + return nil +} + +// errorIfPlanFailed makes the process exit non-zero when any resource could +// not be migrated, so scripts and CI can gate on it. 
+func errorIfPlanFailed(plans []migrate.ResourcePlan) error { + errored := 0 + for i := range plans { + if plans[i].Action == migrate.ActionError { + errored++ + } + } + if errored > 0 { + return fmt.Errorf("%d resource(s) could not be migrated — see the errors above", errored) + } + return nil +} diff --git a/cmd/etcd-migrate/main_test.go b/cmd/etcd-migrate/main_test.go new file mode 100644 index 00000000..8a350317 --- /dev/null +++ b/cmd/etcd-migrate/main_test.go @@ -0,0 +1,616 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package main + +import ( + "context" + "fmt" + "io" + "strings" + "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + dynfake "k8s.io/client-go/dynamic/fake" + k8sfake "k8s.io/client-go/kubernetes/fake" + ctrlfake "sigs.k8s.io/controller-runtime/pkg/client/fake" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/controllers" + "github.com/cozystack/etcd-operator/internal/migrate" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +func ptrInt32(v int32) *int32 { return &v } + +func deployment(ns, name string, replicas int32) *appsv1.Deployment { + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: name}, + Spec: appsv1.DeploymentSpec{ + Replicas: ptrInt32(replicas), + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": name}}, + }, + } +} + +func controllerPod(ns, app string, phase 
corev1.PodPhase) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: app + "-pod", Labels: map[string]string{"app": app}}, + Status: corev1.PodStatus{Phase: phase}, + } +} + +// TestCheckControllersDown covers the safety gate's verdicts. +func TestCheckControllersDown(t *testing.T) { + ctx := context.Background() + ref := deployRef{Namespace: "sys", Name: "mgr"} + + t.Run("absent deployment is down", func(t *testing.T) { + kube := k8sfake.NewClientset() + if err := checkControllersDown(ctx, kube, []deployRef{ref}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("scaled to zero with no pods is down", func(t *testing.T) { + kube := k8sfake.NewClientset(deployment("sys", "mgr", 0)) + if err := checkControllersDown(ctx, kube, []deployRef{ref}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("replicas above zero aborts", func(t *testing.T) { + kube := k8sfake.NewClientset(deployment("sys", "mgr", 1)) + err := checkControllersDown(ctx, kube, []deployRef{ref}) + if err == nil || !strings.Contains(err.Error(), "not scaled down") { + t.Fatalf("err = %v, want not-scaled-down", err) + } + }) + + t.Run("lingering pod aborts even at replicas zero", func(t *testing.T) { + kube := k8sfake.NewClientset(deployment("sys", "mgr", 0), controllerPod("sys", "mgr", corev1.PodRunning)) + err := checkControllersDown(ctx, kube, []deployRef{ref}) + if err == nil || !strings.Contains(err.Error(), "still has pod") { + t.Fatalf("err = %v, want still-has-pod", err) + } + }) + + t.Run("terminated pods are tolerated", func(t *testing.T) { + kube := k8sfake.NewClientset(deployment("sys", "mgr", 0), controllerPod("sys", "mgr", corev1.PodSucceeded)) + if err := checkControllersDown(ctx, kube, []deployRef{ref}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("identical coordinates checked once", func(t *testing.T) { + kube := k8sfake.NewClientset(deployment("sys", "mgr", 0)) + if err := 
checkControllersDown(ctx, kube, []deployRef{ref, ref}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + gets := 0 + for _, a := range kube.Actions() { + if a.GetVerb() == "get" && a.GetResource().Resource == "deployments" { + gets++ + } + } + if gets != 1 { + t.Errorf("deployment fetched %d times, want 1 (dedup)", gets) + } + }) +} + +// legacyUnstructured builds a legacy CR as the dynamic client would return it. +func legacyUnstructured(kind, ns, name string, spec map[string]any) *unstructured.Unstructured { + return &unstructured.Unstructured{Object: map[string]any{ + "apiVersion": "etcd.aenix.io/v1alpha1", + "kind": kind, + "metadata": map[string]any{"namespace": ns, "name": name, "uid": "uid-" + name}, + "spec": spec, + }} +} + +func newDynFake(objs ...runtime.Object) *dynfake.FakeDynamicClient { + scheme := runtime.NewScheme() + return dynfake.NewSimpleDynamicClientWithCustomListKinds(scheme, map[schema.GroupVersionResource]string{ + migrate.ClusterGVR: "EtcdClusterList", + migrate.BackupGVR: "EtcdBackupList", + migrate.ScheduleGVR: "EtcdBackupScheduleList", + }, objs...) +} + +var clusterSpec = map[string]any{ + "replicas": int64(3), + "storage": map[string]any{ + "volumeClaimTemplate": map[string]any{ + "spec": map[string]any{ + "resources": map[string]any{ + "requests": map[string]any{"storage": "1Gi"}, + }, + }, + }, + }, +} + +// TestDiscover covers the dynamic listing + trimmed-struct decode, including +// the uninstalled-CRD tolerance. 
+func TestDiscover(t *testing.T) { + ctx := context.Background() + + t.Run("decodes all three kinds", func(t *testing.T) { + dyn := newDynFake( + legacyUnstructured("EtcdCluster", "ns1", "c1", clusterSpec), + legacyUnstructured("EtcdBackup", "ns1", "b1", map[string]any{ + "clusterRef": map[string]any{"name": "c1"}, + "destination": map[string]any{"pvc": map[string]any{"claimName": "claim"}}, + }), + legacyUnstructured("EtcdBackupSchedule", "ns2", "s1", map[string]any{ + "clusterRef": map[string]any{"name": "c2"}, + "schedule": "@hourly", + "destination": map[string]any{"pvc": map[string]any{"claimName": "claim"}}, + }), + ) + d, err := discover(ctx, dyn, "") + if err != nil { + t.Fatalf("discover: %v", err) + } + if len(d.Clusters) != 1 || len(d.Backups) != 1 || len(d.Schedules) != 1 { + t.Fatalf("discovered %d/%d/%d, want 1/1/1", len(d.Clusters), len(d.Backups), len(d.Schedules)) + } + c := d.Clusters[0] + if c.Name != "c1" || c.Namespace != "ns1" || c.UID != "uid-c1" { + t.Errorf("cluster identity = %+v", c) + } + if c.Spec.Replicas == nil || *c.Spec.Replicas != 3 { + t.Errorf("decoded replicas = %v", c.Spec.Replicas) + } + if got := c.Spec.Storage.VolumeClaimTemplate.Spec.Resources.Requests[corev1.ResourceStorage]; got.String() != "1Gi" { + t.Errorf("decoded storage request = %s", got.String()) + } + if d.Backups[0].Spec.Destination.PVC == nil || d.Backups[0].Spec.Destination.PVC.ClaimName != "claim" { + t.Errorf("decoded backup destination = %+v", d.Backups[0].Spec.Destination) + } + if d.Schedules[0].Spec.Schedule != "@hourly" { + t.Errorf("decoded schedule = %q", d.Schedules[0].Spec.Schedule) + } + }) + + t.Run("namespace filter applies", func(t *testing.T) { + dyn := newDynFake( + legacyUnstructured("EtcdCluster", "ns1", "c1", clusterSpec), + legacyUnstructured("EtcdCluster", "ns2", "c2", clusterSpec), + ) + d, err := discover(ctx, dyn, "ns2") + if err != nil { + t.Fatalf("discover: %v", err) + } + if len(d.Clusters) != 1 || d.Clusters[0].Name != "c2" { + 
t.Fatalf("clusters = %+v, want only ns2/c2", d.Clusters) + } + }) +} + +func newCtrlFake(t *testing.T, objs ...runtime.Object) *ctrlfake.ClientBuilder { + t.Helper() + scheme, err := newScheme() + if err != nil { + t.Fatalf("newScheme: %v", err) + } + return ctrlfake.NewClientBuilder().WithScheme(scheme). + WithStatusSubresource(&lll.EtcdCluster{}, &lll.EtcdMember{}). + WithRuntimeObjects(objs...) +} + +// factsFixture is what inspectCluster would report for a healthy 3-member +// legacy cluster named c1 in ns. +func factsFixture() migrate.ClusterFacts { + f := migrate.ClusterFacts{ClusterIDHex: "00000000deadbeef"} + for i := 0; i < 3; i++ { + name := fmt.Sprintf("c1-%d", i) + f.Members = append(f.Members, migrate.MemberFact{ + Name: name, + IDHex: fmt.Sprintf("%016x", 0xa00+i), + PeerURL: fmt.Sprintf("https://%s.c1-headless.ns.svc:2380", name), + PodUID: "uid-" + name, + }) + } + return f +} + +// dataPlaneFixture is the legacy data plane the adoption re-owns: STS, pods, +// PVCs, Services, state ConfigMap. +func dataPlaneFixture() []runtime.Object { + objs := []runtime.Object{ + &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "c1"}}, + &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "c1-cluster-state"}}, + // The legacy headless Service starts controller-owned by the legacy + // EtcdCluster — the migration must STRIP this stale ref when it + // re-points ownership at the adopted members. + &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "c1-headless", + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: "etcd.aenix.io/v1alpha1", Kind: "EtcdCluster", Name: "c1", + UID: types.UID("legacy-uid"), Controller: ptrTo(true), BlockOwnerDeletion: ptrTo(true), + }}}}, + // The legacy client Service is a real ClusterIP (the collision case); + // the cutover deletes it and recreates "c1" as a headless Service. 
+ &corev1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "c1"}, + Spec: corev1.ServiceSpec{ClusterIP: "10.0.0.10"}}, + } + for i := 0; i < 3; i++ { + name := fmt.Sprintf("c1-%d", i) + objs = append(objs, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: name, UID: types.UID("uid-" + name), + Labels: map[string]string{"app.kubernetes.io/name": "etcd", "app.kubernetes.io/instance": "c1"}}, + Status: corev1.PodStatus{Phase: corev1.PodRunning}, + }, + &corev1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "data-" + name}}, + ) + } + return objs +} + +// TestMarkExisting: a pre-existing adoption target keeps Action=Adopt (every +// adoption step is idempotent and a partial run must be completed, not +// skipped) with an explanatory note; a pre-existing Create target (backup) +// still downgrades to Skip. +func TestMarkExisting(t *testing.T) { + ctx := context.Background() + existingCluster := &lll.EtcdCluster{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "c1"}} + existingSnap := &lll.EtcdSnapshot{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "b1"}} + c := newCtrlFake(t, existingCluster, existingSnap).Build() + + d := discovered{ + Clusters: []legacyCluster{{Name: "c1", Namespace: "ns", Spec: legacySpecFixture()}}, + Backups: []legacyBackup{{Name: "b1", Namespace: "ns", Spec: legacy.EtcdBackupSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "c1"}, + Destination: legacy.BackupDestination{PVC: &legacy.PVCBackupDestination{ClaimName: "claim"}}, + }}}, + } + facts := map[string]migrate.ClusterFacts{"ns/c1": factsFixture()} + plans := buildPlans(d, facts, nil, migrate.TranslateOptions{}) + + if err := markExisting(ctx, c, plans); err != nil { + t.Fatalf("markExisting: %v", err) + } + if plans[0].Action != migrate.ActionAdopt { + t.Errorf("adoption target exists: Action = %s, want Adopt (idempotent re-run)", plans[0].Action) + } + noteFound := false + for _, n := range plans[0].Notes { + if 
strings.Contains(n, "re-run idempotently") { + noteFound = true + } + } + if !noteFound { + t.Errorf("expected idempotent-re-run note, got %v", plans[0].Notes) + } + if plans[1].Action != migrate.ActionSkip { + t.Errorf("existing snapshot target: Action = %s, want Skip", plans[1].Action) + } +} + +// TestBuildPlans_InspectFailure: a cluster whose live inspection failed gets +// an error plan and is never adopted. +func TestBuildPlans_InspectFailure(t *testing.T) { + d := discovered{Clusters: []legacyCluster{{Name: "c1", Namespace: "ns", Spec: legacySpecFixture()}}} + plans := buildPlans(d, nil, map[string]error{"ns/c1": fmt.Errorf("no Running etcd pod")}, migrate.TranslateOptions{}) + if len(plans) != 1 || plans[0].Action != migrate.ActionError { + t.Fatalf("plans = %+v, want one error plan", plans) + } + if !strings.Contains(plans[0].Errors[0], "no Running etcd pod") { + t.Errorf("error should carry the inspection failure: %v", plans[0].Errors) + } +} + +// TestApplyAdoption walks the full in-place adoption against fake clients: +// legacy control plane dismantled (CR, STS, ConfigMap — pods survive), new +// CRs created with prefilled status, pods/PVCs re-owned and labeled, +// Services re-owned. Then runs the apply a second time to pin idempotency. 
+func TestApplyAdoption(t *testing.T) {
+	ctx := context.Background()
+	// Arrange: the dynamic fake holds the legacy EtcdCluster CR; the typed fake
+	// client is seeded with the objects returned by dataPlaneFixture.
+	dyn := newDynFake(legacyUnstructured("EtcdCluster", "ns", "c1", clusterSpec))
+	c := newCtrlFake(t, dataPlaneFixture()...).Build()
+
+	// Build the adoption plan up front; anything but ActionAdopt aborts the test.
+	plan := migrate.BuildAdoption("c1", "ns", legacySpecFixture(), factsFixture(), migrate.TranslateOptions{})
+	if plan.Action != migrate.ActionAdopt {
+		t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors)
+	}
+	plans := []migrate.ResourcePlan{plan,
+		{ // an errored plan must be inert
+			SourceKind: "EtcdCluster", SourceName: "broken", Namespace: "ns",
+			Action: migrate.ActionError, Errors: []string{"x"},
+		},
+	}
+
+	stats, err := applyPlans(ctx, c, dyn, plans, io.Discard)
+	if err != nil {
+		t.Fatalf("applyPlans: %v", err)
+	}
+	// One adoption, plus the errored plan being counted.
+	if stats.Adopted != 1 || stats.Errored != 1 {
+		t.Fatalf("stats = %+v", stats)
+	}
+
+	// Legacy CR gone, STS gone, state ConfigMap gone — but every pod alive.
+	if _, err := dyn.Resource(migrate.ClusterGVR).Namespace("ns").Get(ctx, "c1", metav1.GetOptions{}); !apierrors.IsNotFound(err) {
+		t.Errorf("legacy CR still present (err=%v)", err)
+	}
+	if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "c1"}, &appsv1.StatefulSet{}); !apierrors.IsNotFound(err) {
+		t.Errorf("legacy StatefulSet still present (err=%v)", err)
+	}
+	if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "c1-cluster-state"}, &corev1.ConfigMap{}); !apierrors.IsNotFound(err) {
+		t.Errorf("legacy ConfigMap still present (err=%v)", err)
+	}
+
+	// New cluster: prefilled status (bootstrap gate). No headless override on
+	// the spec anymore — it lives as an annotation on the adopted members.
+	cluster := &lll.EtcdCluster{}
+	if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "c1"}, cluster); err != nil {
+		t.Fatalf("new EtcdCluster missing: %v", err)
+	}
+	if cluster.Status.ClusterID != "00000000deadbeef" || cluster.Status.ClusterToken != "c1-ns" || cluster.Status.Observed == nil {
+		t.Errorf("cluster status not prefilled: %+v", cluster.Status)
+	}
+	if cluster.Status.Observed != nil && cluster.Status.Observed.Replicas != 3 {
+		t.Errorf("observed.replicas = %d, want live member count 3", cluster.Status.Observed.Replicas)
+	}
+
+	// Per member c1-0..c1-2: check the EtcdMember CR, then its Pod, then its PVC.
+	for i := 0; i < 3; i++ {
+		name := fmt.Sprintf("c1-%d", i)
+
+		// The EtcdMember CR: both annotations, the peer URL in
+		// initialCluster, and a prefilled status.
+		member := &lll.EtcdMember{}
+		if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: name}, member); err != nil {
+			t.Fatalf("EtcdMember %s missing: %v", name, err)
+		}
+		if member.Annotations[controllers.AnnDataDirSubPath] != migrate.LegacyDataDirSubPath {
+			t.Errorf("%s data-dir-subpath annotation = %q, want %q", name, member.Annotations[controllers.AnnDataDirSubPath], migrate.LegacyDataDirSubPath)
+		}
+		if member.Annotations[controllers.AnnHeadlessServiceName] != "c1-headless" {
+			t.Errorf("%s headless-service-name annotation = %q", name, member.Annotations[controllers.AnnHeadlessServiceName])
+		}
+		if !strings.Contains(member.Spec.InitialCluster, name+"=https://"+name+".c1-headless.ns.svc:2380") {
+			t.Errorf("%s initialCluster = %q, want the persisted peer URLs", name, member.Spec.InitialCluster)
+		}
+		if member.Status.MemberID == "" || !member.Status.IsVoter || member.Status.PodUID != "uid-"+name {
+			t.Errorf("%s status not prefilled: %+v", name, member.Status)
+		}
+
+		// The Pod must still exist, be controller-owned by the EtcdMember,
+		// and carry the new labels next to the legacy instance label.
+		pod := &corev1.Pod{}
+		if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: name}, pod); err != nil {
+			t.Fatalf("pod %s gone — adoption must never delete pods: %v", name, err)
+		}
+		assertControllerOwner(t, pod.OwnerReferences, "EtcdMember", name)
+		if pod.Labels["etcd-operator.cozystack.io/cluster"] != "c1" || pod.Labels["etcd-operator.cozystack.io/role"] != "voter" {
+			t.Errorf("pod %s labels not stamped: %v", name, pod.Labels)
+		}
+		if pod.Labels["app.kubernetes.io/instance"] != "c1" {
+			t.Errorf("pod %s legacy labels must survive (old Services select them): %v", name, pod.Labels)
+		}
+
+		// Same ownership check for the member's data PVC.
+		pvc := &corev1.PersistentVolumeClaim{}
+		if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "data-" + name}, pvc); err != nil {
+			t.Fatalf("PVC data-%s gone: %v", name, err)
+		}
+		assertControllerOwner(t, pvc.OwnerReferences, "EtcdMember", name)
+	}
+
+	// Legacy headless Service is owner-referenced to the 3 adopted members
+	// (non-controller refs) so it self-GCs as they roll — NOT controller-owned
+	// by the cluster.
+	legacyHeadless := &corev1.Service{}
+	if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "c1-headless"}, legacyHeadless); err != nil {
+		t.Fatalf("legacy headless Service gone: %v", err)
+	}
+	for i := 0; i < 3; i++ {
+		name := fmt.Sprintf("c1-%d", i)
+		assertMemberOwnerRef(t, legacyHeadless.OwnerReferences, name)
+	}
+	// No owner ref may be a controller ref, and none may be of another kind.
+	for _, o := range legacyHeadless.OwnerReferences {
+		if o.Controller != nil && *o.Controller {
+			t.Errorf("legacy headless Service must carry no controller owner; got %+v", o)
+		}
+		if o.Kind != "EtcdMember" {
+			t.Errorf("legacy headless Service owner refs must all be EtcdMember; got %q", o.Kind)
+		}
+	}
+
+	// The legacy client Service "c1" has been replaced in place by the
+	// operator's native headless Service of the same name (clusterIP None),
+	// controller-owned by the new EtcdCluster.
+	nativeHeadless := &corev1.Service{}
+	if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "c1"}, nativeHeadless); err != nil {
+		t.Fatalf("native headless Service c1 gone: %v", err)
+	}
+	if nativeHeadless.Spec.ClusterIP != corev1.ClusterIPNone {
+		t.Errorf("Service c1 must be headless after cutover; ClusterIP=%q", nativeHeadless.Spec.ClusterIP)
+	}
+	assertControllerOwner(t, nativeHeadless.OwnerReferences, "EtcdCluster", "c1")
+
+	// Second pass: every step must tolerate the already-adopted state.
+	// The plan is rebuilt from the same fixtures and applied to the same fakes.
+	plans2 := []migrate.ResourcePlan{migrate.BuildAdoption("c1", "ns", legacySpecFixture(), factsFixture(), migrate.TranslateOptions{})}
+	if _, err := applyPlans(ctx, c, dyn, plans2, io.Discard); err != nil {
+		t.Fatalf("second applyPlans must be idempotent: %v", err)
+	}
+}
+
+// TestRunMutationPhases_AuthDisableBeforeBackup pins the inter-phase contract
+// that the snapshot Job depends on: auth-disable MUST run before the backup,
+// because the Job dials etcd anonymously and etcd gates the Maintenance
+// Snapshot RPC behind auth. The historical bug ran backup first, which made
+// the safety backup fail for exactly the auth-enabled clusters the tool
+// targets. This test fails if the order regresses.
+func TestRunMutationPhases_AuthDisableBeforeBackup(t *testing.T) {
+	// Every phase stub records its own name, so got is the observed call order.
+	var got []string
+	record := func(phase string) { got = append(got, phase) }
+
+	stats, err := runMutationPhases(
+		func() error { record("auth"); return nil },
+		func() error { record("backup"); return nil },
+		func() (applyStats, error) { record("apply"); return applyStats{Adopted: 1}, nil },
+	)
+	if err != nil {
+		t.Fatalf("runMutationPhases: %v", err)
+	}
+	if stats.Adopted != 1 {
+		t.Errorf("stats not propagated from apply: %+v", stats)
+	}
+
+	expected := []string{"auth", "backup", "apply"}
+	if len(got) != len(expected) {
+		t.Fatalf("phase order = %v, want %v", got, expected)
+	}
+	for idx, phase := range expected {
+		if got[idx] != phase {
+			t.Fatalf("phase order = %v, want %v (auth-disable must precede backup)", got, expected)
+		}
+	}
+}
+
+// TestRunMutationPhases_AuthFailureSkipsBackupAndApply: a failing auth-disable
+// phase must stop the run. Neither backup nor apply may be invoked and
+// runMutationPhases must return an error — a cluster that still has auth
+// enabled and no safety backup must never reach the mutation phases.
+func TestRunMutationPhases_AuthFailureSkipsBackupAndApply(t *testing.T) {
+	authErr := fmt.Errorf("auth boom")
+	var invoked []string
+
+	failingAuth := func() error {
+		invoked = append(invoked, "auth")
+		return authErr
+	}
+	backup := func() error {
+		invoked = append(invoked, "backup")
+		return nil
+	}
+	apply := func() (applyStats, error) {
+		invoked = append(invoked, "apply")
+		return applyStats{}, nil
+	}
+
+	if _, err := runMutationPhases(failingAuth, backup, apply); err == nil {
+		t.Fatal("expected auth-disable error to propagate")
+	}
+	if len(invoked) != 1 || invoked[0] != "auth" {
+		t.Errorf("after auth-disable failure, ran = %v; want only [auth]", invoked)
+	}
+}
+
+// TestRunMutationPhases_NilBackupSkips: --skip-backup (nil backup fn) still
+// runs auth-disable then apply, in that order.
+func TestRunMutationPhases_NilBackupSkips(t *testing.T) {
+	var got []string
+	authDisable := func() error {
+		got = append(got, "auth")
+		return nil
+	}
+	apply := func() (applyStats, error) {
+		got = append(got, "apply")
+		return applyStats{}, nil
+	}
+
+	// The backup phase is passed as nil, the --skip-backup case.
+	if _, err := runMutationPhases(authDisable, nil, apply); err != nil {
+		t.Fatalf("runMutationPhases: %v", err)
+	}
+
+	expected := []string{"auth", "apply"}
+	mismatch := len(got) != len(expected)
+	for idx := 0; !mismatch && idx < len(expected); idx++ {
+		mismatch = got[idx] != expected[idx]
+	}
+	if mismatch {
+		t.Errorf("phase order with nil backup = %v, want %v", got, expected)
+	}
+}
+
+// assertControllerOwner reports a test error unless refs contains an owner
+// reference with the given kind and name whose Controller flag is set.
+func assertControllerOwner(t *testing.T, refs []metav1.OwnerReference, kind, name string) {
+	t.Helper()
+	for idx := range refs {
+		ref := refs[idx]
+		if ref.Kind != kind || ref.Name != name {
+			continue
+		}
+		if ref.Controller != nil && *ref.Controller {
+			return
+		}
+	}
+	t.Errorf("no controller ownerRef %s/%s in %+v", kind, name, refs)
+}
+
+// assertMemberOwnerRef looks for an EtcdMember owner reference with the given
+// name and requires it to be neither a controller reference nor one that
+// blocks owner deletion — the shape the migration tool stamps on the legacy
+// headless Service so it self-GCs once the last adopted member rolls away.
+// Only the first matching reference is inspected.
+func assertMemberOwnerRef(t *testing.T, refs []metav1.OwnerReference, name string) {
+	t.Helper()
+	for idx := range refs {
+		ref := refs[idx]
+		if ref.Kind != "EtcdMember" || ref.Name != name {
+			continue
+		}
+		if ref.Controller != nil && *ref.Controller {
+			t.Errorf("EtcdMember owner ref %q must not be a controller ref", name)
+		}
+		if ref.BlockOwnerDeletion != nil && *ref.BlockOwnerDeletion {
+			t.Errorf("EtcdMember owner ref %q must not block owner deletion", name)
+		}
+		return
+	}
+	t.Errorf("no EtcdMember ownerRef %q in %+v", name, refs)
+}
+
+// TestApplyPlans_BackupCreate: the EtcdBackup → EtcdSnapshot path still
+// creates the new CR and deletes the legacy source.
+func TestApplyPlans_BackupCreate(t *testing.T) {
+	ctx := context.Background()
+
+	// Legacy side: one EtcdBackup CR in the dynamic fake. The typed fake
+	// client is built with no seed objects.
+	legacyBackupCR := legacyUnstructured("EtcdBackup", "ns", "b1", map[string]any{
+		"clusterRef":  map[string]any{"name": "c1"},
+		"destination": map[string]any{"pvc": map[string]any{"claimName": "claim"}},
+	})
+	dyn := newDynFake(legacyBackupCR)
+	c := newCtrlFake(t).Build()
+
+	// The same backup, expressed as the decoded legacy spec the translator takes.
+	spec := legacy.EtcdBackupSpec{
+		ClusterRef:  corev1.LocalObjectReference{Name: "c1"},
+		Destination: legacy.BackupDestination{PVC: &legacy.PVCBackupDestination{ClaimName: "claim"}},
+	}
+	plans := []migrate.ResourcePlan{migrate.TranslateBackup("b1", "ns", spec)}
+
+	stats, err := applyPlans(ctx, c, dyn, plans, io.Discard)
+	if err != nil {
+		t.Fatalf("applyPlans: %v", err)
+	}
+	if stats.Created != 1 || stats.Deleted != 1 {
+		t.Fatalf("stats = %+v", stats)
+	}
+
+	// The new EtcdSnapshot exists under the same name ...
+	snapshotKey := types.NamespacedName{Namespace: "ns", Name: "b1"}
+	if err := c.Get(ctx, snapshotKey, &lll.EtcdSnapshot{}); err != nil {
+		t.Errorf("new EtcdSnapshot missing: %v", err)
+	}
+
+	// ... and the legacy EtcdBackup is gone.
+	_, err = dyn.Resource(migrate.BackupGVR).Namespace("ns").Get(ctx, "b1", metav1.GetOptions{})
+	if !apierrors.IsNotFound(err) {
+		t.Errorf("legacy EtcdBackup still present (err=%v)", err)
+	}
+}
+
+// TestApplyPlans_SkipStillCleansUp: a Skip (target pre-existing) still
+// deletes the leftover legacy CR.
+func TestApplyPlans_SkipStillCleansUp(t *testing.T) { + ctx := context.Background() + dyn := newDynFake(legacyUnstructured("EtcdBackup", "ns", "b1", map[string]any{ + "clusterRef": map[string]any{"name": "c1"}, + "destination": map[string]any{"pvc": map[string]any{"claimName": "claim"}}, + })) + existing := &lll.EtcdSnapshot{ObjectMeta: metav1.ObjectMeta{Namespace: "ns", Name: "b1"}} + c := newCtrlFake(t, existing).Build() + + plans := []migrate.ResourcePlan{migrate.TranslateBackup("b1", "ns", legacy.EtcdBackupSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "c1"}, + Destination: legacy.BackupDestination{PVC: &legacy.PVCBackupDestination{ClaimName: "claim"}}, + })} + if err := markExisting(ctx, c, plans); err != nil { + t.Fatalf("markExisting: %v", err) + } + stats, err := applyPlans(ctx, c, dyn, plans, io.Discard) + if err != nil { + t.Fatalf("applyPlans: %v", err) + } + if stats.Skipped != 1 || stats.Deleted != 1 { + t.Fatalf("stats = %+v, want skipped=1 deleted=1", stats) + } + _, err = dyn.Resource(migrate.BackupGVR).Namespace("ns").Get(ctx, "b1", metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + t.Errorf("legacy CR still present (err=%v)", err) + } +} + +// legacySpecFixture is a minimal migratable legacy cluster spec. +func legacySpecFixture() legacy.EtcdClusterSpec { + return legacy.EtcdClusterSpec{ + Replicas: ptrInt32(3), + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("1Gi")}, + }}, + }}, + } +} diff --git a/cmd/etcd-migrate/output.go b/cmd/etcd-migrate/output.go new file mode 100644 index 00000000..89c9529c --- /dev/null +++ b/cmd/etcd-migrate/output.go @@ -0,0 +1,96 @@ +/* +Copyright 2024 The etcd-operator Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package main + +import ( + "fmt" + "io" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + "github.com/cozystack/etcd-operator/internal/migrate" +) + +// render prints the full plan: per-resource action, errors/warnings/notes, +// and the manifests the tool will create (or, for schedules, the manifests +// the user applies themselves). +func render(w io.Writer, plans []migrate.ResourcePlan) { + for i := range plans { + p := &plans[i] + fmt.Fprintf(w, "── %s %s/%s → %s ──\n", p.SourceKind, p.Namespace, p.SourceName, p.Action) + for _, e := range p.Errors { + fmt.Fprintf(w, " ERROR: %s\n", e) + } + for _, warn := range p.Warnings { + fmt.Fprintf(w, " warning: %s\n", warn) + } + for _, note := range p.Notes { + fmt.Fprintf(w, " note: %s\n", note) + } + if p.Action == migrate.ActionAdopt && p.Adoption != nil { + a := p.Adoption + fmt.Fprintf(w, " steps (pods are never restarted):\n") + fmt.Fprintf(w, " 1. create EtcdCluster (status prefilled: clusterID=%s) and %d EtcdMember CRs (with reserved annotations)\n", + a.ClusterStatus.ClusterID, len(a.Members)) + fmt.Fprintf(w, " 2. owner-reference legacy headless Service %q to the adopted members (auto-GCs as they roll)\n", + a.HeadlessServiceName) + fmt.Fprintf(w, " 3. orphan-delete legacy EtcdCluster %s/%s and StatefulSet %q (children survive)\n", + p.Namespace, p.SourceName, a.StatefulSetName) + fmt.Fprintf(w, " 4. delete legacy ConfigMap %q and PodDisruptionBudget %q\n", a.ConfigMapName, a.PDBName) + fmt.Fprintf(w, " 5. re-own + label each member's Pod and PVC\n") + fmt.Fprintf(w, " 6. 
replace legacy client Service %q in place with the operator's native headless Service of the same name\n", + a.ClientServiceName) + for _, extra := range p.Extras { + renderManifest(w, extra) + } + renderManifest(w, p.Target) + for _, ma := range a.Members { + renderManifest(w, ma.Member) + } + fmt.Fprintln(w) + continue + } + if p.DeleteRef != nil { + fmt.Fprintf(w, " cleanup: delete legacy %s %s/%s\n", p.DeleteRef.GVR.Resource, p.DeleteRef.Namespace, p.DeleteRef.Name) + } + if p.Action == migrate.ActionCreate || p.Action == migrate.ActionPrint { + for _, extra := range p.Extras { + renderManifest(w, extra) + } + if p.Target != nil { + renderManifest(w, p.Target) + } + } + fmt.Fprintln(w) + } +} + +// renderManifest prints one object as a `---`-separated YAML document. +func renderManifest(w io.Writer, obj client.Object) { + data, err := yaml.Marshal(obj) + if err != nil { + fmt.Fprintf(w, " (failed to render %T: %v)\n", obj, err) + return + } + fmt.Fprintln(w, "---") + _, _ = w.Write(data) +} + +// printCRDNotice reminds about the one cleanup step the tool never performs. +func printCRDNotice(w io.Writer) { + fmt.Fprintln(w, ` +NOTE: the legacy CRDs are not removed by this tool. Once no etcd.aenix.io +CRs remain (check EtcdBackupSchedules — they are never auto-deleted), remove +the CRDs manually: + + kubectl delete crd etcdclusters.etcd.aenix.io etcdbackups.etcd.aenix.io etcdbackupschedules.etcd.aenix.io`) +} diff --git a/cmd/etcd-migrate/run.go b/cmd/etcd-migrate/run.go new file mode 100644 index 00000000..c66f1dd0 --- /dev/null +++ b/cmd/etcd-migrate/run.go @@ -0,0 +1,347 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package main + +import ( + "bufio" + "context" + "fmt" + "io" + "strings" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "sigs.k8s.io/controller-runtime/pkg/client" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// deployRef is a parsed --legacy-controller / --new-controller coordinate. +type deployRef struct { + Namespace string + Name string +} + +// checkControllersDown verifies every Deployment coordinate is either absent +// or scaled to zero with no pods left under its selector. Identical +// coordinates (the common case: both generations deploy under the same name) +// are checked once. 
+func checkControllersDown(ctx context.Context, kube kubernetes.Interface, refs []deployRef) error {
+	// deployRef is a comparable struct, so it doubles as the dedup key.
+	seen := map[deployRef]bool{}
+	for _, ref := range refs {
+		if seen[ref] {
+			continue
+		}
+		seen[ref] = true
+
+		dep, err := kube.AppsV1().Deployments(ref.Namespace).Get(ctx, ref.Name, metav1.GetOptions{})
+		if apierrors.IsNotFound(err) {
+			continue // not installed — trivially down
+		}
+		if err != nil {
+			return fmt.Errorf("check controller %s/%s: %w", ref.Namespace, ref.Name, err)
+		}
+		if dep.Spec.Replicas == nil || *dep.Spec.Replicas != 0 {
+			// Spec.Replicas is a pointer: report the value it points at (or
+			// "unset"), never the pointer itself — %v on a non-nil pointer
+			// prints a memory address, which is useless to the operator.
+			replicas := "unset"
+			if dep.Spec.Replicas != nil {
+				replicas = fmt.Sprint(*dep.Spec.Replicas)
+			}
+			return fmt.Errorf("controller %s/%s is not scaled down (spec.replicas=%s); "+
+				"scale it to 0 first: kubectl -n %s scale deploy %s --replicas=0 (or pass --skip-controller-check)",
+				ref.Namespace, ref.Name, replicas, ref.Namespace, ref.Name)
+		}
+
+		// spec.replicas=0 is only the request; also require that no pod
+		// matching the Deployment's selector is still around.
+		selector, err := metav1.LabelSelectorAsSelector(dep.Spec.Selector)
+		if err != nil {
+			return fmt.Errorf("check controller %s/%s: bad selector: %w", ref.Namespace, ref.Name, err)
+		}
+		pods, err := kube.CoreV1().Pods(ref.Namespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
+		if err != nil {
+			return fmt.Errorf("check controller %s/%s pods: %w", ref.Namespace, ref.Name, err)
+		}
+		for _, p := range pods.Items {
+			// Pods in a terminal phase (Succeeded/Failed) do not count as running.
+			if p.Status.Phase == corev1.PodSucceeded || p.Status.Phase == corev1.PodFailed {
+				continue
+			}
+			return fmt.Errorf("controller %s/%s still has pod %s (%s); "+
+				"wait for it to terminate (or pass --skip-controller-check)",
+				ref.Namespace, ref.Name, p.Name, p.Status.Phase)
+		}
+	}
+	return nil
+}
+
+// legacyCluster is one discovered legacy EtcdCluster, with the identity bits
+// the snapshot phase needs alongside the decoded spec.
+type legacyCluster struct {
+	Name      string
+	Namespace string
+	UID       string // metadata.uid of the legacy CR, kept as a plain string
+	Spec      legacy.EtcdClusterSpec
+}
+
+// legacyBackup is one discovered legacy EtcdBackup: identity plus decoded spec.
+type legacyBackup struct {
+	Name      string
+	Namespace string
+	Spec      legacy.EtcdBackupSpec
+}
+
+// legacySchedule is one discovered legacy EtcdBackupSchedule: identity plus
+// decoded spec.
+type legacySchedule struct {
+	Name      string
+	Namespace string
+	Spec      legacy.EtcdBackupScheduleSpec
+}
+
+// discovered groups every legacy CR found by discover, one slice per kind.
+type discovered struct {
+	Clusters  []legacyCluster
+	Backups   []legacyBackup
+	Schedules []legacySchedule
+}
+
+// discover collects the legacy clusters, backups and schedules of the given
+// namespace ("" = all namespaces), in that order. A kind whose CRD is not
+// installed simply contributes nothing, so the tool also works where only a
+// subset of the legacy CRDs was ever installed. On error the result holds
+// whatever had been collected up to that point.
+func discover(ctx context.Context, dyn dynamic.Interface, namespace string) (discovered, error) {
+	var found discovered
+
+	collectCluster := func(obj *unstructured.Unstructured) error {
+		var spec legacy.EtcdClusterSpec
+		if err := decodeSpec(obj, &spec); err != nil {
+			return err
+		}
+		found.Clusters = append(found.Clusters, legacyCluster{
+			Name: obj.GetName(), Namespace: obj.GetNamespace(), UID: string(obj.GetUID()), Spec: spec,
+		})
+		return nil
+	}
+	collectBackup := func(obj *unstructured.Unstructured) error {
+		var spec legacy.EtcdBackupSpec
+		if err := decodeSpec(obj, &spec); err != nil {
+			return err
+		}
+		found.Backups = append(found.Backups, legacyBackup{Name: obj.GetName(), Namespace: obj.GetNamespace(), Spec: spec})
+		return nil
+	}
+	collectSchedule := func(obj *unstructured.Unstructured) error {
+		var spec legacy.EtcdBackupScheduleSpec
+		if err := decodeSpec(obj, &spec); err != nil {
+			return err
+		}
+		found.Schedules = append(found.Schedules, legacySchedule{Name: obj.GetName(), Namespace: obj.GetNamespace(), Spec: spec})
+		return nil
+	}
+
+	if err := listLegacy(ctx, dyn, migrate.ClusterGVR, namespace, collectCluster); err != nil {
+		return found, err
+	}
+	if err := listLegacy(ctx, dyn, migrate.BackupGVR, namespace, collectBackup); err != nil {
+		return found, err
+	}
+	// Run the last listing before building the return values, so found is
+	// read only after the collector has finished appending to it.
+	err := listLegacy(ctx, dyn, migrate.ScheduleGVR, namespace, collectSchedule)
+	return found, err
+}
+
+// listLegacy lists the objects of one legacy GVR and hands each to visit.
+// A NotFound or no-match error from the list call means the CRD is not
+// installed and is reported as success with zero objects.
+func listLegacy(ctx context.Context, dyn dynamic.Interface, gvr schema.GroupVersionResource, namespace string, visit func(*unstructured.Unstructured) error) error {
+	list, err := dyn.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{})
+	switch {
+	case err == nil:
+		// fall through to the visit loop
+	case apierrors.IsNotFound(err), meta.IsNoMatchError(err):
+		return nil // legacy CRD not installed
+	default:
+		return fmt.Errorf("list %s: %w", gvr.Resource, err)
+	}
+
+	for idx := range list.Items {
+		item := &list.Items[idx]
+		if err := visit(item); err != nil {
+			return fmt.Errorf("decode %s %s/%s: %w", gvr.Resource, item.GetNamespace(), item.GetName(), err)
+		}
+	}
+	return nil
+}
+
+// decodeSpec reads the .spec map of an unstructured legacy object and
+// converts it into the given (trimmed) legacy struct. An object without a
+// spec is an error.
+func decodeSpec(u *unstructured.Unstructured, into any) error {
+	rawSpec, present, err := unstructured.NestedMap(u.Object, "spec")
+	switch {
+	case err != nil:
+		return err
+	case !present:
+		return fmt.Errorf("object has no spec")
+	}
+	return runtime.DefaultUnstructuredConverter.FromUnstructured(rawSpec, into)
+}
+
+// buildPlans translates everything discovered into per-resource plans.
+// Clusters become in-place adoptions, built from the live facts gathered by
+// inspectCluster; a cluster whose inspection failed gets an error plan (its
+// pods/PVCs are never touched).
+func buildPlans(d discovered, facts map[string]migrate.ClusterFacts, inspectErrs map[string]error, opts migrate.TranslateOptions) []migrate.ResourcePlan {
+	var out []migrate.ResourcePlan
+
+	// Clusters first. facts and inspectErrs are both keyed "<namespace>/<name>".
+	for idx := range d.Clusters {
+		cl := &d.Clusters[idx]
+		factsKey := cl.Namespace + "/" + cl.Name
+
+		inspectErr, failed := inspectErrs[factsKey]
+		if !failed {
+			out = append(out, migrate.BuildAdoption(cl.Name, cl.Namespace, cl.Spec, facts[factsKey], opts))
+			continue
+		}
+
+		// Inspection failed: emit an error plan instead of an adoption.
+		failure := migrate.ResourcePlan{
+			SourceKind: "EtcdCluster",
+			SourceName: cl.Name,
+			Namespace:  cl.Namespace,
+			Action:     migrate.ActionError,
+			Errors:     []string{fmt.Sprintf("inspecting the live cluster failed: %v", inspectErr)},
+		}
+		out = append(out, failure)
+	}
+
+	// Backups and schedules translate one-to-one, in discovery order.
+	for idx := range d.Backups {
+		bk := &d.Backups[idx]
+		out = append(out, migrate.TranslateBackup(bk.Name, bk.Namespace, bk.Spec))
+	}
+	for idx := range d.Schedules {
+		sched := &d.Schedules[idx]
+		out = append(out, migrate.TranslateSchedule(sched.Name, sched.Namespace, sched.Spec))
+	}
+	return out
+}
+
+// markExisting downgrades Create plans whose target already exists to Skip,
+// making the tool re-runnable after a partial apply. The legacy delete still
+// proceeds on apply — the target exists, the source is leftover. Adoption
+// plans are NOT downgraded: every adoption step is idempotent and a partial
+// previous run (CRs created, pod patches missing) must be completed, not
+// skipped.
+func markExisting(ctx context.Context, c client.Client, plans []migrate.ResourcePlan) error {
+	for idx := range plans {
+		p := &plans[idx]
+
+		// Only Adopt and Create plans that carry a target are probed.
+		if p.Target == nil {
+			continue
+		}
+		if p.Action != migrate.ActionAdopt && p.Action != migrate.ActionCreate {
+			continue
+		}
+
+		// Look the target up into a throwaway copy so the plan's own object
+		// is never overwritten by the Get.
+		probe := p.Target.DeepCopyObject().(client.Object)
+		key := types.NamespacedName{Namespace: p.Target.GetNamespace(), Name: p.Target.GetName()}
+		err := c.Get(ctx, key, probe)
+		if apierrors.IsNotFound(err) {
+			continue // nothing there yet — the plan stands as built
+		}
+		if err != nil {
+			return fmt.Errorf("check existing %s %s/%s: %w",
+				p.SourceKind, p.Namespace, p.Target.GetName(), err)
+		}
+
+		// The target already exists.
+		if p.Action == migrate.ActionAdopt {
+			// Adoptions keep their action and only gain a note.
+			p.Notes = append(p.Notes,
+				"new-API EtcdCluster already exists (previous run); adoption steps re-run idempotently to complete any partial state")
+			continue
+		}
+		p.Action = migrate.ActionSkip
+		p.Notes = append(p.Notes,
+			"target already exists under the new API (created by a previous run); only the legacy CR cleanup remains")
+	}
+	return nil
+}
+
+// applyStats counts, per outcome, the plans handled by one apply pass.
+type applyStats struct {
+	Adopted, Created, Deleted, Skipped, Printed, Errored int
+}
+
+// applyPlans executes the plan: extras + target created per resource, then
+// the legacy source is deleted (children cascade via their owner refs; the
+// legacy CRs carry no finalizers, so deletion does not block).
+func applyPlans(ctx context.Context, c client.Client, dyn dynamic.Interface, plans []migrate.ResourcePlan, out io.Writer) (applyStats, error) {
+	var stats applyStats
+	for i := range plans {
+		p := &plans[i]
+		switch p.Action {
+		case migrate.ActionError:
+			// Errored plans are counted and otherwise left alone.
+			stats.Errored++
+			continue
+		case migrate.ActionPrint:
+			// Print plans are counted only; nothing is created or deleted.
+			stats.Printed++
+			continue
+		case migrate.ActionAdopt:
+			// Adoption is handled entirely by applyAdoption; the generic
+			// DeleteRef cleanup below is skipped for these plans.
+			if err := applyAdoption(ctx, c, dyn, p, out); err != nil {
+				return stats, fmt.Errorf("adopt %s/%s: %w", p.Namespace, p.SourceName, err)
+			}
+			stats.Adopted++
+			continue
+		case migrate.ActionCreate:
+			// markExisting and render both tolerate a Create plan without a
+			// Target. Fail with an error here, before anything is created,
+			// instead of panicking on the nil object below.
+			if p.Target == nil {
+				return stats, fmt.Errorf("create %s %s/%s: plan has no target object",
+					p.SourceKind, p.Namespace, p.SourceName)
+			}
+			for _, extra := range p.Extras {
+				if err := c.Create(ctx, extra); err != nil {
+					if apierrors.IsAlreadyExists(err) {
+						// A pre-existing extra is never overwritten.
+						fmt.Fprintf(out, " %s %s/%s already exists — left untouched\n",
+							extra.GetObjectKind().GroupVersionKind().Kind, extra.GetNamespace(), extra.GetName())
+						continue
+					}
+					return stats, fmt.Errorf("create %s %s/%s: %w",
+						extra.GetObjectKind().GroupVersionKind().Kind, extra.GetNamespace(), extra.GetName(), err)
+				}
+			}
+			// An already-existing target is tolerated so a re-run can proceed
+			// to the legacy cleanup.
+			if err := c.Create(ctx, p.Target); err != nil && !apierrors.IsAlreadyExists(err) {
+				return stats, fmt.Errorf("create %s %s/%s: %w",
+					p.SourceKind, p.Namespace, p.Target.GetName(), err)
+			}
+			stats.Created++
+		case migrate.ActionSkip:
+			stats.Skipped++
+		}
+
+		// Reached by Create and Skip plans: remove the legacy source. A source
+		// that is already gone (NotFound) still counts as deleted.
+		if p.DeleteRef != nil {
+			policy := metav1.DeletePropagationBackground
+			err := dyn.Resource(p.DeleteRef.GVR).Namespace(p.DeleteRef.Namespace).
+				Delete(ctx, p.DeleteRef.Name, metav1.DeleteOptions{PropagationPolicy: &policy})
+			if err != nil && !apierrors.IsNotFound(err) {
+				return stats, fmt.Errorf("delete legacy %s %s/%s: %w",
+					p.DeleteRef.GVR.Resource, p.DeleteRef.Namespace, p.DeleteRef.Name, err)
+			}
+			stats.Deleted++
+			fmt.Fprintf(out, " deleted legacy %s %s/%s\n", p.DeleteRef.GVR.Resource, p.DeleteRef.Namespace, p.DeleteRef.Name)
+		}
+	}
+	return stats, nil
+}
+
+// confirm asks for an interactive yes before a mutating run.
+func confirm(in io.Reader, out io.Writer, prompt string) bool { + fmt.Fprintf(out, "%s [y/N]: ", prompt) + reader := bufio.NewReader(in) + line, err := reader.ReadString('\n') + if err != nil { + return false + } + answer := strings.ToLower(strings.TrimSpace(line)) + return answer == "y" || answer == "yes" +} + +// backupDestination assembles the SnapshotLocation from the --backup-* +// flags. validate() has already checked exactly one destination is set. +func backupDestination(cfg *Config) lll.SnapshotLocation { + if cfg.BackupPVCClaim != "" { + return lll.SnapshotLocation{PVC: &lll.PVCSnapshotLocation{ + ClaimName: cfg.BackupPVCClaim, + SubPath: cfg.BackupPVCSubPath, + }} + } + return lll.SnapshotLocation{S3: &lll.S3SnapshotLocation{ + Endpoint: cfg.BackupS3Endpoint, + Bucket: cfg.BackupS3Bucket, + Key: cfg.BackupS3Key, + Region: cfg.BackupS3Region, + ForcePathStyle: cfg.BackupS3ForcePathStyle, + CredentialsSecretRef: corev1.LocalObjectReference{Name: cfg.BackupS3CredentialsSecret}, + }} +} diff --git a/cmd/kubectl-etcd/helpers_test.go b/cmd/kubectl-etcd/helpers_test.go index cb482f4c..3723d00a 100644 --- a/cmd/kubectl-etcd/helpers_test.go +++ b/cmd/kubectl-etcd/helpers_test.go @@ -2,53 +2,15 @@ package main import ( "errors" - "fmt" - "strings" "testing" - "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" ) -// ── A failed/never-ready port-forward must not hang ───────────────────────── - -func TestAwaitForward_Ready(t *testing.T) { - ready := make(chan struct{}, 1) - close(ready) - if err := awaitForward(ready, make(chan error, 1), make(chan struct{}, 1), time.Second); err != nil { - t.Fatalf("ready forward should succeed, got %v", err) - } -} - -func TestAwaitForward_ErrorBeforeReady(t *testing.T) { - // ForwardPorts returns an error without ever closing readyChan — the old - // code blocked on <-readyChan forever. awaitForward must return the error. 
- forwardErr := make(chan error, 1) - forwardErr <- fmt.Errorf("dial tcp: connection refused") - err := awaitForward(make(chan struct{}), forwardErr, make(chan struct{}, 1), time.Second) - if err == nil { - t.Fatal("a forward failure must return an error, not hang or succeed") - } - if !strings.Contains(err.Error(), "connection refused") { - t.Errorf("error should wrap the forward failure, got %v", err) - } -} - -func TestAwaitForward_Timeout(t *testing.T) { - stop := make(chan struct{}, 1) - // Nothing ever signals ready and no error arrives → must time out, not hang. - err := awaitForward(make(chan struct{}), make(chan error, 1), stop, 10*time.Millisecond) - if err == nil { - t.Fatal("a never-ready forward must time out, not hang") - } - select { - case <-stop: - default: - t.Error("timeout must close stopChan to tear the forwarder down") - } -} +// The awaitForward tests moved to internal/portforward alongside the +// port-forward machinery itself. // ── TLS secret discovery on the etcd container ─────────────────────────────── diff --git a/cmd/kubectl-etcd/main.go b/cmd/kubectl-etcd/main.go index b3569528..c1328371 100644 --- a/cmd/kubectl-etcd/main.go +++ b/cmd/kubectl-etcd/main.go @@ -6,8 +6,6 @@ import ( "errors" "fmt" "io" - "net/http" - "net/url" "os" "path/filepath" "strconv" @@ -23,11 +21,10 @@ import ( "k8s.io/client-go/kubernetes" _ "k8s.io/client-go/plugin/pkg/client/auth" // Import all auth providers "k8s.io/client-go/tools/clientcmd" - "k8s.io/client-go/tools/portforward" - "k8s.io/client-go/transport/spdy" "k8s.io/client-go/util/homedir" "github.com/cozystack/etcd-operator/internal/etcdclient" + "github.com/cozystack/etcd-operator/internal/portforward" ) func main() { @@ -547,86 +544,24 @@ This operation is typically used for backup purposes.`, return snapshotCmd } -// forwardReadyTimeout bounds how long setupPortForwarding waits for the -// port-forward to signal ready before giving up. 
-const forwardReadyTimeout = 10 * time.Second - -// awaitForward blocks until the port-forward signals ready, fails, or times -// out — whichever comes first. It exists so a forward that dies before -// becoming ready (which leaves readyChan unclosed) surfaces as an error -// instead of hanging the CLI. On timeout it closes stopChan to tear the -// forwarder down. -func awaitForward(readyChan <-chan struct{}, forwardErr <-chan error, stopChan chan struct{}, timeout time.Duration) error { - select { - case <-readyChan: - return nil - case err := <-forwardErr: - if err == nil { - err = fmt.Errorf("exited before becoming ready") - } - return fmt.Errorf("port forwarding failed: %w", err) - case <-time.After(timeout): - close(stopChan) - return fmt.Errorf("timed out after %s waiting for port forwarding to become ready", timeout) - } -} - func setupPortForwarding(config *Config, clientset *kubernetes.Clientset) (*tls.Config, uint16, error) { pod, err := clientset.CoreV1().Pods(config.Namespace).Get(context.Background(), config.PodName, metav1.GetOptions{}) if err != nil { return nil, 0, fmt.Errorf("failed to get pod: %w", err) } - path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", config.Namespace, config.PodName) clientConfig, err := clientcmd.BuildConfigFromFlags("", config.Kubeconfig) if err != nil { return nil, 0, fmt.Errorf("error building kubeconfig: %w", err) } - transport, upgrader, err := spdy.RoundTripperFor(clientConfig) - if err != nil { - return nil, 0, fmt.Errorf("failed to create round tripper: %w", err) - } - - hostURL, err := url.Parse(clientConfig.Host) - if err != nil { - return nil, 0, fmt.Errorf("failed to parse host URL: %w", err) - } - - hostURL.Path = path - - stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1) - dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", hostURL) - - silentOut := &silentWriter{} - portForwarder, err := portforward.New(dialer, []string{"0:2379"}, stopChan, 
readyChan, silentOut, os.Stderr) + // The forward lives for the rest of the process (each invocation runs + // one command and exits), so the stop function is deliberately unused. + localPort, _, err := portforward.ForwardToPod(clientConfig, config.Namespace, config.PodName, 2379) if err != nil { - return nil, 0, fmt.Errorf("failed to create port forwarder: %w", err) - } - - // ForwardPorts blocks until the forward is torn down; run it in the - // background and surface a startup failure via forwardErr. On a dial - // failure (RBAC on pods/portforward, API-server connectivity, protocol - // negotiation) ForwardPorts returns WITHOUT ever closing readyChan, so - // blocking on readyChan alone would hang the CLI forever — awaitForward - // selects on the error and a timeout too. - forwardErr := make(chan error, 1) - go func() { - forwardErr <- portForwarder.ForwardPorts() - }() - - if err := awaitForward(readyChan, forwardErr, stopChan, forwardReadyTimeout); err != nil { return nil, 0, err } - // Obtaining the local port used for forwarding - forwardedPorts, err := portForwarder.GetPorts() - if err != nil { - return nil, 0, fmt.Errorf("failed to get forwarded ports: %w", err) - } - - localPort := forwardedPorts[0].Local - tlsConfig, err := getTLSConfig(clientset, pod, config.Namespace) if err != nil { return nil, 0, fmt.Errorf("failed to get TLS config: %w", err) @@ -768,9 +703,3 @@ func findSecretNameForTLS(pod *corev1.Pod, container corev1.Container) (string, return "", false, fmt.Errorf("secret for the TLS certificate file not found") } - -type silentWriter struct{} - -func (sw *silentWriter) Write(p []byte) (int, error) { - return len(p), nil -} diff --git a/controllers/etcdcluster_controller.go b/controllers/etcdcluster_controller.go index 14b3f854..93ec54ea 100644 --- a/controllers/etcdcluster_controller.go +++ b/controllers/etcdcluster_controller.go @@ -322,7 +322,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // here too. 
Cheap: list etcd once and try to promote any learner; // no-op if none. if cluster.Status.ClusterID != "" && len(running) > 0 { - endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Name, cluster.Namespace) + endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Namespace) tlsCfg, tlsErr := buildOperatorTLSConfig(ctx, r.Client, cluster) if tlsErr != nil { // Don't bail out of the whole reconcile — updateStatus @@ -485,7 +485,7 @@ func (r *EtcdClusterReconciler) bootstrap( if seed.Spec.InitialCluster == "" { original := seed.DeepCopy() - seed.Spec.InitialCluster = buildInitialCluster(clusterPeerScheme(cluster), []string{seed.Name}, cluster.Name, cluster.Namespace) + seed.Spec.InitialCluster = buildInitialCluster(clusterPeerScheme(cluster), []string{seed.Name}, memberServiceName(seed), cluster.Namespace) // Now that apiserver-assigned name is known, populate the // per-member component label so the seed CR's label set matches // the Pod/PVC the member controller will create. 
@@ -548,7 +548,7 @@ func (r *EtcdClusterReconciler) tryDiscoverCluster( } return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } - endpoints := []string{clientURL(clusterClientScheme(cluster), seed.Name, cluster.Name, cluster.Namespace)} + endpoints := []string{clientURL(clusterClientScheme(cluster), seed.Name, memberServiceName(seed), cluster.Namespace)} tlsCfg, err := buildOperatorTLSConfig(ctx, r.Client, cluster) if err != nil { @@ -580,7 +580,7 @@ func (r *EtcdClusterReconciler) tryDiscoverCluster( log.Info("waiting for definitive single-member response", "members", len(resp.Members)) return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } - expectedPeer := peerURL(clusterPeerScheme(cluster), seed.Name, cluster.Name, cluster.Namespace) + expectedPeer := peerURL(clusterPeerScheme(cluster), seed.Name, memberServiceName(seed), cluster.Namespace) matched := resp.Members[0].Name == seed.Name if !matched { for _, p := range resp.Members[0].PeerURLs { @@ -770,7 +770,7 @@ func (r *EtcdClusterReconciler) scaleUp( // CR, which we just handled). From this point on we only care about // running (non-dormant) members for the etcd-side flow. 
running := filterRunningMembers(members) - endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Name, cluster.Namespace) + endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Namespace) tlsCfg, err := buildOperatorTLSConfig(ctx, r.Client, cluster) if err != nil { log.Error(err, "cannot build operator TLS config for scale-up") @@ -869,7 +869,7 @@ func (r *EtcdClusterReconciler) completePendingMember( pending *lll.EtcdMember, ) (ctrl.Result, error) { log := log.FromContext(ctx) - pendingPeerURL := peerURL(clusterPeerScheme(cluster), pending.Name, cluster.Name, cluster.Namespace) + pendingPeerURL := peerURL(clusterPeerScheme(cluster), pending.Name, memberServiceName(pending), cluster.Namespace) alreadyAdded := false for _, m := range listResp.Members { @@ -897,21 +897,30 @@ func (r *EtcdClusterReconciler) completePendingMember( etcdMembers = addResp.Members } - // Build --initial-cluster from etcd's view. The new pod's flags MUST - // match what etcd will report when it tries to join, so use the list - // from etcd directly rather than the CR set. - allNames := make([]string, 0, len(etcdMembers)) + // Build --initial-cluster from etcd's view, using each member's peer URL + // VERBATIM rather than reconstructing it from a single cluster Service + // name. During an in-place migration adopted members carry peer URLs + // under the legacy headless Service while the joining (native) member + // uses the cluster name, so a reconstruction keyed off one Service name + // would emit the wrong DNS for half the cluster. etcd already reports the + // correct persisted peer URL for every member (and the one we just + // registered via pendingPeerURL), so echo it back. 
+ parts := make([]string, 0, len(etcdMembers)) for _, m := range etcdMembers { + if len(m.PeerURLs) == 0 { + continue + } name := m.Name - if name == "" && len(m.PeerURLs) > 0 { + if name == "" { name = memberNameFromPeerURL(m.PeerURLs[0]) } - if name != "" { - allNames = append(allNames, name) + if name == "" { + continue } + parts = append(parts, name+"="+m.PeerURLs[0]) } - sort.Strings(allNames) - initialCluster := buildInitialCluster(clusterPeerScheme(cluster), allNames, cluster.Name, cluster.Namespace) + sort.Strings(parts) + initialCluster := strings.Join(parts, ",") original := pending.DeepCopy() pending.Spec.InitialCluster = initialCluster @@ -1046,7 +1055,7 @@ func (r *EtcdClusterReconciler) syncIsVoter( if !m.DeletionTimestamp.IsZero() { continue } - url := peerURL(clusterPeerScheme(cluster), m.Name, cluster.Name, cluster.Namespace) + url := peerURL(clusterPeerScheme(cluster), m.Name, memberServiceName(m), cluster.Namespace) var wantVoter bool switch { case voterByURL[url]: @@ -1126,7 +1135,7 @@ func (r *EtcdClusterReconciler) reconcileAuth(ctx context.Context, cluster *lll. return nil, nil } - endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Name, cluster.Namespace) + endpoints := memberEndpoints(clusterClientScheme(cluster), running, cluster.Namespace) tlsCfg, err := buildOperatorTLSConfig(ctx, r.Client, cluster) if err != nil { log.Error(err, "cannot build operator TLS config for auth-enable; retrying") @@ -1789,7 +1798,14 @@ func peerCertDNSNames(cluster *lll.EtcdCluster, clusterDomain string) []string { // ── Services ───────────────────────────────────────────────────────────── func (r *EtcdClusterReconciler) ensureServices(ctx context.Context, cluster *lll.EtcdCluster) error { - // Headless service — provides per-pod DNS for peer discovery. + // Headless service — provides per-pod DNS for peer discovery. 
Always + // named after the cluster: this is the operator's native Service, and + // it is the name every operator-created (non-adopted) member resolves + // under. In-place-migrated clusters additionally keep the legacy + // headless Service alive (owner-referenced to the adopted members, so it + // self-GCs as they roll) for the adopted pods, which carry the legacy + // name in their AnnHeadlessServiceName annotation; the operator does not + // manage that one. if err := r.ensureService(ctx, cluster, cluster.Name, corev1.ServiceSpec{ ClusterIP: corev1.ClusterIPNone, PublishNotReadyAddresses: true, diff --git a/controllers/etcdcluster_controller_test.go b/controllers/etcdcluster_controller_test.go index e129bbdc..dcf8bc38 100644 --- a/controllers/etcdcluster_controller_test.go +++ b/controllers/etcdcluster_controller_test.go @@ -2811,6 +2811,61 @@ func TestBootstrap_PropagatesOptionsToSeed(t *testing.T) { } } +// TestBootstrap_NativeServicesAndNoReservedAnnotations pins the post-review +// contract: the operator's native services are "<cluster>" (headless) and +// "<cluster>-client", and the operator NEVER stamps the reserved +// AnnHeadlessServiceName annotation on members it creates — so a natively +// created member resolves under the cluster's own name and its +// --initial-cluster is built in the cluster's DNS domain. This is what makes +// an adopted member's headless-service-name override self-wipe as the cluster +// rolls: replacements come up native.
+func TestBootstrap_NativeServicesAndNoReservedAnnotations(t *testing.T) { + ctx := context.Background() + cluster := &lll.EtcdCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd", Namespace: "ns"}, + Spec: lll.EtcdClusterSpec{ + Replicas: ptrInt32(1), + Version: "3.5.17", + Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, + }, + } + c, _ := newTestClient(t, cluster) + r := &EtcdClusterReconciler{Client: c, Scheme: testScheme(t), EtcdClientFactory: factoryReturning(newFakeEtcd(0xdeadbeef))} + + reconcileUntilStable(t, r, c, "etcd", "ns", 8) + + // Native headless Service "<cluster>" and client Service "<cluster>-client". + headless := &corev1.Service{} + if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "etcd"}, headless); err != nil { + t.Fatalf("native headless Service : %v", err) + } + if headless.Spec.ClusterIP != corev1.ClusterIPNone { + t.Errorf("headless Service must be headless; ClusterIP=%q", headless.Spec.ClusterIP) + } + if err := c.Get(ctx, types.NamespacedName{Namespace: "ns", Name: "etcd-client"}, &corev1.Service{}); err != nil { + t.Errorf("client Service must be -client: %v", err) + } + + members := &lll.EtcdMemberList{} + if err := c.List(ctx, members, client.InNamespace("ns")); err != nil { + t.Fatalf("List: %v", err) + } + if len(members.Items) != 1 { + t.Fatalf("expected one seed member; got %d", len(members.Items)) + } + seed := members.Items[0] + if _, ok := seed.Annotations[AnnHeadlessServiceName]; ok { + t.Errorf("operator stamped reserved annotation %s on a member it created; it must never do so", AnnHeadlessServiceName) + } + if _, ok := seed.Annotations[AnnDataDirSubPath]; ok { + t.Errorf("operator stamped reserved annotation %s on a member it created; it must never do so", AnnDataDirSubPath) + } + wantInitial := seed.Name + "=http://" + seed.Name + ".etcd.ns.svc:2380" + if seed.Spec.InitialCluster != wantInitial { + t.Errorf("seed InitialCluster = %q; want %q", seed.Spec.InitialCluster, wantInitial) + } +} + // 
TestBootstrap_PropagatesResourcesToSeed verifies the wiring of // spec.resources onto the seed EtcdMember at cluster creation. The // member controller reads its own Spec.Resources at buildPod time, so diff --git a/controllers/etcdmember_controller.go b/controllers/etcdmember_controller.go index 395d9678..6b0a88b0 100644 --- a/controllers/etcdmember_controller.go +++ b/controllers/etcdmember_controller.go @@ -254,7 +254,10 @@ func (r *EtcdMemberReconciler) removeMemberFromEtcd(ctx context.Context, cluster } otherMembers++ if m.Status.PodName != "" { - endpoints = append(endpoints, clientURL(scheme, m.Name, member.Spec.ClusterName, member.Namespace)) + // Build each peer's endpoint with that peer's OWN Service name: + // an adopted peer resolves under the legacy headless Service even + // when `member` (the one being removed) is native, or vice versa. + endpoints = append(endpoints, clientURL(scheme, m.Name, memberServiceName(&m), member.Namespace)) } } if len(endpoints) == 0 { @@ -325,7 +328,7 @@ func (r *EtcdMemberReconciler) resolveMemberID( } // Match by peer URL too — etcd may have a member added via MemberAdd // whose Name is empty until it joins. 
- expected := peerURL(memberPeerScheme(member), member.Name, member.Spec.ClusterName, member.Namespace) + expected := peerURL(memberPeerScheme(member), member.Name, memberServiceName(member), member.Namespace) for _, p := range m.PeerURLs { if p == expected { return m.ID, nil @@ -619,8 +622,8 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { peerTLS := peerScheme == "https" clientMTLS := clientTLS && member.Spec.TLS != nil && member.Spec.TLS.ClientMTLS - pAddr := peerURL(peerScheme, member.Name, member.Spec.ClusterName, member.Namespace) - cAddr := clientURL(clientScheme, member.Name, member.Spec.ClusterName, member.Namespace) + pAddr := peerURL(peerScheme, member.Name, memberServiceName(member), member.Namespace) + cAddr := clientURL(clientScheme, member.Name, memberServiceName(member), member.Namespace) // Data volume source: tmpfs emptyDir for memory-backed members, // PVC otherwise. SizeLimit on the emptyDir caps tmpfs allocation; @@ -656,10 +659,16 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { } labels, annotations := applyAdditionalMetadata(labels, nil, member.Spec.AdditionalMetadata) + // Data dir defaults to the volume root; adopted legacy members carry an + // AnnDataDirSubPath annotation (e.g. "default.etcd") so a replacement + // Pod resumes from the existing data dir instead of bootstrapping an + // empty one. memberDataDir validates the annotation and fails closed. 
+ dataDir := memberDataDir(member) + cmd := []string{ "etcd", "--name=" + member.Name, - "--data-dir=/var/lib/etcd", + "--data-dir=" + dataDir, "--listen-peer-urls=" + peerScheme + "://0.0.0.0:2380", "--listen-client-urls=" + clientScheme + "://0.0.0.0:2379", "--advertise-client-urls=" + cAddr, @@ -742,7 +751,7 @@ func (r *EtcdMemberReconciler) buildPod(member *lll.EtcdMember) *corev1.Pod { }, Spec: corev1.PodSpec{ Hostname: member.Name, - Subdomain: member.Spec.ClusterName, + Subdomain: memberServiceName(member), Affinity: member.Spec.Affinity, TopologySpreadConstraints: member.Spec.TopologySpreadConstraints, InitContainers: initContainers, @@ -1016,7 +1025,10 @@ func (r *EtcdMemberReconciler) discoverMemberID(ctx context.Context, member *lll // context budget. continue } - endpoints = append(endpoints, clientURL(scheme, m.Name, member.Spec.ClusterName, member.Namespace)) + // Each peer's endpoint uses that peer's own Service name (adopted + // peers resolve under the legacy headless Service); self below uses + // this member's. + endpoints = append(endpoints, clientURL(scheme, m.Name, memberServiceName(&m), member.Namespace)) } // Self is a *fallback*, not an always-on endpoint: dial our own etcd // only when no voter peer is available (single-node bootstrap, or no @@ -1026,7 +1038,7 @@ func (r *EtcdMemberReconciler) discoverMemberID(ctx context.Context, member *lll // with "rpc not supported for learner" — wedging discovery even though // a voter was in the list. Mirrors memberEndpoints' voter-or-fallback. if len(endpoints) == 0 { - endpoints = append(endpoints, clientURL(scheme, member.Name, member.Spec.ClusterName, member.Namespace)) + endpoints = append(endpoints, clientURL(scheme, member.Name, memberServiceName(member), member.Namespace)) } // Build the operator-side dial config. 
Only TLS clusters need the parent @@ -1065,7 +1077,7 @@ func (r *EtcdMemberReconciler) discoverMemberID(ctx context.Context, member *lll if err != nil { return 0, err } - expectedPeer := peerURL(memberPeerScheme(member), member.Name, member.Spec.ClusterName, member.Namespace) + expectedPeer := peerURL(memberPeerScheme(member), member.Name, memberServiceName(member), member.Namespace) for _, m := range resp.Members { if m.Name == member.Name { return m.ID, nil diff --git a/controllers/etcdmember_controller_test.go b/controllers/etcdmember_controller_test.go index c7e7b625..dcd24280 100644 --- a/controllers/etcdmember_controller_test.go +++ b/controllers/etcdmember_controller_test.go @@ -2342,6 +2342,114 @@ func TestBuildPod_AppliesEtcdOptions(t *testing.T) { } } +// TestBuildPod_AdoptionAnnotations covers the two in-place-migration knobs, +// now carried as reserved EtcdMember annotations rather than spec fields: the +// AnnHeadlessServiceName annotation must drive both the Pod's spec.subdomain +// and every constructed URL (so an adopted legacy member's DNS identity — +// "<member>.<legacy-headless-service>.<namespace>.svc" — keeps matching what etcd has +// persisted), and AnnDataDirSubPath must relocate --data-dir into the PVC +// subdirectory where the legacy operator kept the data. Without these, a +// replacement Pod of an adopted member comes up unreachable (wrong +// subdomain ⇒ its persisted peer URL stops resolving) and empty (wrong +// data dir ⇒ crashloops against the cluster with a fresh identity).
+func TestBuildPod_AdoptionAnnotations(t *testing.T) { + r := &EtcdMemberReconciler{} + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd-0", Namespace: "ns", + Annotations: map[string]string{ + AnnHeadlessServiceName: "etcd-headless", + AnnDataDirSubPath: "default.etcd", + }, + }, + Spec: lll.EtcdMemberSpec{ + ClusterName: "etcd", Version: "3.5.17", + Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, + }, + }) + if pod.Spec.Subdomain != "etcd-headless" { + t.Errorf("subdomain = %q; want the annotation's headless service name", pod.Spec.Subdomain) + } + cmd := pod.Spec.Containers[0].Command + for _, want := range []string{ + "--data-dir=/var/lib/etcd/default.etcd", + "--advertise-client-urls=http://etcd-0.etcd-headless.ns.svc:2379", + "--initial-advertise-peer-urls=http://etcd-0.etcd-headless.ns.svc:2380", + } { + if !cmdContains(cmd, want) { + t.Errorf("command missing %q; got %v", want, cmd) + } + } + + // Defaults preserved: no annotations ⇒ subdomain = cluster name, data dir + // at the volume root. + pod = r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "m", Namespace: "ns"}, + Spec: lll.EtcdMemberSpec{ + ClusterName: "test", Version: "3.5.17", + Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, + }, + }) + if pod.Spec.Subdomain != "test" { + t.Errorf("default subdomain = %q; want cluster name", pod.Spec.Subdomain) + } + if !cmdContains(pod.Spec.Containers[0].Command, "--data-dir=/var/lib/etcd") { + t.Errorf("default --data-dir missing: %v", pod.Spec.Containers[0].Command) + } +} + +// TestBuildPod_DataDirSubPathFailsClosed pins the in-code validation that +// replaced the apiserver-enforced pattern the spec field used to carry. An +// annotation has no schema, so a value that could escape the mount (a slash +// or "..") — or is otherwise malformed — must be ignored and --data-dir must +// fall back to the volume root, never substituting the unsafe value. 
+func TestBuildPod_DataDirSubPathFailsClosed(t *testing.T) { + r := &EtcdMemberReconciler{} + for _, bad := range []string{ + "../../etc", // parent-dir escape + "a/b", // nested path + "..", // bare parent + "/abs", // absolute + ".hidden", // leading dot (pattern reject) + "with space", // pattern reject + "a..b", // contains ".." + } { + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m", Namespace: "ns", + Annotations: map[string]string{AnnDataDirSubPath: bad}, + }, + Spec: lll.EtcdMemberSpec{ + ClusterName: "test", Version: "3.5.17", + Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, + }, + }) + if !cmdContains(pod.Spec.Containers[0].Command, "--data-dir=/var/lib/etcd") { + t.Errorf("subpath %q: --data-dir not fail-closed to volume root; got %v", bad, pod.Spec.Containers[0].Command) + } + for _, c := range pod.Spec.Containers[0].Command { + if c != "--data-dir=/var/lib/etcd" && len(c) > len("--data-dir=") && c[:len("--data-dir=")] == "--data-dir=" { + t.Errorf("subpath %q: unsafe value reached --data-dir: %q", bad, c) + } + } + } + + // A valid single-component subpath is still honoured. + pod := r.buildPod(&lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{ + Name: "m", Namespace: "ns", + Annotations: map[string]string{AnnDataDirSubPath: "default.etcd"}, + }, + Spec: lll.EtcdMemberSpec{ + ClusterName: "test", Version: "3.5.17", + Storage: lll.StorageSpec{Size: quickQty(t, "1Gi")}, + }, + }) + if !cmdContains(pod.Spec.Containers[0].Command, "--data-dir=/var/lib/etcd/default.etcd") { + t.Errorf("valid subpath rejected: %v", pod.Spec.Containers[0].Command) + } +} + // TestDeriveMemberTLS covers the cluster→member projection. 
ClientMTLS // must be true iff OperatorClientSecretRef is set; secret refs are deep- // copied so a later edit to the parent's pointer can't mutate the diff --git a/controllers/helpers.go b/controllers/helpers.go index 32647d55..eb6f7503 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -2,6 +2,8 @@ package controllers import ( "fmt" + "path" + "regexp" "strings" corev1 "k8s.io/api/core/v1" @@ -27,18 +29,100 @@ const ( // MemberFinalizer is placed on EtcdMember resources to ensure // graceful removal from the etcd cluster before deletion. MemberFinalizer = "etcd-operator.cozystack.io/member-cleanup" + + // ReservedAnnotationPrefix namespaces the operator-interpreted + // annotations below. additionalMetadata must never be able to set a + // key under this prefix (applyAdditionalMetadata strips it): the + // reserved annotations drive member DNS identity and the --data-dir + // path, so a user-settable copy would (a) make every operator-created + // member inherit a migration knob — breaking the self-wipe — and (b) + // turn data-dir-subpath into a user-controllable path into --data-dir. + ReservedAnnotationPrefix = "etcd-operator.cozystack.io/" + + // AnnHeadlessServiceName overrides the headless Service name a member's + // DNS identity keys off: its Pod subdomain and every peer/client URL + // the operator constructs for it. Absent ⇒ the cluster's own name + // (native behaviour). Stamped ONLY by the in-place migration tool on + // the EtcdMembers it creates for adopted pods (whose immutable + // spec.subdomain and persisted peer URLs use the legacy Service name); + // the operator never stamps it, so every rolled/replaced member comes + // up native and the override self-wipes once the cluster fully rolls. + AnnHeadlessServiceName = ReservedAnnotationPrefix + "headless-service-name" + + // AnnDataDirSubPath relocates etcd's --data-dir to a subdirectory of + // the member's data volume (/var/lib/etcd/<subpath>). Absent ⇒ the + // volume root.
Same migration-only contract as AnnHeadlessServiceName: + // the legacy operator kept etcd data under "default.etcd" inside the + // PVC, so an adopted member's replacement Pod finds the existing data + // dir instead of bootstrapping empty. The value is validated in code + // (validDataDirSubPath) — an annotation has no apiserver schema, so the + // controller fails closed against a mount-escaping value. + AnnDataDirSubPath = ReservedAnnotationPrefix + "data-dir-subpath" ) -// peerURL returns the etcd peer URL for a member, using the headless Service DNS. +// etcdDataDirRoot is the mount path of every member's data volume; --data-dir +// is this root or, for adopted members, a validated subdirectory of it. +const etcdDataDirRoot = "/var/lib/etcd" + +// dataDirSubPathRe is the same pattern the removed spec.dataDirSubPath field +// carried as an apiserver-enforced kubebuilder marker. Now that the value +// arrives as an unvalidated annotation, the controller enforces it here. +var dataDirSubPathRe = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`) + +// validDataDirSubPath fails closed: the value must be a single safe path +// component — no slash (so it cannot name a nested path) and no ".." (so it +// cannot escape the mount) — matching the original field's pattern. Anything +// else is rejected and the caller falls back to the native data-dir root. +func validDataDirSubPath(s string) bool { + if strings.ContainsRune(s, '/') || strings.Contains(s, "..") { + return false + } + return dataDirSubPathRe.MatchString(s) +} + +// memberDataDir resolves a member's etcd --data-dir from its +// AnnDataDirSubPath annotation, falling back to the volume root when the +// annotation is absent or fails validation (fail-closed: an invalid value is +// ignored, never substituted into the path). 
+func memberDataDir(member *lll.EtcdMember) string { + sub := member.Annotations[AnnDataDirSubPath] + if sub == "" || !validDataDirSubPath(sub) { + return etcdDataDirRoot + } + return path.Join(etcdDataDirRoot, sub) +} + +// peerURL returns the etcd peer URL for a member, using the headless Service +// DNS. `service` is the headless Service name the member resolves under — +// resolve it per-member via memberServiceName (the cluster's own name by +// default, or the AnnHeadlessServiceName override an adopted member carries), +// never assume cluster.Name for every member: during an in-place migration +// adopted and rolled members live under different Service names at once. // scheme is "http" or "https" depending on whether peer TLS is enabled. -func peerURL(scheme, member, cluster, namespace string) string { - return fmt.Sprintf("%s://%s.%s.%s.svc:2380", scheme, member, cluster, namespace) +func peerURL(scheme, member, service, namespace string) string { + return fmt.Sprintf("%s://%s.%s.%s.svc:2380", scheme, member, service, namespace) } -// clientURL returns the etcd client URL for a member. +// clientURL returns the etcd client URL for a member. Same service-name +// contract as peerURL. // scheme is "http" or "https" depending on whether client TLS is enabled. -func clientURL(scheme, member, cluster, namespace string) string { - return fmt.Sprintf("%s://%s.%s.%s.svc:2379", scheme, member, cluster, namespace) +func clientURL(scheme, member, service, namespace string) string { + return fmt.Sprintf("%s://%s.%s.%s.svc:2379", scheme, member, service, namespace) +} + +// memberServiceName resolves the headless Service name a member resolves +// under: the AnnHeadlessServiceName annotation when present (set only by the +// migration tool on adopted members), the owning cluster's own name +// otherwise. Every constructed member URL and the Pod's spec.subdomain key +// off this. 
The operator never stamps the annotation on members it creates, +// so a rolled/replaced member defaults to the cluster name and the override +// self-wipes as the cluster rolls — there is deliberately no cluster-level +// equivalent (the operator's native headless Service is always cluster.Name). +func memberServiceName(member *lll.EtcdMember) string { + if v := member.Annotations[AnnHeadlessServiceName]; v != "" { + return v + } + return member.Spec.ClusterName } // clusterClientScheme returns "https" when the cluster has client TLS @@ -78,10 +162,10 @@ func memberPeerScheme(member *lll.EtcdMember) string { } // buildInitialCluster builds the --initial-cluster flag value from member names. -func buildInitialCluster(peerScheme string, names []string, cluster, namespace string) string { +func buildInitialCluster(peerScheme string, names []string, service, namespace string) string { parts := make([]string, len(names)) for i, name := range names { - parts[i] = name + "=" + peerURL(peerScheme, name, cluster, namespace) + parts[i] = name + "=" + peerURL(peerScheme, name, service, namespace) } return strings.Join(parts, ",") } @@ -199,19 +283,25 @@ func peerSecretName(cluster *lll.EtcdCluster) string { // learner's own discoverMemberID call where the peer list is just // "self" — letting the dialer try anyway is no worse than silently // returning []. -func memberEndpoints(scheme string, members []lll.EtcdMember, cluster, namespace string) []string { +// +// Each endpoint is built with the member's OWN Service name +// (memberServiceName), not a single cluster-wide name: during an in-place +// migration adopted members resolve under the legacy headless Service while +// rolled members resolve under the cluster name, so a shared `service` would +// dial the wrong DNS for half the cluster. 
+func memberEndpoints(scheme string, members []lll.EtcdMember, namespace string) []string { voters := make([]string, 0, len(members)) - for _, m := range members { - if m.Status.IsVoter { - voters = append(voters, clientURL(scheme, m.Name, cluster, namespace)) + for i := range members { + if members[i].Status.IsVoter { + voters = append(voters, clientURL(scheme, members[i].Name, memberServiceName(&members[i]), namespace)) } } if len(voters) > 0 { return voters } eps := make([]string, len(members)) - for i, m := range members { - eps[i] = clientURL(scheme, m.Name, cluster, namespace) + for i := range members { + eps[i] = clientURL(scheme, members[i].Name, memberServiceName(&members[i]), namespace) } return eps } @@ -233,6 +323,20 @@ func memberLabels(cluster, member string) map[string]string { return l } +// MemberLabels exposes the operator's member-level label set to external +// writers that must stamp objects exactly the way the controllers expect — +// the in-place migration tool labels adopted Pods/PVCs with it so the +// headless-Service selector, the PDB selector and the /scale Selector all +// match from the moment the new operator starts. +func MemberLabels(cluster, member string) map[string]string { + return memberLabels(cluster, member) +} + +// ClusterLabels is the cluster-level counterpart of MemberLabels. +func ClusterLabels(cluster string) map[string]string { + return clusterLabels(cluster) +} + // applyAdditionalMetadata merges the user-supplied labels/annotations from // spec.additionalMetadata onto a child object's ObjectMeta. Operator-owned // keys (everything already present in objLabels/objAnnotations) always win @@ -241,6 +345,13 @@ func memberLabels(cluster, member string) map[string]string { // are mutated in place (allocated when nil and there is something to merge) // and returned, so callers can assign the results straight onto // ObjectMeta.{Labels,Annotations}. 
+// +// Keys under ReservedAnnotationPrefix are dropped from BOTH maps before +// merging. This is load-bearing, not hygiene: additionalMetadata is mirrored +// onto every EtcdMember the operator creates, so if a user could set +// AnnHeadlessServiceName / AnnDataDirSubPath through it, every new member +// would inherit the migration knobs (breaking the self-wipe) and +// data-dir-subpath would become a user-controllable path into --data-dir. func applyAdditionalMetadata(objLabels, objAnnotations map[string]string, md *lll.AdditionalMetadata) (labels, annotations map[string]string) { if md == nil { return objLabels, objAnnotations @@ -249,6 +360,9 @@ func applyAdditionalMetadata(objLabels, objAnnotations map[string]string, md *ll objLabels = make(map[string]string, len(md.Labels)) } for k, v := range md.Labels { + if strings.HasPrefix(k, ReservedAnnotationPrefix) { + continue + } if _, taken := objLabels[k]; !taken { objLabels[k] = v } @@ -257,6 +371,9 @@ func applyAdditionalMetadata(objLabels, objAnnotations map[string]string, md *ll objAnnotations = make(map[string]string, len(md.Annotations)) } for k, v := range md.Annotations { + if strings.HasPrefix(k, ReservedAnnotationPrefix) { + continue + } if _, taken := objAnnotations[k]; !taken { objAnnotations[k] = v } diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 1ebe36a6..5457d165 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -13,6 +13,8 @@ package controllers import ( "testing" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + lll "github.com/cozystack/etcd-operator/api/v1alpha2" ) @@ -66,3 +68,76 @@ func TestApplyAdditionalMetadata_NilInputsStayNil(t *testing.T) { t.Errorf("applyAdditionalMetadata with empty md = %v, %v; want nil, nil", labels, annotations) } } + +// TestApplyAdditionalMetadata_StripsReservedPrefix proves the reserved keys +// are DROPPED, not merely collision-losing: even when the operator hasn't +// pre-populated them (so 
collision-precedence couldn't protect anything), a +// user-supplied AnnHeadlessServiceName / AnnDataDirSubPath must never reach +// the merged result. Otherwise every operator-created member would inherit +// the migration knobs (breaking the self-wipe) and data-dir-subpath would +// become a user-controllable path into --data-dir. +func TestApplyAdditionalMetadata_StripsReservedPrefix(t *testing.T) { + md := &lll.AdditionalMetadata{ + Labels: map[string]string{ + AnnHeadlessServiceName: "evil-svc", // reserved → must be stripped + "cozystack.io/tenant": "foo", // fresh → must merge + }, + Annotations: map[string]string{ + AnnHeadlessServiceName: "evil-svc", // reserved → must be stripped + AnnDataDirSubPath: "../escape", // reserved → must be stripped + "example.com/note": "bar", // fresh → must merge + }, + } + labels, annotations := applyAdditionalMetadata(nil, nil, md) + + if _, present := labels[AnnHeadlessServiceName]; present { + t.Errorf("reserved label %s reached the merged labels", AnnHeadlessServiceName) + } + if labels["cozystack.io/tenant"] != "foo" { + t.Errorf("non-reserved label dropped: %v", labels) + } + for _, k := range []string{AnnHeadlessServiceName, AnnDataDirSubPath} { + if _, present := annotations[k]; present { + t.Errorf("reserved annotation %s reached the merged annotations", k) + } + } + if annotations["example.com/note"] != "bar" { + t.Errorf("non-reserved annotation dropped: %v", annotations) + } +} + +// TestMemberEndpoints_PerMemberServiceName pins that during the mixed +// migration window each member's dial endpoint is built under its OWN +// Service name: an adopted member (AnnHeadlessServiceName=legacy) resolves +// under the legacy headless Service, while a native (rolled) member resolves +// under the cluster's own name. A shared cluster-wide name would dial the +// wrong DNS for half the cluster. 
+func TestMemberEndpoints_PerMemberServiceName(t *testing.T) { + adopted := lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd-0", + Annotations: map[string]string{AnnHeadlessServiceName: "etcd-headless"}, + }, + Spec: lll.EtcdMemberSpec{ClusterName: "etcd"}, + Status: lll.EtcdMemberStatus{IsVoter: true}, + } + native := lll.EtcdMember{ + ObjectMeta: metav1.ObjectMeta{Name: "etcd-9"}, + Spec: lll.EtcdMemberSpec{ClusterName: "etcd"}, + Status: lll.EtcdMemberStatus{IsVoter: true}, + } + + got := memberEndpoints("http", []lll.EtcdMember{adopted, native}, "ns") + want := []string{ + "http://etcd-0.etcd-headless.ns.svc:2379", + "http://etcd-9.etcd.ns.svc:2379", + } + if len(got) != len(want) { + t.Fatalf("endpoints = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("endpoint[%d] = %q, want %q", i, got[i], want[i]) + } + } +} diff --git a/docs/concepts.md b/docs/concepts.md index b42ee182..e6d1dc97 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -231,6 +231,17 @@ The set is deliberately **closed and typed**. The legacy aenix operator exposed Like `spec.resources`, options are latched through `status.observed` and apply **to newly-created members only** (scale-up, replacement) — the operator does not roll existing Pods when they change. To apply a tuning change to an existing cluster, delete one Pod at a time and let the operator recreate it with the new flags. A transient mix of old- and new-flag members is harmless: these are per-member settings (backend quota, compaction cadence, raft snapshot interval), the same heterogeneity any manual rolling flag change passes through. +## Member DNS identity (adoption annotations) + +The operator's headless Service (per-member DNS for peer discovery) is always named after the cluster, and every URL the operator constructs for a member — peer/client dial endpoints, `--initial-cluster`, the Pod's `spec.subdomain` — derives from that member's resolved Service name. 
There is **no cluster-level override**: a natively-created member always resolves under the cluster's own name. + +For [in-place migration from the legacy operator](migration.md#tool-driven-in-place-migration-etcd-migrate), the migration tool stamps two **reserved annotations** on the `EtcdMember`s it creates for adopted pods — never on members the operator itself creates: + +- `etcd-operator.cozystack.io/headless-service-name` — overrides the Service name that member's DNS keys off. Adopted StatefulSet pods carry an immutable subdomain of `<name>-headless`, so the annotation makes the operator's URL convention match the adopted pods' actual DNS exactly. +- `etcd-operator.cozystack.io/data-dir-subpath` — records where the legacy layout kept etcd's data inside the PVC (`default.etcd/`), so replacement Pods of adopted members resume from the existing data dir. The value is validated in code (a single safe path component — no `/`, no `..`) and fails closed to the volume root. + +Because the operator never stamps these annotations, every rolled or replaced member comes up native, and the override **self-wipes** as the cluster rolls — once fully rolled, the cluster is indistinguishable from one created natively. `additionalMetadata` cannot set keys under the `etcd-operator.cozystack.io/` reserved prefix, so the annotations can be neither forged by a user nor inherited by operator-created members. + ## TLS `spec.tls` configures transport-layer security for the cluster's two etcd surfaces: the client API (port 2379) and the peer API (port 2380). Each subtree is independently optional — you can opt one surface into TLS without the other. The whole `tls` subtree is immutable post-create (see the validation table above): toggling TLS on an existing cluster is a rolling change that v1 doesn't perform, so the policy is delete-and-recreate. 
diff --git a/docs/migration.md b/docs/migration.md index 56364b5e..881f2e42 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -4,17 +4,242 @@ Notes for migrating onto this operator (`etcd-operator.cozystack.io/v1alpha2`) from the legacy aenix operator (`etcd.aenix.io/v1alpha1`), and for behavioural changes that need an explicit migration step. -This document grows as more legacy features are ported. Right now it covers the -**`EtcdBackup` → `EtcdSnapshot` rename** (a pre-GA naming change), the -**`spec.options` map → typed fields** change, and the one change that has a -hard migration requirement: **etcd authentication credentials**. - -> **TODO — full legacy-operator migration.** The end-to-end story for moving an -> existing `etcd.aenix.io/v1alpha1` cluster onto `etcd-operator.cozystack.io/v1alpha2` -> (CRD shape, data-dir adoption vs snapshot/restore, member-ID continuity) is not -> written yet — the two operators manage members differently (the new one uses -> per-member `EtcdMember` CRs + Pods, not a single StatefulSet), so this is not a -> drop-in CRD swap. Fill this in as the migration path is validated. +This document covers the **tool-driven migration from the legacy operator** +(`etcd-migrate`), the **`EtcdBackup` → `EtcdSnapshot` rename** (a pre-GA naming +change), the **`spec.options` map → typed fields** change, and the one change +that has a hard migration requirement: **etcd authentication credentials**. + +## Tool-driven in-place migration (`etcd-migrate`) + +`etcd-migrate` adopts running legacy clusters **in place**: the etcd pods and +their PVCs stay exactly as they are — only ownership, labels, member +annotations and CRs change, and the new operator takes over the live data +plane. No data is moved, no pod is restarted, and quorum is never touched. 
+Clients that connect by DNS name keep working; one Service changes shape +(ClusterIP → headless) and has consumer prerequisites — see +[Endpoint compatibility](#endpoint-compatibility) before you `--apply`. + +Build it with `make etcd-migrate` (lands in `bin/etcd-migrate`). + +### How adoption works + +The adopted pods are made to look native through **durable identity stamped +as two reserved annotations on each adopted `EtcdMember`** — there is no +permanent, user-facing API knob for this, and the annotations **self-wipe** as +the cluster rolls: + +- **`etcd-operator.cozystack.io/headless-service-name`**. Legacy StatefulSet + pods carry an immutable `spec.subdomain` of `<name>-headless`, and the + peer URLs persisted *inside etcd* use that DNS domain. The annotation makes + every URL the operator constructs for that member (dial endpoints, + `--initial-cluster`, replacement-pod DNS) match the adopted pod's actual + identity — no special cases. +- **`etcd-operator.cozystack.io/data-dir-subpath`**. The legacy operator kept + etcd's data under the `default.etcd/` subdirectory of the PVC; the annotation + relocates `--data-dir` so a future replacement Pod resumes from the existing + data dir instead of crashlooping with a fresh identity. The controller + validates the value in code (single safe path component — no `/`, no `..`) + and fails closed to the volume root on anything malformed. + +The operator **never stamps these annotations on members it creates**. So +every member the operator rolls or replaces comes up *native* (cluster-name +DNS, data dir at the volume root); once the cluster has fully rolled, no member +carries either annotation and the cluster is indistinguishable from one created +natively — no permanent knob, nothing to deprecate later. `additionalMetadata` +cannot set keys under the `etcd-operator.cozystack.io/` reserved prefix, so a +user can neither forge these annotations nor break the self-wipe. + +Per cluster, the tool: + +1. 
**Inspects** the live etcd (read-only, over a port-forward with the legacy + operator's client certificate): member list, cluster ID, auth status. + Runs in dry-run too, so the printed plan shows the real IDs. +2. **Disables legacy auth** if enabled (the legacy NoPassword root can never + match a credentials Secret; the new operator re-enables auth itself). This + runs **before** the backup on purpose: the snapshot Job dials etcd + anonymously, and etcd rejects the Maintenance Snapshot RPC while auth is on. +3. **Backs up** the cluster (see below) — before anything is mutated. +4. **Creates the new CRs with prefilled status**: the `EtcdCluster` gets + `status.clusterID`/`clusterToken`/`observed` (so the operator's bootstrap + branch never fires against a cluster that already exists), and one + `EtcdMember` per pod — named exactly like the pod, carrying the reserved + adoption annotations above — gets its `status.memberID` and `isVoter=true`. +5. **Owner-references the legacy headless Service to the adopted members, + then dismantles the legacy control plane** — in that order. The legacy + headless Service (`-headless`) has its `ownerReferences` replaced + with one non-controller entry per adopted `EtcdMember`, so Kubernetes GC + removes it exactly when the last adopted member rolls away (new members + aren't owners, so they never keep it alive). Only then are the legacy + `EtcdCluster` and its StatefulSet deleted with **Orphan** propagation (pods + survive) and the cluster-state ConfigMap + legacy PDB removed. Doing the + owner-ref rewrite first avoids a window where the Service is sole-owned by + a now-deleted object and gets reaped prematurely. +6. **Re-owns the data plane**: each pod and PVC gets the operator's labels + and a controller owner reference to its `EtcdMember` (only after the + StatefulSet is gone, so its controller can't re-adopt the pods). +7. 
**Cuts over the client Service**: the legacy client Service is named after + the cluster (`<name>`), which collides with the operator's *native* + headless Service of the same name. The tool deletes the legacy client + Service and immediately recreates it as the native headless Service (owned + by the new `EtcdCluster`), so the DNS name keeps resolving with the minimum + possible gap rather than waiting for the operator's first reconcile. See + [Endpoint compatibility](#endpoint-compatibility) for what this means for + consumers. + +Every step is idempotent — re-running the tool completes a partially-applied +adoption. + +### Prerequisites + +1. **Scale both operators to zero.** The legacy etcd Pods keep running — only + the controllers must be quiet: + + ```sh + kubectl -n etcd-operator-system scale deploy etcd-operator-controller-manager --replicas=0 + ``` + + The tool verifies this for both Deployments before doing anything + (`--legacy-controller` / `--new-controller` override the coordinates, + `--skip-controller-check` bypasses the gate). +2. The new CRDs (`etcd-operator.cozystack.io/v1alpha2`) must be installed + (`make install`). +3. A kubeconfig that can list/delete the legacy CRs cluster-wide, create the + new ones, and patch pods/PVCs/Services. +4. **All etcd pods Ready.** Adoption refuses clusters with missing members, + learners, or unreachable etcd. + +### Workflow: dry-run first + +```sh +# Dry-run (the default): inspects each live cluster and prints the planned +# v1alpha2 manifests, the adoption steps, and warnings for legacy settings +# that do not carry over. +bin/etcd-migrate + +# Execute the adoption (backup destination required — see below). 
+bin/etcd-migrate --apply \ + --backup-s3-endpoint=https://s3.example.com \ + --backup-s3-bucket=etcd-migration \ + --backup-s3-credentials-secret=s3-creds # needed in EVERY migrated namespace +``` + +What gets migrated: + +| Legacy (`etcd.aenix.io/v1alpha1`) | New (`etcd-operator.cozystack.io/v1alpha2`) | +|---|---| +| `EtcdCluster` | `EtcdCluster` + `EtcdMember`s **adopting the running pods in place** | +| `EtcdBackup` | `EtcdSnapshot` (created; legacy CR deleted) | +| `EtcdBackupSchedule` | a `CronJob` manifest creating `EtcdSnapshot`s — **printed only**, never applied; the legacy CR is left for you to delete | + +Every legacy knob with no v1alpha2 equivalent (`spec.options` keys beyond the +[four typed ones](#specoptions-free-form-map--typed-fields), service/PDB +templates, podTemplate overrides beyond affinity/topology-spread/resources/ +metadata) is reported as a warning — review them before `--apply`. Hard +blockers (`emptyDir` storage — nothing to adopt, an unparsable etcd image tag +without `--version`, `enableAuth` without server TLS, a non-integer +`quota-backend-bytes`/`snapshot-count`, a failed inspection) skip that +cluster and exit non-zero. + +TLS caveat: the legacy API kept CAs in separate Secrets +(`serverTrustedCASecret`, `peerTrustedCASecret`); the new operator reads +`ca.crt` from the server/peer Secret itself. The tool warns per cluster — +merge the CA into the referenced Secret **before** starting the new operator +(with cert-manager-issued secrets, `ca.crt` is typically already in place). 
+ +### The safety backup + +Adoption rewires ownership of live storage, so the tool snapshots every +cluster to the `--backup-s3-*`/`--backup-pvc-claim` destination **before any +ownership/data-plane mutation** — the only step that precedes it is the +auth-disable above, which the snapshot Job's anonymous dial depends on (a +one-off Job running the operator image's snapshot agent — +`--agent-image` overrides; by default the image is read from the new +controller Deployment's spec, which works at replicas=0). Nothing is restored +from the artifact — the data never moves — it exists purely for disaster +recovery. A failed backup excludes that cluster from the apply. Skipping the +backup requires an explicit `--skip-backup`. + +### Auth during migration + +The legacy operator provisioned the etcd `root` user with **NoPassword** +(certificate-only identity). The new operator requires BYO root credentials +(see [Authentication](#authentication-root-credentials-are-byo-and-required) +below). The tool bridges this: it generates a `kubernetes.io/basic-auth` +Secret (`<name>-root-credentials`, random password) per auth-enabled +cluster — or references the one you name via `--auth-secret` — runs +`auth disable` on the live etcd (authenticating with the legacy operator's +client certificate), and lets the new operator re-enable auth with the +Secret's password once it takes over. Mind the window: auth is off from that +moment until the new operator latches `status.authEnabled`. Update consumers +(e.g. a Kamaji `DataStore` `basicAuth`) to point at the Secret. + +### Endpoint compatibility + +The etcd cluster ID is preserved (it's an adoption, not a restore) and the pods +keep their IPs, but the **client Service changes shape** because of a naming +collision you must plan for. + +The legacy operator names its **client** Service `<name>` and its headless +Service `<name>-headless`. The native operator names its **headless** Service +`<name>` and its client Service `<name>-client`. 
So the native headless +Service collides with the legacy client Service on the name `<name>`. Since +a Service's `clusterIP` is immutable, the collision cannot be reconciled in +place — the tool deletes the legacy client Service and recreates `<name>` as +a **headless** Service (step 7 above). + +What this means for consumers connecting to `<name>.<namespace>.svc:2379`: + +- **The DNS name keeps resolving** and the server-cert SAN still covers it, so + clients that connect **by DNS name** (a normal etcd client with retries — a + Kamaji `DataStore`, for example) keep working across the cutover. The + recreate happens back-to-back, so the no-resolution window is minimal. +- **The ClusterIP VIP disappears.** `<name>` is now headless (it returns + pod A-records directly instead of a single virtual IP), and it publishes + not-ready addresses. Any consumer that **depends on the ClusterIP/VIP + semantics** — a cached service IP, a NetworkPolicy keyed on the VIP, a + customized legacy client Service (`LoadBalancer`/`NodePort`/external-dns + annotations) — will break, and the customizations are lost. + +> **Prerequisite — repoint VIP-dependent consumers before cutover.** If any +> consumer relies on ClusterIP/VIP behaviour rather than plain DNS, point it at +> the operator's native **`<name>-client`** Service (a regular ClusterIP +> Service the operator creates) before you run `--apply`. DNS-name consumers +> need no change. + +The legacy headless Service (`<name>-headless`) is **not** managed by the +operator; it is owner-referenced to the adopted members and is garbage-collected +automatically once the last adopted member is replaced (see step 5). The adopted +pods remain reachable under it for their whole lifetime (their immutable +`spec.subdomain` points at it); rolled/replacement members come up under the +native `<name>` headless Service instead. + +> **Prerequisite — externally-issued certs must carry both DNS domains during +> the mixed window.** Server/peer certs here are external (e.g. 
Cozystack +> cert-manager); the operator does not synthesize them. The operator's SAN +> contract is a wildcard pinned to the Service name (`*...svc`). +> During rollover, adopted members resolve under `-headless` and +> rolled members under ``, so the cert the pods mount must carry +> **both** `*.-headless..svc` and `*...svc` (plus the +> `.` FQDN forms) for the duration. Drop the legacy SAN once +> rollover completes. Coordinate this with whoever issues the certs before +> starting the new operator. + +### Final cleanup + +After `--apply` succeeds, **scale the new operator up** — it takes over the +adopted clusters without touching the pods: + +```sh +kubectl -n etcd-operator-system scale deploy etcd-operator-controller-manager --replicas=1 +``` + +The tool deletes the migrated legacy **CRs** but never the **CRDs**. Once no +`etcd.aenix.io` CRs remain (remember `EtcdBackupSchedule`s are left in +place), remove them: + +```sh +kubectl delete crd etcdclusters.etcd.aenix.io etcdbackups.etcd.aenix.io etcdbackupschedules.etcd.aenix.io +``` ## Snapshot CRD renamed: `EtcdBackup` → `EtcdSnapshot` @@ -65,9 +290,10 @@ The key mapping, using Cozystack's actual legacy values: ``` Note the value types: `quotaBackendBytes` and `snapshotCount` are integers, not -quoted strings. Any other key the legacy map accepted has no typed equivalent — -if you relied on one, file an issue; the flag gets a typed field, not a -pass-through. +quoted strings. `etcd-migrate` performs this mapping automatically. Any other +key the legacy map accepted has no typed equivalent — the tool drops it with a +warning; if you relied on one, file an issue: the flag gets a typed field, not +a pass-through. 
## Authentication: root credentials are BYO and required diff --git a/go.mod b/go.mod index 43191623..3bbd56ce 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( k8s.io/apimachinery v0.33.11 k8s.io/client-go v0.33.11 sigs.k8s.io/controller-runtime v0.21.0 + sigs.k8s.io/yaml v1.4.0 ) require ( @@ -115,5 +116,4 @@ require ( sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/internal/migrate/adopt.go b/internal/migrate/adopt.go new file mode 100644 index 00000000..20c672dc --- /dev/null +++ b/internal/migrate/adopt.go @@ -0,0 +1,320 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "fmt" + "sort" + "strings" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/controllers" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// LegacyDataDirSubPath is where the legacy operator kept etcd's data inside +// the member PVC: the volume was mounted at /var/run/etcd and etcd ran with +// --data-dir=/var/run/etcd/default.etcd (main:internal/controller/factory/ +// statefulset.go). Recorded on every adopted EtcdMember so a replacement Pod +// finds the existing data dir. +const LegacyDataDirSubPath = "default.etcd" + +// Legacy object-naming conventions, mirrored from the legacy operator's +// factory package (main:internal/controller/factory). The adopted cluster +// keeps using the legacy headless Service name — it is baked into the pods' +// immutable spec.subdomain and into the peer URLs persisted inside etcd. 
+ +// LegacyHeadlessServiceName mirrors factory.GetHeadlessServiceName. +func LegacyHeadlessServiceName(name string, spec legacy.EtcdClusterSpec) string { + if spec.HeadlessServiceTemplate != nil && spec.HeadlessServiceTemplate.Name != "" { + return spec.HeadlessServiceTemplate.Name + } + return name + "-headless" +} + +// LegacyClientServiceName mirrors factory.GetServiceName. +func LegacyClientServiceName(name string, spec legacy.EtcdClusterSpec) string { + if spec.ServiceTemplate != nil && spec.ServiceTemplate.Name != "" { + return spec.ServiceTemplate.Name + } + return name +} + +// LegacyClusterToken mirrors the legacy cluster-state ConfigMap's +// ETCD_INITIAL_CLUSTER_TOKEN derivation ("<name>-<namespace>"). Recorded in +// status.clusterToken so future scale-ups keep the token the existing +// members were bootstrapped with. +func LegacyClusterToken(name, namespace string) string { + return name + "-" + namespace +} + +// LegacyStateConfigMapName mirrors factory.GetClusterStateConfigMapName. +func LegacyStateConfigMapName(name string) string { + return name + "-cluster-state" +} + +// MemberFact is one etcd member as reported by the live legacy cluster +// (MemberList over a port-forward). +type MemberFact struct { + // Name is the etcd member name. The legacy operator ran members with + // --name=$(POD_NAME), so this is also the Pod and EtcdMember name. + Name string + // IDHex is the etcd member ID in lowercase 16-digit hex — the format + // EtcdMemberStatus.MemberID uses. + IDHex string + // PeerURL is the member's first persisted peer URL, used verbatim in + // the adopted members' spec.initialCluster. + PeerURL string + // IsLearner blocks adoption: the legacy operator never created + // learners, so one indicates an intervention the tool cannot reason + // about. + IsLearner bool + // PodUID is the UID of the running Pod backing this member. + PodUID string +} + +// ClusterFacts is everything the inspection phase learned about one live +// legacy cluster. 
BuildAdoption is pure given these facts, so the dry-run +// renders exactly what --apply executes. +type ClusterFacts struct { + // ClusterIDHex is the etcd cluster ID in lowercase hex (the format + // EtcdClusterStatus.ClusterID uses). + ClusterIDHex string + // Members is the live member list. Sorted by name in BuildAdoption. + Members []MemberFact + // AuthEnabled reports etcd's live auth status; when true the apply + // phase must run `auth disable` before the new operator starts. + AuthEnabled bool +} + +// MemberAdoption is one existing pod+PVC pair becoming an EtcdMember. +type MemberAdoption struct { + // Member is the EtcdMember CR to create. Its name equals the existing + // Pod's name, so the member controller finds the Pod without creating + // anything. + Member *lll.EtcdMember + // Status is the prefilled status written via the status subresource + // right after Create: MemberID and IsVoter spare the controller a + // discovery round-trip, and IsVoter=true specifically must be there + // before the first reconcile (learner-filtering and the PDB's + // role=voter label both key off it). + Status lll.EtcdMemberStatus + // PVCName is the existing PVC ("data-") to label and re-own. + PVCName string +} + +// AdoptionPlan is the in-place adoption payload for one cluster: what to +// create under the new API, what to re-own, and what legacy machinery to +// dismantle (leaving the pods untouched). +type AdoptionPlan struct { + // ClusterStatus is written to the new EtcdCluster's status subresource + // right after Create. Prefilling ClusterID + ClusterToken + Observed + // is what keeps the cluster controller's bootstrap branch (which would + // create a seed pod with --initial-cluster-state=new) from ever + // firing. + ClusterStatus lll.EtcdClusterStatus + // Members lists the pod adoptions, sorted by member name. 
+ Members []MemberAdoption + // StatefulSetName is the legacy StatefulSet to delete with Orphan + // propagation BEFORE pod owner references are rewritten — while it + // exists, its controller would fight for the pods. + StatefulSetName string + // ConfigMapName is the legacy cluster-state ConfigMap to delete. + ConfigMapName string + // PDBName is the legacy PodDisruptionBudget to delete; the new + // operator emits its own under the same name afterwards. + PDBName string + // HeadlessServiceName is the legacy headless Service (e.g. + // "<name>-headless"). The apply phase owner-references it to the + // adopted EtcdMembers so Kubernetes GC removes it exactly when the last + // adopted member rolls away — no operator code manages it. + HeadlessServiceName string + // ClientServiceName is the legacy client Service (e.g. "<name>"). + // Its name collides with the operator's native headless Service, so the + // apply phase deletes it and immediately recreates a headless Service of + // the same name (owned by the new EtcdCluster). The DNS name keeps + // resolving for consumers; see docs/migration.md for the + // ClusterIP→headless caveats. + ClientServiceName string +} + +// BuildAdoption translates one legacy EtcdCluster into an in-place adoption +// plan: the spec translation of TranslateCluster, plus the new-API member +// CRs mirroring the live pods, the status prefills, and the legacy-object +// bookkeeping. Pure given the facts — no cluster access. 
+func BuildAdoption(name, namespace string, spec legacy.EtcdClusterSpec, facts ClusterFacts, opts TranslateOptions) ResourcePlan { + plan := TranslateCluster(name, namespace, spec, opts) + if plan.Action == ActionError { + return plan + } + plan.Action = ActionAdopt + cluster := plan.Target.(*lll.EtcdCluster) + + // The legacy headless Service name is the keystone: stamping it as the + // AnnHeadlessServiceName annotation on each adopted member makes every + // URL the new operator constructs for that member match the DNS the + // adopted pod actually has (immutable spec.subdomain) and the peer URL + // etcd has persisted. The operator never stamps this annotation on + // members it creates, so replacements come up under the cluster's own + // (native) headless Service and the override self-wipes as the cluster + // rolls. It lives on the members, not the cluster spec. + legacyHeadless := LegacyHeadlessServiceName(name, spec) + + members := append([]MemberFact(nil), facts.Members...) + sort.Slice(members, func(i, j int) bool { return members[i].Name < members[j].Name }) + + for _, m := range members { + if m.IsLearner { + plan.Errors = append(plan.Errors, fmt.Sprintf( + "etcd member %q is a learner; the legacy operator never creates learners, refusing to adopt a cluster in an unrecognized state", m.Name)) + } + if m.PodUID == "" { + plan.Errors = append(plan.Errors, fmt.Sprintf( + "etcd member %q has no running Pod of the same name; every member must be backed by a Ready pod to adopt", m.Name)) + } + } + + // Replicas follow the LIVE member count. A legacy spec disagreeing with + // reality (mid-scale crash, manual edits) is surfaced, not silently + // trusted — adopting with spec.replicas != len(members) would make the + // new operator immediately start scaling a cluster it just took over. 
+ replicas := int32(len(members)) + if spec.Replicas != nil && *spec.Replicas != replicas { + plan.Warnings = append(plan.Warnings, fmt.Sprintf( + "legacy spec.replicas=%d disagrees with the live member count %d; adopting with replicas=%d (the live state)", + *spec.Replicas, replicas, replicas)) + } + cluster.Spec.Replicas = &replicas + + if len(plan.Errors) > 0 { + plan.Action = ActionError + plan.Target = nil + plan.Extras = nil + plan.DeleteRef = nil + return plan + } + + plan.Adoption = &AdoptionPlan{} + + // --initial-cluster for adopted members is built from etcd's OWN view + // (the persisted peer URLs), not reconstructed from conventions. etcd + // ignores the flag when the data dir exists, but the member controller + // refuses to start a pod with an empty value — and an empty value also + // reads as "pending scale-up" to the cluster controller, which would + // run MemberAddAsLearner against the live cluster. + parts := make([]string, 0, len(members)) + for _, m := range members { + parts = append(parts, m.Name+"="+m.PeerURL) + } + initialCluster := strings.Join(parts, ",") + + token := LegacyClusterToken(name, namespace) + memberTLS := deriveAdoptedMemberTLS(cluster) + + for _, m := range members { + em := &lll.EtcdMember{ + TypeMeta: metav1.TypeMeta{APIVersion: lll.GroupVersion.String(), Kind: "EtcdMember"}, + ObjectMeta: metav1.ObjectMeta{ + Name: m.Name, + Namespace: namespace, + Labels: controllers.MemberLabels(name, m.Name), + // Reserved annotations the member controller interprets: + // the legacy headless Service name drives this member's DNS + // identity, and the legacy data-dir subpath relocates + // --data-dir so the replacement Pod finds the existing data. + // Only adopted members carry these; the operator never + // stamps them, so the cluster self-wipes back to native as + // members roll. 
+ Annotations: map[string]string{ + controllers.AnnHeadlessServiceName: legacyHeadless, + controllers.AnnDataDirSubPath: LegacyDataDirSubPath, + }, + }, + Spec: lll.EtcdMemberSpec{ + ClusterName: name, + Version: cluster.Spec.Version, + Storage: cluster.Spec.Storage, + Resources: cluster.Spec.Resources, + AdditionalMetadata: cluster.Spec.AdditionalMetadata, + Affinity: cluster.Spec.Affinity, + TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, + Options: cluster.Spec.Options, + Bootstrap: false, + InitialCluster: initialCluster, + ClusterToken: token, + Replicas: 1, + TLS: memberTLS, + }, + } + plan.Adoption.Members = append(plan.Adoption.Members, MemberAdoption{ + Member: em, + Status: lll.EtcdMemberStatus{ + MemberID: m.IDHex, + PodName: m.Name, + PodUID: m.PodUID, + PVCName: "data-" + m.Name, + IsVoter: true, + }, + PVCName: "data-" + m.Name, + }) + } + + plan.Adoption.ClusterStatus = lll.EtcdClusterStatus{ + ClusterID: facts.ClusterIDHex, + ClusterToken: token, + Observed: &lll.ObservedClusterSpec{ + Replicas: replicas, + Version: cluster.Spec.Version, + Storage: cluster.Spec.Storage, + Resources: cluster.Spec.Resources, + Affinity: cluster.Spec.Affinity, + TopologySpreadConstraints: cluster.Spec.TopologySpreadConstraints, + AdditionalMetadata: cluster.Spec.AdditionalMetadata, + Options: cluster.Spec.Options, + }, + } + plan.Adoption.StatefulSetName = name + plan.Adoption.ConfigMapName = LegacyStateConfigMapName(name) + plan.Adoption.PDBName = name + plan.Adoption.HeadlessServiceName = legacyHeadless + plan.Adoption.ClientServiceName = LegacyClientServiceName(name, spec) + + plan.Notes = append(plan.Notes, + "in-place adoption: the etcd pods and their PVCs stay exactly as they are; only ownership, labels and member annotations change", + fmt.Sprintf("adopted members carry annotation %s=%q so the operator's URL convention matches the adopted pods' DNS; it self-wipes as members roll", controllers.AnnHeadlessServiceName, legacyHeadless), + 
fmt.Sprintf("the legacy headless Service %q is owner-referenced to the adopted members and is garbage-collected automatically once the last adopted member is replaced", legacyHeadless), + fmt.Sprintf("the legacy client Service %q is replaced in place by the operator's native headless Service of the same name (consumers using its DNS name keep working; see docs/migration.md for the ClusterIP→headless caveats)", LegacyClientServiceName(name, spec))) + + return plan +} + +// deriveAdoptedMemberTLS mirrors the controller's cluster→member TLS +// projection for the BYO-secret mode (the only mode a legacy translation +// produces): server secret ref + the "operator presents a client cert" bit, +// peer secret ref. +func deriveAdoptedMemberTLS(cluster *lll.EtcdCluster) *lll.EtcdMemberTLS { + tls := cluster.Spec.TLS + if tls == nil || (tls.Client == nil && tls.Peer == nil) { + return nil + } + out := &lll.EtcdMemberTLS{} + if tls.Client != nil && tls.Client.ServerSecretRef != nil { + out.ClientServerSecretRef = &corev1.LocalObjectReference{Name: tls.Client.ServerSecretRef.Name} + out.ClientMTLS = tls.Client.OperatorClientSecretRef != nil + } + if tls.Peer != nil && tls.Peer.SecretRef != nil { + out.PeerSecretRef = &corev1.LocalObjectReference{Name: tls.Peer.SecretRef.Name} + } + return out +} diff --git a/internal/migrate/adopt_test.go b/internal/migrate/adopt_test.go new file mode 100644 index 00000000..6c943087 --- /dev/null +++ b/internal/migrate/adopt_test.go @@ -0,0 +1,211 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "fmt" + "strings" + "testing" + + corev1 "k8s.io/api/core/v1" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/controllers" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +func adoptSpecFixture(t *testing.T) legacy.EtcdClusterSpec { + t.Helper() + three := int32(3) + return legacy.EtcdClusterSpec{ + Replicas: &three, + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "10Gi")}}}, + }}, + } +} + +func adoptFactsFixture(n int) ClusterFacts { + f := ClusterFacts{ClusterIDHex: "00000000deadbeef"} + for i := 0; i < n; i++ { + name := fmt.Sprintf("etcd-%d", i) + f.Members = append(f.Members, MemberFact{ + Name: name, + IDHex: fmt.Sprintf("%016x", 0xa00+i), + PeerURL: fmt.Sprintf("https://%s.etcd-headless.ns.svc:2380", name), + PodUID: "uid-" + name, + }) + } + return f +} + +// TestBuildAdoption_HappyPath pins the adoption contract end to end: the +// headless override matching the pods' immutable DNS, the member CRs +// mirroring the live pods (data-dir subPath, persisted-URL initialCluster, +// legacy token), and the status prefills that keep bootstrap from firing. 
+func TestBuildAdoption_HappyPath(t *testing.T) { + plan := BuildAdoption("etcd", "ns", adoptSpecFixture(t), adoptFactsFixture(3), TranslateOptions{}) + if plan.Action != ActionAdopt { + t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors) + } + cluster := plan.Target.(*lll.EtcdCluster) + if cluster.Spec.Replicas == nil || *cluster.Spec.Replicas != 3 { + t.Errorf("replicas = %v, want live count 3", cluster.Spec.Replicas) + } + + a := plan.Adoption + if a == nil { + t.Fatal("Adoption payload missing") + } + if a.ClusterStatus.ClusterID != "00000000deadbeef" || a.ClusterStatus.ClusterToken != "etcd-ns" { + t.Errorf("cluster status prefill = %+v", a.ClusterStatus) + } + if a.ClusterStatus.Observed == nil || a.ClusterStatus.Observed.Replicas != 3 { + t.Errorf("observed prefill = %+v", a.ClusterStatus.Observed) + } + if a.StatefulSetName != "etcd" || a.ConfigMapName != "etcd-cluster-state" || a.PDBName != "etcd" { + t.Errorf("legacy object names = %q/%q/%q", a.StatefulSetName, a.ConfigMapName, a.PDBName) + } + if a.HeadlessServiceName != "etcd-headless" || a.ClientServiceName != "etcd" { + t.Errorf("service names = %q/%q", a.HeadlessServiceName, a.ClientServiceName) + } + + if len(a.Members) != 3 { + t.Fatalf("members = %d, want 3", len(a.Members)) + } + wantInitial := "etcd-0=https://etcd-0.etcd-headless.ns.svc:2380," + + "etcd-1=https://etcd-1.etcd-headless.ns.svc:2380," + + "etcd-2=https://etcd-2.etcd-headless.ns.svc:2380" + for i, ma := range a.Members { + name := fmt.Sprintf("etcd-%d", i) + if ma.Member.Name != name { + t.Errorf("member[%d] name = %q (must equal the pod name)", i, ma.Member.Name) + } + if ma.Member.Annotations[controllers.AnnDataDirSubPath] != LegacyDataDirSubPath { + t.Errorf("%s data-dir-subpath annotation = %q, want %q", name, ma.Member.Annotations[controllers.AnnDataDirSubPath], LegacyDataDirSubPath) + } + if ma.Member.Annotations[controllers.AnnHeadlessServiceName] != "etcd-headless" { + t.Errorf("%s headless-service-name annotation = 
%q, want legacy convention", name, ma.Member.Annotations[controllers.AnnHeadlessServiceName]) + } + if ma.Member.Spec.InitialCluster != wantInitial { + t.Errorf("%s initialCluster = %q\nwant %q", name, ma.Member.Spec.InitialCluster, wantInitial) + } + if ma.Member.Spec.ClusterToken != "etcd-ns" { + t.Errorf("%s clusterToken = %q, want the legacy token", name, ma.Member.Spec.ClusterToken) + } + if ma.Member.Spec.Bootstrap { + t.Errorf("%s must not be a bootstrap seed", name) + } + if !ma.Status.IsVoter || ma.Status.MemberID == "" || ma.Status.PodUID != "uid-"+name { + t.Errorf("%s status prefill = %+v", name, ma.Status) + } + if ma.PVCName != "data-"+name { + t.Errorf("%s pvc = %q", name, ma.PVCName) + } + } +} + +// TestBuildAdoption_MirrorsTLSOntoMembers: the member-side TLS view must +// match what the controller's own deriveMemberTLS would produce, or the +// first replacement pod comes up plaintext against a TLS cluster. +func TestBuildAdoption_MirrorsTLSOntoMembers(t *testing.T) { + spec := adoptSpecFixture(t) + spec.Security = &legacy.SecuritySpec{TLS: legacy.TLSSpec{ + ServerSecret: "srv", ClientSecret: "op-client", PeerSecret: "peer", + }} + plan := BuildAdoption("etcd", "ns", spec, adoptFactsFixture(1), TranslateOptions{}) + if plan.Action != ActionAdopt { + t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors) + } + mtls := plan.Adoption.Members[0].Member.Spec.TLS + if mtls == nil || mtls.ClientServerSecretRef == nil || mtls.ClientServerSecretRef.Name != "srv" { + t.Fatalf("member TLS = %+v, want server secret mirrored", mtls) + } + if !mtls.ClientMTLS { + t.Error("operator client secret set ⇒ member must demand client certs (ClientMTLS)") + } + if mtls.PeerSecretRef == nil || mtls.PeerSecretRef.Name != "peer" { + t.Errorf("peer secret not mirrored: %+v", mtls.PeerSecretRef) + } +} + +// TestBuildAdoption_Refusals pins the states the tool must not touch. 
+func TestBuildAdoption_Refusals(t *testing.T) { + t.Run("learner member", func(t *testing.T) { + facts := adoptFactsFixture(2) + facts.Members[1].IsLearner = true + plan := BuildAdoption("etcd", "ns", adoptSpecFixture(t), facts, TranslateOptions{}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error for a learner", plan.Action) + } + }) + + t.Run("member without running pod", func(t *testing.T) { + facts := adoptFactsFixture(2) + facts.Members[0].PodUID = "" + plan := BuildAdoption("etcd", "ns", adoptSpecFixture(t), facts, TranslateOptions{}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error for a podless member", plan.Action) + } + }) + + t.Run("emptyDir storage", func(t *testing.T) { + spec := adoptSpecFixture(t) + spec.Storage = legacy.StorageSpec{EmptyDir: &corev1.EmptyDirVolumeSource{}} + plan := BuildAdoption("etcd", "ns", spec, adoptFactsFixture(1), TranslateOptions{}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error for emptyDir", plan.Action) + } + }) +} + +// TestBuildAdoption_ReplicasFollowLiveState: a legacy spec disagreeing with +// the live member count is adopted at the LIVE count, with a warning. 
+func TestBuildAdoption_ReplicasFollowLiveState(t *testing.T) { + spec := adoptSpecFixture(t) // says replicas=3 + plan := BuildAdoption("etcd", "ns", spec, adoptFactsFixture(2), TranslateOptions{}) + if plan.Action != ActionAdopt { + t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors) + } + cluster := plan.Target.(*lll.EtcdCluster) + if cluster.Spec.Replicas == nil || *cluster.Spec.Replicas != 2 { + t.Errorf("replicas = %v, want live count 2", cluster.Spec.Replicas) + } + found := false + for _, w := range plan.Warnings { + if strings.Contains(w, "disagrees with the live member count") { + found = true + } + } + if !found { + t.Errorf("expected replicas-mismatch warning, got %v", plan.Warnings) + } +} + +// TestBuildAdoption_HeadlessServiceTemplateOverride: a legacy cluster that +// renamed its headless Service via the template keeps that exact name. +func TestBuildAdoption_HeadlessServiceTemplateOverride(t *testing.T) { + spec := adoptSpecFixture(t) + spec.HeadlessServiceTemplate = &legacy.EmbeddedMetadataResource{ + EmbeddedObjectMetadata: legacy.EmbeddedObjectMetadata{Name: "custom-peers"}, + } + plan := BuildAdoption("etcd", "ns", spec, adoptFactsFixture(1), TranslateOptions{}) + if plan.Action != ActionAdopt { + t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors) + } + if got := plan.Adoption.Members[0].Member.Annotations[controllers.AnnHeadlessServiceName]; got != "custom-peers" { + t.Errorf("member headless-service-name annotation = %q, want the template override", got) + } + if plan.Adoption.HeadlessServiceName != "custom-peers" { + t.Errorf("legacy headless Service to GC = %q, want the template override", plan.Adoption.HeadlessServiceName) + } +} diff --git a/internal/migrate/cronjob.go b/internal/migrate/cronjob.go new file mode 100644 index 00000000..188b9e5f --- /dev/null +++ b/internal/migrate/cronjob.go @@ -0,0 +1,137 @@ +/* +Copyright 2024 The etcd-operator Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "fmt" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// kubectlImage runs the generated CronJob's container. Pinned rather than +// :latest so the printed manifest is reproducible; the user reviews the +// manifest anyway and can swap in an in-house image. +const kubectlImage = "bitnami/kubectl:1.33" + +// TranslateSchedule converts one legacy EtcdBackupSchedule into a PRINTED +// CronJob manifest (plus the ServiceAccount/Role/RoleBinding it needs). The +// new API has no schedule CRD by design — recurring snapshots are driven +// from outside — so nothing is applied and the legacy CR is not deleted; the +// user reviews, adjusts, and applies the manifests themselves. +func TranslateSchedule(name, namespace string, spec legacy.EtcdBackupScheduleSpec) ResourcePlan { + plan := ResourcePlan{ + SourceKind: "EtcdBackupSchedule", + SourceName: name, + Namespace: namespace, + Action: ActionPrint, + } + + dest, err := translateLocation(spec.Destination) + if err != nil { + plan.Action = ActionError + plan.Errors = append(plan.Errors, "spec.destination: "+err.Error()) + return plan + } + + // The EtcdSnapshot each tick creates. generateName + `kubectl create` + // (not apply) because snapshots are immutable one-shots — every tick + // must produce a fresh object. 
+ snapshot := &lll.EtcdSnapshot{ + TypeMeta: metav1.TypeMeta{APIVersion: lll.GroupVersion.String(), Kind: "EtcdSnapshot"}, + ObjectMeta: metav1.ObjectMeta{GenerateName: name + "-", Namespace: namespace}, + Spec: lll.EtcdSnapshotSpec{ + ClusterRef: spec.ClusterRef, + Destination: dest, + }, + } + snapshotYAML, yErr := yaml.Marshal(snapshot) + if yErr != nil { + plan.Action = ActionError + plan.Errors = append(plan.Errors, "render EtcdSnapshot template: "+yErr.Error()) + return plan + } + + saName := name + "-snapshotter" + script := fmt.Sprintf("kubectl create -f - <<'EOF'\n%sEOF\n", snapshotYAML) + + plan.Target = &batchv1.CronJob{ + TypeMeta: metav1.TypeMeta{APIVersion: "batch/v1", Kind: "CronJob"}, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Spec: batchv1.CronJobSpec{ + Schedule: spec.Schedule, + SuccessfulJobsHistoryLimit: spec.SuccessfulJobsHistoryLimit, + FailedJobsHistoryLimit: spec.FailedJobsHistoryLimit, + JobTemplate: batchv1.JobTemplateSpec{ + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: saName, + RestartPolicy: corev1.RestartPolicyNever, + Containers: []corev1.Container{{ + Name: "create-snapshot", + Image: kubectlImage, + Command: []string{"/bin/sh", "-ec", script}, + }}, + }, + }, + }, + }, + }, + } + plan.Extras = scheduleRBAC(saName, namespace) + + plan.Warnings = append(plan.Warnings, + "EtcdBackupSchedule has no v1alpha2 CRD: the CronJob manifest above is PRINTED ONLY — review it (image, schedule, RBAC) and apply it yourself", + "the legacy EtcdBackupSchedule CR is left in place; delete it manually once the CronJob replacement is applied") + return plan +} + +// scheduleRBAC builds the SA/Role/RoleBinding the CronJob needs to create +// EtcdSnapshot objects in its namespace. 
+func scheduleRBAC(saName, namespace string) []client.Object { + return []client.Object{ + &corev1.ServiceAccount{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "ServiceAccount"}, + ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: namespace}, + }, + &rbacv1.Role{ + TypeMeta: metav1.TypeMeta{APIVersion: "rbac.authorization.k8s.io/v1", Kind: "Role"}, + ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: namespace}, + Rules: []rbacv1.PolicyRule{{ + APIGroups: []string{lll.GroupVersion.Group}, + Resources: []string{"etcdsnapshots"}, + Verbs: []string{"create"}, + }}, + }, + &rbacv1.RoleBinding{ + TypeMeta: metav1.TypeMeta{APIVersion: "rbac.authorization.k8s.io/v1", Kind: "RoleBinding"}, + ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: namespace}, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: saName, + }, + Subjects: []rbacv1.Subject{{ + Kind: rbacv1.ServiceAccountKind, + Name: saName, + Namespace: namespace, + }}, + }, + } +} diff --git a/internal/migrate/cronjob_test.go b/internal/migrate/cronjob_test.go new file mode 100644 index 00000000..85542875 --- /dev/null +++ b/internal/migrate/cronjob_test.go @@ -0,0 +1,109 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "strings" + "testing" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// TestTranslateSchedule pins the schedule → CronJob generation: print-only +// action, schedule/limits mapping, the embedded `kubectl create` of an +// EtcdSnapshot with generateName, and the companion RBAC objects. 
+func TestTranslateSchedule(t *testing.T) { + plan := TranslateSchedule("nightly", "ns", legacy.EtcdBackupScheduleSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "my-etcd"}, + Schedule: "0 2 * * *", + SuccessfulJobsHistoryLimit: ptrInt32(7), + FailedJobsHistoryLimit: ptrInt32(2), + Destination: legacy.BackupDestination{PVC: &legacy.PVCBackupDestination{ + ClaimName: "backups", SubPath: "etcd", + }}, + }) + + if plan.Action != ActionPrint { + t.Fatalf("Action = %s, want Print (schedules are never applied)", plan.Action) + } + if plan.DeleteRef != nil { + t.Fatal("schedules must not be deleted: the replacement is not applied by the tool") + } + + cj, ok := plan.Target.(*batchv1.CronJob) + if !ok { + t.Fatalf("Target is %T, want *CronJob", plan.Target) + } + if cj.Spec.Schedule != "0 2 * * *" { + t.Errorf("schedule = %q", cj.Spec.Schedule) + } + if cj.Spec.SuccessfulJobsHistoryLimit == nil || *cj.Spec.SuccessfulJobsHistoryLimit != 7 || + cj.Spec.FailedJobsHistoryLimit == nil || *cj.Spec.FailedJobsHistoryLimit != 2 { + t.Errorf("history limits = %v/%v", cj.Spec.SuccessfulJobsHistoryLimit, cj.Spec.FailedJobsHistoryLimit) + } + + pod := cj.Spec.JobTemplate.Spec.Template.Spec + if pod.ServiceAccountName != "nightly-snapshotter" { + t.Errorf("serviceAccountName = %q", pod.ServiceAccountName) + } + if len(pod.Containers) != 1 { + t.Fatalf("containers = %d", len(pod.Containers)) + } + script := strings.Join(pod.Containers[0].Command, "\n") + for _, want := range []string{ + "kubectl create", + "kind: EtcdSnapshot", + "generateName: nightly-", + "name: my-etcd", // clusterRef + "claimName: backups", // destination mapped through + } { + if !strings.Contains(script, want) { + t.Errorf("CronJob script missing %q:\n%s", want, script) + } + } + + // RBAC companions: SA + Role(create etcdsnapshots) + RoleBinding. 
+ if len(plan.Extras) != 3 { + t.Fatalf("extras = %d, want SA+Role+RoleBinding", len(plan.Extras)) + } + var role *rbacv1.Role + for _, e := range plan.Extras { + if r, ok := e.(*rbacv1.Role); ok { + role = r + } + } + if role == nil { + t.Fatal("no Role among extras") + } + if len(role.Rules) != 1 || role.Rules[0].Resources[0] != "etcdsnapshots" || role.Rules[0].Verbs[0] != "create" { + t.Errorf("Role rules = %+v", role.Rules) + } + + if !hasWarning(plan.Warnings, "PRINTED ONLY") { + t.Errorf("expected review-and-apply warning, got %v", plan.Warnings) + } +} + +// TestTranslateSchedule_MalformedDestination: an invalid destination is a +// hard error, mirroring the backup translation. +func TestTranslateSchedule_MalformedDestination(t *testing.T) { + plan := TranslateSchedule("s", "ns", legacy.EtcdBackupScheduleSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "c"}, + Schedule: "@hourly", + }) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error", plan.Action) + } +} diff --git a/internal/migrate/legacy/types.go b/internal/migrate/legacy/types.go new file mode 100644 index 00000000..9a15bd77 --- /dev/null +++ b/internal/migrate/legacy/types.go @@ -0,0 +1,168 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +// Package legacy holds trimmed copies of the legacy etcd.aenix.io/v1alpha1 +// spec types, as defined on this repository's `main` branch. The migration +// tool decodes legacy CRs (fetched as unstructured) into these structs with +// runtime.DefaultUnstructuredConverter — there is no scheme registration and +// no deepcopy generation, because the legacy API is consumed read-only and +// never written back. 
Status types are intentionally omitted: the legacy +// status carries only conditions, none of which inform the translation. +// +// Keep the json tags byte-for-byte identical to the originals +// (main:api/v1alpha1/*_types.go); the converter matches on them. +package legacy + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// GroupVersion identifiers for the legacy API, used by the discovery layer. +const ( + Group = "etcd.aenix.io" + Version = "v1alpha1" +) + +// DefaultEtcdImage is the image the legacy operator ran when podTemplate did +// not override it; its tag is the version-extraction fallback. +const DefaultEtcdImage = "quay.io/coreos/etcd:v3.5.12" + +// EtcdClusterSpec mirrors the legacy EtcdCluster spec. +type EtcdClusterSpec struct { + Replicas *int32 `json:"replicas,omitempty"` + Options map[string]string `json:"options,omitempty"` + PodTemplate PodTemplate `json:"podTemplate,omitempty"` + ServiceTemplate *EmbeddedService `json:"serviceTemplate,omitempty"` + HeadlessServiceTemplate *EmbeddedMetadataResource `json:"headlessServiceTemplate,omitempty"` + PodDisruptionBudgetTemplate *EmbeddedPodDisruptionBudget `json:"podDisruptionBudgetTemplate,omitempty"` + Storage StorageSpec `json:"storage"` + Security *SecuritySpec `json:"security,omitempty"` + Bootstrap *BootstrapSpec `json:"bootstrap,omitempty"` +} + +// BootstrapSpec mirrors the legacy restore-at-creation config. +type BootstrapSpec struct { + Restore *RestoreSpec `json:"restore,omitempty"` +} + +// RestoreSpec mirrors the legacy restore source. +type RestoreSpec struct { + Source BackupDestination `json:"source"` +} + +// EmbeddedObjectMetadata mirrors the legacy embedded metadata subset. 
+type EmbeddedObjectMetadata struct { + Name string `json:"name,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` +} + +// PodTemplate mirrors the legacy pod-template override. +type PodTemplate struct { + EmbeddedObjectMetadata `json:"metadata,omitempty"` + Spec corev1.PodSpec `json:"spec,omitempty"` +} + +// StorageSpec mirrors the legacy storage config: emptyDir takes precedence +// over volumeClaimTemplate when both are set. +type StorageSpec struct { + EmptyDir *corev1.EmptyDirVolumeSource `json:"emptyDir,omitempty"` + VolumeClaimTemplate EmbeddedPersistentVolumeClaim `json:"volumeClaimTemplate,omitempty"` +} + +// SecuritySpec mirrors the legacy security config. +type SecuritySpec struct { + TLS TLSSpec `json:"tls,omitempty"` + EnableAuth bool `json:"enableAuth,omitempty"` +} + +// TLSSpec mirrors the legacy six-secret TLS layout. All fields are secret +// names in the cluster's namespace. +type TLSSpec struct { + PeerTrustedCASecret string `json:"peerTrustedCASecret,omitempty"` + PeerSecret string `json:"peerSecret,omitempty"` + ServerTrustedCASecret string `json:"serverTrustedCASecret,omitempty"` + ServerSecret string `json:"serverSecret,omitempty"` + ClientTrustedCASecret string `json:"clientTrustedCASecret,omitempty"` + ClientSecret string `json:"clientSecret,omitempty"` +} + +// EmbeddedPersistentVolumeClaim mirrors the legacy embedded PVC template. +// (Status is dropped: read-only and irrelevant to translation.) +type EmbeddedPersistentVolumeClaim struct { + metav1.TypeMeta `json:",inline"` + EmbeddedObjectMetadata `json:"metadata,omitempty"` + Spec corev1.PersistentVolumeClaimSpec `json:"spec,omitempty"` +} + +// EmbeddedPodDisruptionBudget mirrors the legacy PDB template. The inner +// spec is irrelevant to translation (the new operator owns the PDB), so it +// is kept opaque — its mere presence triggers a warning. 
+type EmbeddedPodDisruptionBudget struct { + EmbeddedObjectMetadata `json:"metadata,omitempty"` + Spec PodDisruptionBudgetSpec `json:"spec"` +} + +// PodDisruptionBudgetSpec mirrors the legacy PDB knobs. Translation only +// reports the template's presence, never these values. +type PodDisruptionBudgetSpec struct { + MinAvailable *intstr.IntOrString `json:"minAvailable,omitempty"` + MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"` +} + +// EmbeddedService mirrors the legacy client-service template. +type EmbeddedService struct { + EmbeddedObjectMetadata `json:"metadata,omitempty"` + Spec corev1.ServiceSpec `json:"spec,omitempty"` +} + +// EmbeddedMetadataResource mirrors the legacy headless-service template. +type EmbeddedMetadataResource struct { + EmbeddedObjectMetadata `json:"metadata,omitempty"` +} + +// EtcdBackupSpec mirrors the legacy one-shot backup spec. +type EtcdBackupSpec struct { + ClusterRef corev1.LocalObjectReference `json:"clusterRef"` + Destination BackupDestination `json:"destination"` +} + +// BackupDestination mirrors the legacy S3-or-PVC destination union. +type BackupDestination struct { + S3 *S3BackupDestination `json:"s3,omitempty"` + PVC *PVCBackupDestination `json:"pvc,omitempty"` +} + +// S3BackupDestination mirrors the legacy S3 destination. +type S3BackupDestination struct { + Endpoint string `json:"endpoint"` + Bucket string `json:"bucket"` + Key string `json:"key,omitempty"` + CredentialsSecretRef corev1.LocalObjectReference `json:"credentialsSecretRef"` + Region string `json:"region,omitempty"` + ForcePathStyle bool `json:"forcePathStyle,omitempty"` +} + +// PVCBackupDestination mirrors the legacy PVC destination. +type PVCBackupDestination struct { + ClaimName string `json:"claimName"` + SubPath string `json:"subPath,omitempty"` +} + +// EtcdBackupScheduleSpec mirrors the legacy recurring-backup spec. 
+type EtcdBackupScheduleSpec struct { + ClusterRef corev1.LocalObjectReference `json:"clusterRef"` + Schedule string `json:"schedule"` + Destination BackupDestination `json:"destination"` + SuccessfulJobsHistoryLimit *int32 `json:"successfulJobsHistoryLimit,omitempty"` + FailedJobsHistoryLimit *int32 `json:"failedJobsHistoryLimit,omitempty"` +} diff --git a/internal/migrate/plan.go b/internal/migrate/plan.go new file mode 100644 index 00000000..38e2b9d6 --- /dev/null +++ b/internal/migrate/plan.go @@ -0,0 +1,89 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +// Package migrate translates legacy etcd.aenix.io/v1alpha1 resources into +// their etcd-operator.cozystack.io/v1alpha2 equivalents. The translation +// layer is pure — it never touches a cluster — so the orchestration in +// cmd/etcd-migrate can render the same plan in dry-run and apply modes. +package migrate + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Action says what the tool intends to do for one planned resource. +type Action string + +const ( + // ActionAdopt performs an in-place adoption of a live legacy cluster: + // create the new-API CRs with prefilled status, re-own the existing + // pods/PVCs/Services, dismantle the legacy CR + StatefulSet with + // Orphan propagation. The etcd pods are never touched. + ActionAdopt Action = "Adopt" + // ActionCreate creates a new-API object (and afterwards deletes the + // legacy source named by DeleteRef, when set). + ActionCreate Action = "Create" + // ActionPrint renders a manifest for the user to review and apply + // manually (EtcdBackupSchedule → CronJob). Nothing is written and the + // legacy source is NOT deleted. 
+ ActionPrint Action = "Print" + // ActionSkip means the target already exists; the plan is re-runnable. + ActionSkip Action = "Skip" + // ActionError means the source cannot be migrated; nothing is created + // or deleted for it. + ActionError Action = "Error" +) + +// ObjectRef names a legacy object to delete after its replacement exists. +type ObjectRef struct { + GVR schema.GroupVersionResource + Namespace string + Name string +} + +// ResourcePlan is the per-resource unit the tool renders or applies. +type ResourcePlan struct { + // SourceKind/SourceName/Namespace identify the legacy object. + SourceKind string + SourceName string + Namespace string + + Action Action + + // Target is the new-API object to create (EtcdCluster, EtcdSnapshot, + // generated Secret) or to print (CronJob + RBAC). nil on pure errors. + Target client.Object + // Extras are companion objects sharing Target's fate: created right + // before Target on ActionCreate (e.g. a generated root-credentials + // Secret), printed alongside it on ActionPrint (e.g. the CronJob's + // ServiceAccount/Role/RoleBinding). + Extras []client.Object + + // DeleteRef names the legacy object to delete once Target exists. + // nil for ActionPrint/ActionError (and for ActionSkip deletes still + // proceed — the target exists, the source is leftover). For + // ActionAdopt the delete uses Orphan propagation: the children + // (StatefulSet, Services) must survive the legacy CR. + DeleteRef *ObjectRef + + // Adoption carries the in-place payload for ActionAdopt cluster plans: + // member CRs, status prefills and the legacy objects to re-own or + // dismantle. nil for every other kind/action. + Adoption *AdoptionPlan + + // Warnings list legacy settings that do not carry over (dropped fields, + // manual follow-ups like merging CA bundles into secrets). + Warnings []string + // Errors explain why Action == ActionError. + Errors []string + // Notes are informational (endpoint compatibility, auth follow-ups). 
+ Notes []string +} diff --git a/internal/migrate/snapshotjob.go b/internal/migrate/snapshotjob.go new file mode 100644 index 00000000..11613a95 --- /dev/null +++ b/internal/migrate/snapshotjob.go @@ -0,0 +1,201 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "fmt" + + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// Mount paths inside the snapshot Job, mirroring the operator's own +// snapshot-job layout (controllers/snapshot_job.go). +const ( + snapshotCAMountPath = "/etc/etcd/pki/ca" + snapshotClientMountPath = "/etc/etcd/pki/client" + snapshotPVCMountPath = "/snapshot/data" + + snapshotJobTTLSeconds int32 = 600 + snapshotJobBackoffLim int32 = 3 + snapshotJobActiveDeadlineSeconds int64 = 1800 +) + +// SnapshotName is the SNAPSHOT_NAME the migration Job stores the artifact +// under. Namespaced so clusters sharing one S3 bucket/prefix don't collide. +func SnapshotName(namespace, cluster string) string { + return namespace + "-" + cluster + "-migration" +} + +// SnapshotJobName names the one-off Job the tool creates per cluster. +func SnapshotJobName(cluster string) string { + return cluster + "-migration-snapshot" +} + +// LegacyClientEndpoint is the legacy operator's client Service URL — the +// Service is named after the cluster (the new operator's "-client" suffix +// does not exist yet at snapshot time). https iff the legacy cluster serves +// TLS on the client port. 
+func LegacyClientEndpoint(name, namespace string, spec legacy.EtcdClusterSpec) string { + scheme := "http" + if spec.Security != nil && spec.Security.TLS.ServerSecret != "" { + scheme = "https" + } + return fmt.Sprintf("%s://%s.%s.svc:2379", scheme, name, namespace) +} + +// BuildSnapshotJob constructs the one-off Job that snapshots a LEGACY +// cluster with the new operator's snapshot agent (the agent is env-driven +// and needs no Kubernetes API access, so it works with both controllers +// stopped). Mirrors the operator's buildSnapshotJob with two deltas: +// +// - endpoints point at the legacy client Service; +// - the TLS material comes from the legacy secret layout, where the CA +// lives in a SEPARATE secret (serverTrustedCASecret) and the client +// identity is the legacy operator's clientSecret. +// +// No ETCD_USERNAME/PASSWORD: the legacy root user is NoPassword (cert-only), +// and when the cluster had auth enabled the tool disables it before this Job +// runs, so the dial is anonymous either way. +func BuildSnapshotJob(name, namespace, clusterUID string, spec legacy.EtcdClusterSpec, dest lll.SnapshotLocation, agentImage string) *batchv1.Job { + env := []corev1.EnvVar{ + {Name: "ETCD_ENDPOINTS", Value: LegacyClientEndpoint(name, namespace, spec)}, + {Name: "SNAPSHOT_NAME", Value: SnapshotName(namespace, name)}, + // Stamped onto the S3 object so a re-run recognizes its own prior + // upload instead of failing the agent's overwrite guard. + {Name: "SNAPSHOT_UID", Value: clusterUID}, + } + + var volumes []corev1.Volume + var mounts []corev1.VolumeMount + + if spec.Security != nil && spec.Security.TLS.ServerSecret != "" { + t := spec.Security.TLS + // Legacy keeps the client-plane CA in serverTrustedCASecret; fall + // back to the server secret's own ca.crt when no separate CA secret + // is set (the post-merge layout the migration asks users for). 
+ caSecret := t.ServerTrustedCASecret + if caSecret == "" { + caSecret = t.ServerSecret + } + volumes = append(volumes, corev1.Volume{ + Name: "etcd-ca", + VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: caSecret}}, + }) + mounts = append(mounts, corev1.VolumeMount{Name: "etcd-ca", MountPath: snapshotCAMountPath, ReadOnly: true}) + env = append(env, corev1.EnvVar{Name: "ETCD_TLS_CA_PATH", Value: snapshotCAMountPath + "/ca.crt"}) + + if t.ClientSecret != "" { + volumes = append(volumes, corev1.Volume{ + Name: "etcd-client", + VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: t.ClientSecret}}, + }) + mounts = append(mounts, corev1.VolumeMount{Name: "etcd-client", MountPath: snapshotClientMountPath, ReadOnly: true}) + env = append(env, + corev1.EnvVar{Name: "ETCD_TLS_CERT_PATH", Value: snapshotClientMountPath + "/tls.crt"}, + corev1.EnvVar{Name: "ETCD_TLS_KEY_PATH", Value: snapshotClientMountPath + "/tls.key"}, + ) + } + } + + switch { + case dest.S3 != nil: + s3 := dest.S3 + env = append(env, + corev1.EnvVar{Name: "SNAPSHOT_DEST_KIND", Value: "s3"}, + corev1.EnvVar{Name: "S3_ENDPOINT", Value: s3.Endpoint}, + corev1.EnvVar{Name: "S3_BUCKET", Value: s3.Bucket}, + corev1.EnvVar{Name: "S3_KEY", Value: s3.Key}, + corev1.EnvVar{Name: "S3_REGION", Value: s3.Region}, + corev1.EnvVar{Name: "S3_FORCE_PATH_STYLE", Value: fmt.Sprintf("%t", s3.ForcePathStyle)}, + corev1.EnvVar{Name: "AWS_ACCESS_KEY_ID", ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: s3.CredentialsSecretRef, Key: "AWS_ACCESS_KEY_ID", + }}}, + corev1.EnvVar{Name: "AWS_SECRET_ACCESS_KEY", ValueFrom: &corev1.EnvVarSource{SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: s3.CredentialsSecretRef, Key: "AWS_SECRET_ACCESS_KEY", + }}}, + ) + case dest.PVC != nil: + env = append(env, + corev1.EnvVar{Name: "SNAPSHOT_DEST_KIND", Value: "pvc"}, + corev1.EnvVar{Name: "PVC_MOUNT_PATH", Value: 
snapshotPVCMountPath}, + corev1.EnvVar{Name: "PVC_SUBPATH", Value: dest.PVC.SubPath}, + ) + volumes = append(volumes, corev1.Volume{ + Name: "snapshot-data", + VolumeSource: corev1.VolumeSource{PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: dest.PVC.ClaimName, + }}, + }) + mounts = append(mounts, corev1.VolumeMount{Name: "snapshot-data", MountPath: snapshotPVCMountPath}) + } + + ttl := snapshotJobTTLSeconds + backoff := snapshotJobBackoffLim + activeDeadline := snapshotJobActiveDeadlineSeconds + notRoot := true + user := int64(65532) + noAutomount := false + noEscalation := false + + return &batchv1.Job{ + TypeMeta: metav1.TypeMeta{APIVersion: "batch/v1", Kind: "Job"}, + ObjectMeta: metav1.ObjectMeta{ + Name: SnapshotJobName(name), + Namespace: namespace, + Labels: map[string]string{"app.kubernetes.io/created-by": "etcd-migrate"}, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: &backoff, + TTLSecondsAfterFinished: &ttl, + ActiveDeadlineSeconds: &activeDeadline, + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + AutomountServiceAccountToken: &noAutomount, + SecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &notRoot, + RunAsUser: &user, + RunAsGroup: &user, + FSGroup: &user, + SeccompProfile: &corev1.SeccompProfile{Type: corev1.SeccompProfileTypeRuntimeDefault}, + }, + Containers: []corev1.Container{{ + Name: "snapshot-agent", + Image: agentImage, + Command: []string{"/manager", "snapshot-agent"}, + Env: env, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &noEscalation, + Capabilities: &corev1.Capabilities{Drop: []corev1.Capability{"ALL"}}, + }, + VolumeMounts: mounts, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("64Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }}, +
Volumes: volumes, + }, + }, + }, + } +} diff --git a/internal/migrate/snapshotjob_test.go b/internal/migrate/snapshotjob_test.go new file mode 100644 index 00000000..dcd24433 --- /dev/null +++ b/internal/migrate/snapshotjob_test.go @@ -0,0 +1,121 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// TestBuildSnapshotJob pins the Job wiring against the agent's env contract +// and the LEGACY secret layout (separate CA secret, legacy client service). +func TestBuildSnapshotJob(t *testing.T) { + spec := legacy.EtcdClusterSpec{ + Security: &legacy.SecuritySpec{ + EnableAuth: true, + TLS: legacy.TLSSpec{ + ServerSecret: "srv", + ServerTrustedCASecret: "srv-ca", + ClientSecret: "op-client", + }, + }, + } + dest := lll.SnapshotLocation{S3: &lll.S3SnapshotLocation{ + Endpoint: "https://minio", Bucket: "b", Key: "k", Region: "r", ForcePathStyle: true, + CredentialsSecretRef: corev1.LocalObjectReference{Name: "s3-creds"}, + }} + + job := BuildSnapshotJob("my-etcd", "ns", "uid-1", spec, dest, "ghcr.io/op:1") + + if job.Name != "my-etcd-migration-snapshot" || job.Namespace != "ns" { + t.Errorf("job identity = %s/%s", job.Namespace, job.Name) + } + ctr := job.Spec.Template.Spec.Containers[0] + if ctr.Image != "ghcr.io/op:1" || ctr.Command[1] != "snapshot-agent" { + t.Errorf("container = %s %v", ctr.Image, ctr.Command) + } + + env := map[string]string{} + for _, e := range ctr.Env { + env[e.Name] = e.Value + } + // The endpoint is the LEGACY client service (named after the cluster), + // https because serverSecret is set. 
+ if env["ETCD_ENDPOINTS"] != "https://my-etcd.ns.svc:2379" { + t.Errorf("ETCD_ENDPOINTS = %q", env["ETCD_ENDPOINTS"]) + } + if env["SNAPSHOT_NAME"] != "ns-my-etcd-migration" || env["SNAPSHOT_UID"] != "uid-1" { + t.Errorf("snapshot identity env = %q/%q", env["SNAPSHOT_NAME"], env["SNAPSHOT_UID"]) + } + if env["S3_ENDPOINT"] != "https://minio" || env["S3_BUCKET"] != "b" || env["S3_KEY"] != "k" || + env["S3_REGION"] != "r" || env["S3_FORCE_PATH_STYLE"] != "true" { + t.Errorf("S3 env = %v", env) + } + // NO auth env: the legacy root is NoPassword and auth is disabled + // before the Job runs. + for _, e := range ctr.Env { + if e.Name == "ETCD_USERNAME" || e.Name == "ETCD_PASSWORD" { + t.Errorf("snapshot Job must dial anonymously, found %s", e.Name) + } + } + if env["ETCD_TLS_CA_PATH"] == "" || env["ETCD_TLS_CERT_PATH"] == "" { + t.Errorf("TLS env missing: %v", env) + } + + // The CA mount must come from the SEPARATE legacy CA secret. + mounted := map[string]string{} + for _, v := range job.Spec.Template.Spec.Volumes { + if v.Secret != nil { + mounted[v.Name] = v.Secret.SecretName + } + } + if mounted["etcd-ca"] != "srv-ca" { + t.Errorf("CA volume from %q, want srv-ca (legacy separate CA secret)", mounted["etcd-ca"]) + } + if mounted["etcd-client"] != "op-client" { + t.Errorf("client volume from %q, want op-client", mounted["etcd-client"]) + } +} + +// TestBuildSnapshotJob_PlaintextAndCAFallback covers the plaintext endpoint +// scheme and the post-merge layout where the CA lives inside the server +// secret itself. 
+func TestBuildSnapshotJob_PlaintextAndCAFallback(t *testing.T) { + t.Run("plaintext", func(t *testing.T) { + job := BuildSnapshotJob("c", "ns", "u", legacy.EtcdClusterSpec{}, + lll.SnapshotLocation{PVC: &lll.PVCSnapshotLocation{ClaimName: "claim"}}, "img") + for _, e := range job.Spec.Template.Spec.Containers[0].Env { + if e.Name == "ETCD_ENDPOINTS" && e.Value != "http://c.ns.svc:2379" { + t.Errorf("ETCD_ENDPOINTS = %q, want plaintext", e.Value) + } + if e.Name == "ETCD_TLS_CA_PATH" { + t.Error("plaintext cluster must not mount TLS material") + } + } + }) + + t.Run("ca falls back to server secret", func(t *testing.T) { + spec := legacy.EtcdClusterSpec{Security: &legacy.SecuritySpec{ + TLS: legacy.TLSSpec{ServerSecret: "srv"}, // no separate CA secret + }} + job := BuildSnapshotJob("c", "ns", "u", spec, + lll.SnapshotLocation{PVC: &lll.PVCSnapshotLocation{ClaimName: "claim"}}, "img") + for _, v := range job.Spec.Template.Spec.Volumes { + if v.Name == "etcd-ca" && v.Secret.SecretName != "srv" { + t.Errorf("CA volume from %q, want srv", v.Secret.SecretName) + } + } + }) +} diff --git a/internal/migrate/translate.go b/internal/migrate/translate.go new file mode 100644 index 00000000..85e47dcb --- /dev/null +++ b/internal/migrate/translate.go @@ -0,0 +1,504 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "crypto/rand" + "encoding/hex" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +// Legacy GVRs the tool discovers and deletes. +var ( + ClusterGVR = schema.GroupVersionResource{Group: legacy.Group, Version: legacy.Version, Resource: "etcdclusters"} + BackupGVR = schema.GroupVersionResource{Group: legacy.Group, Version: legacy.Version, Resource: "etcdbackups"} + ScheduleGVR = schema.GroupVersionResource{Group: legacy.Group, Version: legacy.Version, Resource: "etcdbackupschedules"} +) + +// versionRe is the new API's spec.version pattern. +var versionRe = regexp.MustCompile(`^\d+\.\d+\.\d+$`) + +// TranslateOptions carries the per-run knobs that influence translation. +type TranslateOptions struct { + // VersionOverride forces spec.version for every cluster instead of + // extracting it from the legacy image tag. + VersionOverride string + // AuthSecretName references an existing kubernetes.io/basic-auth Secret + // (in each cluster's namespace) for clusters with enableAuth. Empty ⇒ + // the tool generates one per cluster. + AuthSecretName string +} + +// TranslateCluster converts one legacy EtcdCluster into a v1alpha2 plan +// entry. It is pure apart from generating a random password for the auth +// Secret when one is needed and none was supplied. 
+func TranslateCluster(name, namespace string, spec legacy.EtcdClusterSpec, opts TranslateOptions) ResourcePlan { + plan := ResourcePlan{ + SourceKind: "EtcdCluster", + SourceName: name, + Namespace: namespace, + Action: ActionCreate, + DeleteRef: &ObjectRef{GVR: ClusterGVR, Namespace: namespace, Name: name}, + } + + out := &lll.EtcdCluster{ + TypeMeta: metav1.TypeMeta{APIVersion: lll.GroupVersion.String(), Kind: "EtcdCluster"}, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + } + + // Replicas: same semantics, same default (3). + if spec.Replicas != nil { + r := *spec.Replicas + out.Spec.Replicas = &r + } + + // Version from the etcd container image tag (or override). + version, vWarns, vErr := extractVersion(spec.PodTemplate.Spec, opts.VersionOverride) + plan.Warnings = append(plan.Warnings, vWarns...) + if vErr != nil { + plan.Errors = append(plan.Errors, vErr.Error()) + } + out.Spec.Version = version + + // Storage. + storage, sWarns, sErr := translateStorage(spec.Storage) + plan.Warnings = append(plan.Warnings, sWarns...) + if sErr != nil { + plan.Errors = append(plan.Errors, sErr.Error()) + } + out.Spec.Storage = storage + + // Pod template: the mappable subset, plus warnings for everything else. + translatePodTemplate(spec.PodTemplate, out, &plan) + + // Templates the new operator owns outright. 
+ if spec.ServiceTemplate != nil { + plan.Warnings = append(plan.Warnings, + "spec.serviceTemplate is dropped: the new operator owns the client Service (named \"-client\")") + } + if spec.HeadlessServiceTemplate != nil { + plan.Warnings = append(plan.Warnings, + "spec.headlessServiceTemplate is dropped: the new operator owns the headless Service (named \"\")") + } + if spec.PodDisruptionBudgetTemplate != nil { + plan.Warnings = append(plan.Warnings, + "spec.podDisruptionBudgetTemplate is dropped: the new operator auto-emits a PDB with maxUnavailable=(voters-1)/2") + } + + // etcd args: v1alpha2's spec.options is a closed typed struct covering + // exactly the keys Cozystack's legacy package set. Map those four; + // anything else has no typed equivalent and is dropped with a warning. + if len(spec.Options) > 0 { + typed, oWarns, oErrs := translateEtcdOptions(spec.Options) + plan.Warnings = append(plan.Warnings, oWarns...) + plan.Errors = append(plan.Errors, oErrs...) + out.Spec.Options = typed + } + + // TLS. + tls, tWarns := translateTLS(spec.Security) + plan.Warnings = append(plan.Warnings, tWarns...) + out.Spec.TLS = tls + + // Auth. + translateAuth(spec.Security, out, &plan, opts) + + // Restore-at-bootstrap is dropped: the adopted cluster already has its + // data, and the new API consults spec.bootstrap only at first bootstrap + // — which an adopted cluster (status.clusterID prefilled) never runs. + if spec.Bootstrap != nil && spec.Bootstrap.Restore != nil { + plan.Warnings = append(plan.Warnings, + "spec.bootstrap.restore is dropped: it was consumed at the legacy cluster's creation and an adopted cluster never bootstraps") + } + + if len(plan.Errors) > 0 { + plan.Action = ActionError + plan.Target = nil + plan.Extras = nil + plan.DeleteRef = nil + return plan + } + plan.Target = out + return plan +} + +// extractVersion derives spec.version from the legacy etcd container image +// tag, honoring an override. 
+func extractVersion(podSpec corev1.PodSpec, override string) (string, []string, error) { + if override != "" { + if !versionRe.MatchString(override) { + return "", nil, fmt.Errorf("--version %q does not match required pattern X.Y.Z", override) + } + return override, nil, nil + } + image := legacy.DefaultEtcdImage + var warns []string + if c := findContainer(podSpec.Containers, "etcd"); c != nil && c.Image != "" { + image = c.Image + } else { + warns = append(warns, fmt.Sprintf("no etcd image override in podTemplate; assuming the legacy default %s", legacy.DefaultEtcdImage)) + } + idx := strings.LastIndex(image, ":") + if idx < 0 || idx == len(image)-1 { + return "", warns, fmt.Errorf("cannot extract etcd version from image %q (no tag); pass --version", image) + } + tag := strings.TrimPrefix(image[idx+1:], "v") + if !versionRe.MatchString(tag) { + return "", warns, fmt.Errorf("cannot derive etcd version from image tag %q (want X.Y.Z); pass --version", image[idx+1:]) + } + return tag, warns, nil +} + +func findContainer(containers []corev1.Container, name string) *corev1.Container { + for i := range containers { + if containers[i].Name == name { + return &containers[i] + } + } + return nil +} + +// translateStorage maps the legacy emptyDir/volumeClaimTemplate union onto +// the new size/medium/storageClassName triple. +func translateStorage(s legacy.StorageSpec) (lll.StorageSpec, []string, error) { + // emptyDir takes precedence over volumeClaimTemplate in the legacy + // operator, so it does here too. + if s.EmptyDir != nil { + // In-place adoption hands the new operator the EXISTING pods and + // their PVCs. An emptyDir cluster has no PVCs to adopt: the data + // lives in pod-bound volumes, an EtcdMember would have nothing to + // own, and the first replacement would silently lose the member's + // data. Recreate such clusters manually. 
+ return lll.StorageSpec{}, nil, fmt.Errorf( + "storage.emptyDir cannot be migrated in place: the data lives in pod-bound volumes with no PVC for the new operator to adopt; recreate this cluster manually") + } + + vct := s.VolumeClaimTemplate + size, hasSize := vct.Spec.Resources.Requests[corev1.ResourceStorage] + vctSet := hasSize || vct.Spec.StorageClassName != nil || len(vct.Spec.AccessModes) > 0 || vct.Name != "" + if !vctSet { + // Neither emptyDir nor volumeClaimTemplate: the legacy operator + // defaulted to a disk-backed emptyDir — same dead end as above. + return lll.StorageSpec{}, nil, fmt.Errorf( + "no storage configured: the legacy operator defaulted to a disk-backed emptyDir, which cannot be migrated in place (no PVC to adopt); recreate this cluster manually") + } + + out := lll.StorageSpec{StorageClassName: vct.Spec.StorageClassName} + var warns []string + if hasSize { + out.Size = size + } else { + out.Size = resource.MustParse("1Gi") + warns = append(warns, "volumeClaimTemplate has no requests.storage; defaulting spec.storage.size to 1Gi") + } + return out, warns, nil +} + +// translatePodTemplate maps the supported pod-template subset and warns +// about everything else it finds populated. +// translateEtcdOptions maps the legacy free-form spec.options map onto +// v1alpha2's closed typed EtcdOptions. The four keys Cozystack's legacy +// package set translate 1:1; unknown keys are dropped with a warning. +// Unparsable numeric values are errors, not warnings — silently dropping a +// backend quota the user had set would change the cluster's NOSPACE +// behaviour on migrate. 
func translateEtcdOptions(options map[string]string) (*lll.EtcdOptions, []string, []string) {
	var warnings, errs []string
	typed := &lll.EtcdOptions{}
	// mapped records whether at least one key landed in typed; without it
	// the function returns nil instead of an empty options struct.
	mapped := false

	// Walk the keys in sorted order so warnings and errors come out in a
	// stable order regardless of Go's randomized map iteration.
	keys := make([]string, 0, len(options))
	for k := range options {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var unknown []string
	for _, k := range keys {
		v := options[k]
		switch k {
		case "quota-backend-bytes":
			n, err := strconv.ParseInt(v, 10, 64)
			if err != nil {
				errs = append(errs, fmt.Sprintf("spec.options[%q]=%q is not an integer", k, v))
				continue
			}
			typed.QuotaBackendBytes = &n
			mapped = true
		case "auto-compaction-mode":
			if v != string(lll.AutoCompactionModePeriodic) && v != string(lll.AutoCompactionModeRevision) {
				errs = append(errs, fmt.Sprintf("spec.options[%q]=%q must be %q or %q", k, v,
					lll.AutoCompactionModePeriodic, lll.AutoCompactionModeRevision))
				continue
			}
			typed.AutoCompactionMode = lll.AutoCompactionMode(v)
			mapped = true
		case "auto-compaction-retention":
			// Copied through as a string; no validation happens here.
			typed.AutoCompactionRetention = v
			mapped = true
		case "snapshot-count":
			n, err := strconv.ParseInt(v, 10, 64)
			if err != nil {
				errs = append(errs, fmt.Sprintf("spec.options[%q]=%q is not an integer", k, v))
				continue
			}
			typed.SnapshotCount = &n
			mapped = true
		default:
			unknown = append(unknown, fmt.Sprintf("%s=%q", k, v))
		}
	}
	// All unknown keys are reported in a single warning.
	if len(unknown) > 0 {
		warnings = append(warnings,
			"spec.options keys with no typed v1alpha2 equivalent; dropped etcd args: "+strings.Join(unknown, ", "))
	}
	if !mapped {
		typed = nil
	}
	return typed, warnings, errs
}

// translatePodTemplate maps the supported pod-template subset onto out and
// warns about everything else it finds populated. Mapped: pod labels and
// annotations (as spec.additionalMetadata), affinity, topology spread
// constraints, and the "etcd" container's resources. Every other populated
// field is collected and reported in one sorted warning appended to plan.
func translatePodTemplate(pt legacy.PodTemplate, out *lll.EtcdCluster, plan *ResourcePlan) {
	if len(pt.Labels) > 0 || len(pt.Annotations) > 0 {
		// The maps are assigned as-is, so the target shares them with the
		// legacy spec rather than owning copies.
		out.Spec.AdditionalMetadata = &lll.AdditionalMetadata{
			Labels:      pt.Labels,
			Annotations: pt.Annotations,
		}
	}
	ps := pt.Spec
	out.Spec.Affinity = ps.Affinity
	out.Spec.TopologySpreadConstraints = ps.TopologySpreadConstraints

	var dropped []string
	if c := findContainer(ps.Containers, "etcd"); c != nil {
		out.Spec.Resources = c.Resources
		// Image and Resources are consumed above; everything else on the
		// etcd container is an unmappable override.
		for field, set := range map[string]bool{
			"command":         len(c.Command) > 0,
			"args":            len(c.Args) > 0,
			"env":             len(c.Env) > 0,
			"envFrom":         len(c.EnvFrom) > 0,
			"volumeMounts":    len(c.VolumeMounts) > 0,
			"ports":           len(c.Ports) > 0,
			"livenessProbe":   c.LivenessProbe != nil,
			"readinessProbe":  c.ReadinessProbe != nil,
			"startupProbe":    c.StartupProbe != nil,
			"securityContext": c.SecurityContext != nil,
		} {
			if set {
				dropped = append(dropped, "containers[etcd]."+field)
			}
		}
	}
	// Any container other than "etcd" is reported as a dropped sidecar.
	for i := range ps.Containers {
		if ps.Containers[i].Name != "etcd" {
			dropped = append(dropped, fmt.Sprintf("containers[%s] (sidecar)", ps.Containers[i].Name))
		}
	}
	for field, set := range map[string]bool{
		"initContainers":     len(ps.InitContainers) > 0,
		"volumes":            len(ps.Volumes) > 0,
		"nodeSelector":       len(ps.NodeSelector) > 0,
		"tolerations":        len(ps.Tolerations) > 0,
		"serviceAccountName": ps.ServiceAccountName != "",
		// A non-nil but semantically empty security context is not a drop.
		"securityContext":               ps.SecurityContext != nil && !equality.Semantic.DeepEqual(*ps.SecurityContext, corev1.PodSecurityContext{}),
		"priorityClassName":             ps.PriorityClassName != "",
		"imagePullSecrets":              len(ps.ImagePullSecrets) > 0,
		"hostNetwork":                   ps.HostNetwork,
		"hostAliases":                   len(ps.HostAliases) > 0,
		"dnsPolicy":                     ps.DNSPolicy != "",
		"dnsConfig":                     ps.DNSConfig != nil,
		"runtimeClassName":              ps.RuntimeClassName != nil,
		"schedulerName":                 ps.SchedulerName != "",
		"terminationGracePeriodSeconds": ps.TerminationGracePeriodSeconds != nil,
	} {
		if set {
			dropped = append(dropped, field)
		}
	}
	if len(dropped) > 0 {
		// The map literals above iterate in random order; sorting makes the
		// warning text deterministic.
		sort.Strings(dropped)
		plan.Warnings = append(plan.Warnings,
			"spec.podTemplate fields with no v1alpha2 equivalent are dropped: "+strings.Join(dropped, ", "))
	}
}

// translateTLS maps the legacy six-secret layout onto the new two-subtree
// model. The new operator expects ca.crt INSIDE the server/peer secrets;
// legacy kept CAs in separate secrets, so merges become user follow-ups.
//
// It returns nil TLS when neither a client nor a peer subtree results, and
// still returns any warnings gathered along the way.
func translateTLS(sec *legacy.SecuritySpec) (*lll.EtcdClusterTLS, []string) {
	if sec == nil {
		return nil, nil
	}
	t := sec.TLS
	var warns []string
	out := &lll.EtcdClusterTLS{}

	// Client plane: only serverSecret turns it on; the other client-plane
	// secrets are meaningful solely in combination with it.
	if t.ServerSecret != "" {
		out.Client = &lll.ClientTLS{
			ServerSecretRef: &corev1.LocalObjectReference{Name: t.ServerSecret},
		}
		if t.ClientSecret != "" {
			out.Client.OperatorClientSecretRef = &corev1.LocalObjectReference{Name: t.ClientSecret}
		}
		if t.ServerTrustedCASecret != "" && t.ServerTrustedCASecret != t.ServerSecret {
			warns = append(warns, fmt.Sprintf(
				"merge ca.crt from secret %q into secret %q before starting the new operator: v1alpha2 reads the client-plane CA from the server secret's ca.crt",
				t.ServerTrustedCASecret, t.ServerSecret))
		}
		if t.ClientTrustedCASecret != "" && t.ClientTrustedCASecret != t.ServerSecret {
			warns = append(warns, fmt.Sprintf(
				"secret %q (clientTrustedCASecret) is dropped: v1alpha2 uses the server secret's ca.crt as etcd's --trusted-ca-file; merge the CA into %q's ca.crt if client certs are signed by it",
				t.ClientTrustedCASecret, t.ServerSecret))
		}
	} else if t.ClientSecret != "" || t.ServerTrustedCASecret != "" || t.ClientTrustedCASecret != "" {
		warns = append(warns,
			"client-plane TLS secrets are dropped: legacy spec sets client-plane material without serverSecret, which enabled nothing in the legacy operator either")
	}

	// Peer plane: same shape, keyed on peerSecret.
	if t.PeerSecret != "" {
		out.Peer = &lll.PeerTLS{SecretRef: &corev1.LocalObjectReference{Name: t.PeerSecret}}
		if t.PeerTrustedCASecret != "" && t.PeerTrustedCASecret != t.PeerSecret {
			warns = append(warns, fmt.Sprintf(
				"merge ca.crt from secret %q into secret %q before starting the new operator: v1alpha2 reads the peer CA from the peer secret's ca.crt",
				t.PeerTrustedCASecret, t.PeerSecret))
		}
	} else if t.PeerTrustedCASecret != "" {
		warns = append(warns,
			"peerTrustedCASecret without peerSecret is dropped: it enabled nothing in the legacy operator either")
	}

	if out.Client == nil && out.Peer == nil {
		return nil, warns
	}
	return out, warns
}

// translateAuth maps enableAuth onto the new BYO-credentials model. The
// legacy operator provisioned root with NoPassword (cert-only); the new one
// requires a password Secret, so one is referenced or generated here.
//
// It reads out.Spec.TLS, so TLS must already be translated onto out. On
// success it sets out.Spec.Auth and appends notes/warnings (and, when it
// generates credentials, a Secret in plan.Extras); on failure it appends to
// plan.Errors and leaves out.Spec.Auth unset.
func translateAuth(sec *legacy.SecuritySpec, out *lll.EtcdCluster, plan *ResourcePlan, opts TranslateOptions) {
	if sec == nil || !sec.EnableAuth {
		return
	}
	if out.Spec.TLS == nil || out.Spec.TLS.Client == nil {
		plan.Errors = append(plan.Errors,
			"security.enableAuth=true requires client TLS in v1alpha2 (spec.auth.enabled demands spec.tls.client — credentials must not cross a plaintext wire), but the legacy cluster has no serverSecret")
		return
	}

	secretName := opts.AuthSecretName
	if secretName == "" {
		// No Secret supplied: generate one named after the cluster, with a
		// fresh random password, and queue it for creation with the target.
		secretName = out.Name + "-root-credentials"
		password := randomPassword()
		plan.Extras = append(plan.Extras, &corev1.Secret{
			TypeMeta:   metav1.TypeMeta{APIVersion: "v1", Kind: "Secret"},
			ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: out.Namespace},
			Type:       corev1.SecretTypeBasicAuth,
			StringData: map[string]string{
				corev1.BasicAuthUsernameKey: "root",
				corev1.BasicAuthPasswordKey: password,
			},
		})
		plan.Notes = append(plan.Notes, fmt.Sprintf(
			"generated root-credentials Secret %q: point etcd consumers (e.g. a Kamaji DataStore basicAuth) at it",
			secretName))
	}
	out.Spec.Auth = &lll.AuthSpec{
		Enabled:                  true,
		RootCredentialsSecretRef: &corev1.LocalObjectReference{Name: secretName},
	}

	plan.Notes = append(plan.Notes,
		"the tool disables auth on the legacy etcd (certificate-authenticated) at apply, because the legacy root user has NoPassword and could never match a credentials Secret; the new operator re-enables auth with the referenced Secret once it adopts the cluster")
	plan.Warnings = append(plan.Warnings,
		"auth is OFF from the moment the tool runs `auth disable` on the legacy etcd until the new operator re-enables it — plan the cutover window accordingly")
}

// translateLocation maps the legacy S3/PVC destination union onto the
// (field-for-field identical) v1alpha2 SnapshotLocation. Exactly one of the
// two destinations must be set; both or neither is an error.
func translateLocation(d legacy.BackupDestination) (lll.SnapshotLocation, error) {
	switch {
	case d.S3 != nil && d.PVC != nil:
		return lll.SnapshotLocation{}, fmt.Errorf("both s3 and pvc destinations set; exactly one is allowed")
	case d.S3 != nil:
		return lll.SnapshotLocation{S3: &lll.S3SnapshotLocation{
			Endpoint:             d.S3.Endpoint,
			Bucket:               d.S3.Bucket,
			Key:                  d.S3.Key,
			CredentialsSecretRef: d.S3.CredentialsSecretRef,
			Region:               d.S3.Region,
			ForcePathStyle:       d.S3.ForcePathStyle,
		}}, nil
	case d.PVC != nil:
		return lll.SnapshotLocation{PVC: &lll.PVCSnapshotLocation{
			ClaimName: d.PVC.ClaimName,
			SubPath:   d.PVC.SubPath,
		}}, nil
	default:
		return lll.SnapshotLocation{}, fmt.Errorf("neither s3 nor pvc destination set; exactly one is required")
	}
}

// TranslateBackup converts one legacy EtcdBackup into an EtcdSnapshot plan
// entry. The specs are field-for-field compatible.
+func TranslateBackup(name, namespace string, spec legacy.EtcdBackupSpec) ResourcePlan { + plan := ResourcePlan{ + SourceKind: "EtcdBackup", + SourceName: name, + Namespace: namespace, + Action: ActionCreate, + DeleteRef: &ObjectRef{GVR: BackupGVR, Namespace: namespace, Name: name}, + } + dest, err := translateLocation(spec.Destination) + if err != nil { + plan.Action = ActionError + plan.DeleteRef = nil + plan.Errors = append(plan.Errors, "spec.destination: "+err.Error()) + return plan + } + plan.Target = &lll.EtcdSnapshot{ + TypeMeta: metav1.TypeMeta{APIVersion: lll.GroupVersion.String(), Kind: "EtcdSnapshot"}, + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, + Spec: lll.EtcdSnapshotSpec{ + ClusterRef: spec.ClusterRef, + Destination: dest, + }, + } + plan.Notes = append(plan.Notes, + "the EtcdSnapshot runs once the NEW operator is started and the referenced cluster exists under the new API") + return plan +} + +// randomPassword returns a 32-hex-char cryptographically random password. +func randomPassword() string { + b := make([]byte, 16) + if _, err := rand.Read(b); err != nil { + // crypto/rand failing means the platform's entropy source is + // broken; generating a weak password silently is worse than dying. + panic(fmt.Sprintf("crypto/rand failed: %v", err)) + } + return hex.EncodeToString(b) +} diff --git a/internal/migrate/translate_test.go b/internal/migrate/translate_test.go new file mode 100644 index 00000000..23be94ba --- /dev/null +++ b/internal/migrate/translate_test.go @@ -0,0 +1,428 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package migrate + +import ( + "strings" + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/resource" + + lll "github.com/cozystack/etcd-operator/api/v1alpha2" + "github.com/cozystack/etcd-operator/internal/migrate/legacy" +) + +func qty(t *testing.T, s string) resource.Quantity { + t.Helper() + q, err := resource.ParseQuantity(s) + if err != nil { + t.Fatalf("ParseQuantity(%q): %v", s, err) + } + return q +} + +func ptrInt32(v int32) *int32 { return &v } + +// hasWarning reports whether any warning contains the substring. +func hasWarning(warnings []string, substr string) bool { + for _, w := range warnings { + if strings.Contains(w, substr) { + return true + } + } + return false +} + +func clusterTarget(t *testing.T, plan ResourcePlan) *lll.EtcdCluster { + t.Helper() + if plan.Action != ActionCreate { + t.Fatalf("Action = %s (errors: %v), want Create", plan.Action, plan.Errors) + } + out, ok := plan.Target.(*lll.EtcdCluster) + if !ok { + t.Fatalf("Target is %T, want *EtcdCluster", plan.Target) + } + return out +} + +// TestTranslateCluster_KitchenSink runs a fully-loaded legacy spec through +// the translator and pins every mapped field plus the exact set of dropped- +// field warnings. 
+func TestTranslateCluster_KitchenSink(t *testing.T) { + sc := "fast-ssd" + aff := &corev1.Affinity{PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{TopologyKey: "kubernetes.io/hostname"}}, + }} + tsc := []corev1.TopologySpreadConstraint{{MaxSkew: 1, TopologyKey: "zone", WhenUnsatisfiable: corev1.DoNotSchedule}} + res := corev1.ResourceRequirements{Requests: corev1.ResourceList{corev1.ResourceCPU: qty(t, "500m")}} + + spec := legacy.EtcdClusterSpec{ + Replicas: ptrInt32(5), + Options: map[string]string{ + // The four cozystack keys map onto the typed spec.options… + "quota-backend-bytes": "10200547328", + "auto-compaction-mode": "periodic", + "auto-compaction-retention": "5m", + "snapshot-count": "10000", + // …anything else is dropped with a warning. + "enable-v2": "false", + }, + PodTemplate: legacy.PodTemplate{ + EmbeddedObjectMetadata: legacy.EmbeddedObjectMetadata{ + Labels: map[string]string{"team": "infra"}, + Annotations: map[string]string{"note": "x"}, + }, + Spec: corev1.PodSpec{ + Affinity: aff, + TopologySpreadConstraints: tsc, + NodeSelector: map[string]string{"disk": "ssd"}, + Containers: []corev1.Container{ + {Name: "etcd", Image: "quay.io/coreos/etcd:v3.5.21", Resources: res, + Env: []corev1.EnvVar{{Name: "X", Value: "y"}}}, + {Name: "exporter", Image: "metrics:1"}, + }, + }, + }, + ServiceTemplate: &legacy.EmbeddedService{}, + HeadlessServiceTemplate: &legacy.EmbeddedMetadataResource{}, + PodDisruptionBudgetTemplate: &legacy.EmbeddedPodDisruptionBudget{}, + Storage: legacy.StorageSpec{ + VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{ + StorageClassName: &sc, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "10Gi")}, + }, + }, + }, + }, + Security: &legacy.SecuritySpec{ + EnableAuth: true, + TLS: legacy.TLSSpec{ + ServerSecret: "srv", + ServerTrustedCASecret: 
"srv-ca", + ClientSecret: "op-client", + ClientTrustedCASecret: "client-ca", + PeerSecret: "peer", + PeerTrustedCASecret: "peer-ca", + }, + }, + } + + plan := TranslateCluster("my-etcd", "ns", spec, TranslateOptions{}) + out := clusterTarget(t, plan) + + if out.Spec.Replicas == nil || *out.Spec.Replicas != 5 { + t.Errorf("replicas = %v, want 5", out.Spec.Replicas) + } + if out.Spec.Version != "3.5.21" { + t.Errorf("version = %q, want 3.5.21", out.Spec.Version) + } + if out.Spec.Storage.Size.Cmp(qty(t, "10Gi")) != 0 || out.Spec.Storage.Medium != lll.StorageMediumDefault { + t.Errorf("storage = %+v, want 10Gi PVC", out.Spec.Storage) + } + if out.Spec.Storage.StorageClassName == nil || *out.Spec.Storage.StorageClassName != sc { + t.Errorf("storageClassName = %v, want %q", out.Spec.Storage.StorageClassName, sc) + } + if out.Spec.AdditionalMetadata == nil || + out.Spec.AdditionalMetadata.Labels["team"] != "infra" || + out.Spec.AdditionalMetadata.Annotations["note"] != "x" { + t.Errorf("additionalMetadata = %+v", out.Spec.AdditionalMetadata) + } + if !equality.Semantic.DeepEqual(out.Spec.Affinity, aff) { + t.Errorf("affinity not mapped: %+v", out.Spec.Affinity) + } + if !equality.Semantic.DeepEqual(out.Spec.TopologySpreadConstraints, tsc) { + t.Errorf("topologySpreadConstraints not mapped: %+v", out.Spec.TopologySpreadConstraints) + } + if !equality.Semantic.DeepEqual(out.Spec.Resources, res) { + t.Errorf("resources not mapped: %+v", out.Spec.Resources) + } + + // TLS mapping. 
+ if out.Spec.TLS == nil || out.Spec.TLS.Client == nil || + out.Spec.TLS.Client.ServerSecretRef == nil || out.Spec.TLS.Client.ServerSecretRef.Name != "srv" { + t.Fatalf("tls.client.serverSecretRef not mapped: %+v", out.Spec.TLS) + } + if out.Spec.TLS.Client.OperatorClientSecretRef == nil || out.Spec.TLS.Client.OperatorClientSecretRef.Name != "op-client" { + t.Errorf("tls.client.operatorClientSecretRef = %+v, want op-client", out.Spec.TLS.Client.OperatorClientSecretRef) + } + if out.Spec.TLS.Peer == nil || out.Spec.TLS.Peer.SecretRef == nil || out.Spec.TLS.Peer.SecretRef.Name != "peer" { + t.Errorf("tls.peer.secretRef = %+v, want peer", out.Spec.TLS.Peer) + } + + // Auth: generated Secret referenced + emitted as an extra. + if out.Spec.Auth == nil || !out.Spec.Auth.Enabled || + out.Spec.Auth.RootCredentialsSecretRef == nil || + out.Spec.Auth.RootCredentialsSecretRef.Name != "my-etcd-root-credentials" { + t.Fatalf("auth = %+v", out.Spec.Auth) + } + if len(plan.Extras) != 1 { + t.Fatalf("extras = %d, want 1 generated Secret", len(plan.Extras)) + } + sec, ok := plan.Extras[0].(*corev1.Secret) + if !ok || sec.Type != corev1.SecretTypeBasicAuth || + sec.StringData[corev1.BasicAuthUsernameKey] != "root" || + len(sec.StringData[corev1.BasicAuthPasswordKey]) < 16 { + t.Fatalf("generated Secret malformed: %+v", plan.Extras[0]) + } + + // Typed options: the four cozystack keys map 1:1. + if out.Spec.Options == nil || + out.Spec.Options.QuotaBackendBytes == nil || *out.Spec.Options.QuotaBackendBytes != 10200547328 || + out.Spec.Options.AutoCompactionMode != lll.AutoCompactionModePeriodic || + out.Spec.Options.AutoCompactionRetention != "5m" || + out.Spec.Options.SnapshotCount == nil || *out.Spec.Options.SnapshotCount != 10000 { + t.Errorf("options not mapped: %+v", out.Spec.Options) + } + + // Exact warning set: every dropped legacy knob accounted for. 
+ for _, want := range []string{ + `spec.options keys with no typed v1alpha2 equivalent; dropped etcd args: enable-v2="false"`, + "spec.serviceTemplate", + "spec.headlessServiceTemplate", + "spec.podDisruptionBudgetTemplate", + "containers[etcd].env", + "containers[exporter] (sidecar)", + "nodeSelector", + `merge ca.crt from secret "srv-ca" into secret "srv"`, + `secret "client-ca" (clientTrustedCASecret) is dropped`, + `merge ca.crt from secret "peer-ca" into secret "peer"`, + } { + if !hasWarning(plan.Warnings, want) { + t.Errorf("missing warning containing %q; got %v", want, plan.Warnings) + } + } + if plan.DeleteRef == nil || plan.DeleteRef.GVR != ClusterGVR || plan.DeleteRef.Name != "my-etcd" { + t.Errorf("DeleteRef = %+v", plan.DeleteRef) + } +} + +// TestTranslateCluster_VersionExtraction pins the image-tag → spec.version +// rules across default, override, and unparsable images. +func TestTranslateCluster_VersionExtraction(t *testing.T) { + base := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + } + + t.Run("default image", func(t *testing.T) { + plan := TranslateCluster("c", "ns", base, TranslateOptions{}) + out := clusterTarget(t, plan) + if out.Spec.Version != "3.5.12" { + t.Errorf("version = %q, want 3.5.12 (legacy default image)", out.Spec.Version) + } + if !hasWarning(plan.Warnings, "assuming the legacy default") { + t.Errorf("expected default-image warning, got %v", plan.Warnings) + } + }) + + t.Run("override wins", func(t *testing.T) { + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{{Name: "etcd", Image: "etcd:v3.4.1"}} + plan := TranslateCluster("c", "ns", spec, TranslateOptions{VersionOverride: "3.6.11"}) + if out := clusterTarget(t, plan); out.Spec.Version != "3.6.11" { + t.Errorf("version = 
%q, want override 3.6.11", out.Spec.Version) + } + }) + + t.Run("unparsable tag errors", func(t *testing.T) { + spec := base + spec.PodTemplate.Spec.Containers = []corev1.Container{{Name: "etcd", Image: "registry/etcd:latest"}} + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error for unparsable tag", plan.Action) + } + if plan.DeleteRef != nil || plan.Target != nil { + t.Errorf("errored plan must not delete/create anything: %+v", plan) + } + }) + + t.Run("bad override errors", func(t *testing.T) { + plan := TranslateCluster("c", "ns", base, TranslateOptions{VersionOverride: "v3.6.11"}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error for malformed --version", plan.Action) + } + }) +} + +// TestTranslateStorage pins the storage union mapping. +func TestTranslateStorage(t *testing.T) { + // In-place adoption hands the new operator the existing PVCs; emptyDir + // clusters have none, so EVERY emptyDir variant must refuse loudly + // rather than translate into something the adoption cannot back. 
+ t.Run("any emptyDir errors (nothing to adopt)", func(t *testing.T) { + size := qty(t, "256Mi") + for name, ed := range map[string]*corev1.EmptyDirVolumeSource{ + "memory with sizeLimit": {Medium: corev1.StorageMediumMemory, SizeLimit: &size}, + "memory bare": {Medium: corev1.StorageMediumMemory}, + "disk with sizeLimit": {SizeLimit: &size}, + "disk bare": {}, + } { + if _, _, err := translateStorage(legacy.StorageSpec{EmptyDir: ed}); err == nil { + t.Errorf("%s: expected error — emptyDir has no PVC for in-place adoption", name) + } + } + }) + + t.Run("no storage at all errors (legacy defaulted to disk emptyDir)", func(t *testing.T) { + _, _, err := translateStorage(legacy.StorageSpec{}) + if err == nil { + t.Fatal("expected error for the implicit legacy emptyDir default") + } + }) + + t.Run("vct without size defaults 1Gi with warning", func(t *testing.T) { + sc := "std" + got, warns, err := translateStorage(legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{StorageClassName: &sc}, + }}) + if err != nil { + t.Fatalf("err=%v", err) + } + if got.Size.Cmp(qty(t, "1Gi")) != 0 || !hasWarning(warns, "defaulting spec.storage.size to 1Gi") { + t.Errorf("got %+v warns=%v", got, warns) + } + }) +} + +// TestTranslateCluster_AuthRequiresClientTLS mirrors the v1alpha2 CEL rule: +// enableAuth without server TLS cannot be expressed in the new API. 
+func TestTranslateCluster_AuthRequiresClientTLS(t *testing.T) { + spec := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + Security: &legacy.SecuritySpec{EnableAuth: true}, + } + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + if plan.Action != ActionError { + t.Fatalf("Action = %s, want Error (auth requires client TLS)", plan.Action) + } +} + +// TestTranslateCluster_AuthSecretFlagSkipsGeneration: an explicit +// --auth-secret is referenced as-is, with no generated Secret extra. +func TestTranslateCluster_AuthSecretFlagSkipsGeneration(t *testing.T) { + spec := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + Security: &legacy.SecuritySpec{ + EnableAuth: true, + TLS: legacy.TLSSpec{ServerSecret: "srv"}, + }, + } + plan := TranslateCluster("c", "ns", spec, TranslateOptions{AuthSecretName: "my-creds"}) + out := clusterTarget(t, plan) + if out.Spec.Auth.RootCredentialsSecretRef.Name != "my-creds" { + t.Errorf("rootCredentialsSecretRef = %+v, want my-creds", out.Spec.Auth.RootCredentialsSecretRef) + } + if len(plan.Extras) != 0 { + t.Errorf("no Secret should be generated when --auth-secret is given; extras=%v", plan.Extras) + } +} + +// TestTranslateCluster_AuthNotes pins the auth-disable note and the +// auth-off-window warning every auth-enabled adoption carries. 
+func TestTranslateCluster_AuthNotes(t *testing.T) { + spec := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + Security: &legacy.SecuritySpec{ + EnableAuth: true, + TLS: legacy.TLSSpec{ServerSecret: "srv", ClientSecret: "op"}, + }, + } + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + clusterTarget(t, plan) + if !hasWarning(plan.Warnings, "auth is OFF") { + t.Errorf("expected auth-off-window warning, got %v", plan.Warnings) + } + found := false + for _, n := range plan.Notes { + if strings.Contains(n, "disables auth on the legacy etcd") { + found = true + } + } + if !found { + t.Errorf("expected auth-disable note, got %v", plan.Notes) + } +} + +// TestTranslateCluster_RestoreDropped: an adopted cluster never bootstraps +// (status.clusterID is prefilled), so the legacy restore-at-creation config +// is dropped with a warning instead of carried into spec.bootstrap. 
+func TestTranslateCluster_RestoreDropped(t *testing.T) { + spec := legacy.EtcdClusterSpec{ + Storage: legacy.StorageSpec{VolumeClaimTemplate: legacy.EmbeddedPersistentVolumeClaim{ + Spec: corev1.PersistentVolumeClaimSpec{Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: qty(t, "1Gi")}}}, + }}, + Bootstrap: &legacy.BootstrapSpec{Restore: &legacy.RestoreSpec{Source: legacy.BackupDestination{ + S3: &legacy.S3BackupDestination{Endpoint: "https://s3", Bucket: "b", Key: "snap/x.db", + CredentialsSecretRef: corev1.LocalObjectReference{Name: "s3-creds"}}, + }}}, + } + plan := TranslateCluster("c", "ns", spec, TranslateOptions{}) + out := clusterTarget(t, plan) + if out.Spec.Bootstrap != nil { + t.Errorf("spec.bootstrap must not carry over to an adopted cluster: %+v", out.Spec.Bootstrap) + } + if !hasWarning(plan.Warnings, "spec.bootstrap.restore is dropped") { + t.Errorf("expected restore-dropped warning, got %v", plan.Warnings) + } +} + +// TestTranslateBackup pins the field-for-field EtcdBackup → EtcdSnapshot map. 
+func TestTranslateBackup(t *testing.T) { + plan := TranslateBackup("bk", "ns", legacy.EtcdBackupSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "my-etcd"}, + Destination: legacy.BackupDestination{S3: &legacy.S3BackupDestination{ + Endpoint: "https://minio", Bucket: "etcd", Key: "prefix", + CredentialsSecretRef: corev1.LocalObjectReference{Name: "s3"}, + Region: "us-east-1", ForcePathStyle: true, + }}, + }) + if plan.Action != ActionCreate { + t.Fatalf("Action = %s (errors %v)", plan.Action, plan.Errors) + } + snap, ok := plan.Target.(*lll.EtcdSnapshot) + if !ok { + t.Fatalf("Target is %T", plan.Target) + } + if snap.Spec.ClusterRef.Name != "my-etcd" { + t.Errorf("clusterRef = %+v", snap.Spec.ClusterRef) + } + s3 := snap.Spec.Destination.S3 + if s3 == nil || s3.Endpoint != "https://minio" || s3.Bucket != "etcd" || s3.Key != "prefix" || + s3.CredentialsSecretRef.Name != "s3" || s3.Region != "us-east-1" || !s3.ForcePathStyle { + t.Errorf("destination not mapped: %+v", s3) + } + if plan.DeleteRef == nil || plan.DeleteRef.GVR != BackupGVR { + t.Errorf("DeleteRef = %+v", plan.DeleteRef) + } + + t.Run("malformed destination errors", func(t *testing.T) { + p := TranslateBackup("bk", "ns", legacy.EtcdBackupSpec{ClusterRef: corev1.LocalObjectReference{Name: "c"}}) + if p.Action != ActionError { + t.Fatalf("Action = %s, want Error for empty destination", p.Action) + } + }) +} diff --git a/internal/portforward/portforward.go b/internal/portforward/portforward.go new file mode 100644 index 00000000..e8190117 --- /dev/null +++ b/internal/portforward/portforward.go @@ -0,0 +1,108 @@ +/* +Copyright 2024 The etcd-operator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +// Package portforward wraps client-go's SPDY port-forwarder with the +// dial/ready/timeout handling shared by the kubectl-etcd plugin and the +// etcd-migrate tool. Both CLIs reach in-cluster etcd Pods from the +// operator's machine over `kubectl port-forward`-style tunnels. +package portforward + +import ( + "fmt" + "net/http" + "net/url" + "os" + "time" + + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/portforward" + "k8s.io/client-go/transport/spdy" +) + +// readyTimeout bounds how long ForwardToPod waits for the forward to signal +// ready before giving up. +const readyTimeout = 10 * time.Second + +// ForwardToPod forwards a random local port to targetPort on the named Pod. +// It returns the local port and a stop function tearing the forward down. +// The forward stays up until stop is called (or the process exits). +func ForwardToPod(cfg *rest.Config, namespace, podName string, targetPort int) (uint16, func(), error) { + path := fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", namespace, podName) + + transport, upgrader, err := spdy.RoundTripperFor(cfg) + if err != nil { + return 0, nil, fmt.Errorf("failed to create round tripper: %w", err) + } + hostURL, err := url.Parse(cfg.Host) + if err != nil { + return 0, nil, fmt.Errorf("failed to parse host URL: %w", err) + } + hostURL.Path = path + + stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1) + dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", hostURL) + + forwarder, err := portforward.New(dialer, + []string{fmt.Sprintf("0:%d", targetPort)}, stopChan, readyChan, &silentWriter{}, os.Stderr) + if err != nil { + return 0, nil, fmt.Errorf("failed to create port forwarder: %w", err) + } + + // ForwardPorts blocks until the forward is torn down; run it in the + // background and surface a startup failure via forwardErr. 
On a dial + // failure (RBAC on pods/portforward, API-server connectivity, protocol + // negotiation) ForwardPorts returns WITHOUT ever closing readyChan, so + // blocking on readyChan alone would hang forever — awaitForward selects + // on the error and a timeout too. + forwardErr := make(chan error, 1) + go func() { + forwardErr <- forwarder.ForwardPorts() + }() + + if err := awaitForward(readyChan, forwardErr, stopChan, readyTimeout); err != nil { + return 0, nil, err + } + + ports, err := forwarder.GetPorts() + if err != nil { + close(stopChan) + return 0, nil, fmt.Errorf("failed to get forwarded ports: %w", err) + } + stop := func() { close(stopChan) } + return ports[0].Local, stop, nil +} + +// awaitForward blocks until the port-forward signals ready, fails, or times +// out — whichever comes first. It exists so a forward that dies before +// becoming ready (which leaves readyChan unclosed) surfaces as an error +// instead of hanging the CLI. On timeout it closes stopChan to tear the +// forwarder down. +func awaitForward(readyChan <-chan struct{}, forwardErr <-chan error, stopChan chan struct{}, timeout time.Duration) error { + select { + case <-readyChan: + return nil + case err := <-forwardErr: + if err == nil { + err = fmt.Errorf("exited before becoming ready") + } + return fmt.Errorf("port forwarding failed: %w", err) + case <-time.After(timeout): + close(stopChan) + return fmt.Errorf("timed out after %s waiting for port forwarding to become ready", timeout) + } +} + +// silentWriter discards the forwarder's stdout chatter ("Forwarding from +// ...") so CLI output stays clean; errors still go to stderr. 
+type silentWriter struct{} + +func (sw *silentWriter) Write(p []byte) (int, error) { + return len(p), nil +} diff --git a/internal/portforward/portforward_test.go b/internal/portforward/portforward_test.go new file mode 100644 index 00000000..aed299f8 --- /dev/null +++ b/internal/portforward/portforward_test.go @@ -0,0 +1,56 @@ +/* +Copyright 2023 Timofey Larkin. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +*/ + +package portforward + +import ( + "fmt" + "strings" + "testing" + "time" +) + +// ── A failed/never-ready port-forward must not hang ───────────────────────── + +func TestAwaitForward_Ready(t *testing.T) { + ready := make(chan struct{}, 1) + close(ready) + if err := awaitForward(ready, make(chan error, 1), make(chan struct{}, 1), time.Second); err != nil { + t.Fatalf("ready forward should succeed, got %v", err) + } +} + +func TestAwaitForward_ErrorBeforeReady(t *testing.T) { + // ForwardPorts returns an error without ever closing readyChan — the old + // code blocked on <-readyChan forever. awaitForward must return the error. + forwardErr := make(chan error, 1) + forwardErr <- fmt.Errorf("dial tcp: connection refused") + err := awaitForward(make(chan struct{}), forwardErr, make(chan struct{}, 1), time.Second) + if err == nil { + t.Fatal("a forward failure must return an error, not hang or succeed") + } + if !strings.Contains(err.Error(), "connection refused") { + t.Errorf("error should wrap the forward failure, got %v", err) + } +} + +func TestAwaitForward_Timeout(t *testing.T) { + stop := make(chan struct{}, 1) + // Nothing ever signals ready and no error arrives → must time out, not hang. 
+ err := awaitForward(make(chan struct{}), make(chan error, 1), stop, 10*time.Millisecond) + if err == nil { + t.Fatal("a never-ready forward must time out, not hang") + } + select { + case <-stop: + default: + t.Error("timeout must close stopChan to tear the forwarder down") + } +}