From 5c9506a36d6a4784f053149d2ac76cae27646fef Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 10:11:11 +0530 Subject: [PATCH 01/90] wip: add fencing mechanism Signed-off-by: Mayank Shah --- .../controller/postgrescluster/instance.go | 25 +++++++++++++++++++ internal/naming/annotations.go | 4 +++ 2 files changed, 29 insertions(+) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index b80da339d8..f4088ccb72 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -12,6 +12,7 @@ import ( "strings" "time" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" "github.com/pkg/errors" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -1265,6 +1266,24 @@ func (r *Reconciler) reconcileInstance( return err } +func getFencedInstances(cluster *v1beta1.PostgresCluster) []string { + annotations := cluster.GetAnnotations()[pNaming.ToCrunchyAnnotation(naming.FencedInstancesAnnotation)] + if annotations == "" { + return []string{} + } + return strings.Split(annotations, ",") +} + +func isInstanceFenced(cluster *v1beta1.PostgresCluster, instanceName string) bool { + fencedInstances := getFencedInstances(cluster) + for _, fencedInstance := range fencedInstances { + if instanceName != "" && fencedInstance == instanceName { + return true + } + } + return false +} + func generateInstanceStatefulSetIntent(_ context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, @@ -1369,6 +1388,12 @@ func generateInstanceStatefulSetIntent(_ context.Context, sts.Spec.Replicas = initialize.Int32(1) } + // K8SPG-771 + // TODO (mayanshah1607): perform checkpoint before scaling down. + if isInstanceFenced(cluster, sts.GetName()) { + sts.Spec.Replicas = initialize.Int32(0) + } + // Restart containers any time they stop, die, are killed, etc. 
// - https://docs.k8s.io/concepts/workloads/pods/pod-lifecycle/#restart-policy sts.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyAlways diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index ec04eb0e9a..b2b875d1ed 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -81,4 +81,8 @@ const ( // is present, the controller will not update the ConfigMap, allowing users to make custom // modifications that won't be overwritten during reconciliation. OverrideConfigAnnotation = perconaAnnotationPrefix + "override-config" + + // K8SPG-771 + // FencedInstancesAnnotation is an annotation used to mark instances as fenced. + FencedInstancesAnnotation = perconaAnnotationPrefix + "fenced-instances" ) From c021fb2fde53daa4da12dd8e187e69741dc07d54 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 16:53:33 +0530 Subject: [PATCH 02/90] rename to suspended-instances Signed-off-by: Mayank Shah --- internal/controller/postgrescluster/instance.go | 12 ++++++------ internal/naming/annotations.go | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index f4088ccb72..fe457ae3f2 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -1266,16 +1266,16 @@ func (r *Reconciler) reconcileInstance( return err } -func getFencedInstances(cluster *v1beta1.PostgresCluster) []string { - annotations := cluster.GetAnnotations()[pNaming.ToCrunchyAnnotation(naming.FencedInstancesAnnotation)] +func getSuspendedInstances(cluster *v1beta1.PostgresCluster) []string { + annotations := cluster.GetAnnotations()[pNaming.ToCrunchyAnnotation(naming.SuspendedInstancesAnnotation)] if annotations == "" { return []string{} } return strings.Split(annotations, ",") } -func isInstanceFenced(cluster *v1beta1.PostgresCluster, instanceName string) bool { - 
fencedInstances := getFencedInstances(cluster) +func isInstanceSuspended(cluster *v1beta1.PostgresCluster, instanceName string) bool { + fencedInstances := getSuspendedInstances(cluster) for _, fencedInstance := range fencedInstances { if instanceName != "" && fencedInstance == instanceName { return true @@ -1389,8 +1389,8 @@ func generateInstanceStatefulSetIntent(_ context.Context, } // K8SPG-771 - // TODO (mayanshah1607): perform checkpoint before scaling down. - if isInstanceFenced(cluster, sts.GetName()) { + // TODO (mayanshah1607): perform checkpoint before scaling down, especially for primary. + if isInstanceSuspended(cluster, sts.GetName()) { sts.Spec.Replicas = initialize.Int32(0) } diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index b2b875d1ed..a545becdf1 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -83,6 +83,8 @@ const ( OverrideConfigAnnotation = perconaAnnotationPrefix + "override-config" // K8SPG-771 - // FencedInstancesAnnotation is an annotation used to mark instances as fenced. - FencedInstancesAnnotation = perconaAnnotationPrefix + "fenced-instances" + // SuspendedInstancesAnnotation is an annotation set on the PerconaPGCluster to suspend one or more instances. + // The instance names represent the names of the StatefulSets. 
+ // Example: "pgv2.percona.com/suspended-instances=cluster1-abc,cluster2-xyz" + SuspendedInstancesAnnotation = perconaAnnotationPrefix + "suspended-instances" ) From 6043b7ab893d9e4dec8e2e83a9827c2a3c125920 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 16:56:30 +0530 Subject: [PATCH 03/90] add BackupSnapshots feature gate Signed-off-by: Mayank Shah --- internal/feature/features.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/feature/features.go b/internal/feature/features.go index a327dac503..36f76aacf6 100644 --- a/internal/feature/features.go +++ b/internal/feature/features.go @@ -94,6 +94,12 @@ const ( // Support VolumeSnapshots VolumeSnapshots = "VolumeSnapshots" + + // K8SPG-771 + // This feature gate enables the use of snapshot based backups. + // NOTE: This feature is different from VolumeSnapshots which is implemented by + // CrunchyData to perform snapshots of already existing backups. + BackupSnapshots = "BackupSnapshots" ) // NewGate returns a MutableGate with the Features defined in this package. 
@@ -111,6 +117,7 @@ func NewGate() MutableGate { PGUpgradeCPUConcurrency: {Default: false, PreRelease: featuregate.Alpha}, TablespaceVolumes: {Default: false, PreRelease: featuregate.Alpha}, VolumeSnapshots: {Default: false, PreRelease: featuregate.Alpha}, + BackupSnapshots: {Default: false, PreRelease: featuregate.Alpha}, }); err != nil { panic(err) } From 93c732b28a0c04b24d8e88c5003d74c4a9dacd18 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 18:56:54 +0530 Subject: [PATCH 04/90] implement reconciler logic Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgbackups.yaml | 21 ++ .../pgv2.percona.com_perconapgclusters.yaml | 17 ++ .../pgv2.percona.com_perconapgclusters.yaml | 38 +++ deploy/bundle.yaml | 38 +++ deploy/crd.yaml | 38 +++ deploy/cw-bundle.yaml | 38 +++ go.mod | 1 + go.sum | 288 ++++++++++++++++++ percona/controller/pgbackup/controller.go | 21 +- .../pgbackup/snapshots/reconcile.go | 178 +++++++++++ percona/postgres/common.go | 19 ++ .../v2/perconapgbackup_types.go | 21 ++ .../v2/perconapgcluster_types.go | 26 ++ .../v2/zz_generated.deepcopy.go | 45 +++ 14 files changed, 781 insertions(+), 8 deletions(-) create mode 100644 percona/controller/pgbackup/snapshots/reconcile.go diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index f9aaf1c422..2b63c75643 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -70,6 +70,13 @@ spec: type: object spec: properties: + backupMethod: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. 
@@ -391,6 +398,20 @@ spec: required: - name type: object + snapshot: + properties: + pvcName: + description: PVCName is the name of the PVC that contains the + snapshotted data. + type: string + targetPvcName: + description: TargetPVCName is the name of the source PVC that + is being snapshotted. + type: string + required: + - pvcName + - targetPvcName + type: object state: type: string storageType: diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index 85a7b5717e..9b7e454e0c 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7110,6 +7110,23 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + enabled: + type: boolean + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 8af487e0fd..7daf57fddf 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -69,6 +69,13 @@ spec: type: object spec: properties: + backupMethod: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. 
@@ -390,6 +397,20 @@ spec: required: - name type: object + snapshot: + properties: + pvcName: + description: PVCName is the name of the PVC that contains the + snapshotted data. + type: string + targetPvcName: + description: TargetPVCName is the name of the source PVC that + is being snapshotted. + type: string + required: + - pvcName + - targetPvcName + type: object state: type: string storageType: @@ -7515,6 +7536,23 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + enabled: + type: boolean + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index aa2c93d8bd..cd223c0c12 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + backupMethod: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -685,6 +692,20 @@ spec: required: - name type: object + snapshot: + properties: + pvcName: + description: PVCName is the name of the PVC that contains the + snapshotted data. + type: string + targetPvcName: + description: TargetPVCName is the name of the source PVC that + is being snapshotted. 
+ type: string + required: + - pvcName + - targetPvcName + type: object state: type: string storageType: @@ -7812,6 +7833,23 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + enabled: + type: boolean + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 68b8a5788f..dc56afaab4 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + backupMethod: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -685,6 +692,20 @@ spec: required: - name type: object + snapshot: + properties: + pvcName: + description: PVCName is the name of the PVC that contains the + snapshotted data. + type: string + targetPvcName: + description: TargetPVCName is the name of the source PVC that + is being snapshotted. + type: string + required: + - pvcName + - targetPvcName + type: object state: type: string storageType: @@ -7812,6 +7833,23 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + enabled: + type: boolean + mode: + default: offline + description: Mode of the VolumeSnapshot. 
+ enum: + - offline + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 7c5c730043..2bac85fc5d 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + backupMethod: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -685,6 +692,20 @@ spec: required: - name type: object + snapshot: + properties: + pvcName: + description: PVCName is the name of the PVC that contains the + snapshotted data. + type: string + targetPvcName: + description: TargetPVCName is the name of the source PVC that + is being snapshotted. + type: string + required: + - pvcName + - targetPvcName + type: object state: type: string storageType: @@ -7812,6 +7833,23 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + enabled: + type: boolean + mode: + default: offline + description: Mode of the VolumeSnapshot. 
+ enum: + - offline + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/go.mod b/go.mod index 900090711b..747cafd0c6 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 github.com/hashicorp/go-version v1.8.0 + github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 github.com/onsi/ginkgo/v2 v2.27.5 github.com/onsi/gomega v1.38.3 diff --git a/go.sum b/go.sum index e17a06673f..ffaf3241bb 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,38 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.51.0/go.mod h1:hWtGJ6gnXH+KgDv+V0zFGDvpi07n3z8ZNj3T1RW0Gcw= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI= 
+github.com/Azure/go-autorest/autorest v0.9.6/go.mod h1:/FALq9T/kS7b5J5qsQ+RSTUdAmGFqi0vUdVNNx8q630= +github.com/Azure/go-autorest/autorest/adal v0.5.0/go.mod h1:8Z9fGy2MpX0PvDjB1pEgQTmVqjGhiHBW7RJJEciWzS0= +github.com/Azure/go-autorest/autorest/adal v0.8.2/go.mod h1:ZjhuQClTqx435SRJ2iMlOxPYt3d2C/T/7TiQCVZSn3Q= +github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjWF/7vwR+cUD/ELuzDCXwHUVA= +github.com/Azure/go-autorest/autorest/date v0.2.0/go.mod h1:vcORJHLJEh643/Ioh9+vPmf1Ij9AEBM5FuBIXLmIy0g= +github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= +github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0= +github.com/Azure/go-autorest/autorest/mocks v0.3.0/go.mod h1:a8FDP3DYzQ4RYfVAxAN3SVSiiO77gL2j2ronKKP0syM= +github.com/Azure/go-autorest/logger v0.1.0/go.mod h1:oExouG+K6PryycPJfVSxi/koC6LSNgds39diKLz7Vrc= +github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/Percona-Lab/percona-version-service v0.0.0-20230404081016-ea25e30cdcbc h1:aBpUepmWt8NsLH0fOA6vb8CCvIIJ4jMNxpvR36PaRSs= github.com/Percona-Lab/percona-version-service v0.0.0-20230404081016-ea25e30cdcbc/go.mod h1:dOzRkbUNd/qpn35nOSy60ZQBiP9wa9g1kh9kaMg/tOk= +github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/purell v1.1.1/go.mod 
h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/aws/aws-sdk-go v1.55.8 h1:JRmEUbU52aJQZ2AjX4q4Wu7t4uZjOu71uyNmaWlUkJQ= @@ -16,30 +47,50 @@ github.com/bool64/shared v0.1.6 h1:1u1IfTU84pZU285Mf1kQC5wX/VzSRE5E/+4KgFRGQ6o= github.com/bool64/shared v0.1.6/go.mod h1:AByMlOFBjavJDk8VdFBH/atMgv1q7qrKXD1XLAQTgZA= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew 
v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= +github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= +github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/emicklei/go-restful v2.9.5+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify 
v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -51,18 +102,29 @@ github.com/go-openapi/analysis v0.24.1 h1:Xp+7Yn/KOnVWYG8d+hPksOYnCYImE3TieBa7rB github.com/go-openapi/analysis v0.24.1/go.mod h1:dU+qxX7QGU1rl7IYhBC8bIfmWQdX4Buoea4TGtxXY84= github.com/go-openapi/errors v0.22.6 h1:eDxcf89O8odEnohIXwEjY1IB4ph5vmbUsBMsFNwXWPo= github.com/go-openapi/errors v0.22.6/go.mod h1:z9S8ASTUqx7+CP1Q8dD8ewGH/1JWFFLX/2PmAYNQLgk= 
+github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= +github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= +github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk= github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM= +github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= +github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= +github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc= github.com/go-openapi/jsonreference v0.21.3/go.mod h1:RqkUP0MrLf37HqxZxrIAtTWW4ZJIK1VzduhXYBEeGc4= github.com/go-openapi/loads v0.23.2 h1:rJXAcP7g1+lWyBHC7iTY+WAF0rprtM+pm8Jxv1uQJp4= github.com/go-openapi/loads v0.23.2/go.mod h1:IEVw1GfRt/P2Pplkelxzj9BYFajiWOtY2nHZNj4UnWY= github.com/go-openapi/runtime v0.29.2 h1:UmwSGWNmWQqKm1c2MGgXVpC2FTGwPDQeUsBMufc5Yj0= github.com/go-openapi/runtime v0.29.2/go.mod h1:biq5kJXRJKBJxTDJXAa00DOTa/anflQPhT0/wmjuy+0= +github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= +github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= github.com/go-openapi/spec v0.22.1 h1:beZMa5AVQzRspNjvhe5aG1/XyBSMeX1eEOs7dMoXh/k= github.com/go-openapi/spec v0.22.1/go.mod h1:c7aeIQT175dVowfp7FeCvXXnjN/MrpaONStibD2WtDA= github.com/go-openapi/strfmt v0.25.0 h1:7R0RX7mbKLa9EYCTHRcCuIPcaqlyQiWNPTXwClK0saQ= github.com/go-openapi/strfmt v0.25.0/go.mod h1:nNXct7OzbwrMY9+5tLX4I21pzcmE6ccMGXl3jFdPfn8= +github.com/go-openapi/swag 
v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= +github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.25.4 h1:OyUPUFYDPDBMkqyxOTkqDYFnrhuhi9NR6QVUvIochMU= github.com/go-openapi/swag v0.25.4/go.mod h1:zNfJ9WZABGHCFg2RnY0S4IOkAcVTzJ6z2Bi+Q4i6qFQ= github.com/go-openapi/swag/cmdutils v0.25.4 h1:8rYhB5n6WawR192/BfUu2iVlxqVR9aRgGJP6WaBoW+4= @@ -101,56 +163,110 @@ github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9L github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf 
v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 
h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 h1:B+8ClL/kCQkRiU82d9xajRPKYMrB7E0MbtzWVi1K4ns= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3/go.mod h1:NbCUVmiS4foBGBHOYlCT25+YmGpJ32dZPi75pGEUpj4= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3 h1:NmZ1PKzSTQbuGHw9DGPFomqkkLWMC+vZCkfs+FHv1Vg= github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.3/go.mod h1:zQrxl1YP88HQlA6i9c63DSVPFklWpGX4OWAc9bFuaH4= github.com/hashicorp/go-version v1.8.0 h1:KAkNb1HAiZd1ukkxDFGmokVZe1Xy9HG6NUp+bPle2i4= github.com/hashicorp/go-version v1.8.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/iancoleman/orderedmap v0.3.0 h1:5cbR2grmZR/DiVt+VJopEhtVs9YGInGIxAoMJn+Ichc= github.com/iancoleman/orderedmap v0.3.0/go.mod h1:XuLcCUkdL5owUCQeF2Ue9uuw1EptkJDkXXS7VoV7XGE= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/joshdk/go-junit v1.0.0 
h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 h1:nHHjmvjitIiyPlUHk/ofpgvBcNcawJLtf4PYHORLjAA= +github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0/go.mod 
h1:YBCo4DoEeDndqvAn6eeu0vWM7QdXmHEeI9cFWplmBys= github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 h1:bMqrb3UHgHbP+PW9VwiejfDJU1R0PpXVZNMdeH8WYKI= github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0/go.mod h1:E3vdYxHj2C2q6qo8/Da4g7P+IcwqRZyy3gJBzYybV9Y= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.7.0/go.mod h1:KAzv3t3aY1NaHWoQz1+4F1ccyAH66Jk7yos7ldAVICs= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= @@ -160,19 +276,28 @@ github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVO github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee 
h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo/v2 v2.27.5 h1:ZeVgZMx2PDMdJm/+w5fE/OyG6ILo1Y3e+QX4zSR0zTE= github.com/onsi/ginkgo/v2 v2.27.5/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= +github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= +github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pganalyze/pg_query_go/v6 v6.1.0 h1:jG5ZLhcVgL1FAw4C/0VNQaVmX1SUJx71wBGdtTtBvls= github.com/pganalyze/pg_query_go/v6 v6.1.0/go.mod 
h1:nvTHIuoud6e1SfrUaFwHqT0i4b5Nr+1rPWVds3B5+50= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -182,6 +307,7 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= @@ -190,17 +316,22 @@ github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzM github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/afero v1.2.2/go.mod 
h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= +github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= @@ -224,6 +355,7 @@ github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6 github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82 h1:BHyfKlQyqbsFN5p3IfnEUduWvb9is428/nNb5L3U01M= github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.mongodb.org/mongo-driver v1.17.6 h1:87JUG1wZfWsr6rIz3ZmpH90rL5tea7O3IHuSwHUpsss= go.mongodb.org/mongo-driver v1.17.6/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= @@ -235,6 +367,9 @@ go.nhat.io/matcher/v2 v2.0.0 h1:W+rbHi0hKuZHtOQH4U5g+KwyKyfVioIxrxjoGRcUETE= go.nhat.io/matcher/v2 v2.0.0/go.mod h1:cL5oYp0M9A4L8jEGqjmUfy+k7AXVDddoVt6aYIL1r5g= go.nhat.io/wait v0.1.0 
h1:aQ4YDzaOgFbypiJ9c/eAfOIB1G25VOv7Gd2QS8uz1gw= go.nhat.io/wait v0.1.0/go.mod h1:+ijMghc9/9zXi+HDcs49HNReprvXOZha2Q3jTOtqJrE= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y= @@ -268,24 +403,91 @@ go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/exp 
v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod 
h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync 
v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -298,42 +500,109 @@ golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuX golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200616133436-c1934b75d054/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine 
v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.1/go.mod 
h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= @@ -343,29 +612,48 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +k8s.io/api v0.19.0/go.mod h1:I1K45XlvTrDjmj5LoM5LuP/KYrhWbjUKT/SoPG0qTjw= k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY= k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA= k8s.io/apiextensions-apiserver v0.35.0 h1:3xHk2rTOdWXXJM+RDQZJvdx0yEOgC0FgQ1PlJatA5T4= k8s.io/apiextensions-apiserver v0.35.0/go.mod h1:E1Ahk9SADaLQ4qtzYFkwUqusXTcaV2uw3l14aqpL2LU= +k8s.io/apimachinery v0.19.0/go.mod h1:DnPGDnARWFvYa3pMHgSxtbZb7gpzzAZ1pTfaUNDVlmA= k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8= k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= +k8s.io/client-go v0.19.0/go.mod h1:H9E/VT95blcFQnlyShFgnFT9ZnJOAceiUHM3MlRC+mU= k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE= k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o= +k8s.io/code-generator v0.19.0/go.mod h1:moqLn7w0t9cMs4+5CQyxnfA/HV8MF6aAVENF+WZZhgk= k8s.io/component-base v0.35.0 h1:+yBrOhzri2S1BVqyVSvcM3PtPyx5GUxCK2tinZz1G94= k8s.io/component-base v0.35.0/go.mod h1:85SCX4UCa6SCFt6p3IKAPej7jSnF3L8EbfSyMZayJR0= +k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= +k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= +k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= +k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6/go.mod h1:UuqjUnNftUyPE5H64/qeyjQoUZhGpeFDVdxjTeEVN2o= k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= 
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20200729134348-d5654de09c73/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 2ef563ac6e..cb6d46962f 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -27,6 +27,7 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/naming" 
"github.com/percona/percona-postgresql-operator/v2/percona/clientcmd" "github.com/percona/percona-postgresql-operator/v2/percona/controller" + "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgbackup/snapshots" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" "github.com/percona/percona-postgresql-operator/v2/percona/pgbackrest" "github.com/percona/percona-postgresql-operator/v2/percona/watcher" @@ -80,6 +81,18 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgBackup.Default() + pgCluster := new(v2.PerconaPGCluster) + if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: request.Namespace}, pgCluster); err != nil { + if !k8serrors.IsNotFound(err) { + return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") + } + pgCluster = nil + } + + if pgBackup.Spec.BackupMethod == v2.BackupMethodPhysicalSnapshot { + return snapshots.Reconcile(ctx, r.Client, pgBackup, pgCluster) + } + if !pgBackup.DeletionTimestamp.IsZero() || pgBackup.Status.State == v2.BackupFailed { if _, err := runFinalizers(ctx, r.Client, pgBackup); err != nil { return reconcile.Result{}, errors.Wrap(err, "failed to run finalizers") @@ -93,14 +106,6 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re } } - pgCluster := new(v2.PerconaPGCluster) - if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: request.Namespace}, pgCluster); err != nil { - if !k8serrors.IsNotFound(err) { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") - } - pgCluster = nil - } - switch pgBackup.Status.State { case v2.BackupNew: if pgCluster == nil { diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go new file mode 100644 index 0000000000..521b7e60f9 --- /dev/null +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -0,0 +1,178 @@ +package snapshots 
+ +import ( + "context" + "errors" + "fmt" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type snapshotExecutor interface { + // Prepare the cluster for performing a snapshot. + // Returns the name of the PVC that will be snapshotted. + prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) + + // Complete the snapshot. + complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error +} + +// Reconcile backup snapshot +func Reconcile( + ctx context.Context, + cl client.Client, + pgBackup *v2.PerconaPGBackup, + pgCluster *v2.PerconaPGCluster, +) (reconcile.Result, error) { + + log := logging.FromContext(ctx). + WithName("SnapshotReconciler"). 
+ WithValues("backup", pgBackup.Name, "cluster", pgCluster.Name) + + if !feature.Enabled(ctx, feature.VolumeSnapshots) { + log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot reconciliation", feature.BackupSnapshots)) + return reconcile.Result{}, nil + } + + // TODO: implement executor + var exec snapshotExecutor + + switch pgBackup.Status.State { + case v2.BackupNew: + return handleStateNew(ctx, log, cl, pgBackup, pgCluster) + case v2.BackupStarting: + return handleStateStarting(ctx, log, cl, exec, pgBackup, pgCluster) + case v2.BackupRunning: + return handleStateRunning(ctx, log, exec, cl, pgBackup, pgCluster) + case v2.BackupFailed: + log.Info("Backup failed") + case v2.BackupSucceeded: + log.Info("Backup succeeded") + } + return reconcile.Result{}, nil +} + +// +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshotclasses,verbs=get;list;watch +func handleStateNew( + ctx context.Context, + log logging.Logger, + cl client.Client, + backup *v2.PerconaPGBackup, + pgCluster *v2.PerconaPGCluster, +) (reconcile.Result, error) { + // Ensure that the volume snapshot class exists. 
+ className := pgCluster.Spec.Backups.VolumeSnapshots.ClassName + if className == "" { + return reconcile.Result{}, errors.New("volume snapshot class name is not set") + } + volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{} + if err := cl.Get(ctx, client.ObjectKey{Name: className}, volumeSnapshotClass); err != nil { + stsErr := fmt.Errorf("failed to get volume snapshot class: %w", err) + backup.Status.State = v2.BackupFailed + backup.Status.Error = stsErr.Error() + return reconcile.Result{}, stsErr + } + + if pgCluster.Status.State != v2.AppStateReady { + log.Info("Waiting for cluster to be ready before creating snapshot") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + backup.Status.State = v2.BackupStarting + if err := cl.Status().Update(ctx, backup); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + log.Info("Backup is starting") + return reconcile.Result{}, nil +} + +func handleStateStarting( + ctx context.Context, + log logging.Logger, + cl client.Client, + exec snapshotExecutor, + backup *v2.PerconaPGBackup, + pgCluster *v2.PerconaPGCluster) (reconcile.Result, error) { + + pvcTarget, err := exec.prepare(ctx, pgCluster) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to prepare for snapshot: %w", err) + } + + backup.Status.State = v2.BackupRunning + backup.Status.Snapshot = &v2.SnapshotStatus{ + TargetPVCName: pvcTarget, + } + + if err := cl.Status().Update(ctx, backup); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + log.Info("Creating snapshot") + return reconcile.Result{}, nil +} + +// +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshots,verbs=get;list;watch;create +func handleStateRunning( + ctx context.Context, + log logging.Logger, + exec snapshotExecutor, + cl client.Client, + backup *v2.PerconaPGBackup, + pgCluster *v2.PerconaPGCluster, +) (reconcile.Result, error) { + 
volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: backup.GetName(), + Namespace: backup.GetNamespace(), + }, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(pgCluster.Spec.Backups.VolumeSnapshots.ClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &backup.Status.Snapshot.TargetPVCName, + }, + }, + } + + if err := cl.Create(ctx, volumeSnapshot); client.IgnoreAlreadyExists(err) != nil { + return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %w", err) + } + + if err := cl.Get(ctx, client.ObjectKeyFromObject(volumeSnapshot), volumeSnapshot); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get volume snapshot: %w", err) + } + + if backup.Status.Snapshot.PVCName == "" { + backup.Status.Snapshot.PVCName = volumeSnapshot.GetName() + if err := cl.Status().Update(ctx, backup); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + } + + switch { + // snapshot is complete and ready to be restored. + case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): + if err := exec.complete(ctx, pgCluster); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) + } + log.Info("Snapshot is complete and ready to be used") + + backup.Status.State = v2.BackupSucceeded + backup.Status.CompletedAt = ptr.To(metav1.Now()) + // error occurred while creating the snapshot. + case volumeSnapshot.Status.Error != nil: + message := volumeSnapshot.Status.Error.Message + return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %s", ptr.Deref(message, "")) + } + + // snapshot is still being created. + // TODO: controller should watch the snapshot for changes rather that periodically requeue. 
+ return reconcile.Result{RequeueAfter: time.Second * 5}, nil +} diff --git a/percona/postgres/common.go b/percona/postgres/common.go index 41026ade48..1e57645b4e 100644 --- a/percona/postgres/common.go +++ b/percona/postgres/common.go @@ -54,6 +54,25 @@ func GetPrimaryPod(ctx context.Context, cli client.Client, cr *v2.PerconaPGClust return &podList.Items[0], nil } +// GetReplicaPods lists the replica pods for a given cluster. +func GetReplicaPods(ctx context.Context, cli client.Client, cr *v2.PerconaPGCluster) ([]corev1.Pod, error) { + podList := &corev1.PodList{} + role := "replica" + + err := cli.List(ctx, podList, &client.ListOptions{ + Namespace: cr.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + "app.kubernetes.io/instance": cr.GetName(), + "postgres-operator.crunchydata.com/role": role, + }), + }) + if err != nil { + return nil, errors.Wrap(err, "failed to list pods") + } + + return podList.Items, nil +} + func determineVersion(cr *v2.PerconaPGCluster) string { if cr.CompareVersion("2.7.0") <= 0 { return cr.Status.PatroniVersion diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 15c4433251..7caa62f470 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -47,6 +47,13 @@ type PerconaPGBackupList struct { Items []PerconaPGBackup `json:"items"` } +type BackupMethod string + +const ( + BackupMethodPhysical BackupMethod = "pgbackrest" + BackupMethodPhysicalSnapshot BackupMethod = "volumeSnapshot" +) + type PerconaPGBackupSpec struct { PGCluster string `json:"pgCluster"` @@ -55,6 +62,12 @@ type PerconaPGBackupSpec struct { // +kubebuilder:validation:Pattern=^repo[1-4] RepoName string `json:"repoName"` + // Method with which to perform the backup + // +kubebuilder:validation:Enum={pgbackrest,volumeSnapshot} + // +kubebuilder:default=pgbackrest + // +optional + BackupMethod BackupMethod 
`json:"backupMethod"` + // Command line options to include when running the pgBackRest backup command. // https://pgbackrest.org/command.html#command-backup // +optional @@ -94,6 +107,14 @@ type PerconaPGBackupStatus struct { BackupName string `json:"backupName,omitempty"` CRVersion string `json:"crVersion,omitempty"` LatestRestorableTime PITRestoreDateTime `json:"latestRestorableTime,omitempty"` + Snapshot *SnapshotStatus `json:"snapshot,omitempty"` +} + +type SnapshotStatus struct { + // PVCName is the name of the PVC that contains the snapshotted data. + PVCName string `json:"pvcName"` + // TargetPVCName is the name of the source PVC that is being snapshotted. + TargetPVCName string `json:"targetPvcName"` } // +kubebuilder:validation:Type=string diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 613ae8fe15..ed6dfcabac 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -492,6 +492,32 @@ type Backups struct { // Enable tracking latest restorable time TrackLatestRestorableTime *bool `json:"trackLatestRestorableTime,omitempty"` + + // VolumeSnapshots configuration + // +optional + VolumeSnapshots *VolumeSnapshots `json:"volumeSnapshots,omitempty"` +} + +type VolumeSnapshotMode string + +const ( + // VolumeSnapshotModeOffline is the mode for taking offline VolumeSnapshots. + // With this mode, the operator will stop a replica and take a snapshot of the PVC. + VolumeSnapshotModeOffline VolumeSnapshotMode = "offline" +) + +type VolumeSnapshots struct { + Enabled *bool `json:"enabled,omitempty"` + + // Mode of the VolumeSnapshot. + // +kubebuilder:validation:Enum={offline} + // +kubebuilder:default=offline + // +optional + Mode VolumeSnapshotMode `json:"mode,omitempty"` + + // Name of the VolumeSnapshotClass to use. 
+ // +kubebuilder:validation:Required + ClassName string `json:"className"` } func (b Backups) IsEnabled() bool { diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 1409dcf859..e19ccd1490 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -30,6 +30,11 @@ func (in *Backups) DeepCopyInto(out *Backups) { *out = new(bool) **out = **in } + if in.VolumeSnapshots != nil { + in, out := &in.VolumeSnapshots, &out.VolumeSnapshots + *out = new(VolumeSnapshots) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Backups. @@ -662,6 +667,11 @@ func (in *PerconaPGBackupStatus) DeepCopyInto(out *PerconaPGBackupStatus) { (*in).DeepCopyInto(*out) } in.LatestRestorableTime.DeepCopyInto(&out.LatestRestorableTime) + if in.Snapshot != nil { + in, out := &in.Snapshot, &out.Snapshot + *out = new(SnapshotStatus) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PerconaPGBackupStatus. @@ -1210,3 +1220,38 @@ func (in *ServiceExpose) DeepCopy() *ServiceExpose { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SnapshotStatus) DeepCopyInto(out *SnapshotStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SnapshotStatus. +func (in *SnapshotStatus) DeepCopy() *SnapshotStatus { + if in == nil { + return nil + } + out := new(SnapshotStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *VolumeSnapshots) DeepCopyInto(out *VolumeSnapshots) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSnapshots. +func (in *VolumeSnapshots) DeepCopy() *VolumeSnapshots { + if in == nil { + return nil + } + out := new(VolumeSnapshots) + in.DeepCopyInto(out) + return out +} From bb96b8af8580b9f0319f2782e3f0da37a31e8814 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 20:27:29 +0530 Subject: [PATCH 05/90] implement offline executor Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 29 +--- .../controller/pgbackup/snapshots/offline.go | 159 ++++++++++++++++++ .../pgbackup/snapshots/reconcile.go | 90 +++++++--- .../v2/perconapgbackup_types.go | 16 ++ .../v2/perconapgcluster_types.go | 2 +- 5 files changed, 252 insertions(+), 44 deletions(-) create mode 100644 percona/controller/pgbackup/snapshots/offline.go diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index cb6d46962f..073c44d16d 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -23,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/clientcmd" @@ -54,6 +55,7 @@ func (r *PGBackupReconciler) SetupWithManager(mgr manager.Manager) error { return (builder.ControllerManagedBy(mgr). For(&v2.PerconaPGBackup{}). WatchesRawSource(source.Channel(r.ExternalChan, &handler.EnqueueRequestForObject{})). + Owns(&volumesnapshotv1.VolumeSnapshot{}). 
Complete(r)) } @@ -160,7 +162,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{}, errors.Errorf("%s repo not defined", pgBackup.Spec.RepoName) } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.Destination = getDestination(pgCluster, pgBackup) bcp.Status.Image = pgCluster.Spec.Backups.PGBackRest.Image bcp.Status.Repo = repo @@ -224,7 +226,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{}, errors.Wrap(err, "update PGBackup") } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupRunning bcp.Status.JobName = job.Name }); err != nil { @@ -238,7 +240,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re if err != nil { // If something has deleted the job even with the finalizer, we should fail the backup. 
if k8serrors.IsNotFound(err) { - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed }); err != nil { return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") @@ -270,7 +272,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.CompletedAt = job.Status.CompletionTime bcp.Status.State = status }); err != nil { @@ -306,7 +308,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re if err == nil { log.Info("Got latest restorable timestamp", "timestamp", latestRestorableTime) - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.LatestRestorableTime.Time = latestRestorableTime }); err != nil { return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") @@ -478,7 +480,7 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, stanzaName = info.Name if pgBackup.Status.BackupName == "" { - if err := updateStatus(ctx, c, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, c, func(bcp *v2.PerconaPGBackup) { bcp.Status.BackupName = backup.Label bcp.Status.BackupType = backup.Type }); err != nil { @@ -727,23 +729,10 @@ func failIfClusterIsNotReady(ctx context.Context, cl client.Client, pgCluster *v log.Info("Cluster is not ready for backup for too long. 
Setting it's state to Failed") - if err := updateStatus(ctx, cl, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed }); err != nil { return errors.Wrap(err, "update PGBackup status") } return nil } - -func updateStatus(ctx context.Context, cl client.Client, pgBackup *v2.PerconaPGBackup, updateFunc func(bcp *v2.PerconaPGBackup)) error { - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - bcp := new(v2.PerconaPGBackup) - if err := cl.Get(ctx, client.ObjectKeyFromObject(pgBackup), bcp); err != nil { - return errors.Wrap(err, "get PGBackup") - } - - updateFunc(bcp) - - return cl.Status().Update(ctx, bcp) - }) -} diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go new file mode 100644 index 0000000000..e55cac7863 --- /dev/null +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -0,0 +1,159 @@ +package snapshots + +import ( + "context" + "slices" + "strings" + "time" + + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type offlineExec struct { + cl client.Client +} + +func newOfflineExec(cl client.Client) *offlineExec { + return &offlineExec{ + cl: cl, + } +} + +func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) { + replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, pgCluster) + if err != nil { + return "", errors.Wrap(err, "failed to get replica pods") + } + if len(replicas) == 0 { + return "", errors.New("no replica pods found") + } + + // sort 
by name to always get a predictable result + slices.SortFunc(replicas, func(x, y corev1.Pod) int { + return strings.Compare(x.GetName(), y.GetName()) + }) + + targetPod := replicas[0] + annotations := targetPod.GetAnnotations() + targetInstanceName := annotations[naming.LabelInstance] + + if err := e.suspendInstanceAndWait(ctx, targetInstanceName, pgCluster); err != nil { + return "", errors.Wrap(err, "failed to suspend instance") + } + + targetPVC, err := e.getTargetPVC(ctx, targetInstanceName, pgCluster.GetNamespace()) + if err != nil { + return "", errors.Wrap(err, "failed to get target PVC") + } + + return targetPVC, nil +} + +func (e *offlineExec) complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error { + if err := e.resumeSuspendedInstance(ctx, pgCluster); err != nil { + return errors.Wrap(err, "failed to resume suspended instance") + } + return nil +} + +func (e *offlineExec) suspendInstanceAndWait(ctx context.Context, instanceName string, pgCluster *v2.PerconaPGCluster) error { + // suspend the instance + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + pgCluster := &v2.PerconaPGCluster{} + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(pgCluster), pgCluster); err != nil { + return errors.Wrap(err, "failed to get PGCluster") + } + annotations := pgCluster.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[naming.SuspendedInstancesAnnotation] = instanceName + pgCluster.SetAnnotations(annotations) + return e.cl.Update(ctx, pgCluster) + }); err != nil { + return errors.Wrap(err, "failed to update PGCluster") + } + + wCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + // wait for the instance to be suspended + if err := wait.PollUntilContextTimeout(wCtx, 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + pods := &corev1.PodList{} + if err := e.cl.List(ctx, pods, &client.ListOptions{ + Namespace: pgCluster.GetNamespace(), 
+ LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: instanceName, + }), + }); err != nil { + return false, errors.Wrap(err, "failed to list pods") + } + return len(pods.Items) == 0, nil + }); err != nil { + return errors.Wrap(err, "failed to wait for instance to suspend") + } + return nil +} + +func (e *offlineExec) resumeSuspendedInstance(ctx context.Context, pgCluster *v2.PerconaPGCluster) error { + suspendedInstancesVal, ok := pgCluster.GetAnnotations()[naming.SuspendedInstancesAnnotation] + if !ok || suspendedInstancesVal == "" { + return nil + } + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + annots := pgCluster.GetAnnotations() + delete(annots, naming.SuspendedInstancesAnnotation) + pgCluster.SetAnnotations(annots) + return e.cl.Update(ctx, pgCluster) + }); err != nil { + return errors.Wrap(err, "failed to update PGCluster") + } + + wCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + suspendedInstances := strings.Split(suspendedInstancesVal, ",") + for _, instanceName := range suspendedInstances { + if err := wait.PollUntilContextTimeout(wCtx, 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { + pods := &corev1.PodList{} + if err := e.cl.List(ctx, pods, &client.ListOptions{ + Namespace: pgCluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: instanceName, + }), + }); err != nil { + return false, errors.Wrap(err, "failed to list pods") + } + return len(pods.Items) == 1, nil + }); err != nil { + return errors.Wrap(err, "failed to wait for instance to resume") + } + } + return nil +} + +func (e *offlineExec) getTargetPVC(ctx context.Context, instanceName, namespace string) (string, error) { + pvcs := &corev1.PersistentVolumeClaimList{} + if err := e.cl.List(ctx, pvcs, &client.ListOptions{ + Namespace: namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: 
instanceName, + naming.LabelRole: naming.RolePostgresData, + }), + }); err != nil { + return "", errors.Wrap(err, "failed to list PVCs") + } + if len(pvcs.Items) == 0 { + return "", errors.New("no PVC found") + } + return pvcs.Items[0].GetName(), nil +} diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 521b7e60f9..d038eb3bb3 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -13,6 +13,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -20,7 +21,6 @@ type snapshotExecutor interface { // Prepare the cluster for performing a snapshot. // Returns the name of the PVC that will be snapshotted. prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) - // Complete the snapshot. complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error } @@ -37,14 +37,39 @@ func Reconcile( WithName("SnapshotReconciler"). WithValues("backup", pgBackup.Name, "cluster", pgCluster.Name) + // Do nothing if the feature is not enabled. if !feature.Enabled(ctx, feature.VolumeSnapshots) { log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot reconciliation", feature.BackupSnapshots)) return reconcile.Result{}, nil } - // TODO: implement executor + // Check if volume snapshots are enabled for this cluster. 
+ if pgCluster.Spec.Backups.VolumeSnapshots == nil || !pgCluster.Spec.Backups.VolumeSnapshots.Enabled { + if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = "Volume snapshots are not enabled for this cluster" + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + return reconcile.Result{}, nil + } + var exec snapshotExecutor + switch pgCluster.Spec.Backups.VolumeSnapshots.Mode { + case v2.VolumeSnapshotModeOffline: + exec = newOfflineExec(cl) + default: + stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) + if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = stsErr.Error() + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + return reconcile.Result{}, stsErr + } + switch pgBackup.Status.State { case v2.BackupNew: return handleStateNew(ctx, log, cl, pgBackup, pgCluster) @@ -76,8 +101,12 @@ func handleStateNew( volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{} if err := cl.Get(ctx, client.ObjectKey{Name: className}, volumeSnapshotClass); err != nil { stsErr := fmt.Errorf("failed to get volume snapshot class: %w", err) - backup.Status.State = v2.BackupFailed - backup.Status.Error = stsErr.Error() + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = stsErr.Error() + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } return reconcile.Result{}, stsErr } @@ -86,9 +115,10 @@ func handleStateNew( return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - backup.Status.State = v2.BackupStarting - if err := cl.Status().Update(ctx, backup); err != nil { - return 
reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupStarting + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } log.Info("Backup is starting") return reconcile.Result{}, nil @@ -107,14 +137,15 @@ func handleStateStarting( return reconcile.Result{}, fmt.Errorf("failed to prepare for snapshot: %w", err) } - backup.Status.State = v2.BackupRunning - backup.Status.Snapshot = &v2.SnapshotStatus{ - TargetPVCName: pvcTarget, + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupRunning + bcp.Status.Snapshot = &v2.SnapshotStatus{ + TargetPVCName: pvcTarget, + } + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } - if err := cl.Status().Update(ctx, backup); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) - } log.Info("Creating snapshot") return reconcile.Result{}, nil } @@ -140,6 +171,9 @@ func handleStateRunning( }, }, } + if err := controllerutil.SetOwnerReference(backup, volumeSnapshot, cl.Scheme()); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + } if err := cl.Create(ctx, volumeSnapshot); client.IgnoreAlreadyExists(err) != nil { return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %w", err) @@ -150,9 +184,10 @@ func handleStateRunning( } if backup.Status.Snapshot.PVCName == "" { - backup.Status.Snapshot.PVCName = volumeSnapshot.GetName() - if err := cl.Status().Update(ctx, backup); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.Snapshot.PVCName = volumeSnapshot.GetName() + }); updErr != nil { + return 
reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } } @@ -164,15 +199,24 @@ func handleStateRunning( } log.Info("Snapshot is complete and ready to be used") - backup.Status.State = v2.BackupSucceeded - backup.Status.CompletedAt = ptr.To(metav1.Now()) + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupSucceeded + bcp.Status.CompletedAt = ptr.To(metav1.Now()) + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + // error occurred while creating the snapshot. case volumeSnapshot.Status.Error != nil: - message := volumeSnapshot.Status.Error.Message - return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %s", ptr.Deref(message, "")) + message := ptr.Deref(volumeSnapshot.Status.Error.Message, "") + log.Error(nil, "volume snapshot failed", "error", message) + if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = message + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } } - // snapshot is still being created. - // TODO: controller should watch the snapshot for changes rather that periodically requeue. 
- return reconcile.Result{RequeueAfter: time.Second * 5}, nil + return reconcile.Result{}, nil } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 7caa62f470..e9bfe12a06 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -1,12 +1,15 @@ package v2 import ( + "context" "encoding/json" "fmt" "time" v "github.com/hashicorp/go-version" + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -199,3 +202,16 @@ func (b *PerconaPGBackup) CompareVersion(ver string) int { backupVersion := v.Must(v.NewVersion(b.Status.CRVersion)) return backupVersion.Compare(v.Must(v.NewVersion(ver))) } + +func (pgBackup *PerconaPGBackup) UpdateStatus(ctx context.Context, cl client.Client, updateFunc func(bcp *PerconaPGBackup)) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := new(PerconaPGBackup) + if err := cl.Get(ctx, client.ObjectKeyFromObject(pgBackup), bcp); err != nil { + return errors.Wrap(err, "get PGBackup") + } + + updateFunc(bcp) + + return cl.Status().Update(ctx, bcp) + }) +} diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index ed6dfcabac..7c95812a50 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -507,7 +507,7 @@ const ( ) type VolumeSnapshots struct { - Enabled *bool `json:"enabled,omitempty"` + Enabled bool `json:"enabled,omitempty"` // Mode of the VolumeSnapshot. 
// +kubebuilder:validation:Enum={offline} From 22cb071b983b08f4e6ac205b8e830546a7b9933c Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 21:00:56 +0530 Subject: [PATCH 06/90] naming improvements Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 2 +- percona/controller/pgbackup/snapshots/offline.go | 9 +++------ percona/postgres/common.go | 4 ++-- pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go | 4 ++-- pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go | 7 +------ 5 files changed, 9 insertions(+), 17 deletions(-) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 073c44d16d..09e208758c 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -91,7 +91,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgCluster = nil } - if pgBackup.Spec.BackupMethod == v2.BackupMethodPhysicalSnapshot { + if pgBackup.Spec.BackupMethod == v2.BackupMethodVolumeSnapshot { return snapshots.Reconcile(ctx, r.Client, pgBackup, pgCluster) } diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index e55cac7863..c17e9e0274 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -2,7 +2,6 @@ package snapshots import ( "context" - "slices" "strings" "time" @@ -28,6 +27,9 @@ func newOfflineExec(cl client.Client) *offlineExec { } func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) { + // TODO: for single node clusters, we should use the primary, + // but this is unsafe as it results in downtime during backup. + // We should at least let the user explicilty opt-in for this behaviour. 
replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, pgCluster) if err != nil { return "", errors.Wrap(err, "failed to get replica pods") @@ -36,11 +38,6 @@ func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluste return "", errors.New("no replica pods found") } - // sort by name to always get a predictable result - slices.SortFunc(replicas, func(x, y corev1.Pod) int { - return strings.Compare(x.GetName(), y.GetName()) - }) - targetPod := replicas[0] annotations := targetPod.GetAnnotations() targetInstanceName := annotations[naming.LabelInstance] diff --git a/percona/postgres/common.go b/percona/postgres/common.go index 1e57645b4e..fae20c55a2 100644 --- a/percona/postgres/common.go +++ b/percona/postgres/common.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) @@ -57,13 +58,12 @@ func GetPrimaryPod(ctx context.Context, cli client.Client, cr *v2.PerconaPGClust // GetReplicaPods lists the replica pods for a given cluster. 
func GetReplicaPods(ctx context.Context, cli client.Client, cr *v2.PerconaPGCluster) ([]corev1.Pod, error) { podList := &corev1.PodList{} - role := "replica" err := cli.List(ctx, podList, &client.ListOptions{ Namespace: cr.Namespace, LabelSelector: labels.SelectorFromSet(map[string]string{ "app.kubernetes.io/instance": cr.GetName(), - "postgres-operator.crunchydata.com/role": role, + "postgres-operator.crunchydata.com/role": naming.RolePatroniReplica, }), }) if err != nil { diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index e9bfe12a06..f0750f97a7 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -53,8 +53,8 @@ type PerconaPGBackupList struct { type BackupMethod string const ( - BackupMethodPhysical BackupMethod = "pgbackrest" - BackupMethodPhysicalSnapshot BackupMethod = "volumeSnapshot" + BackupMethodPGBackrest BackupMethod = "pgbackrest" + BackupMethodVolumeSnapshot BackupMethod = "volumeSnapshot" ) type PerconaPGBackupSpec struct { diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index e19ccd1490..6ad475b781 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -33,7 +33,7 @@ func (in *Backups) DeepCopyInto(out *Backups) { if in.VolumeSnapshots != nil { in, out := &in.VolumeSnapshots, &out.VolumeSnapshots *out = new(VolumeSnapshots) - (*in).DeepCopyInto(*out) + **out = **in } } @@ -1239,11 +1239,6 @@ func (in *SnapshotStatus) DeepCopy() *SnapshotStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *VolumeSnapshots) DeepCopyInto(out *VolumeSnapshots) { *out = *in - if in.Enabled != nil { - in, out := &in.Enabled, &out.Enabled - *out = new(bool) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSnapshots. From 478f8d99656291ffb74a6b7536d054f228f1942e Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 21:15:02 +0530 Subject: [PATCH 07/90] add to scheme Signed-off-by: Mayank Shah --- cmd/postgres-operator/main.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 6244d92223..88796a1be7 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -14,6 +14,7 @@ import ( "time" "unicode" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" "go.opentelemetry.io/otel" uzap "go.uber.org/zap" @@ -124,6 +125,8 @@ func main() { // Add Percona custom resource types to scheme assertNoError(v2.AddToScheme(mgr.GetScheme())) + assertNoError(volumesnapshotv1.AddToScheme(mgr.GetScheme())) + // add all PostgreSQL Operator controllers to the runtime manager err = addControllersToManager(ctx, mgr) assertNoError(err) From 8bff1578e760c62db524ad1d571f7c02fa1be8f8 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 21:15:23 +0530 Subject: [PATCH 08/90] fix reconcile state Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index d038eb3bb3..da567ea7f5 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -77,10 +77,8 @@ func Reconcile( return handleStateStarting(ctx, log, cl, exec, pgBackup, pgCluster) case v2.BackupRunning: return 
handleStateRunning(ctx, log, exec, cl, pgBackup, pgCluster) - case v2.BackupFailed: - log.Info("Backup failed") - case v2.BackupSucceeded: - log.Info("Backup succeeded") + case v2.BackupFailed, v2.BackupSucceeded: + return reconcile.Result{}, nil } return reconcile.Result{}, nil } From 0c906ea6f617b41bdbdfebe11407d858d4e9d995 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 22:29:55 +0530 Subject: [PATCH 09/90] bug fixes and improvements Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgbackups.yaml | 4 +-- percona/controller/pgbackup/controller.go | 2 +- .../controller/pgbackup/snapshots/offline.go | 9 +++-- .../pgbackup/snapshots/reconcile.go | 34 +++++-------------- .../v2/perconapgbackup_types.go | 4 +-- 5 files changed, 20 insertions(+), 33 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 2b63c75643..09dd500c24 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -401,8 +401,8 @@ spec: snapshot: properties: pvcName: - description: PVCName is the name of the PVC that contains the - snapshotted data. + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. 
type: string targetPvcName: description: TargetPVCName is the name of the source PVC that diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 09e208758c..23c8611759 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -6,6 +6,7 @@ import ( "slices" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -23,7 +24,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/clientcmd" diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index c17e9e0274..82e8a3ca61 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -39,8 +39,12 @@ func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluste } targetPod := replicas[0] - annotations := targetPod.GetAnnotations() - targetInstanceName := annotations[naming.LabelInstance] + labels := targetPod.GetLabels() + + targetInstanceName := labels[naming.LabelInstance] + if targetInstanceName == "" { + return "", errors.New("target instance name not found on pod labels") + } if err := e.suspendInstanceAndWait(ctx, targetInstanceName, pgCluster); err != nil { return "", errors.Wrap(err, "failed to suspend instance") @@ -64,7 +68,6 @@ func (e *offlineExec) complete(ctx context.Context, pgCluster *v2.PerconaPGClust func (e *offlineExec) suspendInstanceAndWait(ctx context.Context, instanceName string, pgCluster 
*v2.PerconaPGCluster) error { // suspend the instance if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - pgCluster := &v2.PerconaPGCluster{} if err := e.cl.Get(ctx, client.ObjectKeyFromObject(pgCluster), pgCluster); err != nil { return errors.Wrap(err, "failed to get PGCluster") } diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index da567ea7f5..18730d4cde 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -2,11 +2,10 @@ package snapshots import ( "context" - "errors" "fmt" "time" - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v4/apis/volumesnapshot/v1" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" @@ -38,7 +37,7 @@ func Reconcile( WithValues("backup", pgBackup.Name, "cluster", pgCluster.Name) // Do nothing if the feature is not enabled. - if !feature.Enabled(ctx, feature.VolumeSnapshots) { + if !feature.Enabled(ctx, feature.BackupSnapshots) { log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot reconciliation", feature.BackupSnapshots)) return reconcile.Result{}, nil } @@ -83,7 +82,6 @@ func Reconcile( return reconcile.Result{}, nil } -// +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshotclasses,verbs=get;list;watch func handleStateNew( ctx context.Context, log logging.Logger, @@ -91,23 +89,6 @@ func handleStateNew( backup *v2.PerconaPGBackup, pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { - // Ensure that the volume snapshot class exists. 
- className := pgCluster.Spec.Backups.VolumeSnapshots.ClassName - if className == "" { - return reconcile.Result{}, errors.New("volume snapshot class name is not set") - } - volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{} - if err := cl.Get(ctx, client.ObjectKey{Name: className}, volumeSnapshotClass); err != nil { - stsErr := fmt.Errorf("failed to get volume snapshot class: %w", err) - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.State = v2.BackupFailed - bcp.Status.Error = stsErr.Error() - }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) - } - return reconcile.Result{}, stsErr - } - if pgCluster.Status.State != v2.AppStateReady { log.Info("Waiting for cluster to be ready before creating snapshot") return reconcile.Result{RequeueAfter: time.Second * 5}, nil @@ -173,7 +154,10 @@ func handleStateRunning( return reconcile.Result{}, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) } - if err := cl.Create(ctx, volumeSnapshot); client.IgnoreAlreadyExists(err) != nil { + if err := cl.Create(ctx, volumeSnapshot); err == nil { + log.Info("Volume snapshot created successfully") + return reconcile.Result{}, nil + } else if client.IgnoreAlreadyExists(err) != nil { return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %w", err) } @@ -181,9 +165,9 @@ func handleStateRunning( return reconcile.Result{}, fmt.Errorf("failed to get volume snapshot: %w", err) } - if backup.Status.Snapshot.PVCName == "" { + if backup.Status.Snapshot.VolumeSnapshotName == "" { if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.Snapshot.PVCName = volumeSnapshot.GetName() + bcp.Status.Snapshot.VolumeSnapshotName = volumeSnapshot.GetName() }); updErr != nil { return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } @@ -191,7 +175,7 @@ func handleStateRunning( switch { // snapshot is 
complete and ready to be restored. - case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): + case volumeSnapshot.Status != nil && ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): if err := exec.complete(ctx, pgCluster); err != nil { return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index f0750f97a7..d64bdd5b2b 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -114,8 +114,8 @@ type PerconaPGBackupStatus struct { } type SnapshotStatus struct { - // PVCName is the name of the PVC that contains the snapshotted data. - PVCName string `json:"pvcName"` + // VolumeSnapshotName is the name of the VolumeSnapshot that contains the snapshotted data. + VolumeSnapshotName string `json:"pvcName"` // TargetPVCName is the name of the source PVC that is being snapshotted. 
TargetPVCName string `json:"targetPvcName"` } From 77a96b68847a7bdbcebc787adeed46ac37d6d321 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 22:54:09 +0530 Subject: [PATCH 10/90] naming improvements and fixes Signed-off-by: Mayank Shah --- .../generated/pgv2.percona.com_perconapgbackups.yaml | 12 ++++++------ .../bases/pgv2.percona.com_perconapgclusters.yaml | 12 ++++++------ deploy/crd.yaml | 12 ++++++------ deploy/cw-bundle.yaml | 12 ++++++------ percona/controller/pgbackup/controller.go | 2 +- percona/controller/pgbackup/snapshots/reconcile.go | 10 ++++++++-- .../pgv2.percona.com/v2/perconapgbackup_types.go | 4 ++-- 7 files changed, 35 insertions(+), 29 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 09dd500c24..e7d50dd6bd 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -70,7 +70,7 @@ spec: type: object spec: properties: - backupMethod: + method: default: pgbackrest description: Method with which to perform the backup enum: @@ -400,17 +400,17 @@ spec: type: object snapshot: properties: - pvcName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string targetPvcName: description: TargetPVCName is the name of the source PVC that is being snapshotted. type: string + volumeSnapshotName: + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. 
+ type: string required: - - pvcName - targetPvcName + - volumeSnapshotName type: object state: type: string diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 7daf57fddf..bf4cc4e70e 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -69,7 +69,7 @@ spec: type: object spec: properties: - backupMethod: + method: default: pgbackrest description: Method with which to perform the backup enum: @@ -399,17 +399,17 @@ spec: type: object snapshot: properties: - pvcName: - description: PVCName is the name of the PVC that contains the - snapshotted data. - type: string targetPvcName: description: TargetPVCName is the name of the source PVC that is being snapshotted. type: string + volumeSnapshotName: + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. + type: string required: - - pvcName - targetPvcName + - volumeSnapshotName type: object state: type: string diff --git a/deploy/crd.yaml b/deploy/crd.yaml index dc56afaab4..331046d74d 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -364,7 +364,7 @@ spec: type: object spec: properties: - backupMethod: + method: default: pgbackrest description: Method with which to perform the backup enum: @@ -694,17 +694,17 @@ spec: type: object snapshot: properties: - pvcName: - description: PVCName is the name of the PVC that contains the - snapshotted data. - type: string targetPvcName: description: TargetPVCName is the name of the source PVC that is being snapshotted. type: string + volumeSnapshotName: + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. 
+ type: string required: - - pvcName - targetPvcName + - volumeSnapshotName type: object state: type: string diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 2bac85fc5d..c6ce24609d 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -364,7 +364,7 @@ spec: type: object spec: properties: - backupMethod: + method: default: pgbackrest description: Method with which to perform the backup enum: @@ -694,17 +694,17 @@ spec: type: object snapshot: properties: - pvcName: - description: PVCName is the name of the PVC that contains the - snapshotted data. - type: string targetPvcName: description: TargetPVCName is the name of the source PVC that is being snapshotted. type: string + volumeSnapshotName: + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. + type: string required: - - pvcName - targetPvcName + - volumeSnapshotName type: object state: type: string diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 23c8611759..11f8a26457 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -91,7 +91,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgCluster = nil } - if pgBackup.Spec.BackupMethod == v2.BackupMethodVolumeSnapshot { + if pgBackup.Spec.Method == v2.BackupMethodVolumeSnapshot { return snapshots.Reconcile(ctx, r.Client, pgBackup, pgCluster) } diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 18730d4cde..54fce9d410 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -77,6 +77,7 @@ func Reconcile( case v2.BackupRunning: return handleStateRunning(ctx, log, exec, cl, pgBackup, pgCluster) case v2.BackupFailed, v2.BackupSucceeded: + // TODO: call exec.complete() here? 
return reconcile.Result{}, nil } return reconcile.Result{}, nil @@ -150,7 +151,7 @@ func handleStateRunning( }, }, } - if err := controllerutil.SetOwnerReference(backup, volumeSnapshot, cl.Scheme()); err != nil { + if err := controllerutil.SetControllerReference(backup, volumeSnapshot, cl.Scheme()); err != nil { return reconcile.Result{}, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) } @@ -174,8 +175,13 @@ func handleStateRunning( } switch { + // no status reported, requeue. + // Note: no need to set a RequeuAfter because the controller watches the child VolumeSnapshot object. + case volumeSnapshot.Status == nil: + return reconcile.Result{}, nil + // snapshot is complete and ready to be restored. - case volumeSnapshot.Status != nil && ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): + case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): if err := exec.complete(ctx, pgCluster); err != nil { return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index d64bdd5b2b..7507b64fb6 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -69,7 +69,7 @@ type PerconaPGBackupSpec struct { // +kubebuilder:validation:Enum={pgbackrest,volumeSnapshot} // +kubebuilder:default=pgbackrest // +optional - BackupMethod BackupMethod `json:"backupMethod"` + Method BackupMethod `json:"method"` // Command line options to include when running the pgBackRest backup command. // https://pgbackrest.org/command.html#command-backup @@ -115,7 +115,7 @@ type PerconaPGBackupStatus struct { type SnapshotStatus struct { // VolumeSnapshotName is the name of the VolumeSnapshot that contains the snapshotted data. 
- VolumeSnapshotName string `json:"pvcName"` + VolumeSnapshotName string `json:"volumeSnapshotName"` // TargetPVCName is the name of the source PVC that is being snapshotted. TargetPVCName string `json:"targetPvcName"` } From 7f0587fc2dbaa6ab010134b1c3d36382072f40b7 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 21 Jan 2026 22:54:31 +0530 Subject: [PATCH 11/90] ran make generate Signed-off-by: Mayank Shah --- deploy/bundle.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index cd223c0c12..56adcb740b 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -364,7 +364,7 @@ spec: type: object spec: properties: - backupMethod: + method: default: pgbackrest description: Method with which to perform the backup enum: @@ -694,17 +694,17 @@ spec: type: object snapshot: properties: - pvcName: - description: PVCName is the name of the PVC that contains the - snapshotted data. - type: string targetPvcName: description: TargetPVCName is the name of the source PVC that is being snapshotted. type: string + volumeSnapshotName: + description: VolumeSnapshotName is the name of the VolumeSnapshot + that contains the snapshotted data. 
+ type: string required: - - pvcName - targetPvcName + - volumeSnapshotName type: object state: type: string From 7b33275b20e56b2fd7dcbd4d44becc450d848bcb Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 10:30:06 +0530 Subject: [PATCH 12/90] fix PGBackup field validations Signed-off-by: Mayank Shah --- .../percona/generated/pgv2.percona.com_perconapgbackups.yaml | 4 ++++ config/crd/bases/pgv2.percona.com_perconapgclusters.yaml | 4 ++++ deploy/bundle.yaml | 4 ++++ deploy/crd.yaml | 4 ++++ deploy/cw-bundle.yaml | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index e7d50dd6bd..65f9f7c264 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -95,6 +95,10 @@ spec: - pgCluster - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: (self.method == "" || self.method == "pgbackrest") && self.repoName + == "" status: properties: backupName: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index bf4cc4e70e..8c497bd618 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -94,6 +94,10 @@ spec: - pgCluster - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: (self.method == "" || self.method == "pgbackrest") && self.repoName + == "" status: properties: backupName: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 56adcb740b..1960b08dde 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -389,6 +389,10 @@ spec: - pgCluster - repoName type: object + x-kubernetes-validations: + - message: repoName is required when 
method is 'pgbackrest' + rule: (self.method == "" || self.method == "pgbackrest") && self.repoName + == "" status: properties: backupName: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 331046d74d..bf14575fc2 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -389,6 +389,10 @@ spec: - pgCluster - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: (self.method == "" || self.method == "pgbackrest") && self.repoName + == "" status: properties: backupName: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index c6ce24609d..5d5e0a9938 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -389,6 +389,10 @@ spec: - pgCluster - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: (self.method == "" || self.method == "pgbackrest") && self.repoName + == "" status: properties: backupName: From 7e412fea5c84b5f2ce3fd3ead5e3fedc00eb17ca Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 10:30:32 +0530 Subject: [PATCH 13/90] refactors and stability improvements Signed-off-by: Mayank Shah --- .../pgbackup/snapshots/reconcile.go | 220 ++++++++++++------ percona/naming/finalizers.go | 5 + .../v2/perconapgbackup_types.go | 2 +- 3 files changed, 151 insertions(+), 76 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 54fce9d410..6198a9b34d 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -8,7 +8,9 @@ import ( volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 
"github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -24,6 +26,39 @@ type snapshotExecutor interface { complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error } +type snapshotReconciler struct { + cl client.Client + log logging.Logger + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + exec snapshotExecutor +} + +func newSnapshotReconciler( + cl client.Client, + log logging.Logger, + cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, + exec snapshotExecutor, +) *snapshotReconciler { + return &snapshotReconciler{ + cl: cl, + log: log, + cluster: cluster, + backup: backup, + exec: exec, + } +} + +func newSnapshotExec(mode v2.VolumeSnapshotMode, cl client.Client) (snapshotExecutor, error) { + switch mode { + case v2.VolumeSnapshotModeOffline: + return newOfflineExec(cl), nil + default: + return nil, fmt.Errorf("invalid or unsupported volume snapshot mode: %s", mode) + } +} + // Reconcile backup snapshot func Reconcile( ctx context.Context, @@ -31,6 +66,9 @@ func Reconcile( pgBackup *v2.PerconaPGBackup, pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { + if pgBackup == nil || pgCluster == nil { + return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil") + } log := logging.FromContext(ctx). WithName("SnapshotReconciler"). 
@@ -53,12 +91,8 @@ func Reconcile( return reconcile.Result{}, nil } - var exec snapshotExecutor - - switch pgCluster.Spec.Backups.VolumeSnapshots.Mode { - case v2.VolumeSnapshotModeOffline: - exec = newOfflineExec(cl) - default: + exec, err := newSnapshotExec(pgCluster.Spec.Backups.VolumeSnapshots.Mode, cl) + if err != nil { stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed @@ -69,138 +103,117 @@ func Reconcile( return reconcile.Result{}, stsErr } - switch pgBackup.Status.State { + r := newSnapshotReconciler(cl, log, pgCluster, pgBackup, exec) + return r.reconcile(ctx) +} + +func (r *snapshotReconciler) reconcile(ctx context.Context) (reconcile.Result, error) { + if !r.backup.GetDeletionTimestamp().IsZero() { + return reconcile.Result{}, r.complete(ctx) + } + + switch r.backup.Status.State { case v2.BackupNew: - return handleStateNew(ctx, log, cl, pgBackup, pgCluster) + return r.reconcileNew(ctx) case v2.BackupStarting: - return handleStateStarting(ctx, log, cl, exec, pgBackup, pgCluster) + return r.reconcileStarting(ctx) case v2.BackupRunning: - return handleStateRunning(ctx, log, exec, cl, pgBackup, pgCluster) - case v2.BackupFailed, v2.BackupSucceeded: - // TODO: call exec.complete() here? 
+ return r.reconcileRunning(ctx) + case v2.BackupFailed: + return reconcile.Result{}, r.complete(ctx) + case v2.BackupSucceeded: return reconcile.Result{}, nil } return reconcile.Result{}, nil } -func handleStateNew( - ctx context.Context, - log logging.Logger, - cl client.Client, - backup *v2.PerconaPGBackup, - pgCluster *v2.PerconaPGCluster, -) (reconcile.Result, error) { - if pgCluster.Status.State != v2.AppStateReady { - log.Info("Waiting for cluster to be ready before creating snapshot") +func (r *snapshotReconciler) reconcileNew(ctx context.Context) (reconcile.Result, error) { + if r.cluster.Status.State != v2.AppStateReady { + r.log.Info("Waiting for cluster to be ready before creating snapshot") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupStarting }); updErr != nil { return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } - log.Info("Backup is starting") + r.log.Info("Snapshot is starting") return reconcile.Result{}, nil } -func handleStateStarting( - ctx context.Context, - log logging.Logger, - cl client.Client, - exec snapshotExecutor, - backup *v2.PerconaPGBackup, - pgCluster *v2.PerconaPGCluster) (reconcile.Result, error) { - - pvcTarget, err := exec.prepare(ctx, pgCluster) - if err != nil { - return reconcile.Result{}, fmt.Errorf("failed to prepare for snapshot: %w", err) +func (r *snapshotReconciler) reconcileStarting(ctx context.Context) (reconcile.Result, error) { + if err := r.prepare(ctx); err != nil { + return reconcile.Result{}, err } - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupRunning - bcp.Status.Snapshot = &v2.SnapshotStatus{ - TargetPVCName: pvcTarget, - } }); updErr != nil { 
return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } - - log.Info("Creating snapshot") + r.log.Info("Snapshot is running") return reconcile.Result{}, nil } // +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshots,verbs=get;list;watch;create -func handleStateRunning( - ctx context.Context, - log logging.Logger, - exec snapshotExecutor, - cl client.Client, - backup *v2.PerconaPGBackup, - pgCluster *v2.PerconaPGCluster, -) (reconcile.Result, error) { +func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Result, error) { volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ ObjectMeta: metav1.ObjectMeta{ - Name: backup.GetName(), - Namespace: backup.GetNamespace(), + Name: r.backup.GetName(), + Namespace: r.backup.GetNamespace(), }, Spec: volumesnapshotv1.VolumeSnapshotSpec{ - VolumeSnapshotClassName: ptr.To(pgCluster.Spec.Backups.VolumeSnapshots.ClassName), + VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), Source: volumesnapshotv1.VolumeSnapshotSource{ - PersistentVolumeClaimName: &backup.Status.Snapshot.TargetPVCName, + PersistentVolumeClaimName: &r.backup.Status.Snapshot.TargetPVCName, }, }, } - if err := controllerutil.SetControllerReference(backup, volumeSnapshot, cl.Scheme()); err != nil { + if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { return reconcile.Result{}, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) } - if err := cl.Create(ctx, volumeSnapshot); err == nil { - log.Info("Volume snapshot created successfully") - return reconcile.Result{}, nil - } else if client.IgnoreAlreadyExists(err) != nil { - return reconcile.Result{}, fmt.Errorf("failed to create volume snapshot: %w", err) + created, err := r.ensureSnapshot(ctx, volumeSnapshot) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to ensure snapshot: %w", err) } - - if err := cl.Get(ctx, 
client.ObjectKeyFromObject(volumeSnapshot), volumeSnapshot); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to get volume snapshot: %w", err) + if created { + r.log.Info("Volume snapshot created successfully", "snapshot", volumeSnapshot.GetName()) + return reconcile.Result{}, nil // return back later to observe the status } - if backup.Status.Snapshot.VolumeSnapshotName == "" { - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.Snapshot.VolumeSnapshotName = volumeSnapshot.GetName() - }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) - } + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(volumeSnapshot), volumeSnapshot); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get volume snapshot: %w", err) } switch { - // no status reported, requeue. - // Note: no need to set a RequeuAfter because the controller watches the child VolumeSnapshot object. + // no status reported case volumeSnapshot.Status == nil: return reconcile.Result{}, nil // snapshot is complete and ready to be restored. case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): - if err := exec.complete(ctx, pgCluster); err != nil { + if err := r.exec.complete(ctx, r.cluster); err != nil { return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) } - log.Info("Snapshot is complete and ready to be used") - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupSucceeded bcp.Status.CompletedAt = ptr.To(metav1.Now()) }); updErr != nil { return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } + r.log.Info("Snapshot is complete and ready to be used") // error occurred while creating the snapshot. 
case volumeSnapshot.Status.Error != nil: message := ptr.Deref(volumeSnapshot.Status.Error.Message, "") - log.Error(nil, "volume snapshot failed", "error", message) - if updErr := backup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + stsErr := fmt.Errorf("volume snapshot failed: %s", message) + r.log.Error(stsErr, "Volume snapshot failed") + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed - bcp.Status.Error = message + bcp.Status.Error = stsErr.Error() }); updErr != nil { return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } @@ -208,3 +221,60 @@ func handleStateRunning( return reconcile.Result{}, nil } + +func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot *volumesnapshotv1.VolumeSnapshot) (bool, error) { + if err := r.cl.Create(ctx, volumeSnapshot); err != nil { + return false, client.IgnoreAlreadyExists(err) + } + + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + if bcp.Status.Snapshot == nil { + bcp.Status.Snapshot = &v2.SnapshotStatus{} + } + bcp.Status.Snapshot.VolumeSnapshotName = volumeSnapshot.GetName() + }); updErr != nil { + return true, fmt.Errorf("failed to update volumeSnapshot name in backup status: %w", updErr) + } + return true, nil +} + +func (r *snapshotReconciler) prepare(ctx context.Context) error { + if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { + return nil + } + + pvcTarget, err := r.exec.prepare(ctx, r.cluster) + if err != nil { + return fmt.Errorf("failed to prepare for snapshot: %w", err) + } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + if bcp.Status.Snapshot == nil { + bcp.Status.Snapshot = &v2.SnapshotStatus{} + } + bcp.Status.Snapshot.TargetPVCName = pvcTarget + }); err != nil { + return fmt.Errorf("failed to update backup status: %w", err) + } + controllerutil.AddFinalizer(r.backup, 
pNaming.FinalizerCompleteSnapshot) + if err := r.cl.Update(ctx, r.backup); err != nil { + return fmt.Errorf("failed to update backup: %w", err) + } + r.log.Info("Prepared for snapshot") + return nil +} + +func (r *snapshotReconciler) complete(ctx context.Context) error { + if !controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { + return nil + } + + if err := r.exec.complete(ctx, r.cluster); err != nil { + return fmt.Errorf("complete failed: %w", err) + } + + controllerutil.RemoveFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) + if err := r.cl.Update(ctx, r.backup); err != nil { + return fmt.Errorf("failed to update backup: %w", err) + } + return nil +} diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index d2fa2858bf..37bd64ec9b 100644 --- a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -7,6 +7,11 @@ const ( FinalizerStopWatchers = PrefixPerconaInternal + "stop-watchers" //nolint:gosec FinalizerDeleteBackups = PrefixPercona + "delete-backups" + // FinalizerCompleteSnapshot is set on PerconaPGBackup objects. + // It ensures that any changes made to the PGCluster are reverted upon + // snapshot completion (success or failure) or pre-mature deletion of the PGBackup. 
+ FinalizerCompleteSnapshot = PrefixPercona + "complete-snapshot" + FinalizerStopWatchersDeprecated = PrefixPercona + "stop-watchers" //nolint:gosec ) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 7507b64fb6..3fc9b1d9af 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -57,11 +57,11 @@ const ( BackupMethodVolumeSnapshot BackupMethod = "volumeSnapshot" ) +// +kubebuilder:validation:XValidation:rule="(self.method == \"\" || self.method == \"pgbackrest\") && self.repoName == \"\"",message="repoName is required when method is 'pgbackrest'" type PerconaPGBackupSpec struct { PGCluster string `json:"pgCluster"` // The name of the pgBackRest repo to run the backup command against. - // +kubebuilder:validation:Required // +kubebuilder:validation:Pattern=^repo[1-4] RepoName string `json:"repoName"` From d30df00e4d83ff67e91c9dbfd67373bb2a5670ab Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 10:31:39 +0530 Subject: [PATCH 14/90] update cr.yaml examples Signed-off-by: Mayank Shah --- deploy/backup.yaml | 1 + deploy/cr.yaml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/deploy/backup.yaml b/deploy/backup.yaml index 4ca1ee3725..34d87572fd 100644 --- a/deploy/backup.yaml +++ b/deploy/backup.yaml @@ -5,5 +5,6 @@ metadata: spec: pgCluster: cluster1 repoName: repo1 +# method: volumeSnapshot # options: # - --type=full diff --git a/deploy/cr.yaml b/deploy/cr.yaml index 6fb3524ad6..0010d3a1e7 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -406,6 +406,10 @@ spec: backups: # trackLatestRestorableTime: true +# volumeSnapshots: +# enabled: true +# mode: offline +# className: VOLUME-SNAPSHOT-CLASS pgbackrest: # metadata: # labels: From 0e2537c4fbd76a0da3dbaeef4aec09e756547343 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 10:35:03 +0530 Subject: [PATCH 15/90] organize imports 
Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/offline.go | 7 ++++--- percona/controller/pgbackup/snapshots/reconcile.go | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 82e8a3ca61..4f92dbac66 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -5,15 +5,16 @@ import ( "strings" "time" - "github.com/percona/percona-postgresql-operator/v2/internal/naming" - perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" - v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) type offlineExec struct { diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 6198a9b34d..c35f686f58 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -6,16 +6,17 @@ import ( "time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" - "github.com/percona/percona-postgresql-operator/v2/internal/feature" - "github.com/percona/percona-postgresql-operator/v2/internal/logging" - pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" - v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/percona/percona-postgresql-operator/v2/internal/feature" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) type snapshotExecutor interface { From 1a551b175285ddff5ccc3e4fe611e379cca7b58d Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 10:37:53 +0530 Subject: [PATCH 16/90] misspells Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/offline.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 4f92dbac66..95f3fe5ce7 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -30,7 +30,7 @@ func newOfflineExec(cl client.Client) *offlineExec { func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) { // TODO: for single node clusters, we should use the primary, // but this is unsafe as it results in downtime during backup. - // We should at least let the user explicilty opt-in for this behaviour. + // We should at least let the user explicitly opt-in for this behavior. 
replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, pgCluster) if err != nil { return "", errors.Wrap(err, "failed to get replica pods") From d4e09b5d0011f97ca241ba0fb916065a98a6ef3f Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 15:17:53 +0530 Subject: [PATCH 17/90] improve fencing logic Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgbackups.yaml | 9 +- .../pgv2.percona.com_perconapgclusters.yaml | 9 +- deploy/bundle.yaml | 9 +- deploy/crd.yaml | 9 +- deploy/cw-bundle.yaml | 9 +- .../controller/postgrescluster/controller.go | 12 +- .../controller/postgrescluster/instance.go | 25 -- internal/controller/runtime/pod_client.go | 4 +- internal/naming/annotations.go | 6 - percona/controller/pgbackup/controller.go | 26 +- .../controller/pgbackup/snapshots/offline.go | 242 +++++++++++------- .../pgbackup/snapshots/reconcile.go | 31 ++- percona/controller/pgcluster/backup.go | 5 +- .../controller/pgcluster/controller_test.go | 2 +- percona/controller/pgcluster/schedule.go | 3 +- percona/watcher/wal.go | 8 +- percona/watcher/wal_test.go | 3 +- .../v2/perconapgbackup_types.go | 17 +- .../v2/perconapgbackup_types_test.go | 13 +- .../v2/zz_generated.deepcopy.go | 10 + 20 files changed, 258 insertions(+), 194 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 65f9f7c264..3a1f9bf4de 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -87,18 +87,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. 
pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object x-kubernetes-validations: - message: repoName is required when method is 'pgbackrest' - rule: (self.method == "" || self.method == "pgbackrest") && self.repoName - == "" + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 8c497bd618..750b44d840 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -86,18 +86,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object x-kubernetes-validations: - message: repoName is required when method is 'pgbackrest' - rule: (self.method == "" || self.method == "pgbackrest") && self.repoName - == "" + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 1960b08dde..64e633a143 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -381,18 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. 
pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object x-kubernetes-validations: - message: repoName is required when method is 'pgbackrest' - rule: (self.method == "" || self.method == "pgbackrest") && self.repoName - == "" + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index bf14575fc2..9c0873cc49 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -381,18 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object x-kubernetes-validations: - message: repoName is required when method is 'pgbackrest' - rule: (self.method == "" || self.method == "pgbackrest") && self.repoName - == "" + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 5d5e0a9938..00f8b29e87 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -381,18 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. 
pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object x-kubernetes-validations: - message: repoName is required when method is 'pgbackrest' - rule: (self.method == "" || self.method == "pgbackrest") && self.repoName - == "" + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index 6bd0b2e7b3..2bb8cd225f 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "io" "time" "go.opentelemetry.io/otel/trace" @@ -61,13 +60,10 @@ type Reconciler struct { DiscoveryClient *discovery.DiscoveryClient IsOpenShift bool Owner client.FieldOwner - PodExec func( - ctx context.Context, namespace, pod, container string, - stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error - Recorder record.EventRecorder - Registration registration.Registration - Tracer trace.Tracer + PodExec runtime.PodExecutor + Recorder record.EventRecorder + Registration registration.Registration + Tracer trace.Tracer } // +kubebuilder:rbac:groups="",resources="events",verbs={create,patch} diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index fe457ae3f2..b80da339d8 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -12,7 +12,6 @@ import ( "strings" "time" - pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" "github.com/pkg/errors" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -1266,24 +1265,6 @@ func (r *Reconciler) reconcileInstance( return err } -func getSuspendedInstances(cluster *v1beta1.PostgresCluster) []string { - annotations := 
cluster.GetAnnotations()[pNaming.ToCrunchyAnnotation(naming.SuspendedInstancesAnnotation)] - if annotations == "" { - return []string{} - } - return strings.Split(annotations, ",") -} - -func isInstanceSuspended(cluster *v1beta1.PostgresCluster, instanceName string) bool { - fencedInstances := getSuspendedInstances(cluster) - for _, fencedInstance := range fencedInstances { - if instanceName != "" && fencedInstance == instanceName { - return true - } - } - return false -} - func generateInstanceStatefulSetIntent(_ context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, @@ -1388,12 +1369,6 @@ func generateInstanceStatefulSetIntent(_ context.Context, sts.Spec.Replicas = initialize.Int32(1) } - // K8SPG-771 - // TODO (mayanshah1607): perform checkpoint before scaling down, especially for primary. - if isInstanceSuspended(cluster, sts.GetName()) { - sts.Spec.Replicas = initialize.Int32(0) - } - // Restart containers any time they stop, die, are killed, etc. // - https://docs.k8s.io/concepts/workloads/pods/pod-lifecycle/#restart-policy sts.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyAlways diff --git a/internal/controller/runtime/pod_client.go b/internal/controller/runtime/pod_client.go index 444b17d6ba..5f04348973 100644 --- a/internal/controller/runtime/pod_client.go +++ b/internal/controller/runtime/pod_client.go @@ -20,7 +20,7 @@ import ( // podExecutor runs command on container in pod in namespace. Non-nil streams // (stdin, stdout, and stderr) are attached the to the remote process. 
-type podExecutor func( +type PodExecutor func( ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error @@ -37,7 +37,7 @@ func newPodClient(config *rest.Config) (rest.Interface, error) { // +kubebuilder:rbac:groups="",resources="pods/exec",verbs={create} -func NewPodExecutor(config *rest.Config) (podExecutor, error) { +func NewPodExecutor(config *rest.Config) (PodExecutor, error) { // Create a copy of the config to avoid modifying the original configCopy := rest.CopyConfig(config) diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index a545becdf1..ec04eb0e9a 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -81,10 +81,4 @@ const ( // is present, the controller will not update the ConfigMap, allowing users to make custom // modifications that won't be overwritten during reconciliation. OverrideConfigAnnotation = perconaAnnotationPrefix + "override-config" - - // K8SPG-771 - // SuspendedInstancesAnnotation is an annotation set on the PerconaPGCluster to suspend one or more instances. - // The instance names represent the names of the StatefulSets. 
- // Example: "pgv2.percona.com/suspended-instances=cluster1-abc,cluster2-xyz" - SuspendedInstancesAnnotation = perconaAnnotationPrefix + "suspended-instances" ) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 11f8a26457..97c3b3e592 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -15,6 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -24,6 +25,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/clientcmd" @@ -45,13 +47,21 @@ var ErrBackupJobNotFound = errors.New("backup Job not found") // Reconciler holds resources for the PerconaPGBackup reconciler type PGBackupReconciler struct { - Client client.Client + Client client.Client + PodExec runtime.PodExecutor ExternalChan chan event.GenericEvent } // SetupWithManager adds the PerconaPGBackup controller to the provided runtime manager func (r *PGBackupReconciler) SetupWithManager(mgr manager.Manager) error { + if r.PodExec == nil { + var err error + r.PodExec, err = runtime.NewPodExecutor(mgr.GetConfig()) + if err != nil { + return err + } + } return (builder.ControllerManagedBy(mgr). For(&v2.PerconaPGBackup{}). WatchesRawSource(source.Channel(r.ExternalChan, &handler.EnqueueRequestForObject{})). 
@@ -91,8 +101,8 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgCluster = nil } - if pgBackup.Spec.Method == v2.BackupMethodVolumeSnapshot { - return snapshots.Reconcile(ctx, r.Client, pgBackup, pgCluster) + if *pgBackup.Spec.Method == v2.BackupMethodVolumeSnapshot { + return snapshots.Reconcile(ctx, r.Client, r.PodExec, pgBackup, pgCluster) } if !pgBackup.DeletionTimestamp.IsZero() || pgBackup.Status.State == v2.BackupFailed { @@ -418,7 +428,7 @@ func getBackupInProgress(ctx context.Context, c client.Client, clusterName, ns s } func getRepo(pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) *v1beta1.PGBackRestRepo { - repoName := pb.Spec.RepoName + repoName := ptr.Deref(pb.Spec.RepoName, "") for i, r := range pg.Spec.Backups.PGBackRest.Repos { if repoName == r.Name { return &pg.Spec.Backups.PGBackRest.Repos[i] @@ -451,7 +461,7 @@ func getDestination(pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) string { } func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, pgBackup *v2.PerconaPGBackup) error { - info, err := pgbackrest.GetInfo(ctx, pod, pgBackup.Spec.RepoName) + info, err := pgbackrest.GetInfo(ctx, pod, ptr.Deref(pgBackup.Spec.RepoName, "")) if err != nil { return errors.Wrap(err, "get pgBackRest info") } @@ -488,7 +498,7 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, } } - if err := pgbackrest.SetAnnotationsToBackup(ctx, pod, stanzaName, backup.Label, pgBackup.Spec.RepoName, map[string]string{ + if err := pgbackrest.SetAnnotationsToBackup(ctx, pod, stanzaName, backup.Label, ptr.Deref(pgBackup.Spec.RepoName, ""), map[string]string{ v2.PGBackrestAnnotationJobName: pgBackup.Status.JobName, }); err != nil { return errors.Wrap(err, "set annotations to backup") @@ -643,7 +653,7 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e pg.Spec.Backups.PGBackRest.Manual = new(v1beta1.PGBackRestManualBackup) } - 
pg.Spec.Backups.PGBackRest.Manual.RepoName = pb.Spec.RepoName + pg.Spec.Backups.PGBackRest.Manual.RepoName = ptr.Deref(pb.Spec.RepoName, "") pg.Spec.Backups.PGBackRest.Manual.Options = pb.Spec.Options return c.Update(ctx, pg) @@ -671,7 +681,7 @@ func findBackupJob(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) err := c.List(ctx, jobList, client.InNamespace(pb.Namespace), client.MatchingLabelsSelector{ - Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, pb.Spec.RepoName, naming.BackupManual), + Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, ptr.Deref(pb.Spec.RepoName, ""), naming.BackupManual), }) if err != nil { return nil, errors.Wrap(err, "get backup jobs") diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 95f3fe5ce7..2aa2e99f57 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -2,7 +2,8 @@ package snapshots import ( "context" - "strings" + "fmt" + "io" "time" "github.com/pkg/errors" @@ -12,140 +13,196 @@ import ( "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) +const ( + annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" + + waitTimeout = 5 * time.Minute + retryInterval = 3 * time.Second +) + type offlineExec struct { - cl client.Client + cl client.Client + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + podExec runtime.PodExecutor } -func newOfflineExec(cl client.Client) 
*offlineExec { +func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { return &offlineExec{ - cl: cl, + cl: cl, + cluster: pgCluster, + backup: pgBackup, + podExec: podExec, } } -func (e *offlineExec) prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) { - // TODO: for single node clusters, we should use the primary, - // but this is unsafe as it results in downtime during backup. - // We should at least let the user explicitly opt-in for this behavior. - replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, pgCluster) +func (e *offlineExec) prepare(ctx context.Context) (string, error) { + targetPod, err := e.getBackupTargetPod(ctx) if err != nil { - return "", errors.Wrap(err, "failed to get replica pods") - } - if len(replicas) == 0 { - return "", errors.New("no replica pods found") + return "", errors.Wrap(err, "failed to get backup target pod") } - targetPod := replicas[0] - labels := targetPod.GetLabels() - - targetInstanceName := labels[naming.LabelInstance] - if targetInstanceName == "" { - return "", errors.New("target instance name not found on pod labels") + if err := e.fenceInstance(ctx, targetPod); err != nil { + return "", errors.Wrap(err, "failed to fence instance") } - if err := e.suspendInstanceAndWait(ctx, targetInstanceName, pgCluster); err != nil { - return "", errors.Wrap(err, "failed to suspend instance") - } - - targetPVC, err := e.getTargetPVC(ctx, targetInstanceName, pgCluster.GetNamespace()) + targetPVC, err := e.getTargetPVC(ctx, targetPod) if err != nil { return "", errors.Wrap(err, "failed to get target PVC") } - return targetPVC, nil } -func (e *offlineExec) complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error { - if err := e.resumeSuspendedInstance(ctx, pgCluster); err != nil { - return errors.Wrap(err, "failed to resume suspended instance") +func (e *offlineExec) complete(ctx context.Context) error { + 
targetPod, err := e.getBackupTargetPod(ctx) + if err != nil { + return errors.Wrap(err, "failed to get backup target pod") + } + + if err := e.unfenceInstance(ctx, targetPod); err != nil { + return errors.Wrap(err, "failed to unfence instance") } return nil } -func (e *offlineExec) suspendInstanceAndWait(ctx context.Context, instanceName string, pgCluster *v2.PerconaPGCluster) error { - // suspend the instance - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(pgCluster), pgCluster); err != nil { - return errors.Wrap(err, "failed to get PGCluster") +func (e *offlineExec) getBackupTargetPod(ctx context.Context) (*corev1.Pod, error) { + // If we already determined it before, use the same pod. + if podName, ok := e.backup.GetAnnotations()[annotationBackupTarget]; ok && podName != "" { + pod := &corev1.Pod{} + if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: podName}, pod); err != nil { + return nil, errors.Wrap(err, "failed to get backup target pod") } - annotations := pgCluster.GetAnnotations() - if annotations == nil { - annotations = make(map[string]string) + return pod, nil + } + + log := logging.FromContext(ctx) + + // TODO: single node clusters do not have replicas. + // We should allow using a primary pod as the backup target. + // Since this is unsafe, we should let the user explicitly opt-in for this behavior. 
+ replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, e.cluster) + if err != nil { + return nil, errors.Wrap(err, "failed to get replica pods") + } + if len(replicas) == 0 { + return nil, errors.New("no replica pods found") + } + targetPod := replicas[0] + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := e.backup.DeepCopy() + annots := bcp.GetAnnotations() + if annots == nil { + annots = make(map[string]string) } - annotations[naming.SuspendedInstancesAnnotation] = instanceName - pgCluster.SetAnnotations(annotations) - return e.cl.Update(ctx, pgCluster) + annots[annotationBackupTarget] = targetPod.GetName() + bcp.SetAnnotations(annots) + return e.cl.Update(ctx, bcp) }); err != nil { - return errors.Wrap(err, "failed to update PGCluster") - } - - wCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) - defer cancel() - - // wait for the instance to be suspended - if err := wait.PollUntilContextTimeout(wCtx, 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { - pods := &corev1.PodList{} - if err := e.cl.List(ctx, pods, &client.ListOptions{ - Namespace: pgCluster.GetNamespace(), - LabelSelector: labels.SelectorFromSet(map[string]string{ - naming.LabelInstance: instanceName, - }), - }); err != nil { - return false, errors.Wrap(err, "failed to list pods") + return nil, errors.Wrap(err, "failed to update backup annotations") + } + + log.Info("Selected backup target pod", "pod", targetPod.GetName()) + return targetPod.DeepCopy(), nil +} + +func (e *offlineExec) fenceInstance(ctx context.Context, instancePod *corev1.Pod) error { + // TODO: should we perform a checkpoint? Should it be configurable? What if it takes too long? 
+ cmd := []string{"touch", "/pgdata/sleep-forever"} + if err := e.podExec(ctx, instancePod.GetNamespace(), instancePod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, cmd...); err != nil { + return fmt.Errorf("failed to run pod exec: %w", err) + } + + // Re-create the pod and wait for database container to be unready. + if err := e.cl.Delete(ctx, instancePod); err != nil { + return fmt.Errorf("failed to delete pod: %w", err) + } + + log := logging.FromContext(ctx) + + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, false, func(ctx context.Context) (bool, error) { + pod := &corev1.Pod{} + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(instancePod), pod); err != nil { + return false, client.IgnoreNotFound(err) + } + + if pod.Status.Phase != corev1.PodRunning { + return false, nil + } + + databaseReady := false + allOthersReady := true + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.Name == naming.ContainerDatabase { + databaseReady = containerStatus.Ready + continue + } + if !containerStatus.Ready { + allOthersReady = false + } } - return len(pods.Items) == 0, nil + + return allOthersReady && !databaseReady, nil }); err != nil { - return errors.Wrap(err, "failed to wait for instance to suspend") + return errors.Wrap(err, "failed to wait for pod to be unready") } + + log.Info("Instance fenced", "pod", instancePod.GetName()) return nil } -func (e *offlineExec) resumeSuspendedInstance(ctx context.Context, pgCluster *v2.PerconaPGCluster) error { - suspendedInstancesVal, ok := pgCluster.GetAnnotations()[naming.SuspendedInstancesAnnotation] - if !ok || suspendedInstancesVal == "" { - return nil +func (e *offlineExec) unfenceInstance(ctx context.Context, instancePod *corev1.Pod) error { + cmd := []string{"rm", "-f", "/pgdata/sleep-forever"} + if err := e.podExec(ctx, instancePod.GetNamespace(), instancePod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, cmd...); err != nil { + return 
fmt.Errorf("failed to run pod exec: %w", err) } - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - annots := pgCluster.GetAnnotations() - delete(annots, naming.SuspendedInstancesAnnotation) - pgCluster.SetAnnotations(annots) - return e.cl.Update(ctx, pgCluster) - }); err != nil { - return errors.Wrap(err, "failed to update PGCluster") - } - - wCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) - defer cancel() - - suspendedInstances := strings.Split(suspendedInstancesVal, ",") - for _, instanceName := range suspendedInstances { - if err := wait.PollUntilContextTimeout(wCtx, 1*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { - pods := &corev1.PodList{} - if err := e.cl.List(ctx, pods, &client.ListOptions{ - Namespace: pgCluster.GetNamespace(), - LabelSelector: labels.SelectorFromSet(map[string]string{ - naming.LabelInstance: instanceName, - }), - }); err != nil { - return false, errors.Wrap(err, "failed to list pods") + log := logging.FromContext(ctx) + + // wait for database container to be ready + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, false, func(ctx context.Context) (bool, error) { + pod := &corev1.Pod{} + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(instancePod), pod); err != nil { + return false, client.IgnoreNotFound(err) + } + + if pod.Status.Phase != corev1.PodRunning { + return false, nil + } + + // ensure all containers are ready. 
+ for _, containerStatus := range pod.Status.ContainerStatuses { + if !containerStatus.Ready { + return false, nil } - return len(pods.Items) == 1, nil - }); err != nil { - return errors.Wrap(err, "failed to wait for instance to resume") } + + return true, nil + }); err != nil { + return errors.Wrap(err, "failed to wait for pod to be ready") } + + log.Info("Instance unfenced", "pod", instancePod.GetName()) return nil } -func (e *offlineExec) getTargetPVC(ctx context.Context, instanceName, namespace string) (string, error) { +func (e *offlineExec) getTargetPVC(ctx context.Context, targetPod *corev1.Pod) (string, error) { + instanceName := targetPod.GetLabels()[naming.LabelInstance] + if instanceName == "" { + return "", errors.New("cannot determine instance name from pod labels") + } + pvcs := &corev1.PersistentVolumeClaimList{} if err := e.cl.List(ctx, pvcs, &client.ListOptions{ - Namespace: namespace, + Namespace: targetPod.GetNamespace(), LabelSelector: labels.SelectorFromSet(map[string]string{ naming.LabelInstance: instanceName, naming.LabelRole: naming.RolePostgresData, @@ -153,8 +210,15 @@ func (e *offlineExec) getTargetPVC(ctx context.Context, instanceName, namespace }); err != nil { return "", errors.Wrap(err, "failed to list PVCs") } + if len(pvcs.Items) == 0 { return "", errors.New("no PVC found") } + + log := logging.FromContext(ctx) + + if len(pvcs.Items) > 1 { + log.V(1).Info("Multiple PVCs found, using the first one", "pvc", pvcs.Items[0].GetName()) + } return pvcs.Items[0].GetName(), nil } diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index c35f686f58..6ec2131c00 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -13,6 +13,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" + 
"github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -22,9 +23,9 @@ import ( type snapshotExecutor interface { // Prepare the cluster for performing a snapshot. // Returns the name of the PVC that will be snapshotted. - prepare(ctx context.Context, pgCluster *v2.PerconaPGCluster) (string, error) + prepare(ctx context.Context) (string, error) // Complete the snapshot. - complete(ctx context.Context, pgCluster *v2.PerconaPGCluster) error + complete(ctx context.Context) error } type snapshotReconciler struct { @@ -51,10 +52,15 @@ func newSnapshotReconciler( } } -func newSnapshotExec(mode v2.VolumeSnapshotMode, cl client.Client) (snapshotExecutor, error) { - switch mode { +func newSnapshotExec( + cl client.Client, + podExec runtime.PodExecutor, + cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, +) (snapshotExecutor, error) { + switch mode := cluster.Spec.Backups.VolumeSnapshots.Mode; mode { case v2.VolumeSnapshotModeOffline: - return newOfflineExec(cl), nil + return newOfflineExec(cl, podExec, cluster, backup), nil default: return nil, fmt.Errorf("invalid or unsupported volume snapshot mode: %s", mode) } @@ -64,11 +70,12 @@ func newSnapshotExec(mode v2.VolumeSnapshotMode, cl client.Client) (snapshotExec func Reconcile( ctx context.Context, cl client.Client, + podExec runtime.PodExecutor, pgBackup *v2.PerconaPGBackup, pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { if pgBackup == nil || pgCluster == nil { - return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil") + return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil or not found") } log := logging.FromContext(ctx). 
@@ -92,7 +99,7 @@ func Reconcile( return reconcile.Result{}, nil } - exec, err := newSnapshotExec(pgCluster.Spec.Backups.VolumeSnapshots.Mode, cl) + exec, err := newSnapshotExec(cl, podExec, pgCluster, pgBackup) if err != nil { stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { @@ -120,10 +127,8 @@ func (r *snapshotReconciler) reconcile(ctx context.Context) (reconcile.Result, e return r.reconcileStarting(ctx) case v2.BackupRunning: return r.reconcileRunning(ctx) - case v2.BackupFailed: + case v2.BackupFailed, v2.BackupSucceeded: return reconcile.Result{}, r.complete(ctx) - case v2.BackupSucceeded: - return reconcile.Result{}, nil } return reconcile.Result{}, nil } @@ -195,7 +200,7 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re // snapshot is complete and ready to be restored. case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): - if err := r.exec.complete(ctx, r.cluster); err != nil { + if err := r.exec.complete(ctx); err != nil { return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) } @@ -244,7 +249,7 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { return nil } - pvcTarget, err := r.exec.prepare(ctx, r.cluster) + pvcTarget, err := r.exec.prepare(ctx) if err != nil { return fmt.Errorf("failed to prepare for snapshot: %w", err) } @@ -269,7 +274,7 @@ func (r *snapshotReconciler) complete(ctx context.Context) error { return nil } - if err := r.exec.complete(ctx, r.cluster); err != nil { + if err := r.exec.complete(ctx); err != nil { return fmt.Errorf("complete failed: %w", err) } diff --git a/percona/controller/pgcluster/backup.go b/percona/controller/pgcluster/backup.go index 37d19b3557..0da8021893 100644 --- a/percona/controller/pgcluster/backup.go +++ b/percona/controller/pgcluster/backup.go @@ -9,6 +9,7 @@ import ( metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -156,7 +157,7 @@ func reconcileBackupJob(ctx context.Context, cl client.Client, cr *v2.PerconaPGC }, Spec: v2.PerconaPGBackupSpec{ PGCluster: cr.Name, - RepoName: repoName, + RepoName: ptr.To(repoName), }, } if cr.CompareVersion("2.6.0") >= 0 && cr.Spec.Metadata != nil { @@ -217,7 +218,7 @@ func listPGBackups(ctx context.Context, cl client.Reader, cr *v2.PerconaPGCluste // we should not filter by label, because the user can create the resource without the label list := []v2.PerconaPGBackup{} for _, pgBackup := range pbList.Items { - if pgBackup.Spec.PGCluster != cr.Name || pgBackup.Spec.RepoName != repoName { + if pgBackup.Spec.PGCluster != cr.Name || ptr.Deref(pgBackup.Spec.RepoName, "") != repoName { continue } list = append(list, pgBackup) diff --git a/percona/controller/pgcluster/controller_test.go b/percona/controller/pgcluster/controller_test.go index 30de7aece2..b5427d3391 100644 --- a/percona/controller/pgcluster/controller_test.go +++ b/percona/controller/pgcluster/controller_test.go @@ -862,7 +862,7 @@ var _ = Describe("Pause with backup", Ordered, func() { }, Spec: v2.PerconaPGBackupSpec{ PGCluster: crName, - RepoName: "repo1", + RepoName: ptr.To("repo1"), }, } diff --git a/percona/controller/pgcluster/schedule.go b/percona/controller/pgcluster/schedule.go index ad5d65412f..49e2a85e19 100644 --- a/percona/controller/pgcluster/schedule.go +++ b/percona/controller/pgcluster/schedule.go @@ -9,6 +9,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/controller/postgrescluster" "github.com/percona/percona-postgresql-operator/v2/internal/logging" @@ -120,7 +121,7 @@ func (r 
*PGClusterReconciler) createScheduledBackup(log logr.Logger, backupName, }, Spec: v2.PerconaPGBackupSpec{ PGCluster: cr.Name, - RepoName: repoName, + RepoName: ptr.To(repoName), Options: []string{"--type=" + backupType}, }, } diff --git a/percona/watcher/wal.go b/percona/watcher/wal.go index 6fa0b93434..b35a9c6728 100644 --- a/percona/watcher/wal.go +++ b/percona/watcher/wal.go @@ -9,6 +9,7 @@ import ( "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" @@ -42,6 +43,11 @@ func WatchCommitTimestamps(ctx context.Context, cli client.Client, eventChan cha return } + // TODO: add support + if cr.Spec.Backups.VolumeSnapshots != nil && cr.Spec.Backups.VolumeSnapshots.Enabled { + return + } + log.Info("Watching commit timestamps") execCli, err := clientcmd.NewClient() @@ -213,7 +219,7 @@ func getBackupStartTimestamp(ctx context.Context, cli client.Client, cr *pgv2.Pe return time.Time{}, errors.Wrap(PrimaryPodNotFound, err.Error()) } - pgbackrestInfo, err := pgbackrest.GetInfo(ctx, primary, backup.Spec.RepoName) + pgbackrestInfo, err := pgbackrest.GetInfo(ctx, primary, ptr.Deref(backup.Spec.RepoName, "")) if err != nil { return time.Time{}, errors.Wrap(err, "get pgbackrest info") } diff --git a/percona/watcher/wal_test.go b/percona/watcher/wal_test.go index f6442391b0..ca95309529 100644 --- a/percona/watcher/wal_test.go +++ b/percona/watcher/wal_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/percona/testutils" @@ -331,7 +332,7 @@ func TestGetLatestCommitTimestamp(t *testing.T) { }, Spec: pgv2.PerconaPGBackupSpec{ PGCluster: "test-cluster", - RepoName: "repo1", + RepoName: ptr.To("repo1"), }, }, 
cluster: &pgv2.PerconaPGCluster{ diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 3fc9b1d9af..41cc0f07d5 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -10,6 +10,7 @@ import ( "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -38,6 +39,7 @@ type PerconaPGBackup struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` + // +kubebuilder:validation:XValidation:rule="self.method == \"volumeSnapshot\" || has(self.repoName)",message="repoName is required when method is 'pgbackrest'" Spec PerconaPGBackupSpec `json:"spec"` Status PerconaPGBackupStatus `json:"status,omitempty"` } @@ -57,19 +59,20 @@ const ( BackupMethodVolumeSnapshot BackupMethod = "volumeSnapshot" ) -// +kubebuilder:validation:XValidation:rule="(self.method == \"\" || self.method == \"pgbackrest\") && self.repoName == \"\"",message="repoName is required when method is 'pgbackrest'" type PerconaPGBackupSpec struct { PGCluster string `json:"pgCluster"` + // +optional // The name of the pgBackRest repo to run the backup command against. + // This is required when method is 'pgbackrest'. // +kubebuilder:validation:Pattern=^repo[1-4] - RepoName string `json:"repoName"` + RepoName *string `json:"repoName,omitempty"` // Method with which to perform the backup // +kubebuilder:validation:Enum={pgbackrest,volumeSnapshot} // +kubebuilder:default=pgbackrest // +optional - Method BackupMethod `json:"method"` + Method *BackupMethod `json:"method,omitempty"` // Command line options to include when running the pgBackRest backup command. 
// https://pgbackrest.org/command.html#command-backup @@ -192,7 +195,13 @@ const ( ) func (b *PerconaPGBackup) Default() { - b.Spec.Options = append(b.Spec.Options, fmt.Sprintf(`--annotation="%s"="%s"`, PGBackrestAnnotationBackupName, b.Name)) + if b.Spec.Method == nil { + b.Spec.Method = ptr.To(BackupMethodPGBackrest) + } + + if b.Spec.Method == ptr.To(BackupMethodPGBackrest) { + b.Spec.Options = append(b.Spec.Options, fmt.Sprintf(`--annotation="%s"="%s"`, PGBackrestAnnotationBackupName, b.Name)) + } } func (b *PerconaPGBackup) CompareVersion(ver string) int { diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go index e8b27d955c..d45d70d4d6 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { @@ -29,25 +30,25 @@ func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { }, "non-pointer zero date time": { data: PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Time{})), + Time: ptr.To(metav1.NewTime(time.Time{})), }, expected: `"0001-01-01 00:00:00.000000+0000"`, }, "pointer zero date time": { data: &PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Time{})), + Time: ptr.To(metav1.NewTime(time.Time{})), }, expected: `"0001-01-01 00:00:00.000000+0000"`, }, "non-pointer with date time": { data: PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), + Time: ptr.To(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), }, expected: `"2025-11-21 13:14:15.345600+0000"`, }, "pointer with date time": { data: &PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), + Time: 
ptr.To(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), }, expected: `"2025-11-21 13:14:15.345600+0000"`, }, @@ -63,7 +64,3 @@ func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { }) } } - -func ptr[T any](v T) *T { - return &v -} diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 6ad475b781..53ba7ee95d 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -637,6 +637,16 @@ func (in *PerconaPGBackupList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PerconaPGBackupSpec) DeepCopyInto(out *PerconaPGBackupSpec) { *out = *in + if in.RepoName != nil { + in, out := &in.RepoName, &out.RepoName + *out = new(string) + **out = **in + } + if in.Method != nil { + in, out := &in.Method, &out.Method + *out = new(BackupMethod) + **out = **in + } if in.Options != nil { in, out := &in.Options, &out.Options *out = make([]string, len(*in)) From cdb8f430a6d67f8aa37ad6de6df848767d710f64 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 22 Jan 2026 15:25:13 +0530 Subject: [PATCH 18/90] add extra validation Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 97c3b3e592..b5b3144a22 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -2,6 +2,7 @@ package pgbackup import ( "context" + "fmt" "path" "slices" "time" @@ -118,6 +119,16 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re } } + if ptr.Deref(pgBackup.Spec.RepoName, "") == "" { + if updErr := pgBackup.UpdateStatus(ctx, r.Client, func(bcp 
*v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = "repoName is required when method is 'pgbackrest'" + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + return reconcile.Result{}, errors.Errorf("'repoName' is required when method is 'pgbackrest'") + } + switch pgBackup.Status.State { case v2.BackupNew: if pgCluster == nil { @@ -169,7 +180,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re repo := getRepo(pgCluster, pgBackup) if repo == nil { - return reconcile.Result{}, errors.Errorf("%s repo not defined", pgBackup.Spec.RepoName) + return reconcile.Result{}, errors.Errorf("'%s' repo not defined", ptr.Deref(pgBackup.Spec.RepoName, "")) } if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { From c53a00a9e9a5fe9b249a900fd2acc2bcd2dc5ca5 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 11:26:08 +0530 Subject: [PATCH 19/90] improvements to suspended logic Signed-off-by: Mayank Shah --- .../controller/postgrescluster/instance.go | 10 +- .../postgrescluster/instance_test.go | 1 + .../postgrescluster/snapshots_test.go | 8 +- .../controller/pgbackup/snapshots/offline.go | 211 ++++++++---------- .../pgbackup/snapshots/reconcile.go | 49 ++-- percona/naming/annotations.go | 4 + 6 files changed, 148 insertions(+), 135 deletions(-) diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index b80da339d8..ccc6ff76d4 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -37,6 +37,7 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/pki" "github.com/percona/percona-postgresql-operator/v2/internal/postgres" "github.com/percona/percona-postgresql-operator/v2/percona/k8s" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" 
"github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -1158,6 +1159,7 @@ func (r *Reconciler) reconcileInstance( ctx = logging.NewContext(ctx, log) existing := instance.DeepCopy() + _, suspended := existing.GetAnnotations()[pNaming.AnnotationInstanceSuspended] *instance = appsv1.StatefulSet{} instance.SetGroupVersionKind(appsv1.SchemeGroupVersion.WithKind("StatefulSet")) instance.Namespace, instance.Name = existing.Namespace, existing.Name @@ -1165,7 +1167,7 @@ func (r *Reconciler) reconcileInstance( if err == nil { generateInstanceStatefulSetIntent(ctx, cluster, spec, clusterPodService.Name, instanceServiceAccount.Name, instance, - numInstancePods) + numInstancePods, suspended) } var ( @@ -1272,6 +1274,7 @@ func generateInstanceStatefulSetIntent(_ context.Context, instanceServiceAccountName string, sts *appsv1.StatefulSet, numInstancePods int, + suspend bool, ) { sts.Annotations = naming.Merge( cluster.Spec.Metadata.GetAnnotationsOrNil(), @@ -1369,6 +1372,11 @@ func generateInstanceStatefulSetIntent(_ context.Context, sts.Spec.Replicas = initialize.Int32(1) } + // K8SPG-771 + if suspend { + sts.Spec.Replicas = initialize.Int32(0) + } + // Restart containers any time they stop, die, are killed, etc. 
// - https://docs.k8s.io/concepts/workloads/pods/pod-lifecycle/#restart-policy sts.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyAlways diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index e2811f1fee..970e89e3dd 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -1752,6 +1752,7 @@ func TestGenerateInstanceStatefulSetIntent(t *testing.T) { instanceServiceAccountName, sts, test.ip.numInstancePods, + false, ) test.run(t, sts) diff --git a/internal/controller/postgrescluster/snapshots_test.go b/internal/controller/postgrescluster/snapshots_test.go index dddd6bbb4d..766d1cce4a 100644 --- a/internal/controller/postgrescluster/snapshots_test.go +++ b/internal/controller/postgrescluster/snapshots_test.go @@ -500,7 +500,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { // Create instance set and volumes for reconcile sts := &appsv1.StatefulSet{} - generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1) + generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1, false) clusterVolumes := []corev1.PersistentVolumeClaim{} // Reconcile @@ -597,7 +597,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { // Create instance set and volumes for reconcile sts := &appsv1.StatefulSet{} - generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1) + generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1, false) clusterVolumes := []corev1.PersistentVolumeClaim{} // Reconcile @@ -696,7 +696,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { // Setup instances and volumes for reconcile sts := &appsv1.StatefulSet{} - 
generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1) + generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1, false) clusterVolumes := []corev1.PersistentVolumeClaim{} // Reconcile @@ -764,7 +764,7 @@ func TestDedicatedSnapshotVolumeRestore(t *testing.T) { } sts := &appsv1.StatefulSet{} - generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1) + generateInstanceStatefulSetIntent(ctx, cluster, &cluster.Spec.InstanceSets[0], "pod-service", "service-account", sts, 1, false) currentTime := metav1.Now() backupJob := testBackupJob(cluster, "backup-job-dedicated-snapshot-exists-1") backupJob.Status.CompletionTime = ¤tTime diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 2aa2e99f57..8298eb7e1d 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -2,18 +2,16 @@ package snapshots import ( "context" - "fmt" - "io" "time" "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -32,177 +30,156 @@ type offlineExec struct { cl client.Client cluster *v2.PerconaPGCluster backup *v2.PerconaPGBackup - podExec runtime.PodExecutor } -func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { +func newOfflineExec(cl 
client.Client, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { return &offlineExec{ cl: cl, cluster: pgCluster, backup: pgBackup, - podExec: podExec, } } func (e *offlineExec) prepare(ctx context.Context) (string, error) { - targetPod, err := e.getBackupTargetPod(ctx) + targetInstance, err := e.getBackupTarget(ctx) if err != nil { return "", errors.Wrap(err, "failed to get backup target pod") } - if err := e.fenceInstance(ctx, targetPod); err != nil { - return "", errors.Wrap(err, "failed to fence instance") + if err := e.suspendInstance(ctx, targetInstance); err != nil { + return "", errors.Wrap(err, "failed to suspend instance") } - targetPVC, err := e.getTargetPVC(ctx, targetPod) + targetPVC, err := e.getTargetPVC(ctx, targetInstance) if err != nil { return "", errors.Wrap(err, "failed to get target PVC") } return targetPVC, nil } -func (e *offlineExec) complete(ctx context.Context) error { - targetPod, err := e.getBackupTargetPod(ctx) - if err != nil { - return errors.Wrap(err, "failed to get backup target pod") - } - - if err := e.unfenceInstance(ctx, targetPod); err != nil { - return errors.Wrap(err, "failed to unfence instance") - } - return nil -} - -func (e *offlineExec) getBackupTargetPod(ctx context.Context) (*corev1.Pod, error) { - // If we already determined it before, use the same pod. - if podName, ok := e.backup.GetAnnotations()[annotationBackupTarget]; ok && podName != "" { - pod := &corev1.Pod{} - if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: podName}, pod); err != nil { - return nil, errors.Wrap(err, "failed to get backup target pod") - } - return pod, nil - } - - log := logging.FromContext(ctx) - - // TODO: single node clusters do not have replicas. - // We should allow using a primary pod as the backup target. - // Since this is unsafe, we should let the user explicitly opt-in for this behavior. 
- replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, e.cluster) - if err != nil { - return nil, errors.Wrap(err, "failed to get replica pods") - } - if len(replicas) == 0 { - return nil, errors.New("no replica pods found") +func (e *offlineExec) suspendInstance(ctx context.Context, instanceName string) error { + sts := &appsv1.StatefulSet{} + if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName}, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") } - targetPod := replicas[0] if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - bcp := e.backup.DeepCopy() - annots := bcp.GetAnnotations() + orig := sts.DeepCopy() + annots := sts.GetAnnotations() if annots == nil { annots = make(map[string]string) } - annots[annotationBackupTarget] = targetPod.GetName() - bcp.SetAnnotations(annots) - return e.cl.Update(ctx, bcp) + annots[pNaming.AnnotationInstanceSuspended] = "" + sts.SetAnnotations(annots) + return e.cl.Patch(ctx, sts, client.MergeFrom(orig)) }); err != nil { - return nil, errors.Wrap(err, "failed to update backup annotations") + return errors.Wrap(err, "failed to update stateful set annotations") } - log.Info("Selected backup target pod", "pod", targetPod.GetName()) - return targetPod.DeepCopy(), nil + // wait for suspension + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { + if err := e.cl.Get(ctx, client.ObjectKey{ + Namespace: e.cluster.GetNamespace(), + Name: instanceName, + }, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") + } + return sts.Status.Replicas == 0 && sts.Status.ReadyReplicas == 0, nil + }); err != nil { + return errors.Wrap(err, "failed to wait for suspension") + } + return nil } -func (e *offlineExec) fenceInstance(ctx context.Context, instancePod *corev1.Pod) error { - // TODO: should we perform a checkpoint? Should it be configurable? 
What if it takes too long? - cmd := []string{"touch", "/pgdata/sleep-forever"} - if err := e.podExec(ctx, instancePod.GetNamespace(), instancePod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, cmd...); err != nil { - return fmt.Errorf("failed to run pod exec: %w", err) +func (e *offlineExec) resumeInstance(ctx context.Context, instanceName string) error { + sts := &appsv1.StatefulSet{} + if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName}, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") } - // Re-create the pod and wait for database container to be unready. - if err := e.cl.Delete(ctx, instancePod); err != nil { - return fmt.Errorf("failed to delete pod: %w", err) + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + orig := sts.DeepCopy() + annots := sts.GetAnnotations() + delete(annots, pNaming.AnnotationInstanceSuspended) + sts.SetAnnotations(annots) + return e.cl.Patch(ctx, sts, client.MergeFrom(orig)) + }); err != nil { + return errors.Wrap(err, "failed to update stateful set annotations") } - log := logging.FromContext(ctx) - - if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, false, func(ctx context.Context) (bool, error) { - pod := &corev1.Pod{} - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(instancePod), pod); err != nil { - return false, client.IgnoreNotFound(err) + // wait for resume + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { + if err := e.cl.Get(ctx, client.ObjectKey{ + Namespace: e.cluster.GetNamespace(), + Name: instanceName, + }, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") } - - if pod.Status.Phase != corev1.PodRunning { - return false, nil - } - - databaseReady := false - allOthersReady := true - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.Name == 
naming.ContainerDatabase { - databaseReady = containerStatus.Ready - continue - } - if !containerStatus.Ready { - allOthersReady = false - } - } - - return allOthersReady && !databaseReady, nil + return sts.Status.Replicas > 0 && sts.Status.ReadyReplicas > 0, nil }); err != nil { - return errors.Wrap(err, "failed to wait for pod to be unready") + return errors.Wrap(err, "failed to wait for suspension") + } + return nil +} + +func (e *offlineExec) finalize(ctx context.Context) error { + targetInstance, err := e.getBackupTarget(ctx) + if err != nil { + return errors.Wrap(err, "failed to get backup target") } - log.Info("Instance fenced", "pod", instancePod.GetName()) + if err := e.resumeInstance(ctx, targetInstance); err != nil { + return errors.Wrap(err, "failed to resume instance") + } return nil } -func (e *offlineExec) unfenceInstance(ctx context.Context, instancePod *corev1.Pod) error { - cmd := []string{"rm", "-f", "/pgdata/sleep-forever"} - if err := e.podExec(ctx, instancePod.GetNamespace(), instancePod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, cmd...); err != nil { - return fmt.Errorf("failed to run pod exec: %w", err) +func (e *offlineExec) getBackupTarget(ctx context.Context) (string, error) { + // If we already determined it before, use it. + if name, ok := e.backup.GetAnnotations()[annotationBackupTarget]; ok && name != "" { + return name, nil } log := logging.FromContext(ctx) - // wait for database container to be ready - if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, false, func(ctx context.Context) (bool, error) { - pod := &corev1.Pod{} - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(instancePod), pod); err != nil { - return false, client.IgnoreNotFound(err) - } - - if pod.Status.Phase != corev1.PodRunning { - return false, nil - } + // TODO: single node clusters do not have replicas. + // We should allow using a primary pod as the backup target. 
+ // Since this is unsafe, we should let the user explicitly opt-in for this behavior. + replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, e.cluster) + if err != nil { + return "", errors.Wrap(err, "failed to get replica pods") + } + if len(replicas) == 0 { + return "", errors.New("no replica pods found") + } + targetPod := replicas[0] + instanceName := targetPod.GetLabels()[naming.LabelInstance] + if instanceName == "" { + return "", errors.New("cannot determine instance name from pod labels") + } - // ensure all containers are ready. - for _, containerStatus := range pod.Status.ContainerStatuses { - if !containerStatus.Ready { - return false, nil - } + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + orig := e.backup.DeepCopy() + bcp := e.backup.DeepCopy() + annots := bcp.GetAnnotations() + if annots == nil { + annots = make(map[string]string) } - - return true, nil + annots[annotationBackupTarget] = instanceName + bcp.SetAnnotations(annots) + return e.cl.Patch(ctx, bcp, client.MergeFrom(orig)) }); err != nil { - return errors.Wrap(err, "failed to wait for pod to be ready") + return "", errors.Wrap(err, "failed to update backup annotations") } - log.Info("Instance unfenced", "pod", instancePod.GetName()) - return nil + log.Info("Selected backup target", "instance", instanceName) + return instanceName, nil } -func (e *offlineExec) getTargetPVC(ctx context.Context, targetPod *corev1.Pod) (string, error) { - instanceName := targetPod.GetLabels()[naming.LabelInstance] - if instanceName == "" { - return "", errors.New("cannot determine instance name from pod labels") - } - +func (e *offlineExec) getTargetPVC(ctx context.Context, instanceName string) (string, error) { pvcs := &corev1.PersistentVolumeClaimList{} if err := e.cl.List(ctx, pvcs, &client.ListOptions{ - Namespace: targetPod.GetNamespace(), + Namespace: e.cluster.GetNamespace(), LabelSelector: labels.SelectorFromSet(map[string]string{ naming.LabelInstance: instanceName, 
naming.LabelRole: naming.RolePostgresData, diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 6ec2131c00..599a3aba70 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -8,6 +8,7 @@ import ( volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -25,7 +26,7 @@ type snapshotExecutor interface { // Returns the name of the PVC that will be snapshotted. prepare(ctx context.Context) (string, error) // Complete the snapshot. - complete(ctx context.Context) error + finalize(ctx context.Context) error } type snapshotReconciler struct { @@ -54,13 +55,12 @@ func newSnapshotReconciler( func newSnapshotExec( cl client.Client, - podExec runtime.PodExecutor, cluster *v2.PerconaPGCluster, backup *v2.PerconaPGBackup, ) (snapshotExecutor, error) { switch mode := cluster.Spec.Backups.VolumeSnapshots.Mode; mode { case v2.VolumeSnapshotModeOffline: - return newOfflineExec(cl, podExec, cluster, backup), nil + return newOfflineExec(cl, cluster, backup), nil default: return nil, fmt.Errorf("invalid or unsupported volume snapshot mode: %s", mode) } @@ -99,7 +99,7 @@ func Reconcile( return reconcile.Result{}, nil } - exec, err := newSnapshotExec(cl, podExec, pgCluster, pgBackup) + exec, err := newSnapshotExec(cl, pgCluster, pgBackup) if err != nil { stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { @@ -200,7 +200,7 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re // snapshot is complete and ready to be 
restored. case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): - if err := r.exec.complete(ctx); err != nil { + if err := r.exec.finalize(ctx); err != nil { return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) } @@ -245,14 +245,18 @@ func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot } func (r *snapshotReconciler) prepare(ctx context.Context) error { + // finalizer already present, prepare already completed if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { return nil } + // prepare the cluster pvcTarget, err := r.exec.prepare(ctx) if err != nil { return fmt.Errorf("failed to prepare for snapshot: %w", err) } + + // update snapshot status if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { if bcp.Status.Snapshot == nil { bcp.Status.Snapshot = &v2.SnapshotStatus{} @@ -261,26 +265,45 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { }); err != nil { return fmt.Errorf("failed to update backup status: %w", err) } - controllerutil.AddFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) - if err := r.cl.Update(ctx, r.backup); err != nil { - return fmt.Errorf("failed to update backup: %w", err) + + // add finalizer + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := r.backup.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { + return err + } + orig := bcp.DeepCopy() + controllerutil.AddFinalizer(bcp, pNaming.FinalizerCompleteSnapshot) + return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) + }); err != nil { + return fmt.Errorf("failed to add backup finalizer: %w", err) } r.log.Info("Prepared for snapshot") return nil } func (r *snapshotReconciler) complete(ctx context.Context) error { + // already finalized if !controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { return nil } - if err := r.exec.complete(ctx); err != nil { - return 
fmt.Errorf("complete failed: %w", err) + // run finalize + if err := r.exec.finalize(ctx); err != nil { + return fmt.Errorf("finalize failed: %w", err) } - controllerutil.RemoveFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) - if err := r.cl.Update(ctx, r.backup); err != nil { - return fmt.Errorf("failed to update backup: %w", err) + // remove finalizer + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := r.backup.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { + return err + } + orig := bcp.DeepCopy() + controllerutil.RemoveFinalizer(bcp, pNaming.FinalizerCompleteSnapshot) + return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) + }); err != nil { + return fmt.Errorf("failed to add remove finalizer: %w", err) } return nil } diff --git a/percona/naming/annotations.go b/percona/naming/annotations.go index dadb4242c8..accfbc80fa 100644 --- a/percona/naming/annotations.go +++ b/percona/naming/annotations.go @@ -46,4 +46,8 @@ const ( // Special annotation to disable `patroni-version-check` by overriding the patroni version with a custom value. AnnotationCustomPatroniVersion = PrefixPerconaPGV2 + "custom-patroni-version" + + // AnnotationInstanceSuspended must be set on the instance StatefulSet to mark + // the instance as suspended. 
+ AnnotationInstanceSuspended = PrefixPerconaPGV2 + "instance-suspended" ) From 77bb6a82d409521a09e12e4cbb269e78d8b0f505 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 11:28:45 +0530 Subject: [PATCH 20/90] finalizer renaming Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 8 ++++---- percona/naming/finalizers.go | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 599a3aba70..13fd85550e 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -246,7 +246,7 @@ func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot func (r *snapshotReconciler) prepare(ctx context.Context) error { // finalizer already present, prepare already completed - if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { + if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { return nil } @@ -273,7 +273,7 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { return err } orig := bcp.DeepCopy() - controllerutil.AddFinalizer(bcp, pNaming.FinalizerCompleteSnapshot) + controllerutil.AddFinalizer(bcp, pNaming.FinalizerSnapshotInProgress) return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) }); err != nil { return fmt.Errorf("failed to add backup finalizer: %w", err) @@ -284,7 +284,7 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { func (r *snapshotReconciler) complete(ctx context.Context) error { // already finalized - if !controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerCompleteSnapshot) { + if !controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { return nil } @@ -300,7 +300,7 @@ func (r *snapshotReconciler) complete(ctx context.Context) error { return err } orig := bcp.DeepCopy() - 
controllerutil.RemoveFinalizer(bcp, pNaming.FinalizerCompleteSnapshot) + controllerutil.RemoveFinalizer(bcp, pNaming.FinalizerSnapshotInProgress) return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) }); err != nil { return fmt.Errorf("failed to add remove finalizer: %w", err) diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index 37bd64ec9b..6521c94dfd 100644 --- a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -7,11 +7,6 @@ const ( FinalizerStopWatchers = PrefixPerconaInternal + "stop-watchers" //nolint:gosec FinalizerDeleteBackups = PrefixPercona + "delete-backups" - // FinalizerCompleteSnapshot is set on PerconaPGBackup objects. - // It ensures that any changes made to the PGCluster are reverted upon - // snapshot completion (success or failure) or pre-mature deletion of the PGBackup. - FinalizerCompleteSnapshot = PrefixPercona + "complete-snapshot" - FinalizerStopWatchersDeprecated = PrefixPercona + "stop-watchers" //nolint:gosec ) @@ -23,6 +18,11 @@ const ( // PerconaPGBackup finalizers const ( FinalizerDeleteBackup = PrefixPerconaInternal + "delete-backup" //nolint:gosec + + // FinalizerSnapshotInProgress is set on PerconaPGBackup objects. + // It ensures that any changes made to the PGCluster are reverted upon + // snapshot completion (success or failure) or pre-mature deletion of the PGBackup. 
+ FinalizerSnapshotInProgress = PrefixPercona + "snapshot-in-progress" //nolint:gosec ) // PerconaPGBackup job finalizers From c8a2daadaaf8c37b712ddb4862a0ef36bb6cf457 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 11:48:15 +0530 Subject: [PATCH 21/90] remove enabled field Signed-off-by: Mayank Shah --- .../generated/pgv2.percona.com_perconapgclusters.yaml | 2 -- config/crd/bases/pgv2.percona.com_perconapgclusters.yaml | 2 -- deploy/bundle.yaml | 2 -- deploy/crd.yaml | 2 -- deploy/cw-bundle.yaml | 2 -- percona/controller/pgbackup/snapshots/reconcile.go | 2 +- percona/watcher/wal.go | 2 +- pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go | 6 ++++-- 8 files changed, 6 insertions(+), 14 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index 9b7e454e0c..7e0a13138b 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7116,8 +7116,6 @@ spec: className: description: Name of the VolumeSnapshotClass to use. type: string - enabled: - type: boolean mode: default: offline description: Mode of the VolumeSnapshot. diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 750b44d840..fafcc93045 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -7545,8 +7545,6 @@ spec: className: description: Name of the VolumeSnapshotClass to use. type: string - enabled: - type: boolean mode: default: offline description: Mode of the VolumeSnapshot. diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 64e633a143..e33e9ef08a 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -7842,8 +7842,6 @@ spec: className: description: Name of the VolumeSnapshotClass to use. 
type: string - enabled: - type: boolean mode: default: offline description: Mode of the VolumeSnapshot. diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 9c0873cc49..4d4ee80e6e 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -7842,8 +7842,6 @@ spec: className: description: Name of the VolumeSnapshotClass to use. type: string - enabled: - type: boolean mode: default: offline description: Mode of the VolumeSnapshot. diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 00f8b29e87..8506de5cb2 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -7842,8 +7842,6 @@ spec: className: description: Name of the VolumeSnapshotClass to use. type: string - enabled: - type: boolean mode: default: offline description: Mode of the VolumeSnapshot. diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 13fd85550e..da2b6b16aa 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -89,7 +89,7 @@ func Reconcile( } // Check if volume snapshots are enabled for this cluster. 
- if pgCluster.Spec.Backups.VolumeSnapshots == nil || !pgCluster.Spec.Backups.VolumeSnapshots.Enabled { + if !pgCluster.Spec.Backups.IsVolumeSnapshotsEnabled() { if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed bcp.Status.Error = "Volume snapshots are not enabled for this cluster" diff --git a/percona/watcher/wal.go b/percona/watcher/wal.go index b35a9c6728..1f40a67d4a 100644 --- a/percona/watcher/wal.go +++ b/percona/watcher/wal.go @@ -44,7 +44,7 @@ func WatchCommitTimestamps(ctx context.Context, cli client.Client, eventChan cha } // TODO: add support - if cr.Spec.Backups.VolumeSnapshots != nil && cr.Spec.Backups.VolumeSnapshots.Enabled { + if cr.Spec.Backups.IsVolumeSnapshotsEnabled() { return } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 7c95812a50..43ae96246b 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -507,8 +507,6 @@ const ( ) type VolumeSnapshots struct { - Enabled bool `json:"enabled,omitempty"` - // Mode of the VolumeSnapshot. 
// +kubebuilder:validation:Enum={offline} // +kubebuilder:default=offline @@ -520,6 +518,10 @@ type VolumeSnapshots struct { ClassName string `json:"className"` } +func (b Backups) IsVolumeSnapshotsEnabled() bool { + return b.VolumeSnapshots != nil && b.VolumeSnapshots.ClassName != "" +} + func (b Backups) IsEnabled() bool { return b.Enabled == nil || *b.Enabled } From 808fd855071b1002d278891b3715b2c6b9171785 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 12:55:08 +0530 Subject: [PATCH 22/90] implement snapshot schedules Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgclusters.yaml | 7 ++ .../pgv2.percona.com_perconapgclusters.yaml | 7 ++ deploy/bundle.yaml | 7 ++ deploy/cr.yaml | 1 + deploy/crd.yaml | 7 ++ deploy/cw-bundle.yaml | 7 ++ internal/naming/names.go | 8 ++ .../pgbackup/snapshots/reconcile.go | 2 +- percona/controller/pgcluster/schedule.go | 86 +++++++++++++++++++ .../v2/perconapgcluster_types.go | 7 ++ .../v2/zz_generated.deepcopy.go | 7 +- 11 files changed, 144 insertions(+), 2 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index 7e0a13138b..7ce5f9bba5 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7122,6 +7122,13 @@ spec: enum: - offline type: string + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. 
+ Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string required: - className type: object diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index fafcc93045..fb0e66e43c 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -7551,6 +7551,13 @@ spec: enum: - offline type: string + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string required: - className type: object diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index e33e9ef08a..13c8933a35 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -7848,6 +7848,13 @@ spec: enum: - offline type: string + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string required: - className type: object diff --git a/deploy/cr.yaml b/deploy/cr.yaml index 0010d3a1e7..cbdc4f0a0d 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -410,6 +410,7 @@ spec: # enabled: true # mode: offline # className: VOLUME-SNAPSHOT-CLASS +# schedule: "0 0 * * 6" pgbackrest: # metadata: # labels: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 4d4ee80e6e..5dd2bf44a7 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -7848,6 +7848,13 @@ spec: enum: - offline type: string + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. 
+ Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string required: - className type: object diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 8506de5cb2..3fb85f021a 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -7848,6 +7848,13 @@ spec: enum: - offline type: string + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string required: - className type: object diff --git a/internal/naming/names.go b/internal/naming/names.go index 74f484c54d..52fe1ed119 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -473,6 +473,14 @@ func PGBackRestCronJob(cluster *v1beta1.PostgresCluster, backuptype, repoName st } } +// PGBackRestCronJob returns the ObjectMeta for a pgBackRest CronJob +func VolumeSnapshotCronJob(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta { + return metav1.ObjectMeta{ + Namespace: cluster.GetNamespace(), + Name: cluster.Name + "-snapshot", + } +} + // PGBackRestRestoreJob returns the ObjectMeta for a pgBackRest restore Job func PGBackRestRestoreJob(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta { return metav1.ObjectMeta{ diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index da2b6b16aa..7fb3c0d2a9 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -221,7 +221,7 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re bcp.Status.State = v2.BackupFailed bcp.Status.Error = stsErr.Error() }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + return reconcile.Result{}, nil } } diff --git 
a/percona/controller/pgcluster/schedule.go b/percona/controller/pgcluster/schedule.go index 49e2a85e19..df35fd3c14 100644 --- a/percona/controller/pgcluster/schedule.go +++ b/percona/controller/pgcluster/schedule.go @@ -12,6 +12,7 @@ import ( "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/controller/postgrescluster" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -29,6 +30,12 @@ func (r *PGClusterReconciler) reconcileScheduledBackups(ctx context.Context, cr } } } + + if cr.Spec.Backups.IsVolumeSnapshotsEnabled() && feature.Enabled(ctx, feature.BackupSnapshots) { + if err := r.reconcileScheduledSnapshots(ctx, cr, cr.Spec.Backups.VolumeSnapshots.Schedule); err != nil { + return errors.Wrapf(err, "failed to reconcile scheduled snapshots") + } + } return nil } @@ -137,3 +144,82 @@ func (r *PGClusterReconciler) createScheduledBackup(log logr.Logger, backupName, } return nil } + +func (r *PGClusterReconciler) createScheduledSnapshotFunc(log logr.Logger, backupName, namespace, clusterName string) func() { + return func() { + if err := r.createScheduledSnapshot(log, backupName, namespace, clusterName); err != nil { + log.Error(err, "failed to create a scheduled snapshot") + } + } +} + +func (r *PGClusterReconciler) createScheduledSnapshot(log logr.Logger, backupName, namespace, clusterName string) error { + ctx := context.Background() + + cr := &v2.PerconaPGCluster{} + if err := r.Client.Get(ctx, types.NamespacedName{ + Namespace: namespace, + Name: clusterName, + }, cr); err != nil { + if k8serrors.IsNotFound(err) { + log.Info("cluster is not found, deleting the job", "name", backupName, "cluster", cr.Name, "namespace", cr.Namespace) + + r.Cron.DeleteBackupJob(backupName, namespace) + return nil + } + 
return err + } + if cr.Status.State != v2.AppStateReady { + log.Info("Cluster is not ready. Can't start scheduled snapshot") + return nil + } + + pb := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: backupName + "-", + Namespace: namespace, + }, + Spec: v2.PerconaPGBackupSpec{ + PGCluster: cr.Name, + Method: ptr.To(v2.BackupMethodVolumeSnapshot), + }, + } + + if cr.Spec.Metadata != nil { + pb.Annotations = cr.Spec.Metadata.Annotations + pb.Labels = cr.Spec.Metadata.Labels + } + + err := r.Client.Create(ctx, pb) + if err != nil { + return errors.Wrapf(err, "failed to create PerconaPGBackup %s", backupName) + } + return nil +} + +func (r *PGClusterReconciler) reconcileScheduledSnapshots( + ctx context.Context, + cr *v2.PerconaPGCluster, + schedule *string) error { + log := logging.FromContext(ctx) + + name := naming.VolumeSnapshotCronJob(&v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cr.Name, + Namespace: cr.Namespace, + }}) + + if schedule == nil || *schedule == "" { + r.Cron.DeleteBackupJob(name.Name, name.Namespace) + return nil + } + + createBackupFunc := r.createScheduledSnapshotFunc(log, name.Name, cr.Namespace, cr.Name) + + if err := r.Cron.ApplyBackupJob(name.Name, name.Namespace, *schedule, createBackupFunc); err != nil { + log.Error(err, "failed to create a cron for a scheduled snapshot job") + return nil + } + + return nil +} diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 43ae96246b..96875f9661 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -516,6 +516,13 @@ type VolumeSnapshots struct { // Name of the VolumeSnapshotClass to use. // +kubebuilder:validation:Required ClassName string `json:"className"` + + // Defines the Cron schedule for a VolumeSnapshot. 
+ // Follows the standard Cron schedule syntax: + // https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + // +optional + // +kubebuilder:validation:MinLength=6 + Schedule *string `json:"schedule,omitempty"` } func (b Backups) IsVolumeSnapshotsEnabled() bool { diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 53ba7ee95d..ff1027ccf0 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -33,7 +33,7 @@ func (in *Backups) DeepCopyInto(out *Backups) { if in.VolumeSnapshots != nil { in, out := &in.VolumeSnapshots, &out.VolumeSnapshots *out = new(VolumeSnapshots) - **out = **in + (*in).DeepCopyInto(*out) } } @@ -1249,6 +1249,11 @@ func (in *SnapshotStatus) DeepCopy() *SnapshotStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VolumeSnapshots) DeepCopyInto(out *VolumeSnapshots) { *out = *in + if in.Schedule != nil { + in, out := &in.Schedule, &out.Schedule + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSnapshots. 
From ef509d7e8a46ccf957814cb947d3c906421bafa1 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 13:03:22 +0530 Subject: [PATCH 23/90] linting Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 2 +- percona/controller/pgcluster/schedule.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 7fb3c0d2a9..da2b6b16aa 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -221,7 +221,7 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re bcp.Status.State = v2.BackupFailed bcp.Status.Error = stsErr.Error() }); updErr != nil { - return reconcile.Result{}, nil + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } } diff --git a/percona/controller/pgcluster/schedule.go b/percona/controller/pgcluster/schedule.go index df35fd3c14..125c5921de 100644 --- a/percona/controller/pgcluster/schedule.go +++ b/percona/controller/pgcluster/schedule.go @@ -218,7 +218,7 @@ func (r *PGClusterReconciler) reconcileScheduledSnapshots( if err := r.Cron.ApplyBackupJob(name.Name, name.Namespace, *schedule, createBackupFunc); err != nil { log.Error(err, "failed to create a cron for a scheduled snapshot job") - return nil + return err } return nil From 2d93ca5fec49fe413a6fd2954c52f24d02d232e4 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 15:52:53 +0530 Subject: [PATCH 24/90] implement in-place restore Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgrestores.yaml | 7 +- .../pgv2.percona.com_perconapgclusters.yaml | 7 +- deploy/bundle.yaml | 7 +- deploy/crd.yaml | 7 +- deploy/cw-bundle.yaml | 7 +- percona/controller/pgcluster/restore.go | 3 +- percona/controller/pgrestore/controller.go | 21 +- .../pgrestore/snapshot/reconcile.go | 424 ++++++++++++++++++ 
percona/naming/finalizers.go | 3 +- .../v2/perconapgrestore_types.go | 30 +- .../v2/zz_generated.deepcopy.go | 5 + 11 files changed, 505 insertions(+), 16 deletions(-) create mode 100644 percona/controller/pgrestore/snapshot/reconcile.go diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index e1bc4d36b7..fb834cea73 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -74,10 +74,15 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + volumeSnapshotName: + description: The name of the VolumeSnapshot to perform restore from. + type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotName must be set + rule: has(self.repoName) || self.volumeSnapshotName != "" status: properties: completed: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index fb0e66e43c..d5491fee15 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -21999,10 +21999,15 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + volumeSnapshotName: + description: The name of the VolumeSnapshot to perform restore from. + type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotName must be set + rule: has(self.repoName) || self.volumeSnapshotName != "" status: properties: completed: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 13c8933a35..f235cfccac 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -22298,10 +22298,15 @@ spec: for the new PostgresCluster. 
pattern: ^repo[1-4] type: string + volumeSnapshotName: + description: The name of the VolumeSnapshot to perform restore from. + type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotName must be set + rule: has(self.repoName) || self.volumeSnapshotName != "" status: properties: completed: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 5dd2bf44a7..b26439eac3 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -22298,10 +22298,15 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + volumeSnapshotName: + description: The name of the VolumeSnapshot to perform restore from. + type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotName must be set + rule: has(self.repoName) || self.volumeSnapshotName != "" status: properties: completed: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 3fb85f021a..27cda7b257 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -22298,10 +22298,15 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + volumeSnapshotName: + description: The name of the VolumeSnapshot to perform restore from. 
+ type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotName must be set + rule: has(self.repoName) || self.volumeSnapshotName != "" status: properties: completed: diff --git a/percona/controller/pgcluster/restore.go b/percona/controller/pgcluster/restore.go index d941aa05b5..d5cc2fdfb1 100644 --- a/percona/controller/pgcluster/restore.go +++ b/percona/controller/pgcluster/restore.go @@ -5,6 +5,7 @@ import ( k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -39,7 +40,7 @@ func (r *PGClusterReconciler) createBootstrapRestoreObject(ctx context.Context, }, Spec: v2.PerconaPGRestoreSpec{ PGCluster: cr.Name, - RepoName: repoName, + RepoName: ptr.To(repoName), }, } if cr.CompareVersion("2.6.0") >= 0 && cr.Spec.Metadata != nil { diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 0061744463..93162e990c 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -10,6 +10,7 @@ import ( k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -19,6 +20,7 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/controller" + "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/snapshot" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 
"github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -62,6 +64,17 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, err } + pgCluster := &v2.PerconaPGCluster{} + err := r.Client.Get(ctx, types.NamespacedName{Name: pgRestore.Spec.PGCluster, Namespace: request.Namespace}, pgCluster) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") + } + + if pgRestore.Spec.VolumeSnapshotName != "" { + // Delegate to snapshot restore reconciliation + return snapshot.Reconcile(ctx, r.Client, pgCluster, pgRestore) + } + if pgRestore.DeletionTimestamp != nil { if err := runFinalizers(ctx, r.Client, pgRestore); err != nil { return reconcile.Result{}, errors.Wrap(err, "failed to run finalizers") @@ -73,12 +86,6 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, nil } - pgCluster := &v2.PerconaPGCluster{} - err := r.Client.Get(ctx, types.NamespacedName{Name: pgRestore.Spec.PGCluster, Namespace: request.Namespace}, pgCluster) - if err != nil { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") - } - switch pgRestore.Status.State { case v2.RestoreNew: if restore := pgCluster.Spec.Backups.PGBackRest.Restore; restore != nil && *restore.Enabled { @@ -228,7 +235,7 @@ func startRestore(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, tvar := true pg.Spec.Backups.PGBackRest.Restore.Enabled = &tvar - pg.Spec.Backups.PGBackRest.Restore.RepoName = pr.Spec.RepoName + pg.Spec.Backups.PGBackRest.Restore.RepoName = ptr.Deref(pr.Spec.RepoName, "") pg.Spec.Backups.PGBackRest.Restore.Options = pr.Spec.Options if err := c.Patch(ctx, pg, client.MergeFrom(orig)); err != nil { diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go 
new file mode 100644 index 0000000000..e583717608 --- /dev/null +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -0,0 +1,424 @@ +package snapshot + +import ( + "context" + "fmt" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/yaml" + + "github.com/percona/percona-postgresql-operator/v2/internal/feature" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + "github.com/percona/percona-postgresql-operator/v2/percona/controller" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" +) + +type snapshotRestorer struct { + cl client.Client + log logging.Logger + cluster *v2.PerconaPGCluster + restore *v2.PerconaPGRestore +} + +func newSnapshotRestorer( + cl client.Client, + log logging.Logger, + cluster *v2.PerconaPGCluster, + restore *v2.PerconaPGRestore, +) *snapshotRestorer { + return &snapshotRestorer{ + cl: cl, + log: log, + cluster: cluster, + restore: restore, + } +} + +func Reconcile( + ctx context.Context, + c client.Client, + pg *v2.PerconaPGCluster, + restore *v2.PerconaPGRestore, +) (reconcile.Result, error) { + log := logging.FromContext(ctx).WithName("SnapshotRestorer") + + if !feature.Enabled(ctx, feature.BackupSnapshots) { + log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot restore", feature.BackupSnapshots)) + return 
reconcile.Result{}, nil + } + + r := newSnapshotRestorer(c, log, pg, restore) + + if !restore.GetDeletionTimestamp().IsZero() { + if ok, err := r.runFinalizers(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "run finalizers") + } else if !ok { + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + return reconcile.Result{}, nil + } + + switch restore.Status.State { + case v2.RestoreNew: + return r.reconcileNew(ctx) + case v2.RestoreStarting: + return r.reconcileStarting(ctx) + case v2.RestoreRunning: + return r.reconcileRunning(ctx) + case v2.RestoreSucceeded, v2.RestoreFailed: + ok, err := r.runFinalizers(ctx) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "run finalizers") + } + if !ok { + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + return reconcile.Result{}, nil + } + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileNew(ctx context.Context) (reconcile.Result, error) { + if restore := r.cluster.Spec.Backups.PGBackRest.Restore; restore != nil && *restore.Enabled { + r.log.Info("Waiting for another restore to finish") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + restores := &v2.PerconaPGRestoreList{} + if err := r.cl.List(ctx, restores, client.InNamespace(r.cluster.Namespace)); err != nil { + return reconcile.Result{}, errors.Wrap(err, "list restores") + } + for _, restore := range restores.Items { + if restore.Spec.PGCluster != r.cluster.Name || restore.IsCompleted() || restore.GetName() == r.restore.GetName() { + continue + } + r.log.Info("Waiting for another restore to finish") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreStarting + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + r.log.Info("Snapshot restore is starting") + return reconcile.Result{}, nil +} + 
+func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Result, error) { + // Check if specified volume snapshot exists + volumeSnapshotName := r.restore.Spec.VolumeSnapshotName + volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{} + if err := r.cl.Get(ctx, types.NamespacedName{Name: volumeSnapshotName, Namespace: r.cluster.Namespace}, volumeSnapshot); err != nil { + if k8serrors.IsNotFound(err) { + r.log.Info("Volume snapshot not found, failing restore") + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreFailed + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + return reconcile.Result{}, nil + } + return reconcile.Result{}, errors.Wrap(err, "get volume snapshot") + } + + // pausing the cluster so the PVCs are unmounted and can be re-created. + if ok, err := r.pauseCluster(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "pause cluster") + } else if !ok { + r.log.Info("Waiting for cluster to be paused") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if err := r.ensureFinalizers(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "ensure finalizers") + } + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreRunning + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + + r.log.Info("Snapshot restore is running") + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Result, error) { + volumeSnapshotName := r.restore.Spec.VolumeSnapshotName + clusterPVCs, err := r.listPVCs(ctx) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "list PVCs") + } + + for _, pvc := range clusterPVCs.Items { + if ok, err := r.replacePVC(ctx, &pvc, volumeSnapshotName); err != nil { + return reconcile.Result{}, errors.Wrap(err, 
"replace PVC") + } else if !ok { + r.log.Info("Waiting for PVC to be replaced", "pvc", pvc.GetName()) + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + } + + // Start the cluster + if ok, err := r.resumeCluster(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "resume cluster") + } else if !ok { + r.log.Info("Waiting for cluster to be ready") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + // TODO: Implement PiTR + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreSucceeded + restore.Status.CompletedAt = ptr.To(metav1.Now()) + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + + r.log.Info("Snapshot restore complete") + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) pvcStateConfigMapName(restoreName string) metav1.ObjectMeta { + return metav1.ObjectMeta{ + Namespace: r.cluster.Namespace, + Name: fmt.Sprintf("pvc-state-%s-%s", r.cluster.Name, restoreName), + } +} + +// listPVCs retrieves the list of PVCs that need to be restored from snapshots. +// +// This function maintains a ConfigMap that stores the PVC specifications before +// they are deleted and recreated. This serves two critical purposes: +// +// 1. If the operator restarts during a restore operation, the +// ConfigMap preserves the list of PVCs that still need to be processed, +// allowing the restore to continue seamlessly. +// 2. During restore, PVCs are deleted and recreated from +// snapshots. Once deleted, the original PVC specifications are lost. By storing +// the state externally, the operator can identidy which PVCs need to be tracked. 
+func (r *snapshotRestorer) listPVCs( + ctx context.Context, +) (*corev1.PersistentVolumeClaimList, error) { + cm := &corev1.ConfigMap{ + ObjectMeta: r.pvcStateConfigMapName(r.restore.Name), + } + + result := &corev1.PersistentVolumeClaimList{} + + cmKey := "pvcs.yaml" + err := r.cl.Get(ctx, client.ObjectKeyFromObject(cm), cm) + + switch { + // ConfigMap was found. + case err == nil: + data := cm.Data[cmKey] + if err := yaml.Unmarshal([]byte(data), result); err != nil { + return nil, errors.Wrap(err, "unmarshal PVC state") + } + return result, nil + + // ConfigMap was not found, list the PVCs and create the ConfigMap. + case k8serrors.IsNotFound(err): + if err := r.cl.List(ctx, result, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.GetName(), + naming.LabelRole: naming.RolePostgresData, + }), + }); err != nil { + return nil, errors.Wrap(err, "list instance PVCs") + } + data, err := yaml.Marshal(result) + if err != nil { + return nil, errors.Wrap(err, "marshal PVC state") + } + cm.Data = map[string]string{cmKey: string(data)} + // TODO: add a finalizer on this configmap? 
+ return result, r.cl.Create(ctx, cm) + } + return nil, err +} + +func (r *snapshotRestorer) replacePVC( + ctx context.Context, + pvc *corev1.PersistentVolumeClaim, + snapshotName string, +) (bool, error) { + observedPVC := &corev1.PersistentVolumeClaim{} + err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) + + if k8serrors.IsNotFound(err) { + // PVC doesn't exist, create it from the snapshot + if err := r.createPVCWithSnapshot(ctx, pvc, snapshotName); err != nil { + return false, errors.Wrap(err, "create PVC with snapshot") + } + return false, nil + } else if err != nil { + return false, errors.Wrap(err, "get observed PVC") + } + + // Check if the PVC is already using the snapshot + if dataSource := observedPVC.Spec.DataSource; dataSource != nil { + if dataSource.Kind == "VolumeSnapshot" && + ptr.Deref(dataSource.APIGroup, "") == volumesnapshotv1.GroupName && + dataSource.Name == snapshotName { + return true, nil + } + } + + // If deleting, wait for it to be deleted before recreating + if !observedPVC.GetDeletionTimestamp().IsZero() { + return false, nil + } + + // Delete the existing PVC so we can recreate it from the snapshot + if err := r.cl.Delete(ctx, observedPVC); err != nil { + return false, errors.Wrap(err, "delete PVC") + } + return false, nil +} + +func (r snapshotRestorer) createPVCWithSnapshot(ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string) error { + instanceName := pvc.GetLabels()[naming.LabelInstanceSet] + if instanceName == "" { + return errors.New("instance not known for PVC") + } + var volumeClaimSpec *corev1.PersistentVolumeClaimSpec + for _, instanceSet := range r.cluster.Spec.InstanceSets { + if instanceSet.Name == instanceName { + volumeClaimSpec = &instanceSet.DataVolumeClaimSpec + break + } + } + if volumeClaimSpec == nil { + return fmt.Errorf("instance set '%s' either not found or has no data volume claim spec", instanceName) + } + volumeClaimSpec.DataSource = &corev1.TypedLocalObjectReference{ + 
APIGroup: ptr.To(volumesnapshotv1.GroupName), + Kind: "VolumeSnapshot", + Name: snapshotName, + } + newPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvc.GetName(), + Namespace: pvc.GetNamespace(), + }, + Spec: *volumeClaimSpec, + } + return r.cl.Create(ctx, newPVC) +} + +func (r *snapshotRestorer) pauseCluster(ctx context.Context) (bool, error) { + // Check if already paused + if r.cluster.Spec.Pause != nil && *r.cluster.Spec.Pause { + return r.cluster.Status.State == v2.AppStatePaused, nil + } + + // Pause the cluster + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + orig := r.cluster.DeepCopy() + updated := orig.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(updated), updated); err != nil { + return err + } + updated.Spec.Pause = ptr.To(true) + return r.cl.Patch(ctx, updated, client.MergeFrom(orig)) + }); err != nil { + return false, err + } + return false, nil +} + +func (r *snapshotRestorer) resumeCluster(ctx context.Context) (bool, error) { + // Check if already resumed + if r.cluster.Spec.Pause == nil || !*r.cluster.Spec.Pause { + return r.cluster.Status.State == v2.AppStateReady, nil + } + + // Resume the cluster + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + orig := r.cluster.DeepCopy() + updated := orig.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(updated), updated); err != nil { + return err + } + updated.Spec.Pause = nil + return r.cl.Patch(ctx, updated, client.MergeFrom(orig)) + }); err != nil { + return false, err + } + return false, nil +} + +func (r *snapshotRestorer) ensureFinalizers(ctx context.Context) error { + orig := r.restore.DeepCopy() + + finalizers := []string{pNaming.FinalizerSnapshotRestore} + finalizersChanged := false + for _, f := range finalizers { + if controllerutil.AddFinalizer(r.restore, f) { + finalizersChanged = true + } + } + if !finalizersChanged { + return nil + } + + if err := r.cl.Patch(ctx, 
r.restore.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch finalizers") + } + return nil +} + +func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { + finalizers := map[string]controller.FinalizerFunc[*v2.PerconaPGRestore]{ + pNaming.FinalizerSnapshotRestore: r.finalizeSnapshotRestore(r.cl, r.restore), + } + + finished := true + for finalizer, f := range finalizers { + done, err := controller.RunFinalizer(ctx, r.cl, r.restore, finalizer, f) + if err != nil { + return false, errors.Wrapf(err, "run finalizer %s", finalizer) + } + if !done { + finished = false + } + } + return finished, nil +} + +func (r *snapshotRestorer) finalizeSnapshotRestore(c client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { + return func(ctx context.Context, restore *v2.PerconaPGRestore) error { + // Resume the cluster if it was paused during restore + if _, err := r.resumeCluster(ctx); err != nil { + return errors.Wrap(err, "resume cluster") + } + + // Always clean up the PVC state ConfigMap regardless of restore success or failure + cm := &corev1.ConfigMap{ + ObjectMeta: r.pvcStateConfigMapName(restore.Name), + } + if err := c.Delete(ctx, cm); client.IgnoreNotFound(err) != nil { + return errors.Wrap(err, "delete PVC state configmap") + } + return nil + } +} diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index 6521c94dfd..db7bf5441c 100644 --- a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -27,5 +27,6 @@ const ( // PerconaPGBackup job finalizers const ( - FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec + FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec + FinalizerSnapshotRestore = PrefixPerconaInternal + "snapshot-restore" //nolint:gosec ) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index a1e395bf1a..193200540e 100644 
--- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -1,7 +1,12 @@ package v2 import ( + "context" + + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" ) func init() { @@ -24,6 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` + // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.volumeSnapshotName != \"\"",message="either repoName or volumeSnapshotName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } @@ -44,9 +50,12 @@ type PerconaPGRestoreSpec struct { // The name of the pgBackRest repo within the source PostgresCluster that contains the backups // that should be utilized to perform a pgBackRest restore when initializing the data source // for the new PostgresCluster. - // +kubebuilder:validation:Required // +kubebuilder:validation:Pattern=^repo[1-4] - RepoName string `json:"repoName"` + RepoName *string `json:"repoName,omitempty"` + + // The name of the VolumeSnapshot to perform restore from. + // +optional + VolumeSnapshotName string `json:"volumeSnapshotName,omitempty"` // Command line options to include when running the pgBackRest restore command. 
// https://pgbackrest.org/command.html#command-restore @@ -69,3 +78,20 @@ type PerconaPGRestoreStatus struct { State PGRestoreState `json:"state,omitempty"` CompletedAt *metav1.Time `json:"completed,omitempty"` } + +func (r *PerconaPGRestore) IsCompleted() bool { + return r.Status.State == RestoreSucceeded || r.Status.State == RestoreFailed +} + +func (pgRestore *PerconaPGRestore) UpdateStatus(ctx context.Context, cl client.Client, updateFunc func(restore *PerconaPGRestore)) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + restore := new(PerconaPGRestore) + if err := cl.Get(ctx, client.ObjectKeyFromObject(pgRestore), restore); err != nil { + return errors.Wrap(err, "get PGRestore") + } + + updateFunc(restore) + + return cl.Status().Update(ctx, restore) + }) +} diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index ff1027ccf0..d23779ed6a 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -962,6 +962,11 @@ func (in *PerconaPGRestoreList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PerconaPGRestoreSpec) DeepCopyInto(out *PerconaPGRestoreSpec) { *out = *in + if in.RepoName != nil { + in, out := &in.RepoName, &out.RepoName + *out = new(string) + **out = **in + } if in.Options != nil { in, out := &in.Options, &out.Options *out = make([]string, len(*in)) From 78032c434cc835e3f9c313545997e766653eb967 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 17:37:13 +0530 Subject: [PATCH 25/90] update cr.yaml example Signed-off-by: Mayank Shah --- deploy/restore.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy/restore.yaml b/deploy/restore.yaml index e85f417e7c..2f8ccd86cb 100644 --- a/deploy/restore.yaml +++ b/deploy/restore.yaml @@ -5,6 +5,7 @@ metadata: spec: pgCluster: cluster1 repoName: repo1 +# volumeSnapshotName: backup1 # options: # - --type=time # - --target="2022-11-30 15:12:11+03" From bf0419925667abfab596b12af0e2d19083e456a4 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 17:46:04 +0530 Subject: [PATCH 26/90] linting Signed-off-by: Mayank Shah --- percona/controller/pgrestore/snapshot/reconcile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index e583717608..5a0aabb662 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -295,7 +295,7 @@ func (r *snapshotRestorer) replacePVC( return false, nil } -func (r snapshotRestorer) createPVCWithSnapshot(ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string) error { +func (r *snapshotRestorer) createPVCWithSnapshot(ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string) error { instanceName := pvc.GetLabels()[naming.LabelInstanceSet] if instanceName == "" { return errors.New("instance not known for PVC") From 581a629e2f656af1492278bae3a04c0ad9b87d8f Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 
2026 18:43:40 +0530 Subject: [PATCH 27/90] no need to use configmap to track Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile.go | 108 ++++++------------ 1 file changed, 36 insertions(+), 72 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 5a0aabb662..8bfd684226 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -3,10 +3,13 @@ package snapshot import ( "context" "fmt" + "slices" + "strings" "time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -17,7 +20,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "sigs.k8s.io/yaml" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" @@ -165,7 +167,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, errors.Wrap(err, "list PVCs") } - for _, pvc := range clusterPVCs.Items { + for _, pvc := range clusterPVCs { if ok, err := r.replacePVC(ctx, &pvc, volumeSnapshotName); err != nil { return reconcile.Result{}, errors.Wrap(err, "replace PVC") } else if !ok { @@ -195,65 +197,38 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, nil } -func (r *snapshotRestorer) pvcStateConfigMapName(restoreName string) metav1.ObjectMeta { - return metav1.ObjectMeta{ - Namespace: r.cluster.Namespace, - Name: fmt.Sprintf("pvc-state-%s-%s", r.cluster.Name, restoreName), - } -} - -// listPVCs retrieves the list of PVCs that need to be 
restored from snapshots. +// listPVCs returns the list of PostgreSQL data PVCs that need to be restored. // -// This function maintains a ConfigMap that stores the PVC specifications before -// they are deleted and recreated. This serves two critical purposes: +// Instead of listing existing PVCs directly, this function derives the PVC names +// from the cluster's StatefulSets. This approach is necessary because during restore, +// PVCs are deleted and recreated from snapshots. Listing live PVCs would miss PVCs that are +// currently being deleted or recreated. // -// 1. If the operator restarts during a restore operation, the -// ConfigMap preserves the list of PVCs that still need to be processed, -// allowing the restore to continue seamlessly. -// 2. During restore, PVCs are deleted and recreated from -// snapshots. Once deleted, the original PVC specifications are lost. By storing -// the state externally, the operator can identidy which PVCs need to be tracked. -func (r *snapshotRestorer) listPVCs( - ctx context.Context, -) (*corev1.PersistentVolumeClaimList, error) { - cm := &corev1.ConfigMap{ - ObjectMeta: r.pvcStateConfigMapName(r.restore.Name), +// The function returns PVC objects with only metadata populated (name and namespace), +// which is sufficient for tracking which PVCs need to be replaced. +func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVolumeClaim, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + }), + }); err != nil { + return nil, errors.Wrap(err, "list instances") } - result := &corev1.PersistentVolumeClaimList{} - - cmKey := "pvcs.yaml" - err := r.cl.Get(ctx, client.ObjectKeyFromObject(cm), cm) - - switch { - // ConfigMap was found. 
- case err == nil: - data := cm.Data[cmKey] - if err := yaml.Unmarshal([]byte(data), result); err != nil { - return nil, errors.Wrap(err, "unmarshal PVC state") - } - return result, nil - - // ConfigMap was not found, list the PVCs and create the ConfigMap. - case k8serrors.IsNotFound(err): - if err := r.cl.List(ctx, result, &client.ListOptions{ - Namespace: r.cluster.GetNamespace(), - LabelSelector: labels.SelectorFromSet(map[string]string{ - naming.LabelCluster: r.cluster.GetName(), - naming.LabelRole: naming.RolePostgresData, - }), - }); err != nil { - return nil, errors.Wrap(err, "list instance PVCs") - } - data, err := yaml.Marshal(result) - if err != nil { - return nil, errors.Wrap(err, "marshal PVC state") - } - cm.Data = map[string]string{cmKey: string(data)} - // TODO: add a finalizer on this configmap? - return result, r.cl.Create(ctx, cm) + result := []corev1.PersistentVolumeClaim{} + for _, instance := range instances.Items { + result = append(result, corev1.PersistentVolumeClaim{ + ObjectMeta: naming.InstancePostgresDataVolume(&instance), + }) } - return nil, err + + // sort to ensure consistent ordering + slices.SortStableFunc(result, func(a, b corev1.PersistentVolumeClaim) int { + return strings.Compare(a.GetName(), b.GetName()) + }) + return result, nil } func (r *snapshotRestorer) replacePVC( @@ -266,7 +241,7 @@ func (r *snapshotRestorer) replacePVC( if k8serrors.IsNotFound(err) { // PVC doesn't exist, create it from the snapshot - if err := r.createPVCWithSnapshot(ctx, pvc, snapshotName); err != nil { + if err := r.createPVCFromSnapshot(ctx, pvc, snapshotName); err != nil { return false, errors.Wrap(err, "create PVC with snapshot") } return false, nil @@ -295,7 +270,7 @@ func (r *snapshotRestorer) replacePVC( return false, nil } -func (r *snapshotRestorer) createPVCWithSnapshot(ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string) error { +func (r *snapshotRestorer) createPVCFromSnapshot(ctx context.Context, pvc 
*corev1.PersistentVolumeClaim, snapshotName string) error { instanceName := pvc.GetLabels()[naming.LabelInstanceSet] if instanceName == "" { return errors.New("instance not known for PVC") @@ -316,11 +291,8 @@ func (r *snapshotRestorer) createPVCWithSnapshot(ctx context.Context, pvc *corev Name: snapshotName, } newPVC := &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: pvc.GetName(), - Namespace: pvc.GetNamespace(), - }, - Spec: *volumeClaimSpec, + ObjectMeta: pvc.ObjectMeta, + Spec: *volumeClaimSpec, } return r.cl.Create(ctx, newPVC) } @@ -405,20 +377,12 @@ func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { return finished, nil } -func (r *snapshotRestorer) finalizeSnapshotRestore(c client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { +func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { return func(ctx context.Context, restore *v2.PerconaPGRestore) error { // Resume the cluster if it was paused during restore if _, err := r.resumeCluster(ctx); err != nil { return errors.Wrap(err, "resume cluster") } - - // Always clean up the PVC state ConfigMap regardless of restore success or failure - cm := &corev1.ConfigMap{ - ObjectMeta: r.pvcStateConfigMapName(restore.Name), - } - if err := c.Delete(ctx, cm); client.IgnoreNotFound(err) != nil { - return errors.Wrap(err, "delete PVC state configmap") - } return nil } } From 834d27c4777b562ba9f0029566e0e4b892461d33 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 23 Jan 2026 20:06:30 +0530 Subject: [PATCH 28/90] more improvements Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgrestores.yaml | 12 ++++++- .../pgv2.percona.com_perconapgclusters.yaml | 12 ++++++- deploy/bundle.yaml | 12 ++++++- deploy/cr.yaml | 1 - deploy/crd.yaml | 12 ++++++- deploy/cw-bundle.yaml | 12 ++++++- .../pgrestore/snapshot/reconcile.go 
| 31 +++++++++++-------- percona/naming/kinds.go | 5 +++ .../v2/perconapgrestore_types.go | 5 ++- 9 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 percona/naming/kinds.go diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index fb834cea73..31b017719c 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -67,6 +67,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -74,15 +77,22 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf volumeSnapshotName: description: The name of the VolumeSnapshot to perform restore from. type: string + x-kubernetes-validations: + - message: volumeSnapshotName is an immutable field + rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.volumeSnapshotName != "" + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + != "" status: properties: completed: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index d5491fee15..788778ca44 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -21992,6 +21992,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. 
type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -21999,15 +22002,22 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf volumeSnapshotName: description: The name of the VolumeSnapshot to perform restore from. type: string + x-kubernetes-validations: + - message: volumeSnapshotName is an immutable field + rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.volumeSnapshotName != "" + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + != "" status: properties: completed: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index f235cfccac..8d2d81ff3b 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -22291,6 +22291,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22298,15 +22301,22 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf volumeSnapshotName: description: The name of the VolumeSnapshot to perform restore from. 
type: string + x-kubernetes-validations: + - message: volumeSnapshotName is an immutable field + rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.volumeSnapshotName != "" + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + != "" status: properties: completed: diff --git a/deploy/cr.yaml b/deploy/cr.yaml index cbdc4f0a0d..83e45dbe85 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -407,7 +407,6 @@ spec: backups: # trackLatestRestorableTime: true # volumeSnapshots: -# enabled: true # mode: offline # className: VOLUME-SNAPSHOT-CLASS # schedule: "0 0 * * 6" diff --git a/deploy/crd.yaml b/deploy/crd.yaml index b26439eac3..bb5530d2d7 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -22291,6 +22291,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22298,15 +22301,22 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf volumeSnapshotName: description: The name of the VolumeSnapshot to perform restore from. 
type: string + x-kubernetes-validations: + - message: volumeSnapshotName is an immutable field + rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.volumeSnapshotName != "" + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + != "" status: properties: completed: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 27cda7b257..317d6895a0 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -22291,6 +22291,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22298,15 +22301,22 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf volumeSnapshotName: description: The name of the VolumeSnapshot to perform restore from. 
type: string + x-kubernetes-validations: + - message: volumeSnapshotName is an immutable field + rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.volumeSnapshotName != "" + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + != "" status: properties: completed: diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 8bfd684226..18a50a2fde 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -168,10 +168,10 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu } for _, pvc := range clusterPVCs { - if ok, err := r.replacePVC(ctx, &pvc, volumeSnapshotName); err != nil { - return reconcile.Result{}, errors.Wrap(err, "replace PVC") + if ok, err := r.restorePVCFromSnapshot(ctx, &pvc, volumeSnapshotName); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PVC from snapshot") } else if !ok { - r.log.Info("Waiting for PVC to be replaced", "pvc", pvc.GetName()) + r.log.Info("Waiting for PVC to restored", "pvc", pvc.GetName()) return reconcile.Result{RequeueAfter: time.Second * 5}, nil } } @@ -200,18 +200,19 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu // listPVCs returns the list of PostgreSQL data PVCs that need to be restored. // // Instead of listing existing PVCs directly, this function derives the PVC names -// from the cluster's StatefulSets. This approach is necessary because during restore, -// PVCs are deleted and recreated from snapshots. Listing live PVCs would miss PVCs that are -// currently being deleted or recreated. +// from the cluster instance statefulsets. This is necessary because during restore, +// PVCs are deleted and recreated. 
Listing live PVCs would miss PVCs that are +// currently being recreated and lead to an inconsistent state. // -// The function returns PVC objects with only metadata populated (name and namespace), -// which is sufficient for tracking which PVCs need to be replaced. +// The function returns PVC objects with only metadata populated, +// which is sufficient for getting the job done. func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVolumeClaim, error) { instances := &appsv1.StatefulSetList{} if err := r.cl.List(ctx, instances, &client.ListOptions{ - Namespace: r.cluster.Namespace, + Namespace: r.cluster.GetNamespace(), LabelSelector: labels.SelectorFromSet(map[string]string{ naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, }), }); err != nil { return nil, errors.Wrap(err, "list instances") @@ -219,8 +220,12 @@ func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVol result := []corev1.PersistentVolumeClaim{} for _, instance := range instances.Items { + objectMeta := naming.InstancePostgresDataVolume(&instance) + objectMeta.SetLabels(map[string]string{ + naming.LabelInstanceSet: instance.Labels[naming.LabelInstanceSet], // needed for createPVCFromSnapshot + }) result = append(result, corev1.PersistentVolumeClaim{ - ObjectMeta: naming.InstancePostgresDataVolume(&instance), + ObjectMeta: objectMeta, }) } @@ -231,7 +236,7 @@ func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVol return result, nil } -func (r *snapshotRestorer) replacePVC( +func (r *snapshotRestorer) restorePVCFromSnapshot( ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string, @@ -251,7 +256,7 @@ func (r *snapshotRestorer) replacePVC( // Check if the PVC is already using the snapshot if dataSource := observedPVC.Spec.DataSource; dataSource != nil { - if dataSource.Kind == "VolumeSnapshot" && + if dataSource.Kind == pNaming.KindVolumeSnapshot && ptr.Deref(dataSource.APIGroup, 
"") == volumesnapshotv1.GroupName && dataSource.Name == snapshotName { return true, nil @@ -287,7 +292,7 @@ func (r *snapshotRestorer) createPVCFromSnapshot(ctx context.Context, pvc *corev } volumeClaimSpec.DataSource = &corev1.TypedLocalObjectReference{ APIGroup: ptr.To(volumesnapshotv1.GroupName), - Kind: "VolumeSnapshot", + Kind: pNaming.KindVolumeSnapshot, Name: snapshotName, } newPVC := &corev1.PersistentVolumeClaim{ diff --git a/percona/naming/kinds.go b/percona/naming/kinds.go new file mode 100644 index 0000000000..6b1c8cbeca --- /dev/null +++ b/percona/naming/kinds.go @@ -0,0 +1,5 @@ +package naming + +const ( + KindVolumeSnapshot = "VolumeSnapshot" +) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index 193200540e..32a539554d 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -29,7 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` - // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.volumeSnapshotName != \"\"",message="either repoName or volumeSnapshotName must be set" + // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.repoName != \"\" || self.volumeSnapshotName != \"\"",message="either repoName or volumeSnapshotName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } @@ -45,16 +45,19 @@ type PerconaPGRestoreList struct { type PerconaPGRestoreSpec struct { // The name of the PerconaPGCluster to perform restore. 
// +kubebuilder:validation:Required + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="pgCluster is an immutable field" PGCluster string `json:"pgCluster"` // The name of the pgBackRest repo within the source PostgresCluster that contains the backups // that should be utilized to perform a pgBackRest restore when initializing the data source // for the new PostgresCluster. // +kubebuilder:validation:Pattern=^repo[1-4] + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="repoName is an immutable field" RepoName *string `json:"repoName,omitempty"` // The name of the VolumeSnapshot to perform restore from. // +optional + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="volumeSnapshotName is an immutable field" VolumeSnapshotName string `json:"volumeSnapshotName,omitempty"` // Command line options to include when running the pgBackRest restore command. From 645cf65dbafaa3a72583303bb70ae6568f721c7f Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 14:10:41 +0530 Subject: [PATCH 29/90] typo Signed-off-by: Mayank Shah --- pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 41cc0f07d5..3e98a5ddf7 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -199,7 +199,7 @@ func (b *PerconaPGBackup) Default() { b.Spec.Method = ptr.To(BackupMethodPGBackrest) } - if b.Spec.Method == ptr.To(BackupMethodPGBackrest) { + if *b.Spec.Method == BackupMethodPGBackrest { b.Spec.Options = append(b.Spec.Options, fmt.Sprintf(`--annotation="%s"="%s"`, PGBackrestAnnotationBackupName, b.Name)) } } From e9196e47229d3c80c7d616c147cdae41443885b3 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 15:41:44 +0530 Subject: [PATCH 30/90] wip: PiTR 
Signed-off-by: Mayank Shah --- percona/controller/pgrestore/controller.go | 104 ++-------------- .../pgrestore/snapshot/reconcile.go | 38 +++++- .../controller/pgrestore/utils/pgbackrest.go | 113 ++++++++++++++++++ 3 files changed, 163 insertions(+), 92 deletions(-) create mode 100644 percona/controller/pgrestore/utils/pgbackrest.go diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 93162e990c..02dc5a892b 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -10,7 +10,6 @@ import ( k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" - "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -18,12 +17,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/percona/percona-postgresql-operator/v2/internal/logging" - "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/controller" "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/snapshot" + restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" - "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) const ( @@ -86,6 +84,8 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, nil } + restorer := restoreutils.NewPGBackRestRestore(r.Client, pgCluster, pgRestore) + switch pgRestore.Status.State { case v2.RestoreNew: if restore := pgCluster.Spec.Backups.PGBackRest.Restore; restore != nil && *restore.Enabled { @@ 
-94,9 +94,12 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R } if _, ok := pgRestore.Annotations[pNaming.AnnotationClusterBootstrapRestore]; !ok { - if err := startRestore(ctx, r.Client, pgCluster, pgRestore); err != nil { + if err := restorer.Start(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "start restore") } + if err := ensureFinalizers(ctx, r.Client, pgRestore); err != nil { + return reconcile.Result{}, errors.Wrap(err, "ensure finalizers") + } } pgRestore.Status.State = v2.RestoreStarting @@ -123,26 +126,23 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, nil case v2.RestoreRunning: - job := &batchv1.Job{} - err := r.Client.Get(ctx, types.NamespacedName{Name: pgCluster.Name + "-pgbackrest-restore", Namespace: pgCluster.Namespace}, job) + status, completedAt, err := restorer.ObserveStatus(ctx) if err != nil { - return reconcile.Result{}, errors.Wrap(err, "get restore job") + return reconcile.Result{}, errors.Wrap(err, "observe restore status") } - - status := checkRestoreJob(job) switch status { case v2.RestoreFailed: log.Info("Restore failed") case v2.RestoreSucceeded: log.Info("Restore succeeded") - pgRestore.Status.CompletedAt = job.Status.CompletionTime + pgRestore.Status.CompletedAt = completedAt default: log.Info("Waiting for restore to complete") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } if _, ok := pgRestore.Annotations[pNaming.AnnotationClusterBootstrapRestore]; !ok { - if err := disableRestore(ctx, r.Client, pgCluster, pgRestore); err != nil { + if err := restorer.DisableRestore(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "disable restore") } } @@ -174,7 +174,8 @@ func runFinalizers(ctx context.Context, c client.Client, pr *v2.PerconaPGRestore if pg == nil { return nil } - return disableRestore(ctx, c, pg, pr) + restorer := restoreutils.NewPGBackRestRestore(c, pg, pr) + return 
restorer.DisableRestore(ctx) }, } @@ -205,82 +206,3 @@ func ensureFinalizers(ctx context.Context, cl client.Client, pr *v2.PerconaPGRes } return nil } - -func startRestore(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pr *v2.PerconaPGRestore) error { - orig := pg.DeepCopy() - - if pg.Annotations == nil { - pg.Annotations = make(map[string]string) - } - pg.Annotations[naming.PGBackRestRestore] = pr.Name - - postgresCluster := new(v1beta1.PostgresCluster) - if err := c.Get(ctx, client.ObjectKeyFromObject(pg), postgresCluster); err != nil { - return errors.Wrap(err, "get PostgresCluster") - } - - origPostgres := postgresCluster.DeepCopy() - - postgresCluster.Status.PGBackRest.Restore = new(v1beta1.PGBackRestJobStatus) - - if err := c.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - if pg.Spec.Backups.PGBackRest.Restore == nil { - pg.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ - PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, - } - } - - tvar := true - pg.Spec.Backups.PGBackRest.Restore.Enabled = &tvar - pg.Spec.Backups.PGBackRest.Restore.RepoName = ptr.Deref(pr.Spec.RepoName, "") - pg.Spec.Backups.PGBackRest.Restore.Options = pr.Spec.Options - - if err := c.Patch(ctx, pg, client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - if err := ensureFinalizers(ctx, c, pr); err != nil { - return errors.Wrap(err, "ensure restore finalizers") - } - - return nil -} - -func disableRestore(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pr *v2.PerconaPGRestore) error { - if pr.Status.State == v2.RestoreSucceeded || pr.Status.State == v2.RestoreFailed { - return nil - } - - orig := pg.DeepCopy() - - if pg.Spec.Backups.PGBackRest.Restore == nil { - pg.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ - PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, - } - } - - fvar := 
false - pg.Spec.Backups.PGBackRest.Restore.Enabled = &fvar - - delete(pg.Annotations, naming.LabelPGBackRestRestore) - - if err := c.Patch(ctx, pg, client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - return nil -} - -func checkRestoreJob(job *batchv1.Job) v2.PGRestoreState { - switch { - case controller.JobCompleted(job): - return v2.RestoreSucceeded - case controller.JobFailed(job): - return v2.RestoreFailed - default: - return v2.RestoreRunning - } -} diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 18a50a2fde..a0a728b7f9 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -25,6 +25,7 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/controller" + restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) @@ -184,7 +185,13 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - // TODO: Implement PiTR + // Restore PITR + if ok, err := r.restorePITR(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PITR") + } else if !ok { + r.log.Info("Waiting for PITR to be restored") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { restore.Status.State = v2.RestoreSucceeded @@ -391,3 +398,32 @@ func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.Percon return nil } } + +func (r *snapshotRestorer) restorePITR(ctx 
context.Context) (bool, error) { + if r.restore.Spec.RepoName == nil { + return true, nil + } + + pgbackrestRestore := restoreutils.NewPGBackRestRestore(r.cl, r.cluster, r.restore) + status, _, err := pgbackrestRestore.ObserveStatus(ctx) + if client.IgnoreNotFound(err) != nil { // ignore NotFound, we handle it below + return false, errors.Wrap(err, "observe PITR status") + } + + switch { + case k8serrors.IsNotFound(err): + return false, pgbackrestRestore.Start(ctx) + case status == v2.RestoreRunning: + return false, nil + case status == v2.RestoreSucceeded: + return true, pgbackrestRestore.DisableRestore(ctx) + case status == v2.RestoreFailed: + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreFailed + }); err != nil { + return false, errors.Wrap(err, "update restore status") + } + return true, nil + } + return false, nil +} diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go new file mode 100644 index 0000000000..b8c8e18ff8 --- /dev/null +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -0,0 +1,113 @@ +package utils + +import ( + "context" + + "github.com/pkg/errors" + batchv1 "k8s.io/api/batch/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + "github.com/percona/percona-postgresql-operator/v2/percona/controller" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +type PGBackRestRestore struct { + client.Client + + pgCluster *v2.PerconaPGCluster + pgRestore *v2.PerconaPGRestore +} + +func NewPGBackRestRestore(c client.Client, pgCluster *v2.PerconaPGCluster, pgRestore *v2.PerconaPGRestore) *PGBackRestRestore { + 
return &PGBackRestRestore{ + Client: c, + pgCluster: pgCluster, + pgRestore: pgRestore, + } +} + +func (r *PGBackRestRestore) Start(ctx context.Context) error { + orig := r.pgCluster.DeepCopy() + + if r.pgCluster.Annotations == nil { + r.pgCluster.Annotations = make(map[string]string) + } + r.pgCluster.Annotations[naming.PGBackRestRestore] = r.pgRestore.Name + + postgresCluster := new(v1beta1.PostgresCluster) + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), postgresCluster); err != nil { + return errors.Wrap(err, "get PostgresCluster") + } + + origPostgres := postgresCluster.DeepCopy() + + postgresCluster.Status.PGBackRest.Restore = new(v1beta1.PGBackRestJobStatus) + + if err := r.Client.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { + return errors.Wrap(err, "patch PGCluster") + } + + if r.pgCluster.Spec.Backups.PGBackRest.Restore == nil { + r.pgCluster.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ + PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, + } + } + + r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = ptr.To(true) + r.pgCluster.Spec.Backups.PGBackRest.Restore.RepoName = ptr.Deref(r.pgRestore.Spec.RepoName, "") + r.pgCluster.Spec.Backups.PGBackRest.Restore.Options = r.pgRestore.Spec.Options + + if err := r.Client.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch PGCluster") + } + + return nil +} + +func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { + if r.pgRestore.Status.State == v2.RestoreSucceeded || r.pgRestore.Status.State == v2.RestoreFailed { + return nil + } + + orig := r.pgCluster.DeepCopy() + + if r.pgCluster.Spec.Backups.PGBackRest.Restore == nil { + r.pgCluster.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ + PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, + } + } + + r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = ptr.To(false) + 
delete(r.pgCluster.Annotations, naming.LabelPGBackRestRestore) + + if err := r.Client.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch PGCluster") + } + + return nil +} + +func (r *PGBackRestRestore) ObserveStatus(ctx context.Context) (v2.PGRestoreState, *metav1.Time, error) { + job := &batchv1.Job{} + err := r.Client.Get(ctx, types.NamespacedName{Name: r.pgCluster.Name + "-pgbackrest-restore", Namespace: r.pgCluster.Namespace}, job) + if err != nil { + return v2.RestoreNew, nil, errors.Wrap(err, "get restore job") + } + return checkRestoreJob(job), job.Status.CompletionTime, nil +} + +func checkRestoreJob(job *batchv1.Job) v2.PGRestoreState { + switch { + case controller.JobCompleted(job): + return v2.RestoreSucceeded + case controller.JobFailed(job): + return v2.RestoreFailed + default: + return v2.RestoreRunning + } +} From 2b4981f083067e4240039d8019c217b399c62cc0 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 15:47:08 +0530 Subject: [PATCH 31/90] support snapshots in WALWatcher Signed-off-by: Mayank Shah --- percona/watcher/wal.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/percona/watcher/wal.go b/percona/watcher/wal.go index 1f40a67d4a..cee366961d 100644 --- a/percona/watcher/wal.go +++ b/percona/watcher/wal.go @@ -43,11 +43,6 @@ func WatchCommitTimestamps(ctx context.Context, cli client.Client, eventChan cha return } - // TODO: add support - if cr.Spec.Backups.IsVolumeSnapshotsEnabled() { - return - } - log.Info("Watching commit timestamps") execCli, err := clientcmd.NewClient() @@ -142,7 +137,9 @@ func getLatestBackup(ctx context.Context, cli client.Client, cr *pgv2.PerconaPGC latest := &pgv2.PerconaPGBackup{} runningBackupExists := false for _, backup := range backupList.Items { - backup := backup + if ptr.Deref(backup.Spec.Method, pgv2.BackupMethodPGBackrest) == pgv2.BackupMethodVolumeSnapshot { + continue + } switch backup.Status.State { case 
pgv2.BackupSucceeded: From 1cbdcaba639a8c154d81682946ebd97fc7e4c864 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 18:37:55 +0530 Subject: [PATCH 32/90] bug fix Signed-off-by: Mayank Shah --- percona/controller/pgrestore/snapshot/reconcile.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index a0a728b7f9..a2c85e18d4 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -179,8 +179,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu // Start the cluster if ok, err := r.resumeCluster(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "resume cluster") - } else if !ok { + } else if !ok && !r.isPITRInProgress() { r.log.Info("Waiting for cluster to be ready") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -411,7 +410,7 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { } switch { - case k8serrors.IsNotFound(err): + case k8serrors.IsNotFound(err) && !r.isPITRInProgress(): return false, pgbackrestRestore.Start(ctx) case status == v2.RestoreRunning: return false, nil @@ -427,3 +426,6 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { } return false, nil } +func (r *snapshotRestorer) isPITRInProgress() bool { + return r.cluster.GetAnnotations()[naming.PGBackRestRestore] != "" +} From f2fdd026c079e7e50c080134e892be7783c46ebe Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 18:44:59 +0530 Subject: [PATCH 33/90] update comments Signed-off-by: Mayank Shah --- percona/controller/pgrestore/snapshot/reconcile.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index a2c85e18d4..2227d5dbfb 100644 --- 
a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -188,7 +188,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu if ok, err := r.restorePITR(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "restore PITR") } else if !ok { - r.log.Info("Waiting for PITR to be restored") + r.log.Info("Waiting for PiTR to complete") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -242,6 +242,8 @@ func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVol return result, nil } +// restorePVCFromSnapshot restores a PVC from a snapshot. +// pvc is a partial object derived from the cluster instance statefulsets (see listPVCs method). func (r *snapshotRestorer) restorePVCFromSnapshot( ctx context.Context, pvc *corev1.PersistentVolumeClaim, From 0566dcfee2a3a9adfeefdc74800c147b54222d55 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 19:08:41 +0530 Subject: [PATCH 34/90] linting Signed-off-by: Mayank Shah --- percona/controller/pgrestore/utils/pgbackrest.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go index b8c8e18ff8..da3e05220b 100644 --- a/percona/controller/pgrestore/utils/pgbackrest.go +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -40,7 +40,7 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { r.pgCluster.Annotations[naming.PGBackRestRestore] = r.pgRestore.Name postgresCluster := new(v1beta1.PostgresCluster) - if err := r.Client.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), postgresCluster); err != nil { + if err := r.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), postgresCluster); err != nil { return errors.Wrap(err, "get PostgresCluster") } @@ -48,7 +48,7 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { 
postgresCluster.Status.PGBackRest.Restore = new(v1beta1.PGBackRestJobStatus) - if err := r.Client.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { + if err := r.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { return errors.Wrap(err, "patch PGCluster") } @@ -62,7 +62,7 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { r.pgCluster.Spec.Backups.PGBackRest.Restore.RepoName = ptr.Deref(r.pgRestore.Spec.RepoName, "") r.pgCluster.Spec.Backups.PGBackRest.Restore.Options = r.pgRestore.Spec.Options - if err := r.Client.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { return errors.Wrap(err, "patch PGCluster") } @@ -85,7 +85,7 @@ func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = ptr.To(false) delete(r.pgCluster.Annotations, naming.LabelPGBackRestRestore) - if err := r.Client.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { return errors.Wrap(err, "patch PGCluster") } @@ -94,7 +94,7 @@ func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { func (r *PGBackRestRestore) ObserveStatus(ctx context.Context) (v2.PGRestoreState, *metav1.Time, error) { job := &batchv1.Job{} - err := r.Client.Get(ctx, types.NamespacedName{Name: r.pgCluster.Name + "-pgbackrest-restore", Namespace: r.pgCluster.Namespace}, job) + err := r.Get(ctx, types.NamespacedName{Name: r.pgCluster.Name + "-pgbackrest-restore", Namespace: r.pgCluster.Namespace}, job) if err != nil { return v2.RestoreNew, nil, errors.Wrap(err, "get restore job") } From 85d97838623304d7f9247c838a831af4de5cfc33 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 27 Jan 2026 19:15:51 +0530 Subject: [PATCH 35/90] add PGO_FEATURE_GATES env variable to deploy Signed-off-by: Mayank Shah --- 
config/manager/default/manager.yaml | 2 ++ deploy/bundle.yaml | 2 ++ deploy/cw-bundle.yaml | 2 ++ deploy/cw-operator.yaml | 2 ++ deploy/operator.yaml | 2 ++ 5 files changed, 10 insertions(+) diff --git a/config/manager/default/manager.yaml b/config/manager/default/manager.yaml index bc132a43cd..2df9c87b59 100644 --- a/config/manager/default/manager.yaml +++ b/config/manager/default/manager.yaml @@ -37,6 +37,8 @@ spec: value: "false" - name: PGO_WORKERS value: "1" + - name: PGO_FEATURE_GATES + value: "" ports: - containerPort: 8080 name: metrics diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 8d2d81ff3b..3dc66d682d 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -54249,6 +54249,8 @@ spec: value: "false" - name: PGO_WORKERS value: "1" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 317d6895a0..62ee3124f1 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -54247,6 +54247,8 @@ spec: value: "false" - name: PGO_WORKERS value: "1" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/cw-operator.yaml b/deploy/cw-operator.yaml index 7861413845..c5c2f561ae 100644 --- a/deploy/cw-operator.yaml +++ b/deploy/cw-operator.yaml @@ -44,6 +44,8 @@ spec: value: "false" - name: PGO_WORKERS value: "1" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/operator.yaml b/deploy/operator.yaml index c6c492333b..3050994027 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -47,6 +47,8 @@ spec: value: "false" - name: PGO_WORKERS value: "1" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: 
From 4f487695af59ecd4cde5b8435d893dc91dfae3d6 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 28 Jan 2026 11:05:35 +0530 Subject: [PATCH 36/90] add retry mechanism Signed-off-by: Mayank Shah --- .../pgbackup/snapshots/reconcile.go | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index da2b6b16aa..4edc01553f 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -21,6 +21,10 @@ import ( v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) +const ( + defaultSnapshotErrorDeadline = 5 * time.Minute +) + type snapshotExecutor interface { // Prepare the cluster for performing a snapshot. // Returns the name of the PVC that will be snapshotted. @@ -214,20 +218,31 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re // error occurred while creating the snapshot. case volumeSnapshot.Status.Error != nil: + // Some errors can be transient, so we should wait for a while before giving up. message := ptr.Deref(volumeSnapshot.Status.Error.Message, "") - stsErr := fmt.Errorf("volume snapshot failed: %s", message) + if !shouldFailSnapshot(volumeSnapshot) { + r.log.Info("Snapshot is in error state, but within deadline. Retrying.", "message", message) + return reconcile.Result{}, nil + } + + // Snapshot has failed, update the status to failed. 
+ stsErr := fmt.Errorf("snapshot error: %s", message) r.log.Error(stsErr, "Volume snapshot failed") if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed bcp.Status.Error = stsErr.Error() }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + return reconcile.Result{}, nil } } - return reconcile.Result{}, nil } +func shouldFailSnapshot(volumeSnapshot *volumesnapshotv1.VolumeSnapshot) bool { + errAt := volumeSnapshot.Status.Error.Time + return !errAt.IsZero() && time.Now().After(errAt.Add(defaultSnapshotErrorDeadline)) +} + func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot *volumesnapshotv1.VolumeSnapshot) (bool, error) { if err := r.cl.Create(ctx, volumeSnapshot); err != nil { return false, client.IgnoreAlreadyExists(err) From 6e4c7b8e58c638ab997e5d7ad619f362b2ee7a80 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 29 Jan 2026 22:11:58 +0530 Subject: [PATCH 37/90] implement checkpointing Signed-off-by: Mayank Shah --- .../controller/pgbackup/snapshots/offline.go | 44 +++++++++++++++++-- .../pgbackup/snapshots/reconcile.go | 5 ++- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 8298eb7e1d..1c3d4916ca 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -2,6 +2,8 @@ package snapshots import ( "context" + "fmt" + "io" "time" "github.com/pkg/errors" @@ -12,8 +14,10 @@ import ( "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" + "github.com/percona/percona-postgresql-operator/v2/internal/postgres" 
pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" @@ -22,21 +26,24 @@ import ( const ( annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" - waitTimeout = 5 * time.Minute - retryInterval = 3 * time.Second + checkpointTimeoutSeconds = 30 + waitTimeout = 5 * time.Minute + retryInterval = 3 * time.Second ) type offlineExec struct { cl client.Client cluster *v2.PerconaPGCluster backup *v2.PerconaPGBackup + podExec runtime.PodExecutor } -func newOfflineExec(cl client.Client, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { +func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { return &offlineExec{ cl: cl, cluster: pgCluster, backup: pgBackup, + podExec: podExec, } } @@ -46,6 +53,11 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return "", errors.Wrap(err, "failed to get backup target pod") } + // TODO: should this be optional, since this can take a while on large datasets? + if err := e.checkpoint(ctx, targetInstance); err != nil { + return "", errors.Wrap(err, "failed to checkpoint instance") + } + if err := e.suspendInstance(ctx, targetInstance); err != nil { return "", errors.Wrap(err, "failed to suspend instance") } @@ -57,6 +69,32 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return targetPVC, nil } +func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { + exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) + } + + stdout, stderr, err := postgres.Executor(exec). 
+ ExecInDatabasesFromQuery(ctx, `SELECT pg_catalog.current_database()`, + `SET statement_timeout = :'timeout'; CHECKPOINT;`, + map[string]string{ + "timeout": fmt.Sprintf("%ds", checkpointTimeoutSeconds), + "ON_ERROR_STOP": "on", // Abort when any one statement fails. + "QUIET": "on", // Do not print successful statements to stdout. + }) + if err != nil { + return errors.Wrap(err, "failed to execute checkpoint") + } + + if stderr != "" { + return fmt.Errorf("checkpoint failed: %s", stderr) + } + + log := logging.FromContext(ctx) + log.Info("checkpoint executed", "stdout", stdout, "stderr", stderr) + return nil +} + func (e *offlineExec) suspendInstance(ctx context.Context, instanceName string) error { sts := &appsv1.StatefulSet{} if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName}, sts); err != nil { diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 4edc01553f..ac8bc56b25 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -59,12 +59,13 @@ func newSnapshotReconciler( func newSnapshotExec( cl client.Client, + podExec runtime.PodExecutor, cluster *v2.PerconaPGCluster, backup *v2.PerconaPGBackup, ) (snapshotExecutor, error) { switch mode := cluster.Spec.Backups.VolumeSnapshots.Mode; mode { case v2.VolumeSnapshotModeOffline: - return newOfflineExec(cl, cluster, backup), nil + return newOfflineExec(cl, podExec, cluster, backup), nil default: return nil, fmt.Errorf("invalid or unsupported volume snapshot mode: %s", mode) } @@ -103,7 +104,7 @@ func Reconcile( return reconcile.Result{}, nil } - exec, err := newSnapshotExec(cl, pgCluster, pgBackup) + exec, err := newSnapshotExec(cl, podExec, pgCluster, pgBackup) if err != nil { stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) if updErr := pgBackup.UpdateStatus(ctx, cl, 
func(bcp *v2.PerconaPGBackup) { From 38857f3b4beb93544111e2155c9c70d71479eeef Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 30 Jan 2026 11:34:21 +0530 Subject: [PATCH 38/90] add restore_command wrapper for snapshots Signed-off-by: Mayank Shah --- build/postgres-operator/Dockerfile | 1 + build/postgres-operator/init-entrypoint.sh | 1 + build/postgres-operator/restore_command.sh | 9 +++++++++ internal/pgbackrest/postgres.go | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 build/postgres-operator/restore_command.sh diff --git a/build/postgres-operator/Dockerfile b/build/postgres-operator/Dockerfile index 59646d9aa4..6a17621051 100644 --- a/build/postgres-operator/Dockerfile +++ b/build/postgres-operator/Dockerfile @@ -67,6 +67,7 @@ COPY build/postgres-operator/init-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-liveness-check.sh /usr/local/bin COPY build/postgres-operator/postgres-readiness-check.sh /usr/local/bin +COPY build/postgres-operator/restore_command.sh /usr/local/bin COPY hack/tools/queries /opt/crunchy/conf RUN chgrp -R 0 /opt/crunchy/conf && chmod -R g=u opt/crunchy/conf diff --git a/build/postgres-operator/init-entrypoint.sh b/build/postgres-operator/init-entrypoint.sh index 2fde1f02bf..ed4b419033 100755 --- a/build/postgres-operator/init-entrypoint.sh +++ b/build/postgres-operator/init-entrypoint.sh @@ -10,3 +10,4 @@ install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-entrypoi install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-liveness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-liveness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-readiness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-readiness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/relocate-extensions.sh" "${CRUNCHY_BINDIR}/bin/relocate-extensions.sh" +install -o "$(id -u)" -g 
"$(id -g)" -m 0755 -D "/usr/local/bin/restore_command.sh" "${CRUNCHY_BINDIR}/bin/restore_command.sh" diff --git a/build/postgres-operator/restore_command.sh b/build/postgres-operator/restore_command.sh new file mode 100644 index 0000000000..5dc178c234 --- /dev/null +++ b/build/postgres-operator/restore_command.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -e + +if [[ -f $PGDATA/restored-from-snapshot ]]; then + rm -f $PGDATA/restored-from-snapshot + exit 1 +fi + +pgbackrest --stanza=db archive-get %f "%p" \ No newline at end of file diff --git a/internal/pgbackrest/postgres.go b/internal/pgbackrest/postgres.go index 50dd2a5d20..b3fe7b7ada 100644 --- a/internal/pgbackrest/postgres.go +++ b/internal/pgbackrest/postgres.go @@ -78,7 +78,7 @@ func PostgreSQL( // Fetch WAL files from any configured repository during recovery. // - https://pgbackrest.org/command.html#command-archive-get // - https://www.postgresql.org/docs/current/runtime-config-wal.html - restore := `pgbackrest --stanza=` + DefaultStanzaName + ` archive-get %f "%p"` + restore := "sh /opt/crunchy/bin/restore_command.sh" if inCluster.Spec.Patroni != nil && inCluster.Spec.Patroni.DynamicConfiguration != nil { postgresql, ok := inCluster.Spec.Patroni.DynamicConfiguration["postgresql"].(map[string]any) if ok { From aa534600a7b9cbd4878ee89a74f3744278adcc11 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 30 Jan 2026 11:34:37 +0530 Subject: [PATCH 39/90] add logic for creating snapshot signal file Signed-off-by: Mayank Shah --- .../controller/pgbackup/snapshots/offline.go | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 1c3d4916ca..2fcfe1b2a1 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "io" + "path" "time" "github.com/pkg/errors" @@ -21,14 +22,17 @@ import ( 
pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) const ( annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" - checkpointTimeoutSeconds = 30 + checkpointTimeoutSeconds = 30 // TODO: make this configurable waitTimeout = 5 * time.Minute retryInterval = 3 * time.Second + + snapshotSignalFile = "restored-from-snapshot" ) type offlineExec struct { @@ -58,6 +62,10 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return "", errors.Wrap(err, "failed to checkpoint instance") } + if err := e.createSnapshotSignal(ctx, targetInstance); err != nil { + return "", errors.Wrap(err, "failed to create snapshot signal") + } + if err := e.suspendInstance(ctx, targetInstance); err != nil { return "", errors.Wrap(err, "failed to suspend instance") } @@ -69,6 +77,44 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return targetPVC, nil } +// After a snapshot restored and cluster resumed, Patroni will start the instance in recovery mode, +// so PostgreSQL invokes restore_command to fetch WAL from pgbackrest repo. We want the instance to remain at +// the snapshot-consistent state instead of advancing past it. We create this special file +// in PGDATA before taking the snapshot so it is included in the snapshot; when the instance +// starts after a restore, the restore_command wrapper checks for this file and, if present, exits without +// fetching WAL (so recovery stops at local WAL) and then removes the file. 
+func (e *offlineExec) createSnapshotSignal(ctx context.Context, instanceName string) error { + postgresCluster := &crunchyv1beta1.PostgresCluster{} + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(e.cluster), postgresCluster); err != nil { + return errors.Wrap(err, "failed to get postgres cluster") + } + + snapshotSignalFile := path.Join(postgres.DataDirectory(postgresCluster), snapshotSignalFile) + cmd := []string{"touch", snapshotSignalFile} + err := e.podExec(ctx, e.cluster.GetNamespace(), + instanceName+"-0", naming.ContainerDatabase, nil, io.Discard, io.Discard, cmd...) + if err != nil { + return errors.Wrap(err, "pod exec failed") + } + return nil +} + +func (e *offlineExec) removeSnapshotSignal(ctx context.Context, instanceName string) error { + postgresCluster := &crunchyv1beta1.PostgresCluster{} + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(e.cluster), postgresCluster); err != nil { + return errors.Wrap(err, "failed to get postgres cluster") + } + + snapshotSignalFile := path.Join(postgres.DataDirectory(postgresCluster), snapshotSignalFile) + cmd := []string{"rm", "-f", snapshotSignalFile} + err := e.podExec(ctx, e.cluster.GetNamespace(), + instanceName+"-0", naming.ContainerDatabase, nil, io.Discard, io.Discard, cmd...) + if err != nil { + return errors.Wrap(err, "pod exec failed") + } + return nil +} + func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) 
@@ -169,6 +215,10 @@ func (e *offlineExec) finalize(ctx context.Context) error { if err := e.resumeInstance(ctx, targetInstance); err != nil { return errors.Wrap(err, "failed to resume instance") } + + if err := e.removeSnapshotSignal(ctx, targetInstance); err != nil { + return errors.Wrap(err, "failed to remove snapshot signal") + } return nil } From bcd5d049bc46e8d6c40ba78e48552629df7696cd Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 30 Jan 2026 11:34:46 +0530 Subject: [PATCH 40/90] handle leader ep reconcile Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile.go | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 2227d5dbfb..26c004f1d1 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -28,6 +28,7 @@ import ( restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) type snapshotRestorer struct { @@ -177,6 +178,12 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu } } + // Re-create Patroni leader Endpoints so the cluster can be bootstrapped from + // the new data. 
+ if err := r.deleteLeaderEndpoints(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "delete leader endpoints") + } + // Start the cluster if ok, err := r.resumeCluster(ctx); err != nil { } else if !ok && !r.isPITRInProgress() { @@ -203,6 +210,20 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, nil } +func (r *snapshotRestorer) deleteLeaderEndpoints(ctx context.Context) error { + postgresCluster := &crunchyv1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: r.cluster.Name, + Namespace: r.cluster.Namespace, + }, + } + leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { + return errors.Wrap(err, "get postgres cluster") + } + return r.cl.Delete(ctx, leaderEp) +} + // listPVCs returns the list of PostgreSQL data PVCs that need to be restored. // // Instead of listing existing PVCs directly, this function derives the PVC names From 047b653d2fc3b8cab7de34cb3f0c9cac3f2db3de Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 30 Jan 2026 13:20:31 +0530 Subject: [PATCH 41/90] more improvements Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile.go | 45 +++++++++++-------- .../controller/pgrestore/utils/pgbackrest.go | 32 ++++++------- percona/naming/annotations.go | 4 ++ 3 files changed, 48 insertions(+), 33 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 26c004f1d1..27b1086f47 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -178,9 +178,8 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu } } - // Re-create Patroni leader Endpoints so the cluster can be bootstrapped from - // the new data. 
- if err := r.deleteLeaderEndpoints(ctx); err != nil { + // Re-create (if needed) Patroni leader Endpoints so the cluster can be bootstrapped from the new data. + if err := r.reconcileLeaderEndpoints(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "delete leader endpoints") } @@ -210,18 +209,27 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, nil } -func (r *snapshotRestorer) deleteLeaderEndpoints(ctx context.Context) error { +func (r *snapshotRestorer) reconcileLeaderEndpoints(ctx context.Context) error { postgresCluster := &crunchyv1beta1.PostgresCluster{ ObjectMeta: metav1.ObjectMeta{ Name: r.cluster.Name, Namespace: r.cluster.Namespace, }, } + leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { - return errors.Wrap(err, "get postgres cluster") + return client.IgnoreNotFound(err) + } + + if len(leaderEp.Subsets) > 0 { + return nil } - return r.cl.Delete(ctx, leaderEp) + + if err := r.cl.Delete(ctx, leaderEp); client.IgnoreNotFound(err) != nil { + return errors.Wrap(err, "delete leader endpoints") + } + return nil } // listPVCs returns the list of PostgreSQL data PVCs that need to be restored. 
@@ -284,12 +292,8 @@ func (r *snapshotRestorer) restorePVCFromSnapshot( } // Check if the PVC is already using the snapshot - if dataSource := observedPVC.Spec.DataSource; dataSource != nil { - if dataSource.Kind == pNaming.KindVolumeSnapshot && - ptr.Deref(dataSource.APIGroup, "") == volumesnapshotv1.GroupName && - dataSource.Name == snapshotName { - return true, nil - } + if val, ok := observedPVC.GetAnnotations()[pNaming.AnnotationSnapshotRestore]; ok && val == r.restore.GetName() { + return true, nil } // If deleting, wait for it to be deleted before recreating @@ -328,6 +332,10 @@ func (r *snapshotRestorer) createPVCFromSnapshot(ctx context.Context, pvc *corev ObjectMeta: pvc.ObjectMeta, Spec: *volumeClaimSpec, } + + newPVC.SetAnnotations(map[string]string{ + pNaming.AnnotationSnapshotRestore: r.restore.GetName(), + }) return r.cl.Create(ctx, newPVC) } @@ -428,18 +436,18 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { pgbackrestRestore := restoreutils.NewPGBackRestRestore(r.cl, r.cluster, r.restore) status, _, err := pgbackrestRestore.ObserveStatus(ctx) - if client.IgnoreNotFound(err) != nil { // ignore NotFound, we handle it below + if err != nil { return false, errors.Wrap(err, "observe PITR status") } - switch { - case k8serrors.IsNotFound(err) && !r.isPITRInProgress(): + switch status { + case v2.RestoreStarting: return false, pgbackrestRestore.Start(ctx) - case status == v2.RestoreRunning: + case v2.RestoreRunning: return false, nil - case status == v2.RestoreSucceeded: + case v2.RestoreSucceeded: return true, pgbackrestRestore.DisableRestore(ctx) - case status == v2.RestoreFailed: + case v2.RestoreFailed: if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { restore.Status.State = v2.RestoreFailed }); err != nil { @@ -449,6 +457,7 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { } return false, nil } + func (r *snapshotRestorer) isPITRInProgress() bool { return 
r.cluster.GetAnnotations()[naming.PGBackRestRestore] != "" } diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go index da3e05220b..0b14602eef 100644 --- a/percona/controller/pgrestore/utils/pgbackrest.go +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -4,14 +4,11 @@ import ( "context" "github.com/pkg/errors" - batchv1 "k8s.io/api/batch/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/internal/naming" - "github.com/percona/percona-postgresql-operator/v2/percona/controller" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -34,6 +31,10 @@ func NewPGBackRestRestore(c client.Client, pgCluster *v2.PerconaPGCluster, pgRes func (r *PGBackRestRestore) Start(ctx context.Context) error { orig := r.pgCluster.DeepCopy() + if val, ok := r.pgCluster.GetAnnotations()[naming.PGBackRestRestore]; ok && val == r.pgRestore.Name { + return nil // already started + } + if r.pgCluster.Annotations == nil { r.pgCluster.Annotations = make(map[string]string) } @@ -93,21 +94,22 @@ func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { } func (r *PGBackRestRestore) ObserveStatus(ctx context.Context) (v2.PGRestoreState, *metav1.Time, error) { - job := &batchv1.Job{} - err := r.Get(ctx, types.NamespacedName{Name: r.pgCluster.Name + "-pgbackrest-restore", Namespace: r.pgCluster.Namespace}, job) - if err != nil { - return v2.RestoreNew, nil, errors.Wrap(err, "get restore job") + cluster := &v2.PerconaPGCluster{} + if err := r.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), cluster); err != nil { + return v2.RestoreStarting, nil, errors.Wrap(err, "get PostgresCluster") } - return checkRestoreJob(job), 
job.Status.CompletionTime, nil -} -func checkRestoreJob(job *batchv1.Job) v2.PGRestoreState { + if cluster.Status.PGBackRest == nil || cluster.Status.PGBackRest.Restore == nil { + return v2.RestoreStarting, nil, nil + } + restoreStatus := cluster.Status.PGBackRest.Restore + switch { - case controller.JobCompleted(job): - return v2.RestoreSucceeded - case controller.JobFailed(job): - return v2.RestoreFailed + case restoreStatus.Finished && restoreStatus.Succeeded > 0: + return v2.RestoreSucceeded, restoreStatus.CompletionTime, nil + case restoreStatus.Finished && restoreStatus.Failed > 0: + return v2.RestoreFailed, nil, nil default: - return v2.RestoreRunning + return v2.RestoreRunning, nil, nil } } diff --git a/percona/naming/annotations.go b/percona/naming/annotations.go index accfbc80fa..a99b22f7dc 100644 --- a/percona/naming/annotations.go +++ b/percona/naming/annotations.go @@ -50,4 +50,8 @@ const ( // AnnotationInstanceSuspended must be set on the instance StatefulSet to mark // the instance as suspended. AnnotationInstanceSuspended = PrefixPerconaPGV2 + "instance-suspended" + + // AnnotationSnapshotRestore is the annotation added to the data PVCs of a cluster + // to indicate the name of the PerconaPGRestore that is restoring the PVC from a snapshot. 
+ AnnotationSnapshotRestore = PrefixPerconaPGV2 + "snapshot-restore" ) From 6dd9948495892a2bc9b5748161f625cd8b1f4e52 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 30 Jan 2026 13:20:44 +0530 Subject: [PATCH 42/90] add e2e test Signed-off-by: Mayank Shah --- e2e-tests/functions | 1 + .../00-assert.yaml | 24 ++++ .../00-deploy-operator.yaml | 21 ++++ .../01-assert.yaml | 106 ++++++++++++++++++ .../01-create-cluster.yaml | 14 +++ .../02-write-data.yaml | 17 +++ .../03-assert.yaml | 10 ++ .../03-read-from-primary.yaml | 13 +++ .../04-assert.yaml | 26 +++++ .../04-create-backup-snapshot.yaml | 7 ++ .../05-assert.yaml | 31 +++++ .../05-create-restore.yaml | 23 ++++ .../06-assert.yaml | 11 ++ .../06-verify-restored-data.yaml | 13 +++ .../07-assert.yaml | 31 +++++ .../07-create-backup-pgbackrest.yaml | 9 ++ .../08-assert.yaml | 31 +++++ .../08-create-restore-pitr.yaml | 47 ++++++++ .../09-assert.yaml | 14 +++ .../09-verify-restored-data.yaml | 13 +++ .../99-cleanup.yaml | 33 ++++++ 21 files changed, 495 insertions(+) create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml create mode 100644 
e2e-tests/tests/demand-backup-offline-snapshot/06-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/06-verify-restored-data.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/07-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/07-create-backup-pgbackrest.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/08-create-restore-pitr.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/09-verify-restored-data.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/99-cleanup.yaml diff --git a/e2e-tests/functions b/e2e-tests/functions index 6b2a23167e..0386f99b54 100644 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -56,6 +56,7 @@ deploy_operator() { yq eval '.spec.template.spec.containers[0].image = "'${IMAGE}'"' "${DEPLOY_DIR}/${cw_prefix}operator.yaml" \ | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="DISABLE_TELEMETRY") | .value) = "'${disable_telemetry}'"' - \ | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="LOG_LEVEL") | .value) = "DEBUG"' - \ + | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="PGO_FEATURE_GATES") | .value) = "'${PGO_FEATURE_GATES}'"' - \ | kubectl -n "${OPERATOR_NS:-$NAMESPACE}" apply -f - } diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml new file mode 100644 index 0000000000..ae5a062d84 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml @@ -0,0 +1,24 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition 
+metadata: + name: perconapgclusters.pgv2.percona.com +spec: + group: pgv2.percona.com + names: + kind: PerconaPGCluster + listKind: PerconaPGClusterList + plural: perconapgclusters + singular: perconapgcluster + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-postgresql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml new file mode 100644 index 0000000000..50c7842282 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + init_temp_dir # do this only in the first TestStep + + PGO_FEATURE_GATES="BackupSnapshots=true" deploy_operator + deploy_client + deploy_s3_secrets +--- +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: gke-snapshot-class +driver: pd.csi.storage.gke.io +deletionPolicy: Delete \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml new file mode 100644 index 0000000000..5dc595832c --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml @@ -0,0 +1,106 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 480 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/instance-set: instance1 + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: 
PostgresCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 1 + readyReplicas: 1 + currentReplicas: 1 + updatedReplicas: 1 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: backup-snapshot-pgbouncer + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/role: pgbouncer + annotations: + deployment.kubernetes.io/revision: '1' + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 3 + updatedReplicas: 3 + readyReplicas: 3 +--- +kind: Job +apiVersion: batch/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/pgbackrest: '' + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create + postgres-operator.crunchydata.com/pgbackrest-repo: repo1 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + controller: true + blockOwnerDeletion: true +status: + succeeded: 1 +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: backup-snapshot + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true + finalizers: + - postgres-operator.crunchydata.com/finalizer +status: + instances: + - name: instance1 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 + observedGeneration: 1 + pgbackrest: + repos: + - name: repo1 + stanzaCreated: true + proxy: + pgBouncer: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: backup-snapshot +status: + pgbouncer: + ready: 3 + size: 3 + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready 
diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml new file mode 100644 index 0000000000..157d0b2a2e --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr "backup-snapshot" ${RANDOM} \ + | yq '.spec.backups.volumeSnapshots.className="gke-snapshot-class"' \ + | yq '.spec.backups.volumeSnapshots.mode="offline"' \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml new file mode 100644 index 0000000000..14ba9bf5e1 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml @@ -0,0 +1,17 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 60 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_psql_local \ + 'CREATE DATABASE myapp; \c myapp \\\ CREATE TABLE IF NOT EXISTS myApp (id int PRIMARY KEY);' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" + + run_psql_local \ + '\c myapp \\\ INSERT INTO myApp (id) VALUES (100500)' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml new file mode 100644 index 0000000000..6848a5b79a --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 03-read-from-primary +data: + data: ' 
100500' diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml new file mode 100644 index 0000000000..a58c96e1c3 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_psql_local '\c myapp \\\ SELECT * from myApp;' "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)") + + kubectl create configmap -n "${NAMESPACE}" 03-read-from-primary --from-literal=data="${data}" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml new file mode 100644 index 0000000000..1ade55f88b --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 560 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: backup1 +spec: + pgCluster: backup-snapshot + method: volumeSnapshot +status: + state: Succeeded +--- +kind: VolumeSnapshot +apiVersion: snapshot.storage.k8s.io/v1 +metadata: + name: backup1 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + name: backup1 + controller: true + blockOwnerDeletion: true +status: + readyToUse: true \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml new file mode 100644 index 0000000000..ab38603357 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml @@ -0,0 +1,7 @@ +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: backup1 +spec: 
+ pgCluster: backup-snapshot + method: volumeSnapshot diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml new file mode 100644 index 0000000000..4413f40dd4 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml @@ -0,0 +1,31 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + pgv2.percona.com/snapshot-restore: restore1 + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/role: pgdata +spec: + dataSource: + apiGroup: snapshot.storage.k8s.io + kind: VolumeSnapshot + name: backup1 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGRestore +metadata: + name: restore1 +status: + state: Succeeded +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: backup-snapshot +status: + state: ready diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml new file mode 100644 index 0000000000..033b0bbfaa --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml @@ -0,0 +1,23 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_psql_local \ + '\c myapp \\\ TRUNCATE TABLE myApp' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" + + cat < Date: Fri, 30 Jan 2026 19:33:46 +0530 Subject: [PATCH 43/90] wip: fix pitr Signed-off-by: Mayank Shah --- internal/naming/annotations.go | 4 + .../controller/pgbackup/snapshots/offline.go | 106 +------------ .../pgrestore/snapshot/reconcile.go | 139 ++++++++++++------ percona/postgres/common.go | 56 +++++++ 4 files changed, 160 insertions(+), 145 deletions(-) diff --git 
a/internal/naming/annotations.go b/internal/naming/annotations.go index ec04eb0e9a..736356e9d5 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -81,4 +81,8 @@ const ( // is present, the controller will not update the ConfigMap, allowing users to make custom // modifications that won't be overwritten during reconciliation. OverrideConfigAnnotation = perconaAnnotationPrefix + "override-config" + + // K8SPG-771 + // DisableWALArchiveRecoveryAnnotation is an annotation used to disable WAL recovery on startup. + DisableWALArchiveRecoveryAnnotation = annotationPrefix + "disable-wal-archive-recovery" ) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 2fcfe1b2a1..6590ea36bb 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -4,11 +4,9 @@ import ( "context" "fmt" "io" - "path" "time" "github.com/pkg/errors" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" @@ -22,7 +20,6 @@ import ( pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" - crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) const ( @@ -62,10 +59,6 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return "", errors.Wrap(err, "failed to checkpoint instance") } - if err := e.createSnapshotSignal(ctx, targetInstance); err != nil { - return "", errors.Wrap(err, "failed to create snapshot signal") - } - if err := e.suspendInstance(ctx, targetInstance); err != nil { return "", errors.Wrap(err, "failed to suspend instance") } @@ -77,44 +70,6 @@ func (e *offlineExec) prepare(ctx context.Context) 
(string, error) { return targetPVC, nil } -// After a snapshot restored and cluster resumed, Patroni will start the instance in recovery mode, -// so PostgreSQL invokes restore_command to fetch WAL from pgbackrest repo. We want the instance to remain at -// the snapshot-consistent state instead of advancing past it. We create this special file -// in PGDATA before taking the snapshot so it is included in the snapshot; when the instance -// starts after a restore, the restore_command wrapper checks for this file and, if present, exits without -// fetching WAL (so recovery stops at local WAL) and then removes the file. -func (e *offlineExec) createSnapshotSignal(ctx context.Context, instanceName string) error { - postgresCluster := &crunchyv1beta1.PostgresCluster{} - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(e.cluster), postgresCluster); err != nil { - return errors.Wrap(err, "failed to get postgres cluster") - } - - snapshotSignalFile := path.Join(postgres.DataDirectory(postgresCluster), snapshotSignalFile) - cmd := []string{"touch", snapshotSignalFile} - err := e.podExec(ctx, e.cluster.GetNamespace(), - instanceName+"-0", naming.ContainerDatabase, nil, io.Discard, io.Discard, cmd...) - if err != nil { - return errors.Wrap(err, "pod exec failed") - } - return nil -} - -func (e *offlineExec) removeSnapshotSignal(ctx context.Context, instanceName string) error { - postgresCluster := &crunchyv1beta1.PostgresCluster{} - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(e.cluster), postgresCluster); err != nil { - return errors.Wrap(err, "failed to get postgres cluster") - } - - snapshotSignalFile := path.Join(postgres.DataDirectory(postgresCluster), snapshotSignalFile) - cmd := []string{"rm", "-f", snapshotSignalFile} - err := e.podExec(ctx, e.cluster.GetNamespace(), - instanceName+"-0", naming.ContainerDatabase, nil, io.Discard, io.Discard, cmd...) 
- if err != nil { - return errors.Wrap(err, "pod exec failed") - } - return nil -} - func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) @@ -142,33 +97,10 @@ func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error } func (e *offlineExec) suspendInstance(ctx context.Context, instanceName string) error { - sts := &appsv1.StatefulSet{} - if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName}, sts); err != nil { - return errors.Wrap(err, "failed to get stateful set") - } - - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - orig := sts.DeepCopy() - annots := sts.GetAnnotations() - if annots == nil { - annots = make(map[string]string) - } - annots[pNaming.AnnotationInstanceSuspended] = "" - sts.SetAnnotations(annots) - return e.cl.Patch(ctx, sts, client.MergeFrom(orig)) - }); err != nil { - return errors.Wrap(err, "failed to update stateful set annotations") - } - - // wait for suspension + // Suspend and wait + instanceKey := client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName} if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { - if err := e.cl.Get(ctx, client.ObjectKey{ - Namespace: e.cluster.GetNamespace(), - Name: instanceName, - }, sts); err != nil { - return false, errors.Wrap(err, "failed to get stateful set") - } - return sts.Status.Replicas == 0 && sts.Status.ReadyReplicas == 0, nil + return perconaPG.SuspendInstance(ctx, e.cl, instanceKey) }); err != nil { return errors.Wrap(err, "failed to wait for suspension") } @@ -176,32 +108,12 @@ func (e *offlineExec) suspendInstance(ctx context.Context, instanceName string) } func (e 
*offlineExec) resumeInstance(ctx context.Context, instanceName string) error { - sts := &appsv1.StatefulSet{} - if err := e.cl.Get(ctx, client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName}, sts); err != nil { - return errors.Wrap(err, "failed to get stateful set") - } - - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - orig := sts.DeepCopy() - annots := sts.GetAnnotations() - delete(annots, pNaming.AnnotationInstanceSuspended) - sts.SetAnnotations(annots) - return e.cl.Patch(ctx, sts, client.MergeFrom(orig)) - }); err != nil { - return errors.Wrap(err, "failed to update stateful set annotations") - } - - // wait for resume + // unsuspend and wait + instanceKey := client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName} if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { - if err := e.cl.Get(ctx, client.ObjectKey{ - Namespace: e.cluster.GetNamespace(), - Name: instanceName, - }, sts); err != nil { - return false, errors.Wrap(err, "failed to get stateful set") - } - return sts.Status.Replicas > 0 && sts.Status.ReadyReplicas > 0, nil + return perconaPG.UnsuspendInstance(ctx, e.cl, instanceKey) }); err != nil { - return errors.Wrap(err, "failed to wait for suspension") + return errors.Wrap(err, "failed to wait for unsuspend") } return nil } @@ -215,10 +127,6 @@ func (e *offlineExec) finalize(ctx context.Context) error { if err := e.resumeInstance(ctx, targetInstance); err != nil { return errors.Wrap(err, "failed to resume instance") } - - if err := e.removeSnapshotSignal(ctx, targetInstance); err != nil { - return errors.Wrap(err, "failed to remove snapshot signal") - } return nil } diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 27b1086f47..c7b66a9260 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ 
b/percona/controller/pgrestore/snapshot/reconcile.go @@ -15,7 +15,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -27,7 +26,9 @@ import ( "github.com/percona/percona-postgresql-operator/v2/percona/controller" restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" + perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -141,10 +142,10 @@ func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Res } // pausing the cluster so the PVCs are unmounted and can be re-created. 
- if ok, err := r.pauseCluster(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "pause cluster") + if ok, err := r.suspendAllInstances(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "suspend all instances") } else if !ok { - r.log.Info("Waiting for cluster to be paused") + r.log.Info("Waiting for all instances to be suspended") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -169,6 +170,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, errors.Wrap(err, "list PVCs") } + // Restore PVCs for _, pvc := range clusterPVCs { if ok, err := r.restorePVCFromSnapshot(ctx, &pvc, volumeSnapshotName); err != nil { return reconcile.Result{}, errors.Wrap(err, "restore PVC from snapshot") @@ -178,23 +180,23 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu } } + // Restore PITR (if needed) + if ok, err := r.restorePITR(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PITR") + } else if !ok { + r.log.Info("Waiting for PiTR to complete") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + // Re-create (if needed) Patroni leader Endpoints so the cluster can be bootstrapped from the new data. 
if err := r.reconcileLeaderEndpoints(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "delete leader endpoints") } // Start the cluster - if ok, err := r.resumeCluster(ctx); err != nil { + if ok, err := r.unsuspendAllInstances(ctx); err != nil { } else if !ok && !r.isPITRInProgress() { - r.log.Info("Waiting for cluster to be ready") - return reconcile.Result{RequeueAfter: time.Second * 5}, nil - } - - // Restore PITR - if ok, err := r.restorePITR(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "restore PITR") - } else if !ok { - r.log.Info("Waiting for PiTR to complete") + r.log.Info("Waiting for all instances to be unsuspended") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -339,46 +341,50 @@ func (r *snapshotRestorer) createPVCFromSnapshot(ctx context.Context, pvc *corev return r.cl.Create(ctx, newPVC) } -func (r *snapshotRestorer) pauseCluster(ctx context.Context) (bool, error) { - // Check if already paused - if r.cluster.Spec.Pause != nil && *r.cluster.Spec.Pause { - return r.cluster.Status.State == v2.AppStatePaused, nil +func (r *snapshotRestorer) suspendAllInstances(ctx context.Context) (bool, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return false, errors.Wrap(err, "list instances") } - // Pause the cluster - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - orig := r.cluster.DeepCopy() - updated := orig.DeepCopy() - if err := r.cl.Get(ctx, client.ObjectKeyFromObject(updated), updated); err != nil { - return err + allSuspended := true + for _, instance := range instances.Items { + if suspended, err := perconaPG.SuspendInstance(ctx, r.cl, client.ObjectKeyFromObject(&instance)); err != nil { + return false, errors.Wrap(err, 
"suspend instance") + } else if !suspended { + allSuspended = false } - updated.Spec.Pause = ptr.To(true) - return r.cl.Patch(ctx, updated, client.MergeFrom(orig)) - }); err != nil { - return false, err } - return false, nil + return allSuspended, nil } -func (r *snapshotRestorer) resumeCluster(ctx context.Context) (bool, error) { - // Check if already resumed - if r.cluster.Spec.Pause == nil || !*r.cluster.Spec.Pause { - return r.cluster.Status.State == v2.AppStateReady, nil +func (r *snapshotRestorer) unsuspendAllInstances(ctx context.Context) (bool, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return false, errors.Wrap(err, "list instances") } - // Resume the cluster - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - orig := r.cluster.DeepCopy() - updated := orig.DeepCopy() - if err := r.cl.Get(ctx, client.ObjectKeyFromObject(updated), updated); err != nil { - return err + allUnsuspended := true + for _, instance := range instances.Items { + if unsuspended, err := perconaPG.UnsuspendInstance(ctx, r.cl, client.ObjectKeyFromObject(&instance)); err != nil { + return false, errors.Wrap(err, "unsuspend instance") + } else if !unsuspended { + allUnsuspended = false } - updated.Spec.Pause = nil - return r.cl.Patch(ctx, updated, client.MergeFrom(orig)) - }); err != nil { - return false, err } - return false, nil + return allUnsuspended, nil } func (r *snapshotRestorer) ensureFinalizers(ctx context.Context) error { @@ -421,9 +427,12 @@ func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { return func(ctx 
context.Context, restore *v2.PerconaPGRestore) error { + if err := r.enableWALArchiveRecovery(ctx); err != nil { + return errors.Wrap(err, "enable WAL archive recovery") + } // Resume the cluster if it was paused during restore - if _, err := r.resumeCluster(ctx); err != nil { - return errors.Wrap(err, "resume cluster") + if _, err := r.unsuspendAllInstances(ctx); err != nil { + return errors.Wrap(err, "unsuspend all instances") } return nil } @@ -431,6 +440,10 @@ func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.Percon func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { if r.restore.Spec.RepoName == nil { + // PiTR is not needed, no need to recover WAL archives to maintain snapshot consistency. + if err := r.disableWALArchiveRecovery(ctx); err != nil { + return false, errors.Wrap(err, "disable WAL archive recovery") + } return true, nil } @@ -461,3 +474,37 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { func (r *snapshotRestorer) isPITRInProgress() bool { return r.cluster.GetAnnotations()[naming.PGBackRestRestore] != "" } + +func (r *snapshotRestorer) disableWALArchiveRecovery(ctx context.Context) error { + orig := r.cluster.DeepCopy() + for i := range r.cluster.Spec.InstanceSets { + if r.cluster.Spec.InstanceSets[i].Metadata == nil { + r.cluster.Spec.InstanceSets[i].Metadata = &v1beta1.Metadata{} + } + if r.cluster.Spec.InstanceSets[i].Metadata.Annotations == nil { + r.cluster.Spec.InstanceSets[i].Metadata.Annotations = make(map[string]string) + } + r.cluster.Spec.InstanceSets[i].Metadata.Annotations[naming.DisableWALArchiveRecoveryAnnotation] = "true" + } + if err := r.cl.Patch(ctx, r.cluster.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch cluster") + } + return nil +} + +func (r *snapshotRestorer) enableWALArchiveRecovery(ctx context.Context) error { + orig := r.cluster.DeepCopy() + for i := range r.cluster.Spec.InstanceSets { + if 
r.cluster.Spec.InstanceSets[i].Metadata == nil { + continue + } + if r.cluster.Spec.InstanceSets[i].Metadata.Annotations == nil { + continue + } + delete(r.cluster.Spec.InstanceSets[i].Metadata.Annotations, naming.DisableWALArchiveRecoveryAnnotation) + } + if err := r.cl.Patch(ctx, r.cluster.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch cluster") + } + return nil +} diff --git a/percona/postgres/common.go b/percona/postgres/common.go index fae20c55a2..8796caf448 100644 --- a/percona/postgres/common.go +++ b/percona/postgres/common.go @@ -5,11 +5,14 @@ import ( gover "github.com/hashicorp/go-version" "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) @@ -79,3 +82,56 @@ func determineVersion(cr *v2.PerconaPGCluster) string { } return patroniVersion4 } + +// SuspendInstance suspends an instance by setting the AnnotationInstanceSuspended annotation on the StatefulSet. +// Returns true if the instance was suspended. +// Caller is responsible for waiting for the instance to be suspended. 
+func SuspendInstance(ctx context.Context, cli client.Client, instanceKey client.ObjectKey) (bool, error) { + sts := &appsv1.StatefulSet{} + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") + } + + if _, ok := sts.GetAnnotations()[pNaming.AnnotationInstanceSuspended]; ok { + return sts.Status.Replicas == 0 && sts.Status.ReadyReplicas == 0, nil + } + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + orig := sts.DeepCopy() + annots := sts.GetAnnotations() + if annots == nil { + annots = make(map[string]string) + } + annots[pNaming.AnnotationInstanceSuspended] = "" + sts.SetAnnotations(annots) + return cli.Patch(ctx, sts, client.MergeFrom(orig)) + }); err != nil { + return false, errors.Wrap(err, "failed to update stateful set annotations") + } + return false, nil +} + +// UnsuspendInstance unsuspends an instance by removing the AnnotationInstanceSuspended annotation on the StatefulSet. +// Returns true if the instance was unsuspended. +// Caller is responsible for waiting for the instance to be unsuspended. 
+func UnsuspendInstance(ctx context.Context, cli client.Client, instanceKey client.ObjectKey) (bool, error) { + sts := &appsv1.StatefulSet{} + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") + } + + if _, ok := sts.GetAnnotations()[pNaming.AnnotationInstanceSuspended]; !ok { + return sts.Status.Replicas > 0 && sts.Status.ReadyReplicas > 0, nil + } + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + orig := sts.DeepCopy() + annots := sts.GetAnnotations() + delete(annots, pNaming.AnnotationInstanceSuspended) + sts.SetAnnotations(annots) + return cli.Patch(ctx, sts, client.MergeFrom(orig)) + }); err != nil { + return false, errors.Wrap(err, "failed to update stateful set annotations") + } + return false, nil +} From 598038a297a279ace880a5a4017d20231921d9bf Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 11:33:23 +0530 Subject: [PATCH 44/90] in-place restore improvements Signed-off-by: Mayank Shah --- build/postgres-operator/restore_command.sh | 6 +- cmd/postgres-operator/main.go | 9 + internal/pgbackrest/postgres.go | 3 +- percona/controller/pgcluster/controller.go | 53 ++++ .../pgrestore/snapshot/reconcile.go | 258 +++++++----------- percona/naming/kinds.go | 3 +- .../v2/perconapgbackup_types.go | 11 +- 7 files changed, 176 insertions(+), 167 deletions(-) diff --git a/build/postgres-operator/restore_command.sh b/build/postgres-operator/restore_command.sh index 5dc178c234..6b0bc9788e 100644 --- a/build/postgres-operator/restore_command.sh +++ b/build/postgres-operator/restore_command.sh @@ -1,9 +1,9 @@ #!/bin/bash set -e -if [[ -f $PGDATA/restored-from-snapshot ]]; then - rm -f $PGDATA/restored-from-snapshot + +if [[ "${DISABLE_WAL_ARCHIVE_RECOVERY:-}" == "1" ]]; then exit 1 fi -pgbackrest --stanza=db archive-get %f "%p" \ No newline at end of file +exec "$@" diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 
88796a1be7..efd4caa12f 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -213,6 +213,15 @@ func addControllersToManager(ctx context.Context, mgr manager.Manager) error { return err } + if err := mgr.GetFieldIndexer().IndexField( + context.Background(), + &v2.PerconaPGRestore{}, + v2.IndexFieldPGCluster, + v2.PGClusterIndexerFunc, + ); err != nil { + return err + } + if err := mgr.GetFieldIndexer().IndexField( context.Background(), &v2.PerconaPGBackup{}, diff --git a/internal/pgbackrest/postgres.go b/internal/pgbackrest/postgres.go index b3fe7b7ada..91e69eed3c 100644 --- a/internal/pgbackrest/postgres.go +++ b/internal/pgbackrest/postgres.go @@ -78,7 +78,8 @@ func PostgreSQL( // Fetch WAL files from any configured repository during recovery. // - https://pgbackrest.org/command.html#command-archive-get // - https://www.postgresql.org/docs/current/runtime-config-wal.html - restore := "sh /opt/crunchy/bin/restore_command.sh" + restore := "sh /opt/crunchy/bin/restore_command.sh " + restore += `pgbackrest --stanza=` + DefaultStanzaName + ` archive-get %f "%p"` if inCluster.Spec.Patroni != nil && inCluster.Spec.Patroni.DynamicConfiguration != nil { postgresql, ok := inCluster.Spec.Patroni.DynamicConfiguration["postgresql"].(map[string]any) if ok { diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index 2af967cc2b..4b95842eeb 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -103,9 +103,25 @@ func (r *PGClusterReconciler) SetupWithManager(mgr manager.Manager) error { WatchesRawSource(source.Kind(mgr.GetCache(), &corev1.Secret{}, r.watchSecrets())). WatchesRawSource(source.Kind(mgr.GetCache(), &batchv1.Job{}, r.watchBackupJobs())). WatchesRawSource(source.Kind(mgr.GetCache(), &v2.PerconaPGBackup{}, r.watchPGBackups())). + Watches(&v2.PerconaPGRestore{}, handler.EnqueueRequestsFromMapFunc(r.watchRestores)). 
Complete(r) } +func (r *PGClusterReconciler) watchRestores(ctx context.Context, o client.Object) []reconcile.Request { + restore, ok := o.(*v2.PerconaPGRestore) + if !ok { + return nil + } + return []reconcile.Request{ + { + NamespacedName: client.ObjectKey{ + Namespace: restore.GetNamespace(), + Name: restore.Spec.PGCluster, + }, + }, + } +} + func (r *PGClusterReconciler) watchServices() handler.TypedFuncs[*corev1.Service, reconcile.Request] { return handler.TypedFuncs[*corev1.Service, reconcile.Request]{ UpdateFunc: func(ctx context.Context, e event.TypedUpdateEvent[*corev1.Service], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { @@ -314,6 +330,10 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "reconcile scheduled backups") } + if err := r.reconcileWALRecoveryOnStart(ctx, cr); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile WAL recovery on start") + } + if cr.Spec.Pause != nil && *cr.Spec.Pause { backupRunning, err := isBackupRunning(ctx, r.Client, cr) if err != nil { @@ -368,6 +388,39 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return ctrl.Result{}, nil } +// When a snapshot restore is executed without PiTR, we must disable recovery from WAL on startup +// to ensure that data is consistent with the snapshot. 
+func (r *PGClusterReconciler) reconcileWALRecoveryOnStart(ctx context.Context, cr *v2.PerconaPGCluster) error { + var restores v2.PerconaPGRestoreList + if err := r.Client.List(ctx, &restores, client.MatchingFields{ + v2.IndexFieldPGCluster: cr.GetName(), + }, client.InNamespace(cr.Namespace)); err != nil { + return errors.Wrap(err, "failed to list restores") + } + + disableRecovery := func() { + for i := range cr.Spec.InstanceSets { + if len(cr.Spec.InstanceSets[i].Env) == 0 { + cr.Spec.InstanceSets[i].Env = make([]corev1.EnvVar, 0) + } + cr.Spec.InstanceSets[i].Env = append(cr.Spec.InstanceSets[i].Env, corev1.EnvVar{ + Name: "DISABLE_WAL_ARCHIVE_RECOVERY", + Value: "1", + }) + } + } + + for _, restore := range restores.Items { + if restore.IsCompleted() { + continue + } + if restore.Spec.VolumeSnapshotName != "" && (restore.Spec.RepoName == nil || *restore.Spec.RepoName == "") { + disableRecovery() + } + } + return nil +} + func (r *PGClusterReconciler) reconcileTLS(ctx context.Context, cr *v2.PerconaPGCluster) error { if err := r.validateTLS(ctx, cr); err != nil { return errors.Wrap(err, "validate TLS") diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index c7b66a9260..216f8aeb7a 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -3,8 +3,6 @@ package snapshot import ( "context" "fmt" - "slices" - "strings" "time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" @@ -28,7 +26,6 @@ import ( pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" - "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" crunchyv1beta1 
"github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -141,11 +138,10 @@ func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Res return reconcile.Result{}, errors.Wrap(err, "get volume snapshot") } - // pausing the cluster so the PVCs are unmounted and can be re-created. if ok, err := r.suspendAllInstances(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "suspend all instances") + return reconcile.Result{}, errors.Wrap(err, "shutdown cluster") } else if !ok { - r.log.Info("Waiting for all instances to be suspended") + r.log.Info("Waiting for instances to be suspended") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -164,86 +160,45 @@ func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Res } func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Result, error) { - volumeSnapshotName := r.restore.Spec.VolumeSnapshotName - clusterPVCs, err := r.listPVCs(ctx) - if err != nil { - return reconcile.Result{}, errors.Wrap(err, "list PVCs") - } - - // Restore PVCs - for _, pvc := range clusterPVCs { - if ok, err := r.restorePVCFromSnapshot(ctx, &pvc, volumeSnapshotName); err != nil { - return reconcile.Result{}, errors.Wrap(err, "restore PVC from snapshot") - } else if !ok { - r.log.Info("Waiting for PVC to restored", "pvc", pvc.GetName()) - return reconcile.Result{RequeueAfter: time.Second * 5}, nil - } - } - - // Restore PITR (if needed) - if ok, err := r.restorePITR(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "restore PITR") + if ok, err := r.reconcileInstancePVCs(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile instances") } else if !ok { - r.log.Info("Waiting for PiTR to complete") + r.log.Info("Waiting for instances PVCs to be reconciled") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - // Re-create (if needed) Patroni leader Endpoints so the cluster 
can be bootstrapped from the new data. + // Recreate DCS so that cluster can be bootstrapped with new data. if err := r.reconcileLeaderEndpoints(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "delete leader endpoints") + return reconcile.Result{}, errors.Wrap(err, "reconcile leader endpoints") } - // Start the cluster if ok, err := r.unsuspendAllInstances(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "resume cluster") } else if !ok && !r.isPITRInProgress() { - r.log.Info("Waiting for all instances to be unsuspended") + r.log.Info("Waiting for instances to be unsuspended") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + // Perform PITR if needed. + if ok, err := r.restorePITR(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PITR") + } else if !ok { + r.log.Info("Waiting for PITR to complete") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { restore.Status.State = v2.RestoreSucceeded - restore.Status.CompletedAt = ptr.To(metav1.Now()) + restore.Status.CompletedAt = &metav1.Time{Time: time.Now()} }); err != nil { return reconcile.Result{}, errors.Wrap(err, "update restore status") } - r.log.Info("Snapshot restore complete") + r.log.Info("Snapshot restore is complete") return reconcile.Result{}, nil } -func (r *snapshotRestorer) reconcileLeaderEndpoints(ctx context.Context) error { - postgresCluster := &crunchyv1beta1.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: r.cluster.Name, - Namespace: r.cluster.Namespace, - }, - } - - leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} - if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { - return client.IgnoreNotFound(err) - } - - if len(leaderEp.Subsets) > 0 { - return nil - } - - if err := r.cl.Delete(ctx, leaderEp); client.IgnoreNotFound(err) != nil { - return 
errors.Wrap(err, "delete leader endpoints") - } - return nil -} - -// listPVCs returns the list of PostgreSQL data PVCs that need to be restored. -// -// Instead of listing existing PVCs directly, this function derives the PVC names -// from the cluster instance statefulsets. This is necessary because during restore, -// PVCs are deleted and recreated. Listing live PVCs would miss PVCs that are -// currently being recreated and lead to an inconsistent state. -// -// The function returns PVC objects with only metadata populated, -// which is sufficient for getting the job done. -func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVolumeClaim, error) { +func (r *snapshotRestorer) reconcileInstancePVCs(ctx context.Context) (bool, error) { instances := &appsv1.StatefulSetList{} if err := r.cl.List(ctx, instances, &client.ListOptions{ Namespace: r.cluster.GetNamespace(), @@ -252,93 +207,119 @@ func (r *snapshotRestorer) listPVCs(ctx context.Context) ([]corev1.PersistentVol naming.LabelData: naming.DataPostgres, }), }); err != nil { - return nil, errors.Wrap(err, "list instances") + return false, errors.Wrap(err, "list instances") } - result := []corev1.PersistentVolumeClaim{} + done := true for _, instance := range instances.Items { - objectMeta := naming.InstancePostgresDataVolume(&instance) - objectMeta.SetLabels(map[string]string{ - naming.LabelInstanceSet: instance.Labels[naming.LabelInstanceSet], // needed for createPVCFromSnapshot - }) - result = append(result, corev1.PersistentVolumeClaim{ - ObjectMeta: objectMeta, - }) - } - - // sort to ensure consistent ordering - slices.SortStableFunc(result, func(a, b corev1.PersistentVolumeClaim) int { - return strings.Compare(a.GetName(), b.GetName()) - }) - return result, nil + if ok, err := r.reconcileInstancePVC(ctx, &instance); err != nil { + return false, errors.Wrap(err, "reconcile instance PVC") + } else if !ok { + done = false + } + } + return done, nil } -// restorePVCFromSnapshot restores 
a PVC from a snapshot. -// pvc is a partial object derived from the cluster instance statefulsets (see listPVCs method). -func (r *snapshotRestorer) restorePVCFromSnapshot( +func (r *snapshotRestorer) reconcileInstancePVC( ctx context.Context, - pvc *corev1.PersistentVolumeClaim, - snapshotName string, + instance *appsv1.StatefulSet, ) (bool, error) { + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} observedPVC := &corev1.PersistentVolumeClaim{} - err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) + err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) if k8serrors.IsNotFound(err) { - // PVC doesn't exist, create it from the snapshot - if err := r.createPVCFromSnapshot(ctx, pvc, snapshotName); err != nil { - return false, errors.Wrap(err, "create PVC with snapshot") + if err := r.createPVCFromSnapshot(ctx, pvc, instance); err != nil { + return false, errors.Wrap(err, "create PVC from data source") } - return false, nil + return true, nil } else if err != nil { return false, errors.Wrap(err, "get observed PVC") } - // Check if the PVC is already using the snapshot - if val, ok := observedPVC.GetAnnotations()[pNaming.AnnotationSnapshotRestore]; ok && val == r.restore.GetName() { + if observedPVC.GetAnnotations()[pNaming.AnnotationSnapshotRestore] == r.restore.GetName() { return true, nil } - // If deleting, wait for it to be deleted before recreating if !observedPVC.GetDeletionTimestamp().IsZero() { return false, nil } - // Delete the existing PVC so we can recreate it from the snapshot + // Delete it so we can recreate if err := r.cl.Delete(ctx, observedPVC); err != nil { return false, errors.Wrap(err, "delete PVC") } return false, nil } -func (r *snapshotRestorer) createPVCFromSnapshot(ctx context.Context, pvc *corev1.PersistentVolumeClaim, snapshotName string) error { - instanceName := pvc.GetLabels()[naming.LabelInstanceSet] - if instanceName == "" { - return errors.New("instance not 
known for PVC") +func (r *snapshotRestorer) createPVCFromSnapshot( + ctx context.Context, + pvc *corev1.PersistentVolumeClaim, + instance *appsv1.StatefulSet, +) error { + instanceSetName := instance.GetLabels()[naming.LabelInstanceSet] + if instanceSetName == "" { + return errors.New("instance set name is not known") } - var volumeClaimSpec *corev1.PersistentVolumeClaimSpec + + dataSource := &corev1.TypedLocalObjectReference{ + APIGroup: ptr.To(volumesnapshotv1.GroupName), + Kind: pNaming.KindVolumeSnapshot, + Name: r.restore.Spec.VolumeSnapshotName, + } + spec, err := r.pvcSpecFromDataSource(instanceSetName, dataSource) + if err != nil { + return errors.Wrap(err, "get PVC spec from data source") + } + pvc.Spec = spec + pvc.SetAnnotations(map[string]string{ + pNaming.AnnotationSnapshotRestore: r.restore.GetName(), + }) + if err := r.cl.Create(ctx, pvc); err != nil { + return errors.Wrap(err, "create PVC") + } + return nil +} + +func (r *snapshotRestorer) pvcSpecFromDataSource(instanceSetName string, dataSource *corev1.TypedLocalObjectReference) (corev1.PersistentVolumeClaimSpec, error) { + var instanceSetSpec *v2.PGInstanceSetSpec for _, instanceSet := range r.cluster.Spec.InstanceSets { - if instanceSet.Name == instanceName { - volumeClaimSpec = &instanceSet.DataVolumeClaimSpec + if instanceSet.Name == instanceSetName { + instanceSetSpec = &instanceSet break } } - if volumeClaimSpec == nil { - return fmt.Errorf("instance set '%s' either not found or has no data volume claim spec", instanceName) + if instanceSetSpec == nil { + return corev1.PersistentVolumeClaimSpec{}, errors.New("instance set not found") } - volumeClaimSpec.DataSource = &corev1.TypedLocalObjectReference{ - APIGroup: ptr.To(volumesnapshotv1.GroupName), - Kind: pNaming.KindVolumeSnapshot, - Name: snapshotName, + + dataVolSpec := instanceSetSpec.DataVolumeClaimSpec + dataVolSpec.DataSource = dataSource + return dataVolSpec, nil +} + +func (r *snapshotRestorer) reconcileLeaderEndpoints(ctx 
context.Context) error { + postgresCluster := &crunchyv1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: r.cluster.Name, + Namespace: r.cluster.Namespace, + }, } - newPVC := &corev1.PersistentVolumeClaim{ - ObjectMeta: pvc.ObjectMeta, - Spec: *volumeClaimSpec, + + leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { + return client.IgnoreNotFound(err) } - newPVC.SetAnnotations(map[string]string{ - pNaming.AnnotationSnapshotRestore: r.restore.GetName(), - }) - return r.cl.Create(ctx, newPVC) + if len(leaderEp.Subsets) > 0 { + return nil + } + + if err := r.cl.Delete(ctx, leaderEp); client.IgnoreNotFound(err) != nil { + return errors.Wrap(err, "delete leader endpoints") + } + return nil } func (r *snapshotRestorer) suspendAllInstances(ctx context.Context) (bool, error) { @@ -427,12 +408,8 @@ func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { return func(ctx context.Context, restore *v2.PerconaPGRestore) error { - if err := r.enableWALArchiveRecovery(ctx); err != nil { - return errors.Wrap(err, "enable WAL archive recovery") - } - // Resume the cluster if it was paused during restore if _, err := r.unsuspendAllInstances(ctx); err != nil { - return errors.Wrap(err, "unsuspend all instances") + return errors.Wrap(err, "resume cluster") } return nil } @@ -440,10 +417,6 @@ func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.Percon func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { if r.restore.Spec.RepoName == nil { - // PiTR is not needed, no need to recover WAL archives to maintain snapshot consistency. 
- if err := r.disableWALArchiveRecovery(ctx); err != nil { - return false, errors.Wrap(err, "disable WAL archive recovery") - } return true, nil } @@ -472,39 +445,6 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { } func (r *snapshotRestorer) isPITRInProgress() bool { - return r.cluster.GetAnnotations()[naming.PGBackRestRestore] != "" -} - -func (r *snapshotRestorer) disableWALArchiveRecovery(ctx context.Context) error { - orig := r.cluster.DeepCopy() - for i := range r.cluster.Spec.InstanceSets { - if r.cluster.Spec.InstanceSets[i].Metadata == nil { - r.cluster.Spec.InstanceSets[i].Metadata = &v1beta1.Metadata{} - } - if r.cluster.Spec.InstanceSets[i].Metadata.Annotations == nil { - r.cluster.Spec.InstanceSets[i].Metadata.Annotations = make(map[string]string) - } - r.cluster.Spec.InstanceSets[i].Metadata.Annotations[naming.DisableWALArchiveRecoveryAnnotation] = "true" - } - if err := r.cl.Patch(ctx, r.cluster.DeepCopy(), client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch cluster") - } - return nil -} - -func (r *snapshotRestorer) enableWALArchiveRecovery(ctx context.Context) error { - orig := r.cluster.DeepCopy() - for i := range r.cluster.Spec.InstanceSets { - if r.cluster.Spec.InstanceSets[i].Metadata == nil { - continue - } - if r.cluster.Spec.InstanceSets[i].Metadata.Annotations == nil { - continue - } - delete(r.cluster.Spec.InstanceSets[i].Metadata.Annotations, naming.DisableWALArchiveRecoveryAnnotation) - } - if err := r.cl.Patch(ctx, r.cluster.DeepCopy(), client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch cluster") - } - return nil + _, ok := r.cluster.GetAnnotations()[naming.PGBackRestRestore] + return ok } diff --git a/percona/naming/kinds.go b/percona/naming/kinds.go index 6b1c8cbeca..07bf8db248 100644 --- a/percona/naming/kinds.go +++ b/percona/naming/kinds.go @@ -1,5 +1,6 @@ package naming const ( - KindVolumeSnapshot = "VolumeSnapshot" + KindVolumeSnapshot = "VolumeSnapshot" + 
KindPersistentVolumeClaim = "PersistentVolumeClaim" ) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 3e98a5ddf7..aae2d9a928 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -84,10 +84,15 @@ const IndexFieldPGCluster = "spec.pgCluster" var PGClusterIndexerFunc client.IndexerFunc = func(obj client.Object) []string { backup, ok := obj.(*PerconaPGBackup) - if !ok { - return nil + if ok { + return []string{backup.Spec.PGCluster} + } + + restore, ok := obj.(*PerconaPGRestore) + if ok { + return []string{restore.Spec.PGCluster} } - return []string{backup.Spec.PGCluster} + return nil } type PGBackupState string From ecb41dcb9337429329fd8cf321aab4515eeca46a Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 12:28:48 +0530 Subject: [PATCH 45/90] update e2e test Signed-off-by: Mayank Shah --- .../demand-backup-offline-snapshot/05-assert.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml index 4413f40dd4..451b526115 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml @@ -28,4 +28,14 @@ kind: PerconaPGCluster metadata: name: backup-snapshot status: - state: ready + pgbouncer: + ready: 3 + size: 3 + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready \ No newline at end of file From 04039f74a0089353a34ab6b7a690243484101066 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 14:20:04 +0530 Subject: [PATCH 46/90] update test runs Signed-off-by: Mayank Shah --- e2e-tests/run-pr.csv | 1 + e2e-tests/run-release.csv | 1 + 2 files changed, 2 insertions(+) diff --git a/e2e-tests/run-pr.csv 
b/e2e-tests/run-pr.csv index 358ce44987..5bfad6cd95 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -4,6 +4,7 @@ custom-extensions custom-tls database-init-sql demand-backup +demand-backup-offline-snapshot finalizers init-deploy monitoring diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index 5a53e2617b..322acd7b82 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -4,6 +4,7 @@ custom-extensions custom-tls database-init-sql demand-backup +demand-backup-offline-snapshot finalizers init-deploy major-upgrade From 1f581904059e867aad11de315b702988d78404c0 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 15:18:51 +0530 Subject: [PATCH 47/90] test fixes Signed-off-by: Mayank Shah --- .../08-assert.yaml | 37 ++++++++----------- .../08-create-backup-snapshot.yaml | 7 ++++ .../09-assert.yaml | 35 +++++++++++++----- ...-pitr.yaml => 09-create-restore-pitr.yaml} | 2 +- .../10-assert.yaml | 14 +++++++ ...data.yaml => 10-verify-restored-data.yaml} | 0 6 files changed, 64 insertions(+), 31 deletions(-) create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/08-create-backup-snapshot.yaml rename e2e-tests/tests/demand-backup-offline-snapshot/{08-create-restore-pitr.yaml => 09-create-restore-pitr.yaml} (98%) create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml rename e2e-tests/tests/demand-backup-offline-snapshot/{09-verify-restored-data.yaml => 10-verify-restored-data.yaml} (100%) diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml index ccf0630b8c..c999448404 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml @@ -1,31 +1,26 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 600 ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - annotations: - 
pgv2.percona.com/snapshot-restore: restore-pitr - labels: - postgres-operator.crunchydata.com/cluster: backup-snapshot - postgres-operator.crunchydata.com/role: pgdata -spec: - dataSource: - apiGroup: snapshot.storage.k8s.io - kind: VolumeSnapshot - name: backup1 +timeout: 560 --- apiVersion: pgv2.percona.com/v2 -kind: PerconaPGRestore +kind: PerconaPGBackup metadata: - name: restore-pitr + name: backup3 +spec: + pgCluster: backup-snapshot + method: volumeSnapshot status: state: Succeeded --- -apiVersion: pgv2.percona.com/v2 -kind: PerconaPGCluster +kind: VolumeSnapshot +apiVersion: snapshot.storage.k8s.io/v1 metadata: - name: backup-snapshot + name: backup3 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + name: backup3 + controller: true + blockOwnerDeletion: true status: - state: ready + readyToUse: true \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/08-create-backup-snapshot.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/08-create-backup-snapshot.yaml new file mode 100644 index 0000000000..a2e308c919 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/08-create-backup-snapshot.yaml @@ -0,0 +1,7 @@ +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: backup3 +spec: + pgCluster: backup-snapshot + method: volumeSnapshot diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml index 7b4053f7f5..c4048d5662 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml @@ -1,14 +1,31 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 30 +timeout: 600 --- -kind: ConfigMap apiVersion: v1 +kind: PersistentVolumeClaim metadata: - name: 09-verify-restored-data -data: - data: |2- - 100500 - 100501 - 100502 - 100503 + annotations: + pgv2.percona.com/snapshot-restore: restore-pitr + labels: + 
postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/role: pgdata +spec: + dataSource: + apiGroup: snapshot.storage.k8s.io + kind: VolumeSnapshot + name: backup3 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGRestore +metadata: + name: restore-pitr +status: + state: Succeeded +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: backup-snapshot +status: + state: ready diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/08-create-restore-pitr.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml similarity index 98% rename from e2e-tests/tests/demand-backup-offline-snapshot/08-create-restore-pitr.yaml rename to e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml index 1a49f56e87..5d17230911 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/08-create-restore-pitr.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml @@ -39,7 +39,7 @@ commands: spec: pgCluster: backup-snapshot repoName: repo1 - volumeSnapshotName: backup1 + volumeSnapshotName: backup3 options: - --type=time - --target="${latest_restorable_time}" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml new file mode 100644 index 0000000000..7b4053f7f5 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 09-verify-restored-data +data: + data: |2- + 100500 + 100501 + 100502 + 100503 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/09-verify-restored-data.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml similarity index 100% rename from e2e-tests/tests/demand-backup-offline-snapshot/09-verify-restored-data.yaml rename to 
e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml From 38884093415226db3c741142ea655353b9a27bc9 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 17:35:48 +0530 Subject: [PATCH 48/90] Update build/postgres-operator/restore_command.sh Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- build/postgres-operator/restore_command.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build/postgres-operator/restore_command.sh b/build/postgres-operator/restore_command.sh index 6b0bc9788e..69962f713e 100644 --- a/build/postgres-operator/restore_command.sh +++ b/build/postgres-operator/restore_command.sh @@ -1,9 +1,8 @@ #!/bin/bash set -e - -if [[ "${DISABLE_WAL_ARCHIVE_RECOVERY:-}" == "1" ]]; then - exit 1 +if [[ ${DISABLE_WAL_ARCHIVE_RECOVERY:-} == "1" ]]; then + exit 1 fi exec "$@" From ba9feef78c042d87ad1da417d777d4c9e731a4cc Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Mon, 9 Feb 2026 21:46:52 +0530 Subject: [PATCH 49/90] support for WAL and Tablespace volumes Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgbackups.yaml | 43 ++- .../pgv2.percona.com_perconapgrestores.yaml | 11 +- .../pgv2.percona.com_perconapgclusters.yaml | 54 ++-- deploy/bundle.yaml | 54 ++-- deploy/crd.yaml | 54 ++-- deploy/cw-bundle.yaml | 54 ++-- .../controller/postgrescluster/postgres.go | 2 +- internal/naming/labels.go | 3 + .../controller/pgbackup/snapshots/offline.go | 9 +- .../pgbackup/snapshots/reconcile.go | 261 ++++++++++++++---- percona/controller/pgcluster/controller.go | 2 +- percona/controller/pgrestore/controller.go | 2 +- .../pgrestore/snapshot/reconcile.go | 108 ++++++-- .../v2/perconapgbackup_types.go | 12 +- .../v2/perconapgrestore_types.go | 8 +- .../v2/zz_generated.deepcopy.go | 34 ++- 16 files changed, 532 insertions(+), 179 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml 
b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 3a1f9bf4de..971b84ef39 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -403,17 +403,38 @@ spec: type: object snapshot: properties: - targetPvcName: - description: TargetPVCName is the name of the source PVC that - is being snapshotted. - type: string - volumeSnapshotName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string - required: - - targetPvcName - - volumeSnapshotName + dataVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + tablespaceVolumes: + additionalProperties: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + type: object + walVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object type: object state: type: string diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index 31b017719c..512d001c70 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -80,18 +80,19 @@ spec: x-kubernetes-validations: - message: repoName is an immutable field rule: self == oldSelf - volumeSnapshotName: - description: The name of the VolumeSnapshot to perform restore from. + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. 
type: string x-kubernetes-validations: - - message: volumeSnapshotName is an immutable field + - message: volumeSnapshotBackupName is an immutable field rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + - message: either repoName or volumeSnapshotBackupName must be set + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName != "" status: properties: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 788778ca44..59b7e39b5e 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -402,17 +402,38 @@ spec: type: object snapshot: properties: - targetPvcName: - description: TargetPVCName is the name of the source PVC that - is being snapshotted. - type: string - volumeSnapshotName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string - required: - - targetPvcName - - volumeSnapshotName + dataVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + tablespaceVolumes: + additionalProperties: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + type: object + walVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object type: object state: type: string @@ -22005,18 +22026,19 @@ spec: x-kubernetes-validations: - message: repoName is an immutable field rule: self == oldSelf - volumeSnapshotName: - description: The name of the VolumeSnapshot to perform restore from. 
+ volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. type: string x-kubernetes-validations: - - message: volumeSnapshotName is an immutable field + - message: volumeSnapshotBackupName is an immutable field rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + - message: either repoName or volumeSnapshotBackupName must be set + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName != "" status: properties: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 3dc66d682d..ad5234cc62 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -697,17 +697,38 @@ spec: type: object snapshot: properties: - targetPvcName: - description: TargetPVCName is the name of the source PVC that - is being snapshotted. - type: string - volumeSnapshotName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string - required: - - targetPvcName - - volumeSnapshotName + dataVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + tablespaceVolumes: + additionalProperties: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + type: object + walVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object type: object state: type: string @@ -22304,18 +22325,19 @@ spec: x-kubernetes-validations: - message: repoName is an immutable field rule: self == oldSelf - volumeSnapshotName: - description: The name of the VolumeSnapshot to perform restore from. 
+ volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. type: string x-kubernetes-validations: - - message: volumeSnapshotName is an immutable field + - message: volumeSnapshotBackupName is an immutable field rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + - message: either repoName or volumeSnapshotBackupName must be set + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName != "" status: properties: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index bb5530d2d7..c6ef09ea3b 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -697,17 +697,38 @@ spec: type: object snapshot: properties: - targetPvcName: - description: TargetPVCName is the name of the source PVC that - is being snapshotted. - type: string - volumeSnapshotName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string - required: - - targetPvcName - - volumeSnapshotName + dataVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + tablespaceVolumes: + additionalProperties: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + type: object + walVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object type: object state: type: string @@ -22304,18 +22325,19 @@ spec: x-kubernetes-validations: - message: repoName is an immutable field rule: self == oldSelf - volumeSnapshotName: - description: The name of the VolumeSnapshot to perform restore from. 
+ volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. type: string x-kubernetes-validations: - - message: volumeSnapshotName is an immutable field + - message: volumeSnapshotBackupName is an immutable field rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + - message: either repoName or volumeSnapshotBackupName must be set + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName != "" status: properties: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 62ee3124f1..84278f40cc 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -697,17 +697,38 @@ spec: type: object snapshot: properties: - targetPvcName: - description: TargetPVCName is the name of the source PVC that - is being snapshotted. - type: string - volumeSnapshotName: - description: VolumeSnapshotName is the name of the VolumeSnapshot - that contains the snapshotted data. - type: string - required: - - targetPvcName - - volumeSnapshotName + dataVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + tablespaceVolumes: + additionalProperties: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object + type: object + walVolume: + properties: + pvcName: + type: string + snapshotName: + type: string + required: + - pvcName + - snapshotName + type: object type: object state: type: string @@ -22304,18 +22325,19 @@ spec: x-kubernetes-validations: - message: repoName is an immutable field rule: self == oldSelf - volumeSnapshotName: - description: The name of the VolumeSnapshot to perform restore from. 
+ volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. type: string x-kubernetes-validations: - - message: volumeSnapshotName is an immutable field + - message: volumeSnapshotBackupName is an immutable field rule: self == oldSelf required: - pgCluster type: object x-kubernetes-validations: - - message: either repoName or volumeSnapshotName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotName + - message: either repoName or volumeSnapshotBackupName must be set + rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName != "" status: properties: diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index cafc01fe11..be6d1d1608 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -964,7 +964,7 @@ func (r *Reconciler) reconcileTablespaceVolumes( naming.LabelCluster: cluster.Name, naming.LabelInstanceSet: instanceSpec.Name, naming.LabelInstance: instance.Name, - naming.LabelRole: "tablespace", + naming.LabelRole: naming.RoleTablespace, naming.LabelData: vol.Name, } diff --git a/internal/naming/labels.go b/internal/naming/labels.go index ab8f587eaa..ad67a6bc9c 100644 --- a/internal/naming/labels.go +++ b/internal/naming/labels.go @@ -123,6 +123,9 @@ const ( // RoleSnapshot is the LabelRole applied to Snapshot resources. RoleSnapshot = "snapshot" + + // RoleTablespace is the LabelRole applied to tablespace volumes. 
+ RoleTablespace = "tablespace" ) const ( diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 6590ea36bb..8b4179abaf 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -28,8 +28,6 @@ const ( checkpointTimeoutSeconds = 30 // TODO: make this configurable waitTimeout = 5 * time.Minute retryInterval = 3 * time.Second - - snapshotSignalFile = "restored-from-snapshot" ) type offlineExec struct { @@ -62,12 +60,7 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { if err := e.suspendInstance(ctx, targetInstance); err != nil { return "", errors.Wrap(err, "failed to suspend instance") } - - targetPVC, err := e.getTargetPVC(ctx, targetInstance) - if err != nil { - return "", errors.Wrap(err, "failed to get target PVC") - } - return targetPVC, nil + return targetInstance, nil } func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index ac8bc56b25..3aa4544fda 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -7,7 +7,9 @@ import ( volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -17,6 +19,7 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming 
"github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) @@ -27,7 +30,7 @@ const ( type snapshotExecutor interface { // Prepare the cluster for performing a snapshot. - // Returns the name of the PVC that will be snapshotted. + // Returns the name of the instance whose PVCs will be snapshotted. prepare(ctx context.Context) (string, error) // Complete the snapshot. finalize(ctx context.Context) error @@ -169,53 +172,60 @@ func (r *snapshotReconciler) reconcileStarting(ctx context.Context) (reconcile.R // +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshots,verbs=get;list;watch;create func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Result, error) { - volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ - ObjectMeta: metav1.ObjectMeta{ - Name: r.backup.GetName(), - Namespace: r.backup.GetNamespace(), - }, - Spec: volumesnapshotv1.VolumeSnapshotSpec{ - VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), - Source: volumesnapshotv1.VolumeSnapshotSource{ - PersistentVolumeClaimName: &r.backup.Status.Snapshot.TargetPVCName, - }, - }, + dataOk, err := r.reconcileDataSnapshot(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile data snapshot: %w", err) } - if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + + walOk, err := r.reconcileWALSnapshot(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile WAL snapshot: %w", err) } + tablespaceOk, err := r.reconcileTablespaceSnapshot(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile tablespace snapshot: %w", err) + } + + if dataOk && walOk && tablespaceOk { + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp 
*v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupSucceeded + bcp.Status.CompletedAt = ptr.To(metav1.Now()) + }); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + return reconcile.Result{}, nil + } + + // Check again later + return reconcile.Result{RequeueAfter: time.Second * 5}, nil +} + +func (r *snapshotReconciler) reconcileSnapshot(ctx context.Context, volumeSnapshot *volumesnapshotv1.VolumeSnapshot) (bool, error) { created, err := r.ensureSnapshot(ctx, volumeSnapshot) if err != nil { - return reconcile.Result{}, fmt.Errorf("failed to ensure snapshot: %w", err) + return false, fmt.Errorf("failed to ensure snapshot: %w", err) } + + log := r.log.WithValues("snapshot", volumeSnapshot.GetName()) if created { - r.log.Info("Volume snapshot created successfully", "snapshot", volumeSnapshot.GetName()) - return reconcile.Result{}, nil // return back later to observe the status + log.Info("Volume snapshot created successfully") + return false, nil // return back later to observe the status } if err := r.cl.Get(ctx, client.ObjectKeyFromObject(volumeSnapshot), volumeSnapshot); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to get volume snapshot: %w", err) + return false, fmt.Errorf("failed to get volume snapshot: %w", err) } switch { // no status reported case volumeSnapshot.Status == nil: - return reconcile.Result{}, nil + return false, nil // snapshot is complete and ready to be restored. 
case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): - if err := r.exec.finalize(ctx); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) - } - - if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.State = v2.BackupSucceeded - bcp.Status.CompletedAt = ptr.To(metav1.Now()) - }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) - } - r.log.Info("Snapshot is complete and ready to be used") + log.Info("Snapshot is complete and ready to be used") + return true, nil // error occurred while creating the snapshot. case volumeSnapshot.Status.Error != nil: @@ -223,20 +233,115 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re message := ptr.Deref(volumeSnapshot.Status.Error.Message, "") if !shouldFailSnapshot(volumeSnapshot) { r.log.Info("Snapshot is in error state, but within deadline. Retrying.", "message", message) - return reconcile.Result{}, nil + return false, nil } - // Snapshot has failed, update the status to failed. 
- stsErr := fmt.Errorf("snapshot error: %s", message) - r.log.Error(stsErr, "Volume snapshot failed") - if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.State = v2.BackupFailed - bcp.Status.Error = stsErr.Error() - }); updErr != nil { - return reconcile.Result{}, nil + err := errors.New(message) + + log.Error(err, "Volume snapshot failed") + return false, err + + default: + return false, nil + } +} + +func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context) (bool, error) { + snapshotName := r.backup.GetName() + "-" + naming.RolePostgresData + volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Namespace: r.backup.GetNamespace(), + }, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &r.backup.Status.Snapshot.DataVolume.PVCName, + }, + }, + } + if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { + return false, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.Snapshot.DataVolume.SnapshotName = volumeSnapshot.GetName() + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + return ok, nil +} + +func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context) (bool, error) { + snapshotName := r.backup.GetName() + "-" + naming.RolePostgresWAL + volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Namespace: r.backup.GetNamespace(), + }, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + 
VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &r.backup.Status.Snapshot.WALVolume.PVCName, + }, + }, + } + if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { + return false, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.Snapshot.WALVolume.SnapshotName = volumeSnapshot.GetName() + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + return ok, nil +} + +func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context) (bool, error) { + done := true + for tsName, info := range r.backup.Status.Snapshot.TablespaceVolumes { + snapshotName := r.backup.GetName() + "-" + tsName + "-" + naming.RoleTablespace + volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: snapshotName, + Namespace: r.backup.GetNamespace(), + }, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &info.PVCName, + }, + }, + } + if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { + return false, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + ref := bcp.Status.Snapshot.TablespaceVolumes[tsName] + ref.SnapshotName = 
volumeSnapshot.GetName() + bcp.Status.Snapshot.TablespaceVolumes[tsName] = ref + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + if !ok { + done = false } } - return reconcile.Result{}, nil + return done, nil } func shouldFailSnapshot(volumeSnapshot *volumesnapshotv1.VolumeSnapshot) bool { @@ -248,15 +353,6 @@ func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot if err := r.cl.Create(ctx, volumeSnapshot); err != nil { return false, client.IgnoreAlreadyExists(err) } - - if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - if bcp.Status.Snapshot == nil { - bcp.Status.Snapshot = &v2.SnapshotStatus{} - } - bcp.Status.Snapshot.VolumeSnapshotName = volumeSnapshot.GetName() - }); updErr != nil { - return true, fmt.Errorf("failed to update volumeSnapshot name in backup status: %w", updErr) - } return true, nil } @@ -267,17 +363,74 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { } // prepare the cluster - pvcTarget, err := r.exec.prepare(ctx) + targetInstance, err := r.exec.prepare(ctx) if err != nil { return fmt.Errorf("failed to prepare for snapshot: %w", err) } + snapshotStatus := &v2.SnapshotStatus{} + + // Find data volume + var dataVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, &dataVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RolePostgresData, + }), + }); err != nil { + return fmt.Errorf("failed to list data volumes: %w", err) + } + if len(dataVolumes.Items) == 1 { + snapshotStatus.DataVolume = &v2.PVCSnapshotRef{ + PVCName: dataVolumes.Items[0].GetName(), + } + } else { // we expect 1 + return fmt.Errorf("unexpected number of data volumes: %d", len(dataVolumes.Items)) + } + + // Find WAL volume + var walVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, 
&walVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RolePostgresWAL, + }), + }); err != nil { + return fmt.Errorf("failed to list WAL volumes: %w", err) + } + if len(walVolumes.Items) == 1 { + snapshotStatus.WALVolume = &v2.PVCSnapshotRef{ + PVCName: walVolumes.Items[0].GetName(), + } + } + + // Find tablespace volumes + var tablespaceVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, &tablespaceVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RoleTablespace, + }), + }); err != nil { + return fmt.Errorf("failed to list tablespace volumes: %w", err) + } + + if len(tablespaceVolumes.Items) > 0 { + snapshotStatus.TablespaceVolumes = make(map[string]v2.PVCSnapshotRef) + } + for _, vol := range tablespaceVolumes.Items { + name := vol.GetLabels()[naming.LabelData] + snapshotStatus.TablespaceVolumes[name] = v2.PVCSnapshotRef{ + PVCName: vol.GetName(), + } + } + // update snapshot status if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - if bcp.Status.Snapshot == nil { - bcp.Status.Snapshot = &v2.SnapshotStatus{} - } - bcp.Status.Snapshot.TargetPVCName = pvcTarget + bcp.Status.Snapshot = snapshotStatus }); err != nil { return fmt.Errorf("failed to update backup status: %w", err) } diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index 4b95842eeb..a72cd2507a 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -414,7 +414,7 @@ func (r *PGClusterReconciler) reconcileWALRecoveryOnStart(ctx context.Context, c if restore.IsCompleted() { continue } - if restore.Spec.VolumeSnapshotName != "" && (restore.Spec.RepoName == nil || 
*restore.Spec.RepoName == "") { + if restore.Spec.VolumeSnapshotBackupName != "" && (restore.Spec.RepoName == nil || *restore.Spec.RepoName == "") { disableRecovery() } } diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 02dc5a892b..233daa24cd 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -68,7 +68,7 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") } - if pgRestore.Spec.VolumeSnapshotName != "" { + if pgRestore.Spec.VolumeSnapshotBackupName != "" { // Delegate to snapshot restore reconciliation return snapshot.Reconcile(ctx, r.Client, pgCluster, pgRestore) } diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 216f8aeb7a..8f0470b2ab 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -33,6 +33,7 @@ type snapshotRestorer struct { cl client.Client log logging.Logger cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup restore *v2.PerconaPGRestore } @@ -40,12 +41,14 @@ func newSnapshotRestorer( cl client.Client, log logging.Logger, cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, restore *v2.PerconaPGRestore, ) *snapshotRestorer { return &snapshotRestorer{ cl: cl, log: log, cluster: cluster, + backup: backup, restore: restore, } } @@ -63,7 +66,12 @@ func Reconcile( return reconcile.Result{}, nil } - r := newSnapshotRestorer(c, log, pg, restore) + backup := &v2.PerconaPGBackup{} + if err := c.Get(ctx, types.NamespacedName{Name: restore.Spec.VolumeSnapshotBackupName, Namespace: pg.Namespace}, backup); err != nil { + return reconcile.Result{}, errors.Wrap(err, "get backup") + } + + r := newSnapshotRestorer(c, log, pg, backup, restore) if !restore.GetDeletionTimestamp().IsZero() { if ok, err := 
r.runFinalizers(ctx); err != nil { @@ -122,22 +130,6 @@ func (r *snapshotRestorer) reconcileNew(ctx context.Context) (reconcile.Result, } func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Result, error) { - // Check if specified volume snapshot exists - volumeSnapshotName := r.restore.Spec.VolumeSnapshotName - volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{} - if err := r.cl.Get(ctx, types.NamespacedName{Name: volumeSnapshotName, Namespace: r.cluster.Namespace}, volumeSnapshot); err != nil { - if k8serrors.IsNotFound(err) { - r.log.Info("Volume snapshot not found, failing restore") - if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { - restore.Status.State = v2.RestoreFailed - }); err != nil { - return reconcile.Result{}, errors.Wrap(err, "update restore status") - } - return reconcile.Result{}, nil - } - return reconcile.Result{}, errors.Wrap(err, "get volume snapshot") - } - if ok, err := r.suspendAllInstances(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "shutdown cluster") } else if !ok { @@ -160,7 +152,7 @@ func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Res } func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Result, error) { - if ok, err := r.reconcileInstancePVCs(ctx); err != nil { + if ok, err := r.reconcileInstances(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "reconcile instances") } else if !ok { r.log.Info("Waiting for instances PVCs to be reconciled") @@ -198,7 +190,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, nil } -func (r *snapshotRestorer) reconcileInstancePVCs(ctx context.Context) (bool, error) { +func (r *snapshotRestorer) reconcileInstances(ctx context.Context) (bool, error) { instances := &appsv1.StatefulSetList{} if err := r.cl.List(ctx, instances, &client.ListOptions{ Namespace: r.cluster.GetNamespace(), @@ -212,8 +204,8 @@ 
func (r *snapshotRestorer) reconcileInstancePVCs(ctx context.Context) (bool, err done := true for _, instance := range instances.Items { - if ok, err := r.reconcileInstancePVC(ctx, &instance); err != nil { - return false, errors.Wrap(err, "reconcile instance PVC") + if ok, err := r.reconcileInstance(ctx, &instance); err != nil { + return false, errors.Wrap(err, "reconcile instance") } else if !ok { done = false } @@ -221,16 +213,81 @@ func (r *snapshotRestorer) reconcileInstancePVCs(ctx context.Context) (bool, err return done, nil } -func (r *snapshotRestorer) reconcileInstancePVC( +func (r *snapshotRestorer) reconcileInstance(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { + dataOk, err := r.reconcileDataVolume(ctx, instance) + if err != nil { + return false, errors.Wrap(err, "reconcile data volume") + } + + walOk, err := r.reconcileWALVolume(ctx, instance) + if err != nil { + return false, errors.Wrap(err, "reconcile WAL volume") + } + + tablespaceOk, err := r.reconcileTablespaceVolumes(ctx, instance) + if err != nil { + return false, errors.Wrap(err, "reconcile tablespace volumes") + } + + return dataOk && walOk && tablespaceOk, nil +} + +func (r *snapshotRestorer) reconcileDataVolume( ctx context.Context, instance *appsv1.StatefulSet, ) (bool, error) { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.DataVolume == nil || r.backup.Status.Snapshot.DataVolume.SnapshotName == "" { + return false, errors.New("data volume snapshot not known") + } + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - observedPVC := &corev1.PersistentVolumeClaim{} + snapshotName := r.backup.Status.Snapshot.DataVolume.SnapshotName + return r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) +} + +func (r *snapshotRestorer) reconcileWALVolume( + ctx context.Context, + instance *appsv1.StatefulSet, +) (bool, error) { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.WALVolume == nil || 
r.backup.Status.Snapshot.WALVolume.SnapshotName == "" { + return true, nil + } + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresWALVolume(instance)} + snapshotName := r.backup.Status.Snapshot.WALVolume.SnapshotName + return r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) +} + +func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.TablespaceVolumes == nil || len(r.backup.Status.Snapshot.TablespaceVolumes) == 0 { + return true, nil + } + + done := true + for tsName, info := range r.backup.Status.Snapshot.TablespaceVolumes { + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstanceTablespaceDataVolume(instance, tsName)} + snapshotName := info.SnapshotName + ok, err := r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) + if err != nil { + return false, errors.Wrap(err, "reconcile tablespace PVC") + } + if !ok { + done = false + } + } + return done, nil +} +func (r *snapshotRestorer) reconcileInstancePVC( + ctx context.Context, + pvc *corev1.PersistentVolumeClaim, + instance *appsv1.StatefulSet, + snapshotName string, +) (bool, error) { + observedPVC := &corev1.PersistentVolumeClaim{} err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) if k8serrors.IsNotFound(err) { - if err := r.createPVCFromSnapshot(ctx, pvc, instance); err != nil { + if err := r.createPVCFromSnapshot(ctx, pvc, instance, snapshotName); err != nil { return false, errors.Wrap(err, "create PVC from data source") } return true, nil @@ -257,6 +314,7 @@ func (r *snapshotRestorer) createPVCFromSnapshot( ctx context.Context, pvc *corev1.PersistentVolumeClaim, instance *appsv1.StatefulSet, + snapshotName string, ) error { instanceSetName := instance.GetLabels()[naming.LabelInstanceSet] if instanceSetName == "" { @@ -266,7 +324,7 @@ func (r *snapshotRestorer) createPVCFromSnapshot( dataSource := 
&corev1.TypedLocalObjectReference{ APIGroup: ptr.To(volumesnapshotv1.GroupName), Kind: pNaming.KindVolumeSnapshot, - Name: r.restore.Spec.VolumeSnapshotName, + Name: snapshotName, } spec, err := r.pvcSpecFromDataSource(instanceSetName, dataSource) if err != nil { diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index aae2d9a928..a7d6a27d29 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -121,11 +121,15 @@ type PerconaPGBackupStatus struct { Snapshot *SnapshotStatus `json:"snapshot,omitempty"` } +type PVCSnapshotRef struct { + SnapshotName string `json:"snapshotName"` + PVCName string `json:"pvcName"` +} + type SnapshotStatus struct { - // VolumeSnapshotName is the name of the VolumeSnapshot that contains the snapshotted data. - VolumeSnapshotName string `json:"volumeSnapshotName"` - // TargetPVCName is the name of the source PVC that is being snapshotted. 
- TargetPVCName string `json:"targetPvcName"` + DataVolume *PVCSnapshotRef `json:"dataVolume,omitempty"` + WALVolume *PVCSnapshotRef `json:"walVolume,omitempty"` + TablespaceVolumes map[string]PVCSnapshotRef `json:"tablespaceVolumes,omitempty"` } // +kubebuilder:validation:Type=string diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index 32a539554d..3716ccc9da 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -29,7 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` - // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.repoName != \"\" || self.volumeSnapshotName != \"\"",message="either repoName or volumeSnapshotName must be set" + // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.repoName != \"\" || self.volumeSnapshotBackupName != \"\"",message="either repoName or volumeSnapshotBackupName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } @@ -55,10 +55,10 @@ type PerconaPGRestoreSpec struct { // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="repoName is an immutable field" RepoName *string `json:"repoName,omitempty"` - // The name of the VolumeSnapshot to perform restore from. + // The name of the backup to perform in-place volume snapshot restores from. // +optional - // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="volumeSnapshotName is an immutable field" - VolumeSnapshotName string `json:"volumeSnapshotName,omitempty"` + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="volumeSnapshotBackupName is an immutable field" + VolumeSnapshotBackupName string `json:"volumeSnapshotBackupName,omitempty"` // Command line options to include when running the pgBackRest restore command. 
// https://pgbackrest.org/command.html#command-restore diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index d23779ed6a..fd5b2ae230 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -555,6 +555,21 @@ func (in *PMMSpec) DeepCopy() *PMMSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PVCSnapshotRef) DeepCopyInto(out *PVCSnapshotRef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PVCSnapshotRef. +func (in *PVCSnapshotRef) DeepCopy() *PVCSnapshotRef { + if in == nil { + return nil + } + out := new(PVCSnapshotRef) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Patroni) DeepCopyInto(out *Patroni) { *out = *in @@ -680,7 +695,7 @@ func (in *PerconaPGBackupStatus) DeepCopyInto(out *PerconaPGBackupStatus) { if in.Snapshot != nil { in, out := &in.Snapshot, &out.Snapshot *out = new(SnapshotStatus) - **out = **in + (*in).DeepCopyInto(*out) } } @@ -1239,6 +1254,23 @@ func (in *ServiceExpose) DeepCopy() *ServiceExpose { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SnapshotStatus) DeepCopyInto(out *SnapshotStatus) { *out = *in + if in.DataVolume != nil { + in, out := &in.DataVolume, &out.DataVolume + *out = new(PVCSnapshotRef) + **out = **in + } + if in.WALVolume != nil { + in, out := &in.WALVolume, &out.WALVolume + *out = new(PVCSnapshotRef) + **out = **in + } + if in.TablespaceVolumes != nil { + in, out := &in.TablespaceVolumes, &out.TablespaceVolumes + *out = make(map[string]PVCSnapshotRef, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SnapshotStatus. From 9f9620d28ebae17fe79081a169833228d74c7758 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 10:34:16 +0530 Subject: [PATCH 50/90] fix tests Signed-off-by: Mayank Shah --- .../04-assert.yaml | 2 +- .../05-assert.yaml | 2 +- .../05-create-restore.yaml | 2 +- .../08-assert.yaml | 2 +- .../09-assert.yaml | 2 +- .../09-create-restore-pitr.yaml | 2 +- .../pgbackup/snapshots/reconcile.go | 33 ++++++++++++------- 7 files changed, 28 insertions(+), 17 deletions(-) diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml index 1ade55f88b..75f9b9b03a 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml @@ -15,7 +15,7 @@ status: kind: VolumeSnapshot apiVersion: snapshot.storage.k8s.io/v1 metadata: - name: backup1 + name: backup1-pgdata ownerReferences: - apiVersion: pgv2.percona.com/v2 kind: PerconaPGBackup diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml index 451b526115..ec2c619a09 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml @@ -14,7 +14,7 @@ spec: dataSource: apiGroup: 
snapshot.storage.k8s.io kind: VolumeSnapshot - name: backup1 + name: backup1-pgdata --- apiVersion: pgv2.percona.com/v2 kind: PerconaPGRestore diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml index 033b0bbfaa..9b537d9f39 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml @@ -18,6 +18,6 @@ commands: name: restore1 spec: pgCluster: backup-snapshot - volumeSnapshotName: backup1 + volumeSnapshotBackupName: backup1 EOF timeout: 360 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml index c999448404..f714e66783 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml @@ -15,7 +15,7 @@ status: kind: VolumeSnapshot apiVersion: snapshot.storage.k8s.io/v1 metadata: - name: backup3 + name: backup3-pgdata ownerReferences: - apiVersion: pgv2.percona.com/v2 kind: PerconaPGBackup diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml index c4048d5662..e2e0698fc0 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/09-assert.yaml @@ -14,7 +14,7 @@ spec: dataSource: apiGroup: snapshot.storage.k8s.io kind: VolumeSnapshot - name: backup3 + name: backup3-pgdata --- apiVersion: pgv2.percona.com/v2 kind: PerconaPGRestore diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml index 5d17230911..49e1a15065 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml +++ 
b/e2e-tests/tests/demand-backup-offline-snapshot/09-create-restore-pitr.yaml @@ -39,7 +39,7 @@ commands: spec: pgCluster: backup-snapshot repoName: repo1 - volumeSnapshotName: backup3 + volumeSnapshotBackupName: backup3 options: - --type=time - --target="${latest_restorable_time}" diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 3aa4544fda..11887ff5dc 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -187,18 +187,21 @@ func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Re return reconcile.Result{}, fmt.Errorf("failed to reconcile tablespace snapshot: %w", err) } - if dataOk && walOk && tablespaceOk { - if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.State = v2.BackupSucceeded - bcp.Status.CompletedAt = ptr.To(metav1.Now()) - }); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) - } - return reconcile.Result{}, nil + if !dataOk || !walOk || !tablespaceOk { + return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - // Check again later - return reconcile.Result{RequeueAfter: time.Second * 5}, nil + if err := r.complete(ctx); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) + } + + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupSucceeded + bcp.Status.CompletedAt = ptr.To(metav1.Now()) + }); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + return reconcile.Result{}, nil } func (r *snapshotReconciler) reconcileSnapshot(ctx context.Context, volumeSnapshot *volumesnapshotv1.VolumeSnapshot) (bool, error) { @@ -277,6 +280,10 @@ func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context) (bool, e } func (r *snapshotReconciler) 
reconcileWALSnapshot(ctx context.Context) (bool, error) { + if r.backup.Status.Snapshot.WALVolume == nil || r.backup.Status.Snapshot.WALVolume.PVCName == "" { + return true, nil + } + snapshotName := r.backup.GetName() + "-" + naming.RolePostgresWAL volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ ObjectMeta: metav1.ObjectMeta{ @@ -307,6 +314,10 @@ func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context) (bool, er } func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context) (bool, error) { + if len(r.backup.Status.Snapshot.TablespaceVolumes) == 0 { + return true, nil + } + done := true for tsName, info := range r.backup.Status.Snapshot.TablespaceVolumes { snapshotName := r.backup.GetName() + "-" + tsName + "-" + naming.RoleTablespace @@ -472,7 +483,7 @@ func (r *snapshotReconciler) complete(ctx context.Context) error { controllerutil.RemoveFinalizer(bcp, pNaming.FinalizerSnapshotInProgress) return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) }); err != nil { - return fmt.Errorf("failed to add remove finalizer: %w", err) + return fmt.Errorf("failed to remove finalizer: %w", err) } return nil } From 04dcc56155da1230455f048d207ae859e4359c53 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:03:08 +0530 Subject: [PATCH 51/90] workaround improvements Signed-off-by: Mayank Shah --- build/postgres-operator/Dockerfile | 2 +- build/postgres-operator/init-entrypoint.sh | 2 +- .../restore-command-wrapper.sh | 11 + build/postgres-operator/restore_command.sh | 8 - .../11-assert.yaml | 106 ++++++++ .../11-create-cluster-from-snapshot.yaml | 15 ++ .../12-assert.yaml | 11 + .../12-verify-restored-data.yaml | 13 + internal/pgbackrest/postgres.go | 18 +- percona/controller/pgcluster/controller.go | 37 --- percona/controller/pgrestore/controller.go | 11 +- .../pgrestore/snapshot/reconcile.go | 235 ++++++++++++++++-- percona/naming/annotations.go | 4 + 13 files changed, 392 insertions(+), 81 deletions(-) create mode 100644 
build/postgres-operator/restore-command-wrapper.sh delete mode 100644 build/postgres-operator/restore_command.sh create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/11-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/11-create-cluster-from-snapshot.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/12-assert.yaml create mode 100644 e2e-tests/tests/demand-backup-offline-snapshot/12-verify-restored-data.yaml diff --git a/build/postgres-operator/Dockerfile b/build/postgres-operator/Dockerfile index 6a17621051..e6c5f6867b 100644 --- a/build/postgres-operator/Dockerfile +++ b/build/postgres-operator/Dockerfile @@ -67,7 +67,7 @@ COPY build/postgres-operator/init-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-liveness-check.sh /usr/local/bin COPY build/postgres-operator/postgres-readiness-check.sh /usr/local/bin -COPY build/postgres-operator/restore_command.sh /usr/local/bin +COPY build/postgres-operator/restore-command-wrapper.sh /usr/local/bin COPY hack/tools/queries /opt/crunchy/conf RUN chgrp -R 0 /opt/crunchy/conf && chmod -R g=u opt/crunchy/conf diff --git a/build/postgres-operator/init-entrypoint.sh b/build/postgres-operator/init-entrypoint.sh index ed4b419033..3090212fbf 100755 --- a/build/postgres-operator/init-entrypoint.sh +++ b/build/postgres-operator/init-entrypoint.sh @@ -10,4 +10,4 @@ install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-entrypoi install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-liveness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-liveness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-readiness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-readiness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/relocate-extensions.sh" "${CRUNCHY_BINDIR}/bin/relocate-extensions.sh" -install -o "$(id -u)" -g "$(id 
-g)" -m 0755 -D "/usr/local/bin/restore_command.sh" "${CRUNCHY_BINDIR}/bin/restore_command.sh" +install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/restore-command-wrapper.sh" "${CRUNCHY_BINDIR}/bin/restore-command-wrapper.sh" diff --git a/build/postgres-operator/restore-command-wrapper.sh b/build/postgres-operator/restore-command-wrapper.sh new file mode 100644 index 0000000000..99d7c5ba26 --- /dev/null +++ b/build/postgres-operator/restore-command-wrapper.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +# When this marker exists (e.g. after a snapshot restore), skip all WAL recovery by +# exiting non-zero. Do not remove the file so every restore_command call is skipped. +if [[ -f "${PGDATA}/skip-wal-recovery" ]]; then + echo "Skipping WAL archive recovery" + exit 1 +fi + +exec "$@" diff --git a/build/postgres-operator/restore_command.sh b/build/postgres-operator/restore_command.sh deleted file mode 100644 index 69962f713e..0000000000 --- a/build/postgres-operator/restore_command.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -e - -if [[ ${DISABLE_WAL_ARCHIVE_RECOVERY:-} == "1" ]]; then - exit 1 -fi - -exec "$@" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/11-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/11-assert.yaml new file mode 100644 index 0000000000..da17216745 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/11-assert.yaml @@ -0,0 +1,106 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 480 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: cluster-from-snapshot + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/instance-set: instance1 + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + name: cluster-from-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 1 + readyReplicas: 1 + currentReplicas: 
1 + updatedReplicas: 1 + collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: cluster-from-snapshot-pgbouncer + labels: + postgres-operator.crunchydata.com/cluster: cluster-from-snapshot + postgres-operator.crunchydata.com/role: pgbouncer + annotations: + deployment.kubernetes.io/revision: '1' + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + name: cluster-from-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 3 + updatedReplicas: 3 + readyReplicas: 3 +--- +kind: Job +apiVersion: batch/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: cluster-from-snapshot + postgres-operator.crunchydata.com/pgbackrest: '' + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create + postgres-operator.crunchydata.com/pgbackrest-repo: repo1 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + controller: true + blockOwnerDeletion: true +status: + succeeded: 1 +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: cluster-from-snapshot + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGCluster + name: cluster-from-snapshot + controller: true + blockOwnerDeletion: true + finalizers: + - postgres-operator.crunchydata.com/finalizer +status: + instances: + - name: instance1 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 + observedGeneration: 1 + pgbackrest: + repos: + - name: repo1 + stanzaCreated: true + proxy: + pgBouncer: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: cluster-from-snapshot +status: + pgbouncer: + ready: 3 + size: 3 + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/11-create-cluster-from-snapshot.yaml 
b/e2e-tests/tests/demand-backup-offline-snapshot/11-create-cluster-from-snapshot.yaml new file mode 100644 index 0000000000..bbc2494ad6 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/11-create-cluster-from-snapshot.yaml @@ -0,0 +1,15 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr "cluster-from-snapshot" ${RANDOM} \ + | yq '.spec.instances[0].dataVolumeClaimSpec.dataSource.apiGroup="snapshot.storage.k8s.io"' \ + | yq '.spec.instances[0].dataVolumeClaimSpec.dataSource.kind="VolumeSnapshot"' \ + | yq '.spec.instances[0].dataVolumeClaimSpec.dataSource.name="backup3-pgdata"' \ + | kubectl -n "${NAMESPACE}" apply -f - \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/12-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/12-assert.yaml new file mode 100644 index 0000000000..551eb7bc77 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/12-assert.yaml @@ -0,0 +1,11 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 12-verify-restored-data +data: + data: |2- + 100500 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/12-verify-restored-data.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/12-verify-restored-data.yaml new file mode 100644 index 0000000000..5fa88503c8 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/12-verify-restored-data.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_psql_local '\c myapp \\\ SELECT * from myApp;' "postgres:$(get_psql_user_pass cluster-from-snapshot-pguser-postgres)@$(get_psql_user_host cluster-from-snapshot-pguser-postgres)") + + kubectl create configmap -n "${NAMESPACE}" 12-verify-restored-data 
--from-literal=data="${data}" diff --git a/internal/pgbackrest/postgres.go b/internal/pgbackrest/postgres.go index 91e69eed3c..e8e3c49c4a 100644 --- a/internal/pgbackrest/postgres.go +++ b/internal/pgbackrest/postgres.go @@ -78,7 +78,7 @@ func PostgreSQL( // Fetch WAL files from any configured repository during recovery. // - https://pgbackrest.org/command.html#command-archive-get // - https://www.postgresql.org/docs/current/runtime-config-wal.html - restore := "sh /opt/crunchy/bin/restore_command.sh " + restore := "sh /opt/crunchy/bin/restore-command-wrapper.sh " restore += `pgbackrest --stanza=` + DefaultStanzaName + ` archive-get %f "%p"` if inCluster.Spec.Patroni != nil && inCluster.Spec.Patroni.DynamicConfiguration != nil { postgresql, ok := inCluster.Spec.Patroni.DynamicConfiguration["postgresql"].(map[string]any) @@ -92,17 +92,15 @@ func PostgreSQL( } } } - outParameters.Mandatory.Add("restore_command", restore) - if inCluster.Spec.Standby != nil && inCluster.Spec.Standby.Enabled && inCluster.Spec.Standby.RepoName != "" { - - // Fetch WAL files from the designated repository. The repository name - // is validated by the Kubernetes API, so it does not need to be quoted - // nor escaped. - repoName := inCluster.Spec.Standby.RepoName - restore += " --repo=" + strings.TrimPrefix(repoName, "repo") - outParameters.Mandatory.Add("restore_command", restore) + // Append --repo only when using the default pgbackrest restore command; + // a custom restore_command (e.g. "exit 1") should not be modified. 
+ if strings.HasPrefix(restore, "sh /opt/crunchy/bin/restore-command-wrapper.sh ") { + repoName := inCluster.Spec.Standby.RepoName + restore += " --repo=" + strings.TrimPrefix(repoName, "repo") + } } + outParameters.Mandatory.Add("restore_command", restore) } func updateCommandRestorableTime(archive *string) { diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index a72cd2507a..e9837b75ea 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -330,10 +330,6 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "reconcile scheduled backups") } - if err := r.reconcileWALRecoveryOnStart(ctx, cr); err != nil { - return reconcile.Result{}, errors.Wrap(err, "reconcile WAL recovery on start") - } - if cr.Spec.Pause != nil && *cr.Spec.Pause { backupRunning, err := isBackupRunning(ctx, r.Client, cr) if err != nil { @@ -388,39 +384,6 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return ctrl.Result{}, nil } -// When a snapshot restore is executed without PiTR, we must disable recovery from WAL on startup -// to ensure that data is consistent with the snapshot. 
-func (r *PGClusterReconciler) reconcileWALRecoveryOnStart(ctx context.Context, cr *v2.PerconaPGCluster) error { - var restores v2.PerconaPGRestoreList - if err := r.Client.List(ctx, &restores, client.MatchingFields{ - v2.IndexFieldPGCluster: cr.GetName(), - }, client.InNamespace(cr.Namespace)); err != nil { - return errors.Wrap(err, "failed to list restores") - } - - disableRecovery := func() { - for i := range cr.Spec.InstanceSets { - if len(cr.Spec.InstanceSets[i].Env) == 0 { - cr.Spec.InstanceSets[i].Env = make([]corev1.EnvVar, 0) - } - cr.Spec.InstanceSets[i].Env = append(cr.Spec.InstanceSets[i].Env, corev1.EnvVar{ - Name: "DISABLE_WAL_ARCHIVE_RECOVERY", - Value: "1", - }) - } - } - - for _, restore := range restores.Items { - if restore.IsCompleted() { - continue - } - if restore.Spec.VolumeSnapshotBackupName != "" && (restore.Spec.RepoName == nil || *restore.Spec.RepoName == "") { - disableRecovery() - } - } - return nil -} - func (r *PGClusterReconciler) reconcileTLS(ctx context.Context, cr *v2.PerconaPGCluster) error { if err := r.validateTLS(ctx, cr); err != nil { return errors.Wrap(err, "validate TLS") diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 233daa24cd..3514819cef 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -16,6 +16,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/percona/controller" "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/snapshot" @@ -35,10 +36,18 @@ type PGRestoreReconciler struct { Owner client.FieldOwner Recorder record.EventRecorder Tracer trace.Tracer + PodExec runtime.PodExecutor } // SetupWithManager adds the 
perconapgrestore controller to the provided runtime manager func (r *PGRestoreReconciler) SetupWithManager(mgr manager.Manager) error { + if r.PodExec == nil { + var err error + r.PodExec, err = runtime.NewPodExecutor(mgr.GetConfig()) + if err != nil { + return err + } + } return builder.ControllerManagedBy(mgr).For(&v2.PerconaPGRestore{}).Complete(r) } @@ -70,7 +79,7 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R if pgRestore.Spec.VolumeSnapshotBackupName != "" { // Delegate to snapshot restore reconciliation - return snapshot.Reconcile(ctx, r.Client, pgCluster, pgRestore) + return snapshot.Reconcile(ctx, r.Client, r.PodExec, pgCluster, pgRestore) } if pgRestore.DeletionTimestamp != nil { diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 8f0470b2ab..e476ac2ab8 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -3,13 +3,18 @@ package snapshot import ( "context" "fmt" + "io" + "path" + "strings" "time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" @@ -18,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" @@ -35,6 +41,7 @@ type snapshotRestorer struct { cluster 
*v2.PerconaPGCluster backup *v2.PerconaPGBackup restore *v2.PerconaPGRestore + podExec runtime.PodExecutor } func newSnapshotRestorer( @@ -43,6 +50,7 @@ func newSnapshotRestorer( cluster *v2.PerconaPGCluster, backup *v2.PerconaPGBackup, restore *v2.PerconaPGRestore, + exec runtime.PodExecutor, ) *snapshotRestorer { return &snapshotRestorer{ cl: cl, @@ -50,12 +58,14 @@ func newSnapshotRestorer( cluster: cluster, backup: backup, restore: restore, + podExec: exec, } } func Reconcile( ctx context.Context, c client.Client, + exec runtime.PodExecutor, pg *v2.PerconaPGCluster, restore *v2.PerconaPGRestore, ) (reconcile.Result, error) { @@ -71,7 +81,7 @@ func Reconcile( return reconcile.Result{}, errors.Wrap(err, "get backup") } - r := newSnapshotRestorer(c, log, pg, backup, restore) + r := newSnapshotRestorer(c, log, pg, backup, restore, exec) if !restore.GetDeletionTimestamp().IsZero() { if ok, err := r.runFinalizers(ctx); err != nil { @@ -152,13 +162,43 @@ func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Res } func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Result, error) { - if ok, err := r.reconcileInstances(ctx); err != nil { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "list instances") + } + + if ok, err := r.reconcileInstances(ctx, instances); err != nil { return reconcile.Result{}, errors.Wrap(err, "reconcile instances") } else if !ok { r.log.Info("Waiting for instances PVCs to be reconciled") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } + // Prepare PVCs + if ok, err := r.runPrepareJob(ctx, instances); err != nil { + return reconcile.Result{}, errors.Wrap(err, "run prepare job") + } else if 
!ok { + r.log.Info("Preparing PVCs") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + if err := r.reconcilePrepareJobAnnotation(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile prepare job annotation") + } + + // Run PITR if needed + if ok, err := r.restorePITR(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PITR") + } else if !ok { + r.log.Info("Waiting for PITR to complete") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + // Recreate DCS so that cluster can be bootstrapped with new data. if err := r.reconcileLeaderEndpoints(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "reconcile leader endpoints") @@ -171,14 +211,6 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - // Perform PITR if needed. - if ok, err := r.restorePITR(ctx); err != nil { - return reconcile.Result{}, errors.Wrap(err, "restore PITR") - } else if !ok { - r.log.Info("Waiting for PITR to complete") - return reconcile.Result{RequeueAfter: time.Second * 5}, nil - } - if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { restore.Status.State = v2.RestoreSucceeded restore.Status.CompletedAt = &metav1.Time{Time: time.Now()} @@ -190,18 +222,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu return reconcile.Result{}, nil } -func (r *snapshotRestorer) reconcileInstances(ctx context.Context) (bool, error) { - instances := &appsv1.StatefulSetList{} - if err := r.cl.List(ctx, instances, &client.ListOptions{ - Namespace: r.cluster.GetNamespace(), - LabelSelector: labels.SelectorFromSet(map[string]string{ - naming.LabelCluster: r.cluster.Name, - naming.LabelData: naming.DataPostgres, - }), - }); err != nil { - return false, errors.Wrap(err, "list instances") - } - +func (r *snapshotRestorer) reconcileInstances(ctx context.Context, instances 
*appsv1.StatefulSetList) (bool, error) { done := true for _, instance := range instances.Items { if ok, err := r.reconcileInstance(ctx, &instance); err != nil { @@ -466,8 +487,14 @@ func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { return func(ctx context.Context, restore *v2.PerconaPGRestore) error { - if _, err := r.unsuspendAllInstances(ctx); err != nil { + if done, err := r.unsuspendAllInstances(ctx); err != nil { return errors.Wrap(err, "resume cluster") + } else if !done { + return controller.ErrFinalizerPending + } + + if err := r.cleanupSkipRecoveryFile(ctx); err != nil { + return errors.Wrap(err, "cleanup") } return nil } @@ -506,3 +533,165 @@ func (r *snapshotRestorer) isPITRInProgress() bool { _, ok := r.cluster.GetAnnotations()[naming.PGBackRestRestore] return ok } + +func (r *snapshotRestorer) reconcilePrepareJobAnnotation(ctx context.Context) error { + if _, ok := r.restore.GetAnnotations()[pNaming.AnnotationPVCsPreparedAt]; ok { + return nil + } + + orig := r.restore.DeepCopy() + annotations := r.restore.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[pNaming.AnnotationPVCsPreparedAt] = time.Now().Format(time.RFC3339) + r.restore.SetAnnotations(annotations) + if err := r.cl.Patch(ctx, r.restore.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch restore annotations") + } + return nil +} + +// prepares PVCs before starting the cluster. +func (r *snapshotRestorer) runPrepareJob(ctx context.Context, instances *appsv1.StatefulSetList) (bool, error) { + jobName := r.restore.GetName() + "-prepare" + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: r.cluster.GetNamespace(), + }, + } + + // PVC already prepared, delete and return. 
+ if _, ok := r.restore.GetAnnotations()[pNaming.AnnotationPVCsPreparedAt]; ok { + return true, client.IgnoreNotFound(r.cl.Delete(ctx, job, + client.PropagationPolicy(metav1.DeletePropagationForeground))) + } + + err := r.cl.Get(ctx, client.ObjectKeyFromObject(job), job) + if k8serrors.IsNotFound(err) { + generatePrepareJob(job, instances, r.cluster, r.restore) + if err := controllerutil.SetControllerReference(r.restore, job, r.cl.Scheme()); err != nil { + return false, errors.Wrap(err, "set controller reference") + } + if err := r.cl.Create(ctx, job); err != nil { + return false, errors.Wrap(err, "create prepare job") + } + return false, nil + } else if err != nil { + return false, errors.Wrap(err, "get prepare job") + } + + if !job.Status.CompletionTime.IsZero() && job.Status.Succeeded > 0 { + return true, nil + } + + if job.Status.Failed > 0 { + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreFailed + }); err != nil { + return false, errors.Wrap(err, "update restore status") + } + return true, nil + } + return false, nil +} + +func generatePrepareJob( + job *batchv1.Job, + instances *appsv1.StatefulSetList, + cluster *v2.PerconaPGCluster, + restore *v2.PerconaPGRestore, +) { + volumes := []corev1.Volume{} + volumeMounts := []corev1.VolumeMount{} + + for _, instance := range instances.Items { + volName := instance.GetName() + "-pgdata" + volumes = append(volumes, corev1.Volume{ + Name: volName, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: naming.InstancePostgresDataVolume(&instance).Name, + }, + }, + }) + volumeMounts = append(volumeMounts, corev1.VolumeMount{ + Name: volName, + MountPath: path.Join(instance.GetName(), "pgdata"), + }) + } + + scriptParts := []string{"set -e"} + for _, mount := range volumeMounts { + if restore.Spec.RepoName == nil || restore.Spec.VolumeSnapshotBackupName == "" { // no PITR + // PVCs are not 
needed, signal the restore_command to skip WAL recovery in order + // to maintain consistency with the snapshot data. + dataDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d", cluster.Spec.PostgresVersion)) + signalFile := path.Join(dataDir, "skip-wal-recovery") + scriptParts = append(scriptParts, fmt.Sprintf("touch %q", signalFile)) + } else { + // PITR is needed, clear local WAL files since they may belong to a different timeline. + // PITR restore job will fetch the required WAL files from the repo. + walDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d_wal", cluster.Spec.PostgresVersion)) + scriptParts = append(scriptParts, fmt.Sprintf("find %q -mindepth 1 -delete", walDir)) + } + } + script := strings.Join(scriptParts, "\n") + + container := corev1.Container{ + Name: "snapshot-prepare", + Image: cluster.Spec.Image, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("100Mi"), + }, + }, + VolumeMounts: volumeMounts, + Command: []string{"bash", "-c", script}, + } + job.Spec = batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + naming.DefaultContainerAnnotation: "prepare", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{container}, + Volumes: volumes, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + } + +} + +// We create a $PGDATA/skip-wal-recovery file during the snapshot restore when no PITR is specified. +// This method will cleanup this file after the restore is completed. 
+func (r *snapshotRestorer) cleanupSkipRecoveryFile(ctx context.Context) error { + if r.restore.Spec.RepoName != nil { + return nil + } + + pods := &corev1.PodList{} + if err := r.cl.List(ctx, pods, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return errors.Wrap(err, "list pods") + } + + rmScript := `rm -f "${PGDATA}/skip-wal-recovery"` + for _, pod := range pods.Items { + if err := r.podExec(ctx, r.cluster.GetNamespace(), pod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, "sh", "-c", rmScript); err != nil { + return err + } + } + + return nil +} diff --git a/percona/naming/annotations.go b/percona/naming/annotations.go index a99b22f7dc..86d508d866 100644 --- a/percona/naming/annotations.go +++ b/percona/naming/annotations.go @@ -54,4 +54,8 @@ const ( // AnnotationSnapshotRestore is the annotation added to the data PVCs of a cluster // to indicate the name of the PerconaPGRestore that is restoring the PVC from a snapshot. AnnotationSnapshotRestore = PrefixPerconaPGV2 + "snapshot-restore" + + // AnnotationPVCsPreparedAt is the annotation added to the PerconaPGRestore which + // prepares the PVCs for a snapshot restore. 
+ AnnotationPVCsPreparedAt = PrefixPerconaPGV2 + "pvcs-prepared-at" ) From 05dd8fccc998eca77b8239238aa3942a48ef6515 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:10:42 +0530 Subject: [PATCH 52/90] update examples Signed-off-by: Mayank Shah --- deploy/restore.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/restore.yaml b/deploy/restore.yaml index 2f8ccd86cb..411b1508e3 100644 --- a/deploy/restore.yaml +++ b/deploy/restore.yaml @@ -5,7 +5,7 @@ metadata: spec: pgCluster: cluster1 repoName: repo1 -# volumeSnapshotName: backup1 +# volumeSnapshotBackupName: backup1 # options: # - --type=time # - --target="2022-11-30 15:12:11+03" From bdb8cab8566423c70e37b1bebe460fc1cb1fd05d Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:18:53 +0530 Subject: [PATCH 53/90] fix unit test Signed-off-by: Mayank Shah --- internal/pgbackrest/postgres_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/pgbackrest/postgres_test.go b/internal/pgbackrest/postgres_test.go index c45dd1d2d3..cdb5134652 100644 --- a/internal/pgbackrest/postgres_test.go +++ b/internal/pgbackrest/postgres_test.go @@ -30,7 +30,7 @@ func TestPostgreSQLParameters(t *testing.T) { assert.DeepEqual(t, parameters.Mandatory.AsMap(), map[string]string{ "archive_mode": "on", "archive_command": `pgbackrest --stanza=db archive-push "%p"`, - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, }) assert.DeepEqual(t, parameters.Default.AsMap(), map[string]string{ @@ -66,7 +66,7 @@ func TestPostgreSQLParameters(t *testing.T) { assert.DeepEqual(t, parameters.Mandatory.AsMap(), map[string]string{ "archive_mode": "on", "archive_command": `pgbackrest --stanza=db archive-push "%p"`, - "restore_command": `pgbackrest --stanza=db archive-get %f "%p" --repo=99`, + "restore_command": `sh 
/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, }) cluster.Spec.Standby = nil @@ -86,7 +86,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) @@ -113,7 +113,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) @@ -169,7 +169,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p" --repo=99`, + "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, "track_commit_timestamp": "true", }) @@ -190,7 +190,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! 
-z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) From 99bf9e2157cefbc827154bf9830e4abcde815627 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:19:36 +0530 Subject: [PATCH 54/90] remove unused code Signed-off-by: Mayank Shah --- .../controller/pgbackup/snapshots/offline.go | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 8b4179abaf..3261604d44 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -7,8 +7,6 @@ import ( "time" "github.com/pkg/errors" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" @@ -164,27 +162,3 @@ func (e *offlineExec) getBackupTarget(ctx context.Context) (string, error) { log.Info("Selected backup target", "instance", instanceName) return instanceName, nil } - -func (e *offlineExec) getTargetPVC(ctx context.Context, instanceName string) (string, error) { - pvcs := &corev1.PersistentVolumeClaimList{} - if err := e.cl.List(ctx, pvcs, &client.ListOptions{ - Namespace: e.cluster.GetNamespace(), - LabelSelector: labels.SelectorFromSet(map[string]string{ - naming.LabelInstance: instanceName, - naming.LabelRole: naming.RolePostgresData, - }), - }); err != nil { - return "", errors.Wrap(err, "failed to list PVCs") - } - - if len(pvcs.Items) == 0 { - return "", errors.New("no PVC found") - } - - log := logging.FromContext(ctx) - - if len(pvcs.Items) > 1 { - log.V(1).Info("Multiple PVCs found, using the first one", "pvc", pvcs.Items[0].GetName()) - } - return 
pvcs.Items[0].GetName(), nil -} From 457231baed0eea522a1bd1896c2cf6e7944cb5a9 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:24:41 +0530 Subject: [PATCH 55/90] add unit test shouldFailSnapshot Signed-off-by: Mayank Shah --- .../pgbackup/snapshots/reconcile_test.go | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 percona/controller/pgbackup/snapshots/reconcile_test.go diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go new file mode 100644 index 0000000000..378906b29a --- /dev/null +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -0,0 +1,61 @@ +package snapshots + +import ( + "testing" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +func TestShouldFailSnapshot(t *testing.T) { + now := time.Now() + + tests := []struct { + name string + volumeSnapshot *volumesnapshotv1.VolumeSnapshot + wantFail bool + }{ + { + name: "Status.Error.Time is zero", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: ptr.To(metav1.Time{}), + }, + }, + }, + wantFail: false, + }, + { + name: "error within deadline", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: ptr.To(metav1.NewTime(now.Add(-1 * time.Minute))), // 1mins ago, within deadline + }, + }, + }, + wantFail: false, + }, + { + name: "error past deadline", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: ptr.To(metav1.NewTime(now.Add(-10 * time.Minute))), // 10 minutes ago (past 5min deadline) + }, + }, + }, + wantFail: true, + }, + } + 
for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := shouldFailSnapshot(tt.volumeSnapshot); got != tt.wantFail { + t.Errorf("shouldFailSnapshot() = %v, want %v", got, tt.wantFail) + } + }) + } +} From d25ce94022eb4d016ea641ff774517e91f0e6d63 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:38:12 +0530 Subject: [PATCH 56/90] add unit tests for backup helpers Signed-off-by: Mayank Shah --- .../pgbackup/snapshots/reconcile_test.go | 403 +++++++++++++++++- 1 file changed, 400 insertions(+), 3 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go index 378906b29a..c92f3c1e09 100644 --- a/percona/controller/pgbackup/snapshots/reconcile_test.go +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -1,12 +1,23 @@ package snapshots import ( + "context" "testing" "time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) func TestShouldFailSnapshot(t *testing.T) { @@ -53,9 +64,395 @@ func TestShouldFailSnapshot(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := shouldFailSnapshot(tt.volumeSnapshot); got != tt.wantFail { - t.Errorf("shouldFailSnapshot() = %v, want %v", got, tt.wantFail) - } + assert.Equal(t, tt.wantFail, shouldFailSnapshot(tt.volumeSnapshot)) }) } } + +func TestReconcileDataSnapshot(t *testing.T) { + 
ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + pvcName := "data-pvc" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: pvcName}, + }, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("creates VolumeSnapshot and updates backup status", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileDataSnapshot(ctx) + require.NoError(t, err) + assert.False(t, ok, "snapshot not ready yet") + + vsName := backupName + "-" + naming.RolePostgresData + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, pvcName, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + require.NotNil(t, updated.Status.Snapshot.DataVolume) + assert.Equal(t, vsName, updated.Status.Snapshot.DataVolume.SnapshotName) + }) + + t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { + vsName := backupName + "-" + naming.RolePostgresData + existingVS := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vsName, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(pvcName), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + ReadyToUse: ptr.To(true), + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS). + WithStatusSubresource(backup, existingVS). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileDataSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "snapshot ready") + }) +} + +func TestReconcileWALSnapshot(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + walPVCName := "wal-pvc" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("returns true when WALVolume is nil", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + // WALVolume intentionally nil + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "no WAL volume to snapshot") + }) + + t.Run("returns true when WALVolume.PVCName is empty", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + WALVolume: &v2.PVCSnapshotRef{PVCName: ""}, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "empty WAL PVC name") + }) + + t.Run("creates VolumeSnapshot and updates backup status", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + WALVolume: &v2.PVCSnapshotRef{PVCName: walPVCName}, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx) + require.NoError(t, err) + assert.False(t, ok, "snapshot not ready yet") + + vsName := backupName + "-" + naming.RolePostgresWAL + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, walPVCName, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + require.NotNil(t, updated.Status.Snapshot.WALVolume) + assert.Equal(t, vsName, updated.Status.Snapshot.WALVolume.SnapshotName) + }) + + t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { + vsName := backupName + "-" + naming.RolePostgresWAL + existingVS := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vsName, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(walPVCName), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + ReadyToUse: ptr.To(true), + }, + } + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + WALVolume: &v2.PVCSnapshotRef{PVCName: walPVCName}, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS). + WithStatusSubresource(backup, existingVS). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "snapshot ready") + }) +} + +func TestReconcileTablespaceSnapshot(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + snapshotClassName := "test-snapshotclass" + ts1Name, ts2Name := "ts1", "ts2" + ts1PVC, ts2PVC := "pvc-ts1", "pvc-ts2" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("returns true when TablespaceVolumes is empty", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + TablespaceVolumes: nil, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to snapshot") + }) + + t.Run("creates VolumeSnapshots and updates backup status", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + TablespaceVolumes: map[string]v2.PVCSnapshotRef{ + ts1Name: {PVCName: ts1PVC}, + ts2Name: {PVCName: ts2PVC}, + }, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx) + require.NoError(t, err) + assert.False(t, ok, "snapshots not ready yet") + + for _, tc := range []struct { + tsName string + pvc string + }{ + {ts1Name, ts1PVC}, + {ts2Name, ts2PVC}, + } { + vsName := backupName + "-" + tc.tsName + "-" + naming.RoleTablespace + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, tc.pvc, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + } + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + require.NotNil(t, updated.Status.Snapshot.TablespaceVolumes) + assert.Equal(t, backupName+"-"+ts1Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumes[ts1Name].SnapshotName) + assert.Equal(t, backupName+"-"+ts2Name+"-"+naming.RoleTablespace, 
updated.Status.Snapshot.TablespaceVolumes[ts2Name].SnapshotName) + }) + + t.Run("returns true when all existing VolumeSnapshots are ReadyToUse", func(t *testing.T) { + vs1Name := backupName + "-" + ts1Name + "-" + naming.RoleTablespace + vs2Name := backupName + "-" + ts2Name + "-" + naming.RoleTablespace + existingVS1 := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vs1Name, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(ts1PVC), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ReadyToUse: ptr.To(true)}, + } + existingVS2 := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vs2Name, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(ts2PVC), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ReadyToUse: ptr.To(true)}, + } + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, + TablespaceVolumes: map[string]v2.PVCSnapshotRef{ + ts1Name: {PVCName: ts1PVC}, + ts2Name: {PVCName: ts2PVC}, + }, + }, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS1, existingVS2). + WithStatusSubresource(backup, existingVS1, existingVS2). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx) + require.NoError(t, err) + assert.True(t, ok, "all tablespace snapshots ready") + }) +} + +// mockSnapshotExecutor is a no-op snapshotExecutor for tests. 
+type mockSnapshotExecutor struct{} + +func (m *mockSnapshotExecutor) prepare(ctx context.Context) (string, error) { return "instance-0", nil } +func (m *mockSnapshotExecutor) finalize(ctx context.Context) error { return nil } From de310a9592932882948322e0cf0a743d918e352f Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Tue, 10 Feb 2026 22:40:23 +0530 Subject: [PATCH 57/90] linting Signed-off-by: Mayank Shah --- percona/controller/pgrestore/snapshot/reconcile.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index e476ac2ab8..9487288210 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -386,6 +386,7 @@ func (r *snapshotRestorer) reconcileLeaderEndpoints(ctx context.Context) error { }, } + //nolint:staticcheck leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { return client.IgnoreNotFound(err) @@ -512,6 +513,7 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { } switch status { + case v2.RestoreNew: case v2.RestoreStarting: return false, pgbackrestRestore.Start(ctx) case v2.RestoreRunning: From b5236263f56774bcb105017c6621fa8da2d27567 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 10:48:17 +0530 Subject: [PATCH 58/90] cleanup unused code Signed-off-by: Mayank Shah --- cmd/postgres-operator/main.go | 9 --------- percona/controller/pgcluster/controller.go | 16 ---------------- .../pgv2.percona.com/v2/perconapgbackup_types.go | 10 +++------- 3 files changed, 3 insertions(+), 32 deletions(-) diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 33eac75bb8..130daba46c 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -221,15 +221,6 @@ func 
addControllersToManager(ctx context.Context, mgr manager.Manager) error { return err } - if err := mgr.GetFieldIndexer().IndexField( - context.Background(), - &v2.PerconaPGRestore{}, - v2.IndexFieldPGCluster, - v2.PGClusterIndexerFunc, - ); err != nil { - return err - } - if err := mgr.GetFieldIndexer().IndexField( context.Background(), &v2.PerconaPGBackup{}, diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index c01cf720a8..1237ebf6dc 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -112,25 +112,9 @@ func (r *PGClusterReconciler) SetupWithManager(ctx context.Context, mgr manager. WatchesRawSource(source.Kind(mgr.GetCache(), &batchv1.Job{}, r.watchBackupJobs())). WatchesRawSource(source.Kind(mgr.GetCache(), &v2.PerconaPGBackup{}, r.watchPGBackups())). WatchesRawSource(source.Channel(standbyClusterEvents, &handler.EnqueueRequestForObject{})). - Watches(&v2.PerconaPGRestore{}, handler.EnqueueRequestsFromMapFunc(r.watchRestores)). 
Complete(r) } -func (r *PGClusterReconciler) watchRestores(ctx context.Context, o client.Object) []reconcile.Request { - restore, ok := o.(*v2.PerconaPGRestore) - if !ok { - return nil - } - return []reconcile.Request{ - { - NamespacedName: client.ObjectKey{ - Namespace: restore.GetNamespace(), - Name: restore.Spec.PGCluster, - }, - }, - } -} - func (r *PGClusterReconciler) watchServices() handler.TypedFuncs[*corev1.Service, reconcile.Request] { return handler.TypedFuncs[*corev1.Service, reconcile.Request]{ UpdateFunc: func(ctx context.Context, e event.TypedUpdateEvent[*corev1.Service], q workqueue.TypedRateLimitingInterface[reconcile.Request]) { diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index a7d6a27d29..46586769f2 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -84,15 +84,11 @@ const IndexFieldPGCluster = "spec.pgCluster" var PGClusterIndexerFunc client.IndexerFunc = func(obj client.Object) []string { backup, ok := obj.(*PerconaPGBackup) - if ok { - return []string{backup.Spec.PGCluster} + if !ok { + return nil } - restore, ok := obj.(*PerconaPGRestore) - if ok { - return []string{restore.Spec.PGCluster} - } - return nil + return []string{backup.Spec.PGCluster} } type PGBackupState string From 3da1a1012f3805b80e90c9ceb93bb8411508a603 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 10:49:34 +0530 Subject: [PATCH 59/90] formatting Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go index c92f3c1e09..c6592fb684 100644 --- a/percona/controller/pgbackup/snapshots/reconcile_test.go +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -6,14 +6,14 @@ import ( 
"time" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" @@ -382,7 +382,7 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { for _, tc := range []struct { tsName string - pvc string + pvc string }{ {ts1Name, ts1PVC}, {ts2Name, ts2PVC}, From 7b1224444fbd4ac9ba792abbb6b179061298ff72 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 10:53:43 +0530 Subject: [PATCH 60/90] code cleanup Signed-off-by: Mayank Shah --- internal/naming/annotations.go | 4 ---- internal/pgbackrest/postgres.go | 16 +++++++++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index 736356e9d5..ec04eb0e9a 100644 --- a/internal/naming/annotations.go +++ b/internal/naming/annotations.go @@ -81,8 +81,4 @@ const ( // is present, the controller will not update the ConfigMap, allowing users to make custom // modifications that won't be overwritten during reconciliation. OverrideConfigAnnotation = perconaAnnotationPrefix + "override-config" - - // K8SPG-771 - // DisableWALArchiveRecoveryAnnotation is an annotation used to disable WAL recovery on startup. 
- DisableWALArchiveRecoveryAnnotation = annotationPrefix + "disable-wal-archive-recovery" ) diff --git a/internal/pgbackrest/postgres.go b/internal/pgbackrest/postgres.go index e8e3c49c4a..f3403873a3 100644 --- a/internal/pgbackrest/postgres.go +++ b/internal/pgbackrest/postgres.go @@ -92,15 +92,17 @@ func PostgreSQL( } } } + outParameters.Mandatory.Add("restore_command", restore) + if inCluster.Spec.Standby != nil && inCluster.Spec.Standby.Enabled && inCluster.Spec.Standby.RepoName != "" { - // Append --repo only when using the default pgbackrest restore command; - // a custom restore_command (e.g. "exit 1") should not be modified. - if strings.HasPrefix(restore, "sh /opt/crunchy/bin/restore-command-wrapper.sh ") { - repoName := inCluster.Spec.Standby.RepoName - restore += " --repo=" + strings.TrimPrefix(repoName, "repo") - } + + // Fetch WAL files from the designated repository. The repository name + // is validated by the Kubernetes API, so it does not need to be quoted + // nor escaped. 
+ repoName := inCluster.Spec.Standby.RepoName + restore += " --repo=" + strings.TrimPrefix(repoName, "repo") + outParameters.Mandatory.Add("restore_command", restore) } - outParameters.Mandatory.Add("restore_command", restore) } func updateCommandRestorableTime(archive *string) { From 3f78ad891e94a1f64e38e370f5738b112d1e83ee Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:09:31 +0530 Subject: [PATCH 61/90] status improvements Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgbackups.yaml | 43 ++--- .../pgv2.percona.com_perconapgclusters.yaml | 43 ++--- deploy/bundle.yaml | 43 ++--- deploy/crd.yaml | 43 ++--- deploy/cw-bundle.yaml | 43 ++--- .../controller/pgbackup/snapshots/offline.go | 3 - .../pgbackup/snapshots/reconcile.go | 170 +++++++++--------- .../pgrestore/snapshot/reconcile.go | 27 +-- .../v2/perconapgbackup_types.go | 15 +- .../v2/zz_generated.deepcopy.go | 33 +--- 10 files changed, 178 insertions(+), 285 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 971b84ef39..4d66f038a8 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -403,38 +403,21 @@ spec: type: object snapshot: properties: - dataVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - tablespaceVolumes: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. 
+ type: string + tablespaceVolumeRefs: additionalProperties: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - type: object - walVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string type: object state: type: string diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index e8901e181d..df841b9a28 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -402,38 +402,21 @@ spec: type: object snapshot: properties: - dataVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - tablespaceVolumes: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeRefs: additionalProperties: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - type: object - walVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. 
+ type: string type: object state: type: string diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index f218c69d28..5b7215a223 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -697,38 +697,21 @@ spec: type: object snapshot: properties: - dataVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - tablespaceVolumes: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeRefs: additionalProperties: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - type: object - walVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string type: object state: type: string diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 2efd28b8ed..61346ad8f4 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -697,38 +697,21 @@ spec: type: object snapshot: properties: - dataVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - tablespaceVolumes: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. 
+ type: string + tablespaceVolumeRefs: additionalProperties: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - type: object - walVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string type: object state: type: string diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 0b4b5a8d77..aaa9e9550f 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -697,38 +697,21 @@ spec: type: object snapshot: properties: - dataVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - tablespaceVolumes: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeRefs: additionalProperties: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName - type: object - type: object - walVolume: - properties: - pvcName: - type: string - snapshotName: - type: string - required: - - pvcName - - snapshotName + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. 
+ type: string type: object state: type: string diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 3261604d44..33552ba097 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -15,14 +15,11 @@ import ( "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/internal/postgres" - pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) const ( - annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" - checkpointTimeoutSeconds = 30 // TODO: make this configurable waitTimeout = 5 * time.Minute retryInterval = 3 * time.Second diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 11887ff5dc..546dbb13c6 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -25,6 +25,8 @@ import ( ) const ( + annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" + defaultSnapshotErrorDeadline = 5 * time.Minute ) @@ -163,6 +165,7 @@ func (r *snapshotReconciler) reconcileStarting(ctx context.Context) (reconcile.R if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupRunning + bcp.Status.Snapshot = &v2.SnapshotStatus{} }); updErr != nil { return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } @@ -172,17 +175,22 @@ func (r *snapshotReconciler) reconcileStarting(ctx context.Context) (reconcile.R // +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshots,verbs=get;list;watch;create func (r 
*snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Result, error) { - dataOk, err := r.reconcileDataSnapshot(ctx) + dataPVC, walPVC, tablespacePVCs, err := r.getTargetPVCs(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get target PVCs: %w", err) + } + + dataOk, err := r.reconcileDataSnapshot(ctx, dataPVC) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to reconcile data snapshot: %w", err) } - walOk, err := r.reconcileWALSnapshot(ctx) + walOk, err := r.reconcileWALSnapshot(ctx, walPVC) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to reconcile WAL snapshot: %w", err) } - tablespaceOk, err := r.reconcileTablespaceSnapshot(ctx) + tablespaceOk, err := r.reconcileTablespaceSnapshot(ctx, tablespacePVCs) if err != nil { return reconcile.Result{}, fmt.Errorf("failed to reconcile tablespace snapshot: %w", err) } @@ -249,56 +257,52 @@ func (r *snapshotReconciler) reconcileSnapshot(ctx context.Context, volumeSnapsh } } -func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context) (bool, error) { - snapshotName := r.backup.GetName() + "-" + naming.RolePostgresData +func (r *snapshotReconciler) generateSnapshotIntent( + snapshotRole, + sourcePVC string) (*volumesnapshotv1.VolumeSnapshot, error) { + name := r.backup.GetName() + "-" + snapshotRole + namespace := r.backup.GetNamespace() volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ ObjectMeta: metav1.ObjectMeta{ - Name: snapshotName, - Namespace: r.backup.GetNamespace(), + Name: name, + Namespace: namespace, }, Spec: volumesnapshotv1.VolumeSnapshotSpec{ VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), Source: volumesnapshotv1.VolumeSnapshotSource{ - PersistentVolumeClaimName: &r.backup.Status.Snapshot.DataVolume.PVCName, + PersistentVolumeClaimName: &sourcePVC, }, }, } if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { - return false, fmt.Errorf("failed 
to set owner reference on volume snapshot: %w", err) + return nil, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) } + return volumeSnapshot, nil +} +func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context, targetPVC string) (bool, error) { + volumeSnapshot, err := r.generateSnapshotIntent(naming.RolePostgresData, targetPVC) ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) if err != nil { return false, fmt.Errorf("failed to reconcile snapshot: %w", err) } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.Snapshot.DataVolume.SnapshotName = volumeSnapshot.GetName() + bcp.Status.Snapshot.DataVolumeSnapshotRef = ptr.To(volumeSnapshot.GetName()) }); err != nil { return false, fmt.Errorf("failed to update backup status: %w", err) } return ok, nil } -func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context) (bool, error) { - if r.backup.Status.Snapshot.WALVolume == nil || r.backup.Status.Snapshot.WALVolume.PVCName == "" { +func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context, targetPVC string) (bool, error) { + if targetPVC == "" { return true, nil } - snapshotName := r.backup.GetName() + "-" + naming.RolePostgresWAL - volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ - ObjectMeta: metav1.ObjectMeta{ - Name: snapshotName, - Namespace: r.backup.GetNamespace(), - }, - Spec: volumesnapshotv1.VolumeSnapshotSpec{ - VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), - Source: volumesnapshotv1.VolumeSnapshotSource{ - PersistentVolumeClaimName: &r.backup.Status.Snapshot.WALVolume.PVCName, - }, - }, - } - if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { - return false, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + volumeSnapshot, err := r.generateSnapshotIntent(naming.RolePostgresWAL, targetPVC) + if err != nil { + return false, fmt.Errorf("failed to 
generate snapshot intent: %w", err) } ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) @@ -306,45 +310,32 @@ func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context) (bool, er return false, fmt.Errorf("failed to reconcile snapshot: %w", err) } if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.Snapshot.WALVolume.SnapshotName = volumeSnapshot.GetName() + bcp.Status.Snapshot.WALVolumeSnapshotRef = ptr.To(volumeSnapshot.GetName()) }); err != nil { return false, fmt.Errorf("failed to update backup status: %w", err) } return ok, nil } -func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context) (bool, error) { - if len(r.backup.Status.Snapshot.TablespaceVolumes) == 0 { +func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context, targetPVCs map[string]string) (bool, error) { + if len(targetPVCs) == 0 { return true, nil } done := true - for tsName, info := range r.backup.Status.Snapshot.TablespaceVolumes { - snapshotName := r.backup.GetName() + "-" + tsName + "-" + naming.RoleTablespace - volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ - ObjectMeta: metav1.ObjectMeta{ - Name: snapshotName, - Namespace: r.backup.GetNamespace(), - }, - Spec: volumesnapshotv1.VolumeSnapshotSpec{ - VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), - Source: volumesnapshotv1.VolumeSnapshotSource{ - PersistentVolumeClaimName: &info.PVCName, - }, - }, - } - if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { - return false, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) - } - + for tsName, targetPVC := range targetPVCs { + role := tsName + "-" + naming.RoleTablespace + volumeSnapshot, err := r.generateSnapshotIntent(role, targetPVC) ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) if err != nil { return false, fmt.Errorf("failed to reconcile snapshot: %w", err) } + if err := 
r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - ref := bcp.Status.Snapshot.TablespaceVolumes[tsName] - ref.SnapshotName = volumeSnapshot.GetName() - bcp.Status.Snapshot.TablespaceVolumes[tsName] = ref + if bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs == nil { + bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs = make(map[string]string) + } + bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs[tsName] = volumeSnapshot.GetName() }); err != nil { return false, fmt.Errorf("failed to update backup status: %w", err) } @@ -367,21 +358,13 @@ func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot return true, nil } -func (r *snapshotReconciler) prepare(ctx context.Context) error { - // finalizer already present, prepare already completed - if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { - return nil +func (r *snapshotReconciler) getTargetPVCs(ctx context.Context) (string, string, map[string]string, error) { + targetInstance := r.backup.GetAnnotations()[annotationBackupTarget] + if targetInstance == "" { + return "", "", nil, fmt.Errorf("backup target instance is not found") } - // prepare the cluster - targetInstance, err := r.exec.prepare(ctx) - if err != nil { - return fmt.Errorf("failed to prepare for snapshot: %w", err) - } - - snapshotStatus := &v2.SnapshotStatus{} - - // Find data volume + dataPVC := "" var dataVolumes corev1.PersistentVolumeClaimList if err := r.cl.List(ctx, &dataVolumes, &client.ListOptions{ Namespace: r.cluster.GetNamespace(), @@ -390,17 +373,15 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { naming.LabelRole: naming.RolePostgresData, }), }); err != nil { - return fmt.Errorf("failed to list data volumes: %w", err) + return "", "", nil, fmt.Errorf("failed to list data volumes: %w", err) } if len(dataVolumes.Items) == 1 { - snapshotStatus.DataVolume = &v2.PVCSnapshotRef{ - PVCName: dataVolumes.Items[0].GetName(), - } - } else { // we expect 1 - 
return fmt.Errorf("unexpected number of data volumes: %d", len(dataVolumes.Items)) + dataPVC = dataVolumes.Items[0].GetName() + } else { + return "", "", nil, fmt.Errorf("unexpected number of data volumes: %d", len(dataVolumes.Items)) } - // Find WAL volume + walPVC := "" var walVolumes corev1.PersistentVolumeClaimList if err := r.cl.List(ctx, &walVolumes, &client.ListOptions{ Namespace: r.cluster.GetNamespace(), @@ -409,15 +390,13 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { naming.LabelRole: naming.RolePostgresWAL, }), }); err != nil { - return fmt.Errorf("failed to list WAL volumes: %w", err) + return "", "", nil, fmt.Errorf("failed to list WAL volumes: %w", err) } if len(walVolumes.Items) == 1 { - snapshotStatus.WALVolume = &v2.PVCSnapshotRef{ - PVCName: walVolumes.Items[0].GetName(), - } + walPVC = walVolumes.Items[0].GetName() } - // Find tablespace volumes + tablespacePVCs := make(map[string]string) var tablespaceVolumes corev1.PersistentVolumeClaimList if err := r.cl.List(ctx, &tablespaceVolumes, &client.ListOptions{ Namespace: r.cluster.GetNamespace(), @@ -426,24 +405,39 @@ func (r *snapshotReconciler) prepare(ctx context.Context) error { naming.LabelRole: naming.RoleTablespace, }), }); err != nil { - return fmt.Errorf("failed to list tablespace volumes: %w", err) + return "", "", nil, fmt.Errorf("failed to list tablespace volumes: %w", err) } - if len(tablespaceVolumes.Items) > 0 { - snapshotStatus.TablespaceVolumes = make(map[string]v2.PVCSnapshotRef) - } for _, vol := range tablespaceVolumes.Items { name := vol.GetLabels()[naming.LabelData] - snapshotStatus.TablespaceVolumes[name] = v2.PVCSnapshotRef{ - PVCName: vol.GetName(), - } + tablespacePVCs[name] = vol.GetName() } - // update snapshot status - if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.Snapshot = snapshotStatus - }); err != nil { - return fmt.Errorf("failed to update backup status: %w", err) + return dataPVC, walPVC, 
tablespacePVCs, nil +} + +func (r *snapshotReconciler) prepare(ctx context.Context) error { + // finalizer already present, prepare already completed + if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { + return nil + } + + // prepare the cluster + targetInstance, err := r.exec.prepare(ctx) + if err != nil { + return fmt.Errorf("failed to prepare for snapshot: %w", err) + } + + // Store the backup target instance for later retrieval. + orig := r.backup.DeepCopy() + annotations := r.backup.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[annotationBackupTarget] = targetInstance + r.backup.SetAnnotations(annotations) + if err := r.cl.Patch(ctx, r.backup.DeepCopy(), client.MergeFrom(orig)); err != nil { + return fmt.Errorf("failed to patch backup annotations: %w", err) } // add finalizer diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 9487288210..a3828a4ba8 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -257,12 +257,12 @@ func (r *snapshotRestorer) reconcileDataVolume( ctx context.Context, instance *appsv1.StatefulSet, ) (bool, error) { - if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.DataVolume == nil || r.backup.Status.Snapshot.DataVolume.SnapshotName == "" { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.DataVolumeSnapshotRef == nil { return false, errors.New("data volume snapshot not known") } pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - snapshotName := r.backup.Status.Snapshot.DataVolume.SnapshotName + snapshotName := *r.backup.Status.Snapshot.DataVolumeSnapshotRef return r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) } @@ -270,27 +270,26 @@ func (r *snapshotRestorer) reconcileWALVolume( ctx context.Context, instance 
*appsv1.StatefulSet, ) (bool, error) { - if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.WALVolume == nil || r.backup.Status.Snapshot.WALVolume.SnapshotName == "" { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.WALVolumeSnapshotRef == nil { return true, nil } pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresWALVolume(instance)} - snapshotName := r.backup.Status.Snapshot.WALVolume.SnapshotName + snapshotName := *r.backup.Status.Snapshot.WALVolumeSnapshotRef return r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) } func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { - if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.TablespaceVolumes == nil || len(r.backup.Status.Snapshot.TablespaceVolumes) == 0 { + if r.backup.Status.Snapshot == nil || len(r.backup.Status.Snapshot.TablespaceVolumeSnapshotRefs) == 0 { return true, nil } done := true - for tsName, info := range r.backup.Status.Snapshot.TablespaceVolumes { + for tsName, snapshotName := range r.backup.Status.Snapshot.TablespaceVolumeSnapshotRefs { pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstanceTablespaceDataVolume(instance, tsName)} - snapshotName := info.SnapshotName ok, err := r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) if err != nil { - return false, errors.Wrap(err, "reconcile tablespace PVC") + return false, errors.Wrap(err, "reconcile tablespace volume") } if !ok { done = false @@ -627,13 +626,13 @@ func generatePrepareJob( scriptParts := []string{"set -e"} for _, mount := range volumeMounts { if restore.Spec.RepoName == nil || restore.Spec.VolumeSnapshotBackupName == "" { // no PITR - // PVCs are not needed, signal the restore_command to skip WAL recovery in order + // PITR is not needed, signal the restore_command to skip WAL recovery in order // to maintain consistency with the snapshot data. 
dataDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d", cluster.Spec.PostgresVersion)) signalFile := path.Join(dataDir, "skip-wal-recovery") scriptParts = append(scriptParts, fmt.Sprintf("touch %q", signalFile)) } else { - // PITR is needed, clear local WAL files since they may belong to a different timeline. + // PITR is needed, clear local WAL files since they may belong to a different timeline. // PITR restore job will fetch the required WAL files from the repo. walDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d_wal", cluster.Spec.PostgresVersion)) scriptParts = append(scriptParts, fmt.Sprintf("find %q -mindepth 1 -delete", walDir)) @@ -646,8 +645,12 @@ func generatePrepareJob( Image: cluster.Spec.Image, Resources: corev1.ResourceRequirements{ Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("100Mi"), + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), }, }, VolumeMounts: volumeMounts, diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 46586769f2..86cb3e90f4 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -117,15 +117,14 @@ type PerconaPGBackupStatus struct { Snapshot *SnapshotStatus `json:"snapshot,omitempty"` } -type PVCSnapshotRef struct { - SnapshotName string `json:"snapshotName"` - PVCName string `json:"pvcName"` -} - type SnapshotStatus struct { - DataVolume *PVCSnapshotRef `json:"dataVolume,omitempty"` - WALVolume *PVCSnapshotRef `json:"walVolume,omitempty"` - TablespaceVolumes map[string]PVCSnapshotRef `json:"tablespaceVolumes,omitempty"` + // Name of the VolumeSnapshot containing data volume contents. 
+ DataVolumeSnapshotRef *string `json:"dataVolumeSnapshotRef,omitempty"` + // Name of the VolumeSnapshot containing WAL volume contents. + WALVolumeSnapshotRef *string `json:"walVolumeSnapshotRef,omitempty"` + // Names of the VolumeSnapshots containing tablespace volume contents. + // Key is the name of the tablespace, value is the name of the VolumeSnapshot. + TablespaceVolumeSnapshotRefs map[string]string `json:"tablespaceVolumeRefs,omitempty"` } // +kubebuilder:validation:Type=string diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 8516dc32f5..964d4bec9e 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -555,21 +555,6 @@ func (in *PMMSpec) DeepCopy() *PMMSpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PVCSnapshotRef) DeepCopyInto(out *PVCSnapshotRef) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PVCSnapshotRef. -func (in *PVCSnapshotRef) DeepCopy() *PVCSnapshotRef { - if in == nil { - return nil - } - out := new(PVCSnapshotRef) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Patroni) DeepCopyInto(out *Patroni) { *out = *in @@ -1259,19 +1244,19 @@ func (in *ServiceExpose) DeepCopy() *ServiceExpose { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SnapshotStatus) DeepCopyInto(out *SnapshotStatus) { *out = *in - if in.DataVolume != nil { - in, out := &in.DataVolume, &out.DataVolume - *out = new(PVCSnapshotRef) + if in.DataVolumeSnapshotRef != nil { + in, out := &in.DataVolumeSnapshotRef, &out.DataVolumeSnapshotRef + *out = new(string) **out = **in } - if in.WALVolume != nil { - in, out := &in.WALVolume, &out.WALVolume - *out = new(PVCSnapshotRef) + if in.WALVolumeSnapshotRef != nil { + in, out := &in.WALVolumeSnapshotRef, &out.WALVolumeSnapshotRef + *out = new(string) **out = **in } - if in.TablespaceVolumes != nil { - in, out := &in.TablespaceVolumes, &out.TablespaceVolumes - *out = make(map[string]PVCSnapshotRef, len(*in)) + if in.TablespaceVolumeSnapshotRefs != nil { + in, out := &in.TablespaceVolumeSnapshotRefs, &out.TablespaceVolumeSnapshotRefs + *out = make(map[string]string, len(*in)) for key, val := range *in { (*out)[key] = val } From 05361f7fae7b3b63bb4f177daac8bf30c3f6bb15 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:09:41 +0530 Subject: [PATCH 62/90] add more unit tests Signed-off-by: Mayank Shah --- .../pgbackup/snapshots/reconcile_test.go | 179 +++++++++++------- .../pgrestore/snapshot/reconcile_test.go | 162 ++++++++++++++++ 2 files changed, 269 insertions(+), 72 deletions(-) create mode 100644 percona/controller/pgrestore/snapshot/reconcile_test.go diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go index c6592fb684..faa78ace0a 100644 --- a/percona/controller/pgbackup/snapshots/reconcile_test.go +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -98,9 +98,7 @@ func TestReconcileDataSnapshot(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: pvcName}, - }, + Snapshot: 
&v2.SnapshotStatus{}, }, } @@ -114,7 +112,7 @@ func TestReconcileDataSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileDataSnapshot(ctx) + ok, err := r.reconcileDataSnapshot(ctx, pvcName) require.NoError(t, err) assert.False(t, ok, "snapshot not ready yet") @@ -127,8 +125,7 @@ func TestReconcileDataSnapshot(t *testing.T) { updated := &v2.PerconaPGBackup{} require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) require.NotNil(t, updated.Status.Snapshot) - require.NotNil(t, updated.Status.Snapshot.DataVolume) - assert.Equal(t, vsName, updated.Status.Snapshot.DataVolume.SnapshotName) + assert.Equal(t, vsName, *updated.Status.Snapshot.DataVolumeSnapshotRef) }) t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { @@ -153,7 +150,7 @@ func TestReconcileDataSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileDataSnapshot(ctx) + ok, err := r.reconcileDataSnapshot(ctx, pvcName) require.NoError(t, err) assert.True(t, ok, "snapshot ready") }) @@ -186,15 +183,12 @@ func TestReconcileWALSnapshot(t *testing.T) { noopExec := &mockSnapshotExecutor{} - t.Run("returns true when WALVolume is nil", func(t *testing.T) { + t.Run("returns true when target PVC is empty", func(t *testing.T) { backup := &v2.PerconaPGBackup{ ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - // WALVolume intentionally nil - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). 
@@ -204,43 +198,17 @@ func TestReconcileWALSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileWALSnapshot(ctx) + ok, err := r.reconcileWALSnapshot(ctx, "") require.NoError(t, err) assert.True(t, ok, "no WAL volume to snapshot") }) - t.Run("returns true when WALVolume.PVCName is empty", func(t *testing.T) { - backup := &v2.PerconaPGBackup{ - ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, - Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, - Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - WALVolume: &v2.PVCSnapshotRef{PVCName: ""}, - }, - }, - } - cl := fake.NewClientBuilder(). - WithScheme(s). - WithObjects(backup.DeepCopy(), cluster). - WithStatusSubresource(backup). - Build() - - r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileWALSnapshot(ctx) - require.NoError(t, err) - assert.True(t, ok, "empty WAL PVC name") - }) - t.Run("creates VolumeSnapshot and updates backup status", func(t *testing.T) { backup := &v2.PerconaPGBackup{ ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - WALVolume: &v2.PVCSnapshotRef{PVCName: walPVCName}, - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). 
@@ -250,7 +218,7 @@ func TestReconcileWALSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileWALSnapshot(ctx) + ok, err := r.reconcileWALSnapshot(ctx, walPVCName) require.NoError(t, err) assert.False(t, ok, "snapshot not ready yet") @@ -263,8 +231,7 @@ func TestReconcileWALSnapshot(t *testing.T) { updated := &v2.PerconaPGBackup{} require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) require.NotNil(t, updated.Status.Snapshot) - require.NotNil(t, updated.Status.Snapshot.WALVolume) - assert.Equal(t, vsName, updated.Status.Snapshot.WALVolume.SnapshotName) + assert.Equal(t, vsName, *updated.Status.Snapshot.WALVolumeSnapshotRef) }) t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { @@ -285,10 +252,7 @@ func TestReconcileWALSnapshot(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - WALVolume: &v2.PVCSnapshotRef{PVCName: walPVCName}, - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). 
@@ -298,7 +262,7 @@ func TestReconcileWALSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileWALSnapshot(ctx) + ok, err := r.reconcileWALSnapshot(ctx, walPVCName) require.NoError(t, err) assert.True(t, ok, "snapshot ready") }) @@ -337,10 +301,7 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - TablespaceVolumes: nil, - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). @@ -350,7 +311,7 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileTablespaceSnapshot(ctx) + ok, err := r.reconcileTablespaceSnapshot(ctx, nil) require.NoError(t, err) assert.True(t, ok, "no tablespace volumes to snapshot") }) @@ -360,13 +321,7 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - TablespaceVolumes: map[string]v2.PVCSnapshotRef{ - ts1Name: {PVCName: ts1PVC}, - ts2Name: {PVCName: ts2PVC}, - }, - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). 
@@ -376,7 +331,10 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileTablespaceSnapshot(ctx) + ok, err := r.reconcileTablespaceSnapshot(ctx, map[string]string{ + ts1Name: ts1PVC, + ts2Name: ts2PVC, + }) require.NoError(t, err) assert.False(t, ok, "snapshots not ready yet") @@ -397,9 +355,8 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { updated := &v2.PerconaPGBackup{} require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) require.NotNil(t, updated.Status.Snapshot) - require.NotNil(t, updated.Status.Snapshot.TablespaceVolumes) - assert.Equal(t, backupName+"-"+ts1Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumes[ts1Name].SnapshotName) - assert.Equal(t, backupName+"-"+ts2Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumes[ts2Name].SnapshotName) + assert.Equal(t, backupName+"-"+ts1Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumeSnapshotRefs[ts1Name]) + assert.Equal(t, backupName+"-"+ts2Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumeSnapshotRefs[ts2Name]) }) t.Run("returns true when all existing VolumeSnapshots are ReadyToUse", func(t *testing.T) { @@ -429,13 +386,7 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, Status: v2.PerconaPGBackupStatus{ - Snapshot: &v2.SnapshotStatus{ - DataVolume: &v2.PVCSnapshotRef{PVCName: "data-pvc"}, - TablespaceVolumes: map[string]v2.PVCSnapshotRef{ - ts1Name: {PVCName: ts1PVC}, - ts2Name: {PVCName: ts2PVC}, - }, - }, + Snapshot: &v2.SnapshotStatus{}, }, } cl := fake.NewClientBuilder(). 
@@ -445,12 +396,96 @@ func TestReconcileTablespaceSnapshot(t *testing.T) { Build() r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) - ok, err := r.reconcileTablespaceSnapshot(ctx) + ok, err := r.reconcileTablespaceSnapshot(ctx, map[string]string{ + ts1Name: ts1PVC, + ts2Name: ts2PVC, + }) require.NoError(t, err) assert.True(t, ok, "all tablespace snapshots ready") }) } +func TestGenerateSnapshotIntent(t *testing.T) { + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup, cluster). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, &mockSnapshotExecutor{}) + + tests := []struct { + name string + snapshotRole string + sourcePVC string + wantName string + }{ + { + name: "data volume", + snapshotRole: naming.RolePostgresData, + sourcePVC: "data-pvc", + wantName: backupName + "-" + naming.RolePostgresData, + }, + { + name: "WAL volume", + snapshotRole: naming.RolePostgresWAL, + sourcePVC: "wal-pvc", + wantName: backupName + "-" + naming.RolePostgresWAL, + }, + { + name: "tablespace volume", + snapshotRole: "ts1-" + naming.RoleTablespace, + sourcePVC: "pvc-ts1", + wantName: backupName + "-" + "ts1-" + naming.RoleTablespace, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + vs, err := r.generateSnapshotIntent(tt.snapshotRole, tt.sourcePVC) + require.NoError(t, err) + require.NotNil(t, vs) + + assert.Equal(t, tt.wantName, vs.Name) + assert.Equal(t, ns, vs.Namespace) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, tt.sourcePVC, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + + // Owner reference should be set to the backup + require.True(t, len(vs.OwnerReferences) > 0, "expected owner reference to be set") + assert.Equal(t, backupName, vs.OwnerReferences[0].Name) + assert.Equal(t, "pgv2.percona.com/v2", vs.OwnerReferences[0].APIVersion) + assert.Equal(t, "PerconaPGBackup", vs.OwnerReferences[0].Kind) + }) + } +} + // mockSnapshotExecutor is a no-op snapshotExecutor for tests. 
type mockSnapshotExecutor struct{} diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go new file mode 100644 index 0000000000..5b1b6e1efe --- /dev/null +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -0,0 +1,162 @@ +package snapshot + +import ( + "path" + "strings" + "testing" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestGeneratePrepareJob(t *testing.T) { + ns := "test-ns" + clusterName := "my-cluster" + postgresVersion := 15 + image := "postgres:15" + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: postgresVersion, + Image: image, + }, + } + + makeInstance := func(name string) appsv1.StatefulSet { + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + }, + } + } + + t.Run("single instance without PITR", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("my-cluster-instance-0")}, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "my-backup", + // RepoName nil and VolumeSnapshotBackupName set = no PITR + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + require.Len(t, job.Spec.Template.Spec.Containers, 1) + container := job.Spec.Template.Spec.Containers[0] + assert.Equal(t, "snapshot-prepare", container.Name) + assert.Equal(t, image, container.Image) + 
assert.Equal(t, []string{"bash", "-c"}, container.Command[:2]) + + assert.Equal(t, resource.MustParse("50m"), container.Resources.Requests[corev1.ResourceCPU]) + assert.Equal(t, resource.MustParse("32Mi"), container.Resources.Requests[corev1.ResourceMemory]) + + // No PITR: script should touch skip-wal-recovery files + script := container.Command[2] + dataDir := path.Join("my-cluster-instance-0", "pgdata", "pg15") + assert.Contains(t, script, "touch") + assert.Contains(t, script, path.Join(dataDir, "skip-wal-recovery")) + + // Volume and mount for instance + require.Len(t, job.Spec.Template.Spec.Volumes, 1) + assert.Equal(t, "my-cluster-instance-0-pgdata", job.Spec.Template.Spec.Volumes[0].Name) + assert.Equal(t, "my-cluster-instance-0-pgdata", job.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName) + + require.Len(t, container.VolumeMounts, 1) + assert.Equal(t, "my-cluster-instance-0-pgdata", container.VolumeMounts[0].Name) + assert.Equal(t, path.Join("my-cluster-instance-0", "pgdata"), container.VolumeMounts[0].MountPath) + + assert.Equal(t, corev1.RestartPolicyNever, job.Spec.Template.Spec.RestartPolicy) + }) + + t.Run("multiple instances without PITR", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{ + makeInstance("my-cluster-instance-0"), + makeInstance("my-cluster-instance-1"), + }, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // Both instances should have skip-wal-recovery + assert.Contains(t, script, path.Join("my-cluster-instance-0", "pgdata", "pg15", "skip-wal-recovery")) + assert.Contains(t, script, path.Join("my-cluster-instance-1", "pgdata", "pg15", 
"skip-wal-recovery")) + + require.Len(t, job.Spec.Template.Spec.Volumes, 2) + assert.Equal(t, []string{"my-cluster-instance-0-pgdata", "my-cluster-instance-1-pgdata"}, + []string{job.Spec.Template.Spec.Volumes[0].Name, job.Spec.Template.Spec.Volumes[1].Name}) + }) + + t.Run("with PITR clears WAL directory", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("my-cluster-instance-0")}, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + RepoName: ptr.To("repo1"), + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // PITR: script should find/delete WAL dir, not touch skip-wal-recovery + walDir := path.Join("my-cluster-instance-0", "pgdata", "pg15_wal") + assert.Contains(t, script, "find") + assert.Contains(t, script, "-mindepth") + assert.Contains(t, script, "-delete") + assert.Contains(t, script, walDir) + assert.NotContains(t, script, "skip-wal-recovery") + }) + + t.Run("script starts with set -e", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("instance-0")}, + } + restore := &v2.PerconaPGRestore{ + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + script := job.Spec.Template.Spec.Containers[0].Command[2] + assert.True(t, strings.HasPrefix(script, "set -e\n"), + "script should start with 'set -e' for error handling, got: %q", script[:50]) + }) +} From c9942f947b4c1a6a6f604d32b509e67976ea7944 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:09:50 +0530 Subject: [PATCH 63/90] update e2e test assertions Signed-off-by: 
Mayank Shah --- e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml | 2 ++ e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml index 75f9b9b03a..3a662811e5 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml @@ -11,6 +11,8 @@ spec: method: volumeSnapshot status: state: Succeeded + snapshot: + dataVolumeSnapshotRef: backup1-pgdata --- kind: VolumeSnapshot apiVersion: snapshot.storage.k8s.io/v1 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml index f714e66783..7c279f645b 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/08-assert.yaml @@ -11,6 +11,8 @@ spec: method: volumeSnapshot status: state: Succeeded + snapshot: + dataVolumeSnapshotRef: backup3-pgdata --- kind: VolumeSnapshot apiVersion: snapshot.storage.k8s.io/v1 From ab2c612553fd31f143b91a27672f7bf28ec72f81 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:24:06 +0530 Subject: [PATCH 64/90] fix inconsistencies & address copilot comments Signed-off-by: Mayank Shah --- .../demand-backup-offline-snapshot/10-assert.yaml | 2 +- .../10-verify-restored-data.yaml | 2 +- .../demand-backup-offline-snapshot/99-cleanup.yaml | 6 +++++- internal/naming/names.go | 2 +- percona/controller/pgbackup/snapshots/reconcile.go | 14 ++++++++++++++ .../pgbackup/snapshots/reconcile_test.go | 2 +- percona/controller/pgrestore/snapshot/reconcile.go | 7 ++++--- .../pgrestore/snapshot/reconcile_test.go | 4 ++-- percona/controller/pgrestore/utils/pgbackrest.go | 8 ++++++-- 9 files changed, 35 insertions(+), 12 deletions(-) diff --git 
a/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml index 7b4053f7f5..1cba140cbf 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/10-assert.yaml @@ -5,7 +5,7 @@ timeout: 30 kind: ConfigMap apiVersion: v1 metadata: - name: 09-verify-restored-data + name: 10-verify-restored-data data: data: |2- 100500 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml index 91f10f09fa..05a7c8f0ae 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/10-verify-restored-data.yaml @@ -10,4 +10,4 @@ commands: data=$(run_psql_local '\c myapp \\\ SELECT * from myApp;' "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)") - kubectl create configmap -n "${NAMESPACE}" 09-verify-restored-data --from-literal=data="${data}" + kubectl create configmap -n "${NAMESPACE}" 10-verify-restored-data --from-literal=data="${data}" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/99-cleanup.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/99-cleanup.yaml index 5cf41c4203..4d9bb23ee8 100644 --- a/e2e-tests/tests/demand-backup-offline-snapshot/99-cleanup.yaml +++ b/e2e-tests/tests/demand-backup-offline-snapshot/99-cleanup.yaml @@ -20,7 +20,11 @@ delete: - apiVersion: snapshot.storage.k8s.io/v1 kind: VolumeSnapshot metadata: - name: backup1 + name: backup1-pgdata +- apiVersion: snapshot.storage.k8s.io/v1 + kind: VolumeSnapshot + metadata: + name: backup3-pgdata commands: - script: |- set -o errexit diff --git a/internal/naming/names.go b/internal/naming/names.go index 52fe1ed119..b857910e10 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -473,7 +473,7 
@@ func PGBackRestCronJob(cluster *v1beta1.PostgresCluster, backuptype, repoName st } } -// PGBackRestCronJob returns the ObjectMeta for a pgBackRest CronJob +// VolumeSnapshotCronJob returns the ObjectMeta for a volume snapshot CronJob func VolumeSnapshotCronJob(cluster *v1beta1.PostgresCluster) metav1.ObjectMeta { return metav1.ObjectMeta{ Namespace: cluster.GetNamespace(), diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 546dbb13c6..1af6174451 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -85,6 +85,12 @@ func Reconcile( pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { if pgBackup == nil || pgCluster == nil { + if err := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = "pgBackup or pgCluster is nil or not found" + }); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil or not found") } @@ -282,6 +288,10 @@ func (r *snapshotReconciler) generateSnapshotIntent( func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context, targetPVC string) (bool, error) { volumeSnapshot, err := r.generateSnapshotIntent(naming.RolePostgresData, targetPVC) + if err != nil { + return false, fmt.Errorf("failed to generate snapshot intent: %w", err) + } + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) if err != nil { return false, fmt.Errorf("failed to reconcile snapshot: %w", err) @@ -326,6 +336,10 @@ func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context, ta for tsName, targetPVC := range targetPVCs { role := tsName + "-" + naming.RoleTablespace volumeSnapshot, err := r.generateSnapshotIntent(role, targetPVC) + if err != nil { + return false, fmt.Errorf("failed to generate snapshot intent: %w", err) + } 
+ ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) if err != nil { return false, fmt.Errorf("failed to reconcile snapshot: %w", err) diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go index faa78ace0a..bb1378f202 100644 --- a/percona/controller/pgbackup/snapshots/reconcile_test.go +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -478,7 +478,7 @@ func TestGenerateSnapshotIntent(t *testing.T) { assert.Equal(t, tt.sourcePVC, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) // Owner reference should be set to the backup - require.True(t, len(vs.OwnerReferences) > 0, "expected owner reference to be set") + require.Positive(t, len(vs.OwnerReferences), "expected owner reference to be set") assert.Equal(t, backupName, vs.OwnerReferences[0].Name) assert.Equal(t, "pgv2.percona.com/v2", vs.OwnerReferences[0].APIVersion) assert.Equal(t, "PerconaPGBackup", vs.OwnerReferences[0].Kind) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index a3828a4ba8..8125dd7cf1 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -113,7 +113,7 @@ func Reconcile( } func (r *snapshotRestorer) reconcileNew(ctx context.Context) (reconcile.Result, error) { - if restore := r.cluster.Spec.Backups.PGBackRest.Restore; restore != nil && *restore.Enabled { + if restore := r.cluster.Spec.Backups.PGBackRest.Restore; restore != nil && restore.Enabled != nil && *restore.Enabled { r.log.Info("Waiting for another restore to finish") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -640,8 +640,9 @@ func generatePrepareJob( } script := strings.Join(scriptParts, "\n") + containerName := "snapshot-prepare" container := corev1.Container{ - Name: "snapshot-prepare", + Name: containerName, Image: cluster.Spec.Image, Resources: corev1.ResourceRequirements{ Requests: 
corev1.ResourceList{ @@ -660,7 +661,7 @@ func generatePrepareJob( Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - naming.DefaultContainerAnnotation: "prepare", + naming.DefaultContainerAnnotation: containerName, }, }, Spec: corev1.PodSpec{ diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go index 5b1b6e1efe..fbc3f6b7de 100644 --- a/percona/controller/pgrestore/snapshot/reconcile_test.go +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -5,6 +5,8 @@ import ( "strings" "testing" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -13,8 +15,6 @@ import ( "k8s.io/utils/ptr" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestGeneratePrepareJob(t *testing.T) { diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go index 0b14602eef..ab572d817f 100644 --- a/percona/controller/pgrestore/utils/pgbackrest.go +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -47,7 +47,11 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { origPostgres := postgresCluster.DeepCopy() - postgresCluster.Status.PGBackRest.Restore = new(v1beta1.PGBackRestJobStatus) + if postgresCluster.Status.PGBackRest == nil { + postgresCluster.Status.PGBackRest = &v1beta1.PGBackRestStatus{} + } + + postgresCluster.Status.PGBackRest.Restore = &v1beta1.PGBackRestJobStatus{} if err := r.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { return errors.Wrap(err, "patch PGCluster") @@ -84,7 +88,7 @@ func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { } r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = 
ptr.To(false) - delete(r.pgCluster.Annotations, naming.LabelPGBackRestRestore) + delete(r.pgCluster.Annotations, naming.PGBackRestRestore) if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { return errors.Wrap(err, "patch PGCluster") From 0e9caea5255c9e9fe04a1b12481b030d0cc3df70 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:25:55 +0530 Subject: [PATCH 65/90] remove sh prefix Signed-off-by: Mayank Shah --- internal/pgbackrest/postgres.go | 2 +- internal/pgbackrest/postgres_test.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/pgbackrest/postgres.go b/internal/pgbackrest/postgres.go index f3403873a3..bba7ba5ecd 100644 --- a/internal/pgbackrest/postgres.go +++ b/internal/pgbackrest/postgres.go @@ -78,7 +78,7 @@ func PostgreSQL( // Fetch WAL files from any configured repository during recovery. // - https://pgbackrest.org/command.html#command-archive-get // - https://www.postgresql.org/docs/current/runtime-config-wal.html - restore := "sh /opt/crunchy/bin/restore-command-wrapper.sh " + restore := "/opt/crunchy/bin/restore-command-wrapper.sh " restore += `pgbackrest --stanza=` + DefaultStanzaName + ` archive-get %f "%p"` if inCluster.Spec.Patroni != nil && inCluster.Spec.Patroni.DynamicConfiguration != nil { postgresql, ok := inCluster.Spec.Patroni.DynamicConfiguration["postgresql"].(map[string]any) diff --git a/internal/pgbackrest/postgres_test.go b/internal/pgbackrest/postgres_test.go index cdb5134652..9ce69025a5 100644 --- a/internal/pgbackrest/postgres_test.go +++ b/internal/pgbackrest/postgres_test.go @@ -30,7 +30,7 @@ func TestPostgreSQLParameters(t *testing.T) { assert.DeepEqual(t, parameters.Mandatory.AsMap(), map[string]string{ "archive_mode": "on", "archive_command": `pgbackrest --stanza=db archive-push "%p"`, - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": 
`/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, }) assert.DeepEqual(t, parameters.Default.AsMap(), map[string]string{ @@ -66,7 +66,7 @@ func TestPostgreSQLParameters(t *testing.T) { assert.DeepEqual(t, parameters.Mandatory.AsMap(), map[string]string{ "archive_mode": "on", "archive_command": `pgbackrest --stanza=db archive-push "%p"`, - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, }) cluster.Spec.Standby = nil @@ -86,7 +86,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) @@ -113,7 +113,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) @@ -169,7 +169,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! 
-z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, "track_commit_timestamp": "true", }) @@ -190,7 +190,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `sh /opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) From 54603df72658a604cebd32078bd3cef5ad815e01 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:27:42 +0530 Subject: [PATCH 66/90] checkpoint timeout default to 5m Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/offline.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 33552ba097..96e229b594 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -20,7 +20,7 @@ import ( ) const ( - checkpointTimeoutSeconds = 30 // TODO: make this configurable + checkpointTimeoutSeconds = 300 // 5mins // TODO: make this configurable waitTimeout = 5 * time.Minute retryInterval = 3 * time.Second ) From 667936692d54284bcf11dffd27b366c2d84c3fb1 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:36:29 +0530 Subject: [PATCH 67/90] linting Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile_test.go | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go index bb1378f202..fd6ce033ec 100644 --- a/percona/controller/pgbackup/snapshots/reconcile_test.go +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -478,7 +478,7 @@ func TestGenerateSnapshotIntent(t *testing.T) { assert.Equal(t, tt.sourcePVC, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) // Owner reference should be set to the backup - require.Positive(t, len(vs.OwnerReferences), "expected owner reference to be set") + require.NotEmpty(t, vs.OwnerReferences, "expected owner reference to be set") assert.Equal(t, backupName, vs.OwnerReferences[0].Name) assert.Equal(t, "pgv2.percona.com/v2", vs.OwnerReferences[0].APIVersion) assert.Equal(t, "PerconaPGBackup", vs.OwnerReferences[0].Kind) From a99b660670f25b60736fcfaf84f564be7624c446 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:48:41 +0530 Subject: [PATCH 68/90] POSIX-compliant script Signed-off-by: Mayank Shah --- build/postgres-operator/restore-command-wrapper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/postgres-operator/restore-command-wrapper.sh b/build/postgres-operator/restore-command-wrapper.sh index 99d7c5ba26..b9c6504ee5 100644 --- a/build/postgres-operator/restore-command-wrapper.sh +++ b/build/postgres-operator/restore-command-wrapper.sh @@ -1,9 +1,9 @@ -#!/bin/bash +#!/bin/sh set -e # When this marker exists (e.g. after a snapshot restore), skip all WAL recovery by # exiting non-zero. Do not remove the file so every restore_command call is skipped. 
-if [[ -f "${PGDATA}/skip-wal-recovery" ]]; then +if [ -f "${PGDATA}/skip-wal-recovery" ]; then echo "Skipping WAL archive recovery" exit 1 fi From f9294d676f4e08e7f39d2a4c69104b1e56a7368f Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:50:05 +0530 Subject: [PATCH 69/90] fix potential nil-ptr Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 1af6174451..f85b2b6e7a 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -361,6 +361,9 @@ func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context, ta } func shouldFailSnapshot(volumeSnapshot *volumesnapshotv1.VolumeSnapshot) bool { + if volumeSnapshot.Status == nil || volumeSnapshot.Status.Error == nil { + return false + } errAt := volumeSnapshot.Status.Error.Time return !errAt.IsZero() && time.Now().After(errAt.Add(defaultSnapshotErrorDeadline)) } From c7d33400ddc13e8ad2695bad66b2e865138dd83a Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 12:50:29 +0530 Subject: [PATCH 70/90] fix error message Signed-off-by: Mayank Shah --- percona/controller/pgrestore/utils/pgbackrest.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go index ab572d817f..d93c160686 100644 --- a/percona/controller/pgrestore/utils/pgbackrest.go +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -100,7 +100,7 @@ func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { func (r *PGBackRestRestore) ObserveStatus(ctx context.Context) (v2.PGRestoreState, *metav1.Time, error) { cluster := &v2.PerconaPGCluster{} if err := r.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), cluster); err != nil { - return
v2.RestoreStarting, nil, errors.Wrap(err, "get PostgresCluster") + return v2.RestoreStarting, nil, errors.Wrap(err, "get PerconaPGCluster") } if cluster.Status.PGBackRest == nil || cluster.Status.PGBackRest.Restore == nil { From 34a95a267b15882007e5c333927f7da51edee10e Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 13:28:18 +0530 Subject: [PATCH 71/90] fix invalid status update Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index f85b2b6e7a..c4067d3867 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -85,12 +85,6 @@ func Reconcile( pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { if pgBackup == nil || pgCluster == nil { - if err := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { - bcp.Status.State = v2.BackupFailed - bcp.Status.Error = "pgBackup or pgCluster is nil or not found" - }); err != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) - } return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil or not found") } From abd599a60336de0dd7696e8089576ff01342e557 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 13:28:32 +0530 Subject: [PATCH 72/90] fix retries to get latest object Signed-off-by: Mayank Shah --- percona/postgres/common.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/percona/postgres/common.go b/percona/postgres/common.go index 8796caf448..da6cba7371 100644 --- a/percona/postgres/common.go +++ b/percona/postgres/common.go @@ -97,6 +97,10 @@ func SuspendInstance(ctx context.Context, cli client.Client, instanceKey client.
} if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") + } + orig := sts.DeepCopy() annots := sts.GetAnnotations() if annots == nil { @@ -125,6 +129,10 @@ func UnsuspendInstance(ctx context.Context, cli client.Client, instanceKey clien } if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") + } + orig := sts.DeepCopy() annots := sts.GetAnnotations() delete(annots, pNaming.AnnotationInstanceSuspended) From a7fe1339138d2055b2ce4c0ea6e35aa74abc0b89 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 13:28:50 +0530 Subject: [PATCH 73/90] bugfix: restore reconcile can use incorrect volume spec Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile.go | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index 8125dd7cf1..ca678ac4f2 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -263,7 +263,8 @@ func (r *snapshotRestorer) reconcileDataVolume( pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} snapshotName := *r.backup.Status.Snapshot.DataVolumeSnapshotRef - return r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) + volCtxInfo := volumeContextInfo{role: naming.RolePostgresData} + return r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) } func (r *snapshotRestorer) reconcileWALVolume( @@ -276,7 +277,8 @@ func (r *snapshotRestorer) reconcileWALVolume( pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresWALVolume(instance)} snapshotName := *r.backup.Status.Snapshot.WALVolumeSnapshotRef - return 
r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) + volCtxInfo := volumeContextInfo{role: naming.RolePostgresWAL} + return r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) } func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { @@ -287,7 +289,8 @@ func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, insta done := true for tsName, snapshotName := range r.backup.Status.Snapshot.TablespaceVolumeSnapshotRefs { pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstanceTablespaceDataVolume(instance, tsName)} - ok, err := r.reconcileInstancePVC(ctx, pvc, instance, snapshotName) + volCtxInfo := volumeContextInfo{role: naming.RoleTablespace, tablespaceName: tsName} + ok, err := r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) if err != nil { return false, errors.Wrap(err, "reconcile tablespace volume") } @@ -300,6 +303,7 @@ func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, insta func (r *snapshotRestorer) reconcileInstancePVC( ctx context.Context, + volCtxInfo volumeContextInfo, pvc *corev1.PersistentVolumeClaim, instance *appsv1.StatefulSet, snapshotName string, @@ -307,7 +311,7 @@ func (r *snapshotRestorer) reconcileInstancePVC( observedPVC := &corev1.PersistentVolumeClaim{} err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) if k8serrors.IsNotFound(err) { - if err := r.createPVCFromSnapshot(ctx, pvc, instance, snapshotName); err != nil { + if err := r.createPVCFromSnapshot(ctx, volCtxInfo, pvc, instance, snapshotName); err != nil { return false, errors.Wrap(err, "create PVC from data source") } return true, nil @@ -332,6 +336,7 @@ func (r *snapshotRestorer) reconcileInstancePVC( func (r *snapshotRestorer) createPVCFromSnapshot( ctx context.Context, + volCtxInfo volumeContextInfo, pvc *corev1.PersistentVolumeClaim, instance *appsv1.StatefulSet, snapshotName string, @@ -346,7 +351,7 @@ func (r 
*snapshotRestorer) createPVCFromSnapshot( Kind: pNaming.KindVolumeSnapshot, Name: snapshotName, } - spec, err := r.pvcSpecFromDataSource(instanceSetName, dataSource) + spec, err := r.pvcSpecFromDataSource(volCtxInfo, instanceSetName, dataSource) if err != nil { return errors.Wrap(err, "get PVC spec from data source") } @@ -360,7 +365,16 @@ func (r *snapshotRestorer) createPVCFromSnapshot( return nil } -func (r *snapshotRestorer) pvcSpecFromDataSource(instanceSetName string, dataSource *corev1.TypedLocalObjectReference) (corev1.PersistentVolumeClaimSpec, error) { +type volumeContextInfo struct { + role string + tablespaceName string +} + +func (r *snapshotRestorer) pvcSpecFromDataSource( + volCtxInfo volumeContextInfo, + instanceSetName string, + dataSource *corev1.TypedLocalObjectReference, +) (corev1.PersistentVolumeClaimSpec, error) { var instanceSetSpec *v2.PGInstanceSetSpec for _, instanceSet := range r.cluster.Spec.InstanceSets { if instanceSet.Name == instanceSetName { @@ -372,7 +386,31 @@ func (r *snapshotRestorer) pvcSpecFromDataSource(instanceSetName string, dataSou return corev1.PersistentVolumeClaimSpec{}, errors.New("instance set not found") } - dataVolSpec := instanceSetSpec.DataVolumeClaimSpec + var volSpec *corev1.PersistentVolumeClaimSpec + switch volCtxInfo.role { + case naming.RolePostgresData: + volSpec = &instanceSetSpec.DataVolumeClaimSpec + case naming.RolePostgresWAL: + volSpec = instanceSetSpec.WALVolumeClaimSpec + case naming.RoleTablespace: + tablespaceIdx := -1 + for i, ts := range instanceSetSpec.TablespaceVolumes { + if ts.Name == volCtxInfo.tablespaceName { + tablespaceIdx = i + break + } + } + if tablespaceIdx == -1 { + return corev1.PersistentVolumeClaimSpec{}, errors.New("tablespace not found") + } + volSpec = &instanceSetSpec.TablespaceVolumes[tablespaceIdx].DataVolumeClaimSpec + } + + if volSpec == nil { + return corev1.PersistentVolumeClaimSpec{}, errors.New("volume spec not found in instance spec") + } + + dataVolSpec := 
*volSpec dataVolSpec.DataSource = dataSource return dataVolSpec, nil } From 620c3deb53f2dc418f544874ebec377912dc73ff Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 13:28:53 +0530 Subject: [PATCH 74/90] add unit test Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile_test.go | 684 ++++++++++++++++++ 1 file changed, 684 insertions(+) diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go index fbc3f6b7de..97dfb1c3d1 100644 --- a/percona/controller/pgrestore/snapshot/reconcile_test.go +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -1,6 +1,8 @@ package snapshot import ( + "context" + "io" "path" "strings" "testing" @@ -10,11 +12,21 @@ import ( appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestGeneratePrepareJob(t *testing.T) { @@ -160,3 +172,675 @@ func TestGeneratePrepareJob(t *testing.T) { "script should start with 'set -e' for error handling, got: %q", script[:50]) }) } + +// noopPodExecutor is a PodExecutor that does 
nothing, for tests that don't need exec. +var noopPodExecutor runtime.PodExecutor = func( + _ context.Context, _, _, _ string, _ io.Reader, _, _ io.Writer, _ ...string, +) error { + return nil +} + +func TestReconcileDataVolume(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + snapshotName := "my-backup-pgdata" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolumeSnapshotRef: ptr.To(snapshotName), + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns 
error when DataVolumeSnapshotRef is nil", func(t *testing.T) { + backupNoSnapshot := backup.DeepCopy() + backupNoSnapshot.Status.Snapshot = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoSnapshot, restore). + WithStatusSubresource(backupNoSnapshot). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoSnapshot, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.Error(t, err) + assert.False(t, ok) + assert.Contains(t, err.Error(), "data volume snapshot not known") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-pgdata" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, snapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from instance set + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("1Gi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := 
&corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, // No AnnotationSnapshotRestore + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("500Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok, "should return false to trigger requeue") + + // PVC should be deleted + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + // PVC should be deleted so it can be recreated for this restore + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when PVC already has restore annotation", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} + +func TestReconcileWALVolume(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + walSnapshotName := "my-backup-pgwal" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + WALVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + WALVolumeSnapshotRef: ptr.To(walSnapshotName), + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := 
&appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns true when WALVolumeSnapshotRef is nil", func(t *testing.T) { + backupNoWAL := backup.DeepCopy() + backupNoWAL.Status.Snapshot.WALVolumeSnapshotRef = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoWAL, restore). + WithStatusSubresource(backupNoWAL). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoWAL, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no WAL volume to restore") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-pgwal" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the WAL VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, walSnapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from WALVolumeClaimSpec (512Mi, not data's 1Gi) + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("512Mi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("256Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("256Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when PVC already has restore annotation", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} + +func TestReconcileTablespaceVolumes(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + ts1Name := "ts1" + ts1SnapshotName := "my-backup-ts1-tablespace" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + TablespaceVolumes: []crunchyv1beta1.TablespaceVolume{ + { + Name: ts1Name, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + TablespaceVolumeSnapshotRefs: map[string]string{ts1Name: ts1SnapshotName}, + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: 
restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns true when Snapshot is nil", func(t *testing.T) { + backupNoSnapshot := backup.DeepCopy() + backupNoSnapshot.Status.Snapshot = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoSnapshot, restore). + WithStatusSubresource(backupNoSnapshot). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoSnapshot, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to restore") + }) + + t.Run("returns true when TablespaceVolumeSnapshotRefs is empty", func(t *testing.T) { + backupEmpty := backup.DeepCopy() + backupEmpty.Status.Snapshot = &v2.SnapshotStatus{TablespaceVolumeSnapshotRefs: map[string]string{}} + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupEmpty, restore). + WithStatusSubresource(backupEmpty). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupEmpty, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to restore") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-" + ts1Name + "-tablespace" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the tablespace VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, ts1SnapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from TablespaceVolumes[ts1].DataVolumeClaimSpec (2Gi, not data's 1Gi) + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("2Gi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when all tablespace PVCs have correct restore annotation", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} From de31ed9e56eb4551102eea82327f579464cf0655 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 13:35:30 +0530 Subject: [PATCH 75/90] fix linting suggestions Signed-off-by: Mayank Shah --- .../generated/pgv2.percona.com_perconapgrestores.yaml | 4 ++-- config/crd/bases/pgv2.percona.com_perconapgclusters.yaml | 4 ++-- deploy/bundle.yaml | 4 ++-- deploy/crd.yaml | 4 ++-- deploy/cw-bundle.yaml | 4 ++-- percona/controller/pgbackup/snapshots/reconcile.go | 2 +- percona/controller/pgrestore/controller.go | 4 ++-- percona/controller/pgrestore/snapshot/reconcile_test.go | 6 +++--- pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index 512d001c70..d8430b6436 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -92,8 +92,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName - != "" + rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName + != "") status: properties: completed: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index df841b9a28..a03520fa0d 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -22041,8 +22041,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or 
volumeSnapshotBackupName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName - != "" + rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName + != "") status: properties: completed: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 5b7215a223..e818830a0f 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -22340,8 +22340,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName - != "" + rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName + != "") status: properties: completed: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 61346ad8f4..d2723e3df6 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -22340,8 +22340,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName - != "" + rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName + != "") status: properties: completed: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index aaa9e9550f..36d5d1398e 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -22340,8 +22340,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: has(self.repoName) || self.repoName != "" || self.volumeSnapshotBackupName - != "" + rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName + != "") status: properties: completed: diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index c4067d3867..0d78047ed2 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ 
-85,7 +85,7 @@ func Reconcile( pgCluster *v2.PerconaPGCluster, ) (reconcile.Result, error) { if pgBackup == nil || pgCluster == nil { - return reconcile.Result{}, errors.New("pgBackup or pgCluster is nil or not found") + return reconcile.Result{}, errors.New("PerconaPGBackup or PerconaPGCluster is nil or not found") } log := logging.FromContext(ctx). diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 3514819cef..233bff1de7 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -74,7 +74,7 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R pgCluster := &v2.PerconaPGCluster{} err := r.Client.Get(ctx, types.NamespacedName{Name: pgRestore.Spec.PGCluster, Namespace: request.Namespace}, pgCluster) if err != nil { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") + return reconcile.Result{}, errors.Wrap(err, "get PerconaPGCluster") } if pgRestore.Spec.VolumeSnapshotBackupName != "" { @@ -174,7 +174,7 @@ func runFinalizers(ctx context.Context, c client.Client, pr *v2.PerconaPGRestore if k8serrors.IsNotFound(err) { pg = nil } else { - return errors.Wrap(err, "get PostgresCluster") + return errors.Wrap(err, "get PerconaPGCluster") } } diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go index 97dfb1c3d1..df70fa90f2 100644 --- a/percona/controller/pgrestore/snapshot/reconcile_test.go +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -275,7 +275,7 @@ func TestReconcileDataVolume(t *testing.T) { // Verify data source points to the VolumeSnapshot require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") assert.Equal(t, snapshotName, pvc.Spec.DataSource.Name) - assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, volumesnapshotv1.GroupName, 
ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) // Verify spec from instance set @@ -493,7 +493,7 @@ func TestReconcileWALVolume(t *testing.T) { // Verify data source points to the WAL VolumeSnapshot require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") assert.Equal(t, walSnapshotName, pvc.Spec.DataSource.Name) - assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, volumesnapshotv1.GroupName, ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) // Verify spec from WALVolumeClaimSpec (512Mi, not data's 1Gi) @@ -731,7 +731,7 @@ func TestReconcileTablespaceVolumes(t *testing.T) { // Verify data source points to the tablespace VolumeSnapshot require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") assert.Equal(t, ts1SnapshotName, pvc.Spec.DataSource.Name) - assert.Equal(t, ptr.Deref(pvc.Spec.DataSource.APIGroup, ""), volumesnapshotv1.GroupName) + assert.Equal(t, volumesnapshotv1.GroupName, ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) // Verify spec from TablespaceVolumes[ts1].DataVolumeClaimSpec (2Gi, not data's 1Gi) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index 3716ccc9da..511b4f3925 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -29,7 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` - // +kubebuilder:validation:XValidation:rule="has(self.repoName) || self.repoName != \"\" || self.volumeSnapshotBackupName != \"\"",message="either repoName or volumeSnapshotBackupName must be set" + // +kubebuilder:validation:XValidation:rule="((has(self.repoName) && self.repoName != 
\"\") || self.volumeSnapshotBackupName != \"\")",message="either repoName or volumeSnapshotBackupName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } From 21b71da3d8dd31c241ead959d9ce68b893156950 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 14:54:24 +0530 Subject: [PATCH 76/90] prepare job should work when using separate wal volumes Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile.go | 175 +++++++++++++----- 1 file changed, 125 insertions(+), 50 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index ca678ac4f2..afe9f462e9 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -636,65 +636,38 @@ func (r *snapshotRestorer) runPrepareJob(ctx context.Context, instances *appsv1. return false, nil } +// instanceSetSpecForName returns the PGInstanceSetSpec for the given instance set name, or nil if not found. +func instanceSetSpecForName(cluster *v2.PerconaPGCluster, name string) *v2.PGInstanceSetSpec { + for i := range cluster.Spec.InstanceSets { + if cluster.Spec.InstanceSets[i].Name == name { + return &cluster.Spec.InstanceSets[i] + } + } + return nil +} + +// instancePrepareInfo holds mount paths for an instance used by the snapshot prepare job. +// dataMountPath is empty when PITR + dedicated WAL (data volume not mounted). +// walMountPath is empty when no dedicated WAL volume. 
+type instancePrepareInfo struct { + dataMountPath string + walMountPath string +} + func generatePrepareJob( job *batchv1.Job, instances *appsv1.StatefulSetList, cluster *v2.PerconaPGCluster, restore *v2.PerconaPGRestore, ) { - volumes := []corev1.Volume{} - volumeMounts := []corev1.VolumeMount{} + pitrEnabled := restore.Spec.RepoName != nil && restore.Spec.VolumeSnapshotBackupName != "" + pgVersion := cluster.Spec.PostgresVersion - for _, instance := range instances.Items { - volName := instance.GetName() + "-pgdata" - volumes = append(volumes, corev1.Volume{ - Name: volName, - VolumeSource: corev1.VolumeSource{ - PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: naming.InstancePostgresDataVolume(&instance).Name, - }, - }, - }) - volumeMounts = append(volumeMounts, corev1.VolumeMount{ - Name: volName, - MountPath: path.Join(instance.GetName(), "pgdata"), - }) - } + volumes, volumeMounts, instanceInfos := buildPrepareJobVolumes(instances, cluster, pitrEnabled) - scriptParts := []string{"set -e"} - for _, mount := range volumeMounts { - if restore.Spec.RepoName == nil || restore.Spec.VolumeSnapshotBackupName == "" { // no PITR - // PITR is not needed, signal the restore_command to skip WAL recovery in order - // to maintain consistency with the snapshot data. - dataDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d", cluster.Spec.PostgresVersion)) - signalFile := path.Join(dataDir, "skip-wal-recovery") - scriptParts = append(scriptParts, fmt.Sprintf("touch %q", signalFile)) - } else { - // PITR is needed, clear local WAL files since they may belong to a different timeline. - // PITR restore job will fetch the required WAL files from the repo. 
- walDir := path.Join(mount.MountPath, fmt.Sprintf("pg%d_wal", cluster.Spec.PostgresVersion)) - scriptParts = append(scriptParts, fmt.Sprintf("find %q -mindepth 1 -delete", walDir)) - } - } - script := strings.Join(scriptParts, "\n") + script := buildPrepareJobScript(instanceInfos, pgVersion, pitrEnabled) containerName := "snapshot-prepare" - container := corev1.Container{ - Name: containerName, - Image: cluster.Spec.Image, - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), - }, - Limits: corev1.ResourceList{ - corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), - }, - }, - VolumeMounts: volumeMounts, - Command: []string{"bash", "-c", script}, - } job.Spec = batchv1.JobSpec{ Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ @@ -703,13 +676,115 @@ func generatePrepareJob( }, }, Spec: corev1.PodSpec{ - Containers: []corev1.Container{container}, + Containers: []corev1.Container{{ + Name: containerName, + Image: cluster.Spec.Image, + Command: []string{"bash", "-c", script}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), + }, + }, + VolumeMounts: volumeMounts, + }}, Volumes: volumes, RestartPolicy: corev1.RestartPolicyNever, }, }, } +} + +func buildPrepareJobVolumes( + instances *appsv1.StatefulSetList, + cluster *v2.PerconaPGCluster, + pitrEnabled bool, +) ([]corev1.Volume, []corev1.VolumeMount, []instancePrepareInfo) { + var volumes []corev1.Volume + var volumeMounts []corev1.VolumeMount + var instanceInfos []instancePrepareInfo + + for _, instance := range instances.Items { + instanceSetSpec := 
instanceSetSpecForName(cluster, instance.Labels[naming.LabelInstanceSet]) + hasWALVolume := instanceSetSpec != nil && instanceSetSpec.WALVolumeClaimSpec != nil + + // When PITR + dedicated WAL volumes, we only clear the WAL directory; no need to mount data. + needDataVolume := !pitrEnabled || !hasWALVolume + + var info instancePrepareInfo + + if needDataVolume { + info.dataMountPath = path.Join("/", instance.GetName(), "pgdata") + volumes, volumeMounts = appendDataVolume(volumes, volumeMounts, &instance, info.dataMountPath) + } + + if hasWALVolume { + info.walMountPath = path.Join("/", instance.GetName(), "pgwal") + volumes, volumeMounts = appendWALVolume(volumes, volumeMounts, &instance, info.walMountPath) + } + + instanceInfos = append(instanceInfos, info) + } + + return volumes, volumeMounts, instanceInfos +} + +func appendDataVolume(volumes []corev1.Volume, mounts []corev1.VolumeMount, instance *appsv1.StatefulSet, mountPath string) ([]corev1.Volume, []corev1.VolumeMount) { + name := instance.GetName() + "-pgdata" + volumes = append(volumes, corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: naming.InstancePostgresDataVolume(instance).Name, + }, + }, + }) + mounts = append(mounts, corev1.VolumeMount{Name: name, MountPath: mountPath}) + return volumes, mounts +} + +func appendWALVolume(volumes []corev1.Volume, mounts []corev1.VolumeMount, instance *appsv1.StatefulSet, mountPath string) ([]corev1.Volume, []corev1.VolumeMount) { + name := instance.GetName() + "-pgwal" + volumes = append(volumes, corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: naming.InstancePostgresWALVolume(instance).Name, + }, + }, + }) + mounts = append(mounts, corev1.VolumeMount{Name: name, MountPath: mountPath}) + return volumes, mounts +} + +func buildPrepareJobScript(instanceInfos 
[]instancePrepareInfo, pgVersion int, pitrEnabled bool) string { + scriptParts := []string{"set -e"} + walDirSuffix := fmt.Sprintf("pg%d_wal", pgVersion) + dataDirSuffix := fmt.Sprintf("pg%d", pgVersion) + + for _, info := range instanceInfos { + if pitrEnabled { + // Clear WAL files so PITR restore can fetch from repo. WAL lives under WAL mount + // when dedicated volume is used, otherwise under pgdata. + walBase := info.dataMountPath + if info.walMountPath != "" { + walBase = info.walMountPath + } + walDir := path.Join(walBase, walDirSuffix) + scriptParts = append(scriptParts, fmt.Sprintf("find %q -mindepth 1 -delete", walDir)) + } else { + // Signal restore_command to skip WAL recovery for consistency with snapshot data. + dataDir := path.Join(info.dataMountPath, dataDirSuffix) + signalFile := path.Join(dataDir, "skip-wal-recovery") + scriptParts = append(scriptParts, fmt.Sprintf("touch %q", signalFile)) + } + } + return strings.Join(scriptParts, "\n") } // We create a $PGDATA/skip-wal-recovery file during the snapshot restore when no PITR is specified. 
From 4fb1ef977fa41e73f2282bea939076c10c4a80e6 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 14:54:32 +0530 Subject: [PATCH 77/90] update unit tests Signed-off-by: Mayank Shah --- .../pgrestore/snapshot/reconcile_test.go | 66 ++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go index df70fa90f2..fe91672388 100644 --- a/percona/controller/pgrestore/snapshot/reconcile_test.go +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -90,7 +90,7 @@ func TestGeneratePrepareJob(t *testing.T) { require.Len(t, container.VolumeMounts, 1) assert.Equal(t, "my-cluster-instance-0-pgdata", container.VolumeMounts[0].Name) - assert.Equal(t, path.Join("my-cluster-instance-0", "pgdata"), container.VolumeMounts[0].MountPath) + assert.Equal(t, path.Join("/", "my-cluster-instance-0", "pgdata"), container.VolumeMounts[0].MountPath) assert.Equal(t, corev1.RestartPolicyNever, job.Spec.Template.Spec.RestartPolicy) }) @@ -153,6 +153,70 @@ func TestGeneratePrepareJob(t *testing.T) { assert.NotContains(t, script, "skip-wal-recovery") }) + t.Run("with PITR and dedicated WAL volume clears WAL under pgwal mount", func(t *testing.T) { + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + clusterWithWAL := cluster.DeepCopy() + clusterWithWAL.Spec.InstanceSets = v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + WALVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + 
corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + }, + } + + instance := makeInstance(instanceName) + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{Items: []appsv1.StatefulSet{instance}} + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + RepoName: ptr.To("repo1"), + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, clusterWithWAL, restore) + + container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // WAL is on dedicated volume: script should clear /instance/pgwal/pg15_wal, not pgdata + walDir := path.Join("/", instanceName, "pgwal", "pg15_wal") + assert.Contains(t, script, "find") + assert.Contains(t, script, walDir) + assert.NotContains(t, script, path.Join("/", instanceName, "pgdata", "pg15_wal")) + + // PITR + dedicated WAL: only WAL PVC is mounted, not data + require.Len(t, job.Spec.Template.Spec.Volumes, 1) + assert.Equal(t, instanceName+"-pgwal", job.Spec.Template.Spec.Volumes[0].Name) + + require.Len(t, container.VolumeMounts, 1) + assert.Equal(t, instanceName+"-pgwal", container.VolumeMounts[0].Name) + assert.Equal(t, path.Join("/", instanceName, "pgwal"), container.VolumeMounts[0].MountPath) + }) + t.Run("script starts with set -e", func(t *testing.T) { job := &batchv1.Job{} instances := &appsv1.StatefulSetList{ From de9c305dbabb6c1de33008d6813cb1ecb3818452 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 14:54:38 +0530 Subject: [PATCH 78/90] spelling errors Signed-off-by: Mayank Shah --- percona/naming/finalizers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index db7bf5441c..95f8131ab4 100644 --- 
a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -21,7 +21,7 @@ const ( // FinalizerSnapshotInProgress is set on PerconaPGBackup objects. // It ensures that any changes made to the PGCluster are reverted upon - // snapshot completion (success or failure) or pre-mature deletion of the PGBackup. + // snapshot completion (success or failure) or premature deletion of the PGBackup. FinalizerSnapshotInProgress = PrefixPercona + "snapshot-in-progress" //nolint:gosec ) From df353e1663109b520eb52b01bf7034af9beb5324 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 15:07:30 +0530 Subject: [PATCH 79/90] allow configuring checkpoint timeout Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgclusters.yaml | 12 +++++++ .../pgv2.percona.com_perconapgclusters.yaml | 12 +++++++ deploy/bundle.yaml | 12 +++++++ deploy/crd.yaml | 12 +++++++ deploy/cw-bundle.yaml | 12 +++++++ .../controller/pgbackup/snapshots/offline.go | 31 ++++++++++++------- .../v2/perconapgcluster_types.go | 13 ++++++++ .../v2/zz_generated.deepcopy.go | 25 +++++++++++++++ 8 files changed, 117 insertions(+), 12 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index e80367fc0a..2f288d5b1d 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7122,6 +7122,18 @@ spec: enum: - offline type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpointTimeoutSeconds: + default: 300 + description: Timeout for the checkpoint operation. + format: int32 + minimum: 30 + type: integer + type: object schedule: description: |- Defines the Cron schedule for a VolumeSnapshot. 
diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index a03520fa0d..1910ae5564 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -7555,6 +7555,18 @@ spec: enum: - offline type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpointTimeoutSeconds: + default: 300 + description: Timeout for the checkpoint operation. + format: int32 + minimum: 30 + type: integer + type: object schedule: description: |- Defines the Cron schedule for a VolumeSnapshot. diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index e818830a0f..38db7ce17e 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -7852,6 +7852,18 @@ spec: enum: - offline type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpointTimeoutSeconds: + default: 300 + description: Timeout for the checkpoint operation. + format: int32 + minimum: 30 + type: integer + type: object schedule: description: |- Defines the Cron schedule for a VolumeSnapshot. diff --git a/deploy/crd.yaml b/deploy/crd.yaml index d2723e3df6..b057813416 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -7852,6 +7852,18 @@ spec: enum: - offline type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpointTimeoutSeconds: + default: 300 + description: Timeout for the checkpoint operation. + format: int32 + minimum: 30 + type: integer + type: object schedule: description: |- Defines the Cron schedule for a VolumeSnapshot. 
diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 36d5d1398e..0741b54476 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -7852,6 +7852,18 @@ spec: enum: - offline type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpointTimeoutSeconds: + default: 300 + description: Timeout for the checkpoint operation. + format: int32 + minimum: 30 + type: integer + type: object schedule: description: |- Defines the Cron schedule for a VolumeSnapshot. diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 96e229b594..a684e33db8 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -20,24 +20,26 @@ import ( ) const ( - checkpointTimeoutSeconds = 300 // 5mins // TODO: make this configurable - waitTimeout = 5 * time.Minute - retryInterval = 3 * time.Second + defaultCheckpointTimeoutSeconds int32 = 300 // 5mins + waitTimeout = 5 * time.Minute + retryInterval = 3 * time.Second ) type offlineExec struct { - cl client.Client - cluster *v2.PerconaPGCluster - backup *v2.PerconaPGBackup - podExec runtime.PodExecutor + cl client.Client + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + podExec runtime.PodExecutor + offlineConfig *v2.OfflineSnapshotConfig } func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { return &offlineExec{ - cl: cl, - cluster: pgCluster, - backup: pgBackup, - podExec: podExec, + cl: cl, + cluster: pgCluster, + backup: pgBackup, + podExec: podExec, + offlineConfig: pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig, } } @@ -63,11 +65,16 @@ func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, 
stdin, stdout, stderr, command...) } + timeoutSeconds := defaultCheckpointTimeoutSeconds + if e.offlineConfig != nil && e.offlineConfig.CheckpointTimeoutSeconds != nil { + timeoutSeconds = *e.offlineConfig.CheckpointTimeoutSeconds + } + stdout, stderr, err := postgres.Executor(exec). ExecInDatabasesFromQuery(ctx, `SELECT pg_catalog.current_database()`, `SET statement_timeout = :'timeout'; CHECKPOINT;`, map[string]string{ - "timeout": fmt.Sprintf("%ds", checkpointTimeoutSeconds), + "timeout": fmt.Sprintf("%ds", timeoutSeconds), "ON_ERROR_STOP": "on", // Abort when any one statement fails. "QUIET": "on", // Do not print successful statements to stdout. }) diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 3e2d2e499e..6ea233ee37 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -554,6 +554,19 @@ type VolumeSnapshots struct { // +optional // +kubebuilder:validation:MinLength=6 Schedule *string `json:"schedule,omitempty"` + + // Configuration for offline snapshot operations. + // Ignored if mode is not offline. + // +optional + OfflineConfig *OfflineSnapshotConfig `json:"offlineConfig,omitempty"` +} + +type OfflineSnapshotConfig struct { + // Timeout for the checkpoint operation. 
+ // +optional + // +kubebuilder:validation:Minimum=30 + // +kubebuilder:default=300 + CheckpointTimeoutSeconds *int32 `json:"checkpointTimeoutSeconds,omitempty"` } func (b Backups) IsVolumeSnapshotsEnabled() bool { diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 964d4bec9e..10178faca7 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -144,6 +144,26 @@ func (in *ExtensionsSpec) DeepCopy() *ExtensionsSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OfflineSnapshotConfig) DeepCopyInto(out *OfflineSnapshotConfig) { + *out = *in + if in.CheckpointTimeoutSeconds != nil { + in, out := &in.CheckpointTimeoutSeconds, &out.CheckpointTimeoutSeconds + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OfflineSnapshotConfig. +func (in *OfflineSnapshotConfig) DeepCopy() *OfflineSnapshotConfig { + if in == nil { + return nil + } + out := new(OfflineSnapshotConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PGBackRestArchive) DeepCopyInto(out *PGBackRestArchive) { *out = *in @@ -1325,6 +1345,11 @@ func (in *VolumeSnapshots) DeepCopyInto(out *VolumeSnapshots) { *out = new(string) **out = **in } + if in.OfflineConfig != nil { + in, out := &in.OfflineConfig, &out.OfflineConfig + *out = new(OfflineSnapshotConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSnapshots. 
From 0e8879cb5359bd0c94e57baf5ff685797838d425 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 15:50:45 +0530 Subject: [PATCH 80/90] fix restore wrapper Signed-off-by: Mayank Shah --- build/postgres-operator/restore-command-wrapper.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/build/postgres-operator/restore-command-wrapper.sh b/build/postgres-operator/restore-command-wrapper.sh index b9c6504ee5..d2f24a206f 100644 --- a/build/postgres-operator/restore-command-wrapper.sh +++ b/build/postgres-operator/restore-command-wrapper.sh @@ -4,7 +4,6 @@ set -e # When this marker exists (e.g. after a snapshot restore), skip all WAL recovery by # exiting non-zero. Do not remove the file so every restore_command call is skipped. if [ -f "${PGDATA}/skip-wal-recovery" ]; then - echo "Skipping WAL archive recovery" exit 1 fi From 26ed3f8b68558556742cbc9694a672712e45a766 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 15:51:36 +0530 Subject: [PATCH 81/90] consistent field naming Signed-off-by: Mayank Shah --- .../percona/generated/pgv2.percona.com_perconapgbackups.yaml | 2 +- config/crd/bases/pgv2.percona.com_perconapgclusters.yaml | 2 +- deploy/bundle.yaml | 2 +- deploy/crd.yaml | 2 +- deploy/cw-bundle.yaml | 2 +- pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index 4d66f038a8..8fe3ad1dd9 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -407,7 +407,7 @@ spec: description: Name of the VolumeSnapshot containing data volume contents. 
type: string - tablespaceVolumeRefs: + tablespaceVolumeSnapshotRefs: additionalProperties: type: string description: |- diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 1910ae5564..9b9a2ce005 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -406,7 +406,7 @@ spec: description: Name of the VolumeSnapshot containing data volume contents. type: string - tablespaceVolumeRefs: + tablespaceVolumeSnapshotRefs: additionalProperties: type: string description: |- diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 38db7ce17e..ac8e97cc67 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -701,7 +701,7 @@ spec: description: Name of the VolumeSnapshot containing data volume contents. type: string - tablespaceVolumeRefs: + tablespaceVolumeSnapshotRefs: additionalProperties: type: string description: |- diff --git a/deploy/crd.yaml b/deploy/crd.yaml index b057813416..85eb1515fb 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -701,7 +701,7 @@ spec: description: Name of the VolumeSnapshot containing data volume contents. type: string - tablespaceVolumeRefs: + tablespaceVolumeSnapshotRefs: additionalProperties: type: string description: |- diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 0741b54476..f773bcaec1 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -701,7 +701,7 @@ spec: description: Name of the VolumeSnapshot containing data volume contents. 
type: string - tablespaceVolumeRefs: + tablespaceVolumeSnapshotRefs: additionalProperties: type: string description: |- diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 86cb3e90f4..ae4836e765 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -124,7 +124,7 @@ type SnapshotStatus struct { WALVolumeSnapshotRef *string `json:"walVolumeSnapshotRef,omitempty"` // Names of the VolumeSnapshots containing tablespace volume contents. // Key is the name of the tablespace, value is the name of the VolumeSnapshot. - TablespaceVolumeSnapshotRefs map[string]string `json:"tablespaceVolumeRefs,omitempty"` + TablespaceVolumeSnapshotRefs map[string]string `json:"tablespaceVolumeSnapshotRefs,omitempty"` } // +kubebuilder:validation:Type=string From 71c54302a72dc62da632765d855e5e218338e35e Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 15:54:27 +0530 Subject: [PATCH 82/90] fix retry loop Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/offline.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index a684e33db8..7fa111908b 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -150,8 +150,12 @@ func (e *offlineExec) getBackupTarget(ctx context.Context) (string, error) { } if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - orig := e.backup.DeepCopy() bcp := e.backup.DeepCopy() + if err := e.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { + return err + } + + orig := bcp.DeepCopy() annots := bcp.GetAnnotations() if annots == nil { annots = make(map[string]string) From 1028d2399bc75a1dc11b7f1368ba4ff9c96ef8e9 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 
18:07:42 +0530 Subject: [PATCH 83/90] no need to check PITR while unsuspending Signed-off-by: Mayank Shah --- percona/controller/pgrestore/snapshot/reconcile.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go index afe9f462e9..68edde7d00 100644 --- a/percona/controller/pgrestore/snapshot/reconcile.go +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -206,7 +206,7 @@ func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Resu if ok, err := r.unsuspendAllInstances(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "resume cluster") - } else if !ok && !r.isPITRInProgress() { + } else if !ok { r.log.Info("Waiting for instances to be unsuspended") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } @@ -568,11 +568,6 @@ func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { return false, nil } -func (r *snapshotRestorer) isPITRInProgress() bool { - _, ok := r.cluster.GetAnnotations()[naming.PGBackRestRestore] - return ok -} - func (r *snapshotRestorer) reconcilePrepareJobAnnotation(ctx context.Context) error { if _, ok := r.restore.GetAnnotations()[pNaming.AnnotationPVCsPreparedAt]; ok { return nil From 5357c50170a82994d8d7f353f25750ee0228e7a6 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 18:11:41 +0530 Subject: [PATCH 84/90] error message consistency Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 3 +-- percona/controller/pgrestore/utils/pgbackrest.go | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index b5b3144a22..3ace1e8c9f 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -2,7 +2,6 @@ package pgbackup import ( "context" - "fmt" "path" "slices" "time" @@ -124,7 
+123,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re bcp.Status.State = v2.BackupFailed bcp.Status.Error = "repoName is required when method is 'pgbackrest'" }); updErr != nil { - return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + return reconcile.Result{}, errors.Errorf("failed to update backup status: %w", updErr) } return reconcile.Result{}, errors.Errorf("'repoName' is required when method is 'pgbackrest'") } diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go index d93c160686..9e32efe488 100644 --- a/percona/controller/pgrestore/utils/pgbackrest.go +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -54,7 +54,7 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { postgresCluster.Status.PGBackRest.Restore = &v1beta1.PGBackRestJobStatus{} if err := r.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { - return errors.Wrap(err, "patch PGCluster") + return errors.Wrap(err, "patch PostgresCluster status failed trying to initialize PGBackRest restore status") } if r.pgCluster.Spec.Backups.PGBackRest.Restore == nil { @@ -68,7 +68,7 @@ func (r *PGBackRestRestore) Start(ctx context.Context) error { r.pgCluster.Spec.Backups.PGBackRest.Restore.Options = r.pgRestore.Spec.Options if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch PGCluster") + return errors.Wrap(err, "patch PostgresCluster status failed trying to start restore") } return nil From b4910f5cb7e4e5ff826a575a14d18f527ab073fa Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 18:26:33 +0530 Subject: [PATCH 85/90] remove duplicate code Signed-off-by: Mayank Shah --- .../controller/pgbackup/snapshots/offline.go | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/percona/controller/pgbackup/snapshots/offline.go 
b/percona/controller/pgbackup/snapshots/offline.go index 7fa111908b..b565248e16 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -8,7 +8,6 @@ import ( "github.com/pkg/errors" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" @@ -143,30 +142,13 @@ func (e *offlineExec) getBackupTarget(ctx context.Context) (string, error) { if len(replicas) == 0 { return "", errors.New("no replica pods found") } + targetPod := replicas[0] instanceName := targetPod.GetLabels()[naming.LabelInstance] if instanceName == "" { return "", errors.New("cannot determine instance name from pod labels") } - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - bcp := e.backup.DeepCopy() - if err := e.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { - return err - } - - orig := bcp.DeepCopy() - annots := bcp.GetAnnotations() - if annots == nil { - annots = make(map[string]string) - } - annots[annotationBackupTarget] = instanceName - bcp.SetAnnotations(annots) - return e.cl.Patch(ctx, bcp, client.MergeFrom(orig)) - }); err != nil { - return "", errors.Wrap(err, "failed to update backup annotations") - } - log.Info("Selected backup target", "instance", instanceName) return instanceName, nil } From 3d314a2de2a0e4d35bf47159913a0dca4035e528 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 18:27:56 +0530 Subject: [PATCH 86/90] use fmt.Errorf Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 3ace1e8c9f..98dc4e8c0e 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -2,6 +2,7 @@ package pgbackup import ( 
"context" + "fmt" "path" "slices" "time" @@ -123,9 +124,9 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re bcp.Status.State = v2.BackupFailed bcp.Status.Error = "repoName is required when method is 'pgbackrest'" }); updErr != nil { - return reconcile.Result{}, errors.Errorf("failed to update backup status: %w", updErr) + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } - return reconcile.Result{}, errors.Errorf("'repoName' is required when method is 'pgbackrest'") + return reconcile.Result{}, errors.New("'repoName' is required when method is 'pgbackrest'") } switch pgBackup.Status.State { From e20a3cb3acd282a85ab8ff7c3c0506ef3196a359 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Wed, 11 Feb 2026 18:32:13 +0530 Subject: [PATCH 87/90] catch potential nil error Signed-off-by: Mayank Shah --- percona/controller/pgbackup/snapshots/reconcile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go index 0d78047ed2..1a95f93492 100644 --- a/percona/controller/pgbackup/snapshots/reconcile.go +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -355,7 +355,7 @@ func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context, ta } func shouldFailSnapshot(volumeSnapshot *volumesnapshotv1.VolumeSnapshot) bool { - if volumeSnapshot.Status == nil || volumeSnapshot.Status.Error == nil { + if volumeSnapshot.Status == nil || volumeSnapshot.Status.Error == nil || volumeSnapshot.Status.Error.Time.IsZero() { return false } errAt := volumeSnapshot.Status.Error.Time From 4e2203cca56eaf6ab9e99a1b6bafc2342e237913 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Thu, 12 Feb 2026 14:42:55 +0530 Subject: [PATCH 88/90] allow skipping checkpointing Signed-off-by: Mayank Shah --- .../pgv2.percona.com_perconapgclusters.yaml | 23 ++++++++--- .../pgv2.percona.com_perconapgclusters.yaml | 23 
++++++++--- deploy/bundle.yaml | 23 ++++++++--- deploy/crd.yaml | 23 ++++++++--- deploy/cw-bundle.yaml | 23 ++++++++--- .../controller/pgbackup/snapshots/offline.go | 38 +++++++++++++------ .../v2/perconapgcluster_types.go | 30 ++++++++++++++- .../v2/zz_generated.deepcopy.go | 33 ++++++++++++++-- 8 files changed, 170 insertions(+), 46 deletions(-) diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index 2f288d5b1d..21140c1bd1 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7127,12 +7127,23 @@ spec: Configuration for offline snapshot operations. Ignored if mode is not offline. properties: - checkpointTimeoutSeconds: - default: 300 - description: Timeout for the checkpoint operation. - format: int32 - minimum: 30 - type: integer + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object type: object schedule: description: |- diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 9b9a2ce005..1ecdf1ece2 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -7560,12 +7560,23 @@ spec: Configuration for offline snapshot operations. Ignored if mode is not offline. properties: - checkpointTimeoutSeconds: - default: 300 - description: Timeout for the checkpoint operation. 
- format: int32 - minimum: 30 - type: integer + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object type: object schedule: description: |- diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index ac8e97cc67..9dadb44c98 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -7857,12 +7857,23 @@ spec: Configuration for offline snapshot operations. Ignored if mode is not offline. properties: - checkpointTimeoutSeconds: - default: 300 - description: Timeout for the checkpoint operation. - format: int32 - minimum: 30 - type: integer + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object type: object schedule: description: |- diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 85eb1515fb..3fdfb8a250 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -7857,12 +7857,23 @@ spec: Configuration for offline snapshot operations. Ignored if mode is not offline. properties: - checkpointTimeoutSeconds: - default: 300 - description: Timeout for the checkpoint operation. - format: int32 - minimum: 30 - type: integer + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. 
+ Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object type: object schedule: description: |- diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index f773bcaec1..73deee7ea9 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -7857,12 +7857,23 @@ spec: Configuration for offline snapshot operations. Ignored if mode is not offline. properties: - checkpointTimeoutSeconds: - default: 300 - description: Timeout for the checkpoint operation. - format: int32 - minimum: 30 - type: integer + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object type: object schedule: description: |- diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index b565248e16..195136a1a4 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -8,6 +8,7 @@ import ( "github.com/pkg/errors" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" @@ -19,9 +20,8 @@ import ( ) const ( - defaultCheckpointTimeoutSeconds int32 = 300 // 5mins - waitTimeout = 5 * time.Minute - retryInterval = 3 * time.Second + waitTimeout = 5 * time.Minute + retryInterval = 3 * time.Second ) type offlineExec struct { @@ -32,13 +32,26 @@ type offlineExec struct { offlineConfig *v2.OfflineSnapshotConfig } +func newSnapshotConfig(pgCluster *v2.PerconaPGCluster) *v2.OfflineSnapshotConfig { + defaultConfig := v2.DefaultOfflineSnapshotConfig() + if 
pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig == nil { + return defaultConfig + } + + config := pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig + if config.Checkpoint == nil { + config.Checkpoint = defaultConfig.Checkpoint + } + return config +} + func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { return &offlineExec{ cl: cl, cluster: pgCluster, backup: pgBackup, podExec: podExec, - offlineConfig: pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig, + offlineConfig: newSnapshotConfig(pgCluster), } } @@ -48,7 +61,6 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { return "", errors.Wrap(err, "failed to get backup target pod") } - // TODO: should this be optional, since this can take a while on large datasets? if err := e.checkpoint(ctx, targetInstance); err != nil { return "", errors.Wrap(err, "failed to checkpoint instance") } @@ -60,15 +72,20 @@ func (e *offlineExec) prepare(ctx context.Context) (string, error) { } func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { - exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { - return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) 
+ log := logging.FromContext(ctx) + defaults := v2.DefaultOfflineSnapshotConfig().Checkpoint + + skip := !ptr.Deref(e.offlineConfig.Checkpoint.Enabled, *defaults.Enabled) + if skip { + log.Info("Skipping checkpoint") + return nil } - timeoutSeconds := defaultCheckpointTimeoutSeconds - if e.offlineConfig != nil && e.offlineConfig.CheckpointTimeoutSeconds != nil { - timeoutSeconds = *e.offlineConfig.CheckpointTimeoutSeconds + exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) } + timeoutSeconds := ptr.Deref(e.offlineConfig.Checkpoint.TimeoutSeconds, *defaults.TimeoutSeconds) stdout, stderr, err := postgres.Executor(exec). ExecInDatabasesFromQuery(ctx, `SELECT pg_catalog.current_database()`, `SET statement_timeout = :'timeout'; CHECKPOINT;`, @@ -85,7 +102,6 @@ func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error return fmt.Errorf("checkpoint failed: %s", stderr) } - log := logging.FromContext(ctx) log.Info("checkpoint executed", "stdout", stdout, "stderr", stderr) return nil } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 6ea233ee37..cb274ce19c 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -264,6 +265,12 @@ func (cr *PerconaPGCluster) Default() { if cr.CompareVersion("2.6.0") >= 0 && cr.Spec.AutoCreateUserSchema == nil { cr.Spec.AutoCreateUserSchema = &t } + + if cr.Spec.Backups.IsVolumeSnapshotsEnabled() && + 
cr.Spec.Backups.VolumeSnapshots.Mode == VolumeSnapshotModeOffline && + cr.Spec.Backups.VolumeSnapshots.OfflineConfig == nil { + cr.Spec.Backups.VolumeSnapshots.OfflineConfig = DefaultOfflineSnapshotConfig() + } } func (cr *PerconaPGCluster) PostgresImage() string { @@ -561,12 +568,33 @@ type VolumeSnapshots struct { OfflineConfig *OfflineSnapshotConfig `json:"offlineConfig,omitempty"` } +func DefaultOfflineSnapshotConfig() *OfflineSnapshotConfig { + return &OfflineSnapshotConfig{ + Checkpoint: &CheckpointConfig{ + Enabled: ptr.To(true), + TimeoutSeconds: ptr.To(int32(300)), + }, + } +} + type OfflineSnapshotConfig struct { + // Checkpoint configuration for offline snapshot operations. + // +optional + Checkpoint *CheckpointConfig `json:"checkpoint,omitempty"` +} + +type CheckpointConfig struct { + // If set, a checkpoint is requested. + // +optional + // +kubebuilder:default=true + Enabled *bool `json:"enabled,omitempty"` + // Timeout for the checkpoint operation. + // Ignored if checkpoint is not enabled. // +optional // +kubebuilder:validation:Minimum=30 // +kubebuilder:default=300 - CheckpointTimeoutSeconds *int32 `json:"checkpointTimeoutSeconds,omitempty"` + TimeoutSeconds *int32 `json:"timeoutSeconds,omitempty"` } func (b Backups) IsVolumeSnapshotsEnabled() bool { diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 10178faca7..0962924286 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -87,6 +87,31 @@ func (in *BuiltInExtensionsSpec) DeepCopy() *BuiltInExtensionsSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *CheckpointConfig) DeepCopyInto(out *CheckpointConfig) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + if in.TimeoutSeconds != nil { + in, out := &in.TimeoutSeconds, &out.TimeoutSeconds + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CheckpointConfig. +func (in *CheckpointConfig) DeepCopy() *CheckpointConfig { + if in == nil { + return nil + } + out := new(CheckpointConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CustomExtensionSpec) DeepCopyInto(out *CustomExtensionSpec) { *out = *in @@ -147,10 +172,10 @@ func (in *ExtensionsSpec) DeepCopy() *ExtensionsSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OfflineSnapshotConfig) DeepCopyInto(out *OfflineSnapshotConfig) { *out = *in - if in.CheckpointTimeoutSeconds != nil { - in, out := &in.CheckpointTimeoutSeconds, &out.CheckpointTimeoutSeconds - *out = new(int32) - **out = **in + if in.Checkpoint != nil { + in, out := &in.Checkpoint, &out.Checkpoint + *out = new(CheckpointConfig) + (*in).DeepCopyInto(*out) } } From 685d17229814ffaa9caa5c8cd4f8b3cadfdb2c4a Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 13 Feb 2026 15:03:26 +0530 Subject: [PATCH 89/90] address copilot comments Signed-off-by: Mayank Shah --- .../percona/generated/pgv2.percona.com_perconapgrestores.yaml | 4 ++-- config/crd/bases/pgv2.percona.com_perconapgclusters.yaml | 4 ++-- deploy/bundle.yaml | 4 ++-- deploy/crd.yaml | 4 ++-- deploy/cw-bundle.yaml | 4 ++-- percona/controller/pgbackup/snapshots/offline.go | 2 +- pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index d8430b6436..34a16e4f2f 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -92,8 +92,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName - != "") + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 1ecdf1ece2..9fbf29e325 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -22064,8 +22064,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName - != "") + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 9dadb44c98..5634e1bde6 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -22363,8 +22363,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName - != "") + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 
3fdfb8a250..7607fbb0b9 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -22363,8 +22363,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName - != "") + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 73deee7ea9..74b0b0e78c 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -22363,8 +22363,8 @@ spec: type: object x-kubernetes-validations: - message: either repoName or volumeSnapshotBackupName must be set - rule: ((has(self.repoName) && self.repoName != "") || self.volumeSnapshotBackupName - != "") + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go index 195136a1a4..0bc9af76e7 100644 --- a/percona/controller/pgbackup/snapshots/offline.go +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -38,7 +38,7 @@ func newSnapshotConfig(pgCluster *v2.PerconaPGCluster) *v2.OfflineSnapshotConfig return defaultConfig } - config := pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig + config := pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig.DeepCopy() if config.Checkpoint == nil { config.Checkpoint = defaultConfig.Checkpoint } diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index 511b4f3925..e242b1ee07 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -29,7 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` 
metav1.ObjectMeta `json:"metadata"` - // +kubebuilder:validation:XValidation:rule="((has(self.repoName) && self.repoName != \"\") || self.volumeSnapshotBackupName != \"\")",message="either repoName or volumeSnapshotBackupName must be set" + // +kubebuilder:validation:XValidation:rule="((has(self.repoName) && self.repoName != \"\") || (has(self.volumeSnapshotBackupName) && self.volumeSnapshotBackupName != \"\"))",message="either repoName or volumeSnapshotBackupName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } From e7279c90b2189a51099d240bd1182393bfd56c17 Mon Sep 17 00:00:00 2001 From: Mayank Shah Date: Fri, 13 Feb 2026 15:06:34 +0530 Subject: [PATCH 90/90] fix error message Signed-off-by: Mayank Shah --- percona/controller/pgbackup/controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 98dc4e8c0e..47db0cb35d 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -97,7 +97,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgCluster := new(v2.PerconaPGCluster) if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: request.Namespace}, pgCluster); err != nil { if !k8serrors.IsNotFound(err) { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") + return reconcile.Result{}, errors.Wrap(err, "get PerconaPGCluster") } pgCluster = nil }