diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml index f9aaf1c422..8fe3ad1dd9 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgbackups.yaml @@ -70,6 +70,13 @@ spec: type: object spec: properties: + method: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -80,14 +87,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: @@ -391,6 +401,24 @@ spec: required: - name type: object + snapshot: + properties: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeSnapshotRefs: + additionalProperties: + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. + type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. 
+ type: string + type: object state: type: string storageType: diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml index e8859d08e9..0466c5a851 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgclusters.yaml @@ -7110,6 +7110,51 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object + type: object + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. 
+ Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups diff --git a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml index e1bc4d36b7..34a16e4f2f 100644 --- a/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml +++ b/build/crd/percona/generated/pgv2.percona.com_perconapgrestores.yaml @@ -67,6 +67,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -74,10 +77,23 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. 
+ type: string + x-kubernetes-validations: + - message: volumeSnapshotBackupName is an immutable field + rule: self == oldSelf required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotBackupName must be set + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/build/postgres-operator/Dockerfile b/build/postgres-operator/Dockerfile index 2d0663c455..a159bf2710 100644 --- a/build/postgres-operator/Dockerfile +++ b/build/postgres-operator/Dockerfile @@ -69,6 +69,7 @@ COPY build/postgres-operator/init-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-entrypoint.sh /usr/local/bin COPY build/postgres-operator/postgres-liveness-check.sh /usr/local/bin COPY build/postgres-operator/postgres-readiness-check.sh /usr/local/bin +COPY build/postgres-operator/restore-command-wrapper.sh /usr/local/bin COPY hack/tools/queries /opt/crunchy/conf RUN chgrp -R 0 /opt/crunchy/conf && chmod -R g=u opt/crunchy/conf diff --git a/build/postgres-operator/init-entrypoint.sh b/build/postgres-operator/init-entrypoint.sh index 2fde1f02bf..3090212fbf 100755 --- a/build/postgres-operator/init-entrypoint.sh +++ b/build/postgres-operator/init-entrypoint.sh @@ -10,3 +10,4 @@ install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-entrypoi install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-liveness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-liveness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/postgres-readiness-check.sh" "${CRUNCHY_BINDIR}/bin/postgres-readiness-check.sh" install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/relocate-extensions.sh" "${CRUNCHY_BINDIR}/bin/relocate-extensions.sh" +install -o "$(id -u)" -g "$(id -g)" -m 0755 -D "/usr/local/bin/restore-command-wrapper.sh" 
"${CRUNCHY_BINDIR}/bin/restore-command-wrapper.sh" diff --git a/build/postgres-operator/restore-command-wrapper.sh b/build/postgres-operator/restore-command-wrapper.sh new file mode 100644 index 0000000000..d2f24a206f --- /dev/null +++ b/build/postgres-operator/restore-command-wrapper.sh @@ -0,0 +1,10 @@ +#!/bin/sh +set -e + +# When this marker exists (e.g. after a snapshot restore), skip all WAL recovery by +# exiting non-zero. Do not remove the file so every restore_command call is skipped. +if [ -f "${PGDATA}/skip-wal-recovery" ]; then + exit 1 +fi + +exec "$@" diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 2decf235d8..130daba46c 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -14,6 +14,7 @@ import ( "time" "unicode" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" "go.opentelemetry.io/otel" uzap "go.uber.org/zap" @@ -125,6 +126,8 @@ func main() { // Add Percona custom resource types to scheme assertNoError(v2.AddToScheme(mgr.GetScheme())) + assertNoError(volumesnapshotv1.AddToScheme(mgr.GetScheme())) + // add all PostgreSQL Operator controllers to the runtime manager err = addControllersToManager(ctx, mgr) assertNoError(err) diff --git a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml index 6fcdbc3bb8..836d8afba7 100644 --- a/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml +++ b/config/crd/bases/pgv2.percona.com_perconapgclusters.yaml @@ -69,6 +69,13 @@ spec: type: object spec: properties: + method: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. 
@@ -79,14 +86,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: @@ -390,6 +400,24 @@ spec: required: - name type: object + snapshot: + properties: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeSnapshotRefs: + additionalProperties: + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. + type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string + type: object state: type: string storageType: @@ -7515,6 +7543,51 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. 
+ Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object + type: object + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups @@ -21968,6 +22041,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -21975,10 +22051,23 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. 
+ type: string + x-kubernetes-validations: + - message: volumeSnapshotBackupName is an immutable field + rule: self == oldSelf required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotBackupName must be set + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/config/manager/default/manager.yaml b/config/manager/default/manager.yaml index d7a647692b..61d46218fa 100644 --- a/config/manager/default/manager.yaml +++ b/config/manager/default/manager.yaml @@ -39,6 +39,8 @@ spec: value: "1" - name: PPROF_BIND_ADDRESS value: "0" + - name: PGO_FEATURE_GATES + value: "" ports: - containerPort: 8080 name: metrics diff --git a/deploy/backup.yaml b/deploy/backup.yaml index 4ca1ee3725..34d87572fd 100644 --- a/deploy/backup.yaml +++ b/deploy/backup.yaml @@ -5,5 +5,6 @@ metadata: spec: pgCluster: cluster1 repoName: repo1 +# method: volumeSnapshot # options: # - --type=full diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index 87e3aa52fe..e090d721e8 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + method: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -374,14 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. 
pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: @@ -685,6 +695,24 @@ spec: required: - name type: object + snapshot: + properties: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeSnapshotRefs: + additionalProperties: + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. + type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string + type: object state: type: string storageType: @@ -7812,6 +7840,51 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object + type: object + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. 
+ Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups @@ -22267,6 +22340,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22274,10 +22350,23 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. + type: string + x-kubernetes-validations: + - message: volumeSnapshotBackupName is an immutable field + rule: self == oldSelf required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotBackupName must be set + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: @@ -54215,6 +54304,8 @@ spec: value: "1" - name: PPROF_BIND_ADDRESS value: "0" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/cr.yaml b/deploy/cr.yaml index 599a19c21b..83024e12f8 100644 --- a/deploy/cr.yaml +++ b/deploy/cr.yaml @@ -408,6 +408,10 @@ spec: backups: # trackLatestRestorableTime: true +# volumeSnapshots: +# mode: offline +# className: VOLUME-SNAPSHOT-CLASS +# schedule: "0 0 * * 6" pgbackrest: # metadata: # labels: diff --git a/deploy/crd.yaml b/deploy/crd.yaml index 
0ae004e7cb..b839553102 100644 --- a/deploy/crd.yaml +++ b/deploy/crd.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + method: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -374,14 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: @@ -685,6 +695,24 @@ spec: required: - name type: object + snapshot: + properties: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeSnapshotRefs: + additionalProperties: + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. + type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. + type: string + type: object state: type: string storageType: @@ -7812,6 +7840,51 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + mode: + default: offline + description: Mode of the VolumeSnapshot. 
+ enum: + - offline + type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object + type: object + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups @@ -22267,6 +22340,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22274,10 +22350,23 @@ spec: for the new PostgresCluster. pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. 
+ type: string + x-kubernetes-validations: + - message: volumeSnapshotBackupName is an immutable field + rule: self == oldSelf required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotBackupName must be set + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 11519a8144..620c9fba8a 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -364,6 +364,13 @@ spec: type: object spec: properties: + method: + default: pgbackrest + description: Method with which to perform the backup + enum: + - pgbackrest + - volumeSnapshot + type: string options: description: |- Command line options to include when running the pgBackRest backup command. @@ -374,14 +381,17 @@ spec: pgCluster: type: string repoName: - description: The name of the pgBackRest repo to run the backup command - against. + description: |- + The name of the pgBackRest repo to run the backup command against. + This is required when method is 'pgbackrest'. pattern: ^repo[1-4] type: string required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: repoName is required when method is 'pgbackrest' + rule: self.method == "volumeSnapshot" || has(self.repoName) status: properties: backupName: @@ -685,6 +695,24 @@ spec: required: - name type: object + snapshot: + properties: + dataVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing data volume + contents. + type: string + tablespaceVolumeSnapshotRefs: + additionalProperties: + type: string + description: |- + Names of the VolumeSnapshots containing tablespace volume contents. + Key is the name of the tablespace, value is the name of the VolumeSnapshot. + type: object + walVolumeSnapshotRef: + description: Name of the VolumeSnapshot containing WAL volume + contents. 
+ type: string + type: object state: type: string storageType: @@ -7812,6 +7840,51 @@ spec: trackLatestRestorableTime: description: Enable tracking latest restorable time type: boolean + volumeSnapshots: + description: VolumeSnapshots configuration + properties: + className: + description: Name of the VolumeSnapshotClass to use. + type: string + mode: + default: offline + description: Mode of the VolumeSnapshot. + enum: + - offline + type: string + offlineConfig: + description: |- + Configuration for offline snapshot operations. + Ignored if mode is not offline. + properties: + checkpoint: + description: Checkpoint configuration for offline snapshot + operations. + properties: + enabled: + default: true + description: If set, a checkpoint is requested. + type: boolean + timeoutSeconds: + default: 300 + description: |- + Timeout for the checkpoint operation. + Ignored if checkpoint is not enabled. + format: int32 + minimum: 30 + type: integer + type: object + type: object + schedule: + description: |- + Defines the Cron schedule for a VolumeSnapshot. + Follows the standard Cron schedule syntax: + https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + minLength: 6 + type: string + required: + - className + type: object type: object x-kubernetes-validations: - message: At least one repository must be configured when backups @@ -22267,6 +22340,9 @@ spec: pgCluster: description: The name of the PerconaPGCluster to perform restore. type: string + x-kubernetes-validations: + - message: pgCluster is an immutable field + rule: self == oldSelf repoName: description: |- The name of the pgBackRest repo within the source PostgresCluster that contains the backups @@ -22274,10 +22350,23 @@ spec: for the new PostgresCluster. 
pattern: ^repo[1-4] type: string + x-kubernetes-validations: + - message: repoName is an immutable field + rule: self == oldSelf + volumeSnapshotBackupName: + description: The name of the backup to perform in-place volume snapshot + restores from. + type: string + x-kubernetes-validations: + - message: volumeSnapshotBackupName is an immutable field + rule: self == oldSelf required: - pgCluster - - repoName type: object + x-kubernetes-validations: + - message: either repoName or volumeSnapshotBackupName must be set + rule: ((has(self.repoName) && self.repoName != "") || (has(self.volumeSnapshotBackupName) + && self.volumeSnapshotBackupName != "")) status: properties: completed: @@ -54213,6 +54302,8 @@ spec: value: "1" - name: PPROF_BIND_ADDRESS value: "0" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/cw-operator.yaml b/deploy/cw-operator.yaml index 0a75a83fc4..25480469ad 100644 --- a/deploy/cw-operator.yaml +++ b/deploy/cw-operator.yaml @@ -46,6 +46,8 @@ spec: value: "1" - name: PPROF_BIND_ADDRESS value: "0" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 3595d64cfe..4cc6b05aae 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -49,6 +49,8 @@ spec: value: "1" - name: PPROF_BIND_ADDRESS value: "0" + - name: PGO_FEATURE_GATES + value: "" image: docker.io/perconalab/percona-postgresql-operator:main imagePullPolicy: Always livenessProbe: diff --git a/deploy/restore.yaml b/deploy/restore.yaml index e85f417e7c..411b1508e3 100644 --- a/deploy/restore.yaml +++ b/deploy/restore.yaml @@ -5,6 +5,7 @@ metadata: spec: pgCluster: cluster1 repoName: repo1 +# volumeSnapshotBackupName: backup1 # options: # - --type=time # - --target="2022-11-30 15:12:11+03" diff --git a/e2e-tests/functions 
b/e2e-tests/functions index 3a9e66ca9a..b35c245479 100644 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -56,6 +56,7 @@ deploy_operator() { yq eval '.spec.template.spec.containers[0].image = "'${IMAGE}'"' "${DEPLOY_DIR}/${cw_prefix}operator.yaml" \ | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="DISABLE_TELEMETRY") | .value) = "'${disable_telemetry}'"' - \ | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="LOG_LEVEL") | .value) = "DEBUG"' - \ + | yq eval '(.spec.template.spec.containers[] | select(.name=="operator") | .env[] | select(.name=="PGO_FEATURE_GATES") | .value) = "'${PGO_FEATURE_GATES}'"' - \ | kubectl -n "${OPERATOR_NS:-$NAMESPACE}" apply -f - } diff --git a/e2e-tests/run-pr.csv b/e2e-tests/run-pr.csv index 5253b4a97f..beaf200b1a 100644 --- a/e2e-tests/run-pr.csv +++ b/e2e-tests/run-pr.csv @@ -5,6 +5,7 @@ custom-extensions custom-tls database-init-sql demand-backup +demand-backup-offline-snapshot finalizers init-deploy huge-pages diff --git a/e2e-tests/run-release.csv b/e2e-tests/run-release.csv index 24f64d8906..1151e8b91b 100644 --- a/e2e-tests/run-release.csv +++ b/e2e-tests/run-release.csv @@ -5,6 +5,7 @@ custom-extensions custom-tls database-init-sql demand-backup +demand-backup-offline-snapshot finalizers init-deploy huge-pages diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml new file mode 100644 index 0000000000..ae5a062d84 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/00-assert.yaml @@ -0,0 +1,24 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 120 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: perconapgclusters.pgv2.percona.com +spec: + group: pgv2.percona.com + names: + kind: PerconaPGCluster + listKind: PerconaPGClusterList + plural: perconapgclusters + singular: 
perconapgcluster + scope: Namespaced +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +metadata: + name: check-operator-deploy-status +timeout: 120 +commands: + - script: kubectl assert exist-enhanced deployment percona-postgresql-operator -n ${OPERATOR_NS:-$NAMESPACE} --field-selector status.readyReplicas=1 diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml new file mode 100644 index 0000000000..50c7842282 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/00-deploy-operator.yaml @@ -0,0 +1,21 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + init_temp_dir # do this only in the first TestStep + + PGO_FEATURE_GATES="BackupSnapshots=true" deploy_operator + deploy_client + deploy_s3_secrets +--- +apiVersion: snapshot.storage.k8s.io/v1 +kind: VolumeSnapshotClass +metadata: + name: gke-snapshot-class +driver: pd.csi.storage.gke.io +deletionPolicy: Delete \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml new file mode 100644 index 0000000000..5dc595832c --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/01-assert.yaml @@ -0,0 +1,106 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 480 +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/instance-set: instance1 + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 1 + readyReplicas: 1 + currentReplicas: 1 + updatedReplicas: 1 + 
collisionCount: 0 +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: backup-snapshot-pgbouncer + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/role: pgbouncer + annotations: + deployment.kubernetes.io/revision: '1' + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + kind: PostgresCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true +status: + observedGeneration: 1 + replicas: 3 + updatedReplicas: 3 + readyReplicas: 3 +--- +kind: Job +apiVersion: batch/v1 +metadata: + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/pgbackrest: '' + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create + postgres-operator.crunchydata.com/pgbackrest-repo: repo1 + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + controller: true + blockOwnerDeletion: true +status: + succeeded: 1 +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: backup-snapshot + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGCluster + name: backup-snapshot + controller: true + blockOwnerDeletion: true + finalizers: + - postgres-operator.crunchydata.com/finalizer +status: + instances: + - name: instance1 + readyReplicas: 3 + replicas: 3 + updatedReplicas: 3 + observedGeneration: 1 + pgbackrest: + repos: + - name: repo1 + stanzaCreated: true + proxy: + pgBouncer: + readyReplicas: 3 + replicas: 3 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: backup-snapshot +status: + pgbouncer: + ready: 3 + size: 3 + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml new file mode 100644 index 
0000000000..157d0b2a2e --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/01-create-cluster.yaml @@ -0,0 +1,14 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 10 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + get_cr "backup-snapshot" ${RANDOM} \ + | yq '.spec.backups.volumeSnapshots.className="gke-snapshot-class"' \ + | yq '.spec.backups.volumeSnapshots.mode="offline"' \ + | kubectl -n "${NAMESPACE}" apply -f - diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml new file mode 100644 index 0000000000..14ba9bf5e1 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/02-write-data.yaml @@ -0,0 +1,17 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 60 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_psql_local \ + 'CREATE DATABASE myapp; \c myapp \\\ CREATE TABLE IF NOT EXISTS myApp (id int PRIMARY KEY);' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" + + run_psql_local \ + '\c myapp \\\ INSERT INTO myApp (id) VALUES (100500)' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml new file mode 100644 index 0000000000..6848a5b79a --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/03-assert.yaml @@ -0,0 +1,10 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 30 +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: 03-read-from-primary +data: + data: ' 100500' diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml new file mode 
100644 index 0000000000..a58c96e1c3 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/03-read-from-primary.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +timeout: 30 +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + data=$(run_psql_local '\c myapp \\\ SELECT * from myApp;' "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)") + + kubectl create configmap -n "${NAMESPACE}" 03-read-from-primary --from-literal=data="${data}" diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml new file mode 100644 index 0000000000..3a662811e5 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-assert.yaml @@ -0,0 +1,28 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 560 +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: backup1 +spec: + pgCluster: backup-snapshot + method: volumeSnapshot +status: + state: Succeeded + snapshot: + dataVolumeSnapshotRef: backup1-pgdata +--- +kind: VolumeSnapshot +apiVersion: snapshot.storage.k8s.io/v1 +metadata: + name: backup1-pgdata + ownerReferences: + - apiVersion: pgv2.percona.com/v2 + kind: PerconaPGBackup + name: backup1 + controller: true + blockOwnerDeletion: true +status: + readyToUse: true \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml new file mode 100644 index 0000000000..ab38603357 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/04-create-backup-snapshot.yaml @@ -0,0 +1,7 @@ +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGBackup +metadata: + name: backup1 +spec: + pgCluster: backup-snapshot + method: volumeSnapshot diff --git 
a/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml new file mode 100644 index 0000000000..ec2c619a09 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-assert.yaml @@ -0,0 +1,41 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + pgv2.percona.com/snapshot-restore: restore1 + labels: + postgres-operator.crunchydata.com/cluster: backup-snapshot + postgres-operator.crunchydata.com/role: pgdata +spec: + dataSource: + apiGroup: snapshot.storage.k8s.io + kind: VolumeSnapshot + name: backup1-pgdata +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGRestore +metadata: + name: restore1 +status: + state: Succeeded +--- +apiVersion: pgv2.percona.com/v2 +kind: PerconaPGCluster +metadata: + name: backup-snapshot +status: + pgbouncer: + ready: 3 + size: 3 + postgres: + instances: + - name: instance1 + ready: 3 + size: 3 + ready: 3 + size: 3 + state: ready \ No newline at end of file diff --git a/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml new file mode 100644 index 0000000000..9b537d9f39 --- /dev/null +++ b/e2e-tests/tests/demand-backup-offline-snapshot/05-create-restore.yaml @@ -0,0 +1,23 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: |- + set -o errexit + set -o xtrace + + source ../../functions + + run_psql_local \ + '\c myapp \\\ TRUNCATE TABLE myApp' \ + "postgres:$(get_psql_user_pass backup-snapshot-pguser-postgres)@$(get_psql_user_host backup-snapshot-pguser-postgres)" + + cat < /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) @@ -113,7 +113,7 @@ 
func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) @@ -169,7 +169,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! -z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p" --repo=99`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p" --repo=99`, "track_commit_timestamp": "true", }) @@ -190,7 +190,7 @@ func TestPostgreSQLParameters(t *testing.T) { `grep -E "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}(Z|[\+\-][0-9]{2})$"); `, "if [ ! 
-z ${timestamp} ]; then echo ${timestamp} > /pgdata/latest_commit_timestamp.txt; fi", }, ""), - "restore_command": `pgbackrest --stanza=db archive-get %f "%p"`, + "restore_command": `/opt/crunchy/bin/restore-command-wrapper.sh pgbackrest --stanza=db archive-get %f "%p"`, "track_commit_timestamp": "true", }) }) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 2ef563ac6e..47db0cb35d 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -2,10 +2,12 @@ package pgbackup import ( "context" + "fmt" "path" "slices" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -14,6 +16,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -23,10 +26,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/clientcmd" "github.com/percona/percona-postgresql-operator/v2/percona/controller" + "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgbackup/snapshots" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" "github.com/percona/percona-postgresql-operator/v2/percona/pgbackrest" "github.com/percona/percona-postgresql-operator/v2/percona/watcher" @@ -43,16 +48,25 @@ var ErrBackupJobNotFound = errors.New("backup Job not found") // 
Reconciler holds resources for the PerconaPGBackup reconciler type PGBackupReconciler struct { - Client client.Client + Client client.Client + PodExec runtime.PodExecutor ExternalChan chan event.GenericEvent } // SetupWithManager adds the PerconaPGBackup controller to the provided runtime manager func (r *PGBackupReconciler) SetupWithManager(mgr manager.Manager) error { + if r.PodExec == nil { + var err error + r.PodExec, err = runtime.NewPodExecutor(mgr.GetConfig()) + if err != nil { + return err + } + } return (builder.ControllerManagedBy(mgr). For(&v2.PerconaPGBackup{}). WatchesRawSource(source.Channel(r.ExternalChan, &handler.EnqueueRequestForObject{})). + Owns(&volumesnapshotv1.VolumeSnapshot{}). Complete(r)) } @@ -80,6 +94,18 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re pgBackup.Default() + pgCluster := new(v2.PerconaPGCluster) + if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: request.Namespace}, pgCluster); err != nil { + if !k8serrors.IsNotFound(err) { + return reconcile.Result{}, errors.Wrap(err, "get PerconaPGCluster") + } + pgCluster = nil + } + + if *pgBackup.Spec.Method == v2.BackupMethodVolumeSnapshot { + return snapshots.Reconcile(ctx, r.Client, r.PodExec, pgBackup, pgCluster) + } + if !pgBackup.DeletionTimestamp.IsZero() || pgBackup.Status.State == v2.BackupFailed { if _, err := runFinalizers(ctx, r.Client, pgBackup); err != nil { return reconcile.Result{}, errors.Wrap(err, "failed to run finalizers") @@ -93,12 +119,14 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re } } - pgCluster := new(v2.PerconaPGCluster) - if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Spec.PGCluster, Namespace: request.Namespace}, pgCluster); err != nil { - if !k8serrors.IsNotFound(err) { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") + if ptr.Deref(pgBackup.Spec.RepoName, "") == "" { + if updErr := 
pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = "repoName is required when method is 'pgbackrest'" + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) } - pgCluster = nil + return reconcile.Result{}, errors.New("'repoName' is required when method is 'pgbackrest'") } switch pgBackup.Status.State { @@ -152,10 +180,10 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re repo := getRepo(pgCluster, pgBackup) if repo == nil { - return reconcile.Result{}, errors.Errorf("%s repo not defined", pgBackup.Spec.RepoName) + return reconcile.Result{}, errors.Errorf("'%s' repo not defined", ptr.Deref(pgBackup.Spec.RepoName, "")) } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.Destination = getDestination(pgCluster, pgBackup) bcp.Status.Image = pgCluster.Spec.Backups.PGBackRest.Image bcp.Status.Repo = repo @@ -219,7 +247,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{}, errors.Wrap(err, "update PGBackup") } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupRunning bcp.Status.JobName = job.Name }); err != nil { @@ -233,7 +261,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re if err != nil { // If something has deleted the job even with the finalizer, we should fail the backup. 
if k8serrors.IsNotFound(err) { - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed }); err != nil { return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") @@ -265,7 +293,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{RequeueAfter: time.Second * 5}, nil } - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.CompletedAt = job.Status.CompletionTime bcp.Status.State = status }); err != nil { @@ -301,7 +329,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re if err == nil { log.Info("Got latest restorable timestamp", "timestamp", latestRestorableTime) - if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, r.Client, func(bcp *v2.PerconaPGBackup) { bcp.Status.LatestRestorableTime.Time = latestRestorableTime }); err != nil { return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") @@ -411,7 +439,7 @@ func getBackupInProgress(ctx context.Context, c client.Client, clusterName, ns s } func getRepo(pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) *v1beta1.PGBackRestRepo { - repoName := pb.Spec.RepoName + repoName := ptr.Deref(pb.Spec.RepoName, "") for i, r := range pg.Spec.Backups.PGBackRest.Repos { if repoName == r.Name { return &pg.Spec.Backups.PGBackRest.Repos[i] @@ -444,7 +472,7 @@ func getDestination(pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) string { } func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, pgBackup *v2.PerconaPGBackup) error { - info, err := pgbackrest.GetInfo(ctx, pod, pgBackup.Spec.RepoName) + info, err := pgbackrest.GetInfo(ctx, pod, ptr.Deref(pgBackup.Spec.RepoName, 
"")) if err != nil { return errors.Wrap(err, "get pgBackRest info") } @@ -473,7 +501,7 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, stanzaName = info.Name if pgBackup.Status.BackupName == "" { - if err := updateStatus(ctx, c, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, c, func(bcp *v2.PerconaPGBackup) { bcp.Status.BackupName = backup.Label bcp.Status.BackupType = backup.Type }); err != nil { @@ -481,7 +509,7 @@ func updatePGBackrestInfo(ctx context.Context, c client.Client, pod *corev1.Pod, } } - if err := pgbackrest.SetAnnotationsToBackup(ctx, pod, stanzaName, backup.Label, pgBackup.Spec.RepoName, map[string]string{ + if err := pgbackrest.SetAnnotationsToBackup(ctx, pod, stanzaName, backup.Label, ptr.Deref(pgBackup.Spec.RepoName, ""), map[string]string{ v2.PGBackrestAnnotationJobName: pgBackup.Status.JobName, }); err != nil { return errors.Wrap(err, "set annotations to backup") @@ -636,7 +664,7 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e pg.Spec.Backups.PGBackRest.Manual = new(v1beta1.PGBackRestManualBackup) } - pg.Spec.Backups.PGBackRest.Manual.RepoName = pb.Spec.RepoName + pg.Spec.Backups.PGBackRest.Manual.RepoName = ptr.Deref(pb.Spec.RepoName, "") pg.Spec.Backups.PGBackRest.Manual.Options = pb.Spec.Options return c.Update(ctx, pg) @@ -664,7 +692,7 @@ func findBackupJob(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) err := c.List(ctx, jobList, client.InNamespace(pb.Namespace), client.MatchingLabelsSelector{ - Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, pb.Spec.RepoName, naming.BackupManual), + Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, ptr.Deref(pb.Spec.RepoName, ""), naming.BackupManual), }) if err != nil { return nil, errors.Wrap(err, "get backup jobs") @@ -722,23 +750,10 @@ func failIfClusterIsNotReady(ctx context.Context, cl client.Client, pgCluster *v log.Info("Cluster is not ready for 
backup for too long. Setting it's state to Failed") - if err := updateStatus(ctx, cl, pgBackup, func(bcp *v2.PerconaPGBackup) { + if err := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { bcp.Status.State = v2.BackupFailed }); err != nil { return errors.Wrap(err, "update PGBackup status") } return nil } - -func updateStatus(ctx context.Context, cl client.Client, pgBackup *v2.PerconaPGBackup, updateFunc func(bcp *v2.PerconaPGBackup)) error { - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - bcp := new(v2.PerconaPGBackup) - if err := cl.Get(ctx, client.ObjectKeyFromObject(pgBackup), bcp); err != nil { - return errors.Wrap(err, "get PGBackup") - } - - updateFunc(bcp) - - return cl.Status().Update(ctx, bcp) - }) -} diff --git a/percona/controller/pgbackup/snapshots/offline.go b/percona/controller/pgbackup/snapshots/offline.go new file mode 100644 index 0000000000..0bc9af76e7 --- /dev/null +++ b/percona/controller/pgbackup/snapshots/offline.go @@ -0,0 +1,170 @@ +package snapshots + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + "github.com/percona/percona-postgresql-operator/v2/internal/postgres" + perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" +) + +const ( + waitTimeout = 5 * time.Minute + retryInterval = 3 * time.Second +) + +type offlineExec struct { + cl client.Client + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + podExec runtime.PodExecutor + offlineConfig *v2.OfflineSnapshotConfig +} + +func newSnapshotConfig(pgCluster *v2.PerconaPGCluster) 
*v2.OfflineSnapshotConfig { + defaultConfig := v2.DefaultOfflineSnapshotConfig() + if pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig == nil { + return defaultConfig + } + + config := pgCluster.Spec.Backups.VolumeSnapshots.OfflineConfig.DeepCopy() + if config.Checkpoint == nil { + config.Checkpoint = defaultConfig.Checkpoint + } + return config +} + +func newOfflineExec(cl client.Client, podExec runtime.PodExecutor, pgCluster *v2.PerconaPGCluster, pgBackup *v2.PerconaPGBackup) *offlineExec { + return &offlineExec{ + cl: cl, + cluster: pgCluster, + backup: pgBackup, + podExec: podExec, + offlineConfig: newSnapshotConfig(pgCluster), + } +} + +func (e *offlineExec) prepare(ctx context.Context) (string, error) { + targetInstance, err := e.getBackupTarget(ctx) + if err != nil { + return "", errors.Wrap(err, "failed to get backup target pod") + } + + if err := e.checkpoint(ctx, targetInstance); err != nil { + return "", errors.Wrap(err, "failed to checkpoint instance") + } + + if err := e.suspendInstance(ctx, targetInstance); err != nil { + return "", errors.Wrap(err, "failed to suspend instance") + } + return targetInstance, nil +} + +func (e *offlineExec) checkpoint(ctx context.Context, instanceName string) error { + log := logging.FromContext(ctx) + defaults := v2.DefaultOfflineSnapshotConfig().Checkpoint + + skip := !ptr.Deref(e.offlineConfig.Checkpoint.Enabled, *defaults.Enabled) + if skip { + log.Info("Skipping checkpoint") + return nil + } + + exec := func(_ context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string) error { + return e.podExec(ctx, e.cluster.GetNamespace(), instanceName+"-0", naming.ContainerDatabase, stdin, stdout, stderr, command...) + } + + timeoutSeconds := ptr.Deref(e.offlineConfig.Checkpoint.TimeoutSeconds, *defaults.TimeoutSeconds) + stdout, stderr, err := postgres.Executor(exec). 
+ ExecInDatabasesFromQuery(ctx, `SELECT pg_catalog.current_database()`, + `SET statement_timeout = :'timeout'; CHECKPOINT;`, + map[string]string{ + "timeout": fmt.Sprintf("%ds", timeoutSeconds), + "ON_ERROR_STOP": "on", // Abort when any one statement fails. + "QUIET": "on", // Do not print successful statements to stdout. + }) + if err != nil { + return errors.Wrap(err, "failed to execute checkpoint") + } + + if stderr != "" { + return fmt.Errorf("checkpoint failed: %s", stderr) + } + + log.Info("checkpoint executed", "stdout", stdout, "stderr", stderr) + return nil +} + +func (e *offlineExec) suspendInstance(ctx context.Context, instanceName string) error { + // Suspend and wait + instanceKey := client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName} + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { + return perconaPG.SuspendInstance(ctx, e.cl, instanceKey) + }); err != nil { + return errors.Wrap(err, "failed to wait for suspension") + } + return nil +} + +func (e *offlineExec) resumeInstance(ctx context.Context, instanceName string) error { + // unsuspend and wait + instanceKey := client.ObjectKey{Namespace: e.cluster.GetNamespace(), Name: instanceName} + if err := wait.PollUntilContextTimeout(ctx, retryInterval, waitTimeout, true, func(ctx context.Context) (bool, error) { + return perconaPG.UnsuspendInstance(ctx, e.cl, instanceKey) + }); err != nil { + return errors.Wrap(err, "failed to wait for unsuspend") + } + return nil +} + +func (e *offlineExec) finalize(ctx context.Context) error { + targetInstance, err := e.getBackupTarget(ctx) + if err != nil { + return errors.Wrap(err, "failed to get backup target") + } + + if err := e.resumeInstance(ctx, targetInstance); err != nil { + return errors.Wrap(err, "failed to resume instance") + } + return nil +} + +func (e *offlineExec) getBackupTarget(ctx context.Context) (string, error) { + // If we already determined it before, 
use it. + if name, ok := e.backup.GetAnnotations()[annotationBackupTarget]; ok && name != "" { + return name, nil + } + + log := logging.FromContext(ctx) + + // TODO: single node clusters do not have replicas. + // We should allow using a primary pod as the backup target. + // Since this is unsafe, we should let the user explicitly opt-in for this behavior. + replicas, err := perconaPG.GetReplicaPods(ctx, e.cl, e.cluster) + if err != nil { + return "", errors.Wrap(err, "failed to get replica pods") + } + if len(replicas) == 0 { + return "", errors.New("no replica pods found") + } + + targetPod := replicas[0] + instanceName := targetPod.GetLabels()[naming.LabelInstance] + if instanceName == "" { + return "", errors.New("cannot determine instance name from pod labels") + } + + log.Info("Selected backup target", "instance", instanceName) + return instanceName, nil +} diff --git a/percona/controller/pgbackup/snapshots/reconcile.go b/percona/controller/pgbackup/snapshots/reconcile.go new file mode 100644 index 0000000000..1a95f93492 --- /dev/null +++ b/percona/controller/pgbackup/snapshots/reconcile.go @@ -0,0 +1,494 @@ +package snapshots + +import ( + "context" + "fmt" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming 
"github.com/percona/percona-postgresql-operator/v2/percona/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" +) + +const ( + annotationBackupTarget = pNaming.PrefixPerconaPGV2 + "backup-target" + + defaultSnapshotErrorDeadline = 5 * time.Minute +) + +type snapshotExecutor interface { + // Prepare the cluster for performing a snapshot. + // Returns the name of the instance whose PVCs will be snapshotted. + prepare(ctx context.Context) (string, error) + // Complete the snapshot. + finalize(ctx context.Context) error +} + +type snapshotReconciler struct { + cl client.Client + log logging.Logger + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + exec snapshotExecutor +} + +func newSnapshotReconciler( + cl client.Client, + log logging.Logger, + cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, + exec snapshotExecutor, +) *snapshotReconciler { + return &snapshotReconciler{ + cl: cl, + log: log, + cluster: cluster, + backup: backup, + exec: exec, + } +} + +func newSnapshotExec( + cl client.Client, + podExec runtime.PodExecutor, + cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, +) (snapshotExecutor, error) { + switch mode := cluster.Spec.Backups.VolumeSnapshots.Mode; mode { + case v2.VolumeSnapshotModeOffline: + return newOfflineExec(cl, podExec, cluster, backup), nil + default: + return nil, fmt.Errorf("invalid or unsupported volume snapshot mode: %s", mode) + } +} + +// Reconcile backup snapshot +func Reconcile( + ctx context.Context, + cl client.Client, + podExec runtime.PodExecutor, + pgBackup *v2.PerconaPGBackup, + pgCluster *v2.PerconaPGCluster, +) (reconcile.Result, error) { + if pgBackup == nil || pgCluster == nil { + return reconcile.Result{}, errors.New("PerconaPGBackup or PerconaPGCluster is nil or not found") + } + + log := logging.FromContext(ctx). + WithName("SnapshotReconciler"). 
+ WithValues("backup", pgBackup.Name, "cluster", pgCluster.Name) + + // Do nothing if the feature is not enabled. + if !feature.Enabled(ctx, feature.BackupSnapshots) { + log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot reconciliation", feature.BackupSnapshots)) + return reconcile.Result{}, nil + } + + // Check if volume snapshots are enabled for this cluster. + if !pgCluster.Spec.Backups.IsVolumeSnapshotsEnabled() { + if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = "Volume snapshots are not enabled for this cluster" + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + return reconcile.Result{}, nil + } + + exec, err := newSnapshotExec(cl, podExec, pgCluster, pgBackup) + if err != nil { + stsErr := fmt.Errorf("invalid or unsupported volume snapshot mode: %s", pgCluster.Spec.Backups.VolumeSnapshots.Mode) + if updErr := pgBackup.UpdateStatus(ctx, cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + bcp.Status.Error = stsErr.Error() + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + return reconcile.Result{}, stsErr + } + + r := newSnapshotReconciler(cl, log, pgCluster, pgBackup, exec) + return r.reconcile(ctx) +} + +func (r *snapshotReconciler) reconcile(ctx context.Context) (reconcile.Result, error) { + if !r.backup.GetDeletionTimestamp().IsZero() { + return reconcile.Result{}, r.complete(ctx) + } + + switch r.backup.Status.State { + case v2.BackupNew: + return r.reconcileNew(ctx) + case v2.BackupStarting: + return r.reconcileStarting(ctx) + case v2.BackupRunning: + return r.reconcileRunning(ctx) + case v2.BackupFailed, v2.BackupSucceeded: + return reconcile.Result{}, r.complete(ctx) + } + return reconcile.Result{}, nil +} + +func (r *snapshotReconciler) reconcileNew(ctx context.Context) (reconcile.Result, error) { 
+ if r.cluster.Status.State != v2.AppStateReady { + r.log.Info("Waiting for cluster to be ready before creating snapshot") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupStarting + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + r.log.Info("Snapshot is starting") + return reconcile.Result{}, nil +} + +func (r *snapshotReconciler) reconcileStarting(ctx context.Context) (reconcile.Result, error) { + if err := r.prepare(ctx); err != nil { + return reconcile.Result{}, err + } + + if updErr := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupRunning + bcp.Status.Snapshot = &v2.SnapshotStatus{} + }); updErr != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", updErr) + } + r.log.Info("Snapshot is running") + return reconcile.Result{}, nil +} + +// +kubebuilder:rbac:groups=snapshot.storage.k8s.io,resources=volumesnapshots,verbs=get;list;watch;create +func (r *snapshotReconciler) reconcileRunning(ctx context.Context) (reconcile.Result, error) { + dataPVC, walPVC, tablespacePVCs, err := r.getTargetPVCs(ctx) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to get target PVCs: %w", err) + } + + dataOk, err := r.reconcileDataSnapshot(ctx, dataPVC) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile data snapshot: %w", err) + } + + walOk, err := r.reconcileWALSnapshot(ctx, walPVC) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile WAL snapshot: %w", err) + } + + tablespaceOk, err := r.reconcileTablespaceSnapshot(ctx, tablespacePVCs) + if err != nil { + return reconcile.Result{}, fmt.Errorf("failed to reconcile tablespace snapshot: %w", err) + } + + if !dataOk || !walOk || !tablespaceOk { + return reconcile.Result{RequeueAfter: 
time.Second * 5}, nil + } + + if err := r.complete(ctx); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to complete snapshot: %w", err) + } + + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupSucceeded + bcp.Status.CompletedAt = ptr.To(metav1.Now()) + }); err != nil { + return reconcile.Result{}, fmt.Errorf("failed to update backup status: %w", err) + } + return reconcile.Result{}, nil +} + +func (r *snapshotReconciler) reconcileSnapshot(ctx context.Context, volumeSnapshot *volumesnapshotv1.VolumeSnapshot) (bool, error) { + created, err := r.ensureSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to ensure snapshot: %w", err) + } + + log := r.log.WithValues("snapshot", volumeSnapshot.GetName()) + if created { + log.Info("Volume snapshot created successfully") + return false, nil // return back later to observe the status + } + + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(volumeSnapshot), volumeSnapshot); err != nil { + return false, fmt.Errorf("failed to get volume snapshot: %w", err) + } + + switch { + // no status reported + case volumeSnapshot.Status == nil: + return false, nil + + // snapshot is complete and ready to be restored. + case ptr.Deref(volumeSnapshot.Status.ReadyToUse, false): + log.Info("Snapshot is complete and ready to be used") + return true, nil + + // error occurred while creating the snapshot. + case volumeSnapshot.Status.Error != nil: + // Some errors can be transient, so we should wait for a while before giving up. + message := ptr.Deref(volumeSnapshot.Status.Error.Message, "") + if !shouldFailSnapshot(volumeSnapshot) { + r.log.Info("Snapshot is in error state, but within deadline. 
Retrying.", "message", message) + return false, nil + } + + err := errors.New(message) + + log.Error(err, "Volume snapshot failed") + return false, err + + default: + return false, nil + } +} + +func (r *snapshotReconciler) generateSnapshotIntent( + snapshotRole, + sourcePVC string) (*volumesnapshotv1.VolumeSnapshot, error) { + name := r.backup.GetName() + "-" + snapshotRole + namespace := r.backup.GetNamespace() + volumeSnapshot := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(r.cluster.Spec.Backups.VolumeSnapshots.ClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: &sourcePVC, + }, + }, + } + if err := controllerutil.SetControllerReference(r.backup, volumeSnapshot, r.cl.Scheme()); err != nil { + return nil, fmt.Errorf("failed to set owner reference on volume snapshot: %w", err) + } + return volumeSnapshot, nil +} + +func (r *snapshotReconciler) reconcileDataSnapshot(ctx context.Context, targetPVC string) (bool, error) { + volumeSnapshot, err := r.generateSnapshotIntent(naming.RolePostgresData, targetPVC) + if err != nil { + return false, fmt.Errorf("failed to generate snapshot intent: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.Snapshot.DataVolumeSnapshotRef = ptr.To(volumeSnapshot.GetName()) + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + return ok, nil +} + +func (r *snapshotReconciler) reconcileWALSnapshot(ctx context.Context, targetPVC string) (bool, error) { + if targetPVC == "" { + return true, nil + } + + volumeSnapshot, err := r.generateSnapshotIntent(naming.RolePostgresWAL, targetPVC) + if err != nil { + return false, 
fmt.Errorf("failed to generate snapshot intent: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + bcp.Status.Snapshot.WALVolumeSnapshotRef = ptr.To(volumeSnapshot.GetName()) + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + return ok, nil +} + +func (r *snapshotReconciler) reconcileTablespaceSnapshot(ctx context.Context, targetPVCs map[string]string) (bool, error) { + if len(targetPVCs) == 0 { + return true, nil + } + + done := true + for tsName, targetPVC := range targetPVCs { + role := tsName + "-" + naming.RoleTablespace + volumeSnapshot, err := r.generateSnapshotIntent(role, targetPVC) + if err != nil { + return false, fmt.Errorf("failed to generate snapshot intent: %w", err) + } + + ok, err := r.reconcileSnapshot(ctx, volumeSnapshot) + if err != nil { + return false, fmt.Errorf("failed to reconcile snapshot: %w", err) + } + + if err := r.backup.UpdateStatus(ctx, r.cl, func(bcp *v2.PerconaPGBackup) { + if bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs == nil { + bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs = make(map[string]string) + } + bcp.Status.Snapshot.TablespaceVolumeSnapshotRefs[tsName] = volumeSnapshot.GetName() + }); err != nil { + return false, fmt.Errorf("failed to update backup status: %w", err) + } + if !ok { + done = false + } + } + return done, nil +} + +func shouldFailSnapshot(volumeSnapshot *volumesnapshotv1.VolumeSnapshot) bool { + if volumeSnapshot.Status == nil || volumeSnapshot.Status.Error == nil || volumeSnapshot.Status.Error.Time.IsZero() { + return false + } + errAt := volumeSnapshot.Status.Error.Time + return !errAt.IsZero() && time.Now().After(errAt.Add(defaultSnapshotErrorDeadline)) +} + +func (r *snapshotReconciler) ensureSnapshot(ctx context.Context, volumeSnapshot 
*volumesnapshotv1.VolumeSnapshot) (bool, error) { + if err := r.cl.Create(ctx, volumeSnapshot); err != nil { + return false, client.IgnoreAlreadyExists(err) + } + return true, nil +} + +func (r *snapshotReconciler) getTargetPVCs(ctx context.Context) (string, string, map[string]string, error) { + targetInstance := r.backup.GetAnnotations()[annotationBackupTarget] + if targetInstance == "" { + return "", "", nil, fmt.Errorf("backup target instance is not found") + } + + dataPVC := "" + var dataVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, &dataVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RolePostgresData, + }), + }); err != nil { + return "", "", nil, fmt.Errorf("failed to list data volumes: %w", err) + } + if len(dataVolumes.Items) == 1 { + dataPVC = dataVolumes.Items[0].GetName() + } else { + return "", "", nil, fmt.Errorf("unexpected number of data volumes: %d", len(dataVolumes.Items)) + } + + walPVC := "" + var walVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, &walVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RolePostgresWAL, + }), + }); err != nil { + return "", "", nil, fmt.Errorf("failed to list WAL volumes: %w", err) + } + if len(walVolumes.Items) == 1 { + walPVC = walVolumes.Items[0].GetName() + } + + tablespacePVCs := make(map[string]string) + var tablespaceVolumes corev1.PersistentVolumeClaimList + if err := r.cl.List(ctx, &tablespaceVolumes, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelInstance: targetInstance, + naming.LabelRole: naming.RoleTablespace, + }), + }); err != nil { + return "", "", nil, fmt.Errorf("failed to list 
tablespace volumes: %w", err) + } + + for _, vol := range tablespaceVolumes.Items { + name := vol.GetLabels()[naming.LabelData] + tablespacePVCs[name] = vol.GetName() + } + + return dataPVC, walPVC, tablespacePVCs, nil +} + +func (r *snapshotReconciler) prepare(ctx context.Context) error { + // finalizer already present, prepare already completed + if controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { + return nil + } + + // prepare the cluster + targetInstance, err := r.exec.prepare(ctx) + if err != nil { + return fmt.Errorf("failed to prepare for snapshot: %w", err) + } + + // Store the backup target instance for later retrieval. + orig := r.backup.DeepCopy() + annotations := r.backup.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[annotationBackupTarget] = targetInstance + r.backup.SetAnnotations(annotations) + if err := r.cl.Patch(ctx, r.backup.DeepCopy(), client.MergeFrom(orig)); err != nil { + return fmt.Errorf("failed to patch backup annotations: %w", err) + } + + // add finalizer + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := r.backup.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { + return err + } + orig := bcp.DeepCopy() + controllerutil.AddFinalizer(bcp, pNaming.FinalizerSnapshotInProgress) + return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) + }); err != nil { + return fmt.Errorf("failed to add backup finalizer: %w", err) + } + r.log.Info("Prepared for snapshot") + return nil +} + +func (r *snapshotReconciler) complete(ctx context.Context) error { + // already finalized + if !controllerutil.ContainsFinalizer(r.backup, pNaming.FinalizerSnapshotInProgress) { + return nil + } + + // run finalize + if err := r.exec.finalize(ctx); err != nil { + return fmt.Errorf("finalize failed: %w", err) + } + + // remove finalizer + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := 
r.backup.DeepCopy() + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(bcp), bcp); err != nil { + return err + } + orig := bcp.DeepCopy() + controllerutil.RemoveFinalizer(bcp, pNaming.FinalizerSnapshotInProgress) + return r.cl.Patch(ctx, bcp, client.MergeFrom(orig)) + }); err != nil { + return fmt.Errorf("failed to remove finalizer: %w", err) + } + return nil +} diff --git a/percona/controller/pgbackup/snapshots/reconcile_test.go b/percona/controller/pgbackup/snapshots/reconcile_test.go new file mode 100644 index 0000000000..fd6ce033ec --- /dev/null +++ b/percona/controller/pgbackup/snapshots/reconcile_test.go @@ -0,0 +1,493 @@ +package snapshots + +import ( + "context" + "testing" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" +) + +func TestShouldFailSnapshot(t *testing.T) { + now := time.Now() + + tests := []struct { + name string + volumeSnapshot *volumesnapshotv1.VolumeSnapshot + wantFail bool + }{ + { + name: "Status.Error.Time is zero", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: ptr.To(metav1.Time{}), + }, + }, + }, + wantFail: false, + }, + { + name: "error within deadline", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: 
ptr.To(metav1.NewTime(now.Add(-1 * time.Minute))), // 1mins ago, within deadline + }, + }, + }, + wantFail: false, + }, + { + name: "error past deadline", + volumeSnapshot: &volumesnapshotv1.VolumeSnapshot{ + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + Error: &volumesnapshotv1.VolumeSnapshotError{ + Time: ptr.To(metav1.NewTime(now.Add(-10 * time.Minute))), // 10 minutes ago (past 5min deadline) + }, + }, + }, + wantFail: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.wantFail, shouldFailSnapshot(tt.volumeSnapshot)) + }) + } +} + +func TestReconcileDataSnapshot(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + pvcName := "data-pvc" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("creates VolumeSnapshot and updates backup status", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileDataSnapshot(ctx, pvcName) + require.NoError(t, err) + assert.False(t, ok, "snapshot not ready yet") + + vsName := backupName + "-" + naming.RolePostgresData + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, pvcName, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + assert.Equal(t, vsName, *updated.Status.Snapshot.DataVolumeSnapshotRef) + }) + + t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { + vsName := backupName + "-" + naming.RolePostgresData + existingVS := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vsName, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(pvcName), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + ReadyToUse: ptr.To(true), + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS). + WithStatusSubresource(backup, existingVS). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileDataSnapshot(ctx, pvcName) + require.NoError(t, err) + assert.True(t, ok, "snapshot ready") + }) +} + +func TestReconcileWALSnapshot(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + walPVCName := "wal-pvc" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("returns true when target PVC is empty", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx, "") + require.NoError(t, err) + assert.True(t, ok, "no WAL volume to snapshot") + }) + + t.Run("creates VolumeSnapshot and updates backup status", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). 
+ WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx, walPVCName) + require.NoError(t, err) + assert.False(t, ok, "snapshot not ready yet") + + vsName := backupName + "-" + naming.RolePostgresWAL + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, walPVCName, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + assert.Equal(t, vsName, *updated.Status.Snapshot.WALVolumeSnapshotRef) + }) + + t.Run("returns true when existing VolumeSnapshot is ReadyToUse", func(t *testing.T) { + vsName := backupName + "-" + naming.RolePostgresWAL + existingVS := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vsName, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(walPVCName), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ + ReadyToUse: ptr.To(true), + }, + } + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS). + WithStatusSubresource(backup, existingVS). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileWALSnapshot(ctx, walPVCName) + require.NoError(t, err) + assert.True(t, ok, "snapshot ready") + }) +} + +func TestReconcileTablespaceSnapshot(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + snapshotClassName := "test-snapshotclass" + ts1Name, ts2Name := "ts1", "ts2" + ts1PVC, ts2PVC := "pvc-ts1", "pvc-ts2" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + noopExec := &mockSnapshotExecutor{} + + t.Run("returns true when TablespaceVolumes is empty", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx, nil) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to snapshot") + }) + + t.Run("creates VolumeSnapshots and updates backup status", func(t *testing.T) { + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster). + WithStatusSubresource(backup). + Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx, map[string]string{ + ts1Name: ts1PVC, + ts2Name: ts2PVC, + }) + require.NoError(t, err) + assert.False(t, ok, "snapshots not ready yet") + + for _, tc := range []struct { + tsName string + pvc string + }{ + {ts1Name, ts1PVC}, + {ts2Name, ts2PVC}, + } { + vsName := backupName + "-" + tc.tsName + "-" + naming.RoleTablespace + vs := &volumesnapshotv1.VolumeSnapshot{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: vsName}, vs)) + assert.Equal(t, snapshotClassName, ptr.Deref(vs.Spec.VolumeSnapshotClassName, "")) + assert.Equal(t, tc.pvc, ptr.Deref(vs.Spec.Source.PersistentVolumeClaimName, "")) + } + + updated := &v2.PerconaPGBackup{} + require.NoError(t, cl.Get(ctx, client.ObjectKeyFromObject(backup), updated)) + require.NotNil(t, updated.Status.Snapshot) + assert.Equal(t, backupName+"-"+ts1Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumeSnapshotRefs[ts1Name]) + assert.Equal(t, backupName+"-"+ts2Name+"-"+naming.RoleTablespace, updated.Status.Snapshot.TablespaceVolumeSnapshotRefs[ts2Name]) + }) + + t.Run("returns true when all existing VolumeSnapshots are ReadyToUse", func(t *testing.T) { + vs1Name := 
backupName + "-" + ts1Name + "-" + naming.RoleTablespace + vs2Name := backupName + "-" + ts2Name + "-" + naming.RoleTablespace + existingVS1 := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vs1Name, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(ts1PVC), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ReadyToUse: ptr.To(true)}, + } + existingVS2 := &volumesnapshotv1.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{Name: vs2Name, Namespace: ns}, + Spec: volumesnapshotv1.VolumeSnapshotSpec{ + VolumeSnapshotClassName: ptr.To(snapshotClassName), + Source: volumesnapshotv1.VolumeSnapshotSource{ + PersistentVolumeClaimName: ptr.To(ts2PVC), + }, + }, + Status: &volumesnapshotv1.VolumeSnapshotStatus{ReadyToUse: ptr.To(true)}, + } + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{}, + }, + } + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup.DeepCopy(), cluster, existingVS1, existingVS2). + WithStatusSubresource(backup, existingVS1, existingVS2). 
+ Build() + + r := newSnapshotReconciler(cl, logging.Discard(), cluster, backup, noopExec) + ok, err := r.reconcileTablespaceSnapshot(ctx, map[string]string{ + ts1Name: ts1PVC, + ts2Name: ts2PVC, + }) + require.NoError(t, err) + assert.True(t, ok, "all tablespace snapshots ready") + }) +} + +func TestGenerateSnapshotIntent(t *testing.T) { + ns := "test-ns" + backupName := "my-backup" + clusterName := "my-cluster" + snapshotClassName := "test-snapshotclass" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + Backups: v2.Backups{ + VolumeSnapshots: &v2.VolumeSnapshots{ + Mode: v2.VolumeSnapshotModeOffline, + ClassName: snapshotClassName, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns, UID: "backup-uid"}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(backup, cluster). 
// mockSnapshotExecutor is a stub executor for tests: prepare always reports
// "instance-0" as the target instance and finalize always succeeds.
type mockSnapshotExecutor struct{}

// prepare ignores its context and returns the fixed instance name.
func (*mockSnapshotExecutor) prepare(context.Context) (string, error) {
	const target = "instance-0"
	return target, nil
}

// finalize ignores its context and never fails.
func (*mockSnapshotExecutor) finalize(context.Context) error {
	return nil
}
index d941aa05b5..d5cc2fdfb1 100644 --- a/percona/controller/pgcluster/restore.go +++ b/percona/controller/pgcluster/restore.go @@ -5,6 +5,7 @@ import ( k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -39,7 +40,7 @@ func (r *PGClusterReconciler) createBootstrapRestoreObject(ctx context.Context, }, Spec: v2.PerconaPGRestoreSpec{ PGCluster: cr.Name, - RepoName: repoName, + RepoName: ptr.To(repoName), }, } if cr.CompareVersion("2.6.0") >= 0 && cr.Spec.Metadata != nil { diff --git a/percona/controller/pgcluster/schedule.go b/percona/controller/pgcluster/schedule.go index ad5d65412f..125c5921de 100644 --- a/percona/controller/pgcluster/schedule.go +++ b/percona/controller/pgcluster/schedule.go @@ -9,8 +9,10 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" "github.com/percona/percona-postgresql-operator/v2/internal/controller/postgrescluster" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" @@ -28,6 +30,12 @@ func (r *PGClusterReconciler) reconcileScheduledBackups(ctx context.Context, cr } } } + + if cr.Spec.Backups.IsVolumeSnapshotsEnabled() && feature.Enabled(ctx, feature.BackupSnapshots) { + if err := r.reconcileScheduledSnapshots(ctx, cr, cr.Spec.Backups.VolumeSnapshots.Schedule); err != nil { + return errors.Wrapf(err, "failed to reconcile scheduled snapshots") + } + } return nil } @@ -120,7 +128,7 @@ func (r *PGClusterReconciler) createScheduledBackup(log logr.Logger, backupName, }, Spec: v2.PerconaPGBackupSpec{ PGCluster: cr.Name, 
- RepoName: repoName, + RepoName: ptr.To(repoName), Options: []string{"--type=" + backupType}, }, } @@ -136,3 +144,82 @@ func (r *PGClusterReconciler) createScheduledBackup(log logr.Logger, backupName, } return nil } + +func (r *PGClusterReconciler) createScheduledSnapshotFunc(log logr.Logger, backupName, namespace, clusterName string) func() { + return func() { + if err := r.createScheduledSnapshot(log, backupName, namespace, clusterName); err != nil { + log.Error(err, "failed to create a scheduled snapshot") + } + } +} + +func (r *PGClusterReconciler) createScheduledSnapshot(log logr.Logger, backupName, namespace, clusterName string) error { + ctx := context.Background() + + cr := &v2.PerconaPGCluster{} + if err := r.Client.Get(ctx, types.NamespacedName{ + Namespace: namespace, + Name: clusterName, + }, cr); err != nil { + if k8serrors.IsNotFound(err) { + log.Info("cluster is not found, deleting the job", "name", backupName, "cluster", cr.Name, "namespace", cr.Namespace) + + r.Cron.DeleteBackupJob(backupName, namespace) + return nil + } + return err + } + if cr.Status.State != v2.AppStateReady { + log.Info("Cluster is not ready. 
Can't start scheduled snapshot") + return nil + } + + pb := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: backupName + "-", + Namespace: namespace, + }, + Spec: v2.PerconaPGBackupSpec{ + PGCluster: cr.Name, + Method: ptr.To(v2.BackupMethodVolumeSnapshot), + }, + } + + if cr.Spec.Metadata != nil { + pb.Annotations = cr.Spec.Metadata.Annotations + pb.Labels = cr.Spec.Metadata.Labels + } + + err := r.Client.Create(ctx, pb) + if err != nil { + return errors.Wrapf(err, "failed to create PerconaPGBackup %s", backupName) + } + return nil +} + +func (r *PGClusterReconciler) reconcileScheduledSnapshots( + ctx context.Context, + cr *v2.PerconaPGCluster, + schedule *string) error { + log := logging.FromContext(ctx) + + name := naming.VolumeSnapshotCronJob(&v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cr.Name, + Namespace: cr.Namespace, + }}) + + if schedule == nil || *schedule == "" { + r.Cron.DeleteBackupJob(name.Name, name.Namespace) + return nil + } + + createBackupFunc := r.createScheduledSnapshotFunc(log, name.Name, cr.Namespace, cr.Name) + + if err := r.Cron.ApplyBackupJob(name.Name, name.Namespace, *schedule, createBackupFunc); err != nil { + log.Error(err, "failed to create a cron for a scheduled snapshot job") + return err + } + + return nil +} diff --git a/percona/controller/pgrestore/controller.go b/percona/controller/pgrestore/controller.go index 0061744463..233bff1de7 100644 --- a/percona/controller/pgrestore/controller.go +++ b/percona/controller/pgrestore/controller.go @@ -16,12 +16,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" "github.com/percona/percona-postgresql-operator/v2/internal/logging" - "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/percona/controller" + 
"github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/snapshot" + restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" - "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) const ( @@ -35,10 +36,18 @@ type PGRestoreReconciler struct { Owner client.FieldOwner Recorder record.EventRecorder Tracer trace.Tracer + PodExec runtime.PodExecutor } // SetupWithManager adds the perconapgrestore controller to the provided runtime manager func (r *PGRestoreReconciler) SetupWithManager(mgr manager.Manager) error { + if r.PodExec == nil { + var err error + r.PodExec, err = runtime.NewPodExecutor(mgr.GetConfig()) + if err != nil { + return err + } + } return builder.ControllerManagedBy(mgr).For(&v2.PerconaPGRestore{}).Complete(r) } @@ -62,6 +71,17 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, err } + pgCluster := &v2.PerconaPGCluster{} + err := r.Client.Get(ctx, types.NamespacedName{Name: pgRestore.Spec.PGCluster, Namespace: request.Namespace}, pgCluster) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "get PerconaPGCluster") + } + + if pgRestore.Spec.VolumeSnapshotBackupName != "" { + // Delegate to snapshot restore reconciliation + return snapshot.Reconcile(ctx, r.Client, r.PodExec, pgCluster, pgRestore) + } + if pgRestore.DeletionTimestamp != nil { if err := runFinalizers(ctx, r.Client, pgRestore); err != nil { return reconcile.Result{}, errors.Wrap(err, "failed to run finalizers") @@ -73,11 +93,7 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, nil } - pgCluster := &v2.PerconaPGCluster{} - err := r.Client.Get(ctx, types.NamespacedName{Name: 
pgRestore.Spec.PGCluster, Namespace: request.Namespace}, pgCluster) - if err != nil { - return reconcile.Result{}, errors.Wrap(err, "get PostgresCluster") - } + restorer := restoreutils.NewPGBackRestRestore(r.Client, pgCluster, pgRestore) switch pgRestore.Status.State { case v2.RestoreNew: @@ -87,9 +103,12 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R } if _, ok := pgRestore.Annotations[pNaming.AnnotationClusterBootstrapRestore]; !ok { - if err := startRestore(ctx, r.Client, pgCluster, pgRestore); err != nil { + if err := restorer.Start(ctx); err != nil { return reconcile.Result{}, errors.Wrap(err, "start restore") } + if err := ensureFinalizers(ctx, r.Client, pgRestore); err != nil { + return reconcile.Result{}, errors.Wrap(err, "ensure finalizers") + } } pgRestore.Status.State = v2.RestoreStarting @@ -116,26 +135,23 @@ func (r *PGRestoreReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, nil case v2.RestoreRunning: - job := &batchv1.Job{} - err := r.Client.Get(ctx, types.NamespacedName{Name: pgCluster.Name + "-pgbackrest-restore", Namespace: pgCluster.Namespace}, job) + status, completedAt, err := restorer.ObserveStatus(ctx) if err != nil { - return reconcile.Result{}, errors.Wrap(err, "get restore job") + return reconcile.Result{}, errors.Wrap(err, "observe restore status") } - - status := checkRestoreJob(job) switch status { case v2.RestoreFailed: log.Info("Restore failed") case v2.RestoreSucceeded: log.Info("Restore succeeded") - pgRestore.Status.CompletedAt = job.Status.CompletionTime + pgRestore.Status.CompletedAt = completedAt default: log.Info("Waiting for restore to complete") return reconcile.Result{RequeueAfter: time.Second * 5}, nil } if _, ok := pgRestore.Annotations[pNaming.AnnotationClusterBootstrapRestore]; !ok { - if err := disableRestore(ctx, r.Client, pgCluster, pgRestore); err != nil { + if err := restorer.DisableRestore(ctx); err != nil { return reconcile.Result{}, 
errors.Wrap(err, "disable restore") } } @@ -158,7 +174,7 @@ func runFinalizers(ctx context.Context, c client.Client, pr *v2.PerconaPGRestore if k8serrors.IsNotFound(err) { pg = nil } else { - return errors.Wrap(err, "get PostgresCluster") + return errors.Wrap(err, "get PerconaPGCluster") } } @@ -167,7 +183,8 @@ func runFinalizers(ctx context.Context, c client.Client, pr *v2.PerconaPGRestore if pg == nil { return nil } - return disableRestore(ctx, c, pg, pr) + restorer := restoreutils.NewPGBackRestRestore(c, pg, pr) + return restorer.DisableRestore(ctx) }, } @@ -198,82 +215,3 @@ func ensureFinalizers(ctx context.Context, cl client.Client, pr *v2.PerconaPGRes } return nil } - -func startRestore(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pr *v2.PerconaPGRestore) error { - orig := pg.DeepCopy() - - if pg.Annotations == nil { - pg.Annotations = make(map[string]string) - } - pg.Annotations[naming.PGBackRestRestore] = pr.Name - - postgresCluster := new(v1beta1.PostgresCluster) - if err := c.Get(ctx, client.ObjectKeyFromObject(pg), postgresCluster); err != nil { - return errors.Wrap(err, "get PostgresCluster") - } - - origPostgres := postgresCluster.DeepCopy() - - postgresCluster.Status.PGBackRest.Restore = new(v1beta1.PGBackRestJobStatus) - - if err := c.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - if pg.Spec.Backups.PGBackRest.Restore == nil { - pg.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ - PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, - } - } - - tvar := true - pg.Spec.Backups.PGBackRest.Restore.Enabled = &tvar - pg.Spec.Backups.PGBackRest.Restore.RepoName = pr.Spec.RepoName - pg.Spec.Backups.PGBackRest.Restore.Options = pr.Spec.Options - - if err := c.Patch(ctx, pg, client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - if err := ensureFinalizers(ctx, c, pr); err != nil { - return 
errors.Wrap(err, "ensure restore finalizers") - } - - return nil -} - -func disableRestore(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pr *v2.PerconaPGRestore) error { - if pr.Status.State == v2.RestoreSucceeded || pr.Status.State == v2.RestoreFailed { - return nil - } - - orig := pg.DeepCopy() - - if pg.Spec.Backups.PGBackRest.Restore == nil { - pg.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ - PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, - } - } - - fvar := false - pg.Spec.Backups.PGBackRest.Restore.Enabled = &fvar - - delete(pg.Annotations, naming.LabelPGBackRestRestore) - - if err := c.Patch(ctx, pg, client.MergeFrom(orig)); err != nil { - return errors.Wrap(err, "patch PGCluster") - } - - return nil -} - -func checkRestoreJob(job *batchv1.Job) v2.PGRestoreState { - switch { - case controller.JobCompleted(job): - return v2.RestoreSucceeded - case controller.JobFailed(job): - return v2.RestoreFailed - default: - return v2.RestoreRunning - } -} diff --git a/percona/controller/pgrestore/snapshot/reconcile.go b/percona/controller/pgrestore/snapshot/reconcile.go new file mode 100644 index 0000000000..68edde7d00 --- /dev/null +++ b/percona/controller/pgrestore/snapshot/reconcile.go @@ -0,0 +1,811 @@ +package snapshot + +import ( + "context" + "fmt" + "io" + "path" + "strings" + "time" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + 
"github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + "github.com/percona/percona-postgresql-operator/v2/percona/controller" + restoreutils "github.com/percona/percona-postgresql-operator/v2/percona/controller/pgrestore/utils" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" + perconaPG "github.com/percona/percona-postgresql-operator/v2/percona/postgres" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +type snapshotRestorer struct { + cl client.Client + log logging.Logger + cluster *v2.PerconaPGCluster + backup *v2.PerconaPGBackup + restore *v2.PerconaPGRestore + podExec runtime.PodExecutor +} + +func newSnapshotRestorer( + cl client.Client, + log logging.Logger, + cluster *v2.PerconaPGCluster, + backup *v2.PerconaPGBackup, + restore *v2.PerconaPGRestore, + exec runtime.PodExecutor, +) *snapshotRestorer { + return &snapshotRestorer{ + cl: cl, + log: log, + cluster: cluster, + backup: backup, + restore: restore, + podExec: exec, + } +} + +func Reconcile( + ctx context.Context, + c client.Client, + exec runtime.PodExecutor, + pg *v2.PerconaPGCluster, + restore *v2.PerconaPGRestore, +) (reconcile.Result, error) { + log := logging.FromContext(ctx).WithName("SnapshotRestorer") + + if !feature.Enabled(ctx, feature.BackupSnapshots) { + log.Info(fmt.Sprintf("Feature gate '%s' is not enabled, skipping snapshot restore", feature.BackupSnapshots)) + return reconcile.Result{}, nil + } + + backup := &v2.PerconaPGBackup{} + if err := c.Get(ctx, types.NamespacedName{Name: restore.Spec.VolumeSnapshotBackupName, Namespace: pg.Namespace}, backup); err != nil { 
+ return reconcile.Result{}, errors.Wrap(err, "get backup") + } + + r := newSnapshotRestorer(c, log, pg, backup, restore, exec) + + if !restore.GetDeletionTimestamp().IsZero() { + if ok, err := r.runFinalizers(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "run finalizers") + } else if !ok { + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + return reconcile.Result{}, nil + } + + switch restore.Status.State { + case v2.RestoreNew: + return r.reconcileNew(ctx) + case v2.RestoreStarting: + return r.reconcileStarting(ctx) + case v2.RestoreRunning: + return r.reconcileRunning(ctx) + case v2.RestoreSucceeded, v2.RestoreFailed: + ok, err := r.runFinalizers(ctx) + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "run finalizers") + } + if !ok { + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + return reconcile.Result{}, nil + } + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileNew(ctx context.Context) (reconcile.Result, error) { + if restore := r.cluster.Spec.Backups.PGBackRest.Restore; restore != nil && restore.Enabled != nil && *restore.Enabled { + r.log.Info("Waiting for another restore to finish") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + restores := &v2.PerconaPGRestoreList{} + if err := r.cl.List(ctx, restores, client.InNamespace(r.cluster.Namespace)); err != nil { + return reconcile.Result{}, errors.Wrap(err, "list restores") + } + for _, restore := range restores.Items { + if restore.Spec.PGCluster != r.cluster.Name || restore.IsCompleted() || restore.GetName() == r.restore.GetName() { + continue + } + r.log.Info("Waiting for another restore to finish") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreStarting + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + 
r.log.Info("Snapshot restore is starting") + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileStarting(ctx context.Context) (reconcile.Result, error) { + if ok, err := r.suspendAllInstances(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "shutdown cluster") + } else if !ok { + r.log.Info("Waiting for instances to be suspended") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if err := r.ensureFinalizers(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "ensure finalizers") + } + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreRunning + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + + r.log.Info("Snapshot restore is running") + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileRunning(ctx context.Context) (reconcile.Result, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "list instances") + } + + if ok, err := r.reconcileInstances(ctx, instances); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile instances") + } else if !ok { + r.log.Info("Waiting for instances PVCs to be reconciled") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + // Prepare PVCs + if ok, err := r.runPrepareJob(ctx, instances); err != nil { + return reconcile.Result{}, errors.Wrap(err, "run prepare job") + } else if !ok { + r.log.Info("Preparing PVCs") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + if err := r.reconcilePrepareJobAnnotation(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile 
prepare job annotation") + } + + // Run PITR if needed + if ok, err := r.restorePITR(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "restore PITR") + } else if !ok { + r.log.Info("Waiting for PITR to complete") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + // Recreate DCS so that cluster can be bootstrapped with new data. + if err := r.reconcileLeaderEndpoints(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile leader endpoints") + } + + if ok, err := r.unsuspendAllInstances(ctx); err != nil { + return reconcile.Result{}, errors.Wrap(err, "resume cluster") + } else if !ok { + r.log.Info("Waiting for instances to be unsuspended") + return reconcile.Result{RequeueAfter: time.Second * 5}, nil + } + + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreSucceeded + restore.Status.CompletedAt = &metav1.Time{Time: time.Now()} + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update restore status") + } + + r.log.Info("Snapshot restore is complete") + return reconcile.Result{}, nil +} + +func (r *snapshotRestorer) reconcileInstances(ctx context.Context, instances *appsv1.StatefulSetList) (bool, error) { + done := true + for _, instance := range instances.Items { + if ok, err := r.reconcileInstance(ctx, &instance); err != nil { + return false, errors.Wrap(err, "reconcile instance") + } else if !ok { + done = false + } + } + return done, nil +} + +func (r *snapshotRestorer) reconcileInstance(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { + dataOk, err := r.reconcileDataVolume(ctx, instance) + if err != nil { + return false, errors.Wrap(err, "reconcile data volume") + } + + walOk, err := r.reconcileWALVolume(ctx, instance) + if err != nil { + return false, errors.Wrap(err, "reconcile WAL volume") + } + + tablespaceOk, err := r.reconcileTablespaceVolumes(ctx, instance) + if err != nil { + return false, 
errors.Wrap(err, "reconcile tablespace volumes") + } + + return dataOk && walOk && tablespaceOk, nil +} + +func (r *snapshotRestorer) reconcileDataVolume( + ctx context.Context, + instance *appsv1.StatefulSet, +) (bool, error) { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.DataVolumeSnapshotRef == nil { + return false, errors.New("data volume snapshot not known") + } + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + snapshotName := *r.backup.Status.Snapshot.DataVolumeSnapshotRef + volCtxInfo := volumeContextInfo{role: naming.RolePostgresData} + return r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) +} + +func (r *snapshotRestorer) reconcileWALVolume( + ctx context.Context, + instance *appsv1.StatefulSet, +) (bool, error) { + if r.backup.Status.Snapshot == nil || r.backup.Status.Snapshot.WALVolumeSnapshotRef == nil { + return true, nil + } + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresWALVolume(instance)} + snapshotName := *r.backup.Status.Snapshot.WALVolumeSnapshotRef + volCtxInfo := volumeContextInfo{role: naming.RolePostgresWAL} + return r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) +} + +func (r *snapshotRestorer) reconcileTablespaceVolumes(ctx context.Context, instance *appsv1.StatefulSet) (bool, error) { + if r.backup.Status.Snapshot == nil || len(r.backup.Status.Snapshot.TablespaceVolumeSnapshotRefs) == 0 { + return true, nil + } + + done := true + for tsName, snapshotName := range r.backup.Status.Snapshot.TablespaceVolumeSnapshotRefs { + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstanceTablespaceDataVolume(instance, tsName)} + volCtxInfo := volumeContextInfo{role: naming.RoleTablespace, tablespaceName: tsName} + ok, err := r.reconcileInstancePVC(ctx, volCtxInfo, pvc, instance, snapshotName) + if err != nil { + return false, errors.Wrap(err, "reconcile tablespace volume") + } + if !ok { + done = false + 
} + } + return done, nil +} + +func (r *snapshotRestorer) reconcileInstancePVC( + ctx context.Context, + volCtxInfo volumeContextInfo, + pvc *corev1.PersistentVolumeClaim, + instance *appsv1.StatefulSet, + snapshotName string, +) (bool, error) { + observedPVC := &corev1.PersistentVolumeClaim{} + err := r.cl.Get(ctx, client.ObjectKeyFromObject(pvc), observedPVC) + if k8serrors.IsNotFound(err) { + if err := r.createPVCFromSnapshot(ctx, volCtxInfo, pvc, instance, snapshotName); err != nil { + return false, errors.Wrap(err, "create PVC from data source") + } + return true, nil + } else if err != nil { + return false, errors.Wrap(err, "get observed PVC") + } + + if observedPVC.GetAnnotations()[pNaming.AnnotationSnapshotRestore] == r.restore.GetName() { + return true, nil + } + + if !observedPVC.GetDeletionTimestamp().IsZero() { + return false, nil + } + + // Delete it so we can recreate + if err := r.cl.Delete(ctx, observedPVC); err != nil { + return false, errors.Wrap(err, "delete PVC") + } + return false, nil +} + +func (r *snapshotRestorer) createPVCFromSnapshot( + ctx context.Context, + volCtxInfo volumeContextInfo, + pvc *corev1.PersistentVolumeClaim, + instance *appsv1.StatefulSet, + snapshotName string, +) error { + instanceSetName := instance.GetLabels()[naming.LabelInstanceSet] + if instanceSetName == "" { + return errors.New("instance set name is not known") + } + + dataSource := &corev1.TypedLocalObjectReference{ + APIGroup: ptr.To(volumesnapshotv1.GroupName), + Kind: pNaming.KindVolumeSnapshot, + Name: snapshotName, + } + spec, err := r.pvcSpecFromDataSource(volCtxInfo, instanceSetName, dataSource) + if err != nil { + return errors.Wrap(err, "get PVC spec from data source") + } + pvc.Spec = spec + pvc.SetAnnotations(map[string]string{ + pNaming.AnnotationSnapshotRestore: r.restore.GetName(), + }) + if err := r.cl.Create(ctx, pvc); err != nil { + return errors.Wrap(err, "create PVC") + } + return nil +} + +type volumeContextInfo struct { + role string + 
tablespaceName string +} + +func (r *snapshotRestorer) pvcSpecFromDataSource( + volCtxInfo volumeContextInfo, + instanceSetName string, + dataSource *corev1.TypedLocalObjectReference, +) (corev1.PersistentVolumeClaimSpec, error) { + var instanceSetSpec *v2.PGInstanceSetSpec + for _, instanceSet := range r.cluster.Spec.InstanceSets { + if instanceSet.Name == instanceSetName { + instanceSetSpec = &instanceSet + break + } + } + if instanceSetSpec == nil { + return corev1.PersistentVolumeClaimSpec{}, errors.New("instance set not found") + } + + var volSpec *corev1.PersistentVolumeClaimSpec + switch volCtxInfo.role { + case naming.RolePostgresData: + volSpec = &instanceSetSpec.DataVolumeClaimSpec + case naming.RolePostgresWAL: + volSpec = instanceSetSpec.WALVolumeClaimSpec + case naming.RoleTablespace: + tablespaceIdx := -1 + for i, ts := range instanceSetSpec.TablespaceVolumes { + if ts.Name == volCtxInfo.tablespaceName { + tablespaceIdx = i + break + } + } + if tablespaceIdx == -1 { + return corev1.PersistentVolumeClaimSpec{}, errors.New("tablespace not found") + } + volSpec = &instanceSetSpec.TablespaceVolumes[tablespaceIdx].DataVolumeClaimSpec + } + + if volSpec == nil { + return corev1.PersistentVolumeClaimSpec{}, errors.New("volume spec not found in instance spec") + } + + dataVolSpec := *volSpec + dataVolSpec.DataSource = dataSource + return dataVolSpec, nil +} + +func (r *snapshotRestorer) reconcileLeaderEndpoints(ctx context.Context) error { + postgresCluster := &crunchyv1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: r.cluster.Name, + Namespace: r.cluster.Namespace, + }, + } + + //nolint:staticcheck + leaderEp := &corev1.Endpoints{ObjectMeta: naming.PatroniLeaderEndpoints(postgresCluster)} + if err := r.cl.Get(ctx, client.ObjectKeyFromObject(leaderEp), leaderEp); err != nil { + return client.IgnoreNotFound(err) + } + + if len(leaderEp.Subsets) > 0 { + return nil + } + + if err := r.cl.Delete(ctx, leaderEp); client.IgnoreNotFound(err) != nil { 
+ return errors.Wrap(err, "delete leader endpoints") + } + return nil +} + +func (r *snapshotRestorer) suspendAllInstances(ctx context.Context) (bool, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return false, errors.Wrap(err, "list instances") + } + + allSuspended := true + for _, instance := range instances.Items { + if suspended, err := perconaPG.SuspendInstance(ctx, r.cl, client.ObjectKeyFromObject(&instance)); err != nil { + return false, errors.Wrap(err, "suspend instance") + } else if !suspended { + allSuspended = false + } + } + return allSuspended, nil +} + +func (r *snapshotRestorer) unsuspendAllInstances(ctx context.Context) (bool, error) { + instances := &appsv1.StatefulSetList{} + if err := r.cl.List(ctx, instances, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return false, errors.Wrap(err, "list instances") + } + + allUnsuspended := true + for _, instance := range instances.Items { + if unsuspended, err := perconaPG.UnsuspendInstance(ctx, r.cl, client.ObjectKeyFromObject(&instance)); err != nil { + return false, errors.Wrap(err, "unsuspend instance") + } else if !unsuspended { + allUnsuspended = false + } + } + return allUnsuspended, nil +} + +func (r *snapshotRestorer) ensureFinalizers(ctx context.Context) error { + orig := r.restore.DeepCopy() + + finalizers := []string{pNaming.FinalizerSnapshotRestore} + finalizersChanged := false + for _, f := range finalizers { + if controllerutil.AddFinalizer(r.restore, f) { + finalizersChanged = true + } + } + if !finalizersChanged { + return nil + } + + if err := 
r.cl.Patch(ctx, r.restore.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch finalizers") + } + return nil +} + +func (r *snapshotRestorer) runFinalizers(ctx context.Context) (bool, error) { + finalizers := map[string]controller.FinalizerFunc[*v2.PerconaPGRestore]{ + pNaming.FinalizerSnapshotRestore: r.finalizeSnapshotRestore(r.cl, r.restore), + } + + finished := true + for finalizer, f := range finalizers { + done, err := controller.RunFinalizer(ctx, r.cl, r.restore, finalizer, f) + if err != nil { + return false, errors.Wrapf(err, "run finalizer %s", finalizer) + } + if !done { + finished = false + } + } + return finished, nil +} + +func (r *snapshotRestorer) finalizeSnapshotRestore(_ client.Client, _ *v2.PerconaPGRestore) func(ctx context.Context, restore *v2.PerconaPGRestore) error { + return func(ctx context.Context, restore *v2.PerconaPGRestore) error { + if done, err := r.unsuspendAllInstances(ctx); err != nil { + return errors.Wrap(err, "resume cluster") + } else if !done { + return controller.ErrFinalizerPending + } + + if err := r.cleanupSkipRecoveryFile(ctx); err != nil { + return errors.Wrap(err, "cleanup") + } + return nil + } +} + +func (r *snapshotRestorer) restorePITR(ctx context.Context) (bool, error) { + if r.restore.Spec.RepoName == nil { + return true, nil + } + + pgbackrestRestore := restoreutils.NewPGBackRestRestore(r.cl, r.cluster, r.restore) + status, _, err := pgbackrestRestore.ObserveStatus(ctx) + if err != nil { + return false, errors.Wrap(err, "observe PITR status") + } + + switch status { + case v2.RestoreNew: + case v2.RestoreStarting: + return false, pgbackrestRestore.Start(ctx) + case v2.RestoreRunning: + return false, nil + case v2.RestoreSucceeded: + return true, pgbackrestRestore.DisableRestore(ctx) + case v2.RestoreFailed: + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreFailed + }); err != nil { + return false, errors.Wrap(err, 
"update restore status") + } + return true, nil + } + return false, nil +} + +func (r *snapshotRestorer) reconcilePrepareJobAnnotation(ctx context.Context) error { + if _, ok := r.restore.GetAnnotations()[pNaming.AnnotationPVCsPreparedAt]; ok { + return nil + } + + orig := r.restore.DeepCopy() + annotations := r.restore.GetAnnotations() + if annotations == nil { + annotations = make(map[string]string) + } + annotations[pNaming.AnnotationPVCsPreparedAt] = time.Now().Format(time.RFC3339) + r.restore.SetAnnotations(annotations) + if err := r.cl.Patch(ctx, r.restore.DeepCopy(), client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch restore annotations") + } + return nil +} + +// prepares PVCs before starting the cluster. +func (r *snapshotRestorer) runPrepareJob(ctx context.Context, instances *appsv1.StatefulSetList) (bool, error) { + jobName := r.restore.GetName() + "-prepare" + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: r.cluster.GetNamespace(), + }, + } + + // PVC already prepared, delete and return. 
+ if _, ok := r.restore.GetAnnotations()[pNaming.AnnotationPVCsPreparedAt]; ok { + return true, client.IgnoreNotFound(r.cl.Delete(ctx, job, + client.PropagationPolicy(metav1.DeletePropagationForeground))) + } + + err := r.cl.Get(ctx, client.ObjectKeyFromObject(job), job) + if k8serrors.IsNotFound(err) { + generatePrepareJob(job, instances, r.cluster, r.restore) + if err := controllerutil.SetControllerReference(r.restore, job, r.cl.Scheme()); err != nil { + return false, errors.Wrap(err, "set controller reference") + } + if err := r.cl.Create(ctx, job); err != nil { + return false, errors.Wrap(err, "create prepare job") + } + return false, nil + } else if err != nil { + return false, errors.Wrap(err, "get prepare job") + } + + if !job.Status.CompletionTime.IsZero() && job.Status.Succeeded > 0 { + return true, nil + } + + if job.Status.Failed > 0 { + if err := r.restore.UpdateStatus(ctx, r.cl, func(restore *v2.PerconaPGRestore) { + restore.Status.State = v2.RestoreFailed + }); err != nil { + return false, errors.Wrap(err, "update restore status") + } + return true, nil + } + return false, nil +} + +// instanceSetSpecForName returns the PGInstanceSetSpec for the given instance set name, or nil if not found. +func instanceSetSpecForName(cluster *v2.PerconaPGCluster, name string) *v2.PGInstanceSetSpec { + for i := range cluster.Spec.InstanceSets { + if cluster.Spec.InstanceSets[i].Name == name { + return &cluster.Spec.InstanceSets[i] + } + } + return nil +} + +// instancePrepareInfo holds mount paths for an instance used by the snapshot prepare job. +// dataMountPath is empty when PITR + dedicated WAL (data volume not mounted). +// walMountPath is empty when no dedicated WAL volume. 
+type instancePrepareInfo struct { + dataMountPath string + walMountPath string +} + +func generatePrepareJob( + job *batchv1.Job, + instances *appsv1.StatefulSetList, + cluster *v2.PerconaPGCluster, + restore *v2.PerconaPGRestore, +) { + pitrEnabled := restore.Spec.RepoName != nil && restore.Spec.VolumeSnapshotBackupName != "" + pgVersion := cluster.Spec.PostgresVersion + + volumes, volumeMounts, instanceInfos := buildPrepareJobVolumes(instances, cluster, pitrEnabled) + + script := buildPrepareJobScript(instanceInfos, pgVersion, pitrEnabled) + + containerName := "snapshot-prepare" + job.Spec = batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + naming.DefaultContainerAnnotation: containerName, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: containerName, + Image: cluster.Spec.Image, + Command: []string{"bash", "-c", script}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("32Mi"), + }, + }, + VolumeMounts: volumeMounts, + }}, + Volumes: volumes, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + } +} + +func buildPrepareJobVolumes( + instances *appsv1.StatefulSetList, + cluster *v2.PerconaPGCluster, + pitrEnabled bool, +) ([]corev1.Volume, []corev1.VolumeMount, []instancePrepareInfo) { + var volumes []corev1.Volume + var volumeMounts []corev1.VolumeMount + var instanceInfos []instancePrepareInfo + + for _, instance := range instances.Items { + instanceSetSpec := instanceSetSpecForName(cluster, instance.Labels[naming.LabelInstanceSet]) + hasWALVolume := instanceSetSpec != nil && instanceSetSpec.WALVolumeClaimSpec != nil + + // When PITR + dedicated WAL volumes, we only clear the WAL directory; no need to 
mount data. + needDataVolume := !pitrEnabled || !hasWALVolume + + var info instancePrepareInfo + + if needDataVolume { + info.dataMountPath = path.Join("/", instance.GetName(), "pgdata") + volumes, volumeMounts = appendDataVolume(volumes, volumeMounts, &instance, info.dataMountPath) + } + + if hasWALVolume { + info.walMountPath = path.Join("/", instance.GetName(), "pgwal") + volumes, volumeMounts = appendWALVolume(volumes, volumeMounts, &instance, info.walMountPath) + } + + instanceInfos = append(instanceInfos, info) + } + + return volumes, volumeMounts, instanceInfos +} + +func appendDataVolume(volumes []corev1.Volume, mounts []corev1.VolumeMount, instance *appsv1.StatefulSet, mountPath string) ([]corev1.Volume, []corev1.VolumeMount) { + name := instance.GetName() + "-pgdata" + volumes = append(volumes, corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: naming.InstancePostgresDataVolume(instance).Name, + }, + }, + }) + mounts = append(mounts, corev1.VolumeMount{Name: name, MountPath: mountPath}) + return volumes, mounts +} + +func appendWALVolume(volumes []corev1.Volume, mounts []corev1.VolumeMount, instance *appsv1.StatefulSet, mountPath string) ([]corev1.Volume, []corev1.VolumeMount) { + name := instance.GetName() + "-pgwal" + volumes = append(volumes, corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: naming.InstancePostgresWALVolume(instance).Name, + }, + }, + }) + mounts = append(mounts, corev1.VolumeMount{Name: name, MountPath: mountPath}) + return volumes, mounts +} + +func buildPrepareJobScript(instanceInfos []instancePrepareInfo, pgVersion int, pitrEnabled bool) string { + scriptParts := []string{"set -e"} + walDirSuffix := fmt.Sprintf("pg%d_wal", pgVersion) + dataDirSuffix := fmt.Sprintf("pg%d", pgVersion) + + for _, info := range instanceInfos { + if pitrEnabled { 
+ // Clear WAL files so PITR restore can fetch from repo. WAL lives under WAL mount + // when dedicated volume is used, otherwise under pgdata. + walBase := info.dataMountPath + if info.walMountPath != "" { + walBase = info.walMountPath + } + walDir := path.Join(walBase, walDirSuffix) + scriptParts = append(scriptParts, fmt.Sprintf("find %q -mindepth 1 -delete", walDir)) + } else { + // Signal restore_command to skip WAL recovery for consistency with snapshot data. + dataDir := path.Join(info.dataMountPath, dataDirSuffix) + signalFile := path.Join(dataDir, "skip-wal-recovery") + scriptParts = append(scriptParts, fmt.Sprintf("touch %q", signalFile)) + } + } + + return strings.Join(scriptParts, "\n") +} + +// We create a $PGDATA/skip-wal-recovery file during the snapshot restore when no PITR is specified. +// This method will cleanup this file after the restore is completed. +func (r *snapshotRestorer) cleanupSkipRecoveryFile(ctx context.Context) error { + if r.restore.Spec.RepoName != nil { + return nil + } + + pods := &corev1.PodList{} + if err := r.cl.List(ctx, pods, &client.ListOptions{ + Namespace: r.cluster.GetNamespace(), + LabelSelector: labels.SelectorFromSet(map[string]string{ + naming.LabelCluster: r.cluster.Name, + naming.LabelData: naming.DataPostgres, + }), + }); err != nil { + return errors.Wrap(err, "list pods") + } + + rmScript := `rm -f "${PGDATA}/skip-wal-recovery"` + for _, pod := range pods.Items { + if err := r.podExec(ctx, r.cluster.GetNamespace(), pod.GetName(), naming.ContainerDatabase, nil, io.Discard, nil, "sh", "-c", rmScript); err != nil { + return err + } + } + + return nil +} diff --git a/percona/controller/pgrestore/snapshot/reconcile_test.go b/percona/controller/pgrestore/snapshot/reconcile_test.go new file mode 100644 index 0000000000..fe91672388 --- /dev/null +++ b/percona/controller/pgrestore/snapshot/reconcile_test.go @@ -0,0 +1,910 @@ +package snapshot + +import ( + "context" + "io" + "path" + "strings" + "testing" + + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" + "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/logging" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestGeneratePrepareJob(t *testing.T) { + ns := "test-ns" + clusterName := "my-cluster" + postgresVersion := 15 + image := "postgres:15" + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: postgresVersion, + Image: image, + }, + } + + makeInstance := func(name string) appsv1.StatefulSet { + return appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + }, + } + } + + t.Run("single instance without PITR", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("my-cluster-instance-0")}, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "my-backup", + // 
RepoName nil and VolumeSnapshotBackupName set = no PITR + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + require.Len(t, job.Spec.Template.Spec.Containers, 1) + container := job.Spec.Template.Spec.Containers[0] + assert.Equal(t, "snapshot-prepare", container.Name) + assert.Equal(t, image, container.Image) + assert.Equal(t, []string{"bash", "-c"}, container.Command[:2]) + + assert.Equal(t, resource.MustParse("50m"), container.Resources.Requests[corev1.ResourceCPU]) + assert.Equal(t, resource.MustParse("32Mi"), container.Resources.Requests[corev1.ResourceMemory]) + + // No PITR: script should touch skip-wal-recovery files + script := container.Command[2] + dataDir := path.Join("my-cluster-instance-0", "pgdata", "pg15") + assert.Contains(t, script, "touch") + assert.Contains(t, script, path.Join(dataDir, "skip-wal-recovery")) + + // Volume and mount for instance + require.Len(t, job.Spec.Template.Spec.Volumes, 1) + assert.Equal(t, "my-cluster-instance-0-pgdata", job.Spec.Template.Spec.Volumes[0].Name) + assert.Equal(t, "my-cluster-instance-0-pgdata", job.Spec.Template.Spec.Volumes[0].PersistentVolumeClaim.ClaimName) + + require.Len(t, container.VolumeMounts, 1) + assert.Equal(t, "my-cluster-instance-0-pgdata", container.VolumeMounts[0].Name) + assert.Equal(t, path.Join("/", "my-cluster-instance-0", "pgdata"), container.VolumeMounts[0].MountPath) + + assert.Equal(t, corev1.RestartPolicyNever, job.Spec.Template.Spec.RestartPolicy) + }) + + t.Run("multiple instances without PITR", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{ + makeInstance("my-cluster-instance-0"), + makeInstance("my-cluster-instance-1"), + }, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + 
container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // Both instances should have skip-wal-recovery + assert.Contains(t, script, path.Join("my-cluster-instance-0", "pgdata", "pg15", "skip-wal-recovery")) + assert.Contains(t, script, path.Join("my-cluster-instance-1", "pgdata", "pg15", "skip-wal-recovery")) + + require.Len(t, job.Spec.Template.Spec.Volumes, 2) + assert.Equal(t, []string{"my-cluster-instance-0-pgdata", "my-cluster-instance-1-pgdata"}, + []string{job.Spec.Template.Spec.Volumes[0].Name, job.Spec.Template.Spec.Volumes[1].Name}) + }) + + t.Run("with PITR clears WAL directory", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("my-cluster-instance-0")}, + } + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + RepoName: ptr.To("repo1"), + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // PITR: script should find/delete WAL dir, not touch skip-wal-recovery + walDir := path.Join("my-cluster-instance-0", "pgdata", "pg15_wal") + assert.Contains(t, script, "find") + assert.Contains(t, script, "-mindepth") + assert.Contains(t, script, "-delete") + assert.Contains(t, script, walDir) + assert.NotContains(t, script, "skip-wal-recovery") + }) + + t.Run("with PITR and dedicated WAL volume clears WAL under pgwal mount", func(t *testing.T) { + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + clusterWithWAL := cluster.DeepCopy() + clusterWithWAL.Spec.InstanceSets = v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: 
corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + WALVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + }, + } + + instance := makeInstance(instanceName) + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{Items: []appsv1.StatefulSet{instance}} + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: "my-restore", Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + RepoName: ptr.To("repo1"), + VolumeSnapshotBackupName: "my-backup", + }, + } + + generatePrepareJob(job, instances, clusterWithWAL, restore) + + container := job.Spec.Template.Spec.Containers[0] + script := container.Command[2] + + // WAL is on dedicated volume: script should clear /instance/pgwal/pg15_wal, not pgdata + walDir := path.Join("/", instanceName, "pgwal", "pg15_wal") + assert.Contains(t, script, "find") + assert.Contains(t, script, walDir) + assert.NotContains(t, script, path.Join("/", instanceName, "pgdata", "pg15_wal")) + + // PITR + dedicated WAL: only WAL PVC is mounted, not data + require.Len(t, job.Spec.Template.Spec.Volumes, 1) + assert.Equal(t, instanceName+"-pgwal", job.Spec.Template.Spec.Volumes[0].Name) + + require.Len(t, container.VolumeMounts, 1) + assert.Equal(t, instanceName+"-pgwal", container.VolumeMounts[0].Name) + assert.Equal(t, path.Join("/", instanceName, "pgwal"), container.VolumeMounts[0].MountPath) + }) + + t.Run("script starts with set -e", func(t *testing.T) { + job := &batchv1.Job{} + instances := &appsv1.StatefulSetList{ + Items: []appsv1.StatefulSet{makeInstance("instance-0")}, 
+ } + restore := &v2.PerconaPGRestore{ + Spec: v2.PerconaPGRestoreSpec{ + PGCluster: clusterName, + VolumeSnapshotBackupName: "backup", + }, + } + + generatePrepareJob(job, instances, cluster, restore) + + script := job.Spec.Template.Spec.Containers[0].Command[2] + assert.True(t, strings.HasPrefix(script, "set -e\n"), + "script should start with 'set -e' for error handling, got: %q", script[:50]) + }) +} + +// noopPodExecutor is a PodExecutor that does nothing, for tests that don't need exec. +var noopPodExecutor runtime.PodExecutor = func( + _ context.Context, _, _, _ string, _ io.Reader, _, _ io.Writer, _ ...string, +) error { + return nil +} + +func TestReconcileDataVolume(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + snapshotName := "my-backup-pgdata" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + DataVolumeSnapshotRef: ptr.To(snapshotName), + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: 
metav1.ObjectMeta{Name: restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns error when DataVolumeSnapshotRef is nil", func(t *testing.T) { + backupNoSnapshot := backup.DeepCopy() + backupNoSnapshot.Status.Snapshot = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoSnapshot, restore). + WithStatusSubresource(backupNoSnapshot). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoSnapshot, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.Error(t, err) + assert.False(t, ok) + assert.Contains(t, err.Error(), "data volume snapshot not known") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-pgdata" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, snapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, volumesnapshotv1.GroupName, ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from instance set + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("1Gi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, // No AnnotationSnapshotRestore + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("500Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok, "should return false to trigger requeue") + + // PVC should be deleted + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + // PVC should be deleted so it can be recreated for this restore + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when PVC already has restore annotation", func(t *testing.T) { + pvcName := instanceName + "-pgdata" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileDataVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} + +func TestReconcileWALVolume(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + walSnapshotName := "my-backup-pgwal" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + WALVolumeClaimSpec: &corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + WALVolumeSnapshotRef: ptr.To(walSnapshotName), + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := 
&appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns true when WALVolumeSnapshotRef is nil", func(t *testing.T) { + backupNoWAL := backup.DeepCopy() + backupNoWAL.Status.Snapshot.WALVolumeSnapshotRef = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoWAL, restore). + WithStatusSubresource(backupNoWAL). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoWAL, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no WAL volume to restore") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-pgwal" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the WAL VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, walSnapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, volumesnapshotv1.GroupName, ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from WALVolumeClaimSpec (512Mi, not data's 1Gi) + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("512Mi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("256Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("256Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when PVC already has restore annotation", func(t *testing.T) { + pvcName := instanceName + "-pgwal" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("512Mi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileWALVolume(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} + +func TestReconcileTablespaceVolumes(t *testing.T) { + ctx := context.Background() + ns := "test-ns" + clusterName := "my-cluster" + backupName := "my-backup" + restoreName := "my-restore" + ts1Name := "ts1" + ts1SnapshotName := "my-backup-ts1-tablespace" + instanceSetName := "00" + instanceName := clusterName + "-" + instanceSetName + "-0" + + s := scheme.Scheme + require.NoError(t, corev1.AddToScheme(s)) + require.NoError(t, v2.AddToScheme(s)) + require.NoError(t, volumesnapshotv1.AddToScheme(s)) + + cluster := &v2.PerconaPGCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: ns}, + Spec: v2.PerconaPGClusterSpec{ + PostgresVersion: 15, + InstanceSets: v2.PGInstanceSets{ + { + Name: instanceSetName, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + TablespaceVolumes: []crunchyv1beta1.TablespaceVolume{ + { + Name: ts1Name, + DataVolumeClaimSpec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + }, + }, + }, + }, + }, + } + + backup := &v2.PerconaPGBackup{ + ObjectMeta: metav1.ObjectMeta{Name: backupName, Namespace: ns}, + Spec: v2.PerconaPGBackupSpec{PGCluster: clusterName}, + Status: v2.PerconaPGBackupStatus{ + Snapshot: &v2.SnapshotStatus{ + TablespaceVolumeSnapshotRefs: map[string]string{ts1Name: ts1SnapshotName}, + }, + }, + } + + restore := &v2.PerconaPGRestore{ + ObjectMeta: metav1.ObjectMeta{Name: 
restoreName, Namespace: ns}, + Spec: v2.PerconaPGRestoreSpec{PGCluster: clusterName, VolumeSnapshotBackupName: backupName}, + } + + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: instanceName, Namespace: ns}, + Spec: appsv1.StatefulSetSpec{ServiceName: clusterName + "-pods"}, + } + instance.Labels = map[string]string{ + naming.LabelInstanceSet: instanceSetName, + naming.LabelInstance: instanceName, + } + + t.Run("returns true when Snapshot is nil", func(t *testing.T) { + backupNoSnapshot := backup.DeepCopy() + backupNoSnapshot.Status.Snapshot = nil + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupNoSnapshot, restore). + WithStatusSubresource(backupNoSnapshot). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupNoSnapshot, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to restore") + }) + + t.Run("returns true when TablespaceVolumeSnapshotRefs is empty", func(t *testing.T) { + backupEmpty := backup.DeepCopy() + backupEmpty.Status.Snapshot = &v2.SnapshotStatus{TablespaceVolumeSnapshotRefs: map[string]string{}} + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backupEmpty, restore). + WithStatusSubresource(backupEmpty). + Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backupEmpty, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok, "no tablespace volumes to restore") + }) + + t.Run("creates PVC with correct data source when not found", func(t *testing.T) { + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + + pvcName := instanceName + "-" + ts1Name + "-tablespace" + pvc := &corev1.PersistentVolumeClaim{} + require.NoError(t, cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc)) + + // Verify data source points to the tablespace VolumeSnapshot + require.NotNil(t, pvc.Spec.DataSource, "PVC should have DataSource") + assert.Equal(t, ts1SnapshotName, pvc.Spec.DataSource.Name) + assert.Equal(t, volumesnapshotv1.GroupName, ptr.Deref(pvc.Spec.DataSource.APIGroup, "")) + assert.Equal(t, pNaming.KindVolumeSnapshot, pvc.Spec.DataSource.Kind) + + // Verify spec from TablespaceVolumes[ts1].DataVolumeClaimSpec (2Gi, not data's 1Gi) + assert.Equal(t, []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, pvc.Spec.AccessModes) + assert.Equal(t, resource.MustParse("2Gi"), pvc.Spec.Resources.Requests[corev1.ResourceStorage]) + + // Verify restore annotation + assert.Equal(t, restoreName, pvc.GetAnnotations()[pNaming.AnnotationSnapshotRestore]) + }) + + t.Run("deletes PVC when restore annotation is not found", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{}, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("deletes PVC when annotation points to different restore", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: "other-restore", + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.False(t, ok) + + pvc := &corev1.PersistentVolumeClaim{} + err = cl.Get(ctx, client.ObjectKey{Namespace: ns, Name: pvcName}, pvc) + require.True(t, k8serrors.IsNotFound(err), "PVC should be deleted, got err: %v", err) + }) + + t.Run("returns true when all tablespace PVCs have correct restore annotation", func(t *testing.T) { + pvcName := instanceName + "-" + ts1Name + "-tablespace" + existingPVC := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: ns, + Annotations: map[string]string{ + pNaming.AnnotationSnapshotRestore: restoreName, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("2Gi"), + }, + }, + }, + } + + cl := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(cluster, backup, restore, existingPVC). + WithStatusSubresource(backup). 
+ Build() + + r := newSnapshotRestorer(cl, logging.Discard(), cluster, backup, restore, noopPodExecutor) + ok, err := r.reconcileTablespaceVolumes(ctx, instance) + require.NoError(t, err) + assert.True(t, ok) + }) +} diff --git a/percona/controller/pgrestore/utils/pgbackrest.go b/percona/controller/pgrestore/utils/pgbackrest.go new file mode 100644 index 0000000000..9e32efe488 --- /dev/null +++ b/percona/controller/pgrestore/utils/pgbackrest.go @@ -0,0 +1,119 @@ +package utils + +import ( + "context" + + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" + "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +type PGBackRestRestore struct { + client.Client + + pgCluster *v2.PerconaPGCluster + pgRestore *v2.PerconaPGRestore +} + +func NewPGBackRestRestore(c client.Client, pgCluster *v2.PerconaPGCluster, pgRestore *v2.PerconaPGRestore) *PGBackRestRestore { + return &PGBackRestRestore{ + Client: c, + pgCluster: pgCluster, + pgRestore: pgRestore, + } +} + +func (r *PGBackRestRestore) Start(ctx context.Context) error { + orig := r.pgCluster.DeepCopy() + + if val, ok := r.pgCluster.GetAnnotations()[naming.PGBackRestRestore]; ok && val == r.pgRestore.Name { + return nil // already started + } + + if r.pgCluster.Annotations == nil { + r.pgCluster.Annotations = make(map[string]string) + } + r.pgCluster.Annotations[naming.PGBackRestRestore] = r.pgRestore.Name + + postgresCluster := new(v1beta1.PostgresCluster) + if err := r.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), postgresCluster); err != nil { + return errors.Wrap(err, "get PostgresCluster") + } + + origPostgres := postgresCluster.DeepCopy() + + if postgresCluster.Status.PGBackRest == nil { + 
postgresCluster.Status.PGBackRest = &v1beta1.PGBackRestStatus{} + } + + postgresCluster.Status.PGBackRest.Restore = &v1beta1.PGBackRestJobStatus{} + + if err := r.Status().Patch(ctx, postgresCluster, client.MergeFrom(origPostgres)); err != nil { + return errors.Wrap(err, "patch PostgresCluster status failed trying to initialize PGBackRest restore status") + } + + if r.pgCluster.Spec.Backups.PGBackRest.Restore == nil { + r.pgCluster.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ + PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, + } + } + + r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = ptr.To(true) + r.pgCluster.Spec.Backups.PGBackRest.Restore.RepoName = ptr.Deref(r.pgRestore.Spec.RepoName, "") + r.pgCluster.Spec.Backups.PGBackRest.Restore.Options = r.pgRestore.Spec.Options + + if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch PostgresCluster status failed trying to start restore") + } + + return nil +} + +func (r *PGBackRestRestore) DisableRestore(ctx context.Context) error { + if r.pgRestore.Status.State == v2.RestoreSucceeded || r.pgRestore.Status.State == v2.RestoreFailed { + return nil + } + + orig := r.pgCluster.DeepCopy() + + if r.pgCluster.Spec.Backups.PGBackRest.Restore == nil { + r.pgCluster.Spec.Backups.PGBackRest.Restore = &v1beta1.PGBackRestRestore{ + PostgresClusterDataSource: &v1beta1.PostgresClusterDataSource{}, + } + } + + r.pgCluster.Spec.Backups.PGBackRest.Restore.Enabled = ptr.To(false) + delete(r.pgCluster.Annotations, naming.PGBackRestRestore) + + if err := r.Patch(ctx, r.pgCluster, client.MergeFrom(orig)); err != nil { + return errors.Wrap(err, "patch PGCluster") + } + + return nil +} + +func (r *PGBackRestRestore) ObserveStatus(ctx context.Context) (v2.PGRestoreState, *metav1.Time, error) { + cluster := &v2.PerconaPGCluster{} + if err := r.Get(ctx, client.ObjectKeyFromObject(r.pgCluster), cluster); err != nil { + return v2.RestoreStarting, nil, 
errors.Wrap(err, "get PerconaPGCluster") + } + + if cluster.Status.PGBackRest == nil || cluster.Status.PGBackRest.Restore == nil { + return v2.RestoreStarting, nil, nil + } + restoreStatus := cluster.Status.PGBackRest.Restore + + switch { + case restoreStatus.Finished && restoreStatus.Succeeded > 0: + return v2.RestoreSucceeded, restoreStatus.CompletionTime, nil + case restoreStatus.Finished && restoreStatus.Failed > 0: + return v2.RestoreFailed, nil, nil + default: + return v2.RestoreRunning, nil, nil + } +} diff --git a/percona/naming/annotations.go b/percona/naming/annotations.go index cf36f7b6bc..c6737e96a2 100644 --- a/percona/naming/annotations.go +++ b/percona/naming/annotations.go @@ -51,4 +51,16 @@ const ( // indicate the name of the main site. // This annotation is set on standby clusters based on pgbackrest only. This is needed to calculate the replication lag. AnnotationReplicationMainSite = PrefixPerconaPGV2 + "replication-main-site" + + // AnnotationInstanceSuspended must be set on the instance StatefulSet to mark + // the instance as suspended. + AnnotationInstanceSuspended = PrefixPerconaPGV2 + "instance-suspended" + + // AnnotationSnapshotRestore is the annotation added to the data PVCs of a cluster + // to indicate the name of the PerconaPGRestore that is restoring the PVC from a snapshot. + AnnotationSnapshotRestore = PrefixPerconaPGV2 + "snapshot-restore" + + // AnnotationPVCsPreparedAt is the annotation added to the PerconaPGRestore which + // prepares the PVCs for a snapshot restore. + AnnotationPVCsPreparedAt = PrefixPerconaPGV2 + "pvcs-prepared-at" ) diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index d2fa2858bf..95f8131ab4 100644 --- a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -18,9 +18,15 @@ const ( // PerconaPGBackup finalizers const ( FinalizerDeleteBackup = PrefixPerconaInternal + "delete-backup" //nolint:gosec + + // FinalizerSnapshotInProgress is set on PerconaPGBackup objects. 
+ // It ensures that any changes made to the PGCluster are reverted upon + // snapshot completion (success or failure) or premature deletion of the PGBackup. + FinalizerSnapshotInProgress = PrefixPercona + "snapshot-in-progress" //nolint:gosec ) // PerconaPGBackup job finalizers const ( - FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec + FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec + FinalizerSnapshotRestore = PrefixPerconaInternal + "snapshot-restore" //nolint:gosec ) diff --git a/percona/naming/kinds.go b/percona/naming/kinds.go new file mode 100644 index 0000000000..07bf8db248 --- /dev/null +++ b/percona/naming/kinds.go @@ -0,0 +1,6 @@ +package naming + +const ( + KindVolumeSnapshot = "VolumeSnapshot" + KindPersistentVolumeClaim = "PersistentVolumeClaim" +) diff --git a/percona/postgres/common.go b/percona/postgres/common.go index 41026ade48..da6cba7371 100644 --- a/percona/postgres/common.go +++ b/percona/postgres/common.go @@ -5,10 +5,14 @@ import ( gover "github.com/hashicorp/go-version" "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/percona/percona-postgresql-operator/v2/internal/naming" + pNaming "github.com/percona/percona-postgresql-operator/v2/percona/naming" v2 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/pgv2.percona.com/v2" ) @@ -54,9 +58,88 @@ func GetPrimaryPod(ctx context.Context, cli client.Client, cr *v2.PerconaPGClust return &podList.Items[0], nil } +// GetReplicaPods lists the replica pods for a given cluster. 
+func GetReplicaPods(ctx context.Context, cli client.Client, cr *v2.PerconaPGCluster) ([]corev1.Pod, error) { + podList := &corev1.PodList{} + + err := cli.List(ctx, podList, &client.ListOptions{ + Namespace: cr.Namespace, + LabelSelector: labels.SelectorFromSet(map[string]string{ + "app.kubernetes.io/instance": cr.GetName(), + "postgres-operator.crunchydata.com/role": naming.RolePatroniReplica, + }), + }) + if err != nil { + return nil, errors.Wrap(err, "failed to list pods") + } + + return podList.Items, nil +} + func determineVersion(cr *v2.PerconaPGCluster) string { if cr.CompareVersion("2.7.0") <= 0 { return cr.Status.PatroniVersion } return patroniVersion4 } + +// SuspendInstance suspends an instance by setting the AnnotationInstanceSuspended annotation on the StatefulSet. +// Returns true if the instance was suspended. +// Caller is responsible for waiting for the instance to be suspended. +func SuspendInstance(ctx context.Context, cli client.Client, instanceKey client.ObjectKey) (bool, error) { + sts := &appsv1.StatefulSet{} + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") + } + + if _, ok := sts.GetAnnotations()[pNaming.AnnotationInstanceSuspended]; ok { + return sts.Status.Replicas == 0 && sts.Status.ReadyReplicas == 0, nil + } + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") + } + + orig := sts.DeepCopy() + annots := sts.GetAnnotations() + if annots == nil { + annots = make(map[string]string) + } + annots[pNaming.AnnotationInstanceSuspended] = "" + sts.SetAnnotations(annots) + return cli.Patch(ctx, sts, client.MergeFrom(orig)) + }); err != nil { + return false, errors.Wrap(err, "failed to update stateful set annotations") + } + return false, nil +} + +// UnsuspendInstance unsuspends an instance by removing the AnnotationInstanceSuspended annotation 
on the StatefulSet. +// Returns true if the instance was unsuspended. +// Caller is responsible for waiting for the instance to be unsuspended. +func UnsuspendInstance(ctx context.Context, cli client.Client, instanceKey client.ObjectKey) (bool, error) { + sts := &appsv1.StatefulSet{} + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return false, errors.Wrap(err, "failed to get stateful set") + } + + if _, ok := sts.GetAnnotations()[pNaming.AnnotationInstanceSuspended]; !ok { + return sts.Status.Replicas > 0 && sts.Status.ReadyReplicas > 0, nil + } + + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + if err := cli.Get(ctx, instanceKey, sts); err != nil { + return errors.Wrap(err, "failed to get stateful set") + } + + orig := sts.DeepCopy() + annots := sts.GetAnnotations() + delete(annots, pNaming.AnnotationInstanceSuspended) + sts.SetAnnotations(annots) + return cli.Patch(ctx, sts, client.MergeFrom(orig)) + }); err != nil { + return false, errors.Wrap(err, "failed to update stateful set annotations") + } + return false, nil +} diff --git a/percona/watcher/wal.go b/percona/watcher/wal.go index 6fa0b93434..cee366961d 100644 --- a/percona/watcher/wal.go +++ b/percona/watcher/wal.go @@ -9,6 +9,7 @@ import ( "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" @@ -136,7 +137,9 @@ func getLatestBackup(ctx context.Context, cli client.Client, cr *pgv2.PerconaPGC latest := &pgv2.PerconaPGBackup{} runningBackupExists := false for _, backup := range backupList.Items { - backup := backup + if ptr.Deref(backup.Spec.Method, pgv2.BackupMethodPGBackrest) == pgv2.BackupMethodVolumeSnapshot { + continue + } switch backup.Status.State { case pgv2.BackupSucceeded: @@ -213,7 +216,7 @@ func getBackupStartTimestamp(ctx context.Context, cli client.Client, cr *pgv2.Pe return time.Time{}, 
errors.Wrap(PrimaryPodNotFound, err.Error()) } - pgbackrestInfo, err := pgbackrest.GetInfo(ctx, primary, backup.Spec.RepoName) + pgbackrestInfo, err := pgbackrest.GetInfo(ctx, primary, ptr.Deref(backup.Spec.RepoName, "")) if err != nil { return time.Time{}, errors.Wrap(err, "get pgbackrest info") } diff --git a/percona/watcher/wal_test.go b/percona/watcher/wal_test.go index f6442391b0..ca95309529 100644 --- a/percona/watcher/wal_test.go +++ b/percona/watcher/wal_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/percona/percona-postgresql-operator/v2/percona/testutils" @@ -331,7 +332,7 @@ func TestGetLatestCommitTimestamp(t *testing.T) { }, Spec: pgv2.PerconaPGBackupSpec{ PGCluster: "test-cluster", - RepoName: "repo1", + RepoName: ptr.To("repo1"), }, }, cluster: &pgv2.PerconaPGCluster{ diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go index 15c4433251..ae4836e765 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types.go @@ -1,12 +1,16 @@ package v2 import ( + "context" "encoding/json" "fmt" "time" v "github.com/hashicorp/go-version" + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" crunchyv1beta1 "github.com/percona/percona-postgresql-operator/v2/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -35,6 +39,7 @@ type PerconaPGBackup struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` + // +kubebuilder:validation:XValidation:rule="self.method == \"volumeSnapshot\" || has(self.repoName)",message="repoName is required when method is 'pgbackrest'" Spec PerconaPGBackupSpec `json:"spec"` Status 
PerconaPGBackupStatus `json:"status,omitempty"` } @@ -47,13 +52,27 @@ type PerconaPGBackupList struct { Items []PerconaPGBackup `json:"items"` } +type BackupMethod string + +const ( + BackupMethodPGBackrest BackupMethod = "pgbackrest" + BackupMethodVolumeSnapshot BackupMethod = "volumeSnapshot" +) + type PerconaPGBackupSpec struct { PGCluster string `json:"pgCluster"` + // +optional // The name of the pgBackRest repo to run the backup command against. - // +kubebuilder:validation:Required + // This is required when method is 'pgbackrest'. // +kubebuilder:validation:Pattern=^repo[1-4] - RepoName string `json:"repoName"` + RepoName *string `json:"repoName,omitempty"` + + // Method with which to perform the backup + // +kubebuilder:validation:Enum={pgbackrest,volumeSnapshot} + // +kubebuilder:default=pgbackrest + // +optional + Method *BackupMethod `json:"method,omitempty"` // Command line options to include when running the pgBackRest backup command. // https://pgbackrest.org/command.html#command-backup @@ -68,6 +87,7 @@ var PGClusterIndexerFunc client.IndexerFunc = func(obj client.Object) []string { if !ok { return nil } + return []string{backup.Spec.PGCluster} } @@ -94,6 +114,17 @@ type PerconaPGBackupStatus struct { BackupName string `json:"backupName,omitempty"` CRVersion string `json:"crVersion,omitempty"` LatestRestorableTime PITRestoreDateTime `json:"latestRestorableTime,omitempty"` + Snapshot *SnapshotStatus `json:"snapshot,omitempty"` +} + +type SnapshotStatus struct { + // Name of the VolumeSnapshot containing data volume contents. + DataVolumeSnapshotRef *string `json:"dataVolumeSnapshotRef,omitempty"` + // Name of the VolumeSnapshot containing WAL volume contents. + WALVolumeSnapshotRef *string `json:"walVolumeSnapshotRef,omitempty"` + // Names of the VolumeSnapshots containing tablespace volume contents. + // Key is the name of the tablespace, value is the name of the VolumeSnapshot. 
+ TablespaceVolumeSnapshotRefs map[string]string `json:"tablespaceVolumeSnapshotRefs,omitempty"` } // +kubebuilder:validation:Type=string @@ -168,7 +199,13 @@ const ( ) func (b *PerconaPGBackup) Default() { - b.Spec.Options = append(b.Spec.Options, fmt.Sprintf(`--annotation="%s"="%s"`, PGBackrestAnnotationBackupName, b.Name)) + if b.Spec.Method == nil { + b.Spec.Method = ptr.To(BackupMethodPGBackrest) + } + + if *b.Spec.Method == BackupMethodPGBackrest { + b.Spec.Options = append(b.Spec.Options, fmt.Sprintf(`--annotation="%s"="%s"`, PGBackrestAnnotationBackupName, b.Name)) + } } func (b *PerconaPGBackup) CompareVersion(ver string) int { @@ -178,3 +215,16 @@ func (b *PerconaPGBackup) CompareVersion(ver string) int { backupVersion := v.Must(v.NewVersion(b.Status.CRVersion)) return backupVersion.Compare(v.Must(v.NewVersion(ver))) } + +func (pgBackup *PerconaPGBackup) UpdateStatus(ctx context.Context, cl client.Client, updateFunc func(bcp *PerconaPGBackup)) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + bcp := new(PerconaPGBackup) + if err := cl.Get(ctx, client.ObjectKeyFromObject(pgBackup), bcp); err != nil { + return errors.Wrap(err, "get PGBackup") + } + + updateFunc(bcp) + + return cl.Status().Update(ctx, bcp) + }) +} diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go index e8b27d955c..d45d70d4d6 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgbackup_types_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { @@ -29,25 +30,25 @@ func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { }, "non-pointer zero date time": { data: PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Time{})), + Time: ptr.To(metav1.NewTime(time.Time{})), }, expected: 
`"0001-01-01 00:00:00.000000+0000"`, }, "pointer zero date time": { data: &PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Time{})), + Time: ptr.To(metav1.NewTime(time.Time{})), }, expected: `"0001-01-01 00:00:00.000000+0000"`, }, "non-pointer with date time": { data: PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), + Time: ptr.To(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), }, expected: `"2025-11-21 13:14:15.345600+0000"`, }, "pointer with date time": { data: &PITRestoreDateTime{ - Time: ptr(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), + Time: ptr.To(metav1.NewTime(time.Date(2025, time.November, 21, 13, 14, 15, 345600000, time.UTC))), }, expected: `"2025-11-21 13:14:15.345600+0000"`, }, @@ -63,7 +64,3 @@ func TestPITRestoreDateTime_MarshalJSON(t *testing.T) { }) } } - -func ptr[T any](v T) *T { - return &v -} diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go index 8bb805a58d..4384177529 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgcluster_types.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -266,6 +267,12 @@ func (cr *PerconaPGCluster) Default() { if cr.CompareVersion("2.6.0") >= 0 && cr.Spec.AutoCreateUserSchema == nil { cr.Spec.AutoCreateUserSchema = &t } + + if cr.Spec.Backups.IsVolumeSnapshotsEnabled() && + cr.Spec.Backups.VolumeSnapshots.Mode == VolumeSnapshotModeOffline && + cr.Spec.Backups.VolumeSnapshots.OfflineConfig == nil { + cr.Spec.Backups.VolumeSnapshots.OfflineConfig = DefaultOfflineSnapshotConfig() + } } func (cr 
*PerconaPGCluster) PostgresImage() string { @@ -526,6 +533,75 @@ type Backups struct { // Enable tracking latest restorable time TrackLatestRestorableTime *bool `json:"trackLatestRestorableTime,omitempty"` + + // VolumeSnapshots configuration + // +optional + VolumeSnapshots *VolumeSnapshots `json:"volumeSnapshots,omitempty"` +} + +type VolumeSnapshotMode string + +const ( + // VolumeSnapshotModeOffline is the mode for taking offline VolumeSnapshots. + // With this mode, the operator will stop a replica and take a snapshot of the PVC. + VolumeSnapshotModeOffline VolumeSnapshotMode = "offline" +) + +type VolumeSnapshots struct { + // Mode of the VolumeSnapshot. + // +kubebuilder:validation:Enum={offline} + // +kubebuilder:default=offline + // +optional + Mode VolumeSnapshotMode `json:"mode,omitempty"` + + // Name of the VolumeSnapshotClass to use. + // +kubebuilder:validation:Required + ClassName string `json:"className"` + + // Defines the Cron schedule for a VolumeSnapshot. + // Follows the standard Cron schedule syntax: + // https://k8s.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax + // +optional + // +kubebuilder:validation:MinLength=6 + Schedule *string `json:"schedule,omitempty"` + + // Configuration for offline snapshot operations. + // Ignored if mode is not offline. + // +optional + OfflineConfig *OfflineSnapshotConfig `json:"offlineConfig,omitempty"` +} + +func DefaultOfflineSnapshotConfig() *OfflineSnapshotConfig { + return &OfflineSnapshotConfig{ + Checkpoint: &CheckpointConfig{ + Enabled: ptr.To(true), + TimeoutSeconds: ptr.To(int32(300)), + }, + } +} + +type OfflineSnapshotConfig struct { + // Checkpoint configuration for offline snapshot operations. + // +optional + Checkpoint *CheckpointConfig `json:"checkpoint,omitempty"` +} + +type CheckpointConfig struct { + // If set, a checkpoint is requested. 
+ // +optional + // +kubebuilder:default=true + Enabled *bool `json:"enabled,omitempty"` + + // Timeout for the checkpoint operation. + // Ignored if checkpoint is not enabled. + // +optional + // +kubebuilder:validation:Minimum=30 + // +kubebuilder:default=300 + TimeoutSeconds *int32 `json:"timeoutSeconds,omitempty"` +} + +func (b Backups) IsVolumeSnapshotsEnabled() bool { + return b.VolumeSnapshots != nil && b.VolumeSnapshots.ClassName != "" } func (b Backups) IsEnabled() bool { diff --git a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go index a1e395bf1a..e242b1ee07 100644 --- a/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go +++ b/pkg/apis/pgv2.percona.com/v2/perconapgrestore_types.go @@ -1,7 +1,12 @@ package v2 import ( + "context" + + "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" ) func init() { @@ -24,6 +29,7 @@ type PerconaPGRestore struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` + // +kubebuilder:validation:XValidation:rule="((has(self.repoName) && self.repoName != \"\") || (has(self.volumeSnapshotBackupName) && self.volumeSnapshotBackupName != \"\"))",message="either repoName or volumeSnapshotBackupName must be set" Spec PerconaPGRestoreSpec `json:"spec"` Status PerconaPGRestoreStatus `json:"status,omitempty"` } @@ -39,14 +45,20 @@ type PerconaPGRestoreList struct { type PerconaPGRestoreSpec struct { // The name of the PerconaPGCluster to perform restore. // +kubebuilder:validation:Required + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="pgCluster is an immutable field" PGCluster string `json:"pgCluster"` // The name of the pgBackRest repo within the source PostgresCluster that contains the backups // that should be utilized to perform a pgBackRest restore when initializing the data source // for the new PostgresCluster. 
- // +kubebuilder:validation:Required // +kubebuilder:validation:Pattern=^repo[1-4] - RepoName string `json:"repoName"` + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="repoName is an immutable field" + RepoName *string `json:"repoName,omitempty"` + + // The name of the backup to perform in-place volume snapshot restores from. + // +optional + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="volumeSnapshotBackupName is an immutable field" + VolumeSnapshotBackupName string `json:"volumeSnapshotBackupName,omitempty"` // Command line options to include when running the pgBackRest restore command. // https://pgbackrest.org/command.html#command-restore @@ -69,3 +81,20 @@ type PerconaPGRestoreStatus struct { State PGRestoreState `json:"state,omitempty"` CompletedAt *metav1.Time `json:"completed,omitempty"` } + +func (r *PerconaPGRestore) IsCompleted() bool { + return r.Status.State == RestoreSucceeded || r.Status.State == RestoreFailed +} + +func (pgRestore *PerconaPGRestore) UpdateStatus(ctx context.Context, cl client.Client, updateFunc func(restore *PerconaPGRestore)) error { + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + restore := new(PerconaPGRestore) + if err := cl.Get(ctx, client.ObjectKeyFromObject(pgRestore), restore); err != nil { + return errors.Wrap(err, "get PGRestore") + } + + updateFunc(restore) + + return cl.Status().Update(ctx, restore) + }) +} diff --git a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go index 815d6481af..0962924286 100644 --- a/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go +++ b/pkg/apis/pgv2.percona.com/v2/zz_generated.deepcopy.go @@ -30,6 +30,11 @@ func (in *Backups) DeepCopyInto(out *Backups) { *out = new(bool) **out = **in } + if in.VolumeSnapshots != nil { + in, out := &in.VolumeSnapshots, &out.VolumeSnapshots + *out = new(VolumeSnapshots) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an 
autogenerated deepcopy function, copying the receiver, creating a new Backups. @@ -82,6 +87,31 @@ func (in *BuiltInExtensionsSpec) DeepCopy() *BuiltInExtensionsSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CheckpointConfig) DeepCopyInto(out *CheckpointConfig) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + if in.TimeoutSeconds != nil { + in, out := &in.TimeoutSeconds, &out.TimeoutSeconds + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CheckpointConfig. +func (in *CheckpointConfig) DeepCopy() *CheckpointConfig { + if in == nil { + return nil + } + out := new(CheckpointConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CustomExtensionSpec) DeepCopyInto(out *CustomExtensionSpec) { *out = *in @@ -139,6 +169,26 @@ func (in *ExtensionsSpec) DeepCopy() *ExtensionsSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OfflineSnapshotConfig) DeepCopyInto(out *OfflineSnapshotConfig) { + *out = *in + if in.Checkpoint != nil { + in, out := &in.Checkpoint, &out.Checkpoint + *out = new(CheckpointConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OfflineSnapshotConfig. +func (in *OfflineSnapshotConfig) DeepCopy() *OfflineSnapshotConfig { + if in == nil { + return nil + } + out := new(OfflineSnapshotConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *PGBackRestArchive) DeepCopyInto(out *PGBackRestArchive) { *out = *in @@ -632,6 +682,16 @@ func (in *PerconaPGBackupList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PerconaPGBackupSpec) DeepCopyInto(out *PerconaPGBackupSpec) { *out = *in + if in.RepoName != nil { + in, out := &in.RepoName, &out.RepoName + *out = new(string) + **out = **in + } + if in.Method != nil { + in, out := &in.Method, &out.Method + *out = new(BackupMethod) + **out = **in + } if in.Options != nil { in, out := &in.Options, &out.Options *out = make([]string, len(*in)) @@ -662,6 +722,11 @@ func (in *PerconaPGBackupStatus) DeepCopyInto(out *PerconaPGBackupStatus) { (*in).DeepCopyInto(*out) } in.LatestRestorableTime.DeepCopyInto(&out.LatestRestorableTime) + if in.Snapshot != nil { + in, out := &in.Snapshot, &out.Snapshot + *out = new(SnapshotStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PerconaPGBackupStatus. @@ -947,6 +1012,11 @@ func (in *PerconaPGRestoreList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PerconaPGRestoreSpec) DeepCopyInto(out *PerconaPGRestoreSpec) { *out = *in + if in.RepoName != nil { + in, out := &in.RepoName, &out.RepoName + *out = new(string) + **out = **in + } if in.Options != nil { in, out := &in.Options, &out.Options *out = make([]string, len(*in)) @@ -1216,6 +1286,38 @@ func (in *ServiceExpose) DeepCopy() *ServiceExpose { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SnapshotStatus) DeepCopyInto(out *SnapshotStatus) { + *out = *in + if in.DataVolumeSnapshotRef != nil { + in, out := &in.DataVolumeSnapshotRef, &out.DataVolumeSnapshotRef + *out = new(string) + **out = **in + } + if in.WALVolumeSnapshotRef != nil { + in, out := &in.WALVolumeSnapshotRef, &out.WALVolumeSnapshotRef + *out = new(string) + **out = **in + } + if in.TablespaceVolumeSnapshotRefs != nil { + in, out := &in.TablespaceVolumeSnapshotRefs, &out.TablespaceVolumeSnapshotRefs + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SnapshotStatus. +func (in *SnapshotStatus) DeepCopy() *SnapshotStatus { + if in == nil { + return nil + } + out := new(SnapshotStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *StandbySpec) DeepCopyInto(out *StandbySpec) { *out = *in @@ -1259,3 +1361,28 @@ func (in *StandbyStatus) DeepCopy() *StandbyStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VolumeSnapshots) DeepCopyInto(out *VolumeSnapshots) { + *out = *in + if in.Schedule != nil { + in, out := &in.Schedule, &out.Schedule + *out = new(string) + **out = **in + } + if in.OfflineConfig != nil { + in, out := &in.OfflineConfig, &out.OfflineConfig + *out = new(OfflineSnapshotConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeSnapshots. +func (in *VolumeSnapshots) DeepCopy() *VolumeSnapshots { + if in == nil { + return nil + } + out := new(VolumeSnapshots) + in.DeepCopyInto(out) + return out +}