@@ -14,7 +14,9 @@ import (
1414
1515 v1 "k8s.io/api/core/v1"
1616 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17+ "k8s.io/apimachinery/pkg/api/errors"
1718 "k8s.io/apimachinery/pkg/types"
19+ "k8s.io/apimachinery/pkg/util/wait"
1820 "k8s.io/client-go/tools/cache"
1921
2022 acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
@@ -539,6 +541,13 @@ func (c *Controller) postgresqlUpdate(prev, cur interface{}) {
539541 pgOld := c .postgresqlCheck (prev )
540542 pgNew := c .postgresqlCheck (cur )
541543 if pgOld != nil && pgNew != nil {
544+
545+ if pgNew .Annotations ["postgres-operator.zalando.org/action" ] == "restore-in-place" {
546+ c .logger .Debugf ("restore-in-place: postgresqlUpdate called for cluster %q" , pgNew .Name )
547+ c .handlerRestoreInPlace (pgOld , pgNew )
548+ return
549+ }
550+
542551 // Avoid the inifinite recursion for status updates
543552 if reflect .DeepEqual (pgOld .Spec , pgNew .Spec ) {
544553 if reflect .DeepEqual (pgNew .Annotations , pgOld .Annotations ) {
@@ -568,6 +577,146 @@ func (c *Controller) postgresqlCheck(obj interface{}) *acidv1.Postgresql {
568577 return pg
569578}
570579
580+ // validateRestoreInPlace checks if the restore parameters are valid
581+ func (c * Controller ) validateRestoreInPlace (pgOld , pgNew * acidv1.Postgresql ) error {
582+ c .logger .Debugf ("restore-in-place: validating restore parameters for cluster %q" , pgNew .Name )
583+
584+ if pgNew .Spec .Clone == nil {
585+ return fmt .Errorf ("'clone' section is missing in the manifest" )
586+ }
587+
588+ // Use ClusterName from CloneDescription
589+ if pgNew .Spec .Clone .ClusterName != pgOld .Name {
590+ return fmt .Errorf ("clone cluster name %q does not match the current cluster name %q" , pgNew .Spec .Clone .ClusterName , pgOld .Name )
591+ }
592+
593+ // Use EndTimestamp from CloneDescription
594+ cloneTimestamp , err := time .Parse (time .RFC3339 , pgNew .Spec .Clone .EndTimestamp )
595+ if err != nil {
596+ return fmt .Errorf ("could not parse clone timestamp %q: %v" , pgNew .Spec .Clone .EndTimestamp , err )
597+ }
598+
599+ if cloneTimestamp .After (time .Now ()) {
600+ return fmt .Errorf ("clone timestamp %q is in the future" , pgNew .Spec .Clone .EndTimestamp )
601+ }
602+
603+ c .logger .Debugf ("restore-in-place: validation successful" )
604+ return nil
605+ }
606+
607+ // waitForOldResourcesTermination waits until the postgresql CR and its StatefulSet are terminated
608+ func (c * Controller ) waitForOldResourcesTermination (pgOld * acidv1.Postgresql , statefulSetName string ) error {
609+ c .logger .Debugf ("restore-in-place: Waiting for old CR %q and StatefulSet %q to be fully terminated" , pgOld .Name , statefulSetName )
610+
611+ err := wait .PollUntilContextTimeout (context .TODO (), 2 * time .Second , 5 * time .Minute , true , func (ctx context.Context ) (bool , error ) {
612+ // Check for CR
613+ _ , crErr := c .KubeClient .AcidV1ClientSet .AcidV1 ().Postgresqls (pgOld .Namespace ).Get (ctx , pgOld .Name , metav1.GetOptions {})
614+ crGone := errors .IsNotFound (crErr )
615+ if crErr != nil && ! crGone {
616+ c .logger .Errorf ("restore-in-place: Error while waiting for CR deletion: %v" , crErr )
617+ return false , crErr // A real error occurred
618+ }
619+
620+ // Check for StatefulSet
621+ _ , stsErr := c .KubeClient .StatefulSets (pgOld .Namespace ).Get (ctx , statefulSetName , metav1.GetOptions {})
622+ stsGone := errors .IsNotFound (stsErr )
623+ if stsErr != nil && ! stsGone {
624+ c .logger .Errorf ("restore-in-place: Error while waiting for StatefulSet deletion: %v" , stsErr )
625+ return false , stsErr // A real error occurred
626+ }
627+
628+ if crGone && stsGone {
629+ c .logger .Debugf ("restore-in-place: Both old CR and StatefulSet are fully terminated." )
630+ return true , nil
631+ }
632+
633+ if ! crGone {
634+ c .logger .Infof ("restore-in-place: still waiting for postgresql CR %q to be deleted" , pgOld .Name )
635+ }
636+ if ! stsGone {
637+ c .logger .Infof ("restore-in-place: still waiting for StatefulSet %q to be deleted" , statefulSetName )
638+ }
639+
640+ return false , nil // Not done yet, continue polling.
641+ })
642+
643+ if err != nil {
644+ return fmt .Errorf ("error while waiting for old resources to be deleted: %v" , err )
645+ }
646+
647+ c .logger .Debugf ("restore-in-place: Finished waiting for old resource deletion." )
648+ return nil
649+ }
650+
651+ // handlerRestoreInPlace is to handle the resotre in place, it does few operatons
652+ // 1. Verifies the parameters required for restoring in place
653+ // 2. Removes the old CR if it exists, wait for it, if not present check the err that it is a k8sNotfound error and continue
654+ // 3. Wait for the successful removal of statefulsets, if not present check the err that it is a k8sNotfound error and continue
655+ // 4. Create a new CR with the latest details, while keeping few metadata about restore
656+ func (c * Controller ) handlerRestoreInPlace (pgOld , pgNew * acidv1.Postgresql ) {
657+ c .logger .Infof ("restore-in-place: starting restore-in-place for cluster %q" , pgNew .Name )
658+
659+ if err := c .validateRestoreInPlace (pgOld , pgNew ); err != nil {
660+ c .logger .Errorf ("restore-in-place: validation failed for cluster %q: %v" , pgNew .Name , err )
661+ return
662+ }
663+
664+ newPgSpec := pgNew .DeepCopy ()
665+ delete (newPgSpec .Annotations , "postgres-operator.zalando.org/action" )
666+ newPgSpec .ResourceVersion = ""
667+ newPgSpec .UID = ""
668+ c .logger .Debugf ("restore-in-place: newPgSpec after removing annotation: %+v" , newPgSpec )
669+
670+ statefulSetName := pgOld .Name // Capture StatefulSet name, it's the same as the cluster name
671+
672+ // Initiate CR deletion first, as requested
673+ c .logger .Debugf ("restore-in-place: Attempting direct API deletion of postgresql CR %q" , pgOld .Name )
674+ err := c .KubeClient .AcidV1ClientSet .AcidV1 ().Postgresqls (pgOld .Namespace ).Delete (context .TODO (), pgOld .Name , metav1.DeleteOptions {})
675+ if err != nil && ! errors .IsNotFound (err ) {
676+ c .logger .Errorf ("restore-in-place: could not delete postgresql CR via API: %v" , err )
677+ return // Stop if there's a critical error deleting the CR
678+ }
679+ c .logger .Debugf ("restore-in-place: Direct API deletion of postgresql CR for %q initiated (or CR was already not found)." , pgOld .Name )
680+
681+ // Then, initiate cluster sub-resource deletion if the cluster object is in memory
682+ clusterName := util .NameFromMeta (pgOld .ObjectMeta )
683+ c .clustersMu .RLock ()
684+ cl , clusterFound := c .clusters [clusterName ]
685+ c .clustersMu .RUnlock ()
686+
687+ if clusterFound {
688+ c .logger .Debugf ("restore-in-place: Cluster object found in memory. Calling cluster.Delete() for %q" , clusterName )
689+ if cl .Annotations == nil {
690+ cl .Annotations = make (map [string ]string )
691+ }
692+ cl .Annotations ["postgres-operator.zalando.org/action" ] = "restore-in-place" // User requested to keep this
693+ if err := cl .Delete (); err != nil {
694+ // Log error but continue to ensure we wait for termination
695+ c .logger .Errorf ("restore-in-place: error during cluster.Delete() for %q: %v. Proceeding to wait for termination." , clusterName , err )
696+ }
697+ c .logger .Debugf ("restore-in-place: cluster.Delete() returned for %q" , clusterName )
698+ } else {
699+ c .logger .Warningf ("restore-in-place: cluster %q not found in controller's map. Relying on CR deletion to trigger cleanup." , clusterName )
700+ }
701+
702+ if err := c .waitForOldResourcesTermination (pgOld , statefulSetName ); err != nil {
703+ c .logger .Errorf ("restore-in-place: %v" , err )
704+ return
705+ }
706+
707+ // Create a new CR with the latest details
708+ c .logger .Debugf ("restore-in-place: Creating new postgresql CR %q" , newPgSpec .Name )
709+ _ , err = c .KubeClient .AcidV1ClientSet .AcidV1 ().Postgresqls (newPgSpec .Namespace ).Create (context .TODO (), newPgSpec , metav1.CreateOptions {})
710+ if err != nil {
711+ c .logger .Errorf ("restore-in-place: could not create postgresql CR for restore-in-place: %v" , err )
712+ // If the new CR cannot be created, the user needs to intervene.
713+ return
714+ }
715+ c .logger .Debugf ("restore-in-place: New postgresql CR %q created" , newPgSpec .Name )
716+
717+ c .logger .Infof ("restore-in-place: for cluster %q triggered successfully" , pgNew .Name )
718+ }
719+
571720/*
572721Ensures the pod service account and role bindings exist in a namespace
573722before a PG cluster is created there so that a user does not have to deploy
0 commit comments