Skip to content

Commit dafd6d8

Browse files
committed
PITR: fix clones and restore
1 parent 5ed89b4 commit dafd6d8

2 files changed

Lines changed: 65 additions & 3 deletions

File tree

src/api/organization/project/branch/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from .....deployment.kubernetes.volume_clone import (
5151
clone_branch_database_volume,
5252
restore_branch_database_volume_from_snapshot,
53+
restore_branch_wal_volume_from_snapshot,
5354
)
5455
from .....deployment.settings import get_settings as get_deployment_settings
5556
from .....exceptions import VelaDeploymentError, VelaError, VelaKubernetesError, VelaSimplyblockAPIError
@@ -359,6 +360,7 @@ class RestoreSnapshotContext(TypedDict):
359360
namespace: str
360361
name: str
361362
content_name: str | None
363+
wal_snapshot_name: str | None
362364

363365

364366
def snapshot_pgbouncer_config(config: PgbouncerConfig | None) -> PgbouncerConfigSnapshot:
@@ -765,6 +767,7 @@ async def _clone_branch_environment_task(
765767
pvc_timeout_seconds=_PVC_CLONE_TIMEOUT_SECONDS,
766768
pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS,
767769
database_size=parameters.database_size,
770+
pitr_enabled=pitr_enabled,
768771
)
769772
except VelaError:
770773
await _persist_branch_status(branch_id, BranchServiceStatus.ERROR)
@@ -821,6 +824,7 @@ async def _restore_branch_environment_task(
821824
snapshot_namespace: str,
822825
snapshot_name: str,
823826
snapshot_content_name: str | None,
827+
wal_snapshot_name: str | None,
824828
restore_database_size: int,
825829
pgbouncer_config: PgbouncerConfigSnapshot,
826830
pitr_enabled: bool,
@@ -844,6 +848,19 @@ async def _restore_branch_environment_task(
844848
pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS,
845849
database_size=restore_database_size,
846850
)
851+
if wal_snapshot_name is not None:
852+
await restore_branch_wal_volume_from_snapshot(
853+
source_branch_id=source_branch_id,
854+
target_branch_id=branch_id,
855+
snapshot_namespace=snapshot_namespace,
856+
snapshot_name=wal_snapshot_name,
857+
snapshot_class=_VOLUME_SNAPSHOT_CLASS,
858+
storage_class_name=storage_class_name,
859+
snapshot_timeout_seconds=_SNAPSHOT_TIMEOUT_SECONDS,
860+
snapshot_poll_interval_seconds=_SNAPSHOT_POLL_INTERVAL_SECONDS,
861+
pvc_timeout_seconds=_PVC_TIMEOUT_SECONDS,
862+
pvc_poll_interval_seconds=_PVC_POLL_INTERVAL_SECONDS,
863+
)
847864
except VelaError:
848865
await _persist_branch_status(branch_id, BranchServiceStatus.ERROR)
849866
await _cleanup_failed_branch_deployment(branch_id)
@@ -1320,6 +1337,7 @@ def _schedule_branch_environment_tasks(
13201337
snapshot_namespace=restore_snapshot["namespace"],
13211338
snapshot_name=restore_snapshot["name"],
13221339
snapshot_content_name=restore_snapshot["content_name"],
1340+
wal_snapshot_name=restore_snapshot["wal_snapshot_name"],
13231341
restore_database_size=restore_database_size,
13241342
pgbouncer_config=pgbouncer_config,
13251343
pitr_enabled=branch.pitr_enabled,
@@ -1386,6 +1404,7 @@ async def create( # noqa: C901
13861404
namespace=cast("str", backup_entry.snapshot_namespace),
13871405
name=cast("str", backup_entry.snapshot_name),
13881406
content_name=backup_entry.snapshot_content_name,
1407+
wal_snapshot_name=backup_entry.wal_snapshot_name,
13891408
)
13901409
source_id: Identifier | None = getattr(source, "id", None)
13911410
clone_parameters: DeploymentParameters | None = None

src/deployment/kubernetes/volume_clone.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@
55
from datetime import UTC, datetime
66
from typing import Any
77

8+
from kubernetes.utils import parse_quantity
9+
810
from ..._util import Identifier
911
from ...exceptions import VelaKubernetesError
1012
from .. import (
1113
_POD_SECURITY_LABELS,
1214
AUTOSCALER_PVC_SUFFIX,
1315
AUTOSCALER_WAL_PVC_SUFFIX,
16+
PITR_WAL_PVC_SIZE,
1417
get_autoscaler_vm_identity,
1518
kube_service,
1619
)
@@ -36,6 +39,8 @@
3639

3740
logger = logging.getLogger(__name__)
3841

42+
# Integer form of PITR_WAL_PVC_SIZE, parsed once at import time from its
# Kubernetes quantity notation so callers can pass a plain int as the target
# size of WAL volumes (presumably a byte count, per the name -- confirm
# against the PITR_WAL_PVC_SIZE definition).
_PITR_WAL_PVC_SIZE_BYTES: int = int(parse_quantity(PITR_WAL_PVC_SIZE))
43+
3944

4045
@dataclass(frozen=True)
4146
class CloneTimeouts:
@@ -319,6 +324,7 @@ class _SnapshotRestoreOperation:
319324
storage_class_name: str
320325
target_database_size: int
321326
timeouts: CloneTimeouts
327+
pvc_suffix: str = AUTOSCALER_PVC_SUFFIX
322328
ids: CloneIdentifiers = field(init=False)
323329
created_target_snapshot: bool = field(default=False, init=False)
324330
created_content: bool = field(default=False, init=False)
@@ -327,8 +333,8 @@ def __post_init__(self) -> None:
327333
source_ns = self.snapshot_namespace
328334
_, source_vm_name = get_autoscaler_vm_identity(self.source_branch_id)
329335
target_ns, target_vm_name = get_autoscaler_vm_identity(self.target_branch_id)
330-
pvc_name = f"{source_vm_name}{AUTOSCALER_PVC_SUFFIX}"
331-
target_pvc_name = f"{target_vm_name}{AUTOSCALER_PVC_SUFFIX}"
336+
pvc_name = f"{source_vm_name}{self.pvc_suffix}"
337+
target_pvc_name = f"{target_vm_name}{self.pvc_suffix}"
332338
if target_pvc_name != pvc_name:
333339
raise VelaKubernetesError(
334340
f"Autoscaler PVC name mismatch between source ({pvc_name}) and target ({target_pvc_name})"
@@ -520,7 +526,7 @@ async def clone_branch_database_volume(
520526
target_branch_id=target_branch_id,
521527
snapshot_class=snapshot_class,
522528
storage_class_name=storage_class_name,
523-
target_database_size=database_size,
529+
target_database_size=_PITR_WAL_PVC_SIZE_BYTES,
524530
timeouts=timeouts,
525531
volume_label="wal",
526532
pvc_suffix=AUTOSCALER_WAL_PVC_SUFFIX,
@@ -563,3 +569,40 @@ async def restore_branch_database_volume_from_snapshot(
563569
),
564570
)
565571
await operation.run()
572+
573+
574+
async def restore_branch_wal_volume_from_snapshot(
    *,
    source_branch_id: Identifier,
    target_branch_id: Identifier,
    snapshot_namespace: str,
    snapshot_name: str,
    snapshot_class: str,
    storage_class_name: str,
    snapshot_timeout_seconds: float,
    snapshot_poll_interval_seconds: float,
    pvc_timeout_seconds: float,
    pvc_poll_interval_seconds: float,
) -> None:
    """Recreate the WAL volume of a PITR-enabled branch from a VolumeSnapshot.

    All arguments are keyword-only.

    The restore targets the PVC named with ``AUTOSCALER_WAL_PVC_SUFFIX`` and
    always requests ``_PITR_WAL_PVC_SIZE_BYTES`` as the volume size, regardless
    of the branch's database size. No snapshot content name is supplied
    (``snapshot_content_name`` is passed as ``None``).

    Args:
        source_branch_id: Branch the snapshot was taken from.
        target_branch_id: Branch that receives the restored WAL volume.
        snapshot_namespace: Namespace holding the existing VolumeSnapshot.
        snapshot_name: Name of the WAL VolumeSnapshot to restore from.
        snapshot_class: VolumeSnapshotClass handed to the restore operation.
        storage_class_name: StorageClass handed to the restore operation.
        snapshot_timeout_seconds: Limit for waiting on snapshot readiness.
        snapshot_poll_interval_seconds: Polling interval while waiting on
            the snapshot.
        pvc_timeout_seconds: Limit for waiting on PVC readiness.
        pvc_poll_interval_seconds: Polling interval while waiting on the PVC.
    """
    # Bundle the four wait/poll settings up front so the operation
    # constructor call below stays a flat list of keyword arguments.
    wal_timeouts = CloneTimeouts(
        snapshot_ready=snapshot_timeout_seconds,
        snapshot_poll=snapshot_poll_interval_seconds,
        pvc_ready=pvc_timeout_seconds,
        pvc_poll=pvc_poll_interval_seconds,
    )
    wal_restore = _SnapshotRestoreOperation(
        source_branch_id=source_branch_id,
        target_branch_id=target_branch_id,
        snapshot_namespace=snapshot_namespace,
        snapshot_name=snapshot_name,
        snapshot_content_name=None,
        snapshot_class=snapshot_class,
        storage_class_name=storage_class_name,
        target_database_size=_PITR_WAL_PVC_SIZE_BYTES,
        pvc_suffix=AUTOSCALER_WAL_PVC_SUFFIX,
        timeouts=wal_timeouts,
    )
    await wal_restore.run()

0 commit comments

Comments
 (0)