Skip to content

Commit 71bfb91

Browse files
committed
feat(operator): gate Service traffic on operator-set ready label
The per-replica postgres Service used to route to any pod matching just `pgro.bes.au/restore=<active-name>`, which meant a new restore became reachable to external clients the moment its pod went Ready — including during the operator's Switching phase, before schema migration completes. External workloads that connect at that point can grab locks on the very schemas the operator is about to drop and rewrite, wedging the migration. Add a second label `pgro.bes.au/ready-for-traffic=true` to the Service selector, and have the operator only patch that label onto a restore's pod once it's safe for external traffic — after schema migration completes, immediately before the Service selector is pointed at the new restore. Until then, the new restore is unreachable via the Service even though its pod is healthy, so DDL prep runs without external interference. Also re-apply the label every Active reconcile pass so it's restored if a pod restart drops it, and so existing pre-upgrade Active pods get the label on the first reconcile after deploy.
1 parent 29f3933 commit 71bfb91

4 files changed

Lines changed: 100 additions & 1 deletion

File tree

src/controllers/replica.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,14 @@ pub async fn reconcile(replica: Arc<PostgresPhysicalReplica>, ctx: Arc<Context>)
190190
"performing blue-green switchover"
191191
);
192192

193+
// Mark the new restore's pod ready for traffic BEFORE pointing the
194+
// Service at it. The Service selector requires the
195+
// `ready-for-traffic=true` label (see [[READY_FOR_TRAFFIC_LABEL]]),
196+
// so until the operator sets the label, no external client can
197+
// reach the restore via the Service — operator-side prep work
198+
// (DROP SCHEMA, migration Job) runs without external interference.
199+
switching.mark_pod_ready_for_traffic(client).await?;
200+
193201
// Update Service selector to point to the new restore
194202
switching.update_service_selector(client, &name).await?;
195203

src/controllers/replica/resources.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use k8s_openapi::{
66
api::{
77
batch::v1::{Job, JobSpec},
88
core::v1::{
9-
Container, EnvVar, LocalObjectReference, PodSpec, PodTemplateSpec,
9+
Container, EnvVar, LocalObjectReference, Pod, PodSpec, PodTemplateSpec,
1010
ResourceRequirements, Secret, Service, ServicePort, ServiceSpec,
1111
},
1212
},
@@ -415,13 +415,60 @@ impl PostgresPhysicalReplica {
415415
}
416416
}
417417

418+
/// Label the operator sets on a restore's postgres pod once schema
419+
/// migration (and anything else that must run pre-handover) has completed
420+
/// and the pod is safe to receive external traffic. The per-replica
421+
/// Service selector requires this label, so a restore in `Switching`
422+
/// can't be reached via the Service — operator-side work (DROP SCHEMA,
423+
/// `pg_dump | psql` migration Job, etc.) runs without external clients
424+
/// racing to grab locks on the schemas being touched.
425+
pub const READY_FOR_TRAFFIC_LABEL: &str = "pgro.bes.au/ready-for-traffic";
426+
418427
impl PostgresPhysicalRestore {
428+
/// Patch this restore's postgres pod to add the [`READY_FOR_TRAFFIC_LABEL`].
429+
/// Idempotent and resilient to the pod not existing yet (the restore's
430+
/// deployment may be mid-rollout); callers that need the label to be
431+
/// present should retry on the next reconcile pass.
432+
pub async fn mark_pod_ready_for_traffic(&self, client: &Client) -> Result<()> {
433+
let pods: Api<Pod> = Api::namespaced(client.clone(), &self.ns());
434+
let selector = format!("pgro.bes.au/restore={}", self.name_any());
435+
let list = pods.list(&ListParams::default().labels(&selector)).await?;
436+
let pod = list
437+
.items
438+
.into_iter()
439+
.find(|p| p.status.as_ref().and_then(|s| s.phase.as_deref()) == Some("Running"));
440+
let Some(pod) = pod else {
441+
warn!(
442+
restore = self.name_any(),
443+
"no running pod for restore yet; will retry next reconcile"
444+
);
445+
return Ok(());
446+
};
447+
let pod_name = pod.name_any();
448+
let patch = serde_json::json!({
449+
"metadata": {
450+
"labels": {
451+
READY_FOR_TRAFFIC_LABEL: "true",
452+
}
453+
}
454+
});
455+
pods.patch(&pod_name, &PatchParams::default(), &Patch::Merge(&patch))
456+
.await?;
457+
info!(
458+
restore = self.name_any(),
459+
pod = pod_name,
460+
"marked restore pod ready for traffic"
461+
);
462+
Ok(())
463+
}
464+
419465
pub async fn update_service_selector(&self, client: &Client, service_name: &str) -> Result<()> {
420466
let services: Api<Service> = Api::namespaced(client.clone(), &self.ns());
421467
let patch = serde_json::json!({
422468
"spec": {
423469
"selector": {
424470
"pgro.bes.au/restore": self.name_any(),
471+
READY_FOR_TRAFFIC_LABEL: "true",
425472
}
426473
}
427474
});

src/controllers/restore.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,13 @@ async fn reconcile_active(
617617
apply_restore_deployment(client, restore, &replica, name, namespace).await?;
618618
}
619619

620+
// Defensively ensure the ready-for-traffic label is on the pod. If the
621+
// pod restarted (OOM, eviction, node loss), the label is gone from the
622+
// new pod and the Service stops routing to it; re-applying every pass
623+
// closes that gap. Also handles the upgrade path where existing Active
624+
// pods predate the label.
625+
restore.mark_pod_ready_for_traffic(client).await?;
626+
620627
Ok(Action::requeue(Duration::from_secs(300)))
621628
}
622629

tests/switchover.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@ async fn second_restore_and_switchover() {
102102
Some(first_restore_name.as_str()),
103103
"service selector should point to first restore"
104104
);
105+
assert_eq!(
106+
svc_selector
107+
.get("pgro.bes.au/ready-for-traffic")
108+
.map(|s| s.as_str()),
109+
Some("true"),
110+
"service selector should gate on ready-for-traffic label"
111+
);
105112

106113
// Manually create a second PostgresPhysicalRestore to trigger switchover
107114
let second_restore_name = "switchover-replica-manual-second";
@@ -213,6 +220,36 @@ async fn second_restore_and_switchover() {
213220
Some(second_restore_name),
214221
"service selector should point to second restore after switchover"
215222
);
223+
assert_eq!(
224+
svc_selector
225+
.get("pgro.bes.au/ready-for-traffic")
226+
.map(|s| s.as_str()),
227+
Some("true"),
228+
"service selector should still gate on ready-for-traffic label after switchover"
229+
);
230+
231+
// The second restore's pod should be labeled ready-for-traffic.
232+
let pods: kube::Api<k8s_openapi::api::core::v1::Pod> =
233+
kube::Api::namespaced(client.clone(), &ns);
234+
let pod_list = pods
235+
.list(
236+
&kube::api::ListParams::default()
237+
.labels(&format!("pgro.bes.au/restore={second_restore_name}")),
238+
)
239+
.await
240+
.expect("failed to list second restore pods");
241+
let labeled = pod_list.items.iter().any(|p| {
242+
p.metadata
243+
.labels
244+
.as_ref()
245+
.and_then(|l| l.get("pgro.bes.au/ready-for-traffic"))
246+
.map(|v| v.as_str())
247+
== Some("true")
248+
});
249+
assert!(
250+
labeled,
251+
"second restore's pod should carry pgro.bes.au/ready-for-traffic=true after switchover"
252+
);
216253

217254
// Second restore should have activated_at set
218255
let second_restore = restores

0 commit comments

Comments
 (0)