From aaa480c62828149b47a3748749915a0132162f9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Saparelli?= Date: Sat, 6 Jun 2026 01:16:37 +1200 Subject: [PATCH] fix(operator): bound the schema-migration timeout cleanup to 60s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The timeout path added in v0.3.22 calls DROP SCHEMA … CASCADE on the new restore to clear stale persistent_schemas before switchover. But that DROP can hang indefinitely if a backend on the target has the schema's namespace locked — which is precisely the failure mode that tripped the budget in the first place (e.g. a CREATE TABLE spinning at 100% CPU and ignoring SIGTERM). The cleanup queued behind that lock, wedging the switchover the cleanup was meant to enable. Wrap the DROP cleanup in tokio::time::timeout (60s). If it doesn't complete in time (either errors or times out), log it and proceed with the switchover anyway — leftover schemas in the target are strictly better than the replica never coming up. The next restore cycle will re-attempt migration with a fresh target PVC and (typically) clear the leftovers as part of pre-migration prep. Restructures the cleanup into a separate drop_persistent_schemas_on_target helper so the timeout wraps a single async future. --- src/controllers/replica.rs | 96 ++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/src/controllers/replica.rs b/src/controllers/replica.rs index ca2653e..366bc2b 100644 --- a/src/controllers/replica.rs +++ b/src/controllers/replica.rs @@ -1099,6 +1099,46 @@ fn migration_exceeded_budget(replica: &PostgresPhysicalReplica, job: &Job) -> bo /// schema-less) database instead of being blocked indefinitely. The /// next restore cycle re-attempts migration if the schemas reappear on /// the source. +/// Connect to the new restore and `DROP SCHEMA … CASCADE` each given +/// schema. Separate function so the caller can wrap it in +/// `tokio::time::timeout`; the operation has no internal deadline and +/// can hang indefinitely if a backend on the target has a lock on the +/// schema's namespace. +async fn drop_persistent_schemas_on_target( + client: &Client, + ctx: &Arc, + replica: &PostgresPhysicalReplica, + namespace: &str, + new_restore_name: &str, + schemas: &[String], +) -> Result<()> { + let reader_secret_name = replica.creds_secret_name(); + let secrets: Api = Api::namespaced(client.clone(), namespace); + let reader_secret = secrets.get(&reader_secret_name).await?; + let reader_user = postgres::read_secret_field(&reader_secret, "username")?; + let reader_password = postgres::read_secret_field(&reader_secret, "password")?; + let target_dbname = postgres::discover_restore_database( + client, + namespace, + new_restore_name, + &reader_user, + &reader_password, + ctx.use_port_forward(), + ) + .await?; + let conn = postgres::connect_to_restore( + client, + namespace, + new_restore_name, + &target_dbname, + &reader_user, + &reader_password, + ctx.use_port_forward(), + ) + .await?; + postgres::drop_schemas_on(&conn.client, schemas).await +} + async fn timeout_schema_migration( client: &Client, ctx: &Arc, @@ -1126,35 +1166,41 @@ async fn timeout_schema_migration( warn!(job = %job_name, error = %e, "failed to delete timed-out migration Job"); } - // DROP SCHEMA … CASCADE the persistent_schemas on the new restore so - // the operator's "owned" schemas don't carry stale leftovers from the - // restored data. Best-effort per-schema via IF EXISTS. + // Opportunistically DROP SCHEMA … CASCADE the persistent_schemas on + // the new restore so the operator's "owned" schemas don't carry stale + // leftovers from the restored data. Bounded at 60 seconds: if a + // backend in the target restore is itself stuck (the exact failure + // mode that tripped the migration budget in the first place — e.g. + // CREATE TABLE spinning at 100% CPU and ignoring SIGTERM), our DROP + // queues on its lock and never completes. Better to leave leftover + // schemas than to wedge the switchover indefinitely on the cleanup. + const CLEANUP_TIMEOUT: Duration = Duration::from_secs(60); if !schemas.is_empty() { - let reader_secret_name = replica.creds_secret_name(); - let secrets: Api = Api::namespaced(client.clone(), namespace); - let reader_secret = secrets.get(&reader_secret_name).await?; - let reader_user = postgres::read_secret_field(&reader_secret, "username")?; - let reader_password = postgres::read_secret_field(&reader_secret, "password")?; - let target_dbname = postgres::discover_restore_database( + let cleanup = drop_persistent_schemas_on_target( client, + ctx, + replica, namespace, &new_restore_name, - &reader_user, - &reader_password, - ctx.use_port_forward(), - ) - .await?; - let conn = postgres::connect_to_restore( - client, - namespace, - &new_restore_name, - &target_dbname, - &reader_user, - &reader_password, - ctx.use_port_forward(), - ) - .await?; - postgres::drop_schemas_on(&conn.client, &schemas).await?; + &schemas, + ); + match tokio::time::timeout(CLEANUP_TIMEOUT, cleanup).await { + Ok(Ok(())) => {} + Ok(Err(e)) => { + warn!( + replica = %replica_name, + error = %e, + "DROP SCHEMA cleanup errored in timeout-skip path; proceeding to switchover with leftover schemas" + ); + } + Err(_) => { + warn!( + replica = %replica_name, + timeout = ?CLEANUP_TIMEOUT, + "DROP SCHEMA cleanup itself timed out (target postgres backend likely stuck); proceeding to switchover with leftover schemas" + ); + } + } } // Surface as a Warning event so this is visible on the replica CR.