Skip to content

Commit 70b4395

Browse files
committed
fix(hang): Prevent connection hangs on serverless databases
This commit addresses an issue where the replication process would hang indefinitely when connecting to serverless PostgreSQL providers like Neon or SerenDB. The root cause was twofold: 1. **Connection Leak in Pre-flight Checks:** The pre-flight checks were holding database connections open for the entire duration of the checks. On serverless platforms with strict connection limits, this exhausted the connection pool. 2. **Missing Connection Timeout:** The command used to restore global objects did not have a connection timeout. When the connection pool was exhausted, would wait forever, causing the application to hang. This commit fixes the issue by: - Refactoring to use short-lived connections for each check, ensuring connections are released back to the pool immediately. - Adding a 30-second connection timeout () to the command in as a defensive measure.
1 parent 39d1394 commit 70b4395

3 files changed

Lines changed: 62 additions & 58 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/migration/restore.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ pub async fn restore_globals(target_url: &str, input_path: &str) -> Result<()> {
4848
cmd.env(env_var, value);
4949
}
5050

51+
// Mitigate hangs on serverless DBs with strict connection limits
52+
cmd.env("PGCONNECT_TIMEOUT", "30");
53+
5154
let output = cmd.output().await.context(
5255
"Failed to execute psql. Is PostgreSQL client installed?\n\
5356
Install with:\n\

src/preflight.rs

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -169,22 +169,55 @@ pub async fn run_preflight_checks(
169169
// 1. Check local environment (pg_dump, pg_restore, etc.)
170170
check_local_environment(&mut result);
171171

172-
// 2. Check network connectivity
173-
let clients = check_network_connectivity(&mut result, source_url, target_url).await;
172+
// 2. Check network connectivity and get server versions. Connections are short-lived.
173+
let source_client_url = check_network_connectivity(&mut result, source_url, "source").await?;
174+
let target_client_url = check_network_connectivity(&mut result, target_url, "target").await?;
174175

175176
// 3. Check version compatibility (only if we could connect and have local version)
176177
if result.local_pg_version.is_some() && result.source_pg_version.is_some() {
177178
check_version_compatibility(&mut result);
178179
}
179180

180-
// 4. Check source permissions
181-
if let Some(ref client) = clients.source {
182-
check_source_permissions(&mut result, client).await;
181+
// 4. Check source permissions using a new, short-lived connection
182+
if let Some(url) = source_client_url {
183+
match crate::postgres::connect_with_retry(&url).await {
184+
Ok(client) => {
185+
check_source_permissions(&mut result, &client).await;
186+
// client is dropped here, closing the connection
187+
}
188+
Err(e) => {
189+
result.source_permissions.push(CheckResult::fail(
190+
"connection",
191+
format!("Failed to re-establish connection to source for permission checks: {}", e),
192+
));
193+
result.issues.push(PreflightIssue {
194+
title: "Source connection for permissions failed".to_string(),
195+
explanation: e.to_string(),
196+
fixes: vec!["Ensure source database is accessible".to_string()],
197+
});
198+
}
199+
}
183200
}
184201

185-
// 5. Check target permissions
186-
if let Some(ref client) = clients.target {
187-
check_target_permissions(&mut result, client).await;
202+
// 5. Check target permissions using a new, short-lived connection
203+
if let Some(url) = target_client_url {
204+
match crate::postgres::connect_with_retry(&url).await {
205+
Ok(client) => {
206+
check_target_permissions(&mut result, &client).await;
207+
// client is dropped here, closing the connection
208+
}
209+
Err(e) => {
210+
result.target_permissions.push(CheckResult::fail(
211+
"connection",
212+
format!("Failed to re-establish connection to target for permission checks: {}", e),
213+
));
214+
result.issues.push(PreflightIssue {
215+
title: "Target connection for permissions failed".to_string(),
216+
explanation: e.to_string(),
217+
fixes: vec!["Ensure target database is accessible".to_string()],
218+
});
219+
}
220+
}
188221
}
189222

190223
Ok(result)
@@ -239,76 +272,44 @@ fn check_local_environment(result: &mut PreflightResult) {
239272
}
240273
}
241274

242-
#[derive(Default)]
243-
struct ConnectivityClients {
244-
source: Option<Client>,
245-
target: Option<Client>,
246-
}
247-
248275
async fn check_network_connectivity(
249276
result: &mut PreflightResult,
250-
source_url: &str,
251-
target_url: &str,
252-
) -> ConnectivityClients {
253-
let mut clients = ConnectivityClients::default();
254-
255-
// Check source
256-
match crate::postgres::connect_with_retry(source_url).await {
277+
db_url: &str,
278+
db_type: &str, // "source" or "target"
279+
) -> Result<Option<String>> {
280+
match crate::postgres::connect_with_retry(db_url).await {
257281
Ok(client) => {
258-
// Also get server version while connected
259-
if let Ok(row) = client.query_one("SHOW server_version", &[]).await {
260-
let version_str: String = row.get(0);
261-
if let Ok(version) = crate::utils::parse_pg_version_string(&version_str) {
262-
result.source_pg_version = Some(version);
282+
// Also get server version while connected (only for source)
283+
if db_type == "source" {
284+
if let Ok(row) = client.query_one("SHOW server_version", &[]).await {
285+
let version_str: String = row.get(0);
286+
if let Ok(version) = crate::utils::parse_pg_version_string(&version_str) {
287+
result.source_pg_version = Some(version);
288+
}
263289
}
264290
}
265291
result
266292
.network
267-
.push(CheckResult::pass("source", "Source database reachable"));
268-
clients.source = Some(client);
293+
.push(CheckResult::pass(db_type, format!("{} database reachable", db_type)));
294+
Ok(Some(db_url.to_string())) // Return the URL if connection was successful
269295
}
270296
Err(e) => {
271297
result.network.push(CheckResult::fail(
272-
"source",
273-
format!("Cannot connect to source: {}", e),
298+
db_type,
299+
format!("Cannot connect to {}: {}", db_type, e),
274300
));
275301
result.issues.push(PreflightIssue {
276-
title: "Source database unreachable".to_string(),
302+
title: format!("{} database unreachable", db_type),
277303
explanation: e.to_string(),
278304
fixes: vec![
279305
"Verify connection string is correct".to_string(),
280306
"Check network connectivity to database host".to_string(),
281307
"Ensure firewall allows PostgreSQL port (5432)".to_string(),
282308
],
283309
});
310+
Ok(None) // Return None if connection failed
284311
}
285312
}
286-
287-
// Check target
288-
match crate::postgres::connect_with_retry(target_url).await {
289-
Ok(client) => {
290-
result
291-
.network
292-
.push(CheckResult::pass("target", "Target database reachable"));
293-
clients.target = Some(client);
294-
}
295-
Err(e) => {
296-
result.network.push(CheckResult::fail(
297-
"target",
298-
format!("Cannot connect to target: {}", e),
299-
));
300-
result.issues.push(PreflightIssue {
301-
title: "Target database unreachable".to_string(),
302-
explanation: e.to_string(),
303-
fixes: vec![
304-
"Verify connection string is correct".to_string(),
305-
"Check network connectivity to database host".to_string(),
306-
],
307-
});
308-
}
309-
}
310-
311-
clients
312313
}
313314

314315
fn check_version_compatibility(result: &mut PreflightResult) {

0 commit comments

Comments
 (0)