Skip to content

Commit 69231dc

Browse files
committed
fix(prisma): add retry for Aurora Serverless v2 connection errors (#104, #105)
Why: Aurora Serverless v2 with auto-pause (0 ACU) drops connections on idle_session_timeout and takes ~15s to resume. Without retries, both runtime queries and CDK deployment migrations fail on transient connection errors. In addition, DATABASE_URL (including the password) was being logged to CloudWatch.

What:
- Remove console.log(DATABASE_URL), which leaked credentials to CloudWatch
- Add a Prisma client extension that retries on transient connection errors (P2024, P1001, P1017, idle-session timeout, ECONNRESET)
- Add exponential-backoff retry to migration-runner for prisma db push
- Tune connection params: connection_limit=1, connect_timeout=30
1 parent 3bf183c commit 69231dc

File tree

3 files changed

+93
-20
lines changed

3 files changed

+93
-20
lines changed

cdk/lib/constructs/database.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,10 @@ export class Database extends Construct implements ec2.IConnectable {
9090

9191
public getLambdaEnvironment(databaseName: string) {
9292
const conn = this.getConnectionInfo();
93-
// Aurora Serverless v2 cold start takes up to 15 seconds
94-
// https://www.prisma.io/docs/orm/prisma-client/setup-and-configuration/databases-connections/connection-pool
95-
const option = '?pool_timeout=20&connect_timeout=20';
93+
// connection_limit=1: Each Lambda instance handles one request at a time
94+
// connect_timeout=30: Aurora Serverless v2 auto-pause resume takes ~15s (longer after 24h+ pause)
95+
// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
96+
const option = '?connection_limit=1&connect_timeout=30';
9697
return {
9798
DATABASE_HOST: conn.host,
9899
DATABASE_NAME: databaseName,

webapp/src/jobs/migration-runner.ts

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,45 @@ export const handler: Handler = async (event, _) => {
2727
// Currently we don't have any direct method to invoke prisma migration programmatically.
2828
// As a workaround, we spawn migration script as a child process and wait for its completion.
2929
// Please also refer to the following GitHub issue: https://github.com/prisma/prisma/issues/4703
30-
try {
31-
const exitCode = await new Promise((resolve, _) => {
30+
await runPrismaDbPush(options);
31+
};
32+
33+
/**
 * Run `prisma db push --skip-generate` as a child process, retrying transient connection failures.
 *
 * Aurora Serverless v2 may be resuming from auto-pause (0 ACU) during CDK deployment,
 * which takes approximately 15 seconds. Transient connection errors are retried with
 * exponential backoff plus up to one second of random jitter.
 * https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
 *
 * @param options - Extra CLI arguments appended after `db push --skip-generate`.
 * @param maxRetries - Maximum number of attempts in total (not additional retries). Values below 1
 *   or non-finite values are treated as 1 so the command is always executed at least once.
 * @param baseDelay - Delay in milliseconds before the second attempt; doubled for each later attempt.
 * @throws Error when the command fails with a non-retryable error, or still fails on the last attempt.
 */
async function runPrismaDbPush(options: string[], maxRetries = 5, baseDelay = 3000): Promise<void> {
  // Without this clamp, maxRetries <= 0 (or NaN) would skip the loop entirely and the function
  // would resolve as if the schema push had succeeded.
  const maxAttempts = Number.isFinite(maxRetries) ? Math.max(1, Math.floor(maxRetries)) : 1;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const { exitCode, stdout, stderr, errorMessage } = await new Promise<{
      exitCode: number;
      stdout: string;
      stderr: string;
      errorMessage: string | undefined;
    }>((resolve) => {
      execFile(
        path.resolve('./node_modules/prisma/build/index.js'),
        ['db', 'push', '--skip-generate'].concat(options),
        (error, stdout, stderr) => {
          resolve({
            // error.code is a string (e.g. 'ENOENT') when the process could not be spawned;
            // normalise anything that is not a numeric exit code to 1.
            exitCode: error ? (typeof error.code === 'number' ? error.code : 1) : 0,
            stdout,
            stderr,
            errorMessage: error?.message,
          });
        },
      );
    });

    console.log(`prisma db push attempt ${attempt}/${maxAttempts}`, { exitCode, stdout, stderr });

    if (exitCode === 0) return;

    const isRetryable =
      stderr.includes('P1001') || stderr.includes("Can't reach database") || stderr.includes('Connection refused');

    if (!isRetryable || attempt === maxAttempts) {
      // stderr is empty when the child process never started, so fall back to the
      // execFile error message to keep the failure diagnosable.
      const detail = stderr || errorMessage || `exit code ${exitCode}`;
      throw new Error(`prisma db push failed after ${attempt} attempt(s): ${detail}`);
    }

    const delay = baseDelay * Math.pow(2, attempt - 1) + Math.random() * 1000;
    console.log(`Retrying prisma db push in ${Math.round(delay)}ms...`);
    await new Promise((r) => setTimeout(r, delay));
  }
}

webapp/src/lib/prisma.ts

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,65 @@
1-
import { PrismaClient } from '@prisma/client';
1+
import { Prisma, PrismaClient } from '@prisma/client';
22

33
// https://www.prisma.io/docs/guides/nextjs
44

55
// Typed view of the Node.js global object exposing a `prisma` slot.
// The slot is assigned at the bottom of this module when NODE_ENV is not 'production'.
type GlobalWithPrisma = { prisma: PrismaClient };

const globalForPrisma = global as unknown as GlobalWithPrisma;
88

9-
console.log(process.env.DATABASE_URL);
10-
export const prisma = globalForPrisma.prisma || new PrismaClient({ log: ['query', 'info', 'warn', 'error'] });
9+
// Prisma error codes that indicate a transient connection problem.
const RETRYABLE_PRISMA_CODES: ReadonlySet<string> = new Set([
  'P2024', // Connection pool timeout
  'P1001', // Can't reach database server
  'P1017', // Server has closed the connection
]);

// Message fragments seen when the server or the network drops an established connection.
const RETRYABLE_MESSAGE_FRAGMENTS: readonly string[] = [
  'idle-session timeout',
  'terminating connection',
  'Connection terminated',
  'ECONNRESET',
];

/**
 * Determine whether an error is a transient connection issue that may resolve on retry.
 *
 * Aurora Serverless v2 can drop connections due to idle_session_timeout (60s) or auto-pause,
 * and resume takes approximately 15 seconds.
 * https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
 *
 * Prisma exposes its error code on different properties depending on the error class:
 * `PrismaClientKnownRequestError` uses `code`, whereas `PrismaClientInitializationError`
 * (raised when the client cannot connect at all) uses `errorCode`. Both are inspected so
 * that a P1001 raised during connection setup is also treated as retryable.
 *
 * @param error - The value caught from a failed operation.
 * @returns `true` for an `Error` carrying a known transient code or message fragment;
 *   `false` otherwise, including for any value that is not an `Error`.
 */
function isRetryableError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;

  const { code, errorCode } = error as { code?: unknown; errorCode?: unknown };
  const hasRetryableCode = [code, errorCode].some(
    (candidate) => typeof candidate === 'string' && RETRYABLE_PRISMA_CODES.has(candidate),
  );
  if (hasRetryableCode) return true;

  const msg = error.message;
  return RETRYABLE_MESSAGE_FRAGMENTS.some((fragment) => msg.includes(fragment));
}
31+
32+
/**
 * Run `fn`, retrying with exponential backoff while it fails with a transient connection error
 * (as classified by `isRetryableError`).
 *
 * NOTE(review): a retry re-executes the whole operation. If a write reached the database before
 * the connection dropped, it may be applied twice — confirm callers tolerate this.
 *
 * @param fn - The operation to execute; called once per attempt.
 * @param maxRetries - Number of retries after the first attempt. Negative or non-finite values
 *   are treated as 0, so `fn` is always called at least once.
 * @param baseDelay - Delay in milliseconds before the first retry; doubled for each later retry,
 *   plus up to 100ms of random jitter.
 * @param resetConnections - Invoked before each retry to discard stale connections. A failure
 *   here is logged and ignored so it cannot mask the original error or abort the retry.
 * @returns The value resolved by `fn`.
 * @throws The error from `fn` when it is not retryable or the retry budget is exhausted.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 3,
  baseDelay = 500,
  resetConnections: () => Promise<void> = async () => {},
): Promise<T> {
  const retries = Number.isFinite(maxRetries) ? Math.max(0, Math.floor(maxRetries)) : 0;

  // The loop exits only by returning the result or rethrowing the error.
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn();
    } catch (error) {
      if (attempt >= retries || !isRetryableError(error)) throw error;

      // Discard stale connections before retrying.
      try {
        await resetConnections();
      } catch (resetError) {
        console.warn('Prisma connection reset failed before retry', resetError);
      }

      const delay = baseDelay * Math.pow(2, attempt) + Math.random() * 100;
      console.warn(`Prisma retry attempt ${attempt + 1}/${retries}, waiting ${Math.round(delay)}ms`);
      await new Promise((r) => setTimeout(r, delay));
    }
  }
}

/**
 * Build a PrismaClient whose model operations are wrapped in `withRetry`.
 *
 * The extension is registered under `query.$allModels.$allOperations`, so only model
 * operations are wrapped; client-level raw queries are presumably not covered — verify
 * against the Prisma Client extensions documentation if raw queries need retrying too.
 */
function createPrismaClient(): PrismaClient {
  const basePrisma = new PrismaClient();

  const retryExtension = Prisma.defineExtension({
    name: 'retry-on-connection-error',
    query: {
      $allModels: {
        async $allOperations({ args, query }) {
          return withRetry(
            () => query(args),
            3,
            500,
            () => basePrisma.$disconnect(),
          );
        },
      },
    },
  });

  // The extended client's static type differs from PrismaClient; the cast keeps the exported
  // type unchanged for existing importers.
  return basePrisma.$extends(retryExtension) as unknown as PrismaClient;
}

// Reuse the client cached on the global object when one exists. Always constructing a new
// client here would leave the cache below write-only and create a fresh PrismaClient on
// every module reload.
export const prisma = globalForPrisma.prisma || createPrismaClient();

if (process.env.NODE_ENV !== 'production') globalForPrisma.prisma = prisma;

0 commit comments

Comments
 (0)