Skip to content

Commit 7c05dfb

Browse files
fix(prisma): add retry for Aurora Serverless v2 connection errors (#121)
## Issue

close #104
close #105

## Problem

The starter kit has three issues with Prisma + Aurora Serverless v2 (auto-pause enabled with `minCapacity: 0`):

1. **Credential leak**: `console.log(process.env.DATABASE_URL)` in `prisma.ts` outputs the full connection string including password to CloudWatch Logs.
2. **No runtime retry**: Aurora drops idle connections after `idle_session_timeout` (60s) and takes ~15s to resume from auto-pause ([docs](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html)). Without retry, queries fail with transient errors (P1017, ECONNRESET) and do not recover.
3. **No migration retry**: `migration-runner.ts` runs `prisma db push` without retry. During `cdk deploy`, Aurora may still be resuming, causing P1001 ("Can't reach database server") and failing the entire deployment.

## Solution

- **Remove `console.log(DATABASE_URL)`** to fix the credential leak.
- **Add a Prisma client extension** (`Prisma.defineExtension` with `$allModels.$allOperations`) that retries transient connection errors with exponential backoff. Retryable errors: P2024, P1001, P1017, idle-session timeout, ECONNRESET. Non-retryable errors (auth failures, schema errors) are thrown immediately.
- **Add retry to `migration-runner.ts`** for `prisma db push` with exponential backoff (base 3s, max 5 attempts, ~100s worst case within Lambda 5min timeout). Only P1001 / connection refused are retried.
- **Optimize connection parameters**: `connection_limit=1` (Lambda handles one request per instance), `connect_timeout=30` (accommodates auto-pause resume time).
## Changes

- `webapp/src/lib/prisma.ts` — Remove `console.log`, remove verbose `log` option, add retry extension via `$extends`
- `webapp/src/jobs/migration-runner.ts` — Extract `runPrismaDbPush` with retry loop, structured logging
- `cdk/lib/constructs/database.ts` — Change connection options to `?connection_limit=1&connect_timeout=30`

## Verification

- [ ] `console.log(process.env.DATABASE_URL)` is removed
- [ ] After Aurora auto-pause resume, the first request recovers via retry
- [ ] Non-retryable errors (e.g. auth failure) are thrown immediately without retry
- [ ] `cdk deploy` succeeds even when Aurora is resuming from 0 ACU
- [ ] `tsc --noEmit` passes
- [ ] `prettier --check` passes

---------

Co-authored-by: Kazuho Cryer-Shinozuka <malaysia.cryer@gmail.com>
1 parent 84be605 commit 7c05dfb

File tree

5 files changed

+110
-32
lines changed

5 files changed

+110
-32
lines changed

cdk/lib/constructs/database.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,10 @@ export class Database extends Construct implements ec2.IConnectable {
9696

9797
public getLambdaEnvironment(databaseName: string) {
9898
const conn = this.getConnectionInfo();
99-
// Aurora Serverless v2 cold start takes up to 15 seconds
100-
// https://www.prisma.io/docs/orm/prisma-client/setup-and-configuration/databases-connections/connection-pool
101-
const option = '?pool_timeout=20&connect_timeout=20';
99+
// connection_limit=1: Each Lambda instance handles one request at a time
100+
// connect_timeout=30: Aurora Serverless v2 auto-pause resume takes ~15s (longer after 24h+ pause)
101+
// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
102+
const option = '?connection_limit=1&connect_timeout=30';
102103
return {
103104
DATABASE_HOST: conn.host,
104105
DATABASE_NAME: databaseName,

cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,7 @@ exports[`Snapshot test 2`] = `
723723
],
724724
},
725725
"DATABASE_NAME": "main",
726-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
726+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
727727
"DATABASE_PASSWORD": {
728728
"Fn::Join": [
729729
"",
@@ -772,7 +772,7 @@ exports[`Snapshot test 2`] = `
772772
"Endpoint.Port",
773773
],
774774
},
775-
"/main?pool_timeout=20&connect_timeout=20",
775+
"/main?connection_limit=1&connect_timeout=30",
776776
],
777777
],
778778
},
@@ -3575,7 +3575,7 @@ async function handler(event) {
35753575
],
35763576
},
35773577
"DATABASE_NAME": "main",
3578-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
3578+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
35793579
"DATABASE_PASSWORD": {
35803580
"Fn::Join": [
35813581
"",
@@ -3624,7 +3624,7 @@ async function handler(event) {
36243624
"Endpoint.Port",
36253625
],
36263626
},
3627-
"/main?pool_timeout=20&connect_timeout=20",
3627+
"/main?connection_limit=1&connect_timeout=30",
36283628
],
36293629
],
36303630
},
@@ -3952,7 +3952,7 @@ async function handler(event) {
39523952
],
39533953
},
39543954
"DATABASE_NAME": "main",
3955-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
3955+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
39563956
"DATABASE_PASSWORD": {
39573957
"Fn::Join": [
39583958
"",
@@ -4001,7 +4001,7 @@ async function handler(event) {
40014001
"Endpoint.Port",
40024002
],
40034003
},
4004-
"/main?pool_timeout=20&connect_timeout=20",
4004+
"/main?connection_limit=1&connect_timeout=30",
40054005
],
40064006
],
40074007
},

cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ exports[`Snapshot test 2`] = `
744744
],
745745
},
746746
"DATABASE_NAME": "main",
747-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
747+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
748748
"DATABASE_PASSWORD": {
749749
"Fn::Join": [
750750
"",
@@ -793,7 +793,7 @@ exports[`Snapshot test 2`] = `
793793
"Endpoint.Port",
794794
],
795795
},
796-
"/main?pool_timeout=20&connect_timeout=20",
796+
"/main?connection_limit=1&connect_timeout=30",
797797
],
798798
],
799799
},
@@ -3405,7 +3405,7 @@ async function handler(event) {
34053405
],
34063406
},
34073407
"DATABASE_NAME": "main",
3408-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
3408+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
34093409
"DATABASE_PASSWORD": {
34103410
"Fn::Join": [
34113411
"",
@@ -3454,7 +3454,7 @@ async function handler(event) {
34543454
"Endpoint.Port",
34553455
],
34563456
},
3457-
"/main?pool_timeout=20&connect_timeout=20",
3457+
"/main?connection_limit=1&connect_timeout=30",
34583458
],
34593459
],
34603460
},
@@ -3758,7 +3758,7 @@ async function handler(event) {
37583758
],
37593759
},
37603760
"DATABASE_NAME": "main",
3761-
"DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20",
3761+
"DATABASE_OPTION": "?connection_limit=1&connect_timeout=30",
37623762
"DATABASE_PASSWORD": {
37633763
"Fn::Join": [
37643764
"",
@@ -3807,7 +3807,7 @@ async function handler(event) {
38073807
"Endpoint.Port",
38083808
],
38093809
},
3810-
"/main?pool_timeout=20&connect_timeout=20",
3810+
"/main?connection_limit=1&connect_timeout=30",
38113811
],
38123812
],
38133813
},

webapp/src/jobs/migration-runner.ts

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,45 @@ export const handler: Handler = async (event, _) => {
2727
// Currently we don't have any direct method to invoke prisma migration programmatically.
2828
// As a workaround, we spawn migration script as a child process and wait for its completion.
2929
// Please also refer to the following GitHub issue: https://github.com/prisma/prisma/issues/4703
30-
try {
31-
const exitCode = await new Promise((resolve, _) => {
30+
await runPrismaDbPush(options);
31+
};
32+
33+
// Aurora Serverless v2 may be resuming from auto-pause (0 ACU) during CDK deployment,
34+
// which takes approximately 15 seconds. Retry transient connection errors with exponential backoff.
35+
// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
36+
async function runPrismaDbPush(options: string[], maxRetries = 5, baseDelay = 3000): Promise<void> {
37+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
38+
const { exitCode, stdout, stderr } = await new Promise<{
39+
exitCode: number;
40+
stdout: string;
41+
stderr: string;
42+
}>((resolve) => {
3243
execFile(
3344
path.resolve('./node_modules/prisma/build/index.js'),
3445
['db', 'push', '--skip-generate'].concat(options),
3546
(error, stdout, stderr) => {
36-
console.log(stdout);
37-
if (error != null) {
38-
console.log(`prisma db push exited with error ${error.message}`);
39-
resolve(error.code ?? 1);
40-
} else {
41-
resolve(0);
42-
}
47+
resolve({
48+
exitCode: error ? (typeof error.code === 'number' ? error.code : 1) : 0,
49+
stdout,
50+
stderr,
51+
});
4352
},
4453
);
4554
});
4655

47-
if (exitCode != 0) throw Error(`db push failed with exit code ${exitCode}`);
48-
} catch (e) {
49-
console.log(e);
50-
throw e;
56+
console.log(`prisma db push attempt ${attempt}/${maxRetries}`, { exitCode, stdout, stderr });
57+
58+
if (exitCode === 0) return;
59+
60+
const isRetryable =
61+
stderr.includes('P1001') || stderr.includes("Can't reach database") || stderr.includes('Connection refused');
62+
63+
if (!isRetryable || attempt === maxRetries) {
64+
throw new Error(`prisma db push failed after ${attempt} attempt(s): ${stderr}`);
65+
}
66+
67+
const delay = baseDelay * Math.pow(2, attempt - 1) + Math.random() * 1000;
68+
console.log(`Retrying prisma db push in ${Math.round(delay)}ms...`);
69+
await new Promise((r) => setTimeout(r, delay));
5170
}
52-
};
71+
}

webapp/src/lib/prisma.ts

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,70 @@
1-
import { PrismaClient } from '@prisma/client';
1+
import { Prisma, PrismaClient } from '@prisma/client';
22

33
// https://www.prisma.io/docs/guides/nextjs
44

55
const globalForPrisma = global as unknown as {
66
prisma: PrismaClient;
77
};
88

9-
console.log(process.env.DATABASE_URL);
10-
export const prisma = globalForPrisma.prisma || new PrismaClient({ log: ['query', 'info', 'warn', 'error'] });
9+
// Determine if an error is a transient connection issue that may resolve on retry.
10+
// Aurora Serverless v2 can drop connections due to idle_session_timeout (60s) or auto-pause,
11+
// and resume takes approximately 15 seconds.
12+
// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html
13+
function isRetryableError(error: unknown): boolean {
14+
if (!(error instanceof Error)) return false;
15+
const code = (error as { code?: string }).code;
16+
if (
17+
code === 'P2024' || // Connection pool timeout
18+
code === 'P1001' || // Can't reach database server
19+
code === 'P1017' // Server has closed the connection
20+
) {
21+
return true;
22+
}
23+
const msg = error.message;
24+
return (
25+
msg.includes('idle-session timeout') ||
26+
msg.includes('terminating connection') ||
27+
msg.includes('Connection terminated') ||
28+
msg.includes('Timed out fetching a new connection from the connection pool') ||
29+
msg.includes('ECONNRESET')
30+
);
31+
}
32+
33+
const basePrisma = new PrismaClient();
34+
35+
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3, baseDelay = 500): Promise<T> {
36+
let lastError: unknown;
37+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
38+
try {
39+
const result = await fn();
40+
if (attempt > 0) {
41+
console.warn(`Prisma query succeeded after ${attempt} retry(s)`);
42+
}
43+
return result;
44+
} catch (error) {
45+
lastError = error;
46+
if (attempt === maxRetries || !isRetryableError(error)) throw error;
47+
// Discard stale connections before retrying
48+
await basePrisma.$disconnect();
49+
const delay = baseDelay * Math.pow(2, attempt) + Math.random() * 100;
50+
console.warn(`Prisma retry attempt ${attempt + 1}/${maxRetries}, waiting ${Math.round(delay)}ms`);
51+
await new Promise((r) => setTimeout(r, delay));
52+
}
53+
}
54+
throw lastError;
55+
}
56+
57+
const retryExtension = Prisma.defineExtension({
58+
name: 'retry-on-connection-error',
59+
query: {
60+
$allModels: {
61+
async $allOperations({ args, query }) {
62+
return withRetry(() => query(args));
63+
},
64+
},
65+
},
66+
});
67+
68+
export const prisma = basePrisma.$extends(retryExtension) as unknown as PrismaClient;
1169

1270
if (process.env.NODE_ENV !== 'production') globalForPrisma.prisma = prisma;

0 commit comments

Comments
 (0)