Skip to content

Commit 32c7d40

Browse files
committed
chore(infra): add Node heap caps to api worker deployments
The bg, worker-job, temporal, and private deployments ran Node.js without --max-old-space-size, so V8 used its default heap sizing (derived from the Kubernetes node's total RAM, not the pod's memory limit). The heap could therefore grow past the cgroup limit and the pod would be OOM-killed, with no early-GC backstop. Cap each at 90% of its memory limit via the shared nodeOptions helper, matching api/ws. personalized-digest is intentionally left uncapped so it can absorb load spikes.
1 parent 516c142 commit 32c7d40

1 file changed

Lines changed: 17 additions & 7 deletions

File tree

.infra/index.ts

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -232,18 +232,24 @@ const wsLimits: pulumi.Input<{
232232
memory: `${wsMemory}Mi`,
233233
};
234234

235-
const bgLimits: pulumi.Input<{ memory: string }> = { memory: '512Mi' };
235+
const bgMemory = 512;
236+
const bgLimits: pulumi.Input<{ memory: string }> = { memory: `${bgMemory}Mi` };
236237
const bgRequests: pulumi.Input<{ cpu: string; memory: string }> = {
237238
cpu: '50m',
238239
memory: '256Mi',
239240
};
240241

241-
const temporalLimits: pulumi.Input<{ memory: string }> = { memory: '560Mi' };
242+
const temporalMemory = 560;
243+
const temporalLimits: pulumi.Input<{ memory: string }> = {
244+
memory: `${temporalMemory}Mi`,
245+
};
242246
const temporalRequests: pulumi.Input<{ cpu: string; memory: string }> = {
243247
cpu: '10m',
244248
memory: '280Mi',
245249
};
246250

251+
const privateMemory = 700;
252+
247253
const initialDelaySeconds = 20;
248254
const readinessProbe: k8s.types.input.core.v1.Probe = {
249255
httpGet: { path: '/health', port: 'http' },
@@ -463,7 +469,7 @@ if (isAdhocEnv) {
463469
},
464470
{
465471
nameSuffix: 'bg',
466-
env: [...jwtEnv],
472+
env: [nodeOptions(bgMemory), ...jwtEnv],
467473
args: cliArgs('background'),
468474
minReplicas: 2,
469475
maxReplicas: 10,
@@ -485,7 +491,7 @@ if (isAdhocEnv) {
485491
},
486492
{
487493
nameSuffix: 'temporal',
488-
env: [...jwtEnv],
494+
env: [nodeOptions(temporalMemory), ...jwtEnv],
489495
args: cliArgs('temporal'),
490496
minReplicas: 1,
491497
maxReplicas: 3,
@@ -501,15 +507,19 @@ if (isAdhocEnv) {
501507
{
502508
nameSuffix: 'private',
503509
port: 3000,
504-
env: [{ name: 'ENABLE_PRIVATE_ROUTES', value: 'true' }, ...jwtEnv],
510+
env: [
511+
nodeOptions(privateMemory),
512+
{ name: 'ENABLE_PRIVATE_ROUTES', value: 'true' },
513+
...jwtEnv,
514+
],
505515
minReplicas: 1,
506516
maxReplicas: 4,
507517
requests: {
508518
memory: '350Mi',
509519
cpu: '10m',
510520
},
511521
limits: {
512-
memory: '700Mi',
522+
memory: `${privateMemory}Mi`,
513523
},
514524
readinessProbe,
515525
livenessProbe,
@@ -557,7 +567,7 @@ if (isAdhocEnv) {
557567

558568
appsArgs.push({
559569
nameSuffix: 'worker-job',
560-
env: [...jwtEnv],
570+
env: [nodeOptions(bgMemory), ...jwtEnv],
561571
args: cliArgs('worker-job'),
562572
minReplicas: 1,
563573
maxReplicas: 10,

0 commit comments

Comments
 (0)