From 09be02137a9ea3496d395b9d69f2edcba0b4a82b Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Wed, 3 Jun 2026 16:24:46 -0400 Subject: [PATCH 1/2] Measure retained heap in lint memory test to remove flakiness The "memory usage during lint operations" test asserted a hard ceiling on the process heapUsed delta across ten concurrent /_lint requests without forcing a collection first, so the reading was dominated by transient garbage that GC had not yet reclaimed rather than by retained memory. The delta therefore tracked GC timing, not leaks, and intermittently tipped over the threshold. The test now warms up once (so one-time eslint/prettier initialization is not miscounted), forces a full GC before both the baseline and final reading, and asserts on the resulting retained-growth delta. A `forceGc` helper enables GC at runtime via the V8 flag API since the runner does not pass --expose-gc. Retained growth measures ~3MB locally; the bound is set to 20MB as a leak guard with ample headroom. A diagnostic line logs the retained-growth number and whether GC ran, so any future failure can be told apart from measurement noise. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/helpers/prettier-test-utils.ts | 31 ++++++++++ .../tests/realm-endpoints/lint-test.ts | 57 +++++++++++++------ 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/packages/realm-server/tests/helpers/prettier-test-utils.ts b/packages/realm-server/tests/helpers/prettier-test-utils.ts index 0a626ec09b0..3bbd9dd01bc 100644 --- a/packages/realm-server/tests/helpers/prettier-test-utils.ts +++ b/packages/realm-server/tests/helpers/prettier-test-utils.ts @@ -1,5 +1,36 @@ // Test utilities for prettier formatting tests import { performance } from 'perf_hooks'; +import * as v8 from 'v8'; +import * as vm from 'vm'; + +/** + * Forces a full garbage collection so that a subsequent + * `process.memoryUsage().heapUsed` reading reflects retained memory rather + * than uncollected transient garbage. The test runner does not start Node + * with `--expose-gc`, so `global.gc` is normally absent; enable it at runtime + * through the V8 flag API, then turn it back off so the rest of the process is + * unaffected. Returns true if a collection was actually performed. + * + * Two passes: the first promotes/collects the young generation, the second + * collects what the first pass made unreachable, so the reading settles. + */ +export function forceGc(): boolean { + let gc = (globalThis as { gc?: () => void }).gc; + if (typeof gc !== 'function') { + try { + v8.setFlagsFromString('--expose-gc'); + gc = vm.runInNewContext('gc') as () => void; + } finally { + v8.setFlagsFromString('--no-expose-gc'); + } + } + if (typeof gc !== 'function') { + return false; + } + gc(); + gc(); + return true; +} interface FormattingTestCase { name: string; diff --git a/packages/realm-server/tests/realm-endpoints/lint-test.ts b/packages/realm-server/tests/realm-endpoints/lint-test.ts index a0934b8d7e9..30d6647f3a4 100644 --- a/packages/realm-server/tests/realm-endpoints/lint-test.ts +++ b/packages/realm-server/tests/realm-endpoints/lint-test.ts @@ -8,6 +8,7 @@ import { createConcurrentTestData, createErrorTestCases, createPerformanceAssertion, + forceGc, } from '../helpers/prettier-test-utils'; import '@cardstack/runtime-common/helpers/code-equality-assertion'; @@ -341,27 +342,38 @@ export class MyCard extends CardDef { }); test('memory usage during lint operations', async function (assert) { - const initialMemory = process.memoryUsage().heapUsed; - const testSource = `import { CardDef } from 'https://cardstack.com/base/card-api'; export class MyCard extends CardDef { @field name = contains(StringField); }`; + const lintOnce = () => + request + .post('/_lint') + .set( + 'Authorization', + `Bearer ${createJWT(testRealm, 'john', ['read', 'write'])}`, + ) + .set('X-HTTP-Method-Override', 'QUERY') + .set('Accept', 'application/json') + .send(testSource); + + // Warm up first so one-time, legitimately-retained initialization (eslint + // rule definitions, prettier plugins, module caches) is established before + // the baseline reading and isn't misattributed to the measured loop. + await lintOnce(); + + // Force a collection before each reading so the delta reflects retained + // memory, not transient garbage that GC simply hasn't reclaimed yet. + // Without this the reading is dominated by collectible allocations from + // ten concurrent lint requests, which is noise, not a leak signal. + const gcForced = forceGc(); + const initialMemory = process.memoryUsage().heapUsed; + // Run multiple lint operations to test memory usage const operations = []; for (let i = 0; i < 10; i++) { - operations.push( - request - .post('/_lint') - .set( - 'Authorization', - `Bearer ${createJWT(testRealm, 'john', ['read', 'write'])}`, - ) - .set('X-HTTP-Method-Override', 'QUERY') - .set('Accept', 'application/json') - .send(testSource), - ); + operations.push(lintOnce()); } const results = await Promise.all(operations); @@ -375,13 +387,26 @@ export class MyCard extends CardDef { ); }); + forceGc(); const finalMemory = process.memoryUsage().heapUsed; const memoryIncrease = finalMemory - initialMemory; + const memoryIncreaseMb = memoryIncrease / 1024 / 1024; + + // Diagnostic signal: if this assertion fails, the log shows the + // retained-growth number (and whether GC actually ran), so a CI failure + // distinguishes a real leak from measurement noise. + console.log( + `[lint-memory-test] initial=${(initialMemory / 1024 / 1024).toFixed(2)}MB ` + + `final=${(finalMemory / 1024 / 1024).toFixed(2)}MB ` + + `retainedGrowth=${memoryIncreaseMb.toFixed(2)}MB gcForced=${gcForced}`, + ); - // Memory increase should be reasonable (less than 45MB for lint operations) + // With warm caches and a forced collection, ten idempotent lint operations + // retain almost nothing; this bound is a leak guard, not a tuned ceiling. + const thresholdMb = 20; assert.ok( - memoryIncrease < 45 * 1024 * 1024, - `Memory increase should be under 45MB, got ${(memoryIncrease / 1024 / 1024).toFixed(2)}MB`, + memoryIncrease < thresholdMb * 1024 * 1024, + `Memory increase should be under ${thresholdMb}MB, got ${memoryIncreaseMb.toFixed(2)}MB (gcForced=${gcForced})`, ); }); From 7102eeb056e7214c3922457de04d21910bb526cd Mon Sep 17 00:00:00 2001 From: Hassan Abdel-Rahman Date: Wed, 3 Jun 2026 16:35:10 -0400 Subject: [PATCH 2/2] Address review: gate bound on GC, cache gc resolution, assert warm-up - forceGc resolves the GC function once and caches it, so a missing globalThis.gc no longer creates a throwaway VM context on every call (which would itself allocate and perturb the heap reading). The runtime-enable path now swallows a ReferenceError and returns false instead of letting it escape, honoring the documented contract. - The memory test now keys its threshold on whether a collection actually ran: the tight 20MB retained-memory bound when GC was forced, falling back to the looser historical 45MB bound otherwise, so the test never becomes more flaky than before in an environment without runtime GC. - The warm-up request's status is asserted so a regression there is localized rather than silently shifting the baseline. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/helpers/prettier-test-utils.ts | 47 ++++++++++++++----- .../tests/realm-endpoints/lint-test.ts | 22 ++++++--- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/packages/realm-server/tests/helpers/prettier-test-utils.ts b/packages/realm-server/tests/helpers/prettier-test-utils.ts index 3bbd9dd01bc..140b649c436 100644 --- a/packages/realm-server/tests/helpers/prettier-test-utils.ts +++ b/packages/realm-server/tests/helpers/prettier-test-utils.ts @@ -3,28 +3,49 @@ import { performance } from 'perf_hooks'; import * as v8 from 'v8'; import * as vm from 'vm'; -/** - * Forces a full garbage collection so that a subsequent - * `process.memoryUsage().heapUsed` reading reflects retained memory rather - * than uncollected transient garbage. The test runner does not start Node - * with `--expose-gc`, so `global.gc` is normally absent; enable it at runtime - * through the V8 flag API, then turn it back off so the rest of the process is - * unaffected. Returns true if a collection was actually performed. - * - * Two passes: the first promotes/collects the young generation, the second - * collects what the first pass made unreachable, so the reading settles. - */ -export function forceGc(): boolean { +// Resolved once and cached: the test runner does not start Node with +// `--expose-gc`, so `globalThis.gc` is normally absent and we synthesize the +// function through the V8 flag API. Resolving lazily and caching means we +// create at most one throwaway VM context for the whole suite rather than one +// per `forceGc` call (a per-call context would itself allocate and perturb the +// very heap reading the helper exists to make trustworthy). `null` records a +// completed resolution that produced no usable function. +let resolvedGc: (() => void) | null | undefined; + +function resolveGc(): (() => void) | null { + if (resolvedGc !== undefined) { + return resolvedGc; + } let gc = (globalThis as { gc?: () => void }).gc; if (typeof gc !== 'function') { try { v8.setFlagsFromString('--expose-gc'); + // Evaluating `gc` throws ReferenceError if the flag didn't take effect + // (e.g. an unsupported V8 build); treat that as "no GC available". gc = vm.runInNewContext('gc') as () => void; + } catch { + gc = undefined; } finally { v8.setFlagsFromString('--no-expose-gc'); } } - if (typeof gc !== 'function') { + resolvedGc = typeof gc === 'function' ? gc : null; + return resolvedGc; +} + +/** + * Forces a full garbage collection so that a subsequent + * `process.memoryUsage().heapUsed` reading reflects retained memory rather + * than uncollected transient garbage. Returns true if a collection was + * actually performed, false if no GC function could be resolved — callers can + * branch on the result to decide how much to trust the measurement. + * + * Two passes: the first promotes/collects the young generation, the second + * collects what the first pass made unreachable, so the reading settles. + */ +export function forceGc(): boolean { + const gc = resolveGc(); + if (!gc) { return false; } gc(); diff --git a/packages/realm-server/tests/realm-endpoints/lint-test.ts b/packages/realm-server/tests/realm-endpoints/lint-test.ts index 30d6647f3a4..142a345667a 100644 --- a/packages/realm-server/tests/realm-endpoints/lint-test.ts +++ b/packages/realm-server/tests/realm-endpoints/lint-test.ts @@ -361,7 +361,12 @@ export class MyCard extends CardDef { // Warm up first so one-time, legitimately-retained initialization (eslint // rule definitions, prettier plugins, module caches) is established before // the baseline reading and isn't misattributed to the measured loop. - await lintOnce(); + const warmup = await lintOnce(); + assert.strictEqual( + warmup.status, + 200, + 'warm-up lint request should succeed so the baseline is a steady-state lint path', + ); // Force a collection before each reading so the delta reflects retained // memory, not transient garbage that GC simply hasn't reclaimed yet. @@ -392,18 +397,21 @@ export class MyCard extends CardDef { const memoryIncrease = finalMemory - initialMemory; const memoryIncreaseMb = memoryIncrease / 1024 / 1024; - // Diagnostic signal: if this assertion fails, the log shows the - // retained-growth number (and whether GC actually ran), so a CI failure - // distinguishes a real leak from measurement noise. + // Always record the retained-growth number (and whether GC actually ran) + // so a CI failure — or a passing run drifting toward the bound — can be + // told apart from measurement noise without another CI cycle. console.log( `[lint-memory-test] initial=${(initialMemory / 1024 / 1024).toFixed(2)}MB ` + `final=${(finalMemory / 1024 / 1024).toFixed(2)}MB ` + `retainedGrowth=${memoryIncreaseMb.toFixed(2)}MB gcForced=${gcForced}`, ); - // With warm caches and a forced collection, ten idempotent lint operations - // retain almost nothing; this bound is a leak guard, not a tuned ceiling. - const thresholdMb = 20; + // The bound only means "retained memory" when a collection actually ran; + // with warm caches and a forced GC ten idempotent lint operations retain + // almost nothing, so 20MB is a generous leak guard. If GC could not be + // forced the delta still includes transient garbage, so fall back to the + // looser historical bound rather than flaking against the tight one. + const thresholdMb = gcForced ? 20 : 45; assert.ok( memoryIncrease < thresholdMb * 1024 * 1024, `Memory increase should be under ${thresholdMb}MB, got ${memoryIncreaseMb.toFixed(2)}MB (gcForced=${gcForced})`,