google-gemini
diff --git a/‎.github/workflows/memory-nightly.yml‎
Lines changed: 33 additions & 0 deletions b/‎.github/workflows/memory-nightly.yml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎GEMINI.md‎
Lines changed: 2 additions & 0 deletions b/‎GEMINI.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/integration-tests.md‎
Lines changed: 40 additions & 0 deletions b/‎docs/integration-tests.md‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎memory-tests/baselines.json‎
Lines changed: 30 additions & 0 deletions b/‎memory-tests/baselines.json‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎memory-tests/globalSetup.ts‎
Lines changed: 71 additions & 0 deletions b/‎memory-tests/globalSetup.ts‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎memory-tests/memory-usage.test.ts‎
Lines changed: 185 additions & 0 deletions b/‎memory-tests/memory-usage.test.ts‎
Lines changed: 185 additions & 0 deletions
diff --git a/‎memory-tests/memory.idle-startup.responses‎
Lines changed: 2 additions & 0 deletions b/‎memory-tests/memory.idle-startup.responses‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎memory-tests/memory.multi-function-call.responses‎
Lines changed: 4 additions & 0 deletions b/‎memory-tests/memory.multi-function-call.responses‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎memory-tests/memory.multi-turn.responses‎
Lines changed: 10 additions & 0 deletions b/‎memory-tests/memory.multi-turn.responses‎
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,33 @@
+name: 'Memory Tests: Nightly'
+
+on:
+  schedule:
+    - cron: '0 2 * * *' # Runs daily at 02:00 UTC (GitHub Actions evaluates cron schedules in UTC)
+  workflow_dispatch: # Allow manual trigger
+
+permissions:
+  contents: 'read'
+
+jobs:
+  memory-test:
+    name: 'Run Memory Usage Tests'
+    runs-on: 'gemini-cli-ubuntu-16-core'
+    if: "github.repository == 'google-gemini/gemini-cli'" # Skip the job in forks; run only in the upstream repository
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8' # ratchet:actions/checkout@v5
+
+      - name: 'Set up Node.js'
+        uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions/setup-node@v4
+        with:
+          node-version-file: '.nvmrc'
+          cache: 'npm'
+
+      - name: 'Install dependencies'
+        run: 'npm ci'
+
+      - name: 'Build project'
+        run: 'npm run build'
+
+      - name: 'Run Memory Tests'
+        run: 'npm run test:memory'
@@ -44,6 +44,8 @@ powerful tool for developers.
 - **Test Commands:**
   - **Unit (All):** `npm run test`
   - **Integration (E2E):** `npm run test:e2e`
+  - **Memory (Nightly):** `npm run test:memory` (Runs memory regression tests
+    against baselines. Excluded from `preflight`; runs nightly in CI.)
   - **Workspace-Specific:** `npm test -w <pkg> -- <path>` (Note: `<path>` must
     be relative to the workspace root, e.g.,
     `-w @google/gemini-cli-core -- src/routing/modelRouterService.test.ts`)
 
@@ -117,6 +117,46 @@ npm run test:integration:sandbox:docker
 npm run test:integration:sandbox:podman
 ```
 
+## Memory regression tests
+
+Memory regression tests are designed to detect heap growth and leaks across key
+CLI scenarios. They are located in the `memory-tests` directory.
+
+These tests are distinct from standard integration tests because they measure
+memory usage and compare it against committed baselines.
+
+### Running memory tests
+
+Memory tests are not run as part of the default `npm run test` or
+`npm run test:e2e` commands. They run nightly in CI and can also be run
+manually:
+
+```bash
+npm run test:memory
+```
+
+### Updating baselines
+
+If you intentionally change behavior that affects memory usage, you may need to
+update the baselines. Set the `UPDATE_MEMORY_BASELINES` environment variable to
+`true`:
+
+```bash
+UPDATE_MEMORY_BASELINES=true npm run test:memory
+```
+
+This will run the tests, take median snapshots, and overwrite
+`memory-tests/baselines.json`. You should review the changes and commit the
+updated baseline file.
+
+### How it works
+
+The harness (`MemoryTestHarness` in `packages/test-utils`):
+
+- Forces garbage collection multiple times to reduce noise.
+- Takes median snapshots to filter spikes.
+- Compares against baselines with a 10% tolerance.
+- Can analyze sustained leaks across 3 snapshots using `analyzeSnapshots()`.
+
 ## Diagnostics
 
 The integration test runner provides several options for diagnostics to help
 
@@ -0,0 +1,30 @@
+{
+  "version": 1,
+  "updatedAt": "2026-04-08T01:21:58.770Z",
+  "scenarios": {
+    "multi-turn-conversation": {
+      "heapUsedBytes": 120082704,
+      "heapTotalBytes": 177586176,
+      "rssBytes": 269172736,
+      "timestamp": "2026-04-08T01:21:57.127Z"
+    },
+    "multi-function-call-repo-search": {
+      "heapUsedBytes": 104644984,
+      "heapTotalBytes": 111575040,
+      "rssBytes": 204079104,
+      "timestamp": "2026-04-08T01:21:58.770Z"
+    },
+    "idle-session-startup": {
+      "heapUsedBytes": 119813672,
+      "heapTotalBytes": 177061888,
+      "rssBytes": 267943936,
+      "timestamp": "2026-04-08T01:21:53.855Z"
+    },
+    "simple-prompt-response": {
+      "heapUsedBytes": 119722064,
+      "heapTotalBytes": 177324032,
+      "rssBytes": 268812288,
+      "timestamp": "2026-04-08T01:21:55.491Z"
+    }
+  }
+}
@@ -0,0 +1,71 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { mkdir, readdir, rm } from 'node:fs/promises';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { canUseRipgrep } from '../packages/core/src/tools/ripGrep.js';
+
+const __dirname = dirname(fileURLToPath(import.meta.url)); // directory containing this module
+const rootDir = join(__dirname, '..'); // parent of the directory containing this module
+const memoryTestsDir = join(rootDir, '.memory-tests'); // holds one subdirectory per test run
+let runDir = ''; // assigned by setup(); stays empty until then, which teardown() checks
+
+/**
+ * Deletes old run directories under `memoryTestsDir`, keeping the newest
+ * `keep` of them for debugging.
+ *
+ * Run directories are named with the `Date.now()` value of their creation, so
+ * only all-digit entry names are treated as runs and they are ordered
+ * numerically. Any other entry is left alone, and the run that is currently
+ * executing is never deleted.
+ *
+ * @param currentRunName Directory name (not path) of the current run.
+ * @param keep Number of most recent runs to preserve.
+ */
+async function removeStaleRuns(currentRunName: string, keep = 3) {
+  const entries = await readdir(memoryTestsDir);
+  const runNames = entries
+    .filter((name) => /^\d+$/.test(name))
+    .sort((a, b) => Number(a) - Number(b));
+
+  const staleRuns = runNames
+    .slice(0, Math.max(0, runNames.length - keep))
+    .filter((name) => name !== currentRunName);
+
+  await Promise.all(
+    staleRuns.map((name) =>
+      rm(join(memoryTestsDir, name), { recursive: true, force: true }),
+    ),
+  );
+}
+
+/**
+ * Global setup for the memory tests.
+ *
+ * Creates a fresh timestamped run directory, redirects the home and Gemini
+ * config directories into it, verifies ripgrep is usable, prunes old run
+ * directories, and exports the environment variables read by the tests.
+ *
+ * @throws Error if `canUseRipgrep()` reports that ripgrep is unavailable.
+ */
+export async function setup() {
+  const runName = `${Date.now()}`;
+  runDir = join(memoryTestsDir, runName);
+  await mkdir(runDir, { recursive: true });
+
+  // Set the home directory to the test run directory to avoid conflicts
+  // with the user's local config.
+  process.env['HOME'] = runDir;
+  if (process.platform === 'win32') {
+    process.env['USERPROFILE'] = runDir;
+  }
+  process.env['GEMINI_CONFIG_DIR'] = join(runDir, '.gemini');
+
+  // Download ripgrep to avoid race conditions
+  const available = await canUseRipgrep();
+  if (!available) {
+    throw new Error('Failed to download ripgrep binary');
+  }
+
+  // Clean up old test runs, keeping the latest few for debugging. This is
+  // best-effort: a failure is logged and does not abort the test run.
+  try {
+    await removeStaleRuns(runName);
+  } catch (e) {
+    console.error('Error cleaning up old memory test runs:', e);
+  }
+
+  process.env['INTEGRATION_TEST_FILE_DIR'] = runDir;
+  process.env['GEMINI_CLI_INTEGRATION_TEST'] = 'true';
+  process.env['GEMINI_FORCE_FILE_STORAGE'] = 'true';
+  process.env['TELEMETRY_LOG_FILE'] = join(runDir, 'telemetry.log');
+  // Respect a caller-provided VERBOSE value; default to 'false' otherwise.
+  process.env['VERBOSE'] = process.env['VERBOSE'] ?? 'false';
+
+  console.log(`\nMemory test output directory: ${runDir}`);
+}
+
+/**
+ * Global teardown: deletes this run's output directory.
+ *
+ * Nothing is removed when `KEEP_OUTPUT` is 'true' or when no run directory was
+ * recorded. A removal failure is logged as a warning rather than thrown.
+ */
+export async function teardown() {
+  const keepOutput = process.env['KEEP_OUTPUT'] === 'true';
+  if (keepOutput || !runDir) {
+    return;
+  }
+
+  try {
+    await rm(runDir, { recursive: true, force: true });
+  } catch (err) {
+    console.warn('Failed to clean up memory test directory:', err);
+  }
+}
@@ -0,0 +1,185 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, beforeAll, afterAll, afterEach } from 'vitest';
+import { TestRig, MemoryTestHarness } from '@google/gemini-cli-test-utils';
+import { join, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url)); // directory containing this test file
+const BASELINES_PATH = join(__dirname, 'baselines.json'); // baseline file located next to this test
+const UPDATE_BASELINES = process.env['UPDATE_MEMORY_BASELINES'] === 'true'; // when true, tests update baselines instead of asserting against them
+const TOLERANCE_PERCENT = 10; // passed to the harness as defaultTolerancePercent
+
+// Fake API key for tests using fake responses
+const TEST_ENV = { GEMINI_API_KEY: 'fake-memory-test-key' }; // passed as `env` to every rig.run() call
+
+/**
+ * Memory regression scenarios.
+ *
+ * Each test sets up a TestRig with canned model responses, runs the CLI inside
+ * `harness.runScenario()`, records a snapshot, and then either asserts the
+ * result against the committed baseline or, when UPDATE_MEMORY_BASELINES is
+ * 'true', stores the result as the new baseline.
+ */
+describe('Memory Usage Tests', () => {
+  /** Value resolved by `harness.runScenario()`. */
+  type ScenarioResult = Awaited<ReturnType<MemoryTestHarness['runScenario']>>;
+
+  /** Timeout, in milliseconds, applied to every CLI invocation. */
+  const RUN_TIMEOUT_MS = 120000;
+
+  let harness: MemoryTestHarness;
+  // Rig owned by the test that is currently running. It is undefined between
+  // tests so that afterEach never cleans up a missing or already-cleaned rig.
+  let rig: TestRig | undefined;
+
+  /**
+   * Creates the rig for the current test and registers it for cleanup.
+   *
+   * @param testName Name handed to `TestRig.setup()`.
+   * @param responsesFile Fake-responses file name, relative to this directory.
+   * @returns The rig that was set up.
+   */
+  function startRig(testName: string, responsesFile: string): TestRig {
+    const created = new TestRig();
+    // Register before setup() so a setup failure is still cleaned up.
+    rig = created;
+    created.setup(testName, {
+      fakeResponsesPath: join(__dirname, responsesFile),
+    });
+    return created;
+  }
+
+  /**
+   * Updates the stored baseline for a scenario when UPDATE_BASELINES is set;
+   * otherwise asserts that the result is within the baseline.
+   *
+   * @param scenarioName Scenario name used in the log message.
+   * @param result Result returned by `harness.runScenario()`.
+   */
+  function checkAgainstBaseline(
+    scenarioName: string,
+    result: ScenarioResult,
+  ): void {
+    if (!UPDATE_BASELINES) {
+      harness.assertWithinBaseline(result);
+      return;
+    }
+
+    harness.updateScenarioBaseline(result);
+    const heapMb = (result.finalHeapUsed / (1024 * 1024)).toFixed(1);
+    console.log(`Updated baseline for ${scenarioName}: ${heapMb} MB`);
+  }
+
+  beforeAll(() => {
+    harness = new MemoryTestHarness({
+      baselinesPath: BASELINES_PATH,
+      defaultTolerancePercent: TOLERANCE_PERCENT,
+      gcCycles: 3,
+      gcDelayMs: 100,
+      sampleCount: 3,
+    });
+  });
+
+  afterEach(async () => {
+    // Clear the reference first so a rig is cleaned up at most once, even if
+    // the next test fails before creating its own rig.
+    const finished = rig;
+    rig = undefined;
+    await finished?.cleanup();
+  });
+
+  afterAll(async () => {
+    // Generate the summary report after all tests
+    await harness.generateReport();
+  });
+
+  it('idle-session-startup: memory usage within baseline', async () => {
+    const scenario = 'idle-session-startup';
+    const testRig = startRig(
+      'memory-idle-startup',
+      'memory.idle-startup.responses',
+    );
+
+    const result = await harness.runScenario(
+      scenario,
+      async (recordSnapshot) => {
+        await testRig.run({
+          args: ['hello'],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-startup');
+      },
+    );
+
+    checkAgainstBaseline(scenario, result);
+  });
+
+  it('simple-prompt-response: memory usage within baseline', async () => {
+    const scenario = 'simple-prompt-response';
+    // NOTE(review): confirm `memory.simple-prompt.responses` is committed
+    // alongside the other `.responses` fixtures in this directory.
+    const testRig = startRig(
+      'memory-simple-prompt',
+      'memory.simple-prompt.responses',
+    );
+
+    const result = await harness.runScenario(
+      scenario,
+      async (recordSnapshot) => {
+        await testRig.run({
+          args: ['What is the capital of France?'],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-response');
+      },
+    );
+
+    checkAgainstBaseline(scenario, result);
+  });
+
+  it('multi-turn-conversation: memory remains stable over turns', async () => {
+    const scenario = 'multi-turn-conversation';
+    const testRig = startRig('memory-multi-turn', 'memory.multi-turn.responses');
+
+    const prompts = [
+      'Hello, what can you help me with?',
+      'Tell me about JavaScript',
+      'How is TypeScript different?',
+      'Can you write a simple TypeScript function?',
+      'What are some TypeScript best practices?',
+    ];
+
+    const result = await harness.runScenario(
+      scenario,
+      async (recordSnapshot) => {
+        // Run through all turns as a piped sequence, one prompt per line
+        await testRig.run({
+          stdin: prompts.join('\n'),
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        // Take the snapshot after the conversation completes
+        await recordSnapshot('after-all-turns');
+      },
+    );
+
+    checkAgainstBaseline(scenario, result);
+  });
+
+  it('multi-function-call-repo-search: memory after tool use', async () => {
+    const scenario = 'multi-function-call-repo-search';
+    const testRig = startRig(
+      'memory-multi-func-call',
+      'memory.multi-function-call.responses',
+    );
+
+    // Create directories first, then files in the workspace so the tools have targets
+    testRig.mkdir('packages/core/src/telemetry');
+    testRig.createFile(
+      'packages/core/src/telemetry/memory-monitor.ts',
+      'export class MemoryMonitor { constructor() {} }',
+    );
+    testRig.createFile(
+      'packages/core/src/telemetry/metrics.ts',
+      'export function recordMemoryUsage() {}',
+    );
+
+    const result = await harness.runScenario(
+      scenario,
+      async (recordSnapshot) => {
+        await testRig.run({
+          args: [
+            'Search this repository for MemoryMonitor and tell me what it does',
+          ],
+          timeout: RUN_TIMEOUT_MS,
+          env: TEST_ENV,
+        });
+
+        await recordSnapshot('after-tool-calls');
+      },
+    );
+
+    checkAgainstBaseline(scenario, result);
+  });
+});
@@ -0,0 +1,2 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
@@ -0,0 +1,4 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I'll search for MemoryMonitor in the repository and analyze what it does."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":15,"totalTokenCount":45,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"functionCall":{"name":"grep_search","args":{"pattern":"MemoryMonitor","path":".","include_pattern":"*.ts"}}},{"functionCall":{"name":"list_directory","args":{"path":"packages/core/src/telemetry"}}},{"functionCall":{"name":"read_file","args":{"file_path":"packages/core/src/telemetry/memory-monitor.ts"}}}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":30,"candidatesTokenCount":80,"totalTokenCount":110,"promptTokensDetails":[{"modality":"TEXT","tokenCount":30}]}}]}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"I found the memory monitoring code. Here's a summary:\n\nThe `MemoryMonitor` class in `packages/core/src/telemetry/memory-monitor.ts` provides:\n\n1. **Continuous monitoring** via `start()`/`stop()` with configurable intervals\n2. **V8 heap snapshots** using `v8.getHeapStatistics()` and `process.memoryUsage()`\n3. **High-water mark tracking** to detect significant memory growth\n4. **Rate-limited recording** to avoid metric flood\n5. **Activity detection** — only records when user is active\n\nThe class uses a singleton pattern via `initializeMemoryMonitor()` for global access."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":500,"candidatesTokenCount":120,"totalTokenCount":620,"promptTokensDetails":[{"modality":"TEXT","tokenCount":500}]}}]}
@@ -0,0 +1,10 @@
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help you with your coding tasks. What would you like to work on today?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":18,"totalTokenCount":23,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"JavaScript is a high-level, interpreted programming language. It was originally designed for adding interactivity to web pages."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":25,"candidatesTokenCount":60,"totalTokenCount":85,"promptTokensDetails":[{"modality":"TEXT","tokenCount":25}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"TypeScript is a typed superset of JavaScript developed by Microsoft. The main differences from JavaScript are static typing and better tooling."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":45,"candidatesTokenCount":80,"totalTokenCount":125,"promptTokensDetails":[{"modality":"TEXT","tokenCount":45}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here is a simple TypeScript function:\n\nfunction greet(name: string): string { return `Hello, ${name}!`; }"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":60,"candidatesTokenCount":55,"totalTokenCount":115,"promptTokensDetails":[{"modality":"TEXT","tokenCount":60}]}}]}
+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}
+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Here are 5 key TypeScript best practices: Enable strict mode, prefer interfaces, use union types, leverage type inference, and use readonly."}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":75,"candidatesTokenCount":70,"totalTokenCount":145,"promptTokensDetails":[{"modality":"TEXT","tokenCount":75}]}}]}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+{"method":"generateContent","response":{"candidates":[{"content":{"parts":[{"text":"0"}],"role":"model"},"finishReason":"STOP","index":0}]}}`
	`2`	`+{"method":"generateContentStream","response":[{"candidates":[{"content":{"parts":[{"text":"Hello! I'm ready to help. What would you like to work on?"}],"role":"model"},"finishReason":"STOP","index":0}],"usageMetadata":{"promptTokenCount":5,"candidatesTokenCount":12,"totalTokenCount":17,"promptTokensDetails":[{"modality":"TEXT","tokenCount":5}]}}]}`