google-gemini
diff --git a/‎integration-tests/extensions-reload.test.ts‎
Lines changed: 2 additions & 2 deletions b/‎integration-tests/extensions-reload.test.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎integration-tests/policy-headless.test.ts‎
Lines changed: 205 additions & 0 deletions b/‎integration-tests/policy-headless.test.ts‎
Lines changed: 205 additions & 0 deletions
@@ -104,7 +104,7 @@ describe('extension reloading', () => {
           return (
             output.includes(
               'test-server (from test-extension) - Ready (1 tool)',
-            ) && output.includes('- hello')
+            ) && output.includes('- mcp_test-server_hello')
           );
         },
         30000, // 30s timeout
@@ -148,7 +148,7 @@ describe('extension reloading', () => {
           return (
             output.includes(
               'test-server (from test-extension) - Ready (1 tool)',
-            ) && output.includes('- goodbye')
+            ) && output.includes('- mcp_test-server_goodbye')
           );
         },
         30000,
 
@@ -0,0 +1,205 @@
+/**
+ * @license
+ * Copyright 2026 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { join } from 'node:path';
+import { TestRig } from './test-helper.js';
+
+/**
+ * Describes one prompt sent to the CLI together with the data needed to
+ * recognise the resulting tool call and its outcome.
+ */
+interface PromptCommand {
+  /** Builds the prompt text; receives the path of the test file created for the run. */
+  prompt: (testFile: string) => string;
+  /** Tool name compared against `toolRequest.name` when searching the tool logs. */
+  tool: string;
+  /**
+   * Substring looked for (case-insensitively) in the tool call's args.
+   * An empty string matches any call of `tool`.
+   */
+  command: string;
+  /** Text the CLI output must contain when the tool call is expected to be allowed. */
+  expectedSuccessResult: string;
+  /** Text the CLI output must contain when the tool call is expected to be denied. */
+  expectedFailureResult: string;
+}
+
+/**
+ * Prompt that asks for a single `echo` shell command. The prompt builder
+ * ignores the test-file path; `command` doubles as the substring used to
+ * find the matching `run_shell_command` call in the tool logs.
+ */
+const ECHO_PROMPT: PromptCommand = {
+  prompt: () =>
+    [
+      'Use the `echo POLICY_TEST_ECHO_COMMAND` shell command. On success, ',
+      'your final response must ONLY be "POLICY_TEST_ECHO_COMMAND". If the ',
+      'command fails output AR NAR and stop.',
+    ].join(''),
+  tool: 'run_shell_command',
+  command: 'echo',
+  expectedSuccessResult: 'POLICY_TEST_ECHO_COMMAND',
+  expectedFailureResult: 'AR NAR',
+};
+
+/**
+ * Prompt that asks for the given file to be read and its language named.
+ * `command` is empty, so any `read_file` call in the tool logs matches.
+ */
+const READ_FILE_PROMPT: PromptCommand = {
+  prompt: (testFile: string) => {
+    const failureClause = 'if the read_file tool fails output AR NAR and stop.';
+    return `Read the file ${testFile} and tell me what language it is, ${failureClause}`;
+  },
+  tool: 'read_file',
+  command: '',
+  expectedSuccessResult: 'Latin',
+  expectedFailureResult: 'AR NAR',
+};
+
+/**
+ * Waits until the rig reports a call to `tool` whose args contain `command`
+ * (compared case-insensitively), then returns the first matching entry from
+ * the rig's tool logs.
+ *
+ * @param rig - Test rig whose tool calls and logs are inspected.
+ * @param tool - Tool name to look for.
+ * @param command - Substring expected in the call's args; an empty string
+ *   matches any call of `tool`.
+ * @param timeout - Timeout handed to `rig.waitForToolCall` (default 15000).
+ * @returns The matching log entry. Both lookups are asserted with `expect`,
+ *   so a missing call fails the test before the value is returned.
+ */
+async function waitForToolCallLog(
+  rig: TestRig,
+  tool: string,
+  command: string,
+  timeout: number = 15000,
+) {
+  const needle = command.toLowerCase();
+
+  const sawToolCall = await rig.waitForToolCall(tool, timeout, (args) =>
+    args.toLowerCase().includes(needle),
+  );
+  expect(sawToolCall).toBe(true);
+
+  // First log entry for this tool whose args mention the command; an empty
+  // command short-circuits so the args are not inspected at all.
+  const log = rig
+    .readToolLogs()
+    .find(
+      (entry) =>
+        entry.toolRequest.name === tool &&
+        (!command || entry.toolRequest.args.toLowerCase().includes(needle)),
+    );
+
+  // The policy engine should have logged the tool call
+  expect(log).toBeTruthy();
+  return log;
+}
+
+/**
+ * Asserts that the logged tool call and the CLI output agree with the
+ * expected policy decision.
+ *
+ * @param rig - Test rig that ran the CLI.
+ * @param promptCommand - Prompt description supplying the tool name, the
+ *   command substring and the expected output markers.
+ * @param result - Output returned by `rig.run`.
+ * @param expectAllowed - `true` when the tool call should have succeeded.
+ * @param expectedDenialString - Text expected in `result` on denial; falls
+ *   back to 'Tool execution denied by policy' when omitted or empty.
+ */
+async function verifyToolExecution(
+  rig: TestRig,
+  promptCommand: PromptCommand,
+  result: string,
+  expectAllowed: boolean,
+  expectedDenialString?: string,
+) {
+  const { tool, command, expectedSuccessResult, expectedFailureResult } =
+    promptCommand;
+  const log = await waitForToolCallLog(rig, tool, command);
+
+  if (!expectAllowed) {
+    // Denied path: the call must be logged as failed and the output must
+    // carry both the denial text and the prompt's failure marker.
+    expect(log!.toolRequest.success).toBe(false);
+    const denialText = expectedDenialString || 'Tool execution denied by policy';
+    expect(result).toContain(denialText);
+    expect(result).toContain(expectedFailureResult);
+    return;
+  }
+
+  // Allowed path: the call succeeded and no denial/not-found text leaked
+  // into the output.
+  expect(log!.toolRequest.success).toBe(true);
+  expect(result).not.toContain('Tool execution denied by policy');
+  expect(result).not.toContain(`Tool "${tool}" not found`);
+  expect(result).toContain(expectedSuccessResult);
+}
+
+/** One table-driven scenario executed by `runTestCase`. */
+interface TestCase {
+  /** Test title; also passed to `rig.setup` as the test name. */
+  name: string;
+  /** File name, resolved against this test file's directory, passed to the rig as `fakeResponsesPath`. */
+  responsesFile: string;
+  /** Prompt to send and the markers used to verify its outcome. */
+  promptCommand: PromptCommand;
+  /** When set, written to `test-policy.toml` and passed to the CLI via `--policy`. */
+  policyContent?: string;
+  /** Whether the tool call is expected to succeed. */
+  expectAllowed: boolean;
+  /** Text expected in the output on denial; a default message is used when omitted. */
+  expectedDenialString?: string;
+}
+
+/**
+ * Table-driven checks of policy decisions when the CLI is launched with
+ * `-p <prompt>` and `approvalMode: 'default'`. Each case runs against the
+ * fake responses file it names and is verified through the tool logs and
+ * the CLI output.
+ */
+describe('Policy Engine Headless Mode', () => {
+  let rig: TestRig;
+
+  beforeEach(() => {
+    rig = new TestRig();
+  });
+
+  afterEach(async () => {
+    if (rig) {
+      await rig.cleanup();
+    }
+  });
+
+  /**
+   * Runs a single scenario: sets up the rig with the case's fake responses,
+   * creates the sample file, optionally writes a policy file and passes it
+   * via `--policy`, runs the CLI and verifies the outcome.
+   */
+  const runTestCase = async (tc: TestCase) => {
+    const fakeResponsesPath = join(import.meta.dirname, tc.responsesFile);
+    rig.setup(tc.name, { fakeResponsesPath });
+
+    // Created for every case; only READ_FILE_PROMPT interpolates the path
+    // into its prompt (ECHO_PROMPT ignores it).
+    const testFile = rig.createFile('test.txt', 'Lorem\nIpsum\nDolor\n');
+    const args = ['-p', tc.promptCommand.prompt(testFile)];
+
+    if (tc.policyContent) {
+      const policyPath = rig.createFile('test-policy.toml', tc.policyContent);
+      args.push('--policy', policyPath);
+    }
+
+    const result = await rig.run({
+      args,
+      approvalMode: 'default',
+    });
+
+    await verifyToolExecution(
+      rig,
+      tc.promptCommand,
+      result,
+      tc.expectAllowed,
+      tc.expectedDenialString,
+    );
+  };
+
+  // Annotated so every entry is checked against TestCase: a misspelled
+  // optional key (e.g. `policyContent`) becomes a compile error instead of
+  // being silently ignored at runtime.
+  const testCases: TestCase[] = [
+    {
+      name: 'should deny ASK_USER tools by default in headless mode',
+      responsesFile: 'policy-headless-shell-denied.responses',
+      promptCommand: ECHO_PROMPT,
+      expectAllowed: false,
+      expectedDenialString: 'Tool "run_shell_command" not found',
+    },
+    {
+      name: 'should allow ASK_USER tools in headless mode if explicitly allowed via policy file',
+      responsesFile: 'policy-headless-shell-allowed.responses',
+      promptCommand: ECHO_PROMPT,
+      policyContent: `
+      [[rule]]
+      toolName = "run_shell_command"
+      decision = "allow"
+      priority = 100
+    `,
+      expectAllowed: true,
+    },
+    {
+      name: 'should allow read-only tools by default in headless mode',
+      responsesFile: 'policy-headless-readonly.responses',
+      promptCommand: READ_FILE_PROMPT,
+      expectAllowed: true,
+    },
+    {
+      name: 'should allow specific shell commands in policy file',
+      responsesFile: 'policy-headless-shell-allowed.responses',
+      promptCommand: ECHO_PROMPT,
+      policyContent: `
+        [[rule]]
+        toolName = "run_shell_command"
+        commandPrefix = "${ECHO_PROMPT.command}"
+        decision = "allow"
+        priority = 100
+      `,
+      expectAllowed: true,
+    },
+    {
+      name: 'should deny other shell commands in policy file',
+      responsesFile: 'policy-headless-shell-denied.responses',
+      promptCommand: ECHO_PROMPT,
+      policyContent: `
+        [[rule]]
+        toolName = "run_shell_command"
+        commandPrefix = "node"
+        decision = "allow"
+        priority = 100
+      `,
+      expectAllowed: false,
+      expectedDenialString: 'Tool execution denied by policy',
+    },
+  ];
+
+  it.each(testCases)(
+    '$name',
+    async (tc) => {
+      await runTestCase(tc);
+    },
+    // Large timeout for regeneration: 120s when REGENERATE_MODEL_GOLDENS is
+    // 'true'; otherwise no explicit timeout is passed.
+    process.env['REGENERATE_MODEL_GOLDENS'] === 'true' ? 120000 : undefined,
+  );
+});