Skip to content
55 changes: 55 additions & 0 deletions packages/a2a-server/src/agent/executor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ vi.mock('../config/config.js', () => ({
getCheckpointingEnabled: () => false,
}),
loadEnvironment: vi.fn(),
setIsTrusted: vi.fn().mockReturnValue(false),
setTargetDir: vi.fn().mockReturnValue('/tmp'),
}));

Expand Down Expand Up @@ -62,6 +63,12 @@ vi.mock('./task.js', () => {
scheduleToolCalls: vi.fn().mockResolvedValue(undefined),
waitForPendingTools: vi.fn().mockResolvedValue(undefined),
getAndClearCompletedTools: vi.fn().mockReturnValue([]),
get hasPendingTools() {
return false;
},
get pendingToolsCount() {
return 0;
},
addToolResponsesToHistory: vi.fn(),
sendCompletedToolsToLlm: vi.fn().mockImplementation(async function* () {}),
cancelPendingTools: vi.fn(),
Expand Down Expand Up @@ -245,4 +252,52 @@ describe('CoderAgentExecutor', () => {
expect(executor.getTask(taskId)).toBeUndefined();
expect(wrapper.task.dispose).toHaveBeenCalled();
});

it('should yield the turn and transition to input-required if tools are pending', async () => {
const taskId = 'test-task-pending-tools';
const contextId = 'test-context';

const mockSocket = new EventEmitter();
(requestStorage.getStore as Mock).mockReturnValue({
req: { socket: mockSocket },
});

// Pre-create the task to safely modify its mocked methods before execution
const wrapper = await executor.createTask(
taskId,
contextId,
undefined,
mockEventBus,
);
const hasPendingToolsSpy = vi
.spyOn(wrapper.task, 'hasPendingTools', 'get')
.mockReturnValue(true);
vi.spyOn(wrapper.task, 'pendingToolsCount', 'get').mockReturnValue(1);

const requestContext = {
userMessage: {
messageId: 'msg-1',
taskId,
contextId,
parts: [{ kind: 'confirmation', callId: '1', outcome: 'proceed' }],
metadata: {
coderAgent: { kind: 'agent-settings', workspacePath: '/tmp' },
},
},
} as unknown as RequestContext;

await executor.execute(requestContext, mockEventBus);

// Assert that the executor yielded the turn correctly without further progression
expect(hasPendingToolsSpy).toHaveBeenCalled();
expect(wrapper.task.getAndClearCompletedTools).not.toHaveBeenCalled();
expect(wrapper.task.sendCompletedToolsToLlm).not.toHaveBeenCalled();
expect(wrapper.task.setTaskStateAndPublishUpdate).toHaveBeenCalledWith(
'input-required',
expect.any(Object),
undefined,
undefined,
true,
);
});
});
80 changes: 46 additions & 34 deletions packages/a2a-server/src/agent/executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@ import {
getContextIdFromMetadata,
getAgentSettingsFromMetadata,
} from '../types.js';
import { loadConfig, loadEnvironment, setTargetDir } from '../config/config.js';
import {
loadConfig,
loadEnvironment,
setIsTrusted,
setTargetDir,
} from '../config/config.js';
import { loadSettings } from '../config/settings.js';
import { loadExtensions } from '../config/extension.js';
import { Task } from './task.js';
Expand Down Expand Up @@ -93,8 +98,8 @@ export class CoderAgentExecutor implements AgentExecutor {
taskId: string,
): Promise<Config> {
const workspaceRoot = setTargetDir(agentSettings);
const isTrusted = agentSettings.isTrusted ?? false;
loadEnvironment(); // Will override any global env with workspace envs
const isTrusted = setIsTrusted(agentSettings);
const settings = loadSettings(workspaceRoot, isTrusted);
const extensions = loadExtensions(workspaceRoot);
return loadConfig(
Expand Down Expand Up @@ -541,42 +546,49 @@ export class CoderAgentExecutor implements AgentExecutor {

if (abortSignal.aborted) throw new Error('Execution aborted');

const completedTools = currentTask.getAndClearCompletedTools();

if (completedTools.length > 0) {
// If all completed tool calls were canceled, manually add them to history and set state to input-required, final:true
if (completedTools.every((tool) => tool.status === 'cancelled')) {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: All tool calls were cancelled. Updating history and ending agent turn.`,
);
currentTask.addToolResponsesToHistory(completedTools);
agentTurnActive = false;
const stateChange: StateChange = {
kind: CoderAgentEvent.StateChangeEvent,
};
currentTask.setTaskStateAndPublishUpdate(
'input-required',
stateChange,
undefined,
undefined,
true,
);
if (currentTask.hasPendingTools) {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: There are still ${currentTask.pendingToolsCount} pending tools waiting for approval. Yielding to user.`,
);
agentTurnActive = false;
} else {
const completedTools = currentTask.getAndClearCompletedTools();

if (completedTools.length > 0) {
// If all completed tool calls were canceled, manually add them to history and set state to input-required, final:true
if (completedTools.every((tool) => tool.status === 'cancelled')) {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: All tool calls were cancelled. Updating history and ending agent turn.`,
);
currentTask.addToolResponsesToHistory(completedTools);
agentTurnActive = false;
const stateChange: StateChange = {
kind: CoderAgentEvent.StateChangeEvent,
};
currentTask.setTaskStateAndPublishUpdate(
'input-required',
stateChange,
undefined,
undefined,
true,
);
} else {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: Found ${completedTools.length} completed tool calls. Sending results back to LLM.`,
);

agentEvents = currentTask.sendCompletedToolsToLlm(
completedTools,
abortSignal,
);
// Continue the loop to process the LLM response to the tool results.
}
} else {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: Found ${completedTools.length} completed tool calls. Sending results back to LLM.`,
);

agentEvents = currentTask.sendCompletedToolsToLlm(
completedTools,
abortSignal,
`[CoderAgentExecutor] Task ${taskId}: No more tool calls to process. Ending agent turn.`,
);
// Continue the loop to process the LLM response to the tool results.
agentTurnActive = false;
}
} else {
logger.info(
`[CoderAgentExecutor] Task ${taskId}: No more tool calls to process. Ending agent turn.`,
);
agentTurnActive = false;
}
}

Expand Down
29 changes: 29 additions & 0 deletions packages/a2a-server/src/agent/task.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,35 @@ describe('Task', () => {

expect(handleEventDrivenToolCallSpy).toHaveBeenCalled();
});

describe('Pending Tools state', () => {
it('should correctly report pending tools presence and count', () => {
const mockConfig = createMockConfig();
const mockEventBus: ExecutionEventBus = {
publish: vi.fn(),
on: vi.fn(),
off: vi.fn(),
once: vi.fn(),
removeAllListeners: vi.fn(),
finished: vi.fn(),
};

// @ts-expect-error - Calling private constructor
const task = new Task(
'task-id',
'context-id',
mockConfig as Config,
mockEventBus,
);

expect(task.hasPendingTools).toBe(false);
expect(task.pendingToolsCount).toBe(0);

task['_registerToolCall']('tool-1', 'scheduled');
expect(task.hasPendingTools).toBe(true);
expect(task.pendingToolsCount).toBe(1);
});
});
});

describe('Serialization and Mapping', () => {
Expand Down
8 changes: 8 additions & 0 deletions packages/a2a-server/src/agent/task.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@ export class Task {
);
}

get hasPendingTools(): boolean {
return this.pendingToolCalls.size > 0;
}

get pendingToolsCount(): number {
return this.pendingToolCalls.size;
}

static async create(
id: string,
contextId: string,
Expand Down
32 changes: 32 additions & 0 deletions packages/a2a-server/src/config/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
PRIORITY_YOLO_ALLOW_ALL,
createPolicyEngineConfig,
} from '@google/gemini-cli-core';
import type { AgentSettings } from '../types.js';

// Mock dependencies
vi.mock('@google/gemini-cli-core', async (importOriginal) => {
Expand Down Expand Up @@ -612,3 +613,34 @@ describe('loadConfig', () => {
});
});
});

describe('setIsTrusted', () => {
beforeEach(() => {
vi.resetModules();
});

afterEach(() => {
vi.unstubAllEnvs();
});

it('should return true when GEMINI_FOLDER_TRUST env var is true', async () => {
vi.stubEnv('GEMINI_FOLDER_TRUST', 'true');
const { setIsTrusted } = await import('./config.js');
expect(setIsTrusted(undefined)).toBe(true);
expect(setIsTrusted({ isTrusted: false } as AgentSettings)).toBe(true);
});

it('should return false when GEMINI_FOLDER_TRUST env var is false', async () => {
vi.stubEnv('GEMINI_FOLDER_TRUST', 'false');
const { setIsTrusted } = await import('./config.js');
expect(setIsTrusted(undefined)).toBe(false);
expect(setIsTrusted({ isTrusted: true } as AgentSettings)).toBe(false);
});

it('should fallback to agentSettings.isTrusted if env var is undefined', async () => {
const { setIsTrusted } = await import('./config.js');
expect(setIsTrusted({ isTrusted: true } as AgentSettings)).toBe(true);
expect(setIsTrusted({ isTrusted: false } as AgentSettings)).toBe(false);
expect(setIsTrusted(undefined)).toBe(false);
});
});
11 changes: 11 additions & 0 deletions packages/a2a-server/src/config/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import { logger } from '../utils/logger.js';
import type { Settings } from './settings.js';
import { type AgentSettings, CoderAgentEvent } from '../types.js';

const INITIAL_FOLDER_TRUST = process.env['GEMINI_FOLDER_TRUST'];

export async function loadConfig(
settings: Settings,
extensionLoader: ExtensionLoader,
Expand Down Expand Up @@ -182,6 +184,15 @@ export async function loadConfig(
return config;
}

export function setIsTrusted(
agentSettings: AgentSettings | undefined,
): boolean {
if (INITIAL_FOLDER_TRUST !== undefined) {
return INITIAL_FOLDER_TRUST === 'true';
}
return !!agentSettings?.isTrusted;
}
Comment on lines +187 to +194

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-critical critical

Folder Trust Bypass via Workspace .env Environment Pollution

Severity: critical
Sub-category: Broken Access Control / Environment Pollution

Description:
The setIsTrusted function determines if a workspace folder is trusted by checking process.env['GEMINI_FOLDER_TRUST'] and falling back to agentSettings.isTrusted.
However, in CoderAgentExecutor.getConfig (in packages/a2a-server/src/agent/executor.ts), loadEnvironment() is called immediately after setIsTrusted. loadEnvironment() loads environment variables from the workspace's .env file and overrides process.env globally using dotenv.config({ override: true }).
If an untrusted workspace contains a .env file with GEMINI_FOLDER_TRUST=true, this value will be loaded into process.env globally. On any subsequent task execution or configuration resolution, setIsTrusted will read this polluted environment variable and return true, completely bypassing the folder trust security mechanism.
Since trusted folders allow the agent to execute powerful tools (such as running arbitrary shell commands) without user confirmation, this bypass directly enables Remote Code Execution (RCE) when an untrusted folder is opened.

Remediation:
Do not allow workspace-level .env files to override system-level security environment variables like GEMINI_FOLDER_TRUST. Alternatively, resolve the folder trust status using only the system's initial environment variables or the explicit agentSettings.isTrusted passed by the client, and prevent loadEnvironment() from overriding security-critical environment variables.

const INITIAL_FOLDER_TRUST = process.env['GEMINI_FOLDER_TRUST'];

export function setIsTrusted(
  agentSettings: AgentSettings | undefined,
): boolean {
  if (INITIAL_FOLDER_TRUST !== undefined) {
    return INITIAL_FOLDER_TRUST === 'true';
  }
  return !!agentSettings?.isTrusted;
}
References
  1. Workspace-level configurations should be treated as untrusted by default. Security-sensitive settings, such as policy paths, must be loaded from trusted user-level configuration and should not be overridable by workspace settings unless trust is explicitly granted by the hosting environment.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please fix this.

Comment on lines +187 to +194

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The setIsTrusted function is vulnerable to Remote Code Execution (RCE). An untrusted client can self-certify their workspace as trusted by sending isTrusted: true in userMessage.metadata, which is supplied by the client and is fully user-controlled. This bypasses security restrictions, allowing an attacker to execute arbitrary tools and potentially achieve RCE on the server. To remediate this, the trust status must be determined solely by server-side configuration, such as a strict allowlist of trusted paths, or by requiring the GEMINI_FOLDER_TRUST environment variable to be explicitly set on the server, and not be controlled by the client.

Suggested change
export function setIsTrusted(
agentSettings: AgentSettings | undefined,
): boolean {
if (INITIAL_FOLDER_TRUST !== undefined) {
return INITIAL_FOLDER_TRUST === 'true';
}
return !!agentSettings?.isTrusted;
}
export function setIsTrusted(
agentSettings: AgentSettings | undefined,
): boolean {
if (INITIAL_FOLDER_TRUST !== undefined) {
return INITIAL_FOLDER_TRUST === 'true';
}
return false;
}
References
  1. Workspace-level configurations should be treated as untrusted by default. Security-sensitive settings, such as policy paths, must be loaded from trusted user-level configuration and should not be overridable by workspace settings unless trust is explicitly granted by the hosting environment.


export function setTargetDir(agentSettings: AgentSettings | undefined): string {
const originalCWD = process.cwd();
const targetDir =
Expand Down
10 changes: 8 additions & 2 deletions packages/core/src/scheduler/scheduler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ import {
type ScheduledToolCall,
} from './types.js';
import { ToolErrorType } from '../tools/tool-error.js';
import { UPDATE_TOPIC_TOOL_NAME } from '../tools/tool-names.js';
import {
UPDATE_TOPIC_TOOL_NAME,
EDIT_TOOL_NAMES,
} from '../tools/tool-names.js';
Comment on lines +29 to +32

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Import EDIT_TOOL_NAMES instead of hardcoding individual file-modifying tool names to ensure all file-modifying tools are executed sequentially.

import {
  EDIT_TOOL_NAMES,
  UPDATE_TOPIC_TOOL_NAME,
} from '../tools/tool-names.js';

import { PolicyDecision, type ApprovalMode } from '../policy/types.js';
import {
ToolConfirmationOutcome,
Expand Down Expand Up @@ -548,7 +551,10 @@ export class Scheduler {

private _isParallelizable(request: ToolCallRequestInfo): boolean {
// update_topic tool is forced as sequential call
if (request.name === UPDATE_TOPIC_TOOL_NAME) {
if (
request.name === UPDATE_TOPIC_TOOL_NAME ||
EDIT_TOOL_NAMES.has(request.name)
) {
return false;
}
Comment on lines +554 to 559

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Instead of hardcoding specific file-modifying tool names like WRITE_FILE_TOOL_NAME and EDIT_TOOL_NAME, we should use the EDIT_TOOL_NAMES set (which contains all file-modifying/restorable tools, such as replace or others). This ensures that any new or existing file-modifying tools are automatically executed sequentially, preventing race conditions and potential file corruption.

    if (
      request.name === UPDATE_TOPIC_TOOL_NAME ||
      EDIT_TOOL_NAMES.has(request.name)
    ) {
      return false;
    }

if (request.args) {
Expand Down
Loading
Loading