Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 159 additions & 77 deletions docs/reference/configuration.md

Large diffs are not rendered by default.

28 changes: 24 additions & 4 deletions evals/llm-judge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,30 @@ export class LLMJudge {

for (const res of rawResults) {
// Remove any punctuation the model might have appended
const cleanRes = res.replace(/[^A-Z]/g, '');
if (cleanRes.startsWith('YES')) yes++;
else if (cleanRes.startsWith('NO')) no++;
else other++;
const cleanRes = res.replace(/[^A-Z ]/g, '');
if (
cleanRes.includes('THE ANSWER IS YES') ||
cleanRes.includes('ANSWER IS YES') ||
cleanRes.endsWith('YES')
) {
yes++;
} else if (
cleanRes.includes('THE ANSWER IS NO') ||
cleanRes.includes('ANSWER IS NO') ||
cleanRes.endsWith('NO')
) {
no++;
} else if (cleanRes.trim() === 'YES') {
yes++;
} else if (cleanRes.trim() === 'NO') {
no++;
} else {
// Fallback: look for YES or NO as standalone words or at the end
const words = cleanRes.split(/\s+/);
if (words.includes('YES')) yes++;
else if (words.includes('NO')) no++;
else other++;
}
}

// Pass if YES > NO and YES > OTHER (plurality)
Expand Down
14 changes: 4 additions & 10 deletions packages/cli/src/acp/acpSessionManager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,12 @@ describe('AcpSessionManager', () => {
);
});

it('should include gemini-3.1-flash-lite when useGemini31FlashLite is true', async () => {
it('should NOT include retired preview models (none) in available models', async () => {
mockConfig.getContentGeneratorConfig = vi.fn().mockReturnValue({
apiKey: 'test-key',
});
mockConfig.getHasAccessToPreviewModel = vi.fn().mockReturnValue(true);
mockConfig.getGemini31LaunchedSync = vi.fn().mockReturnValue(true);
mockConfig.getGemini31FlashLiteLaunchedSync = vi.fn().mockReturnValue(true);

const response = await manager.newSession(
{
Expand All @@ -231,14 +230,9 @@ describe('AcpSessionManager', () => {
{},
);

expect(response.models?.availableModels).toEqual(
expect.arrayContaining([
expect.objectContaining({
modelId: 'gemini-3.1-flash-lite-preview',
name: 'gemini-3.1-flash-lite-preview',
}),
]),
);
const modelIds =
response.models?.availableModels?.map((m) => m.modelId) ?? [];
expect(modelIds).not.toContain('none');
});

it('should return modes with plan mode when plan is enabled', async () => {
Expand Down
40 changes: 16 additions & 24 deletions packages/cli/src/acp/acpUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,19 @@ import {
type ToolCallConfirmationDetails,
Kind,
ApprovalMode,
DEFAULT_GEMINI_MODEL_AUTO,
PREVIEW_GEMINI_MODEL_AUTO,
GEMINI_MODEL_ALIAS_AUTO,
DEFAULT_GEMINI_MODEL,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
PREVIEW_GEMINI_3_1_MODEL,
PREVIEW_GEMINI_MODEL,
PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL,
PREVIEW_GEMINI_FLASH_MODEL,
PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL,
PREVIEW_GEMINI_FLASH_LITE_MODEL,
getDisplayString,
AuthType,
ToolConfirmationOutcome,
getAutoModelDescription,
} from '@google/gemini-cli-core';
import type * as acp from '@agentclientprotocol/sdk';
import { z } from 'zod';
Expand Down Expand Up @@ -262,11 +262,10 @@ export function buildAvailableModels(
}>;
currentModelId: string;
} {
const preferredModel = config.getModel() || DEFAULT_GEMINI_MODEL_AUTO;
const preferredModel = config.getModel() || GEMINI_MODEL_ALIAS_AUTO;
const shouldShowPreviewModels = config.getHasAccessToPreviewModel();
const useGemini31 = config.getGemini31LaunchedSync?.() ?? false;
const useGemini31FlashLite =
config.getGemini31FlashLiteLaunchedSync?.() ?? false;
const useGemini3_5Flash = config.hasGemini35FlashGAAccess?.() ?? false;
const selectedAuthType = settings.merged.security.auth.selectedType;
const useCustomToolModel =
useGemini31 && selectedAuthType === AuthType.USE_GEMINI;
Expand All @@ -278,7 +277,7 @@ export function buildAvailableModels(
) {
const options = config.getModelConfigService().getAvailableModelOptions({
useGemini3_1: useGemini31,
useGemini3_1FlashLite: useGemini31FlashLite,
useGemini3_5Flash,
useCustomTools: useCustomToolModel,
hasAccessToPreview: shouldShowPreviewModels,
});
Expand All @@ -292,23 +291,16 @@ export function buildAvailableModels(
// --- LEGACY PATH ---
const mainOptions = [
{
value: DEFAULT_GEMINI_MODEL_AUTO,
title: getDisplayString(DEFAULT_GEMINI_MODEL_AUTO),
description:
'Let Gemini CLI decide the best model for the task: gemini-2.5-pro, gemini-2.5-flash',
value: GEMINI_MODEL_ALIAS_AUTO,
title: getDisplayString(GEMINI_MODEL_ALIAS_AUTO),
description: getAutoModelDescription(
shouldShowPreviewModels,
useGemini31,
useGemini3_5Flash,
),
},
];

if (shouldShowPreviewModels) {
mainOptions.unshift({
value: PREVIEW_GEMINI_MODEL_AUTO,
title: getDisplayString(PREVIEW_GEMINI_MODEL_AUTO),
description: useGemini31
? 'Let Gemini CLI decide the best model for the task: gemini-3.1-pro, gemini-3-flash'
: 'Let Gemini CLI decide the best model for the task: gemini-3-pro, gemini-3-flash',
});
}

const manualOptions = [
{
value: DEFAULT_GEMINI_MODEL,
Expand Down Expand Up @@ -344,10 +336,10 @@ export function buildAvailableModels(
},
];

if (useGemini31FlashLite) {
if (PREVIEW_GEMINI_FLASH_LITE_MODEL !== 'none') {
previewOptions.push({
value: PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL,
title: getDisplayString(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL),
value: PREVIEW_GEMINI_FLASH_LITE_MODEL,
title: getDisplayString(PREVIEW_GEMINI_FLASH_LITE_MODEL),
});
}

Expand Down
46 changes: 22 additions & 24 deletions packages/cli/src/ui/components/ModelDialog.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ import { waitFor } from '../../test-utils/async.js';
import { createMockSettings } from '../../test-utils/settings.js';
import {
DEFAULT_GEMINI_MODEL,
DEFAULT_GEMINI_MODEL_AUTO,
GEMINI_MODEL_ALIAS_AUTO,
DEFAULT_GEMINI_FLASH_MODEL,
DEFAULT_GEMINI_FLASH_LITE_MODEL,
PREVIEW_GEMINI_MODEL,
PREVIEW_GEMINI_3_1_MODEL,
PREVIEW_GEMINI_3_1_CUSTOM_TOOLS_MODEL,
PREVIEW_GEMINI_FLASH_MODEL,
PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL,
PREVIEW_GEMINI_FLASH_LITE_MODEL,
AuthType,
} from '@google/gemini-cli-core';
import type { Config, ModelSlashCommandEvent } from '@google/gemini-cli-core';
Expand All @@ -34,6 +34,11 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
await importOriginal<typeof import('@google/gemini-cli-core')>();
return {
...actual,
getAutoModelDescription: (
hasAccessToPreview: boolean,
useGemini3_1?: boolean,
) =>
`Auto Model Description (preview: ${hasAccessToPreview}, 3.1: ${useGemini3_1})`,
getDisplayString: (val: string) => mockGetDisplayString(val),
logModelSlashCommand: (config: Config, event: ModelSlashCommandEvent) =>
mockLogModelSlashCommand(config, event),
Expand All @@ -42,7 +47,7 @@ vi.mock('@google/gemini-cli-core', async (importOriginal) => {
mockModelSlashCommandEvent(model);
}
},
PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL: 'gemini-3.1-flash-lite-preview',
PREVIEW_GEMINI_FLASH_LITE_MODEL: 'none',
};
});

Expand All @@ -62,7 +67,6 @@ describe('<ModelDialog />', () => {
getHasAccessToPreviewModel: () => boolean;
getIdeMode: () => boolean;
getGemini31LaunchedSync: () => boolean;
getGemini31FlashLiteLaunchedSync: () => boolean;
getProModelNoAccess: () => Promise<boolean>;
getProModelNoAccessSync: () => boolean;
getExperimentalGemma: () => boolean;
Expand All @@ -83,7 +87,6 @@ describe('<ModelDialog />', () => {
getHasAccessToPreviewModel: mockGetHasAccessToPreviewModel,
getIdeMode: () => false,
getGemini31LaunchedSync: mockGetGemini31LaunchedSync,
getGemini31FlashLiteLaunchedSync: mockGetGemini31FlashLiteLaunchedSync,
getProModelNoAccess: mockGetProModelNoAccess,
getProModelNoAccessSync: mockGetProModelNoAccessSync,
getExperimentalGemma: () => false,
Expand All @@ -93,17 +96,15 @@ describe('<ModelDialog />', () => {

beforeEach(() => {
vi.resetAllMocks();
mockGetModel.mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO);
mockGetModel.mockReturnValue(GEMINI_MODEL_ALIAS_AUTO);
mockGetHasAccessToPreviewModel.mockReturnValue(false);
mockGetGemini31LaunchedSync.mockReturnValue(false);
mockGetGemini31FlashLiteLaunchedSync.mockReturnValue(false);
mockGetProModelNoAccess.mockResolvedValue(false);
mockGetProModelNoAccessSync.mockReturnValue(false);

// Default implementation for getDisplayString
mockGetDisplayString.mockImplementation((val: string) => {
if (val === 'auto-gemini-2.5') return 'Auto (Gemini 2.5)';
if (val === 'auto-gemini-3') return 'Auto (Preview)';
if (val === 'auto') return 'Auto';
return val;
});
});
Expand Down Expand Up @@ -153,17 +154,13 @@ describe('<ModelDialog />', () => {
expect(output).not.toContain(DEFAULT_GEMINI_MODEL);
expect(output).not.toContain(PREVIEW_GEMINI_MODEL);

// Verify order: Flash Preview -> Flash Lite Preview -> Flash -> Flash Lite
// Verify order: Flash Preview -> Flash Lite (Preview/Default) -> Flash
const flashPreviewIdx = output.indexOf(PREVIEW_GEMINI_FLASH_MODEL);
const flashLitePreviewIdx = output.indexOf(
PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL,
);
const flashIdx = output.indexOf(DEFAULT_GEMINI_FLASH_MODEL);
const flashLiteIdx = output.indexOf(DEFAULT_GEMINI_FLASH_LITE_MODEL);
const flashIdx = output.indexOf(DEFAULT_GEMINI_FLASH_MODEL);

expect(flashPreviewIdx).toBeLessThan(flashLitePreviewIdx);
expect(flashLitePreviewIdx).toBeLessThan(flashIdx);
expect(flashIdx).toBeLessThan(flashLiteIdx);
expect(flashPreviewIdx).toBeLessThan(flashLiteIdx);
expect(flashLiteIdx).toBeLessThan(flashIdx);

expect(output).not.toContain('Auto');
unmount();
Expand Down Expand Up @@ -234,7 +231,7 @@ describe('<ModelDialog />', () => {

await waitFor(() => {
expect(mockSetModel).toHaveBeenCalledWith(
DEFAULT_GEMINI_MODEL_AUTO,
GEMINI_MODEL_ALIAS_AUTO,
true, // Session only by default
);
expect(mockOnClose).toHaveBeenCalled();
Expand Down Expand Up @@ -292,7 +289,7 @@ describe('<ModelDialog />', () => {

await waitFor(() => {
expect(mockSetModel).toHaveBeenCalledWith(
DEFAULT_GEMINI_MODEL_AUTO,
GEMINI_MODEL_ALIAS_AUTO,
false, // Persist enabled
);
expect(mockOnClose).toHaveBeenCalled();
Expand Down Expand Up @@ -355,7 +352,7 @@ describe('<ModelDialog />', () => {
mockGetModel.mockReturnValue(DEFAULT_GEMINI_MODEL);
mockGetDisplayString.mockImplementation((val: string) => {
if (val === DEFAULT_GEMINI_MODEL) return 'My Custom Model Display';
if (val === 'auto-gemini-2.5') return 'Auto (Gemini 2.5)';
if (val === 'auto') return 'Auto';
return val;
});
const { lastFrame, unmount } = await renderComponent();
Expand All @@ -369,9 +366,9 @@ describe('<ModelDialog />', () => {
mockGetHasAccessToPreviewModel.mockReturnValue(true);
});

it('shows Auto (Preview) in main view when access is granted', async () => {
it('shows Auto in main view when access is granted', async () => {
const { lastFrame, unmount } = await renderComponent();
expect(lastFrame()).toContain('Auto (Preview)');
expect(lastFrame()).toContain('Auto');
unmount();
});

Expand Down Expand Up @@ -449,7 +446,7 @@ describe('<ModelDialog />', () => {
unmount();
});

it('shows Flash Lite Preview model regardless of tier when flag is enabled', async () => {
it('does not show Flash Lite Preview model when it is retired (none) even if flag is enabled', async () => {
mockGetProModelNoAccessSync.mockReturnValue(false);
mockGetProModelNoAccess.mockResolvedValue(false);
mockGetHasAccessToPreviewModel.mockReturnValue(true);
Expand All @@ -468,7 +465,8 @@ describe('<ModelDialog />', () => {
await waitUntilReady();

const output = lastFrame();
expect(output).toContain(PREVIEW_GEMINI_3_1_FLASH_LITE_MODEL);
expect(output).not.toContain(PREVIEW_GEMINI_FLASH_LITE_MODEL);
expect(output).toContain(DEFAULT_GEMINI_FLASH_LITE_MODEL);
unmount();
});
});
Expand Down
Loading
Loading