Skip to content

Commit 296545f

Browse files
committed
feat(core): request json response format for locate adapters
1 parent aa83614 commit 296545f

11 files changed

Lines changed: 83 additions & 15 deletions

File tree

packages/core/src/ai-model/models/glm.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type {
44
ChatCompletionParamsResult,
55
ModelAdapterDefinition,
66
} from './types';
7+
import { isLocateIntent } from './utils/intent';
78

89
const buildGlmChatCompletionParams = (
910
input: ChatCompletionCallContext,
@@ -16,6 +17,12 @@ const buildGlmChatCompletionParams = (
1617
commonOverrideConfig.temperature = userConfig.temperature;
1718
}
1819

20+
// Zhipu structured output JSON mode:
21+
// https://docs.bigmodel.cn/cn/guide/capabilities/struct-output
22+
if (isLocateIntent(input.intent)) {
23+
commonOverrideConfig.response_format = { type: 'json_object' };
24+
}
25+
1926
const modelSpecificConfig: Record<string, unknown> = {};
2027

2128
if (reasoningEnabled !== 'default') {

packages/core/src/ai-model/models/gpt.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@ import type {
55
ImageDetail,
66
ModelAdapterDefinition,
77
} from './types';
8+
import { isLocateIntent } from './utils/intent';
89

910
const originalImageDetailForDefaultIntent = (
1011
input: ChatCompletionCallContext,
1112
): ImageDetail | undefined =>
12-
input.intent === 'default' || input.requiresOriginalImageDetail
13+
isLocateIntent(input.intent) || input.requiresOriginalImageDetail
1314
? 'original'
1415
: undefined;
1516

@@ -24,6 +25,12 @@ const buildGpt5ChatCompletionParams = (
2425
commonOverrideConfig.temperature = userConfig.temperature;
2526
}
2627

28+
// OpenAI Chat Completions JSON mode:
29+
// https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#json-mode
30+
if (isLocateIntent(input.intent)) {
31+
commonOverrideConfig.response_format = { type: 'json_object' };
32+
}
33+
2734
const effectiveReasoningEffort =
2835
reasoningEnabled === true ? (reasoningEffort ?? 'medium') : 'none';
2936

packages/core/src/ai-model/models/kimi.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type {
44
ChatCompletionParamsResult,
55
ModelAdapterDefinition,
66
} from './types';
7+
import { isLocateIntent } from './utils/intent';
78

89
const buildKimiChatCompletionParams = (
910
input: ChatCompletionCallContext,
@@ -16,6 +17,12 @@ const buildKimiChatCompletionParams = (
1617
// kimi disallow custom temperature
1718
commonOverrideConfig.temperature = undefined;
1819

20+
// Kimi Chat Completions response_format:
21+
// https://platform.kimi.com/docs/api/chat
22+
if (isLocateIntent(input.intent)) {
23+
commonOverrideConfig.response_format = { type: 'json_object' };
24+
}
25+
1926
const modelSpecificConfig: Record<string, unknown> = {
2027
thinking: {
2128
type: effectiveReasoningEnabled ? 'enabled' : 'disabled',

packages/core/src/ai-model/models/mimo.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type {
44
ChatCompletionParamsResult,
55
ModelAdapterDefinition,
66
} from './types';
7+
import { isLocateIntent } from './utils/intent';
78

89
const buildMimoChatCompletionParams = (
910
input: ChatCompletionCallContext,
@@ -14,9 +15,9 @@ const buildMimoChatCompletionParams = (
1415

1516
// https://platform.xiaomimimo.com/docs/zh-CN/api/chat/openai-api
1617
// Observed with thinking disabled: Mimo needs json_object to return JSON.
17-
commonOverrideConfig.response_format = {
18-
type: intent === 'default' ? 'json_object' : 'text',
19-
};
18+
if (isLocateIntent(intent)) {
19+
commonOverrideConfig.response_format = { type: 'json_object' };
20+
}
2021

2122
if (userConfig.temperature !== undefined) {
2223
commonOverrideConfig.temperature = userConfig.temperature;

packages/core/src/ai-model/models/qwen.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import type {
99
ChatCompletionParamsResult,
1010
ModelAdapterDefinition,
1111
} from './types';
12+
import { isLocateIntent } from './utils/intent';
1213

1314
const defaultBboxSize = 20;
1415

@@ -65,6 +66,12 @@ const buildQwenChatCompletionParams = (
6566
commonOverrideConfig.temperature = userConfig.temperature;
6667
}
6768

69+
// Alibaba Cloud Model Studio JSON mode:
70+
// https://help.aliyun.com/zh/model-studio/json-mode
71+
if (isLocateIntent(input.intent)) {
72+
commonOverrideConfig.response_format = { type: 'json_object' };
73+
}
74+
6875
const modelSpecificConfig: Record<string, unknown> = {};
6976

7077
if (reasoningEnabled !== 'default') {

packages/core/src/ai-model/models/utils/intent.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import type { TIntent } from '@midscene/shared/env';
2+
3+
export function isLocateIntent(intent?: TIntent): boolean {
4+
return intent === 'default';
5+
}

packages/core/tests/unit-test/model-adapter/glm.test.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,17 @@ describe('glm model adapter', () => {
8787
thinking: { type: 'disabled' },
8888
});
8989
});
90+
91+
it('uses json_object response format for glm-v locate intent', () => {
92+
const result = glmAdapter.chatCompletion.buildChatCompletionParams({
93+
intent: 'default',
94+
userConfig: {},
95+
});
96+
97+
expect(result.config).toEqual({
98+
temperature: 0,
99+
response_format: { type: 'json_object' },
100+
thinking: { type: 'disabled' },
101+
});
102+
});
90103
});

packages/core/tests/unit-test/model-adapter/gpt.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,13 @@ describe('gpt model adapter', () => {
133133
reasoning_effort: 'none',
134134
});
135135
});
136+
137+
it('uses json_object response format for gpt-5 locate intent', () => {
138+
const result = gpt5Adapter.chatCompletion.buildChatCompletionParams({
139+
intent: 'default',
140+
userConfig: {},
141+
});
142+
143+
expect(result.config.response_format).toEqual({ type: 'json_object' });
144+
});
136145
});

packages/core/tests/unit-test/model-adapter/kimi.test.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,13 @@ describe('kimi model adapter', () => {
104104
thinking: { type: 'disabled' },
105105
});
106106
});
107+
108+
it('uses json_object response format for kimi locate intent', () => {
109+
const result = kimiAdapter.chatCompletion.buildChatCompletionParams({
110+
intent: 'default',
111+
userConfig: {},
112+
});
113+
114+
expect(result.config.response_format).toEqual({ type: 'json_object' });
115+
});
107116
});

packages/core/tests/unit-test/model-adapter/mimo.test.ts

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ describe('mimo model adapter', () => {
1515

1616
expect(result.config).toEqual({
1717
temperature: 0,
18-
response_format: { type: 'text' },
1918
thinking: { type: 'disabled' },
2019
});
2120
});
@@ -48,7 +47,6 @@ describe('mimo model adapter', () => {
4847
expect(result.config).toEqual({
4948
temperature: 0.7,
5049
seed: 123,
51-
response_format: { type: 'text' },
5250
thinking: { type: 'disabled' },
5351
});
5452
});
@@ -69,12 +67,10 @@ describe('mimo model adapter', () => {
6967

7068
expect(disabledResult.config).toEqual({
7169
temperature: 0,
72-
response_format: { type: 'text' },
7370
thinking: { type: 'disabled' },
7471
});
7572
expect(enabledResult.config).toEqual({
7673
temperature: 0,
77-
response_format: { type: 'text' },
7874
thinking: { type: 'enabled' },
7975
});
8076
});
@@ -93,7 +89,6 @@ describe('mimo model adapter', () => {
9389
]);
9490
expect(result.config).toEqual({
9591
temperature: 0,
96-
response_format: { type: 'text' },
9792
thinking: { type: 'disabled' },
9893
});
9994
});
@@ -107,20 +102,18 @@ describe('mimo model adapter', () => {
107102

108103
expect(result.config).toEqual({
109104
temperature: 0.7,
110-
response_format: { type: 'text' },
111105
thinking: { type: 'disabled' },
112106
});
113107
});
114108

115-
it('uses text response format for planning intent', () => {
109+
it('does not set response format for planning intent', () => {
116110
const result = mimoAdapter.chatCompletion.buildChatCompletionParams({
117111
intent: 'planning',
118112
userConfig: {},
119113
});
120114

121115
expect(result.config).toEqual({
122116
temperature: 0,
123-
response_format: { type: 'text' },
124117
thinking: { type: 'disabled' },
125118
});
126119
});
@@ -138,8 +131,6 @@ describe('mimo model adapter', () => {
138131
expect(defaultResult.config.response_format).toEqual({
139132
type: 'json_object',
140133
});
141-
expect(insightResult.config.response_format).toEqual({
142-
type: 'text',
143-
});
134+
expect(insightResult.config.response_format).toBeUndefined();
144135
});
145136
});

0 commit comments

Comments
 (0)