Skip to content

Commit b2d020d

Browse files
fix: forward image attachments for OpenAI-compatible providers (#373)
## Summary

- Treat OpenAI-compatible chat, Anthropic, OpenAI Responses, and Codex wires as image-capable when synthesizing provider models.
- Forward image attachments through the v0.2 agent path for OpenAI-compatible providers instead of relying on model-id heuristics.
- Add focused core/providers tests and a changeset.

Fixes #201

## Verification

- pnpm --filter @open-codesign/core test -- src/agent.test.ts
- pnpm --filter @open-codesign/providers test -- src/index.test.ts
- pnpm --filter @open-codesign/core typecheck
- pnpm --filter @open-codesign/providers typecheck

Co-authored-by: Sun-sunshine06 <Sun-sunshine06@users.noreply.github.com>
1 parent c7b7634 commit b2d020d

7 files changed

Lines changed: 94 additions & 3 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
"@open-codesign/core": patch
3+
"@open-codesign/providers": patch
4+
---
5+
6+
Forward image attachments through OpenAI-compatible and Anthropic-style provider paths instead of only marking Codex synthesized models as image-capable.

apps/desktop/src/renderer/src/components/AddCustomProviderModal.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ export function AddCustomProviderModal({
298298
if (!editTarget.builtin) {
299299
const previous = editTarget.tlsRejectUnauthorized === true;
300300
if (previous !== tlsRejectUnauthorized) {
301-
update.tlsRejectUnauthorized = tlsRejectUnauthorized ? true : false;
301+
update.tlsRejectUnauthorized = !!tlsRejectUnauthorized;
302302
}
303303
}
304304
await window.codesign.config.updateProvider(update);

biome.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"$schema": "https://biomejs.dev/schemas/2.4.14/schema.json",
2+
"$schema": "https://biomejs.dev/schemas/2.4.15/schema.json",
33
"vcs": {
44
"enabled": true,
55
"clientKind": "git",

packages/core/src/agent.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,33 @@ describe('generateViaAgent()', () => {
11561156
);
11571157
});
11581158

1159+
it('passes image attachments through openai-compatible agent models', async () => {
1160+
scriptedAgent = { assistantText: RESPONSE_WITH_ARTIFACT };
1161+
await generateViaAgent(
1162+
{
1163+
prompt: 'replicate this screenshot',
1164+
history: [],
1165+
model: { provider: 'custom-openai', modelId: 'local-text-or-vision-model' },
1166+
apiKey: 'sk-test',
1167+
wire: 'openai-chat',
1168+
baseUrl: 'https://gateway.example.test/v1',
1169+
attachments: [
1170+
{
1171+
name: 'shot.png',
1172+
path: 'references/shot.png',
1173+
mediaType: 'image/png',
1174+
imageDataUrl: 'data:image/png;base64,aW1n',
1175+
},
1176+
],
1177+
},
1178+
{ fs: makeStubFs({}) },
1179+
);
1180+
1181+
expect(agentCalls[0]?.prompts[0]?.images).toEqual([
1182+
{ type: 'image', data: 'aW1n', mimeType: 'image/png' },
1183+
]);
1184+
});
1185+
11591186
it('blocks preview and done until set_todos has run for fresh multi-step work', async () => {
11601187
scriptedAgent = { assistantText: RESPONSE_WITH_ARTIFACT };
11611188
await generateViaAgent(

packages/core/src/agent.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,9 @@ function supportsImageInput(wire: WireApi | undefined, modelId: string): boolean
242242
if (wire === 'anthropic' || wire === 'openai-responses' || wire === 'openai-codex-responses') {
243243
return true;
244244
}
245+
if (wire === 'openai-chat') {
246+
return true;
247+
}
245248
const lower = modelId.toLowerCase();
246249
return (
247250
lower.includes('vision') ||

packages/providers/src/index.test.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,57 @@ describe('complete', () => {
361361
expect(result.content).toBe('ok');
362362
});
363363

364+
it('keeps image inputs for synthesized openai-chat models', async () => {
365+
getModelMock.mockReturnValue(undefined);
366+
completeSimpleMock.mockImplementationOnce(async (model, context) => {
367+
expect(model).toMatchObject({
368+
api: 'openai-completions',
369+
input: ['text', 'image'],
370+
baseUrl: 'https://gateway.example.test/v1',
371+
});
372+
expect(context.messages).toEqual([
373+
{
374+
role: 'user',
375+
content: [
376+
{ type: 'text', text: 'use this screenshot' },
377+
{ type: 'image', data: 'AAAA', mimeType: 'image/png' },
378+
],
379+
timestamp: 1,
380+
},
381+
]);
382+
return {
383+
role: 'assistant',
384+
content: [{ type: 'text', text: 'ok' }],
385+
api: 'openai-completions',
386+
provider: 'custom-openai',
387+
model: 'local-text-or-vision-model',
388+
usage: {
389+
input: 1,
390+
output: 1,
391+
cacheRead: 0,
392+
cacheWrite: 0,
393+
totalTokens: 2,
394+
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
395+
},
396+
stopReason: 'stop',
397+
timestamp: Date.now(),
398+
};
399+
});
400+
401+
const result = await complete(
402+
{ provider: 'custom-openai', modelId: 'local-text-or-vision-model' },
403+
[{ role: 'user', content: 'use this screenshot' }],
404+
{
405+
apiKey: 'sk-test',
406+
wire: 'openai-chat',
407+
baseUrl: 'https://gateway.example.test/v1',
408+
userImages: [{ data: 'AAAA', mimeType: 'image/png' }],
409+
},
410+
);
411+
412+
expect(result.content).toBe('ok');
413+
});
414+
364415
it('synthesizes openai-chat PiModel with reasoning=false for Qwen DashScope (#183)', async () => {
365416
getModelMock.mockReturnValue(undefined);
366417
completeSimpleMock.mockImplementationOnce(async (model) => {

packages/providers/src/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,11 @@ function synthesizeWireModel(
308308
wire: GenerateOptions['wire'],
309309
baseUrl: string | undefined,
310310
): PiModel {
311-
const supportsImageInput = wire === 'openai-codex-responses';
311+
const supportsImageInput =
312+
wire === 'anthropic' ||
313+
wire === 'openai-chat' ||
314+
wire === 'openai-responses' ||
315+
wire === 'openai-codex-responses';
312316
const api =
313317
wire === 'anthropic'
314318
? 'anthropic-messages'

0 commit comments

Comments
 (0)