Skip to content

Commit 17be644

Browse files
committed
fix(translator): improve tool response handling for non-string content
- Added `setToolCallOutputContent` to process various content types, including arrays and fallback cases. - Implemented robust handling for specific tool output types like text, image URLs, and files, ensuring proper serialization. - Improved fallback logic to handle unexpected or missing data. Fixed: #2313 Closes: #2349
1 parent 38dad2a commit 17be644

2 files changed

Lines changed: 263 additions & 2 deletions

File tree

internal/translator/codex/openai/chat-completions/codex_openai_request.go

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,13 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
121121
case "tool":
122122
// Handle tool response messages as top-level function_call_output objects
123123
toolCallID := m.Get("tool_call_id").String()
124-
content := m.Get("content").String()
124+
content := m.Get("content")
125125

126126
// Create function_call_output object
127127
funcOutput := []byte(`{}`)
128128
funcOutput, _ = sjson.SetBytes(funcOutput, "type", "function_call_output")
129129
funcOutput, _ = sjson.SetBytes(funcOutput, "call_id", toolCallID)
130-
funcOutput, _ = sjson.SetBytes(funcOutput, "output", content)
130+
funcOutput = setToolCallOutputContent(funcOutput, content)
131131
out, _ = sjson.SetRawBytes(out, "input.-1", funcOutput)
132132

133133
default:
@@ -359,6 +359,91 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
359359
return out
360360
}
361361

362+
func setToolCallOutputContent(funcOutput []byte, content gjson.Result) []byte {
363+
switch {
364+
case content.Type == gjson.String:
365+
funcOutput, _ = sjson.SetBytes(funcOutput, "output", content.String())
366+
case content.IsArray():
367+
output := []byte(`[]`)
368+
for _, item := range content.Array() {
369+
output = appendToolOutputContentPart(output, item)
370+
}
371+
funcOutput, _ = sjson.SetRawBytes(funcOutput, "output", output)
372+
default:
373+
fallbackOutput := content.Raw
374+
if fallbackOutput == "" {
375+
fallbackOutput = content.String()
376+
}
377+
funcOutput, _ = sjson.SetBytes(funcOutput, "output", fallbackOutput)
378+
}
379+
return funcOutput
380+
}
381+
382+
func appendToolOutputContentPart(output []byte, item gjson.Result) []byte {
383+
switch item.Get("type").String() {
384+
case "text":
385+
part := []byte(`{}`)
386+
part, _ = sjson.SetBytes(part, "type", "input_text")
387+
part, _ = sjson.SetBytes(part, "text", item.Get("text").String())
388+
output, _ = sjson.SetRawBytes(output, "-1", part)
389+
case "image_url":
390+
imageURL := item.Get("image_url.url").String()
391+
fileID := item.Get("image_url.file_id").String()
392+
if imageURL == "" && fileID == "" {
393+
return appendToolOutputFallbackPart(output, item)
394+
}
395+
part := []byte(`{}`)
396+
part, _ = sjson.SetBytes(part, "type", "input_image")
397+
if imageURL != "" {
398+
part, _ = sjson.SetBytes(part, "image_url", imageURL)
399+
}
400+
if fileID != "" {
401+
part, _ = sjson.SetBytes(part, "file_id", fileID)
402+
}
403+
if detail := item.Get("image_url.detail").String(); detail != "" {
404+
part, _ = sjson.SetBytes(part, "detail", detail)
405+
}
406+
output, _ = sjson.SetRawBytes(output, "-1", part)
407+
case "file":
408+
fileID := item.Get("file.file_id").String()
409+
fileData := item.Get("file.file_data").String()
410+
fileURL := item.Get("file.file_url").String()
411+
if fileID == "" && fileData == "" && fileURL == "" {
412+
return appendToolOutputFallbackPart(output, item)
413+
}
414+
part := []byte(`{}`)
415+
part, _ = sjson.SetBytes(part, "type", "input_file")
416+
if fileID != "" {
417+
part, _ = sjson.SetBytes(part, "file_id", fileID)
418+
}
419+
if fileData != "" {
420+
part, _ = sjson.SetBytes(part, "file_data", fileData)
421+
}
422+
if fileURL != "" {
423+
part, _ = sjson.SetBytes(part, "file_url", fileURL)
424+
}
425+
if filename := item.Get("file.filename").String(); filename != "" {
426+
part, _ = sjson.SetBytes(part, "filename", filename)
427+
}
428+
output, _ = sjson.SetRawBytes(output, "-1", part)
429+
default:
430+
output = appendToolOutputFallbackPart(output, item)
431+
}
432+
return output
433+
}
434+
435+
func appendToolOutputFallbackPart(output []byte, item gjson.Result) []byte {
436+
text := item.Raw
437+
if text == "" {
438+
text = item.String()
439+
}
440+
part := []byte(`{}`)
441+
part, _ = sjson.SetBytes(part, "type", "input_text")
442+
part, _ = sjson.SetBytes(part, "text", text)
443+
output, _ = sjson.SetRawBytes(output, "-1", part)
444+
return output
445+
}
446+
362447
// shortenNameIfNeeded applies the simple shortening rule for a single name.
363448
// If the name length exceeds 64, it will try to preserve the "mcp__" prefix and last segment.
364449
// Otherwise it truncates to 64 characters.

internal/translator/codex/openai/chat-completions/codex_openai_request_test.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,182 @@ func TestToolCallWithContent(t *testing.T) {
176176
}
177177
}
178178

179+
func TestToolCallOutputWithMultimodalContent(t *testing.T) {
180+
input := []byte(`{
181+
"model": "gpt-4o",
182+
"messages": [
183+
{"role": "user", "content": "Show me the generated result."},
184+
{
185+
"role": "assistant",
186+
"content": null,
187+
"tool_calls": [
188+
{
189+
"id": "call_output_1",
190+
"type": "function",
191+
"function": {"name": "render_output", "arguments": "{}"}
192+
}
193+
]
194+
},
195+
{
196+
"role": "tool",
197+
"tool_call_id": "call_output_1",
198+
"content": [
199+
{"type":"text","text":"Rendered result attached."},
200+
{"type":"image_url","image_url":{"url":"https://example.com/generated.png","detail":"high"}},
201+
{"type":"image_url","image_url":{"file_id":"file-img-123"}},
202+
{"type":"file","file":{"file_id":"file-doc-123","filename":"doc.pdf"}},
203+
{"type":"file","file":{"file_data":"SGVsbG8=","filename":"inline.txt"}},
204+
{"type":"file","file":{"file_url":"https://example.com/report.pdf","filename":"report.pdf"}}
205+
]
206+
}
207+
],
208+
"tools": [
209+
{
210+
"type": "function",
211+
"function": {"name": "render_output", "description": "Render output", "parameters": {"type": "object", "properties": {}}}
212+
}
213+
]
214+
}`)
215+
216+
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
217+
result := string(out)
218+
219+
output := gjson.Get(result, "input.2.output")
220+
if !output.IsArray() {
221+
t.Fatalf("expected tool output to be an array, got: %s", output.Raw)
222+
}
223+
224+
parts := output.Array()
225+
if len(parts) != 6 {
226+
t.Fatalf("expected 6 output parts, got %d: %s", len(parts), output.Raw)
227+
}
228+
if parts[0].Get("type").String() != "input_text" || parts[0].Get("text").String() != "Rendered result attached." {
229+
t.Fatalf("part 0: expected input_text with rendered text, got %s", parts[0].Raw)
230+
}
231+
if parts[1].Get("type").String() != "input_image" {
232+
t.Fatalf("part 1: expected input_image, got %s", parts[1].Raw)
233+
}
234+
if parts[1].Get("image_url").String() != "https://example.com/generated.png" {
235+
t.Errorf("part 1: unexpected image_url %s", parts[1].Get("image_url").String())
236+
}
237+
if parts[1].Get("detail").String() != "high" {
238+
t.Errorf("part 1: unexpected detail %s", parts[1].Get("detail").String())
239+
}
240+
if parts[2].Get("type").String() != "input_image" || parts[2].Get("file_id").String() != "file-img-123" {
241+
t.Fatalf("part 2: expected file_id-backed input_image, got %s", parts[2].Raw)
242+
}
243+
if parts[3].Get("type").String() != "input_file" || parts[3].Get("file_id").String() != "file-doc-123" {
244+
t.Fatalf("part 3: expected file_id-backed input_file, got %s", parts[3].Raw)
245+
}
246+
if parts[3].Get("filename").String() != "doc.pdf" {
247+
t.Errorf("part 3: unexpected filename %s", parts[3].Get("filename").String())
248+
}
249+
if parts[4].Get("type").String() != "input_file" || parts[4].Get("file_data").String() != "SGVsbG8=" {
250+
t.Fatalf("part 4: expected file_data-backed input_file, got %s", parts[4].Raw)
251+
}
252+
if parts[5].Get("type").String() != "input_file" || parts[5].Get("file_url").String() != "https://example.com/report.pdf" {
253+
t.Fatalf("part 5: expected file_url-backed input_file, got %s", parts[5].Raw)
254+
}
255+
}
256+
257+
func TestToolCallOutputFallsBackForInvalidStructuredParts(t *testing.T) {
258+
input := []byte(`{
259+
"model": "gpt-4o",
260+
"messages": [
261+
{"role": "user", "content": "Check tool output."},
262+
{
263+
"role": "assistant",
264+
"content": null,
265+
"tool_calls": [
266+
{"id": "call_invalid_parts", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
267+
]
268+
},
269+
{
270+
"role": "tool",
271+
"tool_call_id": "call_invalid_parts",
272+
"content": [
273+
{"type":"image_url","image_url":{"detail":"low"}},
274+
{"type":"file","file":{"filename":"orphan.txt"}},
275+
{"type":"unknown_type","foo":"bar","nested":{"a":1}}
276+
]
277+
}
278+
],
279+
"tools": [
280+
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
281+
]
282+
}`)
283+
284+
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
285+
result := string(out)
286+
287+
parts := gjson.Get(result, "input.2.output").Array()
288+
if len(parts) != 3 {
289+
t.Fatalf("expected 3 output parts, got %d: %s", len(parts), gjson.Get(result, "input.2.output").Raw)
290+
}
291+
292+
expectedFallbacks := []string{
293+
`{"type":"image_url","image_url":{"detail":"low"}}`,
294+
`{"type":"file","file":{"filename":"orphan.txt"}}`,
295+
`{"type":"unknown_type","foo":"bar","nested":{"a":1}}`,
296+
}
297+
for i, expectedFallback := range expectedFallbacks {
298+
if parts[i].Get("type").String() != "input_text" {
299+
t.Fatalf("part %d: expected input_text fallback, got %s", i, parts[i].Raw)
300+
}
301+
if parts[i].Get("text").String() != expectedFallback {
302+
t.Fatalf("part %d: expected fallback %s, got %s", i, expectedFallback, parts[i].Get("text").String())
303+
}
304+
}
305+
}
306+
307+
func TestToolCallOutputWithNonStringJSONContent(t *testing.T) {
308+
tests := []struct {
309+
name string
310+
content string
311+
expectedOutput string
312+
}{
313+
{name: "null", content: `null`, expectedOutput: `null`},
314+
{name: "object", content: `{"status":"ok","count":2}`, expectedOutput: `{"status":"ok","count":2}`},
315+
}
316+
317+
for _, tt := range tests {
318+
t.Run(tt.name, func(t *testing.T) {
319+
input := []byte(`{
320+
"model": "gpt-4o",
321+
"messages": [
322+
{"role": "user", "content": "Check tool output."},
323+
{
324+
"role": "assistant",
325+
"content": null,
326+
"tool_calls": [
327+
{"id": "call_json", "type": "function", "function": {"name": "inspect", "arguments": "{}"}}
328+
]
329+
},
330+
{
331+
"role": "tool",
332+
"tool_call_id": "call_json",
333+
"content": ` + tt.content + `
334+
}
335+
],
336+
"tools": [
337+
{"type": "function", "function": {"name": "inspect", "description": "Inspect", "parameters": {"type": "object", "properties": {}}}}
338+
]
339+
}`)
340+
341+
out := ConvertOpenAIRequestToCodex("gpt-4o", input, true)
342+
result := string(out)
343+
344+
output := gjson.Get(result, "input.2.output")
345+
if !output.Exists() {
346+
t.Fatalf("expected output field to exist: %s", gjson.Get(result, "input.2").Raw)
347+
}
348+
if output.String() != tt.expectedOutput {
349+
t.Fatalf("expected output %s, got %s", tt.expectedOutput, output.String())
350+
}
351+
})
352+
}
353+
}
354+
179355
// Parallel tool calls: assistant invokes 3 tools at once, all call_ids
180356
// and outputs must be translated and paired correctly.
181357
func TestMultipleToolCalls(t *testing.T) {

0 commit comments

Comments
 (0)