Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions backend/internal/pkg/antigravity/claude_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ type ContentBlock struct {
ToolUseID string `json:"tool_use_id,omitempty"`
Content json.RawMessage `json:"content,omitempty"`
IsError bool `json:"is_error,omitempty"`
// image
// image/document/file
Source *ImageSource `json:"source,omitempty"`
}

// ImageSource describes where the payload of a Claude image, document, or file
// content block comes from. The type keeps its historical name even though it
// is no longer limited to images.
type ImageSource struct {
	Type      string `json:"type"`       // source kind, e.g. "base64"
	MediaType string `json:"media_type"` // e.g. "image/png", "image/jpeg", "application/pdf"
	Data      string `json:"data"`       // payload carried by the source
}

Expand Down
2 changes: 1 addition & 1 deletion backend/internal/pkg/antigravity/request_transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ func buildParts(content json.RawMessage, toolIDToName map[string]string, allowDu
}
parts = append(parts, part)

case "image":
case "image", "document", "file":
if block.Source != nil && block.Source.Type == "base64" {
parts = append(parts, GeminiPart{
InlineData: &GeminiInlineData{
Expand Down
15 changes: 15 additions & 0 deletions backend/internal/pkg/antigravity/request_transformer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,21 @@ func TestBuildParts_ToolUseSignatureHandling(t *testing.T) {
})
}

// TestBuildParts_DocumentBase64PreservedAsInlineData feeds buildParts a text
// block followed by a base64 "document" block and checks that the document
// comes back as an inline-data part with its media type and payload intact.
func TestBuildParts_DocumentBase64PreservedAsInlineData(t *testing.T) {
	raw := json.RawMessage(`[
{"type":"text","text":"这可以看得出是谁购买的吗?"},
{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQ="},"title":"receipt.pdf"}
]`)
	noToolNames := map[string]string{}

	got, _, err := buildParts(raw, noToolNames, true)
	require.NoError(t, err)
	require.Len(t, got, 2)

	// First part: the plain text question.
	require.Equal(t, "这可以看得出是谁购买的吗?", got[0].Text)

	// Second part: the PDF, carried as inline data.
	inline := got[1].InlineData
	require.NotNil(t, inline)
	require.Equal(t, "application/pdf", inline.MimeType)
	require.Equal(t, "JVBERi0xLjQ=", inline.Data)
}

// TestBuildTools_CustomTypeTools 测试custom类型工具转换
func TestBuildTools_CustomTypeTools(t *testing.T) {
tests := []struct {
Expand Down
130 changes: 130 additions & 0 deletions backend/internal/pkg/apicompat/anthropic_responses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,136 @@ func TestAnthropicToResponses_ImageOnlyUserMessage(t *testing.T) {
assert.Equal(t, "data:image/jpeg;base64,/9j/4AAQ", parts[0].ImageURL)
}

func TestAnthropicToResponses_UserDocumentBlock(t *testing.T) {
req := &AnthropicRequest{
Model: "gpt-5.2",
MaxTokens: 1024,
Messages: []AnthropicMessage{
{Role: "user", Content: json.RawMessage(`[
{"type":"text","text":"Summarize this file"},
{"type":"document","title":"example-report.pdf","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0x"}}
]`)},
},
}

resp, err := AnthropicToResponses(req)
require.NoError(t, err)

var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 1)

var parts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[0].Content, &parts))
require.Len(t, parts, 2)
assert.Equal(t, "input_text", parts[0].Type)
assert.Equal(t, "Summarize this file", parts[0].Text)
assert.Equal(t, "input_file", parts[1].Type)
assert.Equal(t, "example-report.pdf", parts[1].Filename)
assert.Equal(t, "data:application/pdf;base64,JVBERi0x", parts[1].FileData)
assert.Empty(t, parts[1].FileURL)
}

func TestAnthropicToResponses_UserDocumentURLBlock(t *testing.T) {
req := &AnthropicRequest{
Model: "gpt-5.2",
MaxTokens: 1024,
Messages: []AnthropicMessage{
{Role: "user", Content: json.RawMessage(`[
{"type":"document","title":"report.pdf","source":{"type":"url","url":"https://example.com/report.pdf"}}
]`)},
},
}

resp, err := AnthropicToResponses(req)
require.NoError(t, err)

var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 1)

var parts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[0].Content, &parts))
require.Len(t, parts, 1)
assert.Equal(t, "input_file", parts[0].Type)
assert.Equal(t, "report.pdf", parts[0].Filename)
assert.Equal(t, "https://example.com/report.pdf", parts[0].FileURL)
assert.Empty(t, parts[0].FileData)
}

func TestAnthropicToResponses_UserFileBlock(t *testing.T) {
req := &AnthropicRequest{
Model: "gpt-5.2",
MaxTokens: 1024,
Messages: []AnthropicMessage{
{Role: "user", Content: json.RawMessage(`[
{"type":"file","title":"example-report.pdf","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0x"}}
]`)},
},
}

resp, err := AnthropicToResponses(req)
require.NoError(t, err)

var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 1)

var parts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[0].Content, &parts))
require.Len(t, parts, 1)
assert.Equal(t, "input_file", parts[0].Type)
assert.Equal(t, "example-report.pdf", parts[0].Filename)
assert.Equal(t, "data:application/pdf;base64,JVBERi0x", parts[0].FileData)
}

func TestResponsesToAnthropicRequest_InputFileDataBecomesDocument(t *testing.T) {
input := json.RawMessage(`[
{"role":"user","content":[
{"type":"input_text","text":"Analyze this"},
{"type":"input_file","filename":"receipt.pdf","file_data":"data:application/pdf;base64,JVBERi0x"}
]}
]`)
req := &ResponsesRequest{Model: "claude-sonnet-4-6", Input: input}

out, err := ResponsesToAnthropicRequest(req)
require.NoError(t, err)
require.Len(t, out.Messages, 1)

var blocks []AnthropicContentBlock
require.NoError(t, json.Unmarshal(out.Messages[0].Content, &blocks))
require.Len(t, blocks, 2)
assert.Equal(t, "text", blocks[0].Type)
assert.Equal(t, "document", blocks[1].Type)
assert.Equal(t, "receipt.pdf", blocks[1].Title)
require.NotNil(t, blocks[1].Source)
assert.Equal(t, "base64", blocks[1].Source.Type)
assert.Equal(t, "application/pdf", blocks[1].Source.MediaType)
assert.Equal(t, "JVBERi0x", blocks[1].Source.Data)
}

func TestResponsesToAnthropicRequest_InputFileURLBecomesDocument(t *testing.T) {
input := json.RawMessage(`[
{"role":"user","content":[
{"type":"input_file","filename":"receipt.pdf","file_url":"https://example.com/receipt.pdf"}
]}
]`)
req := &ResponsesRequest{Model: "claude-sonnet-4-6", Input: input}

out, err := ResponsesToAnthropicRequest(req)
require.NoError(t, err)
require.Len(t, out.Messages, 1)

var blocks []AnthropicContentBlock
require.NoError(t, json.Unmarshal(out.Messages[0].Content, &blocks))
require.Len(t, blocks, 1)
assert.Equal(t, "document", blocks[0].Type)
assert.Equal(t, "receipt.pdf", blocks[0].Title)
require.NotNil(t, blocks[0].Source)
assert.Equal(t, "url", blocks[0].Source.Type)
assert.Equal(t, "https://example.com/receipt.pdf", blocks[0].Source.URL)
}

func TestAnthropicToResponses_ToolResultWithImage(t *testing.T) {
req := &AnthropicRequest{
Model: "gpt-5.2",
Expand Down
30 changes: 29 additions & 1 deletion backend/internal/pkg/apicompat/anthropic_to_responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error)
toolResultImageParts = append(toolResultImageParts, imageParts...)
}

// Remaining text + image blocks → user message with content parts.
// Remaining text + image/file blocks → user message with content parts.
// Also include images extracted from tool_results so the model can see them.
var parts []ResponsesContentPart
for _, b := range blocks {
Expand All @@ -239,6 +239,10 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error)
if uri := anthropicImageToDataURI(b.Source); uri != "" {
parts = append(parts, ResponsesContentPart{Type: "input_image", ImageURL: uri})
}
case "document", "file":
if part := anthropicFileToResponsesPart(b); part != nil {
parts = append(parts, *part)
}
}
}
parts = append(parts, toolResultImageParts...)
Expand Down Expand Up @@ -341,6 +345,30 @@ func anthropicImageToDataURI(src *AnthropicImageSource) string {
return "data:" + mediaType + ";base64," + src.Data
}

// anthropicFileToResponsesPart builds a Responses "input_file" content part
// from an Anthropic content block, using the block title as the filename.
//
// A source of type "url" yields a part with FileURL set. Every other source
// type is treated as base64 data and yields a data URI in FileData; a missing
// media type falls back to "application/octet-stream".
//
// It returns nil when the block has no source, or when the source carries
// neither a URL (for "url" sources) nor data (for all other sources).
func anthropicFileToResponsesPart(block AnthropicContentBlock) *ResponsesContentPart {
	src := block.Source
	if src == nil {
		return nil
	}

	out := &ResponsesContentPart{Type: "input_file", Filename: block.Title}

	if src.Type == "url" {
		// A URL source without an address has nothing to forward.
		if src.URL == "" {
			return nil
		}
		out.FileURL = src.URL
		return out
	}

	// Non-URL sources: inline the payload as a base64 data URI.
	if src.Data == "" {
		return nil
	}
	mime := src.MediaType
	if mime == "" {
		mime = "application/octet-stream"
	}
	out.FileData = "data:" + mime + ";base64," + src.Data
	return out
}

// convertToolResultOutput extracts text and image content from a tool_result
// block. Returns the text as a string for the function_call_output Output
// field, plus any image parts that must be sent in a separate user message
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func responsesInputToChatMessages(instructions string, inputRaw json.RawMessage)
content, _ := json.Marshal(rawString(item["text"]))
messages = append(messages, ChatMessage{Role: "user", Content: content})
continue
case "input_image":
case "input_image", "input_file":
content, err := chatContentFromSingleResponsesPart(itemType, item)
if err != nil {
return nil, err
Expand Down Expand Up @@ -217,6 +217,31 @@ func responsesContentPartsToChatContent(rawParts []json.RawMessage, role string)
Type: "image_url",
ImageURL: &ChatImageURL{URL: imageURL},
})
case "input_file", "file":
fileData := rawString(part["file_data"])
fileURL := rawString(part["file_url"])
filename := rawString(part["filename"])
if fileData == "" {
fileData = rawNestedString(part["file"], "file_data")
}
if fileURL == "" {
fileURL = rawNestedString(part["file"], "file_url")
}
if filename == "" {
filename = rawNestedString(part["file"], "filename")
}
if fileData == "" && fileURL == "" {
continue
}
hasNonText = true
chatParts = append(chatParts, ChatContentPart{
Type: "file",
File: &ChatFile{
Filename: filename,
FileData: fileData,
FileURL: fileURL,
},
})
}
}

Expand Down Expand Up @@ -246,6 +271,27 @@ func chatContentFromSingleResponsesPart(partType string, part map[string]json.Ra
Type: "image_url",
ImageURL: &ChatImageURL{URL: imageURL},
}})
case "input_file", "file":
fileData := rawString(part["file_data"])
fileURL := rawString(part["file_url"])
filename := rawString(part["filename"])
if fileData == "" {
fileData = rawNestedString(part["file"], "file_data")
}
if fileURL == "" {
fileURL = rawNestedString(part["file"], "file_url")
}
if filename == "" {
filename = rawNestedString(part["file"], "filename")
}
return json.Marshal([]ChatContentPart{{
Type: "file",
File: &ChatFile{
Filename: filename,
FileData: fileData,
FileURL: fileURL,
},
}})
default:
return json.Marshal(rawString(part["text"]))
}
Expand Down
Loading
Loading