Skip to content

Commit 332e8ca

Browse files
committed
Merge remote-tracking branch 'upstream/main' into feat/copilot
2 parents 8e896d7 + 4b68103 commit 332e8ca

14 files changed

Lines changed: 843 additions & 146 deletions

config.example.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ disable-cooling: false
100100
# - "chat": disable image_generation injection on non-images endpoints, but keep /v1/images/generations and /v1/images/edits enabled.
101101
disable-image-generation: false
102102

103+
# Base model used when proxying gpt-image-2 via the hosted image_generation tool (Responses API).
104+
# Must start with "gpt-" (case-insensitive). If unset or invalid, defaults to "gpt-5.4-mini".
105+
# gpt-image-2-base-model: "gpt-5.4-mini"
106+
103107
# Core auth auto-refresh worker pool size (OAuth/file-based auth token refresh).
104108
# When > 0, overrides the default worker count (16).
105109
# auth-auto-refresh-workers: 16

internal/config/sdk_config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ type SDKConfig struct {
1919
// while keeping /v1/images/generations and /v1/images/edits enabled and preserving image_generation there.
2020
DisableImageGeneration DisableImageGenerationMode `yaml:"disable-image-generation" json:"disable-image-generation"`
2121

22+
// GPTImage2BaseModel sets the base (mainline) model used when proxying GPT Image 2
23+
// requests via the hosted image_generation tool (e.g. Codex OAuth /v1/images/*).
24+
//
25+
// The value must start with "gpt-" (case-insensitive). If empty or invalid, the
26+
// default base model ("gpt-5.4-mini") is used.
27+
GPTImage2BaseModel string `yaml:"gpt-image-2-base-model,omitempty" json:"gpt-image-2-base-model,omitempty"`
28+
2229
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
2330
// Default is false for safety; when false, /v1internal:* requests are rejected.
2431
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`

internal/runtime/executor/codex_openai_images.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,20 @@ func codexIsImagesEndpointPath(path string) bool {
6363
return strings.HasSuffix(path, codexImagesGenerationsPath) || strings.HasSuffix(path, codexImagesEditsPath)
6464
}
6565

66+
func (e *CodexExecutor) resolveGPTImage2BaseModel() string {
67+
if e == nil || e.cfg == nil {
68+
return codexOpenAIImagesMainModel
69+
}
70+
model := strings.TrimSpace(e.cfg.GPTImage2BaseModel)
71+
if model == "" {
72+
return codexOpenAIImagesMainModel
73+
}
74+
if strings.HasPrefix(strings.ToLower(model), "gpt-") {
75+
return model
76+
}
77+
return codexOpenAIImagesMainModel
78+
}
79+
6680
func (e *CodexExecutor) executeOpenAIImage(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
6781
prepared, errPrepare := codexPrepareOpenAIImageRequest(req, opts)
6882
if errPrepare != nil {
@@ -74,10 +88,11 @@ func (e *CodexExecutor) executeOpenAIImage(ctx context.Context, auth *cliproxyau
7488
baseURL = "https://chatgpt.com/backend-api/codex"
7589
}
7690

77-
reporter := helps.NewUsageReporter(ctx, e.Identifier(), codexOpenAIImagesMainModel, auth)
91+
mainModel := e.resolveGPTImage2BaseModel()
92+
reporter := helps.NewUsageReporter(ctx, e.Identifier(), mainModel, auth)
7893
defer reporter.TrackFailure(ctx, &err)
7994

80-
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts)
95+
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts, mainModel)
8196
if errBuild != nil {
8297
return resp, errBuild
8398
}
@@ -161,10 +176,11 @@ func (e *CodexExecutor) executeOpenAIImageStream(ctx context.Context, auth *clip
161176
baseURL = "https://chatgpt.com/backend-api/codex"
162177
}
163178

164-
reporter := helps.NewUsageReporter(ctx, e.Identifier(), codexOpenAIImagesMainModel, auth)
179+
mainModel := e.resolveGPTImage2BaseModel()
180+
reporter := helps.NewUsageReporter(ctx, e.Identifier(), mainModel, auth)
165181
defer reporter.TrackFailure(ctx, &err)
166182

167-
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts)
183+
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts, mainModel)
168184
if errBuild != nil {
169185
return nil, errBuild
170186
}
@@ -277,18 +293,22 @@ func (e *CodexExecutor) executeOpenAIImageStream(ctx context.Context, auth *clip
277293
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
278294
}
279295

280-
func (e *CodexExecutor) prepareCodexOpenAIImageBody(body []byte, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) ([]byte, error) {
296+
func (e *CodexExecutor) prepareCodexOpenAIImageBody(body []byte, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, mainModel string) ([]byte, error) {
281297
out := body
298+
mainModel = strings.TrimSpace(mainModel)
299+
if mainModel == "" {
300+
mainModel = codexOpenAIImagesMainModel
301+
}
282302
var errThinking error
283-
out, errThinking = thinking.ApplyThinking(out, codexOpenAIImagesMainModel, codexOpenAIImageSourceFormat, "codex", e.Identifier())
303+
out, errThinking = thinking.ApplyThinking(out, mainModel, codexOpenAIImageSourceFormat, "codex", e.Identifier())
284304
if errThinking != nil {
285305
return nil, errThinking
286306
}
287307

288308
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
289309
requestPath := helps.PayloadRequestPath(opts)
290-
out = helps.ApplyPayloadConfigWithRequest(e.cfg, codexOpenAIImagesMainModel, "codex", codexOpenAIImageSourceFormat, "", out, body, requestedModel, requestPath, opts.Headers)
291-
out, _ = sjson.SetBytes(out, "model", codexOpenAIImagesMainModel)
310+
out = helps.ApplyPayloadConfigWithRequest(e.cfg, mainModel, "codex", codexOpenAIImageSourceFormat, "", out, body, requestedModel, requestPath, opts.Headers)
311+
out, _ = sjson.SetBytes(out, "model", mainModel)
292312
out, _ = sjson.SetBytes(out, "stream", true)
293313
out, _ = sjson.DeleteBytes(out, "previous_response_id")
294314
out, _ = sjson.DeleteBytes(out, "prompt_cache_retention")

internal/runtime/executor/gemini_cli_executor.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
141141
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
142142
requestPath := helps.PayloadRequestPath(opts)
143143
basePayload = helps.ApplyPayloadConfigWithRequest(e.cfg, baseModel, "gemini", from.String(), "request", basePayload, originalTranslated, requestedModel, requestPath, opts.Headers)
144+
basePayload = cleanGeminiCLIRequestSchemas(basePayload)
144145

145146
action := "generateContent"
146147
if req.Metadata != nil {
@@ -297,6 +298,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
297298
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
298299
requestPath := helps.PayloadRequestPath(opts)
299300
basePayload = helps.ApplyPayloadConfigWithRequest(e.cfg, baseModel, "gemini", from.String(), "request", basePayload, originalTranslated, requestedModel, requestPath, opts.Headers)
301+
basePayload = cleanGeminiCLIRequestSchemas(basePayload)
300302

301303
projectID := resolveGeminiProjectID(auth)
302304

@@ -530,6 +532,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
530532
payload = deleteJSONField(payload, "model")
531533
payload = deleteJSONField(payload, "request.safetySettings")
532534
payload = fixGeminiCLIImageAspectRatio(baseModel, payload)
535+
payload = cleanGeminiCLIRequestSchemas(payload)
533536

534537
tok, errTok := tokenSource.Token()
535538
if errTok != nil {
@@ -859,6 +862,65 @@ func deleteJSONField(body []byte, key string) []byte {
859862
return updated
860863
}
861864

865+
func cleanGeminiCLIRequestSchemas(body []byte) []byte {
866+
if len(body) == 0 {
867+
return body
868+
}
869+
hasTools := gjson.GetBytes(body, "request.tools.0").Exists()
870+
hasResponseSchema := gjson.GetBytes(body, "request.generationConfig.responseSchema").Exists()
871+
hasResponseJSONSchema := gjson.GetBytes(body, "request.generationConfig.responseJsonSchema").Exists()
872+
if !hasTools && !hasResponseSchema && !hasResponseJSONSchema {
873+
return body
874+
}
875+
876+
tools := gjson.GetBytes(body, "request.tools")
877+
if tools.IsArray() {
878+
for i, tool := range tools.Array() {
879+
for _, declarationsKey := range []string{"function_declarations", "functionDeclarations"} {
880+
funcDecls := tool.Get(declarationsKey)
881+
if !funcDecls.IsArray() {
882+
continue
883+
}
884+
for j, decl := range funcDecls.Array() {
885+
for _, schemaKey := range []string{"parameters", "parametersJsonSchema"} {
886+
params := decl.Get(schemaKey)
887+
if !params.Exists() || !params.IsObject() {
888+
continue
889+
}
890+
cleaned := util.CleanJSONSchemaForGemini(params.Raw)
891+
path := fmt.Sprintf("request.tools.%d.%s.%d.%s", i, declarationsKey, j, schemaKey)
892+
updated, errSet := sjson.SetRawBytes(body, path, []byte(cleaned))
893+
if errSet != nil {
894+
log.Errorf("gemini cli executor: failed to set cleaned schema at %s: %v", path, errSet)
895+
continue
896+
}
897+
body = updated
898+
}
899+
}
900+
}
901+
}
902+
}
903+
904+
for _, schemaPath := range []string{
905+
"request.generationConfig.responseSchema",
906+
"request.generationConfig.responseJsonSchema",
907+
} {
908+
responseSchema := gjson.GetBytes(body, schemaPath)
909+
if !responseSchema.IsObject() {
910+
continue
911+
}
912+
cleaned := util.CleanJSONSchemaForGemini(responseSchema.Raw)
913+
updated, errSet := sjson.SetRawBytes(body, schemaPath, []byte(cleaned))
914+
if errSet != nil {
915+
log.Errorf("gemini cli executor: failed to set cleaned response schema at %s: %v", schemaPath, errSet)
916+
continue
917+
}
918+
body = updated
919+
}
920+
921+
return body
922+
}
923+
862924
func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
863925
if modelName == "gemini-2.5-flash-image-preview" {
864926
aspectRatioResult := gjson.GetBytes(rawJSON, "request.generationConfig.imageConfig.aspectRatio")
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package executor
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/tidwall/gjson"
8+
)
9+
10+
func TestCleanGeminiCLIRequestSchemasFlattensFunctionDeclarationTypeArray(t *testing.T) {
11+
input := []byte(`{
12+
"request": {
13+
"tools": [
14+
{
15+
"function_declarations": [
16+
{
17+
"name": "wecom_mcp",
18+
"parameters": {
19+
"type": "object",
20+
"properties": {
21+
"args": {
22+
"description": "call args",
23+
"type": ["string", "object"]
24+
}
25+
}
26+
}
27+
}
28+
]
29+
},
30+
{
31+
"functionDeclarations": [
32+
{
33+
"name": "camel_tool",
34+
"parametersJsonSchema": {
35+
"type": "object",
36+
"properties": {
37+
"value": {
38+
"type": ["integer", "string"]
39+
}
40+
}
41+
}
42+
}
43+
]
44+
}
45+
],
46+
"nonSchema": {
47+
"type": ["string", "object"]
48+
}
49+
}
50+
}`)
51+
52+
out := cleanGeminiCLIRequestSchemas(input)
53+
54+
argsType := gjson.GetBytes(out, "request.tools.0.function_declarations.0.parameters.properties.args.type")
55+
if argsType.String() != "string" {
56+
t.Fatalf("args.type = %s, want string; body=%s", argsType.Raw, string(out))
57+
}
58+
argsDesc := gjson.GetBytes(out, "request.tools.0.function_declarations.0.parameters.properties.args.description").String()
59+
if !strings.Contains(argsDesc, "Accepts: string | object") {
60+
t.Fatalf("args.description = %q, want accepted type hint", argsDesc)
61+
}
62+
63+
valueType := gjson.GetBytes(out, "request.tools.1.functionDeclarations.0.parametersJsonSchema.properties.value.type")
64+
if valueType.String() != "integer" {
65+
t.Fatalf("value.type = %s, want integer; body=%s", valueType.Raw, string(out))
66+
}
67+
valueDesc := gjson.GetBytes(out, "request.tools.1.functionDeclarations.0.parametersJsonSchema.properties.value.description").String()
68+
if !strings.Contains(valueDesc, "Accepts: integer | string") {
69+
t.Fatalf("value.description = %q, want accepted type hint", valueDesc)
70+
}
71+
72+
if nonSchema := gjson.GetBytes(out, "request.nonSchema.type"); !nonSchema.IsArray() {
73+
t.Fatalf("request.nonSchema.type should be preserved outside schema paths, got %s", nonSchema.Raw)
74+
}
75+
}

internal/translator/claude/openai/responses/claude_openai-responses_request.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,19 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
168168
}
169169

170170
// input array processing
171+
var pendingReasoningParts []string
172+
flushPendingReasoning := func() {
173+
if len(pendingReasoningParts) == 0 {
174+
return
175+
}
176+
asst := []byte(`{"role":"assistant","content":[]}`)
177+
for _, partJSON := range pendingReasoningParts {
178+
asst, _ = sjson.SetRawBytes(asst, "content.-1", []byte(partJSON))
179+
}
180+
out, _ = sjson.SetRawBytes(out, "messages.-1", asst)
181+
pendingReasoningParts = nil
182+
}
183+
171184
if input := root.Get("input"); input.Exists() && input.IsArray() {
172185
input.ForEach(func(_, item gjson.Result) bool {
173186
if extractedFromSystem && strings.EqualFold(item.Get("role").String(), "system") {
@@ -279,10 +292,26 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
279292
}
280293
}
281294

295+
hasReasoningParts := false
296+
if len(pendingReasoningParts) > 0 {
297+
if role == "assistant" {
298+
if len(partsJSON) == 0 && textAggregate.Len() > 0 {
299+
contentPart := []byte(`{"type":"text","text":""}`)
300+
contentPart, _ = sjson.SetBytes(contentPart, "text", textAggregate.String())
301+
partsJSON = append(partsJSON, string(contentPart))
302+
}
303+
partsJSON = append(append([]string{}, pendingReasoningParts...), partsJSON...)
304+
pendingReasoningParts = nil
305+
hasReasoningParts = true
306+
} else {
307+
flushPendingReasoning()
308+
}
309+
}
310+
282311
if len(partsJSON) > 0 {
283312
msg := []byte(`{"role":"","content":[]}`)
284313
msg, _ = sjson.SetBytes(msg, "role", role)
285-
if len(partsJSON) == 1 && !hasImage && !hasFile {
314+
if len(partsJSON) == 1 && !hasImage && !hasFile && !hasReasoningParts {
286315
// Preserve legacy behavior for single text content
287316
msg, _ = sjson.DeleteBytes(msg, "content")
288317
textPart := gjson.Parse(partsJSON[0])
@@ -300,6 +329,11 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
300329
out, _ = sjson.SetRawBytes(out, "messages.-1", msg)
301330
}
302331

332+
case "reasoning":
333+
if thinkingPart := convertResponsesReasoningToClaudeThinking(item); len(thinkingPart) > 0 {
334+
pendingReasoningParts = append(pendingReasoningParts, string(thinkingPart))
335+
}
336+
303337
case "function_call":
304338
// Map to assistant tool_use
305339
callID := item.Get("call_id").String()
@@ -320,10 +354,15 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
320354
}
321355

322356
asst := []byte(`{"role":"assistant","content":[]}`)
357+
for _, partJSON := range pendingReasoningParts {
358+
asst, _ = sjson.SetRawBytes(asst, "content.-1", []byte(partJSON))
359+
}
360+
pendingReasoningParts = nil
323361
asst, _ = sjson.SetRawBytes(asst, "content.-1", toolUse)
324362
out, _ = sjson.SetRawBytes(out, "messages.-1", asst)
325363

326364
case "function_call_output":
365+
flushPendingReasoning()
327366
// Map to user tool_result
328367
callID := item.Get("call_id").String()
329368
outputStr := item.Get("output").String()
@@ -338,6 +377,7 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
338377
return true
339378
})
340379
}
380+
flushPendingReasoning()
341381

342382
includedToolNames := map[string]struct{}{}
343383
toolNameMap := map[string]string{}
@@ -398,6 +438,34 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
398438
return out
399439
}
400440

441+
func convertResponsesReasoningToClaudeThinking(item gjson.Result) []byte {
442+
signature := item.Get("encrypted_content").String()
443+
if signature == "" {
444+
return nil
445+
}
446+
447+
thinkingText := responsesReasoningSummaryText(item)
448+
thinkingPart := []byte(`{"type":"thinking","thinking":"","signature":""}`)
449+
thinkingPart, _ = sjson.SetBytes(thinkingPart, "thinking", thinkingText)
450+
thinkingPart, _ = sjson.SetBytes(thinkingPart, "signature", signature)
451+
return thinkingPart
452+
}
453+
454+
func responsesReasoningSummaryText(item gjson.Result) string {
455+
var builder strings.Builder
456+
if summary := item.Get("summary"); summary.Exists() && summary.IsArray() {
457+
summary.ForEach(func(_, part gjson.Result) bool {
458+
if text := part.Get("text"); text.Exists() {
459+
builder.WriteString(text.String())
460+
} else if part.Type == gjson.String {
461+
builder.WriteString(part.String())
462+
}
463+
return true
464+
})
465+
}
466+
return builder.String()
467+
}
468+
401469
func convertResponsesToolToClaudeTools(tool gjson.Result, toolNameMap map[string]string) [][]byte {
402470
toolType := strings.TrimSpace(tool.Get("type").String())
403471
switch toolType {

0 commit comments

Comments
 (0)