Skip to content

Commit e399edd

Browse files
committed
feat(images): add support for configurable GPT Image 2 base model and improved SSE handling
- Introduced `GPTImage2BaseModel` configuration for hosted image generation tools, with validation of the "gpt-" prefix.
- Added logic to dynamically resolve and apply the base model in Codex executor workflows.
- Enhanced the server-sent events (SSE) implementation with keep-alive tickers and error events for stream reliability.
- Updated configuration file examples and internal documentation.
1 parent b5959c3 commit e399edd

5 files changed

Lines changed: 324 additions & 123 deletions

File tree

config.example.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,10 @@ disable-cooling: false
100100
# - "chat": disable image_generation injection on non-images endpoints, but keep /v1/images/generations and /v1/images/edits enabled.
101101
disable-image-generation: false
102102

103+
# Base model used when proxying gpt-image-2 via the hosted image_generation tool (Responses API).
104+
# Must start with "gpt-" (case-insensitive). If unset or invalid, defaults to "gpt-5.4-mini".
105+
# gpt-image-2-base-model: "gpt-5.4-mini"
106+
103107
# Core auth auto-refresh worker pool size (OAuth/file-based auth token refresh).
104108
# When > 0, overrides the default worker count (16).
105109
# auth-auto-refresh-workers: 16

internal/config/sdk_config.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,13 @@ type SDKConfig struct {
1919
// while keeping /v1/images/generations and /v1/images/edits enabled and preserving image_generation there.
2020
DisableImageGeneration DisableImageGenerationMode `yaml:"disable-image-generation" json:"disable-image-generation"`
2121

22+
// GPTImage2BaseModel sets the base (mainline) model used when proxying GPT Image 2
23+
// requests via the hosted image_generation tool (e.g. Codex OAuth /v1/images/*).
24+
//
25+
// The value must start with "gpt-" (case-insensitive). If empty or invalid, the
26+
// default base model ("gpt-5.4-mini") is used.
27+
GPTImage2BaseModel string `yaml:"gpt-image-2-base-model,omitempty" json:"gpt-image-2-base-model,omitempty"`
28+
2229
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
2330
// Default is false for safety; when false, /v1internal:* requests are rejected.
2431
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`

internal/runtime/executor/codex_openai_images.go

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,20 @@ func codexIsImagesEndpointPath(path string) bool {
6363
return strings.HasSuffix(path, codexImagesGenerationsPath) || strings.HasSuffix(path, codexImagesEditsPath)
6464
}
6565

66+
func (e *CodexExecutor) resolveGPTImage2BaseModel() string {
67+
if e == nil || e.cfg == nil {
68+
return codexOpenAIImagesMainModel
69+
}
70+
model := strings.TrimSpace(e.cfg.GPTImage2BaseModel)
71+
if model == "" {
72+
return codexOpenAIImagesMainModel
73+
}
74+
if strings.HasPrefix(strings.ToLower(model), "gpt-") {
75+
return model
76+
}
77+
return codexOpenAIImagesMainModel
78+
}
79+
6680
func (e *CodexExecutor) executeOpenAIImage(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
6781
prepared, errPrepare := codexPrepareOpenAIImageRequest(req, opts)
6882
if errPrepare != nil {
@@ -74,10 +88,11 @@ func (e *CodexExecutor) executeOpenAIImage(ctx context.Context, auth *cliproxyau
7488
baseURL = "https://chatgpt.com/backend-api/codex"
7589
}
7690

77-
reporter := helps.NewUsageReporter(ctx, e.Identifier(), codexOpenAIImagesMainModel, auth)
91+
mainModel := e.resolveGPTImage2BaseModel()
92+
reporter := helps.NewUsageReporter(ctx, e.Identifier(), mainModel, auth)
7893
defer reporter.TrackFailure(ctx, &err)
7994

80-
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts)
95+
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts, mainModel)
8196
if errBuild != nil {
8297
return resp, errBuild
8398
}
@@ -161,10 +176,11 @@ func (e *CodexExecutor) executeOpenAIImageStream(ctx context.Context, auth *clip
161176
baseURL = "https://chatgpt.com/backend-api/codex"
162177
}
163178

164-
reporter := helps.NewUsageReporter(ctx, e.Identifier(), codexOpenAIImagesMainModel, auth)
179+
mainModel := e.resolveGPTImage2BaseModel()
180+
reporter := helps.NewUsageReporter(ctx, e.Identifier(), mainModel, auth)
165181
defer reporter.TrackFailure(ctx, &err)
166182

167-
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts)
183+
body, errBuild := e.prepareCodexOpenAIImageBody(prepared.Body, req, opts, mainModel)
168184
if errBuild != nil {
169185
return nil, errBuild
170186
}
@@ -277,18 +293,22 @@ func (e *CodexExecutor) executeOpenAIImageStream(ctx context.Context, auth *clip
277293
return &cliproxyexecutor.StreamResult{Headers: httpResp.Header.Clone(), Chunks: out}, nil
278294
}
279295

280-
func (e *CodexExecutor) prepareCodexOpenAIImageBody(body []byte, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) ([]byte, error) {
296+
func (e *CodexExecutor) prepareCodexOpenAIImageBody(body []byte, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, mainModel string) ([]byte, error) {
281297
out := body
298+
mainModel = strings.TrimSpace(mainModel)
299+
if mainModel == "" {
300+
mainModel = codexOpenAIImagesMainModel
301+
}
282302
var errThinking error
283-
out, errThinking = thinking.ApplyThinking(out, codexOpenAIImagesMainModel, codexOpenAIImageSourceFormat, "codex", e.Identifier())
303+
out, errThinking = thinking.ApplyThinking(out, mainModel, codexOpenAIImageSourceFormat, "codex", e.Identifier())
284304
if errThinking != nil {
285305
return nil, errThinking
286306
}
287307

288308
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
289309
requestPath := helps.PayloadRequestPath(opts)
290-
out = helps.ApplyPayloadConfigWithRequest(e.cfg, codexOpenAIImagesMainModel, "codex", codexOpenAIImageSourceFormat, "", out, body, requestedModel, requestPath, opts.Headers)
291-
out, _ = sjson.SetBytes(out, "model", codexOpenAIImagesMainModel)
310+
out = helps.ApplyPayloadConfigWithRequest(e.cfg, mainModel, "codex", codexOpenAIImageSourceFormat, "", out, body, requestedModel, requestPath, opts.Headers)
311+
out, _ = sjson.SetBytes(out, "model", mainModel)
292312
out, _ = sjson.SetBytes(out, "stream", true)
293313
out, _ = sjson.DeleteBytes(out, "previous_response_id")
294314
out, _ = sjson.DeleteBytes(out, "prompt_cache_retention")

internal/watcher/diff/config_diff.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
4848
if oldCfg.DisableImageGeneration != newCfg.DisableImageGeneration {
4949
changes = append(changes, fmt.Sprintf("disable-image-generation: %v -> %v", oldCfg.DisableImageGeneration, newCfg.DisableImageGeneration))
5050
}
51+
if strings.TrimSpace(oldCfg.GPTImage2BaseModel) != strings.TrimSpace(newCfg.GPTImage2BaseModel) {
52+
changes = append(changes, fmt.Sprintf("gpt-image-2-base-model: %s -> %s", strings.TrimSpace(oldCfg.GPTImage2BaseModel), strings.TrimSpace(newCfg.GPTImage2BaseModel)))
53+
}
5154
if oldCfg.RequestLog != newCfg.RequestLog {
5255
changes = append(changes, fmt.Sprintf("request-log: %t -> %t", oldCfg.RequestLog, newCfg.RequestLog))
5356
}

0 commit comments

Comments
 (0)