Skip to content

Commit 7caa447

Browse files
committed
feat: enhance openai-image provider and batch settings with i18n support
1 parent 954adaa commit 7caa447

14 files changed

Lines changed: 140 additions & 18 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,4 @@ templates.json
7676
templates_nano_banana_pro.json
7777
TEMPLATE_MARKET_PLAN.md
7878
.sisyphus/
79+
.codex/

backend/internal/provider/openai_image.go

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ func (p *OpenAIImageProvider) Generate(ctx context.Context, params map[string]in
9595
return nil, fmt.Errorf("缺少 model_id 参数")
9696
}
9797

98-
reqBody, promptPreview, err := p.buildImagesGenerationRequestBody(modelID, params)
98+
refImages, err := collectOpenAIImageReferences(params["reference_images"])
9999
if err != nil {
100100
return nil, err
101101
}
102-
refImages, err := collectOpenAIImageReferences(params["reference_images"])
102+
reqBody, promptPreview, err := p.buildImagesGenerationRequestBody(modelID, params, refImages)
103103
if err != nil {
104104
return nil, err
105105
}
@@ -158,7 +158,7 @@ func (p *OpenAIImageProvider) Generate(ctx context.Context, params map[string]in
158158
}, nil
159159
}
160160

161-
func (p *OpenAIImageProvider) buildImagesGenerationRequestBody(modelID string, params map[string]interface{}) (*openAIImagesGenerationRequest, string, error) {
161+
func (p *OpenAIImageProvider) buildImagesGenerationRequestBody(modelID string, params map[string]interface{}, refs []openAIImageReference) (*openAIImagesGenerationRequest, string, error) {
162162
prompt, _ := params["prompt"].(string)
163163
prompt = strings.TrimSpace(prompt)
164164
if prompt == "" {
@@ -168,7 +168,7 @@ func (p *OpenAIImageProvider) buildImagesGenerationRequestBody(modelID string, p
168168
body := &openAIImagesGenerationRequest{
169169
Model: modelID,
170170
Prompt: prompt,
171-
Size: resolveOpenAIImageSize(modelID, params),
171+
Size: resolveOpenAIImageSize(modelID, params, refs),
172172
N: 1,
173173
}
174174
if quality, _ := params["quality"].(string); strings.TrimSpace(quality) != "" {
@@ -461,12 +461,13 @@ func isValidOpenAIImageSize(size string) bool {
461461
return regexp.MustCompile(`^[1-9][0-9]{1,4}x[1-9][0-9]{1,4}$`).MatchString(size)
462462
}
463463

464-
func resolveOpenAIImageSize(modelID string, params map[string]interface{}) string {
464+
func resolveOpenAIImageSize(modelID string, params map[string]interface{}, refs ...[]openAIImageReference) string {
465465
aspectRatio := firstStringParam(params, "aspect_ratio", "aspectRatio", "aspect")
466466
resolution := firstStringParam(params, "resolution_level", "imageSize", "image_size", "resolution")
467+
refImages := firstOpenAIImageReferenceSlice(refs)
467468
model := strings.ToLower(strings.TrimSpace(modelID))
468469
if size, _ := params["size"].(string); strings.TrimSpace(size) != "" {
469-
return normalizeExplicitOpenAIImageSize(model, strings.TrimSpace(strings.ToLower(size)), aspectRatio, resolution)
470+
return normalizeExplicitOpenAIImageSize(model, strings.TrimSpace(strings.ToLower(size)), aspectRatio, resolution, refImages)
470471
}
471472

472473
if strings.Contains(model, "dall-e-3") {
@@ -476,12 +477,15 @@ func resolveOpenAIImageSize(modelID string, params map[string]interface{}) strin
476477
return "1024x1024"
477478
}
478479
if strings.Contains(model, "gpt-image-2") {
480+
if isAutoAspectRatio(aspectRatio) {
481+
return computeDynamicOpenAIImageSizeFromReference(refImages, resolution)
482+
}
479483
return computeDynamicOpenAIImageSize(aspectRatio, resolution)
480484
}
481485
return resolveStandardGPTImageSize(aspectRatio)
482486
}
483487

484-
func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution string) string {
488+
func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution string, refs []openAIImageReference) string {
485489
if strings.Contains(model, "dall-e-3") {
486490
switch size {
487491
case "1024x1024", "1792x1024", "1024x1792":
@@ -502,6 +506,9 @@ func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution strin
502506

503507
if strings.Contains(model, "gpt-image-2") {
504508
if size == "auto" {
509+
if isAutoAspectRatio(aspectRatio) {
510+
return computeDynamicOpenAIImageSizeFromReference(refs, resolution)
511+
}
505512
return computeDynamicOpenAIImageSize(aspectRatio, resolution)
506513
}
507514
return size
@@ -515,6 +522,17 @@ func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution strin
515522
}
516523
}
517524

525+
func firstOpenAIImageReferenceSlice(refs [][]openAIImageReference) []openAIImageReference {
526+
if len(refs) == 0 {
527+
return nil
528+
}
529+
return refs[0]
530+
}
531+
532+
// isAutoAspectRatio reports whether aspectRatio is the keyword "auto",
// ignoring surrounding whitespace and letter case.
func isAutoAspectRatio(aspectRatio string) bool {
	trimmed := strings.TrimSpace(aspectRatio)
	return strings.EqualFold(trimmed, "auto")
}
535+
518536
func firstStringParam(params map[string]interface{}, keys ...string) string {
519537
for _, key := range keys {
520538
if val, ok := params[key].(string); ok && strings.TrimSpace(val) != "" {
@@ -555,11 +573,29 @@ func resolveStandardGPTImageSize(aspectRatio string) string {
555573
// 根据用户选择的宽高比和分辨率档位,为 gpt-image-2 代理计算实际 WxH。
556574
// 计算流程:先确定长边,再按宽高比推导短边,最后做 16 像素对齐和总像素上限保护。
557575
func computeDynamicOpenAIImageSize(aspectRatio, resolution string) string {
576+
if isAutoAspectRatio(aspectRatio) {
577+
return "auto"
578+
}
579+
558580
wRatio, hRatio, ok := parseAspectRatio(aspectRatio)
559581
if !ok {
560582
return "auto"
561583
}
584+
return computeDynamicOpenAIImageSizeFromRatio(wRatio, hRatio, resolution)
585+
}
586+
587+
func computeDynamicOpenAIImageSizeFromReference(refs []openAIImageReference, resolution string) string {
588+
if len(refs) == 0 || len(refs[0].Content) == 0 {
589+
return "auto"
590+
}
591+
cfg, _, err := image.DecodeConfig(bytes.NewReader(refs[0].Content))
592+
if err != nil || cfg.Width <= 0 || cfg.Height <= 0 {
593+
return "auto"
594+
}
595+
return computeDynamicOpenAIImageSizeFromRatio(cfg.Width, cfg.Height, resolution)
596+
}
562597

598+
func computeDynamicOpenAIImageSizeFromRatio(wRatio, hRatio int, resolution string) string {
563599
// gpt-image-2 代理支持更灵活的 WxH。这里用用户选择的分辨率作为长边,
564600
// 再按比例计算短边,并统一向下取 16 的倍数,避免上游拒绝非对齐尺寸。
565601
longEdge := 2048

backend/internal/provider/openai_image_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"image-gen-service/internal/model"
99
"image/color"
1010
"image/jpeg"
11+
"image/png"
1112
"io"
1213
"net/http"
1314
"net/http/httptest"
@@ -43,6 +44,15 @@ func TestResolveOpenAIImageSize(t *testing.T) {
4344
},
4445
want: "2048x1152",
4546
},
47+
{
48+
name: "gpt image 2 supports auto aspect ratio",
49+
model: "gpt-image-2",
50+
params: map[string]interface{}{
51+
"aspectRatio": "auto",
52+
"imageSize": "4K",
53+
},
54+
want: "auto",
55+
},
4656
{
4757
name: "standard gpt image uses supported landscape size",
4858
model: "gpt-image-1",
@@ -223,6 +233,56 @@ func TestOpenAIImageProviderGenerateWithReferenceUsesEdits(t *testing.T) {
223233
}
224234
}
225235

236+
func TestOpenAIImageProviderAutoAspectRatioUsesReferenceDimensions(t *testing.T) {
237+
var ref bytes.Buffer
238+
img := image.NewRGBA(image.Rect(0, 0, 100, 200))
239+
img.Set(0, 0, color.White)
240+
if err := png.Encode(&ref, img); err != nil {
241+
t.Fatalf("encode png: %v", err)
242+
}
243+
244+
seenFields := map[string]string{}
245+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
246+
if err := r.ParseMultipartForm(2 << 20); err != nil {
247+
t.Fatalf("ParseMultipartForm: %v", err)
248+
}
249+
for key, values := range r.MultipartForm.Value {
250+
if len(values) > 0 {
251+
seenFields[key] = values[0]
252+
}
253+
}
254+
_ = json.NewEncoder(w).Encode(map[string]interface{}{
255+
"data": []map[string]string{{"b64_json": tinyPNGBase64}},
256+
})
257+
}))
258+
defer server.Close()
259+
260+
p, err := NewOpenAIImageProvider(&model.ProviderConfig{
261+
ProviderName: "openai-image",
262+
APIBase: server.URL,
263+
APIKey: "test-key",
264+
TimeoutSeconds: 5,
265+
})
266+
if err != nil {
267+
t.Fatalf("NewOpenAIImageProvider: %v", err)
268+
}
269+
270+
_, err = p.Generate(t.Context(), map[string]interface{}{
271+
"prompt": "edit prompt",
272+
"model_id": "gpt-image-2",
273+
"aspect_ratio": "auto",
274+
"resolution_level": "2K",
275+
"count": 1,
276+
"reference_images": []interface{}{ref.Bytes()},
277+
})
278+
if err != nil {
279+
t.Fatalf("Generate: %v", err)
280+
}
281+
if seenFields["size"] != "1024x2048" {
282+
t.Fatalf("size = %q, want 1024x2048; fields=%+v", seenFields["size"], seenFields)
283+
}
284+
}
285+
226286
func TestOpenAIImageProviderConvertsJPEGReferenceToPNG(t *testing.T) {
227287
var jpegRef bytes.Buffer
228288
img := image.NewRGBA(image.Rect(0, 0, 1, 1))

desktop/package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

desktop/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "nano-banana-pro-frontend",
33
"private": true,
4-
"version": "2.8.2",
4+
"version": "2.8.3",
55
"license": "MIT",
66
"description": "大香蕉图片生成工具 - 批量图片生成应用前端",
77
"type": "module",

desktop/src-tauri/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

desktop/src-tauri/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "desktop"
3-
version = "2.8.2"
3+
version = "2.8.3"
44
description = "A Tauri App"
55
authors = ["you"]
66
edition = "2021"

desktop/src-tauri/tauri.conf.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"$schema": "https://schema.tauri.app/config/2",
33
"productName": "大香蕉 AI",
4-
"version": "2.8.2",
4+
"version": "2.8.3",
55
"identifier": "com.dztool.banana",
66
"build": {
77
"beforeDevCommand": "npm run dev",

desktop/src/components/ConfigPanel/BatchSettings.tsx

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { useMemo, useEffect } from 'react';
2-
import { Settings2 } from 'lucide-react';
2+
import { Info, Settings2 } from 'lucide-react';
33
import { useTranslation } from 'react-i18next';
44
import {
55
useConfigStore,
@@ -27,12 +27,14 @@ export function BatchSettings() {
2727
aspectRatio,
2828
setAspectRatio,
2929
imageModel,
30-
imageProvider
30+
imageProvider,
31+
refFiles
3132
} = useConfigStore();
3233

3334
const supportedRatios = useMemo(() => getModelAspectRatios(imageModel), [imageModel]);
3435
const useNativeSize = isUsingNativeImageSize(imageProvider, imageModel);
3536
const useQuality = isQualityControlSupported(imageProvider);
37+
const showAutoReferenceHint = !useNativeSize && aspectRatio === 'auto' && refFiles.length > 0;
3638

3739
useEffect(() => {
3840
if (useNativeSize) {
@@ -100,7 +102,22 @@ export function BatchSettings() {
100102

101103
{!useNativeSize ? (
102104
<div className="space-y-1">
103-
<label className="text-xs text-gray-500">{t('config.batch.aspectRatio')}</label>
105+
<div className="flex items-center gap-1.5">
106+
<label className="text-xs text-gray-500">{t('config.batch.aspectRatio')}</label>
107+
{showAutoReferenceHint ? (
108+
<span
109+
className="group relative inline-flex h-4 w-4 items-center justify-center text-gray-400 hover:text-gray-600 focus:outline-none"
110+
tabIndex={0}
111+
title={t('config.batch.autoRatioReferenceHint')}
112+
aria-label={t('config.batch.autoRatioReferenceHint')}
113+
>
114+
<Info className="h-3.5 w-3.5" />
115+
<span className="pointer-events-none absolute left-0 top-full z-20 mt-1 hidden w-56 rounded-md bg-gray-900 px-2 py-1.5 text-xs leading-4 text-white shadow-lg group-hover:block group-focus:block">
116+
{t('config.batch.autoRatioReferenceHint')}
117+
</span>
118+
</span>
119+
) : null}
120+
</div>
104121
<Select value={aspectRatio} onChange={(e) => setAspectRatio(e.target.value)} className="h-9 text-sm">
105122
{supportedRatios.map((ratio) => {
106123
const key = ratio.replace(':', '_');

desktop/src/i18n/locales/en-US.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,9 @@
431431
"resolution2k": "2K (2048px)",
432432
"resolution4k": "4K (3840px)",
433433
"aspectRatio": "Aspect Ratio",
434+
"autoRatioReferenceHint": "Auto uses the first reference image's aspect ratio and the selected resolution.",
434435
"ratio": {
436+
"auto": "Auto",
435437
"1_1": "1:1 (Square)",
436438
"1_4": "1:4 (Tall Portrait)",
437439
"1_8": "1:8 (Ultra-tall Portrait)",

0 commit comments

Comments
 (0)