@@ -95,11 +95,11 @@ func (p *OpenAIImageProvider) Generate(ctx context.Context, params map[string]in
9595 return nil , fmt .Errorf ("缺少 model_id 参数" )
9696 }
9797
98- reqBody , promptPreview , err := p . buildImagesGenerationRequestBody ( modelID , params )
98+ refImages , err := collectOpenAIImageReferences ( params [ "reference_images" ] )
9999 if err != nil {
100100 return nil , err
101101 }
102- refImages , err := collectOpenAIImageReferences ( params [ "reference_images" ] )
102+ reqBody , promptPreview , err := p . buildImagesGenerationRequestBody ( modelID , params , refImages )
103103 if err != nil {
104104 return nil , err
105105 }
@@ -158,7 +158,7 @@ func (p *OpenAIImageProvider) Generate(ctx context.Context, params map[string]in
158158 }, nil
159159}
160160
161- func (p * OpenAIImageProvider ) buildImagesGenerationRequestBody (modelID string , params map [string ]interface {}) (* openAIImagesGenerationRequest , string , error ) {
161+ func (p * OpenAIImageProvider ) buildImagesGenerationRequestBody (modelID string , params map [string ]interface {}, refs [] openAIImageReference ) (* openAIImagesGenerationRequest , string , error ) {
162162 prompt , _ := params ["prompt" ].(string )
163163 prompt = strings .TrimSpace (prompt )
164164 if prompt == "" {
@@ -168,7 +168,7 @@ func (p *OpenAIImageProvider) buildImagesGenerationRequestBody(modelID string, p
168168 body := & openAIImagesGenerationRequest {
169169 Model : modelID ,
170170 Prompt : prompt ,
171- Size : resolveOpenAIImageSize (modelID , params ),
171+ Size : resolveOpenAIImageSize (modelID , params , refs ),
172172 N : 1 ,
173173 }
174174 if quality , _ := params ["quality" ].(string ); strings .TrimSpace (quality ) != "" {
@@ -461,12 +461,13 @@ func isValidOpenAIImageSize(size string) bool {
461461 return regexp .MustCompile (`^[1-9][0-9]{1,4}x[1-9][0-9]{1,4}$` ).MatchString (size )
462462}
463463
464- func resolveOpenAIImageSize (modelID string , params map [string ]interface {}) string {
464+ func resolveOpenAIImageSize (modelID string , params map [string ]interface {}, refs ... [] openAIImageReference ) string {
465465 aspectRatio := firstStringParam (params , "aspect_ratio" , "aspectRatio" , "aspect" )
466466 resolution := firstStringParam (params , "resolution_level" , "imageSize" , "image_size" , "resolution" )
467+ refImages := firstOpenAIImageReferenceSlice (refs )
467468 model := strings .ToLower (strings .TrimSpace (modelID ))
468469 if size , _ := params ["size" ].(string ); strings .TrimSpace (size ) != "" {
469- return normalizeExplicitOpenAIImageSize (model , strings .TrimSpace (strings .ToLower (size )), aspectRatio , resolution )
470+ return normalizeExplicitOpenAIImageSize (model , strings .TrimSpace (strings .ToLower (size )), aspectRatio , resolution , refImages )
470471 }
471472
472473 if strings .Contains (model , "dall-e-3" ) {
@@ -476,12 +477,15 @@ func resolveOpenAIImageSize(modelID string, params map[string]interface{}) strin
476477 return "1024x1024"
477478 }
478479 if strings .Contains (model , "gpt-image-2" ) {
480+ if isAutoAspectRatio (aspectRatio ) {
481+ return computeDynamicOpenAIImageSizeFromReference (refImages , resolution )
482+ }
479483 return computeDynamicOpenAIImageSize (aspectRatio , resolution )
480484 }
481485 return resolveStandardGPTImageSize (aspectRatio )
482486}
483487
484- func normalizeExplicitOpenAIImageSize (model , size , aspectRatio , resolution string ) string {
488+ func normalizeExplicitOpenAIImageSize (model , size , aspectRatio , resolution string , refs [] openAIImageReference ) string {
485489 if strings .Contains (model , "dall-e-3" ) {
486490 switch size {
487491 case "1024x1024" , "1792x1024" , "1024x1792" :
@@ -502,6 +506,9 @@ func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution strin
502506
503507 if strings .Contains (model , "gpt-image-2" ) {
504508 if size == "auto" {
509+ if isAutoAspectRatio (aspectRatio ) {
510+ return computeDynamicOpenAIImageSizeFromReference (refs , resolution )
511+ }
505512 return computeDynamicOpenAIImageSize (aspectRatio , resolution )
506513 }
507514 return size
@@ -515,6 +522,17 @@ func normalizeExplicitOpenAIImageSize(model, size, aspectRatio, resolution strin
515522 }
516523}
517524
525+ func firstOpenAIImageReferenceSlice (refs [][]openAIImageReference ) []openAIImageReference {
526+ if len (refs ) == 0 {
527+ return nil
528+ }
529+ return refs [0 ]
530+ }
531+
// isAutoAspectRatio reports whether aspectRatio, ignoring surrounding
// whitespace and letter case, is the keyword "auto".
func isAutoAspectRatio(aspectRatio string) bool {
	trimmed := strings.TrimSpace(aspectRatio)
	return strings.EqualFold(trimmed, "auto")
}
535+
518536func firstStringParam (params map [string ]interface {}, keys ... string ) string {
519537 for _ , key := range keys {
520538 if val , ok := params [key ].(string ); ok && strings .TrimSpace (val ) != "" {
@@ -555,11 +573,29 @@ func resolveStandardGPTImageSize(aspectRatio string) string {
555573// 根据用户选择的宽高比和分辨率档位,为 gpt-image-2 代理计算实际 WxH。
556574// 计算流程:先确定长边,再按宽高比推导短边,最后做 16 像素对齐和总像素上限保护。
557575func computeDynamicOpenAIImageSize (aspectRatio , resolution string ) string {
576+ if isAutoAspectRatio (aspectRatio ) {
577+ return "auto"
578+ }
579+
558580 wRatio , hRatio , ok := parseAspectRatio (aspectRatio )
559581 if ! ok {
560582 return "auto"
561583 }
584+ return computeDynamicOpenAIImageSizeFromRatio (wRatio , hRatio , resolution )
585+ }
586+
587+ func computeDynamicOpenAIImageSizeFromReference (refs []openAIImageReference , resolution string ) string {
588+ if len (refs ) == 0 || len (refs [0 ].Content ) == 0 {
589+ return "auto"
590+ }
591+ cfg , _ , err := image .DecodeConfig (bytes .NewReader (refs [0 ].Content ))
592+ if err != nil || cfg .Width <= 0 || cfg .Height <= 0 {
593+ return "auto"
594+ }
595+ return computeDynamicOpenAIImageSizeFromRatio (cfg .Width , cfg .Height , resolution )
596+ }
562597
598+ func computeDynamicOpenAIImageSizeFromRatio (wRatio , hRatio int , resolution string ) string {
563599 // gpt-image-2 代理支持更灵活的 WxH。这里用用户选择的分辨率作为长边,
564600 // 再按比例计算短边,并统一向下取 16 的倍数,避免上游拒绝非对齐尺寸。
565601 longEdge := 2048
0 commit comments