diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index ff180605ca..2cda2c937f 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -168,6 +168,8 @@ export type CreateImageRequest = { size?: string style?: string outputFormat?: "png" | "jpeg" | "webp" + image?: string | Uint8Array | Buffer // Base image for editing (base64 or binary) + mask?: string | Uint8Array | Buffer // Optional mask for editing (base64 or binary) } export interface ImageGenerationUsage { diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index 6d9b9b8299..622e976c8a 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -787,10 +787,15 @@ export async function OpenAIImageGeneration( quality, style, outputFormat, + image, + mask, ...rest } = req const { trace } = options || {} - let url = `${cfg.base}/images/generations` + + // Detect edit mode based on presence of image parameter + const isEditMode = !!image + let url = `${cfg.base}/images/${isEditMode ? 'edits' : 'generations'}` const isDallE = /^dall-e/i.test(model) const isDallE2 = /^dall-e-2/i.test(model) @@ -798,7 +803,6 @@ export async function OpenAIImageGeneration( const isGpt = /^gpt-image/i.test(model) const body: any = { - model, prompt, size, quality, @@ -806,6 +810,15 @@ export async function OpenAIImageGeneration( ...rest, } + // Add edit mode parameters if present + if (isEditMode) { + body.image = image + if (mask) body.mask = mask + // Model parameter is typically not used in edit mode for OpenAI + } else { + body.model = model + } + // auto is the default quality, so always delete it if (body.quality === "auto" || isDallE2) delete body.quality if (isDallE3) { @@ -846,12 +859,17 @@ export async function OpenAIImageGeneration( if (cfg.type === "azure") { const version = cfg.version || AZURE_OPENAI_API_VERSION trace?.itemValue(`version`, version) - url = - trimTrailingSlash(cfg.base) + - "/" + - body.model + - `/images/generations?api-version=${version}` - delete body.model + if (isEditMode) { + // Azure doesn't use model in the URL for edit mode + url = trimTrailingSlash(cfg.base) + `/images/edits?api-version=${version}` + } else { + url = + trimTrailingSlash(cfg.base) + + "/" + + body.model + + `/images/generations?api-version=${version}` + delete body.model + } } const fetch = await createFetch(options) diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index 8f568a0152..5473b9e4be 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -1184,12 +1184,12 @@ export function createChatGenerationContext( quality, style, outputFormat, + ...rest, }) satisfies CreateImageRequest const m = measure("img.generate", `${req.model} -> image`) const res = await imageGenerator(req, configuration, { trace: imgTrace, cancellationToken, - ...rest, }) const duration = m() if (res.error) { diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index c00a4d709d..6123b3e76b 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -4631,6 +4631,18 @@ interface ImageGenerationOptions extends ImageTransformOptions, RetryOptions { * For gpt-image-1 only, the type of image format to generate. */ outputFormat?: "png" | "jpeg" | "webp" + + /** + * Base image for editing (edit mode). When provided, enables image editing instead of generation. + * Can be a base64-encoded string, Buffer, Uint8Array, or file path. + */ + image?: string | Uint8Array | Buffer + + /** + * Optional mask for editing. Only used in edit mode when `image` is provided. + * Specifies which parts of the image to edit. Can be a base64-encoded string, Buffer, Uint8Array, or file path. + */ + mask?: string | Uint8Array | Buffer } interface TranscriptionOptions extends CacheOptions, RetryOptions {