Skip to content

Commit 530c98f

Browse files
authored
Merge branch 'main' into feature/tap-image-target
2 parents e103933 + 3a22875 commit 530c98f

2 files changed

Lines changed: 281 additions & 27 deletions

File tree

pyrit/prompt_target/openai/openai_image_target.py

Lines changed: 85 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Licensed under the MIT license.
33
import base64
44
import logging
5+
import warnings
56
from typing import Any, Literal, Optional
67

78
import httpx
@@ -47,14 +48,27 @@ class OpenAIImageTarget(OpenAITarget):
4748
)
4849
)
4950

51+
# DALL-E-only image sizes that are deprecated in favor of GPT image model sizes.
52+
_DEPRECATED_SIZES = {"256x256", "512x512", "1792x1024", "1024x1792"}
53+
# DALL-E-only quality values that are deprecated in favor of GPT image model values.
54+
_DEPRECATED_QUALITY_VALUES = {"standard", "hd"}
55+
5056
def __init__(
5157
self,
5258
image_size: Literal[
53-
"256x256", "512x512", "1024x1024", "1536x1024", "1024x1536", "1792x1024", "1024x1792"
59+
"auto",
60+
"1024x1024",
61+
"1536x1024",
62+
"1024x1536",
63+
"256x256",
64+
"512x512",
65+
"1792x1024",
66+
"1024x1792",
5467
] = "1024x1024",
5568
output_format: Optional[Literal["png", "jpeg", "webp"]] = None,
56-
quality: Optional[Literal["standard", "hd", "low", "medium", "high"]] = None,
69+
quality: Optional[Literal["auto", "low", "medium", "high", "standard", "hd"]] = None,
5770
style: Optional[Literal["natural", "vivid"]] = None,
71+
background: Optional[Literal["transparent", "opaque", "auto"]] = None,
5872
custom_configuration: Optional[TargetConfiguration] = None,
5973
custom_capabilities: Optional[TargetCapabilities] = None,
6074
*args: Any,
@@ -76,25 +90,27 @@ def __init__(
7690
minute before hitting a rate limit. The number of requests sent to the target
7791
will be capped at the value provided.
7892
image_size (Literal, Optional): The size of the generated image.
79-
Accepts "256x256", "512x512", "1024x1024", "1536x1024",
80-
"1024x1536", "1792x1024", or "1024x1792".
81-
Different models support different image sizes.
82-
GPT image models support "1024x1024", "1536x1024" and "1024x1536".
83-
DALL-E-3 supports "1024x1024", "1792x1024" and "1024x1792".
84-
DALL-E-2 supports "256x256", "512x512" and "1024x1024".
93+
GPT image models support "auto", "1024x1024", "1536x1024", and "1024x1536".
8594
Defaults to "1024x1024".
95+
96+
**Deprecated sizes (will be removed in v0.15.0):**
97+
"256x256", "512x512" (DALL-E-2 only), "1792x1024", "1024x1792" (DALL-E-3 only).
8698
output_format (Literal["png", "jpeg", "webp"], Optional): The output format of the generated images.
87-
This parameter is only supported for GPT image models.
88-
Default is to not specify (which will use the model's default format, e.g. PNG for OpenAI image models).
89-
quality (Literal["standard", "hd", "low", "medium", "high"], Optional): The quality of the generated images.
90-
Different models support different quality settings.
91-
GPT image models support "high", "medium" and "low".
92-
DALL-E-3 supports "hd" and "standard".
93-
DALL-E-2 supports "standard" only.
94-
Default is to not specify.
95-
style (Literal["natural", "vivid"], Optional): The style of the generated images.
96-
This parameter is only supported for DALL-E-3.
97-
Default is to not specify.
99+
Default is to not specify (which will use the model's default format, e.g. PNG).
100+
quality (Literal["auto", "low", "medium", "high"], Optional): The quality of the generated images.
101+
GPT image models support "auto", "high", "medium", and "low".
102+
Default is to not specify, which will use "auto" behavior for platform OpenAI endpoints
103+
and "high" behavior for Azure OpenAI endpoints.
104+
105+
**Deprecated values (will be removed in v0.15.0):**
106+
"standard", "hd" (DALL-E only).
107+
style (Literal["natural", "vivid"], Optional): **Deprecated.** This parameter was only
108+
supported for DALL-E-3 and is not supported by GPT image models.
109+
Will be removed in v0.15.0.
110+
background (Literal["transparent", "opaque", "auto"], Optional): Background behavior for
111+
the generated image. When "transparent", the output format must support transparency
112+
("png" or "webp"). When "auto", the model automatically determines the best background.
113+
Default is to not specify, which will use "auto" behavior.
98114
custom_configuration (TargetConfiguration, Optional): Override the default configuration for
99115
this target instance. Defaults to None.
100116
custom_capabilities (TargetCapabilities, Optional): **Deprecated.** Use
@@ -104,11 +120,49 @@ def __init__(
104120
httpx_client_kwargs (dict, Optional): Additional kwargs to be passed to the
105121
`httpx.AsyncClient()` constructor.
106122
For example, to specify a 3 minutes timeout: httpx_client_kwargs={"timeout": 180}
123+
124+
Raises:
125+
ValueError: If background is "transparent" and output_format is "jpeg",
126+
since JPEG does not support transparency.
107127
"""
128+
# Emit deprecation warnings for DALL-E-only parameters
129+
if style is not None:
130+
warnings.warn(
131+
"The 'style' parameter is deprecated and will be removed in v0.15.0. "
132+
"It was only supported for DALL-E-3, which is being shut down on 2026-05-12.",
133+
DeprecationWarning,
134+
stacklevel=2,
135+
)
136+
137+
if image_size in self._DEPRECATED_SIZES:
138+
warnings.warn(
139+
f"image_size='{image_size}' is a DALL-E-only value and is deprecated. "
140+
f"It will be removed in v0.15.0. DALL-E models are being shut down on 2026-05-12. "
141+
f"GPT image models support 'auto', '1024x1024', '1536x1024', and '1024x1536'.",
142+
DeprecationWarning,
143+
stacklevel=2,
144+
)
145+
146+
if quality is not None and quality in self._DEPRECATED_QUALITY_VALUES:
147+
warnings.warn(
148+
f"quality='{quality}' is a DALL-E-only value and is deprecated. "
149+
f"It will be removed in v0.15.0. DALL-E models are being shut down on 2026-05-12. "
150+
f"GPT image models support 'auto', 'low', 'medium', and 'high'.",
151+
DeprecationWarning,
152+
stacklevel=2,
153+
)
154+
155+
if background == "transparent" and output_format == "jpeg":
156+
raise ValueError(
157+
"background='transparent' requires an output format that supports transparency ('png' or 'webp'). "
158+
"Got output_format='jpeg'."
159+
)
160+
108161
self.output_format = output_format
109162
self.quality = quality
110163
self.style = style
111164
self.image_size = image_size
165+
self.background = background
112166

113167
super().__init__(
114168
*args, custom_configuration=custom_configuration, custom_capabilities=custom_capabilities, **kwargs
@@ -142,6 +196,7 @@ def _build_identifier(self) -> ComponentIdentifier:
142196
"image_size": self.image_size,
143197
"quality": self.quality,
144198
"style": self.style,
199+
"background": self.background,
145200
},
146201
)
147202

@@ -204,6 +259,8 @@ async def _send_generate_request_async(self, message: Message) -> Message:
204259
image_generation_args["quality"] = self.quality
205260
if self.style:
206261
image_generation_args["style"] = self.style
262+
if self.background:
263+
image_generation_args["background"] = self.background
207264

208265
# Use unified error handler for consistent error handling
209266
return await self._handle_openai_request(
@@ -255,6 +312,8 @@ async def _send_edit_request_async(self, message: Message) -> Message:
255312
image_edit_args["quality"] = self.quality
256313
if self.style:
257314
image_edit_args["style"] = self.style
315+
if self.background:
316+
image_edit_args["background"] = self.background
258317

259318
return await self._handle_openai_request(
260319
api_call=lambda: self._client.images.edit(**image_edit_args),
@@ -294,8 +353,7 @@ async def _get_image_bytes(self, image_data: Any) -> bytes:
294353
"""
295354
Extract image bytes from the API response.
296355
297-
Handles both base64-encoded data and URL responses. Some models (like gpt-image-1)
298-
return base64 directly, while others (like dall-e) may return URLs.
356+
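GPT image models always return base64-encoded data. URL responses (a legacy DALL-E behavior) are still handled by a deprecated fallback that downloads the image.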
299357
300358
Args:
301359
image_data: The image data object from the API response.
@@ -306,15 +364,18 @@ async def _get_image_bytes(self, image_data: Any) -> bytes:
306364
Raises:
307365
EmptyResponseException: If neither base64 data nor URL is available.
308366
"""
309-
# Try base64 first (preferred format)
310367
b64_data = getattr(image_data, "b64_json", None)
311368
if b64_data:
312369
return base64.b64decode(b64_data)
313370

314-
# Fall back to URL download
371+
# Legacy fallback for DALL-E models that may return URLs instead of base64.
372+
# This code path is deprecated and will be removed in v0.15.0.
315373
image_url = getattr(image_data, "url", None)
316374
if image_url:
317-
logger.info("Image model returned URL. Downloading image.")
375+
logger.warning(
376+
"Image model returned a URL instead of base64 data. "
377+
"This is a DALL-E behavior that is deprecated. Downloading image from URL."
378+
)
318379
async with httpx.AsyncClient() as http_client:
319380
image_response = await http_client.get(image_url)
320381
image_response.raise_for_status()

0 commit comments

Comments
 (0)