Add configurable aspect ratio and resolution for image generation

Copilot · owndev · Copilot · commit 1ebd24895dac · 2025-12-08T08:42:27.000Z
- Added IMAGE_GENERATION_ASPECT_RATIO valve (default: 1:1)
- Added IMAGE_GENERATION_RESOLUTION valve (default: 2K)
- Added validation methods for aspect_ratio and resolution
- Integrated ImageConfig into _configure_generation when image generation is enabled
- Updated documentation with new environment variables and usage examples
- Updated version to 1.10.0

Co-authored-by: owndev <69784886+owndev@users.noreply.github.com>
diff --git a/docs/google-gemini-integration.md b/docs/google-gemini-integration.md
@@ -128,6 +128,17 @@ GOOGLE_IMAGE_HISTORY_FIRST=true
 # Default: true
 GOOGLE_IMAGE_UPLOAD_FALLBACK=true
 
+# Image generation configuration (for image-capable models like gemini-2.5-flash-image-preview)
+# Default aspect ratio for generated images
+# Valid values: "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"
+# Default: "1:1"
+GOOGLE_IMAGE_GENERATION_ASPECT_RATIO="1:1"
+
+# Default resolution for generated images
+# Valid values: "1K", "2K", "4K"
+# Default: "2K"
+GOOGLE_IMAGE_GENERATION_RESOLUTION="2K"
+
 # Enable Gemini thoughts outputs globally
 # Default: true
 GOOGLE_INCLUDE_THOUGHTS=true
@@ -199,6 +210,100 @@ VERTEX_AI_RAG_STORE="projects/your-project/locations/global/collections/default_
 >
 > Future versions may extend these settings to also optimize generated images before upload/display.
 
+## Image Generation Configuration
+
+The Google Gemini pipeline supports configurable aspect ratios and resolutions for image generation with image-capable models (e.g., `gemini-2.5-flash-image-preview`).
+
+### Aspect Ratio
+
+Control the shape and proportions of generated images using the aspect ratio setting:
+
+**Valid Values:**
+- `1:1` - Square (default)
+- `2:3`, `3:2` - Classic photo ratios
+- `3:4`, `4:3` - Standard display ratios
+- `4:5`, `5:4` - Portrait/landscape variants
+- `9:16`, `16:9` - Mobile and widescreen ratios
+- `21:9` - Ultra-wide format
+
+**Configuration:**
+```bash
+# Set via environment variable (global default)
+GOOGLE_IMAGE_GENERATION_ASPECT_RATIO="16:9"
+```
+
+Or configure through the pipeline valves in Open WebUI's Admin panel.
+
+### Resolution
+
+Control the quality and size of generated images:
+
+**Valid Values:**
+- `1K` - Lower resolution, faster generation
+- `2K` - Balanced quality and speed (default)
+- `4K` - Highest quality, slower generation
+
+**Configuration:**
+```bash
+# Set via environment variable (global default)
+GOOGLE_IMAGE_GENERATION_RESOLUTION="4K"
+```
+
+Or configure through the pipeline valves in Open WebUI's Admin panel.
+
+### Per-Request Override
+
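+You can override the valve defaults on a per-request basis by including `aspect_ratio` and/or `image_size` in the request body. Invalid values fall back to `1:1` and `2K` respectively.
+
+**Equivalent direct SDK call** (the pipeline builds the same `types.ImageConfig` for you):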
+
+```python
+from google import genai
+from google.genai import types
+
+client = genai.Client(api_key="your-api-key")
+
+# Generate a 4K widescreen image
+response = client.models.generate_content(
+    model="gemini-2.5-flash-image-preview",
+    contents="A serene mountain landscape at sunset",
+    config=types.GenerateContentConfig(
+        response_modalities=["TEXT", "IMAGE"],
+        image_config=types.ImageConfig(
+            aspect_ratio="16:9",
+            image_size="4K"
+        ),
+    )
+)
+
+for part in response.parts:
+    if part.text:
+        print(part.text)
+    elif image := part.as_image():
+        image.save("landscape.png")
+```
+
+### Use Cases
+
+**Portrait Photography (`3:4` or `4:5`)**
+- Social media profile images
+- Portrait-oriented artwork
+
+**Widescreen Content (`16:9` or `21:9`)**
+- Desktop wallpapers
+- YouTube thumbnails
+- Presentation slides
+
+**Square Images (`1:1`)**
+- Instagram posts
+- Icons and logos
+- Product photos
+
+**Mobile-First (`9:16`)**
+- Instagram Stories
+- TikTok content
+- Mobile app screens
+
 ## Web search and access 
 
 [Grounding with Google search](https://ai.google.dev/gemini-api/docs/google-search) together with the [URL context tool](https://ai.google.dev/gemini-api/docs/url-context) are enabled/disabled together via the `google_search_tool` feature, which can be switched on/off in a Filter.
diff --git a/pipelines/google/google_gemini.py b/pipelines/google/google_gemini.py
@@ -4,7 +4,7 @@
 author_url: https://github.com/owndev/
 project_url: https://github.com/owndev/Open-WebUI-Functions
 funding_url: https://github.com/sponsors/owndev
-version: 1.9.2
+version: 1.10.0
 required_open_webui_version: 0.6.26
 license: Apache License 2.0
 description: Highly optimized Google Gemini pipeline with advanced image generation capabilities, intelligent compression, and streamlined processing workflows.
@@ -35,6 +35,7 @@
   - Flexible upload fallback options and optimization controls
   - Configurable thinking levels (low/high) for Gemini 3 models
   - Configurable thinking budgets (0-32768 tokens) for Gemini 2.5 models
+  - Configurable image generation aspect ratio (1:1, 16:9, etc.) and resolution (1K, 2K, 4K)
 """
 
 import os
@@ -259,6 +260,14 @@ class Valves(BaseModel):
             default=os.getenv("GOOGLE_IMAGE_HISTORY_FIRST", "true").lower() == "true",
             description="If true (default), history images precede current message images; if false, current images first.",
         )
+        IMAGE_GENERATION_ASPECT_RATIO: str = Field(
+            default=os.getenv("GOOGLE_IMAGE_GENERATION_ASPECT_RATIO", "1:1"),
+            description="Default aspect ratio for image generation. Valid values: '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'",
+        )
+        IMAGE_GENERATION_RESOLUTION: str = Field(
+            default=os.getenv("GOOGLE_IMAGE_GENERATION_RESOLUTION", "2K"),
+            description="Default resolution for image generation. Valid values: '1K', '2K', '4K'",
+        )
 
     # ---------------- Internal Helpers ---------------- #
     async def _gather_history_images(
@@ -863,6 +872,69 @@ def _validate_thinking_budget(self, budget: int) -> int:
         )
         return -1
 
+    def _validate_aspect_ratio(self, aspect_ratio: str) -> Optional[str]:
+        """
+        Validate and normalize the aspect ratio value.
+
+        Args:
+            aspect_ratio: The aspect ratio to validate, e.g. "16:9" (whitespace is stripped)
+
+        Returns:
+            None if empty/unset, the ratio if valid, else the default "1:1" (with a warning)
+        """
+        if not aspect_ratio:
+            return None
+
+        # Valid aspect ratios according to Google's API
+        valid_ratios = [
+            "1:1",
+            "2:3",
+            "3:2",
+            "3:4",
+            "4:3",
+            "4:5",
+            "5:4",
+            "9:16",
+            "16:9",
+            "21:9",
+        ]
+        # Non-string values (e.g. a number in the request body) are invalid, not a crash
+        normalized = aspect_ratio.strip() if isinstance(aspect_ratio, str) else None
+        if normalized in valid_ratios:
+            return normalized
+
+        self.log.warning(
+            f"Invalid aspect ratio '{aspect_ratio}'. Valid values are: {', '.join(valid_ratios)}. "
+            "Using default '1:1'."
+        )
+        return "1:1"
+
+    def _validate_resolution(self, resolution: str) -> Optional[str]:
+        """
+        Validate and normalize the resolution value.
+
+        Args:
+            resolution: The resolution to validate, e.g. "2K" (case-insensitive, stripped)
+
+        Returns:
+            None if empty/unset, the resolution if valid, else the default "2K" (with a warning)
+        """
+        if not resolution:
+            return None
+
+        # Valid resolutions according to Google's API
+        valid_resolutions = ["1K", "2K", "4K"]
+        # Non-string values (e.g. a number in the request body) are invalid, not a crash
+        normalized = resolution.strip().upper() if isinstance(resolution, str) else None
+        if normalized in valid_resolutions:
+            return normalized
+
+        self.log.warning(
+            f"Invalid resolution '{resolution}'. Valid values are: {', '.join(valid_resolutions)}. "
+            "Using default '2K'."
+        )
+        return "2K"
+
     def pipes(self) -> List[Dict[str, str]]:
         """
         Returns a list of available Google Gemini models for the UI.
@@ -1569,6 +1641,36 @@ def _configure_generation(
         if enable_image_generation:
             gen_config_params["response_modalities"] = ["TEXT", "IMAGE"]
 
+            # Configure image generation parameters (aspect ratio and resolution)
+            # Body parameters override valve defaults for per-request customization
+            aspect_ratio = body.get(
+                "aspect_ratio", self.valves.IMAGE_GENERATION_ASPECT_RATIO
+            )
+            resolution = body.get("image_size", self.valves.IMAGE_GENERATION_RESOLUTION)
+
+            # Validate and normalize the values
+            validated_aspect_ratio = self._validate_aspect_ratio(aspect_ratio)
+            validated_resolution = self._validate_resolution(resolution)
+
+            # Create image config if we have valid values
+            if validated_aspect_ratio and validated_resolution:
+                try:
+                    gen_config_params["image_config"] = types.ImageConfig(
+                        aspect_ratio=validated_aspect_ratio,
+                        image_size=validated_resolution,
+                    )
+                    self.log.debug(
+                        f"Image generation config: aspect_ratio={validated_aspect_ratio}, resolution={validated_resolution}"
+                    )
+                except (AttributeError, TypeError) as e:
+                    # Fall back if SDK does not support ImageConfig
+                    self.log.warning(
+                        f"ImageConfig not supported by SDK version: {e}. Image generation will use default settings."
+                    )
+                except Exception as e:
+                    # Log unexpected errors but continue without image config
+                    self.log.warning(f"Unexpected error configuring ImageConfig: {e}")
+
         # Configure Gemini thinking/reasoning for models that support it
         # This is independent of include_thoughts - thinking config controls HOW the model reasons,
         # while include_thoughts controls whether the reasoning is shown in the output