Skip to content

Commit 88fd0a0

Browse files
committed
feat: Enhance video generation support for Google Veo models; add new duration option and improve error handling
1 parent 0be1b5b commit 88fd0a0

3 files changed

Lines changed: 64 additions & 15 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ The functions include a built-in encryption mechanism for sensitive information:
165165
- **Configurable Parameters**: Environment variables for image optimization (quality, max dimensions, format conversion).
166166
- **Multi-Image History**: Configurable history image limit, hash-based deduplication, and automatic `[Image N]` labels so the model can reference earlier images.
167167
- **Image Generation (Gemini 3)**: Configurable aspect ratio (e.g. `16:9`, `1:1`) and resolution (`1K`/`2K`/`4K`) for Gemini 3 image models; per-user valve overrides supported.
168-
- **Video Generation (Veo)**: Generate videos with Google Veo models (3.1, 3, 2). Configurable aspect ratio, resolution, duration, negative prompt, and person generation controls. Supports text-to-video and image-to-video (Veo 3.1). Videos are automatically uploaded and embedded with playback controls.
168+
- **Video Generation (Veo)**: Generate videos with Google Veo models (3.1, 3, 2). Configurable aspect ratio, resolution, duration, negative prompt, and person generation controls. Supports text-to-video and image-to-video for all supported Veo models. Videos are automatically uploaded and embedded with playback controls.
169169
- **Token Usage Tracking**: Returns prompt, completion, and total token counts to Open WebUI for automatic saving to the database.
170170
- **Model Whitelist & Additional Models**: Restrict the visible model list via `GOOGLE_MODEL_WHITELIST` and add SDK-unsupported models via `GOOGLE_MODEL_ADDITIONAL`.
171171
- Grounding with Google Search via the [google_search_tool.py filter](./filters/google_search_tool.py)

docs/google-gemini-integration.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,13 +435,13 @@ Attach an image to your message when using any Veo model to use it as the starti
435435

436436
1. Select a Veo model (marked with 🎬) from the model list
437437
2. Type your video description prompt
438-
3. Optionally attach an image for image-to-video (Veo 3.1 only)
438+
3. Optionally attach an image for image-to-video (supported by all Veo models)
439439
4. The pipeline submits the request and shows polling status updates
440440
5. Once complete, the video is uploaded to Open WebUI and embedded with a `<video>` player
441441

442442
### Vertex AI Note
443443

444-
When using Vertex AI, video download via `files.download()` is not available. If the Veo API returns a GCS URI instead of raw bytes, the pipeline will link to that URI directly.
444+
When using Vertex AI, video download via `files.download()` is not available. If the Veo API returns a GCS URI instead of raw bytes, the current pipeline does not yet surface that URI or attach the video output in the chat. You may need to retrieve the generated video directly from Vertex AI or the underlying GCS bucket.
445445

446446
## Model Configuration
447447

pipelines/google/google_gemini.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
VIDEO_DURATION_OPTIONS: List[str] = [
108108
"default",
109109
"4",
110+
"5",
110111
"6",
111112
"8",
112113
]
@@ -226,7 +227,7 @@ class UserValves(BaseModel):
226227
)
227228
VIDEO_GENERATION_DURATION: str = Field(
228229
default=os.getenv("GOOGLE_VIDEO_GENERATION_DURATION", "default"),
229-
description="Default duration in seconds for video generation (4, 6, or 8).",
230+
description="Default duration in seconds for video generation (4, 5, 6, or 8 - availability varies by model).",
230231
json_schema_extra={"enum": VIDEO_DURATION_OPTIONS},
231232
)
232233

@@ -381,7 +382,7 @@ class Valves(BaseModel):
381382
)
382383
VIDEO_GENERATION_DURATION: str = Field(
383384
default=os.getenv("GOOGLE_VIDEO_GENERATION_DURATION", "default"),
384-
description="Default duration in seconds for video generation (4, 6, or 8).",
385+
description="Default duration in seconds for video generation (4, 5, 6, or 8 - availability varies by model).",
385386
json_schema_extra={"enum": VIDEO_DURATION_OPTIONS},
386387
)
387388
VIDEO_GENERATION_NEGATIVE_PROMPT: str = Field(
@@ -402,6 +403,10 @@ class Valves(BaseModel):
402403
default=int(os.getenv("GOOGLE_VIDEO_POLL_INTERVAL", "10")),
403404
description="Polling interval in seconds when waiting for video generation to complete.",
404405
)
406+
VIDEO_POLL_TIMEOUT: int = Field(
407+
default=int(os.getenv("GOOGLE_VIDEO_POLL_TIMEOUT", "600")),
408+
description="Maximum time in seconds to wait for video generation before timing out (0=no limit).",
409+
)
405410

406411
# ---------------- Internal Helpers ---------------- #
407412
async def _gather_history_images(
@@ -1126,9 +1131,9 @@ def _validate_resolution(self, resolution: str) -> Optional[str]:
11261131

11271132
def _check_video_generation_support(self, model_id: str) -> bool:
11281133
model_lower = model_id.lower()
1129-
if "veo" in model_lower and "generate" in model_lower:
1130-
return True
1131-
return False
1134+
return model_lower.startswith("veo-") or (
1135+
"veo" in model_lower and "generate" in model_lower
1136+
)
11321137

11331138
def _check_veo_3_1_support(self, model_id: str) -> bool:
11341139
"""Check if a Veo model is version 3.1 (supports reference images, interpolation, 4k, extension)."""
@@ -1218,14 +1223,15 @@ def _validate_video_resolution(self, resolution: str) -> Optional[str]:
12181223
def _validate_video_duration(self, duration: str) -> Optional[int]:
12191224
if not duration or duration.lower() == "default":
12201225
return None
1226+
valid = {int(d) for d in VIDEO_DURATION_OPTIONS if d != "default"}
12211227
try:
12221228
val = int(duration)
1223-
if val in (4, 6, 8):
1229+
if val in valid:
12241230
return val
12251231
except (ValueError, TypeError):
12261232
pass
12271233
self.log.warning(
1228-
f"Invalid video duration '{duration}'. Valid: 4, 6, 8. Using default."
1234+
f"Invalid video duration '{duration}'. Valid: {', '.join(str(v) for v in sorted(valid))}. Using default."
12291235
)
12301236
return None
12311237

@@ -1265,16 +1271,32 @@ def _build_video_generation_config(
12651271
)
12661272
person_generation = None
12671273
if person_generation_raw and person_generation_raw != "default":
1268-
person_generation = person_generation_raw
1274+
valid_person_values = [
1275+
v for v in VIDEO_PERSON_GENERATION_OPTIONS if v != "default"
1276+
]
1277+
if person_generation_raw in valid_person_values:
1278+
person_generation = person_generation_raw
1279+
else:
1280+
self.log.warning(
1281+
f"Invalid person_generation '{person_generation_raw}'. "
1282+
f"Valid: {', '.join(valid_person_values)}. Ignoring."
1283+
)
12691284

12701285
enhance_prompt = body.get(
12711286
"enhance_prompt", self.valves.VIDEO_GENERATION_ENHANCE_PROMPT
12721287
)
12731288

1274-
number_of_videos = body.get("number_of_videos", 1)
1289+
number_of_videos_raw = body.get("number_of_videos", 1)
1290+
try:
1291+
number_of_videos = int(number_of_videos_raw)
1292+
except (ValueError, TypeError):
1293+
self.log.warning(
1294+
f"Invalid number_of_videos '{number_of_videos_raw}', defaulting to 1"
1295+
)
1296+
number_of_videos = 1
12751297

12761298
config_params: Dict[str, Any] = {
1277-
"number_of_videos": min(max(int(number_of_videos), 1), caps["max_videos"]),
1299+
"number_of_videos": min(max(number_of_videos, 1), caps["max_videos"]),
12781300
}
12791301

12801302
# enhance_prompt: not supported by Fast models or Veo 2
@@ -2820,10 +2842,28 @@ async def _generate_video(
28202842
return f"Error starting video generation: {e}"
28212843

28222844
poll_interval = max(self.valves.VIDEO_POLL_INTERVAL, 5)
2845+
poll_timeout = max(self.valves.VIDEO_POLL_TIMEOUT, 0)
28232846
elapsed = 0
28242847
while not operation.done:
28252848
await asyncio.sleep(poll_interval)
28262849
elapsed += poll_interval
2850+
if poll_timeout > 0 and elapsed >= poll_timeout:
2851+
error_msg = (
2852+
f"Video generation timed out after {elapsed}s "
2853+
f"(limit: {poll_timeout}s)"
2854+
)
2855+
self.log.error(error_msg)
2856+
await __event_emitter__(
2857+
{
2858+
"type": "status",
2859+
"data": {
2860+
"action": "video_generation",
2861+
"description": error_msg,
2862+
"done": True,
2863+
},
2864+
}
2865+
)
2866+
return f"Error: {error_msg}"
28272867
try:
28282868
operation = await client.aio.operations.get(operation)
28292869
except Exception as e:
@@ -2889,6 +2929,7 @@ async def _generate_video(
28892929

28902930
# Fallback: save to temp file via SDK
28912931
if not video_bytes:
2932+
tmp_path = None
28922933
try:
28932934
import tempfile
28942935

@@ -2899,13 +2940,18 @@ async def _generate_video(
28992940
await asyncio.to_thread(video.save, tmp_path)
29002941
async with aiofiles.open(tmp_path, "rb") as f:
29012942
video_bytes = await f.read()
2902-
os.unlink(tmp_path)
29032943
self.log.debug(
29042944
f"Video {idx}: temp-file download complete, "
29052945
f"size={len(video_bytes)} bytes"
29062946
)
29072947
except Exception as save_err:
29082948
self.log.warning(f"Video {idx} temp-file save failed: {save_err}")
2949+
finally:
2950+
if tmp_path:
2951+
try:
2952+
os.unlink(tmp_path)
2953+
except OSError:
2954+
pass
29092955

29102956
if not video_bytes:
29112957
self.log.warning(f"Video {idx}: could not obtain video bytes")
@@ -3024,7 +3070,10 @@ async def pipe(
30243070
request_id = id(body)
30253071
self.log.debug(f"Processing request {request_id}")
30263072
self.log.debug(f"User request body: {__user__}")
3027-
self.user = Users.get_user_by_id(__user__["id"])
3073+
if __user__:
3074+
self.user = Users.get_user_by_id(__user__["id"])
3075+
else:
3076+
self.user = None
30283077

30293078
try:
30303079
# Parse and validate model ID

0 commit comments

Comments
 (0)