Fix Veo video generation: retire veo-2, repair polling loop

anbeckham · claude · anbeckham · commit 5033db3cf171 · 2026-04-20T21:56:17.000-05:00
Two independent bugs caused every generate_video call to fail:

1. The default model "veo-2" (veo-2.0-generate-001) was retired from
   the Gemini API on 2026-04-02, so default calls hit a dead endpoint.
   Default is now veo-3.1-fast; veo-2 removed from config, map, tool
   schema, command doc, and skill matrix.

2. The poll loop called operation.reload(), which does not exist on
   google-genai's GenerateVideosOperation. Replaced with the canonical
   pattern: operation = client.operations.get(operation). Added a
   regression test that asserts operations.get is used.

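Also converts the hooks/hooks.json manifest from a top-level array of
{"event": ...} entries to the object-based schema: a "description" plus
a "hooks" object keyed by event name (here, "SessionStart").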

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/commands/create-video.md b/commands/create-video.md
@@ -26,9 +26,8 @@ Follow this workflow strictly:
 - Clarify: motion style, camera movement, duration, mood
 
 ### 2. Choose Model
-- **veo-2** (default): Stable, reliable — good for most use cases
-- **veo-3.1**: Latest model, best quality — use for premium or complex scenes
-- **veo-3.1-fast**: Faster iteration — use when exploring ideas or testing prompts
+- **veo-3.1-fast** (default): Faster iteration — good for most use cases
+- **veo-3.1**: Best quality — use for premium or complex scenes
 
 Ask the user which model to use if they haven't specified.
 
diff --git a/hooks/hooks.json b/hooks/hooks.json
@@ -1,12 +1,16 @@
-[
-  {
-    "event": "SessionStart",
-    "matcher": "*",
-    "hooks": [
+{
+  "description": "gemini-visual-design hooks — API key validation on session start",
+  "hooks": {
+    "SessionStart": [
       {
-        "type": "command",
-        "command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/check-api-key.sh"
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "${CLAUDE_PLUGIN_ROOT}/hooks/scripts/check-api-key.sh"
+          }
+        ]
       }
     ]
   }
-]
+}
diff --git a/skills/visual-design-system/SKILL.md b/skills/visual-design-system/SKILL.md
@@ -47,8 +47,8 @@ Use this skill when working on visual design tasks — image generation, UI mock
 | Iterative editing | Gemini Flash | Multi-turn editing support |
 | Final production assets | Imagen 4 | Highest quality output |
 | Design analysis | Gemini Flash | Multimodal understanding |
-| Short video clips | Veo 2 | Stable, reliable |
-| High-quality video | Veo 3.1 | Latest features |
+| Short video clips | Veo 3.1 Fast | Faster iteration, good quality |
+| High-quality video | Veo 3.1 | Best quality, latest features |
 
 ## Template Categories
 
diff --git a/src/gemini_visual_mcp/config.py b/src/gemini_visual_mcp/config.py
@@ -15,7 +15,6 @@
 GEMINI_FLASH_IMAGE = "gemini-3.1-flash-image-preview"
 GEMINI_FLASH_TEXT = "gemini-2.5-flash"
 IMAGEN_MODEL = "imagen-4.0-generate-001"
-VEO_2_MODEL = "veo-2.0-generate-001"
 VEO_3_MODEL = "veo-3.1-generate-preview"
 VEO_3_FAST_MODEL = "veo-3.1-fast-generate-preview"
 
@@ -59,7 +58,7 @@
 
 # Model selection labels
 MODEL_CHOICES_IMAGE = ["gemini", "imagen", "auto"]
-MODEL_CHOICES_VIDEO = ["veo-2", "veo-3.1", "veo-3.1-fast"]
+MODEL_CHOICES_VIDEO = ["veo-3.1", "veo-3.1-fast"]
 
 # Analysis focus areas
 ANALYSIS_FOCUS_AREAS = ["color", "layout", "typography", "overall"]
diff --git a/src/gemini_visual_mcp/gemini_client.py b/src/gemini_visual_mcp/gemini_client.py
@@ -13,7 +13,6 @@
     GEMINI_FLASH_IMAGE,
     GEMINI_FLASH_TEXT,
     IMAGEN_MODEL,
-    VEO_2_MODEL,
     VEO_3_FAST_MODEL,
     VEO_3_MODEL,
 )
@@ -27,7 +26,6 @@
 
 # Map friendly model names to API model IDs
 VIDEO_MODEL_MAP = {
-    "veo-2": VEO_2_MODEL,
     "veo-3.1": VEO_3_MODEL,
     "veo-3.1-fast": VEO_3_FAST_MODEL,
 }
@@ -325,19 +323,19 @@ def _call():
     async def generate_video(
         self,
         prompt: str,
-        model: str = "veo-2",
+        model: str = "veo-3.1-fast",
         image_data: Optional[bytes] = None,
         image_mime_type: Optional[str] = None,
     ) -> Any:
         """Start async video generation. Returns an operation to poll.
 
         Args:
             prompt: Video description
-            model: One of "veo-2", "veo-3.1", "veo-3.1-fast"
+            model: One of "veo-3.1", "veo-3.1-fast"
             image_data: Optional reference image bytes
             image_mime_type: MIME type of reference image
         """
-        model_id = VIDEO_MODEL_MAP.get(model, VEO_2_MODEL)
+        model_id = VIDEO_MODEL_MAP.get(model, VEO_3_FAST_MODEL)
 
         def _call():
             # Veo accepts a prompt alongside an image input — passing both lets
@@ -372,16 +370,19 @@ async def poll_video_operation(
         """
 
         def _poll():
+            nonlocal operation
             start = time.monotonic()
-            # Poll until complete
+            # Poll until complete. The google-genai SDK's operation objects are
+            # immutable pydantic models — fetch the updated state via
+            # client.operations.get() rather than mutating in place.
             while not operation.done:
                 if time.monotonic() - start > timeout_seconds:
                     raise GeminiClientError(
                         f"Video generation timed out after {timeout_seconds}s. "
                         "The operation may still be running — try again later."
                     )
                 time.sleep(5)
-                operation.reload()
+                operation = self._client.operations.get(operation)
 
             if operation.response and operation.response.generated_videos:
                 results = []
diff --git a/src/gemini_visual_mcp/server.py b/src/gemini_visual_mcp/server.py
@@ -214,8 +214,8 @@ async def list_tools() -> list[Tool]:
                             "model": {
                                 "type": "string",
                                 "enum": MODEL_CHOICES_VIDEO,
-                                "default": "veo-2",
-                                "description": "Video model: 'veo-2' (stable), 'veo-3.1' (latest), 'veo-3.1-fast'",
+                                "default": "veo-3.1-fast",
+                                "description": "Video model: 'veo-3.1' (best quality) or 'veo-3.1-fast' (faster iteration)",
                             },
                             "reference_image": {
                                 "type": "string",
@@ -451,7 +451,7 @@ async def _handle_tool(self, name: str, args: dict) -> Any:
             result = await generate_video(
                 client=client,
                 prompt=args["prompt"],
-                model=args.get("model", "veo-2"),
+                model=args.get("model", "veo-3.1-fast"),
                 reference_image=args.get("reference_image"),
                 cwd=self._cwd(),
             )
diff --git a/src/gemini_visual_mcp/video_gen.py b/src/gemini_visual_mcp/video_gen.py
@@ -1,6 +1,6 @@
 """Video generation via Veo with async polling.
 
-Supports Veo 2, Veo 3.1, and Veo 3.1 Fast models.
+Supports Veo 3.1 and Veo 3.1 Fast models.
 Generates short video clips with prompt enhancement.
 """
 
@@ -19,7 +19,7 @@
 async def generate_video(
     client: GeminiClient,
     prompt: str,
-    model: str = "veo-2",
+    model: str = "veo-3.1-fast",
     reference_image: Optional[str] = None,
     cwd: str = ".",
     use_profile: bool = True,
@@ -29,7 +29,7 @@ async def generate_video(
     Args:
         client: GeminiClient instance
         prompt: Video description
-        model: "veo-2", "veo-3.1", or "veo-3.1-fast"
+        model: "veo-3.1" or "veo-3.1-fast"
         reference_image: Optional path to a reference image
         cwd: Current working directory for style profile
         use_profile: Whether to apply style profile to prompt
diff --git a/tests/test_gemini_client.py b/tests/test_gemini_client.py
@@ -138,9 +138,10 @@ class TestVideoModelMap:
     def test_model_mapping(self):
         from gemini_visual_mcp.gemini_client import VIDEO_MODEL_MAP
 
-        assert "veo-2" in VIDEO_MODEL_MAP
         assert "veo-3.1" in VIDEO_MODEL_MAP
         assert "veo-3.1-fast" in VIDEO_MODEL_MAP
+        # Retired model (removed from API on 2026-04-02) must not be listed.
+        assert "veo-2" not in VIDEO_MODEL_MAP
 
 
 class TestVideoGenerationWithImage:
@@ -161,7 +162,7 @@ async def test_prompt_passed_with_image(self):
             client = GeminiClient(api_key="test-key")
             await client.generate_video(
                 prompt="dolly forward through a misty forest",
-                model="veo-2",
+                model="veo-3.1-fast",
                 image_data=b"image-bytes",
                 image_mime_type="image/png",
             )
@@ -180,9 +181,54 @@ async def test_prompt_passed_without_image(self):
             client = GeminiClient(api_key="test-key")
             await client.generate_video(
                 prompt="a calm ocean at sunset",
-                model="veo-2",
+                model="veo-3.1-fast",
             )
 
             call_kwargs = mock_models.generate_videos.call_args.kwargs
             assert call_kwargs["prompt"] == "a calm ocean at sunset"
             assert call_kwargs["image"] is None
+
+
+class TestPollVideoOperation:
+    """Regression tests for video operation polling.
+
+    A previous bug called operation.reload() inside the poll loop, which does
+    not exist on google-genai's GenerateVideosOperation pydantic model — every
+    poll iteration would raise AttributeError after the first 5-second sleep.
+    The correct SDK pattern is client.operations.get(operation), which returns
+    a fresh operation object.
+    """
+
+    @pytest.mark.asyncio
+    async def test_poll_uses_operations_get_not_reload(self):
+        with patch("gemini_visual_mcp.gemini_client.genai") as mock_genai:
+            mock_client_inst = mock_genai.Client.return_value
+
+            op_pending = MagicMock(done=False)
+            op_done = MagicMock(done=True)
+            op_done.response.generated_videos = [MagicMock(video="video-ref")]
+            mock_client_inst.operations.get.return_value = op_done
+            mock_client_inst.files.download.return_value = b"video-bytes"
+
+            with patch("gemini_visual_mcp.gemini_client.time.sleep"):
+                client = GeminiClient(api_key="test-key")
+                results = await client.poll_video_operation(op_pending)
+
+            mock_client_inst.operations.get.assert_called_with(op_pending)
+            assert results == [{"data": b"video-bytes", "mime_type": "video/mp4"}]
+
+    @pytest.mark.asyncio
+    async def test_poll_returns_immediately_when_done(self):
+        with patch("gemini_visual_mcp.gemini_client.genai") as mock_genai:
+            mock_client_inst = mock_genai.Client.return_value
+
+            op_done = MagicMock(done=True)
+            op_done.response.generated_videos = [MagicMock(video="video-ref")]
+            mock_client_inst.files.download.return_value = b"video-bytes"
+
+            client = GeminiClient(api_key="test-key")
+            results = await client.poll_video_operation(op_done)
+
+            # Operation already done — no polling required.
+            mock_client_inst.operations.get.assert_not_called()
+            assert results == [{"data": b"video-bytes", "mime_type": "video/mp4"}]
diff --git a/tests/test_server.py b/tests/test_server.py
@@ -76,7 +76,7 @@ async def test_generate_video(self, server):
         with patch("gemini_visual_mcp.server.generate_video") as mock_video:
             mock_video.return_value = {
                 "path": "/tmp/video.mp4",
-                "model": "veo-2",
+                "model": "veo-3.1-fast",
                 "enhanced_prompt": "enhanced",
                 "warnings": [],
             }
@@ -85,7 +85,7 @@ async def test_generate_video(self, server):
             })
 
         assert result["video_path"] == "/tmp/video.mp4"
-        assert result["model"] == "veo-2"
+        assert result["model"] == "veo-3.1-fast"
 
     @pytest.mark.asyncio
     async def test_save_asset(self, server):
diff --git a/tests/test_video_gen.py b/tests/test_video_gen.py
@@ -25,15 +25,15 @@ async def test_text_to_video(self, tmp_path):
                 result = await generate_video(
                     client=mock_client,
                     prompt="A slow camera pan across a mountain landscape at sunrise",
-                    model="veo-2",
+                    model="veo-3.1-fast",
                     cwd=str(tmp_path),
                     use_profile=False,
                 )
 
         mock_client.generate_video.assert_called_once()
         mock_client.poll_video_operation.assert_called_once_with(mock_operation)
         assert "path" in result
-        assert result["model"] == "veo-2"
+        assert result["model"] == "veo-3.1-fast"
         assert "enhanced_prompt" in result
         assert "warnings" in result
 
@@ -55,7 +55,7 @@ async def test_reference_image_read_and_passed(self, tmp_path):
                 await generate_video(
                     client=mock_client,
                     prompt="Animate this scene with gentle motion",
-                    model="veo-2",
+                    model="veo-3.1-fast",
                     reference_image=str(ref_img),
                     cwd=str(tmp_path),
                     use_profile=False,