test(backends): fix vision Ollama tests failing in CI with 400 model does not support vision (#1185) (#1188)

planetf1 · web-flow · commit 006485cd06f3 · 2026-06-02T22:39:26.000Z
* test(backends): fix vision Ollama tests failing in CI with 400 model does not support vision Structural payload tests (test_image_block_in_instruction, test_image_block_in_chat) were failing in CI because the m_session fixture called start_session() with no model_id, resolving to IBM_GRANITE_4_1_3B (granite4.1:3b) — a text-only model. Attaching images caused Ollama to reject the request with 400, preventing post_processing from running and the structural assertions from ever executing. Fixes are: 1. Add test/backends/conftest.py with a shared mock_ollama_backend fixture that constructs an OllamaModelBackend entirely offline (patches _check_ollama_server, _pull_ollama_model, ollama.Client, ollama.AsyncClient). No live server required. 2. Rewrite test_vision_ollama.py into three tiers: - Tier 1 (construction): pure ImageBlock unit tests, no model or server. - Tier 2 (structural payload): mocked offline tests that verify images are embedded correctly in the Ollama conversation payload. The _async_client property is mocked via PropertyMock at the class level so the mock is returned regardless of which event loop _run_async_in_thread creates in its background thread. Runs in CI unconditionally. - Tier 3 (dormant e2e): skipped until granite-vision-4.1 lands on Ollama; tracked in #1187. 3. Refactor test_ollama_unit.py to use the shared mock_ollama_backend fixture, removing the duplicated _make_backend() helper. Closes #1185. Assisted-by: Claude Code Signed-off-by: Nigel Jones <jonesn@uk.ibm.com> * test(backends): address code review findings for vision Ollama tests Remove dead code after unconditional pytest.skip() in vision_session — the availability check is now the sole gate, which auto-activates once granite-vision-4.1 lands on Ollama. Update conftest.py docstring to show the correct PropertyMock class-level patching pattern (instance assignment does not override the event-loop-keyed _async_client property). 
Compute ImageBlock.from_pil_image() once in test_image_block_in_chat instead of three times. Assisted-by: Claude Code Signed-off-by: Nigel Jones <jonesn@uk.ibm.com> --------- Signed-off-by: Nigel Jones <jonesn@uk.ibm.com>
diff --git a/test/backends/conftest.py b/test/backends/conftest.py
@@ -0,0 +1,50 @@
+"""Shared fixtures for backend tests."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from mellea.backends.ollama import OllamaModelBackend
+
+
+@pytest.fixture
+def mock_ollama_backend():
+    """Factory fixture: return a callable that builds an offline OllamaModelBackend.
+
+    No live Ollama server or pulled model is required: the server check, model
+    pull, and the ``ollama.Client`` / ``ollama.AsyncClient`` constructors are
+    patched (the latter two return MagicMocks) only while the backend is built.
+
+    Usage (as a session fixture)::
+
+        @pytest.fixture
+        def session(mock_ollama_backend):
+            backend = mock_ollama_backend(model_options={ModelOption.MAX_NEW_TOKENS: 5})
+            # _async_client is an event-loop-keyed property; instance assignment won't
+            # override it for tests that call through _run_async_in_thread.  Patch at
+            # the class level instead:
+            mock_async = MagicMock(chat=AsyncMock(return_value=canned_response))
+            with patch.object(
+                type(backend), "_async_client", new_callable=PropertyMock, return_value=mock_async
+            ):
+                yield MelleaSession(backend)
+    """
+
+    def _make(
+        model_id: str = "granite4.1:3b",
+        model_options: dict | None = None,
+        timeout: float | None = None,
+    ) -> OllamaModelBackend:
+        with (  # patches are active only while the constructor below runs
+            patch.object(OllamaModelBackend, "_check_ollama_server", return_value=True),
+            patch.object(OllamaModelBackend, "_pull_ollama_model", return_value=True),
+            patch("mellea.backends.ollama.ollama.Client", return_value=MagicMock()),
+            patch(
+                "mellea.backends.ollama.ollama.AsyncClient", return_value=MagicMock()
+            ),
+        ):
+            return OllamaModelBackend(
+                model_id=model_id, model_options=model_options, timeout=timeout
+            )
+
+    return _make  # the fixture value is the factory, not a backend instance
diff --git a/test/backends/test_ollama_unit.py b/test/backends/test_ollama_unit.py
@@ -15,25 +15,10 @@
 from mellea.stdlib.context import SimpleContext
 
 
-def _make_backend(
-    model_options: dict | None = None, timeout: float | None = None
-) -> OllamaModelBackend:
-    """Return an OllamaModelBackend with all network calls patched."""
-    with (
-        patch.object(OllamaModelBackend, "_check_ollama_server", return_value=True),
-        patch.object(OllamaModelBackend, "_pull_ollama_model", return_value=True),
-        patch("mellea.backends.ollama.ollama.Client", return_value=MagicMock()),
-        patch("mellea.backends.ollama.ollama.AsyncClient", return_value=MagicMock()),
-    ):
-        return OllamaModelBackend(
-            model_id="granite3.3:8b", model_options=model_options, timeout=timeout
-        )
-
-
 @pytest.fixture
-def backend():
+def backend(mock_ollama_backend):
     """Return an OllamaModelBackend with no pre-set model options."""
-    return _make_backend()
+    return mock_ollama_backend(model_id="granite3.3:8b")
 
 
 # --- Map consistency ---
@@ -73,9 +58,11 @@ def test_simplify_and_merge_remaps_num_predict(backend):
     assert result[ModelOption.MAX_NEW_TOKENS] == 128
 
 
-def test_simplify_and_merge_per_call_overrides_backend():
+def test_simplify_and_merge_per_call_overrides_backend(mock_ollama_backend):
     # Backend sets num_predict=128; per-call value of 256 must win.
-    b = _make_backend(model_options={"num_predict": 128})
+    b = mock_ollama_backend(
+        model_id="granite3.3:8b", model_options={"num_predict": 128}
+    )
     result = b._simplify_and_merge({"num_predict": 256})
     assert result[ModelOption.MAX_NEW_TOKENS] == 256
 
@@ -189,9 +176,9 @@ def test_timeout_forwarded_to_sync_and_async_clients():
     assert async_kwargs.get("timeout") == 12.5
 
 
-def test_timeout_forwarded_to_new_async_clients_per_event_loop():
+def test_timeout_forwarded_to_new_async_clients_per_event_loop(mock_ollama_backend):
     """Newly created AsyncClients (one per event loop) must inherit the timeout."""
-    backend = _make_backend(timeout=7.0)
+    backend = mock_ollama_backend(model_id="granite3.3:8b", timeout=7.0)
     with patch(
         "mellea.backends.ollama.ollama.AsyncClient", return_value=MagicMock()
     ) as mock_async_client:
diff --git a/test/backends/test_vision_ollama.py b/test/backends/test_vision_ollama.py