From 3e3a4d0a720caa7b443b57f9ac31bae6670e493e Mon Sep 17 00:00:00 2001
From: Will Anderson <will.anderson@neurontechnologies.ai>
Date: Thu, 14 May 2026 12:20:35 -0500
Subject: [PATCH] fix: fail fast when model not downloaded instead of infinite
 offline retry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a TTS engine's model weights are not on disk, generation previously
attempted a HuggingFace download that fails in offline/cached mode with a
cryptic error, and the resulting 'failed' status invited the user to retry
— creating an apparent infinite retry loop (issue #434, PR #443 analogue).

Add a pre-load cache check in run_generation(): if the backend reports the
model is not downloaded, raise immediately with a clear message ("Model not
downloaded. Connect to the internet and download it from Settings → Models
first.") rather than attempting a download that will fail in offline mode.

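The check is defensive: backends without _is_model_cached() are skipped.
If the call raises TypeError (e.g. a signature mismatch), it is retried
with no arguments; if that retry also raises, the model is assumed to be
cached and the normal load attempt proceeds. Any other exception from the
first call is not caught by the check.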
---
 backend/services/generation.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/backend/services/generation.py b/backend/services/generation.py
index ce8fe93c..39567b82 100644
--- a/backend/services/generation.py
+++ b/backend/services/generation.py
@@ -58,6 +58,31 @@ async def run_generation(
     try:
         tts_model = get_tts_backend_for_engine(engine)
 
+        # Fast-fail if model weights are not on disk.  Attempting to load a
+        # model that has never been downloaded produces a cryptic HuggingFace
+        # error in offline mode and can create an apparent infinite-retry loop
+        # (the generation is marked failed → user retries → same failure).
+        # Raising here produces a clear message and avoids the retry cycle.
+        is_cached = getattr(tts_model, "_is_model_cached", None)
+        if is_cached is not None:
+            try:
+                cached = (
+                    is_cached(model_size)
+                    if model_size and model_size != "default"
+                    else is_cached()
+                )
+            except TypeError:
+                # Some backends accept no args, some require model_size
+                try:
+                    cached = is_cached()
+                except Exception:
+                    cached = True  # can't tell — let load attempt proceed
+            if not cached:
+                raise RuntimeError(
+                    "Model not downloaded. Connect to the internet and download it "
+                    "from Settings → Models first."
+                )
+
         if not tts_model.is_loaded():
             await history.update_generation_status(generation_id, "loading_model", bg_db)