
Commit f2ce225

[ai] endpoint -> Ollama; firstboot retries instead of premature sentinel
Two bugs left `mios hello` returning 500s on a fresh install:

1. mios.toml [ai].endpoint pointed at LocalAI (:8080/v1), but [ai].model used
   Ollama's name:tag format (qwen2.5-coder:7b), which LocalAI does not
   understand. LocalAI v2.20.0 also chokes on aichat's multimodal
   content-list request shape with:

       interface conversion: interface {} is []interface {}, not string

   The intended chat surface for `mios <prompt>` was always Ollama (the
   dashboard's `AI http://localhost:8080/v1 qwen2.5-coder:7b` line is the
   leftover misalignment; that endpoint will route to Ollama once
   mios-dashboard reads the new value). LocalAI stays in the stack for
   sidecar surfaces (embeddings, image gen, STT/TTS) -- it just isn't the
   chat backend.

2. usr/libexec/mios/ollama-firstboot.sh wrote the sentinel
   /var/lib/mios/.ollama-firstboot-done even when the ollama container was
   unreachable after the 50-second poll.

   Symptom 2026-05-11: during the operator's install the ollama container
   crash-looped on the UID-818 typo (fixed in commit 9702d43). firstboot ran
   during that crash window, timed out polling for the container, wrote the
   sentinel as if it had succeeded, and the unit's
   ConditionPathExists=!sentinel permanently blocked any future retry.
   Result: zero models pulled, /v1/chat/completions served by an empty model
   list, 500s forever.

   The fix: exit non-zero on container-unreachable and DO NOT write the
   sentinel. systemd's Restart= behavior (and the next boot's
   ConditionPathExists check) then retries until the container actually
   responds.
1 parent 9702d43 commit f2ce225

2 files changed

Lines changed: 23 additions & 5 deletions
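The corrected firstboot flow distills to the sketch below. `reachable()` is a stand-in for `podman exec "$CONTAINER" ollama list`, the `SIMULATE_REACHABLE` toggle and the /tmp sentinel path are demo-only assumptions, and the real script sleeps 5 s per attempt (50 s total):

```shell
#!/usr/bin/env sh
# Distilled sketch of the corrected sentinel logic: the sentinel is
# written ONLY after the container actually answers; on failure we exit
# non-zero and leave the sentinel absent so systemd retries.
SENTINEL="${SENTINEL:-/tmp/mios-demo/.ollama-firstboot-done}"

reachable() {
  # Stand-in for: podman exec "$CONTAINER" ollama list >/dev/null 2>&1
  [ "${SIMULATE_REACHABLE:-yes}" = "yes" ]
}

attempt=0
while [ "$attempt" -lt 10 ]; do
  reachable && break
  attempt=$((attempt + 1))
  # Real script: sleep 5 here (10 attempts, 50 s total).
done

if ! reachable; then
  # Key fix: NO sentinel write on failure. Exit 1 so systemd's Restart=
  # (or next boot's ConditionPathExists=!sentinel) tries again.
  echo "WARN: container not reachable; sentinel NOT written" >&2
  exit 1
fi

install -d -m 0755 "$(dirname "$SENTINEL")"
touch "$SENTINEL"
echo "sentinel written"
```

Running it with `SIMULATE_REACHABLE=no` exercises the failure path: the loop exhausts its attempts, the script exits 1, and no sentinel file appears.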


usr/libexec/mios/ollama-firstboot.sh

Lines changed: 12 additions & 4 deletions
@@ -84,10 +84,18 @@ for attempt in 1 2 3 4 5 6 7 8 9 10; do
   sleep 5
 done
 if ! podman exec "$CONTAINER" ollama list >/dev/null 2>&1; then
-  _log "WARN: $CONTAINER not reachable after 50 s; skipping pull check"
-  install -d -m 0755 "$(dirname "$SENTINEL")"
-  touch "$SENTINEL"
-  exit 0
+  # Do NOT touch the sentinel here. Earlier revisions did; symptom
+  # 2026-05-11: if the ollama container crash-looped during first
+  # boot (e.g. UID mismatch with /var/lib/ollama bind-mount), this
+  # script gave up after 50 s, wrote the sentinel as if successful,
+  # and the unit's ConditionPathExists permanently blocked any
+  # retry. The operator's `mios hello` then hit a chat-completions
+  # endpoint with zero models loaded and got 500s forever. Exit
+  # non-zero so systemd's Restart= (or the next boot's
+  # ConditionPathExists=!sentinel) retries until the container is
+  # actually reachable.
+  _log "WARN: $CONTAINER not reachable after 50 s -- exiting non-zero to retry on next start (sentinel NOT written)"
+  exit 1
 fi

 failures=0
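The retry mechanics this change relies on live in the systemd unit, not the script. A hypothetical sketch of the relevant directives (the unit name, timing values, and Restart choice are assumptions, not part of this commit; note that older systemd versions reject Restart= on Type=oneshot units, while recent ones permit Restart=on-failure there):

```ini
# ollama-firstboot.service (hypothetical sketch, not from this commit)
[Unit]
# Once firstboot genuinely succeeds and writes the sentinel,
# this condition skips the unit on every subsequent boot:
ConditionPathExists=!/var/lib/mios/.ollama-firstboot-done

[Service]
Type=oneshot
ExecStart=/usr/libexec/mios/ollama-firstboot.sh
# exit 1 from the script re-runs the unit until the container responds;
# exit 0 plus the sentinel ends the retry loop for good:
Restart=on-failure
RestartSec=30
```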

usr/share/mios/mios.toml

Lines changed: 11 additions & 1 deletion
@@ -392,7 +392,17 @@ endpoint = "http://localhost:3030/"
 enable = true

 [ai]
-endpoint = "http://localhost:8080/v1"
+# Point at Ollama's OpenAI-compatible /v1, NOT LocalAI's. The `model`
+# below uses Ollama's name:tag format which Ollama natively understands.
+# LocalAI uses different model names and (on 2.20.0) chokes on aichat's
+# multimodal content-list format with
+#   InternalServerError: interface conversion:
+#     interface {} is []interface {}, not string
+# LocalAI stays in the stack for sidecar surfaces (embeddings,
+# image gen, STT/TTS); the chat surface `mios <prompt>` drives is
+# Ollama. Operator-flagged 2026-05-11 (`mios hello` -> 500 from
+# LocalAI before this fix).
+endpoint = "http://localhost:11434/v1"
 model = "qwen2.5-coder:7b"
 embed_model = "nomic-embed-text"
 api_key = "" # empty for localhost; cloud needs a key
