Skip to content

Commit d92014a

Browse files
committed
drop LocalAI from default stack + dashboard polish
Operator directive 2026-05-11: 'docker.io/localai/localai:v2.20.0 07f03c3494ad 31.9 GB ... we don't need these components'. Disable mios-ai by default and point Hermes' auxiliary (compression / summarization) at Ollama instead of LocalAI -- one less daemon to keep alive, ~32 GB less image storage, no functional loss because Ollama serves the same /v1 surface and the same qwen3.5:2b model that the primary chat backend uses. mios.toml [quadlets.enable]: mios-ai = false # was true; opt-in via /etc/mios/mios.toml for # embeddings / image-gen / STT/TTS sidecars hermes/config.yaml auxiliary: base_url: http://mios-ai:8080/v1 -> http://mios-ollama:11434/v1 api_key: ${API_SERVER_KEY} -> "" (Ollama's /v1 accepts unauthenticated localhost requests; no shared secret needed for the loopback path. mios-ai stays in-tree as an opt-in sidecar -- aux can be repointed at it by an operator override under /etc/mios/hermes/config.local.yaml.) mios-dashboard.sh: * AI line removed -- Ollama line gains the model annotation. The dashboard now shows the canonical chain (Ollama -> Hermes -> Workspace) without the redundant LocalAI URL. * Hermes status probe -> /health (no-auth 200) instead of /v1/models (which returns 401 without the API_SERVER_KEY bearer; ep_dot's naive curl read that as DOWN every refresh). * 'WebUI' label -> 'Workspace' on port 3030 (the workspace owns that port now; Open WebUI moves to 3031 if re-enabled). * mios-hermes-workspace added to the Quadlet services list. * MIOS_AI_MODEL default qwen2.5-coder:7b -> qwen3.5:2b (matches mios.toml [ai].model from commit 95471ae).
1 parent b60e975 commit d92014a

3 files changed

Lines changed: 34 additions & 15 deletions

File tree

usr/libexec/mios/mios-dashboard.sh

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ fi
183183
# describes the OPERATOR'S login surface, not the running process.
184184
MIOS_LINUX_USER="${MIOS_USER:-${MIOS_LINUX_USER:-mios}}"
185185
[[ -z "${MIOS_VERSION:-}" ]] && MIOS_VERSION="$(cat /usr/share/mios/VERSION 2>/dev/null || cat /etc/mios/VERSION 2>/dev/null || echo "0.2.4")"
186-
MIOS_AI_MODEL="${MIOS_AI_MODEL:-qwen2.5-coder:7b}"
186+
MIOS_AI_MODEL="${MIOS_AI_MODEL:-qwen3.5:2b}"
187187

188188
# ── Frame helpers ────────────────────────────────────────────────────────────
189189
# Repeat a single char N times.
@@ -315,18 +315,28 @@ print_endpoints() {
315315
section_header "Self-replication loop"
316316
printf ' %s Forge %shttp://localhost:3000/%s\n' \
317317
"$(ep_dot http://localhost:3000/api/v1/version)" "$C_D" "$C_R"
318-
printf ' %s AI %shttp://localhost:8080/v1%s %s%s%s\n' \
319-
"$(ep_dot http://localhost:8080/v1/models)" "$C_D" "$C_R" "$C_GRY" "$MIOS_AI_MODEL" "$C_R"
318+
# Ollama is the canonical chat model surface (operator directive
319+
# 2026-05-11: drop LocalAI -- redundant ~32 GB image). Hermes
320+
# talks to Ollama via /v1; mios-ai (LocalAI) stays in-tree as an
321+
# opt-in sidecar for non-chat surfaces (embeddings, image-gen,
322+
# STT/TTS) but doesn't autostart.
323+
printf ' %s Ollama %shttp://localhost:11434%s %s%s%s\n' \
324+
"$(ep_dot http://localhost:11434/)" "$C_D" "$C_R" "$C_GRY" "$MIOS_AI_MODEL" "$C_R"
320325
printf ' %s Cockpit %shttps://localhost:9090/%s %slogin: %s / %s%s\n' \
321326
"$(ep_dot https://localhost:9090/)" "$C_D" "$C_R" \
322327
"$C_GRY" "${MIOS_LINUX_USER:-mios}" "${MIOS_DEV_DEFAULT_PASSWORD:-mios}" "$C_R"
323-
printf ' %s Ollama %shttp://localhost:11434%s\n' \
324-
"$(ep_dot http://localhost:11434/)" "$C_D" "$C_R"
325328
printf ' %s Search %shttp://localhost:8888/%s\n' \
326329
"$(ep_dot http://localhost:8888/)" "$C_D" "$C_R"
330+
# Hermes /v1/models needs Bearer auth -- returns 401 to ep_dot's
331+
# naive curl and reads as DOWN even when Hermes is healthy.
332+
# Probe /health instead (200, no auth required by Hermes Gateway).
327333
printf ' %s Hermes %shttp://localhost:8642/v1%s\n' \
328-
"$(ep_dot http://localhost:8642/v1/models)" "$C_D" "$C_R"
329-
printf ' %s WebUI %shttp://localhost:3030/%s\n' \
334+
"$(ep_dot http://localhost:8642/health)" "$C_D" "$C_R"
335+
# Default chat frontend is Hermes Workspace (operator directive
336+
# 2026-05-11). Open WebUI moves to :3031 if re-enabled; the legacy
337+
# 'WebUI' label here would be confusing now that 3030 is the
338+
# workspace, so rename it.
339+
printf ' %s Workspace %shttp://localhost:3030/%s\n' \
330340
"$(ep_dot http://localhost:3030/)" "$C_D" "$C_R"
331341
}
332342

@@ -335,7 +345,7 @@ print_quadlets() {
335345
local svc info name dot color
336346
for svc in mios-ai mios-forge mios-forgejo-runner mios-cockpit-link \
337347
mios-ceph mios-k3s ollama mios-searxng \
338-
mios-hermes mios-webui crowdsec-dashboard \
348+
mios-hermes mios-hermes-workspace mios-webui crowdsec-dashboard \
339349
mios-guacamole guacd guacamole-postgres; do
340350
info="$(service_status "${svc}.service")"
341351
IFS='|' read -r name dot color <<< "$info"

usr/share/mios/hermes/config.yaml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,17 @@ backend:
3030
# Auxiliary auto-detect: no provider available
3131
# (tried: openrouter, nous, local/custom, api-key)
3232
# and silently degrades (compression drops middle turns without a
33-
# summary, memory flush no-ops). Point it at the same mios-ai
34-
# (LocalAI OpenAI-compatible /v1) surface the rest of MiOS uses
35-
# so the agent stays 100% local-stack by default. API key reused
36-
# from /etc/mios/hermes/api.env so we don't add another secret.
33+
# summary, memory flush no-ops). Operator directive 2026-05-11
34+
# dropped LocalAI (mios-ai) -- redundant 31.9 GB image; Ollama
35+
# already serves the same /v1 surface. Aux now points at Ollama
36+
# too; the same backend handles primary chat AND the compression /
37+
# summarization passes. Empty api_key works because Ollama's /v1
38+
# accepts unauthenticated localhost requests by default.
3739
auxiliary:
3840
provider: local/custom
39-
base_url: http://mios-ai:8080/v1
41+
base_url: http://mios-ollama:11434/v1
4042
model: qwen3.5:2b
41-
api_key: ${API_SERVER_KEY}
43+
api_key: ""
4244

4345
api_server:
4446
# Bind / port / key / cors values come from the EnvironmentFile=

usr/share/mios/mios.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2170,7 +2170,14 @@ features = ["ai", "virtualization", "k3s"]
21702170
# force-disable a service even when it would otherwise run.
21712171
# ----------------------------------------------------------------------------
21722172
[quadlets.enable]
2173-
mios-ai = true
2173+
# Chat chain: ollama (LLM) -> mios-hermes (OpenAI gateway) ->
2174+
# mios-hermes-workspace (web frontend). mios-ai (LocalAI) and
2175+
# mios-webui (Open WebUI) stay in-tree but disabled by default:
2176+
# operator directives 2026-05-11 ("forget open webui for now",
2177+
# "we don't need these components" re LocalAI's 31.9 GB image).
2178+
# Flip to true in /etc/mios/mios.toml [quadlets.enable] to opt in
2179+
# (LocalAI for non-chat surfaces -- embeddings, image-gen, STT/TTS).
2180+
mios-ai = false # LocalAI -- opt-in only; chat goes via Ollama
21742181
mios-ceph = true
21752182
mios-k3s = true
21762183
mios-forge = true

0 commit comments

Comments
 (0)