mios-dev
diff --git a/usr/lib/mios/agent-pipe/server.py b/usr/lib/mios/agent-pipe/server.py
Lines changed: 59 additions & 6 deletions
diff --git a/usr/lib/tmpfiles.d/mios-shim-links.conf b/usr/lib/tmpfiles.d/mios-shim-links.conf
Lines changed: 7 additions & 0 deletions
diff --git a/usr/libexec/mios/mios-everything b/usr/libexec/mios/mios-everything
Lines changed: 7 additions & 1 deletion
diff --git a/usr/libexec/mios/mios-firecrawl b/usr/libexec/mios/mios-firecrawl
Lines changed: 14 additions & 4 deletions
diff --git a/usr/libexec/mios/mios-knowledge-search b/usr/libexec/mios/mios-knowledge-search
Lines changed: 13 additions & 1 deletion
@@ -166,6 +166,14 @@
                if (os.environ.get("MIOS_AGENT_PIPE_BACKEND_LIGHT") or "").strip().lower()
                   in {"1", "true", "yes", "on"}
                else "http://localhost:8642/v1")).rstrip("/")
+# True only when the light llama.cpp lane is the reasoning backend DIRECTLY (the
+# BACKEND_LIGHT "bypass Hermes" deployment with no explicit MIOS_AGENT_PIPE_BACKEND
+# override): the primary endpoint is then llama.cpp, which 200-accepts but SILENTLY
+# IGNORES tool_choice='required'. Hermes (:8642), an OpenAI gateway, honors it -> False.
+_BACKEND_IS_LIGHT = (
+    (os.environ.get("MIOS_AGENT_PIPE_BACKEND_LIGHT") or "").strip().lower()
+    in {"1", "true", "yes", "on"}
+    and not (os.environ.get("MIOS_AGENT_PIPE_BACKEND") or "").strip())
 BACKEND_MODEL = (os.environ.get("MIOS_AGENT_PIPE_BACKEND_MODEL")
                  or os.environ.get("MIOS_AI_MODEL")   # WS-0B: ONE owned key = [ai].model
                  or "hermes-agent")
@@ -7659,15 +7667,36 @@ def _perm_rank(perm: str) -> int:
                       or "interactive").strip().lower()
 
 
-def _hitl_block_reason(tool: str) -> "Optional[str]":
+def _effective_perm(tool: str, args: "Optional[dict]" = None) -> str:
+    """The permission tier that actually governs THIS call. Umbrella verbs that
+    dispatch to a NAMED sub-action with its own permission (os_recipe -> a named
+    [recipes.*]) must be gated by the RECIPE's tier, not the umbrella verb's
+    worst-case 'interactive' -- otherwise HITL block-mode neutralizes even the
+    read-only recipes (service-status / show-network / disk-usage / os-control-
+    health) the agent needs for routine OS introspection. Falls back to the
+    verb's own permission. Degrade-open: any lookup miss -> the verb tier."""
+    vperm = str((_VERB_CATALOG.get(tool) or {}).get("permission", "read")).lower()
+    try:
+        if tool == "os_recipe" and args:
+            rn = str((args or {}).get("name") or "").strip().replace("_", "-")
+            rc = _RECIPE_CATALOG.get(rn)
+            if rc:
+                return str(rc.get("permission", vperm)).lower()
+    except Exception:  # noqa: BLE001 -- degrade-open
+        pass
+    return vperm
+
+
+def _hitl_block_reason(tool: str, args: "Optional[dict]" = None) -> "Optional[str]":
     """#62: in BLOCK mode return a human-readable refusal reason if `tool` is a
     high-risk (tier >= [ai].hitl_threshold) action requiring approval, else None.
     AUDIT mode logs the high-risk action and returns None (proceed). OFF returns
-    None immediately. Degrade-open: never raises, never gates on error."""
+    None immediately. Degrade-open: never raises, never gates on error. For
+    os_recipe the effective tier is the NAMED recipe's, not the umbrella verb's."""
     if _HITL_MODE not in ("audit", "block"):
         return None
     try:
-        vperm = str((_VERB_CATALOG.get(tool) or {}).get("permission", "read")).lower()
+        vperm = _effective_perm(tool, args)
         if _perm_rank(vperm) < _perm_rank(_HITL_THRESHOLD):
             return None  # below the gate threshold -> not human-gated
         if _HITL_MODE == "audit":
@@ -7700,7 +7729,7 @@ async def _hitl_arbiter_verdict(tool: str, args: dict) -> "Optional[str]":
     if not _HITL_ARBITER_URL:
         return None
     try:
-        vperm = str((_VERB_CATALOG.get(tool) or {}).get("permission", "read")).lower()
+        vperm = _effective_perm(tool, args)
         if _perm_rank(vperm) < _perm_rank(_HITL_THRESHOLD):
             return None  # below the threshold -> arbiter not consulted
         client = await _get_client()
@@ -9576,7 +9605,12 @@ async def _firecrawl(url: str) -> tuple:
         # backend). A THIRD fetch engine raced beside extract + crawl4ai so the
         # pipeline uses ALL web tools (operator) -- richest wins in _fetch_all.
         # Returns (markdown, links).
-        if not _is_port_open(3002) or not _is_port_open(6379):
+        # Gate ONLY on :3002 (the host-published Firecrawl proxy mios-firecrawl
+        # targets). Redis :6379 is the firecrawl pod's INTERNAL job queue, reached
+        # only by the firecrawl-api/worker containers -- never from this host-side
+        # broker -- so probing it here always failed and silently dropped the
+        # firecrawl engine out of the _fetch_all race once the pod was deployed.
+        if not _is_port_open(3002):
             return "", []
         try:
             p = await asyncio.create_subprocess_exec(
@@ -17898,7 +17932,7 @@ async def dispatch_mios_verb(
     # #62 HITL gate (off by default -> the helper early-returns, ~zero overhead).
     # In block mode a high-risk verb is REFUSED here (never executed) pending human
     # approval; audit mode logs + proceeds. Keys off the resolved verb above.
-    _hitl_reason = _hitl_block_reason(tool)
+    _hitl_reason = _hitl_block_reason(tool, args)
     if _hitl_reason is None and _HITL_ARBITER_URL:
         _hitl_reason = await _hitl_arbiter_verdict(tool, args)  # #62 out-of-process arbiter
     if _hitl_reason is not None:
@@ -28440,6 +28474,25 @@ async def _finalize(emit=None):
             # refusal instead of calling discord_send). An action domain MUST act.
             if not isinstance(pb.get("tool_choice"), dict):
                 pb["tool_choice"] = "required"
+        # PRIMARY force-tool opt-out (mirrors the council/secondary downgrade at
+        # _sec_body): llama.cpp 200-ACCEPTS but SILENTLY IGNORES tool_choice, so a
+        # 'required'/named choice reaches :11450 without forcing -- the force-tool guard
+        # is a no-op on the BACKEND_LIGHT primary path. Downgrade required/named -> auto
+        # when the resolved primary endpoint doesn't honor it (llama.cpp still emits real
+        # tool_calls under 'auto'); leave SGLang/vLLM heavy lanes that DO honor it
+        # untouched. The BACKEND_LIGHT lane carries no api='llamacpp' in target_cfg, so
+        # synthesize that cfg from the SSOT flag so the helper recognizes it as llama.cpp.
+        _pc = pb.get("tool_choice")
+        if _pc not in ("none", "auto", None):
+            _prim_cfg = ({"api": "llamacpp"}
+                         if (_BACKEND_IS_LIGHT
+                             and str(target_endpoint).rstrip("/")
+                             == str(BACKEND).rstrip("/"))
+                         else target_cfg)
+            if not _endpoint_supports_tool_choice(
+                    str(target_endpoint or ""), _prim_cfg,
+                    _agent_offload_engine(_prim_cfg)):
+                pb["tool_choice"] = "auto"
         # Universal agent contract FIRST (operator 2026-05-30 ".md presented
         # to every agent"): the primary + every council secondary lead with
         # the overlay contract (global tools, live internet, delegation, no
 
@@ -124,6 +124,13 @@ L+ /usr/local/bin/mios-window-active       - - - - /usr/libexec/mios/mios-window
 L+ /usr/local/sbin/mios-window-active      - - - - /usr/libexec/mios/mios-window-active
 L+ /usr/local/bin/mios-pc-control          - - - - /usr/libexec/mios/mios-pc-control
 L+ /usr/local/sbin/mios-pc-control         - - - - /usr/libexec/mios/mios-pc-control
+# mios-pc-vision: the VLM screen-grounding FALLBACK for cu_ground -- used when
+# AT-SPI can't name a target element (canvas / Electron surfaces, exactly what
+# vision exists to cover). The binary + the VLM (qwen3-vl on :11450) are both
+# present, but cu_ground does shutil.which("mios-pc-vision") and dies if the
+# shim is absent. MUST be on PATH alongside its mios-pc-control sibling.
+L+ /usr/local/bin/mios-pc-vision           - - - - /usr/libexec/mios/mios-pc-vision
+L+ /usr/local/sbin/mios-pc-vision          - - - - /usr/libexec/mios/mios-pc-vision
 # mios-computer-use: the Linux/Wayland desktop-control executor (cu_* verb
 # backend; env-adaptive: bare-metal GNOME/KDE portal+uinput, WSLg delegate to
 # mios-pc-control, or federation-route to a remote desktop). MUST be on PATH so
 
@@ -130,7 +130,13 @@ fi
 # slower -- 8s ceiling so a cold first call isn't killed mid-spawn. stderr is
 # CAPTURED (not discarded) so a connection failure is reported, not silenced.
 _ES_ERR="$(mktemp 2>/dev/null || echo /tmp/mios-es-err.$$)"
-RAW=$(timeout 8 "$ES_BIN" -n "$MAX" "$QUERY" 2>"$_ES_ERR" | tr -d '\r')
+# `|| true`: this script runs under `set -euo pipefail`, and a nonzero es.exe
+# (e.g. exit 8 "IPC window not found" when es.exe is version-mismatched to the
+# running Everything) would otherwise abort the WHOLE script here -- BEFORE the
+# unreachable-vs-no-match JSON envelope below runs -- so the agent got a bare
+# exit code instead of the structured error. stderr is captured separately into
+# $_ES_ERR, so the IPC-detection grep + both envelopes still fire correctly.
+RAW=$(timeout 8 "$ES_BIN" -n "$MAX" "$QUERY" 2>"$_ES_ERR" | tr -d '\r') || true
 _ES_STDERR="$(tr -d '\r' < "$_ES_ERR" 2>/dev/null)"; rm -f "$_ES_ERR"
 
 if [ -z "$RAW" ]; then
 
@@ -49,8 +49,13 @@ def scrape(url: str, max_chars: int, timeout: float) -> dict:
     """POST /v1/scrape -> clean markdown of the page. Firecrawl renders the
     page (JS) server-side and strips chrome/nav, so the markdown is article
     text ready to ground on."""
-    if not _is_port_open(3002) or not _is_port_open(6379):
-        raise ConnectionError("Firecrawl (3002) or Redis (6379) port is closed")
+    # Gate ONLY on the Firecrawl API port this CLI actually talks to. Redis
+    # (6379) is the firecrawl pod's INTERNAL job queue -- reachable only by the
+    # firecrawl-api/worker containers inside the pod netns, never from this
+    # host-side CLI -- so probing it here ALWAYS failed and wrongly forced the
+    # healthy Firecrawl primary down to web_extract_fallback after deploy.
+    if not _is_port_open(3002):
+        raise ConnectionError("Firecrawl (3002) port is closed")
 
     payload = {
         "url": url,
@@ -76,14 +81,19 @@ def scrape(url: str, max_chars: int, timeout: float) -> dict:
     if job_id:
         poll_start = time.time()
         while time.time() - poll_start < timeout:
-            status_req = urllib.request.Request(f"{FIRECRAWL_URL}/v1/crawl/status/{job_id}")
+            # A scrape job_id polls the SCRAPE-status endpoint (v1.0.0:
+            # GET /v1/scrape/{job_id}), NOT the crawl-status one -- a scrape job
+            # is not a crawl job. v1.0.0 may also return the scraped data
+            # directly (no crawl-style status=="completed"), so treat a payload
+            # that already carries data/markdown as complete.
+            status_req = urllib.request.Request(f"{FIRECRAWL_URL}/v1/scrape/{job_id}")
             try:
                 with urllib.request.urlopen(status_req, timeout=10.0) as r:
                     status_d = json.loads(r.read().decode("utf-8", "replace"))
                 if not isinstance(status_d, dict):
                     break
                 status = str(status_d.get("status") or "").lower()
-                if status == "completed":
+                if status == "completed" or status_d.get("data") or status_d.get("markdown"):
                     d = status_d
                     break
                 elif status == "failed":
 
@@ -115,13 +115,25 @@ def _pgvector_knowledge_fallback(query: str, top_k: int, threshold: float,
             "FROM knowledge WHERE emb IS NOT NULL "
             f"ORDER BY emb <=> '{qv}'::vector LIMIT {int(top_k)}")
         hits = []
+        # Independent relevance FLOOR for this degrade-open fallback. The pg
+        # `knowledge` table holds agent CHAT Q&A memories, NOT the curated doc
+        # corpus this verb claims to search, so an unrelated query can surface
+        # low-score off-topic rows (~0.51-0.53 observed) and present them as an
+        # answer. Gate on max(caller threshold, floor) so an unrelated query
+        # returns an honest empty result. Tunable via MIOS_KNOWLEDGE_FALLBACK_FLOOR.
+        try:
+            import os as _os
+            _floor = float(_os.environ.get("MIOS_KNOWLEDGE_FALLBACK_FLOOR", "0.62"))
+        except Exception:  # noqa: BLE001
+            _floor = 0.62
+        _gate = max(threshold or 0.0, _floor)
         for r in rows:
             if len(r) >= 3:
                 try:
                     sc = float(r[2])
                 except (ValueError, TypeError):
                     sc = 0.0
-                if sc >= (threshold or 0.0):
+                if sc >= _gate:
                     hits.append({"q": r[0], "answer": r[1], "score": sc})
         env = {"ok": True, "query": query,
                "source": "pgvector_knowledge_fallback",