sunbos
diff --git a/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/server.py b/plugins/mcp-server-sqlseed/src/mcp_server_sqlseed/server.py
Lines changed: 114 additions & 95 deletions
diff --git a/plugins/sqlseed-ai/src/sqlseed_ai/_hardware.py b/plugins/sqlseed-ai/src/sqlseed_ai/_hardware.py
Lines changed: 24 additions & 34 deletions
@@ -328,142 +328,161 @@ def sqlseed_gemma4_agent_fill(
         }
 
 
-@mcp.tool()
-def sqlseed_list_gemma_models() -> dict[str, Any]:
-    """List Gemma 4 models with hardware compatibility and backend availability.
+_BACKEND_DESCRIPTIONS: dict[str, str] = {
+    "google_ai_studio": "Google AI Studio API (free tier available, recommended)",
+    "lm_studio": "LM Studio local deployment (http://127.0.0.1:1234, GUI-based)",
+    "ollama": "Ollama local deployment (offline, CLI-based)",
+    "openai_compat": "Any OpenAI-compatible API endpoint",
+}
+
+_LOCAL_BACKEND_URLS: dict[str, str] = {
+    "lm_studio": "http://127.0.0.1:1234/v1/models",
+    "ollama": "http://localhost:11434/v1/models",
+}
+
+_STATUS_ICONS: dict[str, str] = {
+    "recommended": "recommended",
+    "capable": "capable (meets minimum specs)",
+    "capable_slow": "capable but likely slow (VRAM < minimum, will use RAM offloading)",
+    "cpu_only": "CPU-only inference (no GPU detected)",
+    "insufficient": "insufficient hardware",
+    "cloud_only": "cloud API only",
+}
+
+
+def _check_local_backend(backend_id: str, url: str) -> dict[str, Any]:
+    """Check reachability and loaded models for a local LLM backend."""
+    reachable = False
+    loaded: list[str] = []
+    try:
+        req = urllib.request.Request(url)
+        with urllib.request.urlopen(req, timeout=3) as resp:
+            data = json.loads(resp.read().decode())
+            loaded = [m.get("id", "unknown") for m in data.get("data", []) if m.get("id")]
+            reachable = True
+    except (OSError, ValueError):
+        pass
+
+    if reachable and loaded:
+        reason = f"{len(loaded)} model(s) loaded"
+    elif reachable:
+        reason = "Service running, no models loaded"
+    else:
+        reason = "Service not running"
 
-    Dynamically detects the current hardware environment (RAM, GPU/VRAM)
-    and checks which LLM backends are reachable. Returns models annotated
-    with compatibility status and backends annotated with availability.
-    """
-    backend_descriptions: dict[str, str] = {
-        "google_ai_studio": "Google AI Studio API (free tier available, recommended)",
-        "lm_studio": "LM Studio local deployment (http://127.0.0.1:1234, GUI-based)",
-        "ollama": "Ollama local deployment (offline, CLI-based)",
-        "openai_compat": "Any OpenAI-compatible API endpoint",
+    return {
+        "id": backend_id,
+        "description": _BACKEND_DESCRIPTIONS[backend_id],
+        "available": reachable and bool(loaded),
+        "reachable": reachable,
+        "loaded_models": loaded,
+        "reason": reason,
     }
 
-    if not _AI_AVAILABLE:
-        return {
-            "models": [],
-            "backends": [
-                {"id": bid, "description": desc, "available": False} for bid, desc in backend_descriptions.items()
-            ],
-            "hardware": {},
-            "error": "sqlseed-ai plugin not installed. Install with: pip install sqlseed-ai",
-        }
-
-    # ── 1. Detect hardware ──
-    hw = detect_hardware()
 
-    # ── 2. Check backend availability ──
-    ai_config = AIConfig.from_env()
-    backends_result = []
+def _build_backends(ai_config: Any) -> list[dict[str, Any]]:
+    """Build the list of backend availability info."""
+    backends: list[dict[str, Any]] = []
 
     # Google AI Studio: check API key
     has_api_key = ai_config.has_real_api_key
-    backends_result.append(
+    backends.append(
         {
             "id": "google_ai_studio",
-            "description": backend_descriptions["google_ai_studio"],
+            "description": _BACKEND_DESCRIPTIONS["google_ai_studio"],
             "available": has_api_key,
             "reason": "API key configured" if has_api_key else "No API key (set GOOGLE_API_KEY or SQLSEED_AI_API_KEY)",
         }
     )
 
     # LM Studio / Ollama: check service reachability + loaded models
-    local_urls: dict[str, str] = {
-        "lm_studio": "http://127.0.0.1:1234/v1/models",
-        "ollama": "http://localhost:11434/v1/models",
-    }
-    for backend_id, url in local_urls.items():
-        reachable = False
-        loaded: list[str] = []
-        try:
-            req = urllib.request.Request(url)
-            with urllib.request.urlopen(req, timeout=3) as resp:
-                data = json.loads(resp.read().decode())
-                loaded = [m.get("id", "unknown") for m in data.get("data", []) if m.get("id")]
-                reachable = True
-        except (OSError, ValueError):
-            pass
-
-        if reachable and loaded:
-            reason = f"{len(loaded)} model(s) loaded"
-        elif reachable:
-            reason = "Service running, no models loaded"
-        else:
-            reason = "Service not running"
-
-        backends_result.append(
-            {
-                "id": backend_id,
-                "description": backend_descriptions[backend_id],
-                "available": reachable and bool(loaded),
-                "reachable": reachable,
-                "loaded_models": loaded,
-                "reason": reason,
-            }
-        )
+    for backend_id, url in _LOCAL_BACKEND_URLS.items():
+        backends.append(_check_local_backend(backend_id, url))
 
     # OpenAI-compatible: informational only
-    backends_result.append(
+    backends.append(
         {
             "id": "openai_compat",
-            "description": backend_descriptions["openai_compat"],
+            "description": _BACKEND_DESCRIPTIONS["openai_compat"],
             "available": False,
             "reason": "Requires explicit base_url configuration",
         }
     )
+    return backends
 
-    # ── 3. Build model list with compatibility status ──
-    status_icons: dict[str, str] = {
-        "recommended": "recommended",
-        "capable": "capable (meets minimum specs)",
-        "capable_slow": "capable but likely slow (VRAM < minimum, will use RAM offloading)",
-        "cpu_only": "CPU-only inference (no GPU detected)",
-        "insufficient": "insufficient hardware",
-        "cloud_only": "cloud API only",
-    }
 
+def _build_models(hw: dict[str, Any]) -> list[dict[str, Any]]:
+    """Build the list of Gemma models with hardware compatibility status."""
     models = []
     for member in GemmaModel:
         status = evaluate_model_status(member.value, hw)
-        model_req = MODEL_REQUIREMENTS.get(member.value, {})
+        req = MODEL_REQUIREMENTS.get(member.value)
         models.append(
             {
                 "id": member.value,
                 "display_name": member.display_name,
                 "status": status,
-                "status_description": status_icons.get(status, status),
+                "status_description": _STATUS_ICONS.get(status, status),
                 "local_only": member.is_local_only,
                 "requirements": {
-                    "min_ram_gb": model_req.get("min_ram_gb", 0),
-                    "min_vram_gb": model_req.get("min_vram_gb", 0),
-                    "recommended_vram_gb": model_req.get("recommended_vram_gb", 0),
+                    "min_ram_gb": req.min_ram_gb if req else 0,
+                    "min_vram_gb": req.min_vram_gb if req else 0,
+                    "recommended_vram_gb": req.recommended_vram_gb if req else 0,
                 },
             }
         )
+    return models
+
 
-    # ── 4. Determine best default ──
-    # Pick the largest capable model (iterate from largest to smallest)
-    default_model = GemmaModel.GEMMA_4_26B_A4B.value
+def _pick_default_model(models: list[dict[str, Any]]) -> str:
+    """Pick the largest capable model (iterate from largest to smallest)."""
     for m in reversed(models):
-        if m["status"] in ("recommended", "capable") and not m["local_only"]:
-            default_model = str(m["id"])
-            break
-
-    # Pick the first available backend (prefer local over cloud)
-    default_backend = "google_ai_studio"
-    backend_priority = ["lm_studio", "ollama", "google_ai_studio", "openai_compat"]
-    for b_id in backend_priority:
-        for b in backends_result:
+        if m["status"] in {"recommended", "capable"} and not m["local_only"]:
+            return str(m["id"])
+    return GemmaModel.GEMMA_4_26B_A4B.value
+
+
+def _pick_default_backend(backends: list[dict[str, Any]]) -> str:
+    """Pick the first available backend, preferring local over cloud."""
+    priority = ["lm_studio", "ollama", "google_ai_studio", "openai_compat"]
+    for b_id in priority:
+        for b in backends:
             if b["id"] == b_id and b.get("available"):
-                default_backend = b_id
-                break
-        else:
-            continue
-        break
+                return b_id
+    return "google_ai_studio"
+
+
+@mcp.tool()
+def sqlseed_list_gemma_models() -> dict[str, Any]:
+    """List Gemma 4 models with hardware compatibility and backend availability.
+
+    Dynamically detects the current hardware environment (RAM, GPU/VRAM)
+    and checks which LLM backends are reachable. Returns models annotated
+    with compatibility status and backends annotated with availability.
+    """
+    if not _AI_AVAILABLE:
+        return {
+            "models": [],
+            "backends": [
+                {"id": bid, "description": desc, "available": False} for bid, desc in _BACKEND_DESCRIPTIONS.items()
+            ],
+            "hardware": {},
+            "error": "sqlseed-ai plugin not installed. Install with: pip install sqlseed-ai",
+        }
+
+    # ── 1. Detect hardware ──
+    hw = detect_hardware()
+
+    # ── 2. Check backend availability ──
+    ai_config = AIConfig.from_env()
+    backends_result = _build_backends(ai_config)
+
+    # ── 3. Build model list with compatibility status ──
+    models = _build_models(hw)
+
+    # ── 4. Determine best defaults ──
+    default_model = _pick_default_model(models)
+    default_backend = _pick_default_backend(backends_result)
 
     return {
         "models": models,
 
@@ -13,7 +13,7 @@
 import platform
 import subprocess
 import time
-from typing import Any
+from typing import Any, NamedTuple
 
 from sqlseed._utils.logger import get_logger
 
@@ -67,11 +67,11 @@ class MEMORYSTATUSEX(ctypes.Structure):
 def _get_ram_linux() -> tuple[float, float] | None:
     """Get RAM from /proc/meminfo. Returns (total_gb, available_gb)."""
     try:
-        with open("/proc/meminfo") as f:
+        with open("/proc/meminfo", encoding="utf-8") as f:
             info: dict[str, int] = {}
             for line in f:
                 parts = line.split()
-                if parts[0] in ("MemTotal:", "MemAvailable:"):
+                if parts[0] in {"MemTotal:", "MemAvailable:"}:
                     info[parts[0].rstrip(":")] = int(parts[1])  # in kB
             total = info.get("MemTotal", 0) / (1024**2)
             avail = info.get("MemAvailable", 0) / (1024**2)
@@ -214,7 +214,7 @@ def _detect_gpu_macos() -> list[dict[str, Any]]:
                 }
             )
         return gpus
-    except (FileNotFoundError, json.JSONDecodeError, subprocess.TimeoutExpired, ValueError):
+    except (FileNotFoundError, ValueError, subprocess.TimeoutExpired):
         return []
 
 
@@ -275,32 +275,22 @@ def detect_hardware() -> dict[str, Any]:
 # ── Model requirements ───────────────────────────────────────────────
 
 # Approximate requirements for Gemma 4 (Q4_K_M quantized for local inference)
-MODEL_REQUIREMENTS: dict[str, dict[str, Any]] = {
-    "gemma-4-e2b-it": {
-        "min_ram_gb": 4,
-        "min_vram_gb": 2,
-        "recommended_vram_gb": 4,
-    },
-    "gemma-4-e4b-it": {
-        "min_ram_gb": 6,
-        "min_vram_gb": 3,
-        "recommended_vram_gb": 6,
-    },
-    "gemma-4-12b-it": {
-        "min_ram_gb": 12,
-        "min_vram_gb": 8,
-        "recommended_vram_gb": 10,
-    },
-    "gemma-4-26b-a4b-it": {
-        "min_ram_gb": 16,
-        "min_vram_gb": 14,
-        "recommended_vram_gb": 16,
-    },
-    "gemma-4-31b-it": {
-        "min_ram_gb": 24,
-        "min_vram_gb": 18,
-        "recommended_vram_gb": 24,
-    },
+
+
+class ModelRequirement(NamedTuple):
+    """Hardware requirements for a local model variant."""
+
+    min_ram_gb: int
+    min_vram_gb: int
+    recommended_vram_gb: int
+
+
+MODEL_REQUIREMENTS: dict[str, ModelRequirement] = {
+    "gemma-4-e2b-it": ModelRequirement(min_ram_gb=4, min_vram_gb=2, recommended_vram_gb=4),
+    "gemma-4-e4b-it": ModelRequirement(min_ram_gb=6, min_vram_gb=3, recommended_vram_gb=6),
+    "gemma-4-12b-it": ModelRequirement(min_ram_gb=12, min_vram_gb=8, recommended_vram_gb=10),
+    "gemma-4-26b-a4b-it": ModelRequirement(min_ram_gb=16, min_vram_gb=14, recommended_vram_gb=16),
+    "gemma-4-31b-it": ModelRequirement(min_ram_gb=24, min_vram_gb=18, recommended_vram_gb=24),
 }
 
 
@@ -325,12 +315,12 @@ def evaluate_model_status(
     max_vram = hw.get("max_vram_gb", 0)
     total_ram = hw.get("ram", {}).get("total_gb", 0)
 
-    if max_vram >= req["recommended_vram_gb"]:
+    if max_vram >= req.recommended_vram_gb:
         return "recommended"
-    if max_vram >= req["min_vram_gb"]:
+    if max_vram >= req.min_vram_gb:
         return "capable"
-    if total_ram >= req["min_ram_gb"] and max_vram == 0:
+    if total_ram >= req.min_ram_gb and max_vram == 0:
         return "cpu_only"
-    if total_ram >= req["min_ram_gb"]:
+    if total_ram >= req.min_ram_gb:
         return "capable_slow"
     return "insufficient"