@@ -328,142 +328,161 @@ def sqlseed_gemma4_agent_fill(
328328 }
329329
330330
331- @mcp .tool ()
332- def sqlseed_list_gemma_models () -> dict [str , Any ]:
333- """List Gemma 4 models with hardware compatibility and backend availability.
# Human-readable description for every backend id reported by the tool.
_BACKEND_DESCRIPTIONS: dict[str, str] = {
    "google_ai_studio": (
        "Google AI Studio API (free tier available, recommended)"
    ),
    "lm_studio": (
        "LM Studio local deployment (http://127.0.0.1:1234, GUI-based)"
    ),
    "ollama": "Ollama local deployment (offline, CLI-based)",
    "openai_compat": "Any OpenAI-compatible API endpoint",
}

# Model-listing endpoints probed to decide whether a local backend is usable.
_LOCAL_BACKEND_URLS: dict[str, str] = {
    "lm_studio": "http://127.0.0.1:1234/v1/models",
    "ollama": "http://localhost:11434/v1/models",
}

# Long-form text for each model compatibility status code.
# NOTE(review): despite the name, the values are descriptions, not icons.
_STATUS_ICONS: dict[str, str] = {
    "recommended": "recommended",
    "capable": "capable (meets minimum specs)",
    "capable_slow": (
        "capable but likely slow "
        "(VRAM < minimum, will use RAM offloading)"
    ),
    "cpu_only": "CPU-only inference (no GPU detected)",
    "insufficient": "insufficient hardware",
    "cloud_only": "cloud API only",
}
351+
352+
def _check_local_backend(backend_id: str, url: str) -> dict[str, Any]:
    """Probe a local LLM backend and report which models it has loaded.

    Requests ``url`` with a 3-second timeout, parses the body as JSON and
    collects the non-empty ``id`` of every entry in its ``data`` list.

    A connection failure or a body that is not valid JSON is reported as
    "Service not running". A valid JSON reply whose shape is unexpected
    (not an object, ``data`` not a list, entries that are not objects) is
    reported as a running service with no models loaded instead of raising.

    Args:
        backend_id: Key into ``_BACKEND_DESCRIPTIONS``; echoed back as ``id``.
        url: Model-listing endpoint to query.

    Returns:
        A dict with ``id``, ``description``, ``available`` (reachable and at
        least one model loaded), ``reachable``, ``loaded_models`` and a
        human-readable ``reason``.

    Raises:
        KeyError: If ``backend_id`` has no entry in ``_BACKEND_DESCRIPTIONS``.
    """
    reachable = False
    loaded: list[str] = []
    try:
        req = urllib.request.Request(url)
        with urllib.request.urlopen(req, timeout=3) as resp:
            payload = json.loads(resp.read().decode())
    except (OSError, ValueError):
        # Best-effort probe: unreachable service, bad URL, or non-JSON body.
        pass
    else:
        reachable = True
        # Validate the shape before touching it so that an unrelated service
        # answering on the same port cannot crash the tool.
        entries = payload.get("data") if isinstance(payload, dict) else None
        if isinstance(entries, list):
            loaded = [
                entry["id"]
                for entry in entries
                if isinstance(entry, dict) and entry.get("id")
            ]

    if reachable and loaded:
        reason = f"{len(loaded)} model(s) loaded"
    elif reachable:
        reason = "Service running, no models loaded"
    else:
        reason = "Service not running"

    return {
        "id": backend_id,
        "description": _BACKEND_DESCRIPTIONS[backend_id],
        "available": reachable and bool(loaded),
        "reachable": reachable,
        "loaded_models": loaded,
        "reason": reason,
    }
345381
346- if not _AI_AVAILABLE :
347- return {
348- "models" : [],
349- "backends" : [
350- {"id" : bid , "description" : desc , "available" : False } for bid , desc in backend_descriptions .items ()
351- ],
352- "hardware" : {},
353- "error" : "sqlseed-ai plugin not installed. Install with: pip install sqlseed-ai" ,
354- }
355-
356- # ── 1. Detect hardware ──
357- hw = detect_hardware ()
358382
359- # ── 2. Check backend availability ──
360- ai_config = AIConfig . from_env ()
361- backends_result = []
def _build_backends(ai_config: Any) -> list[dict[str, Any]]:
    """Assemble availability records for every known LLM backend.

    Entries come out in a fixed order: Google AI Studio, then each local
    service listed in ``_LOCAL_BACKEND_URLS``, then the OpenAI-compatible
    placeholder.

    Args:
        ai_config: Configuration object; only its ``has_real_api_key``
            attribute is read here.

    Returns:
        One dict per backend, each carrying at least ``id``,
        ``description``, ``available`` and ``reason``.
    """
    # Google AI Studio counts as available exactly when an API key is set.
    key_present = ai_config.has_real_api_key
    if key_present:
        google_reason = "API key configured"
    else:
        google_reason = "No API key (set GOOGLE_API_KEY or SQLSEED_AI_API_KEY)"
    google_entry = {
        "id": "google_ai_studio",
        "description": _BACKEND_DESCRIPTIONS["google_ai_studio"],
        "available": key_present,
        "reason": google_reason,
    }

    # LM Studio / Ollama: probe each service for reachability and models.
    local_entries = [
        _check_local_backend(name, endpoint)
        for name, endpoint in _LOCAL_BACKEND_URLS.items()
    ]

    # OpenAI-compatible endpoints are never probed; listed for information.
    compat_entry = {
        "id": "openai_compat",
        "description": _BACKEND_DESCRIPTIONS["openai_compat"],
        "available": False,
        "reason": "Requires explicit base_url configuration",
    }

    return [google_entry, *local_entries, compat_entry]
418412
419- # ── 3. Build model list with compatibility status ──
420- status_icons : dict [str , str ] = {
421- "recommended" : "recommended" ,
422- "capable" : "capable (meets minimum specs)" ,
423- "capable_slow" : "capable but likely slow (VRAM < minimum, will use RAM offloading)" ,
424- "cpu_only" : "CPU-only inference (no GPU detected)" ,
425- "insufficient" : "insufficient hardware" ,
426- "cloud_only" : "cloud API only" ,
427- }
428413
def _build_models(hw: dict[str, Any]) -> list[dict[str, Any]]:
    """Describe every ``GemmaModel`` member together with its status.

    Args:
        hw: Hardware description, forwarded unchanged to
            ``evaluate_model_status``.

    Returns:
        One dict per enum member, in enum iteration order, with ``id``,
        ``display_name``, ``status``, ``status_description``,
        ``local_only`` and a ``requirements`` sub-dict. Requirement values
        are 0 when ``MODEL_REQUIREMENTS`` has no usable entry for the model.
    """

    def describe(model: Any) -> dict[str, Any]:
        verdict = evaluate_model_status(model.value, hw)
        spec = MODEL_REQUIREMENTS.get(model.value)
        entry: dict[str, Any] = {
            "id": model.value,
            "display_name": model.display_name,
            "status": verdict,
            # Unknown status codes are passed through verbatim.
            "status_description": _STATUS_ICONS.get(verdict, verdict),
            "local_only": model.is_local_only,
        }
        entry["requirements"] = {
            "min_ram_gb": spec.min_ram_gb if spec else 0,
            "min_vram_gb": spec.min_vram_gb if spec else 0,
            "recommended_vram_gb": spec.recommended_vram_gb if spec else 0,
        }
        return entry

    return [describe(model) for model in GemmaModel]
435+
447436
448- # ── 4. Determine best default ──
449- # Pick the largest capable model (iterate from largest to smallest)
450- default_model = GemmaModel .GEMMA_4_26B_A4B .value
def _pick_default_model(models: list[dict[str, Any]]) -> str:
    """Choose the default model id from an annotated model list.

    Walks ``models`` from the last entry to the first and returns the id of
    the first entry whose status is ``"recommended"`` or ``"capable"`` and
    which is not flagged ``local_only``. When nothing qualifies, falls back
    to ``GemmaModel.GEMMA_4_26B_A4B``.

    NOTE(review): the reverse walk presumably relies on the list being
    ordered smallest to largest, so the largest capable model wins; confirm
    against the ``GemmaModel`` declaration order.
    """
    usable_statuses = {"recommended", "capable"}
    for entry in reversed(models):
        if entry["status"] not in usable_statuses:
            continue
        if entry["local_only"]:
            continue
        return str(entry["id"])
    return GemmaModel.GEMMA_4_26B_A4B.value
443+
444+
def _pick_default_backend(backends: list[dict[str, Any]]) -> str:
    """Return the id of the highest-priority backend that is available.

    Priority runs local services first (``lm_studio``, ``ollama``), then
    ``google_ai_studio``, then ``openai_compat``. A backend qualifies when
    its entry has a truthy ``available`` value. When none qualifies the
    result is ``"google_ai_studio"``.
    """
    preference = ("lm_studio", "ollama", "google_ai_studio", "openai_compat")
    for wanted in preference:
        if any(
            entry["id"] == wanted and entry.get("available")
            for entry in backends
        ):
            return wanted
    return "google_ai_studio"
453+
454+
455+ @mcp .tool ()
456+ def sqlseed_list_gemma_models () -> dict [str , Any ]:
457+ """List Gemma 4 models with hardware compatibility and backend availability.
458+
459+ Dynamically detects the current hardware environment (RAM, GPU/VRAM)
460+ and checks which LLM backends are reachable. Returns models annotated
461+ with compatibility status and backends annotated with availability.
462+ """
463+ if not _AI_AVAILABLE :
464+ return {
465+ "models" : [],
466+ "backends" : [
467+ {"id" : bid , "description" : desc , "available" : False } for bid , desc in _BACKEND_DESCRIPTIONS .items ()
468+ ],
469+ "hardware" : {},
470+ "error" : "sqlseed-ai plugin not installed. Install with: pip install sqlseed-ai" ,
471+ }
472+
473+ # ── 1. Detect hardware ──
474+ hw = detect_hardware ()
475+
476+ # ── 2. Check backend availability ──
477+ ai_config = AIConfig .from_env ()
478+ backends_result = _build_backends (ai_config )
479+
480+ # ── 3. Build model list with compatibility status ──
481+ models = _build_models (hw )
482+
483+ # ── 4. Determine best defaults ──
484+ default_model = _pick_default_model (models )
485+ default_backend = _pick_default_backend (backends_result )
467486
468487 return {
469488 "models" : models ,
0 commit comments