@@ -2592,6 +2592,242 @@ def auth_check(project_dir: str) -> None:
25922592main .add_command (auth )
25932593
25942594
2595+ # ---------------------------------------------------------------------------
2596+ # Ollama — local LLM model management
2597+ # ---------------------------------------------------------------------------
2598+
2599+
# Parent command group for the Ollama tooling; the subcommands in this section
# attach themselves to it via ``@ollama_group.command(...)``.
@main.group(name="ollama")
def ollama_group() -> None:
    """Manage local Ollama models (list, download, GPU detection)."""
2603+
2604+
@ollama_group.command(name="list")
def ollama_list() -> None:
    """List locally installed Ollama models."""
    from specsmith.ollama_cmds import get_installed_models, is_running

    # Guard: without a reachable Ollama server there is nothing to query.
    if not is_running():
        console.print("[red]✗[/red] Ollama is not running. Start it with: [bold]ollama serve[/bold]")
        raise SystemExit(1)

    # Guard: point the user at the catalog when nothing is installed yet.
    installed = get_installed_models()
    if not installed:
        console.print("[yellow]No models installed.[/yellow] Run: [bold]specsmith ollama available[/bold]")
        return

    bytes_per_gib = 1024**3
    console.print(f"[bold]Installed Ollama Models[/bold] ({len(installed)})\n")
    for entry in installed:
        # "size" is divided by 1024**3 for display; a missing size shows as 0.0.
        gib = entry.get("size", 0) / bytes_per_gib
        # Only the first 10 characters are shown (presumably the YYYY-MM-DD
        # part of a timestamp — verify against get_installed_models()).
        stamp = entry.get("modified_at", "")[:10]
        console.print(f" [green]✓[/green] {entry['name']:<35s} {gib:.1f} GB [{stamp}]")
2624+
2625+
@ollama_group.command(name="available")
def ollama_available() -> None:
    """Show recommended models vs installed. GPU-aware."""
    from specsmith.ollama_cmds import (
        MODEL_CATALOG,
        detect_gpu,
        get_installed_ids,
        gpu_tier,
        is_running,
    )

    gpus = detect_gpu()
    # Pick the GPU with the most VRAM so the name printed in the header belongs
    # to the same device as the VRAM figure (previously gpus[0]'s name was
    # shown next to the maximum VRAM of *any* GPU).
    best_gpu = max(gpus, key=lambda g: g["vram_gb"], default=None)
    vram = best_gpu["vram_gb"] if best_gpu is not None else 0
    tier = gpu_tier(vram)
    # Keep ~10% of VRAM as headroom; with no usable VRAM figure every model
    # passes the budget check.
    budget = vram * 0.90 if vram else 999

    if best_gpu is not None:
        console.print(f"[bold]GPU:[/bold] {best_gpu['name']} — {vram:.1f} GB VRAM ({tier})\n")
    else:
        console.print("[yellow]No GPU detected — CPU mode (small models only)[/yellow]\n")

    installed_ids: list[str] = []
    if is_running():
        installed_ids = get_installed_ids()
    else:
        console.print("[dim]Ollama not running — showing catalog only[/dim]\n")

    console.print(
        f"{'Model':<35s} {'VRAM':<8s} {'Size':<8s} {'Fits?':<7s} {'Status':<12s} Best for"
    )
    console.print("-" * 100)
    for m in MODEL_CATALOG:
        # Decide the "Fits?" cell. The no-GPU case is checked first: it used to
        # be shadowed by `fits` always being true without a GPU, which made the
        # "CPU?" marker unreachable.
        if not gpus:
            fits_text, fits_style = "✓ CPU?", "dim"
        elif m["vram_gb"] <= budget:
            fits_text, fits_style = "✓", "green"
        else:
            fits_text, fits_style = "✗ VRAM", "red"

        # Heuristic match: catalog id and installed id may differ by tag, so
        # either one containing the other counts as installed.
        is_installed = any(m["id"] in iid or iid in m["id"] for iid in installed_ids)
        if is_installed:
            status_text, status_style = "✓ Installed", "green"
        else:
            status_text, status_style = "⬇ Available", "dim"

        best = ", ".join(m["best_for"][:2])
        # Pad the *visible* text and wrap it in markup afterwards; padding the
        # already-marked-up string counts the tag characters and breaks the
        # column alignment with the header row.
        console.print(
            f"  {m['name']:<33s} {m['vram_gb']:<8.1f} {m['size_gb']:<8.1f} "
            f"[{fits_style}]{fits_text:<7s}[/{fits_style}] "
            f"[{status_style}]{status_text:<12s}[/{status_style}] {best}"
        )

    console.print("\n Run [bold]specsmith ollama pull <model-id>[/bold] to download")
    console.print(" e.g. [bold]specsmith ollama pull qwen2.5:14b[/bold]")
2671+
2672+
@ollama_group.command(name="gpu")
def ollama_gpu() -> None:
    """Show GPU information and model tier recommendations."""
    from specsmith.ollama_cmds import detect_gpu, gpu_tier, recommend_models

    gpus = detect_gpu()
    if not gpus:
        # No GPU entries reported: print CPU-only guidance and stop.
        console.print("[yellow]No dedicated GPU detected.[/yellow]")
        console.print(" Ollama can run on CPU but will be slow.")
        console.print(" Recommended: llama3.2:latest (2GB RAM only)")
        return

    # NOTE(review): recommendations are printed once per detected GPU, each
    # based on that GPU's own VRAM — confirm this per-GPU nesting is intended.
    for gpu in gpus:
        vram = gpu["vram_gb"]
        tier = gpu_tier(vram)
        console.print(f"[bold]GPU:[/bold] {gpu['name']}")
        console.print(f" VRAM: {vram:.1f} GB")
        console.print(f" Tier: {tier}")
        console.print()

        # Models that recommend_models() returns for this VRAM size.
        recs = recommend_models(vram)
        console.print(f" [bold]Recommended models for {vram:.1f} GB VRAM:[/bold]")
        for m in recs:
            # Show at most the first two "best_for" entries to keep the line short.
            console.print(
                f" [green]✓[/green] {m['name']:<35s} "
                f"{m['vram_gb']:.1f} GB — {', '.join(m['best_for'][:2])}"
            )
        console.print()
    console.print(" Run [bold]specsmith ollama pull <model-id>[/bold] to download.")
2702+
2703+
def _pull_progress_line(completed: int, total: int) -> str:
    """Return a carriage-return-prefixed progress bar for one layer download.

    ``completed`` and ``total`` are byte counts; ``total`` must be non-zero.
    """
    gib = 1024**3
    # Clamp so a chunk reporting completed > total cannot overflow the bar.
    pct = max(0, min(100, int(completed / total * 100)))
    filled = pct // 5
    bar = "█" * filled + "░" * (20 - filled)
    return f"\r [{bar}] {pct:3d}% {completed / gib:.2f}/{total / gib:.2f} GB"


@ollama_group.command(name="pull")
@click.argument("model")
def ollama_pull(model: str) -> None:
    """Download a model from Ollama library.

    MODEL: model id, e.g. qwen2.5:14b, phi4:latest

    Press Ctrl-C to cancel the download.
    """
    import json
    import sys
    import urllib.error
    import urllib.request

    from specsmith.ollama_cmds import MODEL_CATALOG, OLLAMA_API, is_running

    if not is_running():
        console.print("[red]✗[/red] Ollama is not running. Start it first: [bold]ollama serve[/bold]")
        raise SystemExit(1)

    # Show model info if in catalog
    info = next((m for m in MODEL_CATALOG if m["id"] == model), None)
    if info:
        console.print(
            f"[bold]Pulling[/bold] {info['name']} ({info['size_gb']:.1f} GB)\n"
            f" Best for: {', '.join(info['best_for'])}\n"
            f" Notes: {info['notes']}\n"
        )
    else:
        console.print(f"[bold]Pulling[/bold] {model} from Ollama library\n")

    console.print("[dim]Press Ctrl-C to cancel[/dim]\n")

    # Stream pull progress from Ollama (one JSON object per line).
    payload = json.dumps({"name": model}).encode()
    req = urllib.request.Request(  # noqa: S310
        f"{OLLAMA_API}/api/pull",
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    last_status = ""
    bar_active = False  # True while a "\r" progress bar occupies the current line
    try:
        with urllib.request.urlopen(req, timeout=600) as resp:  # noqa: S310
            for raw in resp:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    chunk = json.loads(raw)
                except json.JSONDecodeError:
                    # Best effort: skip a malformed line instead of aborting.
                    continue
                if not isinstance(chunk, dict):
                    continue

                # Ollama reports failures (e.g. unknown model) as an "error"
                # object inside the stream; surface it instead of exiting 0.
                error = chunk.get("error")
                if error:
                    if bar_active:
                        sys.stdout.write("\n")
                    console.print(f"\n [red]Error: {error}[/red]")
                    raise SystemExit(1)

                status = chunk.get("status") or ""
                completed = chunk.get("completed") or 0
                total = chunk.get("total") or 0

                # A new status means the previous progress bar is finished;
                # move to a fresh line so it is not overwritten.
                if bar_active and status != last_status:
                    sys.stdout.write("\n")
                    bar_active = False

                if status == "success":
                    console.print(f"\n [bold green]✓ {model} downloaded successfully.[/bold green]")
                    return
                # Layer downloads carry byte counts. Match on the "pulling "
                # prefix rather than rebuilding the status from the digest:
                # the digest includes a "sha256:" prefix, so the old
                # digest[:12] comparison never matched and no bar was shown.
                if total and status.startswith("pulling "):
                    sys.stdout.write(_pull_progress_line(completed, total))
                    sys.stdout.flush()
                    bar_active = True
                elif status != last_status:
                    console.print(f" {status}")
                last_status = status
    except KeyboardInterrupt:
        console.print("\n [yellow]Download cancelled.[/yellow]")
        raise SystemExit(0)
    except (urllib.error.URLError, TimeoutError, ConnectionError) as e:
        # URLError covers connect/HTTP failures; the other two cover a stalled
        # or dropped connection while the stream is being read.
        console.print(f"\n [red]Error: {e}[/red]")
        raise SystemExit(1)

    # The stream ended without a "success" status: treat it as a failure
    # rather than returning silently with exit code 0.
    if bar_active:
        sys.stdout.write("\n")
    console.print("\n [red]Error: download ended before Ollama reported success.[/red]")
    raise SystemExit(1)
2779+
2780+
@ollama_group.command(name="suggest")
@click.argument("task", required=False)
def ollama_suggest(task: str | None) -> None:
    """Suggest models for a given task type.

    TASK: coding | requirements | architecture | chat | analysis | reasoning

    Example: specsmith ollama suggest coding
    """
    from specsmith.ollama_cmds import (
        TASK_TAGS,
        detect_gpu,
        get_installed_ids,
        is_running,
        suggest_for_task,
    )

    # No task given: list the known task keys and show usage.
    if not task:
        console.print("[bold]Available task types:[/bold]")
        for tag in TASK_TAGS:
            console.print(f" {tag}")
        console.print("\n Usage: [bold]specsmith ollama suggest <task>[/bold]")
        return

    # Largest VRAM among detected GPUs; 999 when none are detected so the
    # VRAM value passed on is effectively unconstrained.
    vram_sizes = [gpu["vram_gb"] for gpu in detect_gpu()]
    vram = max(vram_sizes, default=999)

    candidates = suggest_for_task(task, vram)
    if not candidates:
        console.print(f"[yellow]No models found for task '{task}'.[/yellow]")
        return

    # Installed ids are only available while the Ollama server is reachable.
    present: list[str] = get_installed_ids() if is_running() else []

    console.print(f"[bold]Model suggestions for task: {task}[/bold]\n")
    for model in candidates:
        model_id = model["id"]
        # Substring match in either direction counts as "installed".
        if any(model_id in iid or iid in model_id for iid in present):
            badge = "[green]✓ installed[/green]"
        else:
            badge = f"[dim]⬇ {model['size_gb']:.1f} GB[/dim]"
        console.print(
            f" {badge} [bold]{model['name']:<35s}[/bold] {model['notes']}"
        )
        console.print(
            f" [dim]ID: {model['id']} VRAM: {model['vram_gb']:.1f} GB ctx: {model['ctx_k']}K[/dim]"
        )
    console.print("\n Download: [bold]specsmith ollama pull <id>[/bold]")
2826+
2827+
2828+ main .add_command (ollama_group )
2829+
2830+
25952831# ---------------------------------------------------------------------------
25962832# Workspace — multi-project management (#17)
25972833# ---------------------------------------------------------------------------
0 commit comments