Skip to content

Commit e8718ab

Browse files
tbitcs and oz-agent committed
feat: specsmith ollama command group + GPU detection
New module: src/specsmith/ollama_cmds.py
- detect_gpu(): nvidia-smi primary, Windows WMI fallback (AMD/Intel)
- MODEL_CATALOG: 9 curated models with VRAM, size, best-for, tier, ctx
- get_installed_models/ids(): calls Ollama /api/tags
- recommend_models(vram_gb): filter by 90% VRAM budget
- suggest_for_task(task, vram_gb): scored model suggestions by task type

New CLI: specsmith ollama <command>
- list: table of installed models with size + date
- available: GPU-aware table of catalog vs installed (✓/⬇)
- gpu: detect GPU, show VRAM tier, list recommended models
- pull <model>: streaming download with progress bar, Ctrl-C cancels
- suggest <task>: ordered suggestions for coding/requirements/architecture/chat/analysis/reasoning

Co-Authored-By: Oz <oz-agent@warp.dev>
1 parent 4fc992f commit e8718ab

2 files changed

Lines changed: 506 additions & 0 deletions

File tree

src/specsmith/cli.py

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2592,6 +2592,242 @@ def auth_check(project_dir: str) -> None:
25922592
main.add_command(auth)
25932593

25942594

2595+
# ---------------------------------------------------------------------------
2596+
# Ollama — local LLM model management
2597+
# ---------------------------------------------------------------------------
2598+
2599+
2600+
# Parent group for every `specsmith ollama <command>` sub-command defined
# below. The body is intentionally empty: the group only provides a namespace.
# The docstring is kept short because it appears to double as the --help text.
@main.group(name="ollama")
def ollama_group() -> None:
    """Manage local Ollama models (list, download, GPU detection)."""
2603+
2604+
2605+
@ollama_group.command(name="list")
def ollama_list() -> None:
    """List locally installed Ollama models."""
    # Flow: exit with status 1 if the Ollama server is unreachable, print a
    # hint when nothing is installed, otherwise print one line per model with
    # its name, size in GB and the first ten characters of `modified_at`.
    # The import is kept local, as in the other ollama commands.
    from specsmith.ollama_cmds import get_installed_models, is_running

    if not is_running():
        console.print("[red]✗[/red] Ollama is not running. Start it with: [bold]ollama serve[/bold]")
        raise SystemExit(1)

    installed = get_installed_models()
    if installed:
        bytes_per_gib = 1024**3
        console.print(f"[bold]Installed Ollama Models[/bold] ({len(installed)})\n")
        for entry in installed:
            gib = entry.get("size", 0) / bytes_per_gib
            day = entry.get("modified_at", "")[:10]
            console.print(f" [green]✓[/green] {entry['name']:<35s} {gib:.1f}GB [{day}]")
        return

    console.print("[yellow]No models installed.[/yellow] Run: [bold]specsmith ollama available[/bold]")
2624+
2625+
2626+
def _ollama_markup_cell(text: str, style: str, width: int) -> str:
    """Wrap *text* in console style markup and pad it to *width* visible columns.

    Padding is computed from the visible text only; padding the already
    marked-up string would count the tag characters and misalign the table.
    """
    return f"[{style}]{text}[/{style}]" + " " * max(0, width - len(text))


@ollama_group.command(name="available")
def ollama_available() -> None:
    """Show recommended models vs installed. GPU-aware."""
    # Flow: detect the GPU, derive a VRAM budget, then print one table row per
    # catalog entry saying whether it fits that budget and whether it is
    # already installed (the latter only when the Ollama server is reachable).
    from specsmith.ollama_cmds import (
        MODEL_CATALOG,
        detect_gpu,
        get_installed_ids,
        gpu_tier,
        is_running,
    )

    gpus = detect_gpu()
    if gpus:
        # Report the card that actually provides the largest VRAM, so the name
        # and the number shown always belong to the same device.
        best_gpu = max(gpus, key=lambda g: g["vram_gb"])
        vram = best_gpu["vram_gb"]
        tier = gpu_tier(vram)
        console.print(f"[bold]GPU:[/bold] {best_gpu['name']} — {vram:.1f}GB VRAM ({tier})\n")
    else:
        vram = 0
        console.print("[yellow]No GPU detected — CPU mode (small models only)[/yellow]\n")

    # Keep 10% of VRAM as headroom; a reported VRAM of 0 means "unknown", in
    # which case no model is ruled out.
    budget = vram * 0.90 if vram else float("inf")

    installed_ids: list[str] = []
    if is_running():
        installed_ids = get_installed_ids()
    else:
        console.print("[dim]Ollama not running — showing catalog only[/dim]\n")

    console.print(
        f"{'Model':<35s} {'VRAM':<8s} {'Size':<8s} {'Fits?':<7s} {'Status':<12s} Best for"
    )
    console.print("-" * 100)
    for m in MODEL_CATALOG:
        if not gpus:
            # Without a GPU the VRAM figure is not a hard limit; flag as a maybe.
            fits_cell = _ollama_markup_cell("✓ CPU?", "dim", 7)
        elif m["vram_gb"] <= budget:
            fits_cell = _ollama_markup_cell("✓", "green", 7)
        else:
            fits_cell = _ollama_markup_cell("✗ VRAM", "red", 7)

        # Substring match in both directions tolerates tag/suffix differences
        # between catalog ids and installed ids; empty ids are skipped because
        # "" is a substring of everything.
        is_installed = any(
            iid and (m["id"] in iid or iid in m["id"]) for iid in installed_ids
        )
        if is_installed:
            status_cell = _ollama_markup_cell("✓ Installed", "green", 12)
        else:
            status_cell = _ollama_markup_cell("⬇ Available", "dim", 12)

        best = ", ".join(m["best_for"][:2])
        console.print(
            f"  {m['name']:<33s} {m['vram_gb']:<8.1f} {m['size_gb']:<8.1f} "
            f"{fits_cell} {status_cell} {best}"
        )

    console.print("\n Run [bold]specsmith ollama pull <model-id>[/bold] to download")
    console.print(" e.g. [bold]specsmith ollama pull qwen2.5:14b[/bold]")
2671+
2672+
2673+
@ollama_group.command(name="gpu")
def ollama_gpu() -> None:
    """Show GPU information and model tier recommendations."""
    # For every detected GPU: print name, VRAM and tier, followed by the
    # catalog models that `recommend_models` returns for that VRAM size.
    from specsmith.ollama_cmds import detect_gpu, gpu_tier, recommend_models

    gpus = detect_gpu()
    if not gpus:
        console.print("[yellow]No dedicated GPU detected.[/yellow]")
        console.print(" Ollama can run on CPU but will be slow.")
        console.print(" Recommended: llama3.2:latest (2GB RAM only)")
        return

    for gpu in gpus:
        vram = gpu["vram_gb"]
        tier = gpu_tier(vram)
        console.print(f"[bold]GPU:[/bold] {gpu['name']}")
        console.print(f" VRAM: {vram:.1f} GB")
        console.print(f" Tier: {tier}")
        console.print()

        recs = recommend_models(vram)
        if recs:
            console.print(f" [bold]Recommended models for {vram:.1f}GB VRAM:[/bold]")
            for m in recs:
                console.print(
                    f" [green]✓[/green] {m['name']:<35s} "
                    f"{m['vram_gb']:.1f}GB — {', '.join(m['best_for'][:2])}"
                )
        else:
            # Say so explicitly instead of printing a heading with nothing under it.
            console.print(f" [yellow]No catalog model fits within {vram:.1f}GB VRAM.[/yellow]")
        console.print()

    console.print(" Run [bold]specsmith ollama pull <model-id>[/bold] to download.")
2702+
2703+
2704+
def _ollama_progress_line(completed: int, total: int, width: int = 20) -> str:
    """Return a carriage-return-prefixed text progress bar for one download.

    The ratio is clamped to 0..100% so an inconsistent report from the server
    can never produce a negative or over-long bar. *total* must be non-zero.
    """
    fraction = min(max(completed / total, 0.0), 1.0)
    pct = int(fraction * 100)
    filled = pct * width // 100
    bar = "█" * filled + "░" * (width - filled)
    gib = 1024**3
    return f"\r [{bar}] {pct:3d}% {completed / gib:.2f}/{total / gib:.2f} GB"


@ollama_group.command(name="pull")
@click.argument("model")
def ollama_pull(model: str) -> None:
    """Download a model from Ollama library.

    MODEL: model id, e.g. qwen2.5:14b, phi4:latest

    Press Ctrl-C to cancel the download.
    """
    # Exit codes: 0 on success or user cancel, 1 when Ollama is not running,
    # the server reports an error, the connection fails, or the stream ends
    # before a "success" status is seen.
    import http.client
    import json
    import sys
    import urllib.request

    from specsmith.ollama_cmds import MODEL_CATALOG, OLLAMA_API, is_running

    if not is_running():
        console.print("[red]✗[/red] Ollama is not running. Start it first: [bold]ollama serve[/bold]")
        raise SystemExit(1)

    # Show model info if in catalog
    info = next((m for m in MODEL_CATALOG if m["id"] == model), None)
    if info:
        console.print(
            f"[bold]Pulling[/bold] {info['name']} ({info['size_gb']:.1f} GB)\n"
            f" Best for: {', '.join(info['best_for'])}\n"
            f" Notes: {info['notes']}\n"
        )
    else:
        console.print(f"[bold]Pulling[/bold] {model} from Ollama library\n")

    console.print("[dim]Press Ctrl-C to cancel[/dim]\n")

    # Stream pull progress from Ollama: the response body is one JSON object
    # per line.
    payload = json.dumps({"name": model}).encode()
    req = urllib.request.Request(  # noqa: S310
        f"{OLLAMA_API}/api/pull",
        data=payload,
        headers={"Content-Type": "application/json"},
    )

    last_status = ""
    bar_open = False  # True while an unterminated "\r" progress line is on screen

    def _close_bar() -> None:
        """Terminate the in-place progress line so later output starts fresh."""
        nonlocal bar_open
        if bar_open:
            sys.stdout.write("\n")
            sys.stdout.flush()
            bar_open = False

    try:
        with urllib.request.urlopen(req, timeout=600) as resp:  # noqa: S310
            for raw in resp:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    chunk = json.loads(raw)
                except json.JSONDecodeError:
                    # Best effort: skip lines that are not valid JSON.
                    continue
                if not isinstance(chunk, dict):
                    continue

                # A failed pull (e.g. unknown model) is reported in-band.
                error = chunk.get("error")
                if error:
                    _close_bar()
                    console.print(f"\n[red]Error: {error}[/red]")
                    raise SystemExit(1)

                status = chunk.get("status", "")
                if status == "success":
                    _close_bar()
                    console.print(f"\n[bold green]✓ {model} downloaded successfully.[/bold green]")
                    return

                if status != last_status:
                    _close_bar()
                    console.print(f" {status}")
                    last_status = status

                # Draw the bar whenever the server reports byte counts, rather
                # than trying to match the exact wording of the status text.
                completed = chunk.get("completed") or 0
                total = chunk.get("total") or 0
                if total:
                    sys.stdout.write(_ollama_progress_line(completed, total))
                    sys.stdout.flush()
                    bar_open = True
    except KeyboardInterrupt:
        console.print("\n[yellow]Download cancelled.[/yellow]")
        raise SystemExit(0) from None
    except (OSError, http.client.HTTPException) as e:
        # OSError covers urllib.error.URLError/HTTPError as well as timeouts
        # and connection resets that happen mid-stream; HTTPException covers a
        # truncated chunked response.
        console.print(f"\n[red]Error: {e}[/red]")
        raise SystemExit(1) from e

    # The stream ended without a "success" status: treat it as a failure
    # instead of returning silently with exit code 0.
    _close_bar()
    console.print("\n[red]Error: download ended before Ollama reported success.[/red]")
    raise SystemExit(1)
2779+
2780+
2781+
@ollama_group.command(name="suggest")
@click.argument("task", required=False)
def ollama_suggest(task: str | None) -> None:
    """Suggest models for a given task type.

    TASK: coding | requirements | architecture | chat | analysis | reasoning

    Example: specsmith ollama suggest coding
    """
    # Without TASK the known task types are listed. Otherwise the suggestions
    # from `suggest_for_task` are printed, each marked as installed or with its
    # download size.
    from specsmith.ollama_cmds import (
        TASK_TAGS,
        detect_gpu,
        get_installed_ids,
        is_running,
        suggest_for_task,
    )

    if not task:
        console.print("[bold]Available task types:[/bold]")
        for key in TASK_TAGS:
            console.print(f" {key}")
        console.print("\nUsage: [bold]specsmith ollama suggest <task>[/bold]")
        return

    # Accept "Coding" or " coding " when only the normalised spelling is a
    # known task; an exact match is always left untouched.
    if task not in TASK_TAGS:
        normalized = task.strip().lower()
        if normalized in TASK_TAGS:
            task = normalized

    gpus = detect_gpu()
    # With no GPU detected, 999 effectively disables VRAM filtering.
    vram = max((g["vram_gb"] for g in gpus), default=999)
    suggestions = suggest_for_task(task, vram)

    if not suggestions:
        console.print(f"[yellow]No models found for task '{task}'.[/yellow]")
        if task not in TASK_TAGS:
            # Most likely a typo: show what would have been accepted.
            known = ", ".join(str(key) for key in TASK_TAGS)
            console.print(f"Available task types: {known}")
        return

    installed_ids: list[str] = []
    if is_running():
        installed_ids = get_installed_ids()

    status_width = 11  # visible width of "✓ installed"
    console.print(f"[bold]Model suggestions for task: {task}[/bold]\n")
    for m in suggestions:
        # Empty ids are skipped because "" is a substring of everything.
        is_installed = any(
            iid and (m["id"] in iid or iid in m["id"]) for iid in installed_ids
        )
        if is_installed:
            label, style = "✓ installed", "green"
        else:
            label, style = f"⬇ {m['size_gb']:.1f}GB", "dim"
        # Pad by visible text so model names line up regardless of the status.
        status = f"[{style}]{label}[/{style}]" + " " * max(0, status_width - len(label))
        console.print(
            f" {status} [bold]{m['name']:<35s}[/bold] {m['notes']}"
        )
        console.print(f" [dim]ID: {m['id']} VRAM: {m['vram_gb']:.1f}GB ctx: {m['ctx_k']}K[/dim]")
    console.print("\n Download: [bold]specsmith ollama pull <id>[/bold]")
2826+
2827+
2828+
# NOTE(review): `ollama_group` is created with the `@main.group(name="ollama")`
# decorator, which presumably already attaches it to `main`; if so this call
# is redundant but harmless. Confirm before removing.
main.add_command(ollama_group)
2829+
2830+
25952831
# ---------------------------------------------------------------------------
25962832
# Workspace — multi-project management (#17)
25972833
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)