From 5c8b46b13b2b6c98cd2f13ef3ad34c5ac0800405 Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Fri, 10 Apr 2026 18:21:22 +0800 Subject: [PATCH 1/7] feat: add `ov init` interactive setup wizard for local model deployment Add an interactive CLI wizard that guides users through configuring OpenViking with local Ollama models, especially targeting macOS/Apple Silicon beginners. The wizard auto-detects and installs Ollama, recommends models based on system RAM, pulls selected models, and generates a valid ov.conf. Supported models: - Embedding: qwen3-embedding (0.6b/4b/8b), embeddinggemma:300m - VLM: qwen3.5 (2b-122b), gemma4 (e2b/e4b/26b/31b) Also fixes `ov doctor` to recognize Ollama providers as valid without requiring an API key. Co-Authored-By: Claude Opus 4.6 --- openviking_cli/doctor.py | 8 + openviking_cli/rust_cli.py | 5 + openviking_cli/setup_wizard.py | 702 ++++++++++++++++++ .../utils/config/embedding_config.py | 6 + tests/cli/test_setup_wizard.py | 205 +++++ 5 files changed, 926 insertions(+) create mode 100644 openviking_cli/setup_wizard.py create mode 100644 tests/cli/test_setup_wizard.py diff --git a/openviking_cli/doctor.py b/openviking_cli/doctor.py index bcb231aaf..82345afbb 100644 --- a/openviking_cli/doctor.py +++ b/openviking_cli/doctor.py @@ -160,6 +160,10 @@ def check_embedding() -> tuple[bool, str, Optional[str]]: if provider == "unknown": return False, "No embedding provider configured", "Add embedding.dense section to ov.conf" + # Ollama doesn't need an API key + if provider == "ollama": + return True, f"{provider}/{model}", None + api_key = dense.get("api_key", "") if not api_key or api_key.startswith("{"): return ( @@ -188,6 +192,10 @@ def check_vlm() -> tuple[bool, str, Optional[str]]: if not provider: return False, "No VLM provider configured", "Add vlm section to ov.conf" + # Ollama via LiteLLM doesn't need a real API key + if provider == "litellm" and model.startswith("ollama/"): + return True, f"{provider}/{model}", None + api_key = vlm.get("api_key", "") if not api_key or api_key.startswith("{"): return ( diff --git a/openviking_cli/rust_cli.py b/openviking_cli/rust_cli.py index 35823f1c1..d940a4286 100644 --- a/openviking_cli/rust_cli.py +++ b/openviking_cli/rust_cli.py @@ -54,6 +54,11 @@ def main(): from openviking_cli.doctor import main as doctor_main sys.exit(doctor_main()) + + if len(sys.argv) > 1 and sys.argv[1] == "init": + from openviking_cli.setup_wizard import main as init_main + + sys.exit(init_main()) # 1. 检查开发环境(仅在直接运行脚本时有效) try: # __file__ is openviking_cli/rust_cli.py, so parent is openviking_cli directory diff --git a/openviking_cli/setup_wizard.py b/openviking_cli/setup_wizard.py new file mode 100644 index 000000000..8f678e10f --- /dev/null +++ b/openviking_cli/setup_wizard.py @@ -0,0 +1,702 @@ +"""ov init - interactive setup wizard for OpenViking. + +Guides users through model selection and configuration, with a focus on +local deployment via Ollama for macOS / Apple Silicon beginners. 
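
The Ollama flow runs in this order: ensure Ollama is installed and running
(``_ensure_ollama``), recommend presets from detected system RAM
(``_get_recommended_indices``), let the user pick embedding and VLM models,
offer to ``ollama pull`` anything missing, then assemble and write ``ov.conf``
(``_build_ollama_config`` / ``_write_config``). The config builders are pure
functions, so they can be exercised directly; for example (mirroring the unit
tests, with an arbitrary workspace path)::

    from openviking_cli.setup_wizard import (
        EMBEDDING_PRESETS, VLM_PRESETS, _build_ollama_config,
    )

    conf = _build_ollama_config(EMBEDDING_PRESETS[0], VLM_PRESETS[0], "/tmp/ov-data")
    assert conf["embedding"]["dense"]["provider"] == "ollama"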
+""" + +from __future__ import annotations + +import json +import os +import platform +import shutil +import subprocess +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from openviking_cli.utils.config.consts import DEFAULT_CONFIG_DIR + +# --------------------------------------------------------------------------- +# ANSI helpers (same pattern as doctor.py) +# --------------------------------------------------------------------------- + +_USE_COLOR = hasattr(sys.stdout, "isatty") and sys.stdout.isatty() + + +def _green(t: str) -> str: + return f"\033[32m{t}\033[0m" if _USE_COLOR else t + + +def _red(t: str) -> str: + return f"\033[31m{t}\033[0m" if _USE_COLOR else t + + +def _yellow(t: str) -> str: + return f"\033[33m{t}\033[0m" if _USE_COLOR else t + + +def _dim(t: str) -> str: + return f"\033[2m{t}\033[0m" if _USE_COLOR else t + + +def _bold(t: str) -> str: + return f"\033[1m{t}\033[0m" if _USE_COLOR else t + + +def _cyan(t: str) -> str: + return f"\033[36m{t}\033[0m" if _USE_COLOR else t + + +# --------------------------------------------------------------------------- +# Interactive prompt helpers (stdlib only) +# --------------------------------------------------------------------------- + + +def _prompt_choice(prompt: str, options: list[tuple[str, str]], default: int = 1) -> int: + """Display numbered options and return 1-based selection index.""" + print(f"\n {_bold(prompt)}\n") + for i, (label, desc) in enumerate(options, 1): + marker = " " + line = f" {marker}[{i}] {label}" + if desc: + line += f" {_dim(desc)}" + print(line) + + while True: + try: + raw = input(f"\n Select [{default}]: ").strip() + except EOFError: + return default + if not raw: + return default + try: + choice = int(raw) + if 1 <= choice <= len(options): + return choice + except ValueError: + pass + print(f" {_red('Please enter a number between 1 and ' + str(len(options)))}") + + +def _prompt_input(prompt: str, default: str = "") -> str: + """Prompt for free-text input with optional default.""" + suffix = f" [{default}]" if default else "" + try: + raw = input(f" {prompt}{suffix}: ").strip() + except EOFError: + return default + return raw or default + + +def _prompt_confirm(prompt: str, default: bool = True) -> bool: + """Yes/no confirmation prompt.""" + hint = "Y/n" if default else "y/N" + try: + raw = input(f" {prompt} [{hint}]: ").strip().lower() + except EOFError: + return default + if not raw: + return default + return raw in ("y", "yes") + + +# --------------------------------------------------------------------------- +# System info +# --------------------------------------------------------------------------- + + +def _get_system_ram_gb() -> int: + """Get total system RAM in GB.""" + try: + pages = os.sysconf("SC_PHYS_PAGES") + page_size = os.sysconf("SC_PAGE_SIZE") + return (pages * page_size) // (1024**3) + except (ValueError, OSError, AttributeError): + pass + # Windows fallback + try: + import ctypes + + kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] + + class MEMORYSTATUSEX(ctypes.Structure): + _fields_ = [ + ("dwLength", ctypes.c_ulong), + ("dwMemoryLoad", ctypes.c_ulong), + ("ullTotalPhys", ctypes.c_ulonglong), + ("ullAvailPhys", ctypes.c_ulonglong), + ("ullTotalPageFile", ctypes.c_ulonglong), + ("ullAvailPageFile", ctypes.c_ulonglong), + ("ullTotalVirtual", ctypes.c_ulonglong), + ("ullAvailVirtual", ctypes.c_ulonglong), + ("ullAvailExtendedVirtual", ctypes.c_ulonglong), + ] 
+ + stat = MEMORYSTATUSEX() + stat.dwLength = ctypes.sizeof(stat) + kernel32.GlobalMemoryStatusEx(ctypes.byref(stat)) + return stat.ullTotalPhys // (1024**3) + except Exception: + return 0 + + +# --------------------------------------------------------------------------- +# Ollama interaction +# --------------------------------------------------------------------------- + +_OLLAMA_DEFAULT_HOST = "localhost" +_OLLAMA_DEFAULT_PORT = 11434 + + +def _check_ollama_running( + host: str = _OLLAMA_DEFAULT_HOST, port: int = _OLLAMA_DEFAULT_PORT +) -> bool: + """Check if Ollama is running by hitting the /api/tags endpoint.""" + try: + url = f"http://{host}:{port}/api/tags" + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=3): + return True + except (urllib.error.URLError, OSError, TimeoutError): + return False + + +def _get_ollama_models( + host: str = _OLLAMA_DEFAULT_HOST, port: int = _OLLAMA_DEFAULT_PORT +) -> list[str]: + """Fetch names of locally available Ollama models.""" + try: + url = f"http://{host}:{port}/api/tags" + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read().decode()) + return [m["name"] for m in data.get("models", [])] + except (urllib.error.URLError, OSError, TimeoutError, json.JSONDecodeError, KeyError): + return [] + + +def _is_model_available(model_name: str, available: list[str]) -> bool: + """Check if a model is available locally (prefix match for tag variants).""" + for m in available: + # "qwen3-embedding:0.6b" matches "qwen3-embedding:0.6b" + # "qwen3-embedding:8b" matches "qwen3-embedding:8b-fp16" etc. + if m == model_name or m.startswith(model_name + "-"): + return True + # model_name without tag matches model with ":latest" + if ":" not in model_name and m.split(":")[0] == model_name: + return True + return False + + +def _ollama_pull_model(model_name: str) -> bool: + """Pull an Ollama model via CLI subprocess (shows native progress bar).""" + try: + result = subprocess.run(["ollama", "pull", model_name], check=False) + return result.returncode == 0 + except FileNotFoundError: + print(f" {_red('ollama command not found. 
Is Ollama installed?')}") + return False + + +def _is_ollama_installed() -> bool: + """Check if the ollama CLI binary is on PATH.""" + return shutil.which("ollama") is not None + + +def _install_ollama() -> bool: + """Install Ollama automatically based on the current platform.""" + system = platform.system() + + if system == "Darwin": + # macOS: prefer brew, fallback to official script + if shutil.which("brew"): + print(f" {_dim('Installing via Homebrew...')}") + result = subprocess.run(["brew", "install", "ollama"], check=False) + if result.returncode == 0: + return True + # Fallback: official install script + print(f" {_dim('Installing via official script...')}") + result = subprocess.run( + ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], + check=False, + ) + return result.returncode == 0 + + elif system == "Linux": + print(f" {_dim('Installing via official script...')}") + result = subprocess.run( + ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], + check=False, + ) + return result.returncode == 0 + + else: + # Windows or other: can't auto-install + print(f" {_yellow('Automatic installation is not supported on ' + system)}") + print(f" Please download from: {_cyan('https://ollama.com/download')}") + return False + + +def _start_ollama() -> bool: + """Start Ollama in the background and wait for it to be ready.""" + # Already running? + if _check_ollama_running(): + return True + + print(f" {_dim('Starting Ollama...')}", end=" ", flush=True) + try: + subprocess.Popen( + ["ollama", "serve"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except FileNotFoundError: + print(_red("failed")) + return False + + # Wait up to 15 seconds for Ollama to become ready + for _ in range(30): + time.sleep(0.5) + if _check_ollama_running(): + print(_green("ready")) + return True + + print(_yellow("timeout (Ollama may still be starting)")) + return False + + +def _ensure_ollama() -> bool: + """Make sure Ollama is installed and running. 
Returns True if ready.""" + print("\n Checking Ollama...", end=" ", flush=True) + + if _is_ollama_installed(): + if _check_ollama_running(): + print(_green("running at localhost:11434")) + return True + print(_yellow("installed but not running")) + return _start_ollama() + + # Not installed + print(_yellow("not installed")) + if not _prompt_confirm("Install Ollama now?"): + print(f"\n {_dim('Manual install: https://ollama.com/download')}") + return False + + print() + if not _install_ollama(): + print(f" {_red('Installation failed.')}") + print(f" {_dim('Try manually: https://ollama.com/download')}") + return False + + print(f" {_green('OK')} Ollama installed") + return _start_ollama() + + +# --------------------------------------------------------------------------- +# Model presets +# --------------------------------------------------------------------------- + + +@dataclass +class EmbeddingPreset: + label: str + model: str # Ollama model name + dimension: int + size_hint: str + min_ram_gb: int # Minimum recommended RAM + + +@dataclass +class VLMPreset: + label: str + ollama_model: str # For ollama pull + litellm_model: str # For config: "ollama/xxx" + size_hint: str + min_ram_gb: int # Minimum recommended RAM + + +EMBEDDING_PRESETS: list[EmbeddingPreset] = [ + EmbeddingPreset("Qwen3-Embedding 0.6B", "qwen3-embedding:0.6b", 1024, "~639 MB", 4), + EmbeddingPreset("Qwen3-Embedding 4B", "qwen3-embedding:4b", 1024, "~2.5 GB", 8), + EmbeddingPreset("Qwen3-Embedding 8B", "qwen3-embedding:8b", 1024, "~4.7 GB", 16), + EmbeddingPreset("EmbeddingGemma 300M", "embeddinggemma:300m", 768, "~622 MB", 4), +] + +VLM_PRESETS: list[VLMPreset] = [ + VLMPreset("Qwen 3.5 2B", "qwen3.5:2b", "ollama/qwen3.5:2b", "~2.7 GB", 4), + VLMPreset("Qwen 3.5 4B", "qwen3.5:4b", "ollama/qwen3.5:4b", "~3.4 GB", 8), + VLMPreset("Qwen 3.5 9B", "qwen3.5:9b", "ollama/qwen3.5:9b", "~6.6 GB", 16), + VLMPreset("Qwen 3.5 27B", "qwen3.5:27b", "ollama/qwen3.5:27b", "~17 GB", 32), + VLMPreset("Qwen 3.5 35B", "qwen3.5:35b", "ollama/qwen3.5:35b", "~24 GB", 48), + VLMPreset("Qwen 3.5 122B", "qwen3.5:122b", "ollama/qwen3.5:122b", "~81 GB", 128), + VLMPreset("Gemma 4 E2B", "gemma4:e2b", "ollama/gemma4:e2b", "~7.2 GB", 16), + VLMPreset("Gemma 4 E4B", "gemma4:e4b", "ollama/gemma4:e4b", "~9.6 GB", 16), + VLMPreset("Gemma 4 26B", "gemma4:26b", "ollama/gemma4:26b", "~18 GB", 32), + VLMPreset("Gemma 4 31B", "gemma4:31b", "ollama/gemma4:31b", "~20 GB", 48), +] + +# Recommended defaults indexed by RAM tier +_RAM_TIERS: list[tuple[int, int, int]] = [ + # (max_ram_gb, embedding_preset_index, vlm_preset_index) + (8, 0, 0), # ≤8 GB: qwen3-embedding:0.6b + qwen3.5:2b + (16, 0, 1), # 8-16 GB: qwen3-embedding:0.6b + qwen3.5:4b + (32, 2, 2), # 16-32 GB: qwen3-embedding:8b + qwen3.5:9b + (64, 2, 7), # 32-64 GB: qwen3-embedding:8b + gemma4:e4b +] +_RAM_DEFAULT_EMBED = 2 # ≥64 GB: qwen3-embedding:8b +_RAM_DEFAULT_VLM = 3 # ≥64 GB: qwen3.5:27b + + +def _get_recommended_indices(ram_gb: int) -> tuple[int, int]: + """Return (embedding_index, vlm_index) for the RAM tier (0-based).""" + for max_ram, emb_idx, vlm_idx in _RAM_TIERS: + if ram_gb <= max_ram: + return emb_idx, vlm_idx + return _RAM_DEFAULT_EMBED, _RAM_DEFAULT_VLM + + +# --------------------------------------------------------------------------- +# Cloud provider presets +# --------------------------------------------------------------------------- + + +@dataclass +class CloudProvider: + label: str + provider: str + default_api_base: str + default_embedding_model: str + default_embedding_dim: int + 
default_vlm_model: str + + +CLOUD_PROVIDERS: list[CloudProvider] = [ + CloudProvider( + "OpenAI", "openai", "https://api.openai.com/v1", + "text-embedding-3-small", 1536, "gpt-4o-mini", + ), + CloudProvider( + "Volcengine (Doubao)", "volcengine", "https://ark.cn-beijing.volces.com/api/v3", + "doubao-embedding-vision-250615", 1024, "doubao-seed-2-0-pro-260215", + ), +] + + +# --------------------------------------------------------------------------- +# Config building +# --------------------------------------------------------------------------- + + +def _build_ollama_config( + embedding: EmbeddingPreset, + vlm: VLMPreset, + workspace: str, +) -> dict[str, Any]: + """Build ov.conf dict for Ollama-based setup.""" + return { + "storage": {"workspace": workspace}, + "embedding": { + "dense": { + "provider": "ollama", + "model": embedding.model, + "api_base": "http://localhost:11434/v1", + "dimension": embedding.dimension, + "input": "text", + }, + }, + "vlm": { + "provider": "litellm", + "model": vlm.litellm_model, + "api_key": "no-key", + "api_base": "http://localhost:11434", + "temperature": 0.0, + "max_retries": 2, + }, + } + + +def _build_cloud_config( + provider: CloudProvider, + embedding_api_key: str, + embedding_model: str, + embedding_dim: int, + vlm_api_key: str, + vlm_model: str, + workspace: str, + embedding_api_base: str | None = None, + vlm_api_base: str | None = None, +) -> dict[str, Any]: + """Build ov.conf dict for cloud API setup.""" + return { + "storage": {"workspace": workspace}, + "embedding": { + "dense": { + "provider": provider.provider, + "model": embedding_model, + "api_key": embedding_api_key, + "api_base": embedding_api_base or provider.default_api_base, + "dimension": embedding_dim, + }, + }, + "vlm": { + "provider": provider.provider, + "model": vlm_model, + "api_key": vlm_api_key, + "api_base": vlm_api_base or provider.default_api_base, + "temperature": 0.0, + "max_retries": 2, + }, + } + + +# --------------------------------------------------------------------------- +# Config I/O +# --------------------------------------------------------------------------- + +_DEFAULT_CONFIG_PATH = DEFAULT_CONFIG_DIR / "ov.conf" +_DEFAULT_WORKSPACE = str(DEFAULT_CONFIG_DIR / "data") + + +def _write_config(config_dict: dict[str, Any], config_path: Path) -> bool: + """Write config dict as JSON. Backs up existing file as .bak.""" + try: + config_path.parent.mkdir(parents=True, exist_ok=True) + if config_path.exists(): + backup = config_path.with_suffix(".conf.bak") + config_path.rename(backup) + print(f" {_dim('Existing config backed up to ' + str(backup))}") + config_path.write_text(json.dumps(config_dict, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8") + return True + except OSError as exc: + print(f" {_red(f'Failed to write config: {exc}')}") + return False + + +# --------------------------------------------------------------------------- +# Wizard flows +# --------------------------------------------------------------------------- + + +def _wizard_ollama() -> dict[str, Any] | None: + """Ollama-based local model setup flow.""" + # Ensure Ollama is installed and running + ollama_running = _ensure_ollama() + + if not ollama_running: + if not _prompt_confirm( + "Continue without Ollama? 
(config will be generated but models won't be pulled)", + default=False, + ): + return None + + available_models = _get_ollama_models() if ollama_running else [] + + # System RAM + ram_gb = _get_system_ram_gb() + rec_embed_idx, rec_vlm_idx = _get_recommended_indices(ram_gb) + if ram_gb > 0: + print(f"\n {_dim(f'Detected {ram_gb} GB RAM')}") + + # --- Embedding selection --- + embed_options: list[tuple[str, str]] = [] + for i, p in enumerate(EMBEDDING_PRESETS): + rec = " *" if i == rec_embed_idx else "" + avail = "" + if ollama_running and _is_model_available(p.model, available_models): + avail = _green(" [downloaded]") + embed_options.append(( + f"{p.label}", + f"({p.dimension}d, {p.size_hint}){avail}{rec}", + )) + + embed_choice = _prompt_choice("Embedding model:", embed_options, default=rec_embed_idx + 1) + embedding = EMBEDDING_PRESETS[embed_choice - 1] + + # Pull embedding model + if ollama_running and not _is_model_available(embedding.model, available_models): + if _prompt_confirm(f"'{embedding.model}' not found locally. Pull now?"): + print() + if not _ollama_pull_model(embedding.model): + print(f" {_yellow('Pull failed. You can pull it later: ollama pull ' + embedding.model)}") + else: + print(f" {_green('OK')} {embedding.model} pulled successfully") + + # --- VLM selection --- + vlm_options: list[tuple[str, str]] = [] + for i, p in enumerate(VLM_PRESETS): + rec = " *" if i == rec_vlm_idx else "" + avail = "" + if ollama_running and _is_model_available(p.ollama_model, available_models): + avail = _green(" [downloaded]") + vlm_options.append(( + f"{p.label}", + f"({p.size_hint}){avail}{rec}", + )) + + vlm_choice = _prompt_choice("Language model (VLM):", vlm_options, default=rec_vlm_idx + 1) + vlm = VLM_PRESETS[vlm_choice - 1] + + # Pull VLM model + if ollama_running and not _is_model_available(vlm.ollama_model, available_models): + if _prompt_confirm(f"'{vlm.ollama_model}' not found locally. Pull now?"): + print() + if not _ollama_pull_model(vlm.ollama_model): + print(f" {_yellow('Pull failed. 
You can pull it later: ollama pull ' + vlm.ollama_model)}") + else: + print(f" {_green('OK')} {vlm.ollama_model} pulled successfully") + + # Workspace + workspace = _prompt_input("Workspace", default=_DEFAULT_WORKSPACE) + + return _build_ollama_config(embedding, vlm, workspace) + + +def _wizard_cloud() -> dict[str, Any] | None: + """Cloud API model setup flow.""" + # Provider selection + provider_options = [(p.label, "") for p in CLOUD_PROVIDERS] + provider_options.append(("Other (manual)", "")) + choice = _prompt_choice("Cloud provider:", provider_options, default=1) + + if choice > len(CLOUD_PROVIDERS): + # Manual / Other + print(f"\n See example config: {_cyan('examples/ov.conf.example')}") + print(f" Edit {_cyan(str(_DEFAULT_CONFIG_PATH))} manually.\n") + return None + + provider = CLOUD_PROVIDERS[choice - 1] + + # Embedding config + print(f"\n {_bold('Embedding configuration')}") + embedding_api_key = _prompt_input("API Key") + if not embedding_api_key: + print(f" {_red('API key is required')}") + return None + embedding_model = _prompt_input("Model", default=provider.default_embedding_model) + embedding_dim_str = _prompt_input("Dimension", default=str(provider.default_embedding_dim)) + try: + embedding_dim = int(embedding_dim_str) + except ValueError: + embedding_dim = provider.default_embedding_dim + embedding_api_base = _prompt_input("API Base", default=provider.default_api_base) + + # VLM config + print(f"\n {_bold('VLM configuration')}") + vlm_api_key = _prompt_input("API Key (same as above?)", default=embedding_api_key) + vlm_model = _prompt_input("Model", default=provider.default_vlm_model) + vlm_api_base = _prompt_input("API Base", default=provider.default_api_base) + + # Workspace + workspace = _prompt_input("Workspace", default=_DEFAULT_WORKSPACE) + + return _build_cloud_config( + provider, embedding_api_key, embedding_model, embedding_dim, + vlm_api_key, vlm_model, workspace, + embedding_api_base, vlm_api_base, + ) + + +def _wizard_custom() -> dict[str, Any] | None: + """Custom configuration - point user to example config.""" + example = Path(__file__).parent.parent / "examples" / "ov.conf.example" + print(f"\n Example config: {_cyan(str(example))}") + print(f" Config path: {_cyan(str(_DEFAULT_CONFIG_PATH))}") + + editor = os.environ.get("EDITOR", os.environ.get("VISUAL", "")) + if editor: + if _prompt_confirm(f"Open {_DEFAULT_CONFIG_PATH} in {editor}?"): + _DEFAULT_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True) + if not _DEFAULT_CONFIG_PATH.exists(): + # Copy example as starting point + try: + _DEFAULT_CONFIG_PATH.write_text(example.read_text(encoding="utf-8"), + encoding="utf-8") + except OSError: + pass + subprocess.run([editor, str(_DEFAULT_CONFIG_PATH)], check=False) + else: + print(f"\n {_dim('Set $EDITOR to open the config file automatically.')}") + return None + + +# --------------------------------------------------------------------------- +# Main orchestrator +# --------------------------------------------------------------------------- + + +def run_init() -> int: + """Run the interactive setup wizard.""" + print(f"\n {_bold('OpenViking Setup')}") + print(f" {'=' * 16}\n") + + # Check for existing config + if _DEFAULT_CONFIG_PATH.exists(): + print(f" {_yellow('Existing config found:')} {_DEFAULT_CONFIG_PATH}") + if not _prompt_confirm("Overwrite? 
(current config will be backed up as .bak)"): + print(" Setup cancelled.\n") + return 0 + + # Deployment mode + mode = _prompt_choice("Choose setup mode:", [ + ("Local models via Ollama", "(recommended for macOS / Apple Silicon)"), + ("Cloud API", "(OpenAI, Volcengine, etc.)"), + ("Custom", "(manual editing)"), + ], default=1) + + config_dict: dict[str, Any] | None = None + + if mode == 1: + config_dict = _wizard_ollama() + elif mode == 2: + config_dict = _wizard_cloud() + else: + _wizard_custom() + return 0 + + if config_dict is None: + print("\n Setup cancelled.\n") + return 0 + + # Summary + emb = config_dict.get("embedding", {}).get("dense", {}) + vlm = config_dict.get("vlm", {}) + ws = config_dict.get("storage", {}).get("workspace", _DEFAULT_WORKSPACE) + + print(f"\n {_bold('Summary:')}") + print(f" Embedding: {emb.get('provider', '')} / {emb.get('model', '')} ({emb.get('dimension', '')}d)") + print(f" VLM: {vlm.get('provider', '')} / {vlm.get('model', '')}") + print(f" Workspace: {ws}") + print(f" Config: {_DEFAULT_CONFIG_PATH}") + + if not _prompt_confirm("\n Save configuration?"): + print("\n Setup cancelled.\n") + return 0 + + # Write + if not _write_config(config_dict, _DEFAULT_CONFIG_PATH): + return 1 + + print(f" {_green('OK')} Configuration written to {_DEFAULT_CONFIG_PATH}\n") + + # Post-init tips + print(f" {_bold('Next steps:')}") + print(f" Start the server: {_cyan('openviking-server')}") + print(f" Validate setup: {_cyan('ov doctor')}") + print() + + return 0 + + +def main() -> int: + """Entry point for ``ov init``.""" + try: + return run_init() + except KeyboardInterrupt: + print("\n\n Setup cancelled.\n") + return 130 diff --git a/openviking_cli/utils/config/embedding_config.py b/openviking_cli/utils/config/embedding_config.py index c64068843..9c0da2d6d 100644 --- a/openviking_cli/utils/config/embedding_config.py +++ b/openviking_cli/utils/config/embedding_config.py @@ -232,6 +232,12 @@ def get_effective_dimension(self) -> int: "all-minilm-l6-v2": 384, "snowflake-arctic-embed": 1024, "snowflake-arctic-embed-l": 1024, + "qwen3-embedding": 1024, + "qwen3-embedding:0.6b": 1024, + "qwen3-embedding:4b": 1024, + "qwen3-embedding:8b": 1024, + "embeddinggemma": 768, + "embeddinggemma:300m": 768, } model_lower = (self.model or "").lower() if model_lower in ollama_model_dimensions: diff --git a/tests/cli/test_setup_wizard.py b/tests/cli/test_setup_wizard.py new file mode 100644 index 000000000..d272a85ee --- /dev/null +++ b/tests/cli/test_setup_wizard.py @@ -0,0 +1,205 @@ +"""Tests for the ov init setup wizard.""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +from openviking_cli.setup_wizard import ( + CLOUD_PROVIDERS, + EMBEDDING_PRESETS, + VLM_PRESETS, + _build_cloud_config, + _build_ollama_config, + _check_ollama_running, + _get_ollama_models, + _get_recommended_indices, + _is_model_available, + _write_config, +) + +# --------------------------------------------------------------------------- +# Ollama detection +# --------------------------------------------------------------------------- + + +class TestOllamaDetection: + def test_ollama_running(self): + mock_resp = MagicMock() + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + + with patch("openviking_cli.setup_wizard.urllib.request.urlopen", return_value=mock_resp): + assert _check_ollama_running() is True + + def test_ollama_not_running(self): + import urllib.error + + with patch( + 
"openviking_cli.setup_wizard.urllib.request.urlopen", + side_effect=urllib.error.URLError("refused"), + ): + assert _check_ollama_running() is False + + def test_get_models(self): + mock_data = json.dumps({ + "models": [ + {"name": "qwen3-embedding:0.6b", "size": 639000000}, + {"name": "gemma4:e4b", "size": 9600000000}, + ] + }).encode() + + mock_resp = MagicMock() + mock_resp.read.return_value = mock_data + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + + with patch("openviking_cli.setup_wizard.urllib.request.urlopen", return_value=mock_resp): + models = _get_ollama_models() + assert "qwen3-embedding:0.6b" in models + assert "gemma4:e4b" in models + + def test_get_models_error(self): + import urllib.error + + with patch( + "openviking_cli.setup_wizard.urllib.request.urlopen", + side_effect=urllib.error.URLError("refused"), + ): + assert _get_ollama_models() == [] + + +# --------------------------------------------------------------------------- +# Model availability +# --------------------------------------------------------------------------- + + +class TestModelAvailability: + def test_exact_match(self): + available = ["qwen3-embedding:0.6b", "gemma4:e4b"] + assert _is_model_available("qwen3-embedding:0.6b", available) is True + + def test_no_match(self): + available = ["qwen3-embedding:0.6b"] + assert _is_model_available("nomic-embed-text", available) is False + + def test_tagless_matches_latest(self): + available = ["gemma:300m"] + assert _is_model_available("gemma", available) is True + + def test_prefix_variant(self): + available = ["qwen3-embedding:0.6b-fp16"] + assert _is_model_available("qwen3-embedding:0.6b", available) is True + + +# --------------------------------------------------------------------------- +# Config building +# --------------------------------------------------------------------------- + + +class TestConfigBuilding: + def test_ollama_config_structure(self): + embedding = EMBEDDING_PRESETS[0] # qwen3-embedding:0.6b + vlm = VLM_PRESETS[0] # qwen3.5:2b + + config = _build_ollama_config(embedding, vlm, "/tmp/ov_test") + + assert config["storage"]["workspace"] == "/tmp/ov_test" + + dense = config["embedding"]["dense"] + assert dense["provider"] == "ollama" + assert dense["model"] == "qwen3-embedding:0.6b" + assert dense["dimension"] == 1024 + assert dense["api_base"] == "http://localhost:11434/v1" + + vlm_cfg = config["vlm"] + assert vlm_cfg["provider"] == "litellm" + assert vlm_cfg["model"] == "ollama/qwen3.5:2b" + assert vlm_cfg["api_key"] == "no-key" + assert vlm_cfg["api_base"] == "http://localhost:11434" + + def test_cloud_config_structure(self): + provider = CLOUD_PROVIDERS[0] # OpenAI + + config = _build_cloud_config( + provider, + embedding_api_key="sk-test", + embedding_model="text-embedding-3-small", + embedding_dim=1536, + vlm_api_key="sk-test", + vlm_model="gpt-4o-mini", + workspace="/tmp/ov_test", + ) + + assert config["embedding"]["dense"]["api_key"] == "sk-test" + assert config["vlm"]["api_key"] == "sk-test" + assert config["vlm"]["provider"] == "openai" + + def test_all_presets_valid(self): + """Every preset should produce a config with required fields.""" + for emb in EMBEDDING_PRESETS: + for vlm in VLM_PRESETS: + config = _build_ollama_config(emb, vlm, "/tmp/test") + assert "embedding" in config + assert "vlm" in config + assert config["embedding"]["dense"]["dimension"] > 0 + + +# --------------------------------------------------------------------------- +# RAM-based recommendations 
+# --------------------------------------------------------------------------- + + +class TestRAMRecommendations: + def test_low_ram(self): + emb_idx, vlm_idx = _get_recommended_indices(4) + assert EMBEDDING_PRESETS[emb_idx].model == "qwen3-embedding:0.6b" + assert VLM_PRESETS[vlm_idx].ollama_model == "qwen3.5:2b" + + def test_medium_ram(self): + emb_idx, vlm_idx = _get_recommended_indices(16) + assert EMBEDDING_PRESETS[emb_idx].model == "qwen3-embedding:0.6b" + assert VLM_PRESETS[vlm_idx].ollama_model == "qwen3.5:4b" + + def test_high_ram(self): + emb_idx, vlm_idx = _get_recommended_indices(32) + assert EMBEDDING_PRESETS[emb_idx].model == "qwen3-embedding:8b" + + def test_very_high_ram(self): + emb_idx, vlm_idx = _get_recommended_indices(128) + assert EMBEDDING_PRESETS[emb_idx].model == "qwen3-embedding:8b" + + +# --------------------------------------------------------------------------- +# Config writing +# --------------------------------------------------------------------------- + + +class TestConfigWriting: + def test_write_new_config(self, tmp_path): + config_path = tmp_path / "ov.conf" + config = _build_ollama_config(EMBEDDING_PRESETS[0], VLM_PRESETS[0], str(tmp_path / "data")) + + assert _write_config(config, config_path) is True + assert config_path.exists() + + loaded = json.loads(config_path.read_text(encoding="utf-8")) + assert loaded["embedding"]["dense"]["provider"] == "ollama" + + def test_backup_existing(self, tmp_path): + config_path = tmp_path / "ov.conf" + config_path.write_text('{"old": true}', encoding="utf-8") + + config = _build_ollama_config(EMBEDDING_PRESETS[0], VLM_PRESETS[0], str(tmp_path / "data")) + assert _write_config(config, config_path) is True + + backup = tmp_path / "ov.conf.bak" + assert backup.exists() + assert json.loads(backup.read_text())["old"] is True + + def test_creates_parent_dirs(self, tmp_path): + config_path = tmp_path / "subdir" / "ov.conf" + config = _build_ollama_config(EMBEDDING_PRESETS[0], VLM_PRESETS[0], "/tmp/data") + + assert _write_config(config, config_path) is True + assert config_path.exists() From 4bcc3f64da7e793b5358febd3721c31103c53a2f Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Fri, 10 Apr 2026 19:41:41 +0800 Subject: [PATCH 2/7] feat: add Ollama lifecycle management for server startup and health checks Extract Ollama utilities into shared module (openviking_cli/utils/ollama.py) so both `ov init` and `openviking-server` can reuse them. Server now auto-detects Ollama from config and ensures it's running at startup ("ensure running, never stop" pattern). Adds Ollama connectivity to `/ready` health check and `ov doctor` diagnostics. 
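
A sketch of the intended startup call pattern, using the helpers this patch
adds (error handling abridged; the config-singleton import matches the one
used in the server code in this patch):

    from openviking_cli.utils.config.open_viking_config import OpenVikingConfigSingleton
    from openviking_cli.utils.ollama import detect_ollama_in_config, ensure_ollama_for_server

    ov_config = OpenVikingConfigSingleton.get_instance()
    uses_ollama, host, port = detect_ollama_in_config(ov_config)
    if uses_ollama:
        # "Ensure running, never stop": start `ollama serve` if needed,
        # but never tear down a shared Ollama instance on exit.
        result = ensure_ollama_for_server(host, port)
        if not result.success:
            print(f"Warning: Ollama not available at {host}:{port}: {result.message}")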
Co-Authored-By: Claude Opus 4.6 --- openviking/server/bootstrap.py | 21 ++ openviking/server/routers/system.py | 17 ++ openviking_cli/doctor.py | 38 ++++ openviking_cli/setup_wizard.py | 176 ++++------------- openviking_cli/utils/ollama.py | 290 ++++++++++++++++++++++++++++ tests/cli/test_setup_wizard.py | 32 +-- tests/unit/test_ollama_utils.py | 193 ++++++++++++++++++ 7 files changed, 613 insertions(+), 154 deletions(-) create mode 100644 openviking_cli/utils/ollama.py create mode 100644 tests/unit/test_ollama_utils.py diff --git a/openviking/server/bootstrap.py b/openviking/server/bootstrap.py index 1bd8c509e..3c8fc6b65 100644 --- a/openviking/server/bootstrap.py +++ b/openviking/server/bootstrap.py @@ -131,6 +131,27 @@ def main(): print(e, file=sys.stderr) sys.exit(1) + # Ensure Ollama is running if configured + try: + from openviking_cli.utils.ollama import detect_ollama_in_config, ensure_ollama_for_server + + ov_config = OpenVikingConfigSingleton.get_instance() + uses_ollama, ollama_host, ollama_port = detect_ollama_in_config(ov_config) + if uses_ollama: + result = ensure_ollama_for_server(ollama_host, ollama_port) + if result.success: + print(f"Ollama is running at {ollama_host}:{ollama_port}") + else: + print( + f"Warning: Ollama not available at {ollama_host}:{ollama_port}. " + f"Embedding/VLM may fail. ({result.message})", + file=sys.stderr, + ) + if result.stderr_output: + print(f" Ollama stderr: {result.stderr_output}", file=sys.stderr) + except Exception as e: + print(f"Warning: Ollama pre-flight check failed: {e}", file=sys.stderr) + # Override with command line arguments if args.host is not None: config.host = _normalize_host_arg(args.host) diff --git a/openviking/server/routers/system.py b/openviking/server/routers/system.py index ba5bd4666..3e571469b 100644 --- a/openviking/server/routers/system.py +++ b/openviking/server/routers/system.py @@ -99,6 +99,23 @@ async def readiness_check(request: Request): except Exception as e: checks["api_key_manager"] = f"error: {e}" + # 4. 
Ollama: connectivity check if configured + try: + from openviking_cli.utils.config.open_viking_config import OpenVikingConfigSingleton + from openviking_cli.utils.ollama import check_ollama_running, detect_ollama_in_config + + ov_config = OpenVikingConfigSingleton.get_instance() + uses_ollama, ollama_host, ollama_port = detect_ollama_in_config(ov_config) + if uses_ollama: + if check_ollama_running(ollama_host, ollama_port): + checks["ollama"] = "ok" + else: + checks["ollama"] = f"unreachable at {ollama_host}:{ollama_port}" + else: + checks["ollama"] = "not_configured" + except Exception as e: + checks["ollama"] = f"error: {e}" + all_ok = all(v in ("ok", "not_configured") for v in checks.values()) status_code = 200 if all_ok else 503 return JSONResponse( diff --git a/openviking_cli/doctor.py b/openviking_cli/doctor.py index 82345afbb..c3a642877 100644 --- a/openviking_cli/doctor.py +++ b/openviking_cli/doctor.py @@ -207,6 +207,43 @@ def check_vlm() -> tuple[bool, str, Optional[str]]: return True, f"{provider}/{model}", None +def check_ollama() -> tuple[bool, str, Optional[str]]: + """Check Ollama connectivity if the config uses an Ollama provider.""" + config_path = _find_config() + if config_path is None: + return True, "not configured", None + + data = _load_config_json(config_path) + if data is None: + return True, "not configured", None + + # Detect whether config uses Ollama + dense = data.get("embedding", {}).get("dense", {}) + vlm = data.get("vlm", {}) + uses_embedding = dense.get("provider") == "ollama" + uses_vlm = vlm.get("provider") == "litellm" and (vlm.get("model", "")).startswith("ollama/") + + if not uses_embedding and not uses_vlm: + return True, "not configured", None + + from openviking_cli.utils.ollama import check_ollama_running, parse_ollama_url + + # Determine host/port from config + if uses_embedding: + host, port = parse_ollama_url(dense.get("api_base")) + else: + host, port = parse_ollama_url(vlm.get("api_base")) + + if check_ollama_running(host, port): + return True, f"running at {host}:{port}", None + + return ( + False, + f"unreachable at {host}:{port}", + "Run 'ollama serve' or check your Ollama configuration", + ) + + def check_disk() -> tuple[bool, str, Optional[str]]: """Check free disk space in the workspace directory.""" config_path = _find_config() @@ -245,6 +282,7 @@ def check_disk() -> tuple[bool, str, Optional[str]]: ("AGFS", check_agfs), ("Embedding", check_embedding), ("VLM", check_vlm), + ("Ollama", check_ollama), ("Disk", check_disk), ] diff --git a/openviking_cli/setup_wizard.py b/openviking_cli/setup_wizard.py index 8f678e10f..854896280 100644 --- a/openviking_cli/setup_wizard.py +++ b/openviking_cli/setup_wizard.py @@ -8,18 +8,22 @@ import json import os -import platform -import shutil import subprocess import sys -import time -import urllib.error -import urllib.request from dataclasses import dataclass from pathlib import Path from typing import Any from openviking_cli.utils.config.consts import DEFAULT_CONFIG_DIR +from openviking_cli.utils.ollama import ( + check_ollama_running, + get_ollama_models, + install_ollama, + is_model_available, + is_ollama_installed, + ollama_pull_model, + start_ollama, +) # --------------------------------------------------------------------------- # ANSI helpers (same pattern as doctor.py) @@ -146,140 +150,27 @@ class MEMORYSTATUSEX(ctypes.Structure): # --------------------------------------------------------------------------- -# Ollama interaction +# Ollama interaction (delegates to openviking_cli.utils.ollama) # 
--------------------------------------------------------------------------- -_OLLAMA_DEFAULT_HOST = "localhost" -_OLLAMA_DEFAULT_PORT = 11434 - - -def _check_ollama_running( - host: str = _OLLAMA_DEFAULT_HOST, port: int = _OLLAMA_DEFAULT_PORT -) -> bool: - """Check if Ollama is running by hitting the /api/tags endpoint.""" - try: - url = f"http://{host}:{port}/api/tags" - req = urllib.request.Request(url, method="GET") - with urllib.request.urlopen(req, timeout=3): - return True - except (urllib.error.URLError, OSError, TimeoutError): - return False - - -def _get_ollama_models( - host: str = _OLLAMA_DEFAULT_HOST, port: int = _OLLAMA_DEFAULT_PORT -) -> list[str]: - """Fetch names of locally available Ollama models.""" - try: - url = f"http://{host}:{port}/api/tags" - req = urllib.request.Request(url, method="GET") - with urllib.request.urlopen(req, timeout=5) as resp: - data = json.loads(resp.read().decode()) - return [m["name"] for m in data.get("models", [])] - except (urllib.error.URLError, OSError, TimeoutError, json.JSONDecodeError, KeyError): - return [] - - -def _is_model_available(model_name: str, available: list[str]) -> bool: - """Check if a model is available locally (prefix match for tag variants).""" - for m in available: - # "qwen3-embedding:0.6b" matches "qwen3-embedding:0.6b" - # "qwen3-embedding:8b" matches "qwen3-embedding:8b-fp16" etc. - if m == model_name or m.startswith(model_name + "-"): - return True - # model_name without tag matches model with ":latest" - if ":" not in model_name and m.split(":")[0] == model_name: - return True - return False - - -def _ollama_pull_model(model_name: str) -> bool: - """Pull an Ollama model via CLI subprocess (shows native progress bar).""" - try: - result = subprocess.run(["ollama", "pull", model_name], check=False) - return result.returncode == 0 - except FileNotFoundError: - print(f" {_red('ollama command not found. Is Ollama installed?')}") - return False - - -def _is_ollama_installed() -> bool: - """Check if the ollama CLI binary is on PATH.""" - return shutil.which("ollama") is not None - - -def _install_ollama() -> bool: - """Install Ollama automatically based on the current platform.""" - system = platform.system() - - if system == "Darwin": - # macOS: prefer brew, fallback to official script - if shutil.which("brew"): - print(f" {_dim('Installing via Homebrew...')}") - result = subprocess.run(["brew", "install", "ollama"], check=False) - if result.returncode == 0: - return True - # Fallback: official install script - print(f" {_dim('Installing via official script...')}") - result = subprocess.run( - ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], - check=False, - ) - return result.returncode == 0 - - elif system == "Linux": - print(f" {_dim('Installing via official script...')}") - result = subprocess.run( - ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], - check=False, - ) - return result.returncode == 0 - - else: - # Windows or other: can't auto-install - print(f" {_yellow('Automatic installation is not supported on ' + system)}") - print(f" Please download from: {_cyan('https://ollama.com/download')}") - return False - - -def _start_ollama() -> bool: - """Start Ollama in the background and wait for it to be ready.""" - # Already running? 
- if _check_ollama_running(): - return True - - print(f" {_dim('Starting Ollama...')}", end=" ", flush=True) - try: - subprocess.Popen( - ["ollama", "serve"], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - except FileNotFoundError: - print(_red("failed")) - return False - - # Wait up to 15 seconds for Ollama to become ready - for _ in range(30): - time.sleep(0.5) - if _check_ollama_running(): - print(_green("ready")) - return True - - print(_yellow("timeout (Ollama may still be starting)")) - return False - def _ensure_ollama() -> bool: - """Make sure Ollama is installed and running. Returns True if ready.""" + """Make sure Ollama is installed and running (interactive). Returns True if ready.""" print("\n Checking Ollama...", end=" ", flush=True) - if _is_ollama_installed(): - if _check_ollama_running(): + if is_ollama_installed(): + if check_ollama_running(): print(_green("running at localhost:11434")) return True print(_yellow("installed but not running")) - return _start_ollama() + print(f" {_dim('Starting Ollama...')}", end=" ", flush=True) + result = start_ollama() + if result.success: + print(_green("ready")) + else: + msg = result.stderr_output or result.message + print(_yellow(f"failed ({msg})")) + return result.success # Not installed print(_yellow("not installed")) @@ -288,13 +179,20 @@ def _ensure_ollama() -> bool: return False print() - if not _install_ollama(): + if not install_ollama(): print(f" {_red('Installation failed.')}") print(f" {_dim('Try manually: https://ollama.com/download')}") return False print(f" {_green('OK')} Ollama installed") - return _start_ollama() + print(f" {_dim('Starting Ollama...')}", end=" ", flush=True) + result = start_ollama() + if result.success: + print(_green("ready")) + else: + msg = result.stderr_output or result.message + print(_yellow(f"failed ({msg})")) + return result.success # --------------------------------------------------------------------------- @@ -495,7 +393,7 @@ def _wizard_ollama() -> dict[str, Any] | None: ): return None - available_models = _get_ollama_models() if ollama_running else [] + available_models = get_ollama_models() if ollama_running else [] # System RAM ram_gb = _get_system_ram_gb() @@ -508,7 +406,7 @@ def _wizard_ollama() -> dict[str, Any] | None: for i, p in enumerate(EMBEDDING_PRESETS): rec = " *" if i == rec_embed_idx else "" avail = "" - if ollama_running and _is_model_available(p.model, available_models): + if ollama_running and is_model_available(p.model, available_models): avail = _green(" [downloaded]") embed_options.append(( f"{p.label}", @@ -519,10 +417,10 @@ def _wizard_ollama() -> dict[str, Any] | None: embedding = EMBEDDING_PRESETS[embed_choice - 1] # Pull embedding model - if ollama_running and not _is_model_available(embedding.model, available_models): + if ollama_running and not is_model_available(embedding.model, available_models): if _prompt_confirm(f"'{embedding.model}' not found locally. Pull now?"): print() - if not _ollama_pull_model(embedding.model): + if not ollama_pull_model(embedding.model): print(f" {_yellow('Pull failed. 
You can pull it later: ollama pull ' + embedding.model)}") else: print(f" {_green('OK')} {embedding.model} pulled successfully") @@ -532,7 +430,7 @@ def _wizard_ollama() -> dict[str, Any] | None: for i, p in enumerate(VLM_PRESETS): rec = " *" if i == rec_vlm_idx else "" avail = "" - if ollama_running and _is_model_available(p.ollama_model, available_models): + if ollama_running and is_model_available(p.ollama_model, available_models): avail = _green(" [downloaded]") vlm_options.append(( f"{p.label}", @@ -543,10 +441,10 @@ def _wizard_ollama() -> dict[str, Any] | None: vlm = VLM_PRESETS[vlm_choice - 1] # Pull VLM model - if ollama_running and not _is_model_available(vlm.ollama_model, available_models): + if ollama_running and not is_model_available(vlm.ollama_model, available_models): if _prompt_confirm(f"'{vlm.ollama_model}' not found locally. Pull now?"): print() - if not _ollama_pull_model(vlm.ollama_model): + if not ollama_pull_model(vlm.ollama_model): print(f" {_yellow('Pull failed. You can pull it later: ollama pull ' + vlm.ollama_model)}") else: print(f" {_green('OK')} {vlm.ollama_model} pulled successfully") diff --git a/openviking_cli/utils/ollama.py b/openviking_cli/utils/ollama.py new file mode 100644 index 000000000..fca3f950f --- /dev/null +++ b/openviking_cli/utils/ollama.py @@ -0,0 +1,290 @@ +"""Shared Ollama utilities for OpenViking. + +Used by both the ``ov init`` setup wizard and the ``openviking-server`` +bootstrap to detect, start, and health-check a local Ollama instance. + +Design principle: **ensure running, never stop** — Ollama is a shared +service that other tools may depend on. We start it if needed but never +tear it down on exit. + +stdlib-only (no third-party dependencies). +""" + +from __future__ import annotations + +import json +import platform +import shutil +import subprocess +import tempfile +import time +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +OLLAMA_DEFAULT_HOST = "localhost" +OLLAMA_DEFAULT_PORT = 11434 + +_LOCAL_HOSTS = frozenset({"localhost", "127.0.0.1", "::1"}) + +# --------------------------------------------------------------------------- +# Data types +# --------------------------------------------------------------------------- + + +@dataclass +class OllamaStartResult: + """Result of attempting to start / ensure Ollama is available.""" + + success: bool + stderr_output: str = "" + message: str = "" + + +# --------------------------------------------------------------------------- +# URL parsing +# --------------------------------------------------------------------------- + + +def parse_ollama_url(api_base: str | None) -> tuple[str, int]: + """Extract ``(host, port)`` from an Ollama *api_base* URL. + + Handles forms like ``http://localhost:11434/v1`` or + ``http://gpu-server:11434``. Falls back to defaults when *api_base* + is ``None`` or unparseable. 
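
    For example (behaviour matching the unit tests added in this patch)::

        parse_ollama_url("http://localhost:11434/v1")  # -> ("localhost", 11434)
        parse_ollama_url("http://gpu-server:11434")     # -> ("gpu-server", 11434)
        parse_ollama_url(None)                          # -> ("localhost", 11434)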
+ """ + if not api_base: + return OLLAMA_DEFAULT_HOST, OLLAMA_DEFAULT_PORT + try: + parsed = urllib.parse.urlparse(api_base) + host = parsed.hostname or OLLAMA_DEFAULT_HOST + port = parsed.port or OLLAMA_DEFAULT_PORT + return host, port + except Exception: + return OLLAMA_DEFAULT_HOST, OLLAMA_DEFAULT_PORT + + +# --------------------------------------------------------------------------- +# Ollama detection / health +# --------------------------------------------------------------------------- + + +def check_ollama_running( + host: str = OLLAMA_DEFAULT_HOST, + port: int = OLLAMA_DEFAULT_PORT, +) -> bool: + """Return ``True`` if Ollama is responding at *host*:*port*.""" + try: + url = f"http://{host}:{port}/api/tags" + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=3): + return True + except (urllib.error.URLError, OSError, TimeoutError): + return False + + +def get_ollama_models( + host: str = OLLAMA_DEFAULT_HOST, + port: int = OLLAMA_DEFAULT_PORT, +) -> list[str]: + """Fetch names of locally available Ollama models.""" + try: + url = f"http://{host}:{port}/api/tags" + req = urllib.request.Request(url, method="GET") + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read().decode()) + return [m["name"] for m in data.get("models", [])] + except (urllib.error.URLError, OSError, TimeoutError, json.JSONDecodeError, KeyError): + return [] + + +def is_model_available(model_name: str, available: list[str]) -> bool: + """Check if *model_name* is available locally (prefix match for tag variants).""" + for m in available: + if m == model_name or m.startswith(model_name + "-"): + return True + # model_name without tag matches model with ":latest" + if ":" not in model_name and m.split(":")[0] == model_name: + return True + return False + + +# --------------------------------------------------------------------------- +# Ollama installation & startup +# --------------------------------------------------------------------------- + + +def ollama_pull_model(model_name: str) -> bool: + """Pull an Ollama model via CLI (shows native progress bar).""" + try: + result = subprocess.run(["ollama", "pull", model_name], check=False) + return result.returncode == 0 + except FileNotFoundError: + return False + + +def is_ollama_installed() -> bool: + """Check if the ``ollama`` CLI binary is on PATH.""" + return shutil.which("ollama") is not None + + +def install_ollama() -> bool: + """Install Ollama automatically based on the current platform.""" + system = platform.system() + + if system == "Darwin": + if shutil.which("brew"): + result = subprocess.run(["brew", "install", "ollama"], check=False) + if result.returncode == 0: + return True + result = subprocess.run( + ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], + check=False, + ) + return result.returncode == 0 + + elif system == "Linux": + result = subprocess.run( + ["bash", "-c", "curl -fsSL https://ollama.com/install.sh | sh"], + check=False, + ) + return result.returncode == 0 + + return False + + +def start_ollama( + host: str = OLLAMA_DEFAULT_HOST, + port: int = OLLAMA_DEFAULT_PORT, +) -> OllamaStartResult: + """Start ``ollama serve`` in the background and wait for it to be ready. + + Unlike the old fire-and-forget approach, stderr is captured so that + failure reasons are visible to the caller. + + Returns an :class:`OllamaStartResult` with ``success`` and any + ``stderr_output`` on failure. + """ + # Already running? 
+ if check_ollama_running(host, port): + return OllamaStartResult(success=True, message="already running") + + stderr_file = tempfile.TemporaryFile(mode="w+") + try: + subprocess.Popen( + ["ollama", "serve"], + stdout=subprocess.DEVNULL, + stderr=stderr_file, + ) + except FileNotFoundError: + stderr_file.close() + return OllamaStartResult( + success=False, + message="ollama command not found", + ) + + # Poll up to 15 seconds for readiness + for _ in range(30): + time.sleep(0.5) + if check_ollama_running(host, port): + stderr_file.close() + return OllamaStartResult(success=True, message="started") + + # Timeout — read stderr for diagnostics + stderr_output = "" + try: + stderr_file.seek(0) + stderr_output = stderr_file.read() + except Exception: + pass + finally: + stderr_file.close() + + return OllamaStartResult( + success=False, + stderr_output=stderr_output, + message="timeout waiting for Ollama to become ready", + ) + + +# --------------------------------------------------------------------------- +# Config detection +# --------------------------------------------------------------------------- + + +def detect_ollama_in_config(config) -> tuple[bool, str, int]: + """Detect whether *config* uses Ollama and return ``(uses_ollama, host, port)``. + + *config* is an :class:`OpenVikingConfig` instance (imported lazily to + avoid circular deps). + + Detection rules: + - ``embedding.dense.provider == "ollama"`` + - ``vlm.provider == "litellm"`` **and** ``vlm.model`` starts with ``"ollama/"`` + """ + host, port = OLLAMA_DEFAULT_HOST, OLLAMA_DEFAULT_PORT + uses_ollama = False + + # Check embedding + dense = getattr(config.embedding, "dense", None) + if dense is not None and getattr(dense, "provider", None) == "ollama": + uses_ollama = True + api_base = getattr(dense, "api_base", None) + host, port = parse_ollama_url(api_base) + + # Check VLM + vlm = getattr(config, "vlm", None) + if vlm is not None: + vlm_provider = getattr(vlm, "provider", None) + vlm_model = getattr(vlm, "model", None) or "" + if vlm_provider == "litellm" and vlm_model.startswith("ollama/"): + if not uses_ollama: + # Only use VLM's URL if embedding didn't already set it + api_base = getattr(vlm, "api_base", None) + host, port = parse_ollama_url(api_base) + uses_ollama = True + + return uses_ollama, host, port + + +# --------------------------------------------------------------------------- +# Server-oriented ensure (non-interactive) +# --------------------------------------------------------------------------- + + +def ensure_ollama_for_server( + host: str = OLLAMA_DEFAULT_HOST, + port: int = OLLAMA_DEFAULT_PORT, +) -> OllamaStartResult: + """Ensure Ollama is available — non-interactive, for server startup. + + - Already running → success. + - Remote host (not localhost) → only probe, never attempt local start. + - Not installed → warn, return failure (no interactive install). + - Installed but not running → ``ollama serve`` with stderr capture. + """ + # 1. Already running? + if check_ollama_running(host, port): + return OllamaStartResult(success=True, message=f"running at {host}:{port}") + + # 2. Remote host — can't start locally + if host not in _LOCAL_HOSTS: + return OllamaStartResult( + success=False, + message=f"Ollama at {host}:{port} is not reachable. Cannot auto-start remote Ollama.", + ) + + # 3. Not installed? + if not is_ollama_installed(): + return OllamaStartResult( + success=False, + message="ollama is not installed. Install from https://ollama.com/download", + ) + + # 4. 
Installed but not running — start it + return start_ollama(host, port) diff --git a/tests/cli/test_setup_wizard.py b/tests/cli/test_setup_wizard.py index d272a85ee..7422759c4 100644 --- a/tests/cli/test_setup_wizard.py +++ b/tests/cli/test_setup_wizard.py @@ -11,12 +11,14 @@ VLM_PRESETS, _build_cloud_config, _build_ollama_config, - _check_ollama_running, - _get_ollama_models, _get_recommended_indices, - _is_model_available, _write_config, ) +from openviking_cli.utils.ollama import ( + check_ollama_running, + get_ollama_models, + is_model_available, +) # --------------------------------------------------------------------------- # Ollama detection @@ -29,17 +31,17 @@ def test_ollama_running(self): mock_resp.__enter__ = MagicMock(return_value=mock_resp) mock_resp.__exit__ = MagicMock(return_value=False) - with patch("openviking_cli.setup_wizard.urllib.request.urlopen", return_value=mock_resp): - assert _check_ollama_running() is True + with patch("openviking_cli.utils.ollama.urllib.request.urlopen", return_value=mock_resp): + assert check_ollama_running() is True def test_ollama_not_running(self): import urllib.error with patch( - "openviking_cli.setup_wizard.urllib.request.urlopen", + "openviking_cli.utils.ollama.urllib.request.urlopen", side_effect=urllib.error.URLError("refused"), ): - assert _check_ollama_running() is False + assert check_ollama_running() is False def test_get_models(self): mock_data = json.dumps({ @@ -54,8 +56,8 @@ def test_get_models(self): mock_resp.__enter__ = MagicMock(return_value=mock_resp) mock_resp.__exit__ = MagicMock(return_value=False) - with patch("openviking_cli.setup_wizard.urllib.request.urlopen", return_value=mock_resp): - models = _get_ollama_models() + with patch("openviking_cli.utils.ollama.urllib.request.urlopen", return_value=mock_resp): + models = get_ollama_models() assert "qwen3-embedding:0.6b" in models assert "gemma4:e4b" in models @@ -63,10 +65,10 @@ def test_get_models_error(self): import urllib.error with patch( - "openviking_cli.setup_wizard.urllib.request.urlopen", + "openviking_cli.utils.ollama.urllib.request.urlopen", side_effect=urllib.error.URLError("refused"), ): - assert _get_ollama_models() == [] + assert get_ollama_models() == [] # --------------------------------------------------------------------------- @@ -77,19 +79,19 @@ def test_get_models_error(self): class TestModelAvailability: def test_exact_match(self): available = ["qwen3-embedding:0.6b", "gemma4:e4b"] - assert _is_model_available("qwen3-embedding:0.6b", available) is True + assert is_model_available("qwen3-embedding:0.6b", available) is True def test_no_match(self): available = ["qwen3-embedding:0.6b"] - assert _is_model_available("nomic-embed-text", available) is False + assert is_model_available("nomic-embed-text", available) is False def test_tagless_matches_latest(self): available = ["gemma:300m"] - assert _is_model_available("gemma", available) is True + assert is_model_available("gemma", available) is True def test_prefix_variant(self): available = ["qwen3-embedding:0.6b-fp16"] - assert _is_model_available("qwen3-embedding:0.6b", available) is True + assert is_model_available("qwen3-embedding:0.6b", available) is True # --------------------------------------------------------------------------- diff --git a/tests/unit/test_ollama_utils.py b/tests/unit/test_ollama_utils.py new file mode 100644 index 000000000..8fea27ccf --- /dev/null +++ b/tests/unit/test_ollama_utils.py @@ -0,0 +1,193 @@ +"""Tests for the shared Ollama utility module.""" + +from __future__ import 
annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from openviking_cli.utils.ollama import ( + OllamaStartResult, + check_ollama_running, + detect_ollama_in_config, + ensure_ollama_for_server, + parse_ollama_url, + start_ollama, +) + + +# --------------------------------------------------------------------------- +# parse_ollama_url +# --------------------------------------------------------------------------- + + +class TestParseOllamaUrl: + def test_localhost_with_v1_suffix(self): + assert parse_ollama_url("http://localhost:11434/v1") == ("localhost", 11434) + + def test_custom_host(self): + assert parse_ollama_url("http://gpu-server:11434") == ("gpu-server", 11434) + + def test_ip_address(self): + assert parse_ollama_url("http://192.168.1.100:11434/v1") == ("192.168.1.100", 11434) + + def test_custom_port(self): + assert parse_ollama_url("http://localhost:8080") == ("localhost", 8080) + + def test_none_returns_defaults(self): + assert parse_ollama_url(None) == ("localhost", 11434) + + def test_empty_returns_defaults(self): + assert parse_ollama_url("") == ("localhost", 11434) + + +# --------------------------------------------------------------------------- +# detect_ollama_in_config +# --------------------------------------------------------------------------- + + +def _make_config( + embedding_provider="volcengine", + embedding_api_base=None, + vlm_provider="volcengine", + vlm_model="doubao-seed", + vlm_api_base=None, +): + """Build a minimal config-like object for detect_ollama_in_config.""" + dense = SimpleNamespace(provider=embedding_provider, api_base=embedding_api_base) + embedding = SimpleNamespace(dense=dense) + vlm = SimpleNamespace(provider=vlm_provider, model=vlm_model, api_base=vlm_api_base) + return SimpleNamespace(embedding=embedding, vlm=vlm) + + +class TestDetectOllamaInConfig: + def test_embedding_ollama_detected(self): + config = _make_config( + embedding_provider="ollama", + embedding_api_base="http://localhost:11434/v1", + ) + uses, host, port = detect_ollama_in_config(config) + assert uses is True + assert host == "localhost" + assert port == 11434 + + def test_vlm_ollama_detected(self): + config = _make_config( + vlm_provider="litellm", + vlm_model="ollama/gemma4:e4b", + vlm_api_base="http://localhost:11434", + ) + uses, host, port = detect_ollama_in_config(config) + assert uses is True + assert host == "localhost" + assert port == 11434 + + def test_both_detected_uses_embedding_url(self): + config = _make_config( + embedding_provider="ollama", + embedding_api_base="http://gpu-server:11434/v1", + vlm_provider="litellm", + vlm_model="ollama/qwen3.5:9b", + vlm_api_base="http://localhost:11434", + ) + uses, host, port = detect_ollama_in_config(config) + assert uses is True + assert host == "gpu-server" # embedding takes priority + + def test_neither_detected(self): + config = _make_config() + uses, host, port = detect_ollama_in_config(config) + assert uses is False + + def test_litellm_non_ollama_model(self): + config = _make_config(vlm_provider="litellm", vlm_model="anthropic/claude-3") + uses, _, _ = detect_ollama_in_config(config) + assert uses is False + + def test_custom_api_base(self): + config = _make_config( + embedding_provider="ollama", + embedding_api_base="http://192.168.1.50:8080/v1", + ) + uses, host, port = detect_ollama_in_config(config) + assert uses is True + assert host == "192.168.1.50" + assert port == 8080 + + +# 
--------------------------------------------------------------------------- +# ensure_ollama_for_server +# --------------------------------------------------------------------------- + + +class TestEnsureOllamaForServer: + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=True) + def test_already_running(self, mock_check): + result = ensure_ollama_for_server() + assert result.success is True + assert "running" in result.message + + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=False) + def test_remote_host_unreachable(self, mock_check): + result = ensure_ollama_for_server(host="gpu-server", port=11434) + assert result.success is False + assert "remote" in result.message.lower() + + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=False) + @patch("openviking_cli.utils.ollama.is_ollama_installed", return_value=False) + def test_not_installed(self, mock_installed, mock_check): + result = ensure_ollama_for_server() + assert result.success is False + assert "not installed" in result.message + + @patch("openviking_cli.utils.ollama.start_ollama") + @patch("openviking_cli.utils.ollama.is_ollama_installed", return_value=True) + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=False) + def test_installed_starts_ollama(self, mock_check, mock_installed, mock_start): + mock_start.return_value = OllamaStartResult(success=True, message="started") + result = ensure_ollama_for_server() + assert result.success is True + mock_start.assert_called_once() + + +# --------------------------------------------------------------------------- +# start_ollama +# --------------------------------------------------------------------------- + + +class TestStartOllama: + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=True) + def test_already_running_returns_success(self, mock_check): + result = start_ollama() + assert result.success is True + assert result.message == "already running" + + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=False) + @patch("subprocess.Popen", side_effect=FileNotFoundError) + def test_command_not_found(self, mock_popen, mock_check): + result = start_ollama() + assert result.success is False + assert "not found" in result.message + + @patch("openviking_cli.utils.ollama.check_ollama_running") + @patch("subprocess.Popen") + @patch("openviking_cli.utils.ollama.time.sleep") + def test_start_success(self, mock_sleep, mock_popen, mock_check): + # First call: not running (initial check), second call onward: running + mock_check.side_effect = [False, False, True] + mock_popen.return_value = MagicMock() + + result = start_ollama() + assert result.success is True + assert result.message == "started" + + @patch("openviking_cli.utils.ollama.check_ollama_running", return_value=False) + @patch("subprocess.Popen") + @patch("openviking_cli.utils.ollama.time.sleep") + def test_start_timeout(self, mock_sleep, mock_popen, mock_check): + mock_popen.return_value = MagicMock() + + result = start_ollama() + assert result.success is False + assert "timeout" in result.message.lower() From a105d6265246f6fa5b1b70ebcc58703a70ea3855 Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Mon, 13 Apr 2026 16:41:53 +0800 Subject: [PATCH 3/7] refactor: move init and doctor from ov to openviking-server subcommands These are server-side configuration commands (generate/validate ov.conf), not client operations. Having them under `ov` (the client CLI) was confusing. 
Now: openviking-server init # setup wizard openviking-server doctor # diagnostics ov # client operations only --- openviking_cli/rust_cli.py | 11 ----------- openviking_cli/server_bootstrap.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/openviking_cli/rust_cli.py b/openviking_cli/rust_cli.py index d940a4286..cb7c6552e 100644 --- a/openviking_cli/rust_cli.py +++ b/openviking_cli/rust_cli.py @@ -44,21 +44,10 @@ def main(): 极简入口点:查找 ov 二进制并执行 按优先级查找: - 0. Python-native 子命令(doctor) 1. ./target/release/ov(开发环境) 2. Wheel 自带:{package_dir}/openviking/bin/ov 3. PATH 查找:系统全局安装的 ov """ - # 0. Python-native subcommands (no Rust binary needed) - if len(sys.argv) > 1 and sys.argv[1] == "doctor": - from openviking_cli.doctor import main as doctor_main - - sys.exit(doctor_main()) - - if len(sys.argv) > 1 and sys.argv[1] == "init": - from openviking_cli.setup_wizard import main as init_main - - sys.exit(init_main()) # 1. 检查开发环境(仅在直接运行脚本时有效) try: # __file__ is openviking_cli/rust_cli.py, so parent is openviking_cli directory diff --git a/openviking_cli/server_bootstrap.py b/openviking_cli/server_bootstrap.py index 2385145af..3fd3374f0 100644 --- a/openviking_cli/server_bootstrap.py +++ b/openviking_cli/server_bootstrap.py @@ -9,6 +9,9 @@ The real bootstrap logic stays in ``openviking.server.bootstrap``; we just pre-parse ``--config`` and set the environment variable before that module is ever imported. + +Subcommands ``init`` and ``doctor`` are handled here directly (they don't +need a running server). """ import os @@ -16,6 +19,17 @@ def main(): + # Intercept subcommands that don't need the server. + if len(sys.argv) > 1 and sys.argv[1] == "init": + from openviking_cli.setup_wizard import main as init_main + + sys.exit(init_main()) + + if len(sys.argv) > 1 and sys.argv[1] == "doctor": + from openviking_cli.doctor import main as doctor_main + + sys.exit(doctor_main()) + # Pre-parse --config from sys.argv before any openviking imports, # so the env var is visible when the config singleton first initialises. for i, arg in enumerate(sys.argv): From 793cf953d6bb67215e67f186574c5e85c86a014a Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Mon, 13 Apr 2026 16:43:53 +0800 Subject: [PATCH 4/7] restore rust_cli.py --- openviking_cli/rust_cli.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openviking_cli/rust_cli.py b/openviking_cli/rust_cli.py index cb7c6552e..35823f1c1 100644 --- a/openviking_cli/rust_cli.py +++ b/openviking_cli/rust_cli.py @@ -44,10 +44,16 @@ def main(): 极简入口点:查找 ov 二进制并执行 按优先级查找: + 0. Python-native 子命令(doctor) 1. ./target/release/ov(开发环境) 2. Wheel 自带:{package_dir}/openviking/bin/ov 3. PATH 查找:系统全局安装的 ov """ + # 0. Python-native subcommands (no Rust binary needed) + if len(sys.argv) > 1 and sys.argv[1] == "doctor": + from openviking_cli.doctor import main as doctor_main + + sys.exit(doctor_main()) # 1. 
检查开发环境(仅在直接运行脚本时有效) try: # __file__ is openviking_cli/rust_cli.py, so parent is openviking_cli directory From 65a45d2ba45699d97d3b07bf65ee101b66e11a9d Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Mon, 13 Apr 2026 16:49:56 +0800 Subject: [PATCH 5/7] refactor: update command references to use 'openviking-server doctor' --- openviking_cli/doctor.py | 6 ++-- openviking_cli/setup_wizard.py | 2 +- tests/cli/test_doctor.py | 2 +- uv.lock | 64 ++++++++++++++++++++++++++++++---- 4 files changed, 63 insertions(+), 11 deletions(-) diff --git a/openviking_cli/doctor.py b/openviking_cli/doctor.py index c3a642877..2d7f8dfbb 100644 --- a/openviking_cli/doctor.py +++ b/openviking_cli/doctor.py @@ -1,8 +1,8 @@ # Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. # SPDX-License-Identifier: AGPL-3.0 -"""ov doctor - validate OpenViking subsystems and report actionable diagnostics. +"""openviking-server doctor - validate OpenViking subsystems and report actionable diagnostics. -Unlike ``ov health`` (which pings a running server), ``ov doctor`` checks +Unlike ``ov health`` (which pings a running server), ``openviking-server doctor`` checks local prerequisites without requiring a server: config file, Python version, native vector engine, AGFS, embedding provider, VLM provider, and disk space. """ @@ -325,5 +325,5 @@ def run_doctor() -> int: def main() -> int: - """Entry point for ``ov doctor``.""" + """Entry point for ``openviking-server doctor``.""" return run_doctor() diff --git a/openviking_cli/setup_wizard.py b/openviking_cli/setup_wizard.py index 854896280..e9b6b107a 100644 --- a/openviking_cli/setup_wizard.py +++ b/openviking_cli/setup_wizard.py @@ -585,7 +585,7 @@ def run_init() -> int: # Post-init tips print(f" {_bold('Next steps:')}") print(f" Start the server: {_cyan('openviking-server')}") - print(f" Validate setup: {_cyan('ov doctor')}") + print(f" Validate setup: {_cyan('openviking-server doctor')}") print() return 0 diff --git a/tests/cli/test_doctor.py b/tests/cli/test_doctor.py index 428975053..e0a60a2f5 100644 --- a/tests/cli/test_doctor.py +++ b/tests/cli/test_doctor.py @@ -1,6 +1,6 @@ # Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
# SPDX-License-Identifier: AGPL-3.0 -"""Tests for ``ov doctor`` diagnostic checks.""" +"""Tests for ``openviking-server doctor`` diagnostic checks.""" from __future__ import annotations diff --git a/uv.lock b/uv.lock index 3c8b05997..e79cc887c 100644 --- a/uv.lock +++ b/uv.lock @@ -1550,7 +1550,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, @@ -1558,7 +1557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size 
= 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8b/1430a04657735a3f23116c2e0d5eb10220928846e4537a938a41b350bed6/greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2", size = 605046, upload-time = "2026-02-20T21:02:45.234Z" }, { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, @@ -1567,7 +1565,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, @@ -1576,7 +1573,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, @@ -1585,7 +1581,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, @@ -1594,7 +1589,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 
1680010, upload-time = "2026-02-20T20:21:13.427Z" }, @@ -2707,6 +2701,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl", hash = "sha256:013fa8a3c4c276c24d26d84ce934dc964e2aa794345a0f8c7e5a7191482c8a73", size = 50964, upload-time = "2025-12-22T06:53:51.801Z" }, ] +[[package]] +name = "mcp" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/eb/c0cfc62075dc6e1ec1c64d352ae09ac051d9334311ed226f1f425312848a/mcp-1.27.0.tar.gz", hash = "sha256:d3dc35a7eec0d458c1da4976a48f982097ddaab87e278c5511d5a4a56e852b83", size = 607509, upload-time = "2026-04-02T14:48:08.88Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" }, +] + [[package]] name = "mdit-py-plugins" version = "0.5.0" @@ -3528,6 +3547,7 @@ bot = [ { name = "gradio" }, { name = "html2text" }, { name = "httpx", extra = ["socks"] }, + { name = "mcp" }, { name = "msgpack" }, { name = "prompt-toolkit" }, { name = "py-machineid" }, @@ -3560,6 +3580,7 @@ bot-full = [ { name = "httpx", extra = ["socks"] }, { name = "langfuse" }, { name = "lark-oapi" }, + { name = "mcp" }, { name = "msgpack" }, { name = "opencode-ai" }, { name = "opensandbox" }, @@ -3696,6 +3717,7 @@ requires-dist = [ { name = "litellm", specifier = ">=1.0.0,<1.83.1" }, { name = "loguru", specifier = ">=0.7.3" }, { name = "markdownify", specifier = ">=0.11.0" }, + { name = "mcp", marker = "extra == 'bot'", specifier = ">=1.0.0" }, { name = "msgpack", marker = "extra == 'bot'", specifier = ">=1.0.8" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0.0" }, { name = "myst-parser", marker = "extra == 'doc'", specifier = ">=2.0.0" }, @@ -4692,6 +4714,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = 
"sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pypdfium2" version = "5.6.0" @@ -5807,6 +5846,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/2c/9664130905f03db57961b8980b05cab624afd114bf2be2576628a9f22da4/sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096", size = 1940202, upload-time = "2026-03-02T15:52:43.285Z" }, ] +[[package]] +name = "sse-starlette" +version = "3.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/8c/f9290339ef6d79badbc010f067cd769d6601ec11a57d78569c683fb4dd87/sse_starlette-3.3.4.tar.gz", hash = "sha256:aaf92fc067af8a5427192895ac028e947b484ac01edbc3caf00e7e7137c7bef1", size = 32427, upload-time = "2026-03-29T09:00:23.307Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/7f/3de5402f39890ac5660b86bcf5c03f9d855dad5c4ed764866d7b592b46fd/sse_starlette-3.3.4-py3-none-any.whl", hash = "sha256:84bb06e58939a8b38d8341f1bc9792f06c2b53f48c608dd207582b664fc8f3c1", size = 14330, upload-time = "2026-03-29T09:00:21.846Z" }, +] + [[package]] name = "starlette" version = "0.52.1" From 54fb4f0b7f1fb965641cd4f6b55f2a94ff844d22 Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Mon, 13 Apr 2026 17:03:05 +0800 Subject: [PATCH 6/7] fix: add existence check for example config in custom configuration wizard --- openviking_cli/setup_wizard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openviking_cli/setup_wizard.py b/openviking_cli/setup_wizard.py index e9b6b107a..f5f16dedd 100644 --- a/openviking_cli/setup_wizard.py +++ b/openviking_cli/setup_wizard.py @@ -503,7 +503,8 @@ def _wizard_cloud() -> dict[str, Any] | None: def _wizard_custom() -> dict[str, Any] | None: """Custom configuration - point user to example config.""" example = Path(__file__).parent.parent / "examples" / "ov.conf.example" - print(f"\n Example config: {_cyan(str(example))}") + if example.exists(): + print(f"\n Example config: {_cyan(str(example))}") print(f" Config path: {_cyan(str(_DEFAULT_CONFIG_PATH))}") editor = os.environ.get("EDITOR", os.environ.get("VISUAL", "")) From d14e04961ee067ee211c0a3cdce66bbd8f126d19 Mon Sep 17 00:00:00 2001 From: "zhengxiao.wu" Date: Tue, 14 Apr 2026 14:21:24 +0800 Subject: [PATCH 7/7] refactor: update references from 'ov init' to 'openviking-server init' in setup wizard and tests --- openviking_cli/setup_wizard.py | 4 ++-- openviking_cli/utils/ollama.py | 2 +- tests/cli/test_setup_wizard.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/openviking_cli/setup_wizard.py b/openviking_cli/setup_wizard.py index f5f16dedd..e25281e10 100644 --- a/openviking_cli/setup_wizard.py +++ b/openviking_cli/setup_wizard.py @@ -1,4 +1,4 @@ -"""ov init - interactive setup wizard for OpenViking. +"""openviking-server init - interactive setup wizard for OpenViking. Guides users through model selection and configuration, with a focus on local deployment via Ollama for macOS / Apple Silicon beginners. 
@@ -593,7 +593,7 @@ def run_init() -> int: def main() -> int: - """Entry point for ``ov init``.""" + """Entry point for ``openviking-server init``.""" try: return run_init() except KeyboardInterrupt: diff --git a/openviking_cli/utils/ollama.py b/openviking_cli/utils/ollama.py index fca3f950f..11655e1ea 100644 --- a/openviking_cli/utils/ollama.py +++ b/openviking_cli/utils/ollama.py @@ -1,6 +1,6 @@ """Shared Ollama utilities for OpenViking. -Used by both the ``ov init`` setup wizard and the ``openviking-server`` +Used by both the ``openviking-server init`` setup wizard and the ``openviking-server`` bootstrap to detect, start, and health-check a local Ollama instance. Design principle: **ensure running, never stop** — Ollama is a shared diff --git a/tests/cli/test_setup_wizard.py b/tests/cli/test_setup_wizard.py index 7422759c4..2a690639c 100644 --- a/tests/cli/test_setup_wizard.py +++ b/tests/cli/test_setup_wizard.py @@ -1,4 +1,4 @@ -"""Tests for the ov init setup wizard.""" +"""Tests for the openviking-server init setup wizard.""" from __future__ import annotations
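
Illustrative sketch (not part of any patch in this series): how the shared utilities from openviking_cli/utils/ollama.py, as exercised by the tests above, fit together in a caller. Per those tests, detect_ollama_in_config(config) returns a (uses_ollama, host, port) tuple with the embedding api_base taking priority, and ensure_ollama_for_server(host=..., port=...) returns a result object exposing .success and .message. The helper name maybe_start_ollama below is hypothetical.

from openviking_cli.utils.ollama import (
    detect_ollama_in_config,
    ensure_ollama_for_server,
)


def maybe_start_ollama(config) -> bool:
    """Hypothetical helper: start a local Ollama only when the config needs it."""
    # detect_ollama_in_config returns (uses_ollama, host, port); the embedding
    # api_base takes priority over the VLM one, per the tests above.
    uses_ollama, host, port = detect_ollama_in_config(config)
    if not uses_ollama:
        return True  # cloud-only config, nothing to do
    # Following the module's "ensure running, never stop" principle:
    # an already-running instance is left alone, a remote host that is
    # unreachable or a missing install is reported as a failure.
    result = ensure_ollama_for_server(host=host, port=port)
    if not result.success:
        print(f"Ollama is required but unavailable: {result.message}")
    return result.success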