From d36daa290fc6ffbd12712c894cd65c69cd129d3b Mon Sep 17 00:00:00 2001 From: Harald Nezbeda Date: Tue, 17 Mar 2026 21:36:43 +0100 Subject: [PATCH] Add LLM server integration for connecting agents (claude, codex) to Ollama, vLLM, and other providers --- docs/configuration.md | 11 ++ docs/llm.md | 122 ++++++++++++++ mkdocs.yml | 1 + src/vibepod/commands/run.py | 19 +++ src/vibepod/core/agents.py | 8 + src/vibepod/core/config.py | 10 ++ tests/test_agents.py | 23 +++ tests/test_config.py | 25 +++ tests/test_run.py | 312 ++++++++++++++++++++++++++++++++++++ 9 files changed, 531 insertions(+) create mode 100644 docs/llm.md diff --git a/docs/configuration.md b/docs/configuration.md index 81285e3..2911d00 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -85,6 +85,13 @@ agents: volumes: [] init: [] +# Connect agents to a local or remote LLM server (Ollama, vLLM, etc.) +llm: + enabled: false + base_url: "" # Server endpoint URL + api_key: "" # Auth token (set to "ollama" for Ollama) + model: "" # Model name passed to the agent + logging: enabled: true image: vibepod/datasette:latest @@ -111,6 +118,10 @@ These variables override the corresponding config keys without editing any file: | `VP_NO_COLOR` | `no_color` | `VP_NO_COLOR=true` | | `VP_DATASETTE_PORT` | `logging.ui_port` | `VP_DATASETTE_PORT=9001` | | `VP_PROXY_ENABLED` | `proxy.enabled` | `VP_PROXY_ENABLED=false` | +| `VP_LLM_ENABLED` | `llm.enabled` | `VP_LLM_ENABLED=true` | +| `VP_LLM_BASE_URL` | `llm.base_url` | `VP_LLM_BASE_URL=http://localhost:11434` | +| `VP_LLM_API_KEY` | `llm.api_key` | `VP_LLM_API_KEY=ollama` | +| `VP_LLM_MODEL` | `llm.model` | `VP_LLM_MODEL=qwen3:14b` | | `VP_CONFIG_DIR` | *(config root)* | `VP_CONFIG_DIR=/custom/path` | ### Image overrides diff --git a/docs/llm.md b/docs/llm.md new file mode 100644 index 0000000..156200c --- /dev/null +++ b/docs/llm.md @@ -0,0 +1,122 @@ +# Using OSS Models via Ollama, vLLM, and other LLM servers + +VibePod can connect agents to external LLM 
servers that expose OpenAI- or Anthropic-compatible APIs. This lets you run agents like Claude Code and Codex against open-source models served by [Ollama](https://ollama.com), [vLLM](https://docs.vllm.ai), or any compatible endpoint. + +## Supported agents + +| Agent | Env vars injected | CLI flags appended | +|-------|------------------|--------------------| +| claude | `ANTHROPIC_BASE_URL`, `ANTHROPIC_API_KEY` | `--model MODEL` | +| codex | `CODEX_OSS_BASE_URL` | `--oss -m MODEL` | + +Other agents do not yet have an LLM mapping and will not receive any LLM configuration. + +## Quick start with Ollama + +### 1. Start Ollama and pull a model + +```bash +ollama pull qwen3:14b +``` + +### 2. Configure VibePod + +Add the following to your global or project config: + +```yaml +# ~/.config/vibepod/config.yaml +llm: + enabled: true + base_url: "http://host.docker.internal:11434" + api_key: "ollama" + model: "qwen3:14b" +``` + +!!! note + Use `host.docker.internal` (not `localhost`) so the Docker container can reach Ollama on the host machine. + +### 3. Run an agent + +```bash +vp run claude +# Starts Claude Code with: +# ANTHROPIC_BASE_URL=http://host.docker.internal:11434 +# ANTHROPIC_API_KEY=ollama +# claude --model qwen3:14b + +vp run codex +# Starts Codex with: +# CODEX_OSS_BASE_URL=http://host.docker.internal:11434 +# codex --oss -m qwen3:14b +``` + +## Using environment variables + +You can also configure LLM settings at runtime without editing config files. 
+ +**Claude Code with a remote Ollama server:** + +```bash +VP_LLM_ENABLED=true VP_LLM_MODEL=qwen3.5:9b VP_LLM_BASE_URL=https://ollama.example.com vp run claude +``` + +**Codex with a remote Ollama server (note the `/v1` suffix):** + +```bash +VP_LLM_ENABLED=true VP_LLM_MODEL=qwen3.5:9b VP_LLM_BASE_URL=https://ollama.example.com/v1 vp run codex +``` + +**Local Ollama with an API key:** + +```bash +VP_LLM_ENABLED=true VP_LLM_BASE_URL=http://host.docker.internal:11434 VP_LLM_API_KEY=ollama VP_LLM_MODEL=qwen3:14b vp run claude +``` + +!!! note + Claude Code uses the Anthropic-compatible endpoint (no `/v1` suffix), while Codex uses the OpenAI-compatible endpoint (with `/v1` suffix). Adjust `VP_LLM_BASE_URL` accordingly, or use per-agent overrides if you need both agents to work from the same config. + +See [Configuration > Environment variables](configuration.md#environment-variables) for the full list. + +## Using vLLM or other OpenAI-compatible servers + +Point `base_url` at any server that speaks the OpenAI or Anthropic API: + +```yaml +llm: + enabled: true + base_url: "http://my-vllm-server:8000/v1" + api_key: "my-api-key" + model: "meta-llama/Llama-3-8B-Instruct" +``` + +## Per-agent overrides + +If you need different LLM settings for a specific agent, use the per-agent `env` config. 
Per-agent env vars take precedence over the `llm` section: + +```yaml +llm: + enabled: true + base_url: "http://host.docker.internal:11434" + api_key: "ollama" + model: "qwen3:14b" + +agents: + claude: + env: + ANTHROPIC_BASE_URL: "http://different-server:11434" +``` + +## Disabling + +To turn off LLM injection without removing the config: + +```yaml +llm: + enabled: false +``` + +Or at runtime: + +```bash +VP_LLM_ENABLED=false vp run claude +``` diff --git a/mkdocs.yml b/mkdocs.yml index 513e7ad..591006d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,4 +53,5 @@ nav: - Development: development.md - Agents: agents/index.md - Configuration: configuration.md + - LLM Integration: llm.md - CLI Reference: cli-reference.md diff --git a/src/vibepod/commands/run.py b/src/vibepod/commands/run.py index a9b2eb8..7038bd9 100644 --- a/src/vibepod/commands/run.py +++ b/src/vibepod/commands/run.py @@ -265,6 +265,22 @@ def run( **_parse_env_pairs(env or []), } + llm_cfg = config.get("llm", {}) + llm_command_extra: list[str] = [] + if llm_cfg.get("enabled") and spec.llm_env_map: + llm_values = { + "base_url": str(llm_cfg.get("base_url", "")).strip(), + "api_key": str(llm_cfg.get("api_key", "")).strip(), + "model": str(llm_cfg.get("model", "")).strip(), + } + for key, env_var in spec.llm_env_map.items(): + value = llm_values.get(key, "") + if value: + merged_env.setdefault(env_var, value) + llm_model = llm_values["model"] + if llm_model and spec.llm_model_args: + llm_command_extra = [*spec.llm_model_args, llm_model] + image = effective_agent_image(selected_agent, config) try: @@ -304,6 +320,9 @@ def run( else: warning(f"IKWID mode not supported for agent '{selected_agent}', ignoring") + if llm_command_extra: + command = list(command or []) + llm_command_extra + config_dir = agent_config_dir(selected_agent) config_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/vibepod/core/agents.py b/src/vibepod/core/agents.py index 34cedaf..801feb2 100644 --- a/src/vibepod/core/agents.py 
+++ b/src/vibepod/core/agents.py @@ -22,6 +22,8 @@ class AgentSpec: platform: str | None = None run_as_host_user: bool = False ikwid_args: list[str] | None = None + llm_env_map: dict[str, str] | None = None + llm_model_args: list[str] | None = None AGENT_SPECS: dict[str, AgentSpec] = { @@ -34,6 +36,8 @@ class AgentSpec: "/claude", {"CLAUDE_CONFIG_DIR": "/claude"}, ikwid_args=["--dangerously-skip-permissions"], + llm_env_map={"base_url": "ANTHROPIC_BASE_URL", "api_key": "ANTHROPIC_API_KEY"}, + llm_model_args=["--model"], ), "gemini": AgentSpec( "gemini", @@ -91,6 +95,10 @@ class AgentSpec: "/config", {"HOME": "/config"}, ikwid_args=["--full-auto"], + llm_env_map={ + "base_url": "CODEX_OSS_BASE_URL", + }, + llm_model_args=["--oss", "-m"], ), } diff --git a/src/vibepod/core/config.py b/src/vibepod/core/config.py index 1f13d3d..31a5b9c 100644 --- a/src/vibepod/core/config.py +++ b/src/vibepod/core/config.py @@ -105,6 +105,12 @@ def _default_config() -> dict[str, Any]: "ca_dir": str(config_root / "proxy" / "mitmproxy"), "ca_path": str(config_root / "proxy" / "mitmproxy" / "mitmproxy-ca-cert.pem"), }, + "llm": { + "enabled": False, + "base_url": "", + "api_key": "", + "model": "", + }, "aliases": DEFAULT_ALIASES.copy(), } @@ -146,6 +152,10 @@ def _apply_env(config: dict[str, Any]) -> dict[str, Any]: "VP_NO_COLOR": ("no_color", lambda x: x.lower() == "true"), "VP_DATASETTE_PORT": ("logging.ui_port", int), "VP_PROXY_ENABLED": ("proxy.enabled", lambda x: x.lower() == "true"), + "VP_LLM_ENABLED": ("llm.enabled", lambda x: x.lower() == "true"), + "VP_LLM_BASE_URL": ("llm.base_url", str), + "VP_LLM_API_KEY": ("llm.api_key", str), + "VP_LLM_MODEL": ("llm.model", str), } for env_key, (config_path, converter) in mappings.items(): diff --git a/tests/test_agents.py b/tests/test_agents.py index e44b09a..cd25f66 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -77,3 +77,26 @@ def test_get_agent_shortcut_known_agent() -> None: assert get_agent_shortcut(agent) == 
expected_by_agent[agent] assert get_agent_shortcut(f" {agent.upper()} ") == expected_by_agent[agent] assert get_agent_shortcut("unknown") is None + + +def test_claude_spec_has_llm_env_map() -> None: + spec = get_agent_spec("claude") + assert spec.llm_env_map == { + "base_url": "ANTHROPIC_BASE_URL", + "api_key": "ANTHROPIC_API_KEY", + } + assert spec.llm_model_args == ["--model"] + + +def test_codex_spec_has_llm_env_map() -> None: + spec = get_agent_spec("codex") + assert spec.llm_env_map == { + "base_url": "CODEX_OSS_BASE_URL", + } + assert spec.llm_model_args == ["--oss", "-m"] + + +def test_agents_without_llm_env_map() -> None: + for agent in ("gemini", "opencode", "devstral", "auggie", "copilot"): + spec = get_agent_spec(agent) + assert spec.llm_env_map is None, f"{agent} should not have llm_env_map" diff --git a/tests/test_config.py b/tests/test_config.py index b1d2c05..6c765ac 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -138,3 +138,28 @@ def test_per_agent_auto_pull_override(monkeypatch, tmp_path: Path) -> None: assert config["agents"]["claude"]["auto_pull"] is True # Other agents should still have None (unset) assert config["agents"]["gemini"]["auto_pull"] is None + + +def test_default_config_includes_llm_section(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setenv("VP_CONFIG_DIR", str(tmp_path)) + config = get_config() + llm = config.get("llm") + assert isinstance(llm, dict) + assert llm["enabled"] is False + assert llm["base_url"] == "" + assert llm["api_key"] == "" + assert llm["model"] == "" + + +def test_llm_env_overrides(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setenv("VP_CONFIG_DIR", str(tmp_path)) + monkeypatch.setenv("VP_LLM_ENABLED", "true") + monkeypatch.setenv("VP_LLM_BASE_URL", "http://localhost:11434/v1") + monkeypatch.setenv("VP_LLM_API_KEY", "sk-test") + monkeypatch.setenv("VP_LLM_MODEL", "llama3") + config = get_config() + llm = config["llm"] + assert llm["enabled"] is True + assert llm["base_url"] == 
"http://localhost:11434/v1" + assert llm["api_key"] == "sk-test" + assert llm["model"] == "llama3" diff --git a/tests/test_run.py b/tests/test_run.py index 8c67452..62b9942 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -708,6 +708,318 @@ def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] assert captured["command"] == ["claude"] +def test_llm_enabled_injects_openai_env_vars(monkeypatch, tmp_path: Path) -> None: + """When llm.enabled=true, OPENAI_BASE_URL/API_KEY/MODEL are injected.""" + captured: dict = {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": "vibepod-claude-test", + "id": "abc123", + "status": "running", + "attrs": {"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["llm"] = { + "enabled": True, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test-key", + "model": "llama3", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="claude", workspace=tmp_path, detach=True) + + env = captured["env"] + assert env["ANTHROPIC_BASE_URL"] == "http://localhost:11434/v1" + assert env["ANTHROPIC_API_KEY"] == "sk-test-key" + assert "ANTHROPIC_MODEL" not in env + assert captured["command"] == ["claude", "--model", "llama3"] + + +def test_llm_enabled_injects_openai_env_vars_for_codex(monkeypatch, tmp_path: Path) -> None: + """When llm.enabled=true, codex gets OPENAI_* env vars.""" + captured: dict 
= {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": "vibepod-codex-test", + "id": "abc123", + "status": "running", + "attrs": {"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["agents"]["codex"] = {"env": {}, "init": []} + cfg["llm"] = { + "enabled": True, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test-key", + "model": "llama3", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="codex", workspace=tmp_path, detach=True) + + env = captured["env"] + assert env["CODEX_OSS_BASE_URL"] == "http://localhost:11434/v1" + assert "OPENAI_API_KEY" not in env + assert captured["command"] == ["codex", "--oss", "-m", "llama3"] + + +def test_llm_disabled_does_not_inject_env_vars(monkeypatch, tmp_path: Path) -> None: + """When llm.enabled=false, no LLM env vars are injected.""" + captured: dict = {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": "vibepod-claude-test", + "id": "abc123", + "status": "running", + "attrs": 
{"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["llm"] = { + "enabled": False, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test-key", + "model": "llama3", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="claude", workspace=tmp_path, detach=True) + + env = captured["env"] + assert "ANTHROPIC_BASE_URL" not in env + assert "ANTHROPIC_API_KEY" not in env + assert "ANTHROPIC_MODEL" not in env + + +def test_llm_skipped_for_agent_without_mapping(monkeypatch, tmp_path: Path) -> None: + """Agents without llm_env_map get no LLM env vars even when enabled.""" + captured: dict = {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": "vibepod-gemini-test", + "id": "abc123", + "status": "running", + "attrs": {"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["agents"]["gemini"] = {"env": {}, "init": []} + cfg["llm"] = { + "enabled": True, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test", + "model": "llama3", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="gemini", workspace=tmp_path, detach=True) + + env = captured["env"] + assert "ANTHROPIC_BASE_URL" not in env + assert 
"OPENAI_BASE_URL" not in env + + +def test_llm_empty_model_not_injected(monkeypatch, tmp_path: Path) -> None: + """When llm.model is empty, model env var is not set.""" + captured: dict = {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": "vibepod-claude-test", + "id": "abc123", + "status": "running", + "attrs": {"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["llm"] = { + "enabled": True, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test", + "model": "", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="claude", workspace=tmp_path, detach=True) + + env = captured["env"] + assert env["ANTHROPIC_BASE_URL"] == "http://localhost:11434/v1" + assert captured["command"] == ["claude"] + + +def test_llm_per_agent_env_overrides_llm(monkeypatch, tmp_path: Path) -> None: + """Per-agent env settings take precedence over LLM injection.""" + captured: dict = {} + + class _CapturingDockerManager: + def ensure_network(self, name: str) -> None: + pass + + def networks_with_running_containers(self) -> list[str]: + return [] + + def pull_image(self, image: str) -> None: + pass + + def ensure_proxy(self, **kwargs) -> None: # type: ignore[no-untyped-def] + pass + + def run_agent(self, **kwargs) -> object: # type: ignore[no-untyped-def] + captured.update(kwargs) + container = type( + "_Container", + (), + { + "name": 
"vibepod-claude-test", + "id": "abc123", + "status": "running", + "attrs": {"NetworkSettings": {"Networks": {}}}, + "reload": lambda self: None, + "labels": {}, + "logs": lambda self, **kw: b"", + }, + )() + return container + + cfg = _make_config() + cfg["agents"]["claude"]["env"] = {"ANTHROPIC_BASE_URL": "http://custom:9999/v1"} + cfg["llm"] = { + "enabled": True, + "base_url": "http://localhost:11434/v1", + "api_key": "sk-test", + "model": "llama3", + } + monkeypatch.setattr(run_cmd, "get_config", lambda: cfg) + monkeypatch.setattr(run_cmd, "DockerManager", _CapturingDockerManager) + + run_cmd.run(agent="claude", workspace=tmp_path, detach=True) + + env = captured["env"] + assert env["ANTHROPIC_BASE_URL"] == "http://custom:9999/v1" + + def test_run_accepts_short_agent_name(monkeypatch, tmp_path: Path) -> None: class _UnavailableDockerManager: def __init__(self) -> None: