Skip to content

Commit b6b7e97

Browse files
feat: add OpenRouter as LLM and embedding provider (#56)
* feat: add OpenRouter as LLM and embedding provider Add OpenRouter as a first-class provider, enabling access to 200+ models (Claude, GPT, Gemini, Llama, Qwen, etc.) through a single API key. LLM provider: - New OpenRouterProvider using OpenAI-compatible endpoint - Supports generate() and stream_chat() (ChatProvider protocol) - Sets recommended HTTP-Referer and X-Title headers - Default model: anthropic/claude-sonnet-4.6 - Rate limits: 60 RPM / 200K TPM - Cost tracking intentionally disabled (OpenRouter proxies models with varying prices — users should check the OpenRouter dashboard) Embedding provider: - New OpenRouterEmbedder for vector search and chat - Default model: google/gemini-embedding-001 (768 dims) - One OPENROUTER_API_KEY covers both LLM and embeddings Integration: - Registered in LLM and embedding registries (lazy import) - CLI auto-detection from OPENROUTER_API_KEY env var - Interactive provider selection in `repowise init` - Embedder selection in `repowise serve` - Server provider catalog for web UI - No new pip dependency (uses existing openai package) Tests: - 13 unit tests (construction, generation, error mapping, headers) - Registry test updated (builtin count 6 → 7) - Integration test (skipped without OPENROUTER_API_KEY) * fix: remove dead cost_tracker code from OpenRouterProvider Remove the cost_tracker parameter and unreachable if-block from generate(). OpenRouter proxies 200+ models with varying prices, so cost tracking is documented as unsupported — users should check the OpenRouter dashboard instead. * test: add stream_chat and OpenRouterEmbedder tests - stream_chat: text deltas, tool calls, rate limit error (3 tests) - OpenRouterEmbedder: construction, dimensions, embedding, base URL (12 tests) * fix: address OpenRouter PR review comments - Embedder: raise ValueError in __init__ for unknown models instead of silently falling back to 768 dims, which would mis-size stored vectors against the model's real output and corrupt the vector store. 
- Provider: drop **_kwargs catchall and accept cost_tracker explicitly so unknown kwargs from future registry changes fail loudly instead of vanishing. - Provider: switch max_completion_tokens → max_tokens in generate() and stream_chat(). Per OpenRouter API docs, max_tokens is the universal parameter across the 200+ proxied models; max_completion_tokens is an OpenAI-specific newer name not all proxied models accept. --------- Co-authored-by: Raghav Chamadiya <65403859+RaghavChamadiya@users.noreply.github.com>
1 parent 88ea2a7 commit b6b7e97

17 files changed

Lines changed: 945 additions & 20 deletions

File tree

packages/cli/src/repowise/cli/commands/init_cmd.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ def _resolve_embedder(embedder_flag: str | None) -> str:
116116
return "gemini"
117117
if os.environ.get("OPENAI_API_KEY"):
118118
return "openai"
119+
if os.environ.get("OPENROUTER_API_KEY"):
120+
return "openrouter"
119121
return "mock"
120122

121123

packages/cli/src/repowise/cli/commands/serve_cmd.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def _setup_embedder() -> None:
4141
# Detect which providers already have keys in the environment.
4242
has_gemini = bool(os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY"))
4343
has_openai = bool(os.environ.get("OPENAI_API_KEY"))
44+
has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY"))
4445

4546
console.print(
4647
"\n[bold]Chat & search require an embedder.[/bold] "
@@ -51,18 +52,24 @@ def _setup_embedder() -> None:
5152
labels = []
5253
if has_gemini:
5354
options.append("gemini")
54-
labels.append("[1] gemini [green]✓ key set[/green]")
55+
labels.append("[1] gemini [green]✓ key set[/green]")
5556
else:
5657
options.append("gemini")
57-
labels.append("[1] gemini [dim]needs GEMINI_API_KEY / GOOGLE_API_KEY[/dim]")
58+
labels.append("[1] gemini [dim]needs GEMINI_API_KEY / GOOGLE_API_KEY[/dim]")
5859
if has_openai:
5960
options.append("openai")
60-
labels.append("[2] openai [green]✓ key set[/green]")
61+
labels.append("[2] openai [green]✓ key set[/green]")
6162
else:
6263
options.append("openai")
63-
labels.append("[2] openai [dim]needs OPENAI_API_KEY[/dim]")
64+
labels.append("[2] openai [dim]needs OPENAI_API_KEY[/dim]")
65+
if has_openrouter:
66+
options.append("openrouter")
67+
labels.append("[3] openrouter [green]✓ key set[/green]")
68+
else:
69+
options.append("openrouter")
70+
labels.append("[3] openrouter [dim]needs OPENROUTER_API_KEY[/dim]")
6471
options.append("skip")
65-
labels.append("[3] skip [dim]no chat/search[/dim]")
72+
labels.append(f"[{len(options)}] skip [dim]no chat/search[/dim]")
6673

6774
for label in labels:
6875
console.print(f" {label}")
@@ -106,6 +113,11 @@ def _get_or_prompt_api_key(embedder: str) -> str:
106113
if key:
107114
return key
108115
return click.prompt(" OPENAI_API_KEY", default="", show_default=False).strip()
116+
if embedder == "openrouter":
117+
key = os.environ.get("OPENROUTER_API_KEY", "")
118+
if key:
119+
return key
120+
return click.prompt(" OPENROUTER_API_KEY", default="", show_default=False).strip()
109121
return ""
110122

111123

@@ -116,6 +128,8 @@ def _set_api_key_env(embedder: str, key: str) -> None:
116128
os.environ.setdefault("GEMINI_API_KEY", key)
117129
elif embedder == "openai":
118130
os.environ.setdefault("OPENAI_API_KEY", key)
131+
elif embedder == "openrouter":
132+
os.environ.setdefault("OPENROUTER_API_KEY", key)
119133

120134

121135
def _save_global_embedder(embedder: str, api_key: str) -> None:

packages/cli/src/repowise/cli/helpers.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ def resolve_provider(
260260
os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
261261
):
262262
kwargs["api_key"] = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
263+
elif provider_name == "openrouter" and os.environ.get("OPENROUTER_API_KEY"):
264+
kwargs["api_key"] = os.environ["OPENROUTER_API_KEY"]
263265
elif provider_name == "ollama" and os.environ.get("OLLAMA_BASE_URL"):
264266
kwargs["base_url"] = os.environ["OLLAMA_BASE_URL"]
265267

@@ -280,6 +282,13 @@ def resolve_provider(
280282
else {"api_key": os.environ["OPENAI_API_KEY"]}
281283
)
282284
return get_provider("openai", **kwargs)
285+
if os.environ.get("OPENROUTER_API_KEY") and os.environ["OPENROUTER_API_KEY"].strip():
286+
kwargs = (
287+
{"model": model, "api_key": os.environ["OPENROUTER_API_KEY"]}
288+
if model
289+
else {"api_key": os.environ["OPENROUTER_API_KEY"]}
290+
)
291+
return get_provider("openrouter", **kwargs)
283292
if os.environ.get("OLLAMA_BASE_URL") and os.environ["OLLAMA_BASE_URL"].strip():
284293
kwargs = (
285294
{"model": model, "base_url": os.environ["OLLAMA_BASE_URL"]}
@@ -296,7 +305,7 @@ def resolve_provider(
296305

297306
raise click.ClickException(
298307
"No provider configured. Use --provider, set REPOWISE_PROVIDER, "
299-
"or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / GOOGLE_API_KEY."
308+
"or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OPENROUTER_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / GOOGLE_API_KEY."
300309
)
301310

302311

@@ -330,6 +339,7 @@ def _is_env_var_exists(var_name: str) -> bool:
330339
provider_env_vars = {
331340
"anthropic": ["ANTHROPIC_API_KEY"],
332341
"openai": ["OPENAI_API_KEY"],
342+
"openrouter": ["OPENROUTER_API_KEY"],
333343
"gemini": ["GEMINI_API_KEY", "GOOGLE_API_KEY"], # Either one
334344
"ollama": ["OLLAMA_BASE_URL"],
335345
"litellm": ["LITELLM_API_KEY"], # May need others depending on backend

packages/cli/src/repowise/cli/ui.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ def print_phase_header(
265265
"openai": "gpt-4.1",
266266
"anthropic": "claude-sonnet-4-6",
267267
"ollama": "llama3.2",
268+
"openrouter": "anthropic/claude-sonnet-4.6",
268269
"litellm": "groq/llama-3.1-70b-versatile",
269270
}
270271

@@ -273,13 +274,15 @@ def print_phase_header(
273274
"openai": "OPENAI_API_KEY",
274275
"anthropic": "ANTHROPIC_API_KEY",
275276
"ollama": "OLLAMA_BASE_URL",
277+
"openrouter": "OPENROUTER_API_KEY",
276278
}
277279

278280
_PROVIDER_SIGNUP: dict[str, str] = {
279281
"gemini": "https://aistudio.google.com/apikey",
280282
"openai": "https://platform.openai.com/api-keys",
281283
"anthropic": "https://console.anthropic.com/settings/keys",
282284
"ollama": "https://ollama.com/download",
285+
"openrouter": "https://openrouter.ai/keys",
283286
}
284287

285288

packages/core/src/repowise/core/providers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""repowise provider package.
22
33
Sub-packages:
4-
llm/ — LLM providers (Anthropic, OpenAI, Gemini, Ollama, LiteLLM)
4+
llm/ — LLM providers (Anthropic, OpenAI, OpenRouter, Gemini, Ollama, LiteLLM)
55
embedding/ — Embedding providers (OpenAI, Gemini, Mock)
66
77
Preferred entry points:
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""OpenRouter embedding support for repowise semantic search.
2+
3+
Uses the OpenAI-compatible endpoint at ``https://openrouter.ai/api/v1``.
4+
No additional pip install required — uses the ``openai`` package.
5+
6+
Default model: google/gemini-embedding-001 (768 dims)
7+
8+
Usage:
9+
from repowise.core.providers.embedding.openrouter import OpenRouterEmbedder
10+
11+
embedder = OpenRouterEmbedder(api_key="sk-or-...")
12+
vectors = await embedder.embed(["some text"])
13+
"""
14+
15+
from __future__ import annotations
16+
17+
import asyncio
18+
import math
19+
import os
20+
21+
22+
class OpenRouterEmbedder:
    """OpenRouter embedding adapter implementing the repowise Embedder protocol.

    Calls the OpenAI-compatible endpoint at ``https://openrouter.ai/api/v1``
    through the ``openai`` package. The package is imported lazily inside
    ``embed()``, so constructing the embedder never requires it.

    Args:
        api_key: OpenRouter API key. Falls back to OPENROUTER_API_KEY env var.
        model: Embedding model name. Default: "google/gemini-embedding-001".
        timeout: Per-request HTTP timeout in seconds.

    Raises:
        ValueError: If no API key is available, or if *model* is not listed in
            ``_DIMS`` — an unknown dimension count would mis-size stored
            vectors against the model's real output and corrupt the vector
            store.
    """

    # Known models mapped to their output dimension counts. ``dimensions``
    # is used to size stored vectors, so entries here must be exact.
    _DIMS: dict[str, int] = {
        "google/gemini-embedding-001": 768,
        "openai/text-embedding-3-small": 1536,
        "openai/text-embedding-3-large": 3072,
    }

    _DEFAULT_TIMEOUT: float = 10.0

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "google/gemini-embedding-001",
        timeout: float = _DEFAULT_TIMEOUT,
    ) -> None:
        self._api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
        if not self._api_key:
            raise ValueError(
                "OpenRouter API key required. Pass api_key= or set OPENROUTER_API_KEY env var."
            )
        if model not in self._DIMS:
            known = ", ".join(sorted(self._DIMS))
            raise ValueError(
                f"Unknown embedding model {model!r}. Stored vectors would be mis-sized "
                f"against the model's real output, silently corrupting the vector store. "
                f"Add {model!r} to OpenRouterEmbedder._DIMS with its correct dimension count, "
                f"or pick a known model: {known}."
            )
        self._model = model
        self._timeout = timeout
        self._client: object | None = None
        # Guards lazy client creation: concurrent embed() calls run
        # _embed_sync in separate worker threads (asyncio.to_thread), and an
        # unguarded check-then-set on self._client could construct two
        # clients and leak one. Local import keeps this edit self-contained.
        import threading

        self._client_lock = threading.Lock()

    @property
    def dimensions(self) -> int:
        """Dimension count of vectors produced by the configured model."""
        return self._DIMS[self._model]

    async def embed(self, texts: list[str]) -> list[list[float]]:
        """Embed a batch of texts using OpenRouter.

        Runs the synchronous SDK call in a thread pool to avoid blocking the
        asyncio event loop. Returned vectors are L2-normalized.

        Args:
            texts: Batch of input strings. An empty batch short-circuits to
                ``[]`` without touching the network or the openai package.

        Returns:
            One unit-length vector per input text, in input order.
        """
        if not texts:
            return []

        model = self._model
        timeout = self._timeout

        def _embed_sync() -> list[list[float]]:
            # Deferred import: the openai package is only needed on first use.
            import openai

            # Create (or reuse) the client under the lock, then release the
            # lock before the network call so requests still run in parallel.
            with self._client_lock:
                if self._client is None:
                    self._client = openai.OpenAI(
                        api_key=self._api_key,
                        base_url="https://openrouter.ai/api/v1",
                        timeout=timeout,
                    )
                client = self._client
            response = client.embeddings.create(model=model, input=texts)  # type: ignore[attr-defined]
            raw_vectors = [list(item.embedding) for item in response.data]
            return [_l2_normalize(v) for v in raw_vectors]

        return await asyncio.to_thread(_embed_sync)
91+
92+
93+
def _l2_normalize(vec: list[float]) -> list[float]:
    """Return *vec* scaled to unit Euclidean (L2) length.

    A zero vector is divided by 1.0 instead, yielding an unchanged copy
    rather than a division-by-zero error.
    """
    scale = math.sqrt(sum(component * component for component in vec)) or 1.0
    return [component / scale for component in vec]

packages/core/src/repowise/core/providers/embedding/registry.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424

2525
_BUILTIN_EMBEDDERS: dict[str, tuple[str, str]] = {
2626
"openai": ("repowise.core.providers.embedding.openai", "OpenAIEmbedder"),
27-
"gemini": ("repowise.core.providers.embedding.gemini", "GeminiEmbedder"),
28-
"mock": ("repowise.core.providers.embedding.base", "MockEmbedder"),
27+
"gemini": ("repowise.core.providers.embedding.gemini", "GeminiEmbedder"),
28+
"openrouter": ("repowise.core.providers.embedding.openrouter", "OpenRouterEmbedder"),
29+
"mock": ("repowise.core.providers.embedding.base", "MockEmbedder"),
2930
}
3031

3132
_custom_embedders: dict[str, Callable[..., Embedder]] = {}
@@ -80,6 +81,7 @@ def get_embedder(name: str, **kwargs: Any) -> Embedder:
8081
_missing = {
8182
"openai": "openai",
8283
"gemini": "google-genai",
84+
"openrouter": "openai", # openrouter uses the openai package
8385
}
8486
try:
8587
module = importlib.import_module(module_path)

packages/core/src/repowise/core/providers/llm/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
anthropic — claude-opus-4-6, claude-sonnet-4-6, claude-haiku-4-5
1313
openai — gpt-5.4-nano, gpt-5.4-mini, gpt-5.4
1414
gemini — gemini-3.1-flash-lite-preview, gemini-3-flash-preview, gemini-3.1-pro-preview
15+
openrouter — 200+ models via OpenRouter (anthropic/claude-sonnet-4.6, etc.)
1516
ollama — local inference (llama3.2, codellama, etc.)
1617
litellm — 100+ providers via LiteLLM proxy
1718
mock — deterministic test provider

0 commit comments

Comments
 (0)