Skip to content

Commit 5f26498

Browse files
Merge pull request #39 from GoodbyePlanet/feat/jina-api-provider
Feat/jina api provider
2 parents 011e19e + 17f2acd commit 5f26498

9 files changed

Lines changed: 464 additions & 11 deletions

File tree

.env.example

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1-
# Embeddings provider — one of: jina | voyage | openai | ollama
1+
# Embeddings provider — one of: jina | jina-api | voyage | openai | ollama
22
EMBEDDINGS_PROVIDER=jina
33

44
# Jina Code V2 via HuggingFace TEI (default)
55
JINA_URL=http://localhost:8087
66
JINA_MODEL=jinaai/jina-embeddings-v2-base-code
77
JINA_DIMENSIONS=768
88

9+
# Jina hosted API (api.jina.ai)
10+
# JINA_API_KEY=
11+
# JINA_API_MODEL=jina-embeddings-v2-base-code
12+
# JINA_API_DIMENSIONS=
13+
914
# Voyage AI — set EMBEDDINGS_PROVIDER=voyage to use
1015
# VOYAGE_API_KEY=
1116
# VOYAGE_MODEL=voyage-code-3

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,6 @@ blog.md
2727
config.yaml
2828

2929
# Claude Code local settings
30-
.claude/settings.local.json
30+
.claude/settings.local.json
31+
32+
memory/

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
QDRANT_URL := http://localhost:6333
22
SEMCODE_URL := http://localhost:8090
33

4-
.PHONY: qdrant-clean qdrant-dashboard index-code index-history \
4+
.PHONY: qdrant-clean qdrant-dashboard index-code index-history docker-build \
55
docker-build-restart docker-build-restart-jina docker-up docker-up-jina docker-logs docker-logs-semcode
66

77
qdrant-clean:
@@ -22,6 +22,9 @@ index-history:
2222
-H "Content-Type: application/json" \
2323
--no-buffer
2424

25+
docker-build:
26+
docker compose build
27+
2528
docker-build-restart:
2629
docker compose down && docker compose up --build -d
2730

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ For `/reindex-history` the `phase` value is `discovery|embedding|upserting` and
228228
| `QDRANT_URL` | `http://localhost:6333` | Qdrant connection URL |
229229
| `QDRANT_COLLECTION` | `code_symbols` | Collection name for code symbol vectors |
230230
| `QDRANT_COMMITS_COLLECTION` | `git_commits` | Collection name for commit message vectors |
231-
| `EMBEDDINGS_PROVIDER` | `jina` | One of `jina`, `voyage`, `openai`, `ollama` — see *Embedding providers* below |
231+
| `EMBEDDINGS_PROVIDER` | `jina` | One of `jina`, `jina-api`, `voyage`, `openai`, `ollama` — see *Embedding providers* below |
232232
| `GIT_HISTORY_MAX_COMMITS` | `500` | Max commits indexed per service |
233233
| `MCP_TRANSPORT` | `streamable-http` | One of `streamable-http`, `sse`, `stdio` |
234234
| `MCP_HOST` / `MCP_PORT` | `127.0.0.1` / `8090` | Server bind address |
@@ -245,6 +245,9 @@ configured model — no need to set dimensions manually unless you want to overr
245245
| `JINA_URL` | `http://localhost:8087` | `jina` | TEI base URL |
246246
| `JINA_MODEL` | `jinaai/jina-embeddings-v2-base-code` | `jina` | Informational only — the TEI container's `--model-id` flag is what actually loads. Edit `docker-compose.yaml` to change models. |
247247
| `JINA_DIMENSIONS` | `768` | `jina` | Vector dimensions of the TEI model |
248+
| `JINA_API_KEY` | *(required if provider=jina-api)* | `jina-api` | Jina AI API key (hosted endpoint at `api.jina.ai`) |
249+
| `JINA_API_MODEL` | `jina-embeddings-v2-base-code` | `jina-api` | Hosted Jina model — also supports `jina-code-embeddings-0.5b`, `jina-code-embeddings-1.5b` |
250+
| `JINA_API_DIMENSIONS` | *(native)* | `jina-api` | Optional Matryoshka override (code-embeddings models support shrinking); required for models without a native default |
248251
| `VOYAGE_API_KEY` | *(required if provider=voyage)* | `voyage` | Voyage AI API key |
249252
| `VOYAGE_MODEL` | `voyage-code-3` | `voyage` | Voyage embedding model |
250253
| `VOYAGE_DIMENSIONS` | *(native)* | `voyage` | Optional override — Voyage code-3 supports `256` / `512` / `1024` / `2048` |

server/config.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def __init__(
2323
self.exclude = exclude
2424

2525

26-
EmbeddingsProviderName = Literal["jina", "voyage", "openai", "ollama"]
26+
EmbeddingsProviderName = Literal["jina", "jina-api", "voyage", "openai", "ollama"]
2727

2828

2929
class Settings(BaseSettings):
@@ -40,6 +40,13 @@ class Settings(BaseSettings):
4040
)
4141
jina_dimensions: int = Field(default=768, alias="JINA_DIMENSIONS")
4242

43+
# Jina AI (hosted API at api.jina.ai)
44+
jina_api_key: str = Field(default="", alias="JINA_API_KEY")
45+
jina_api_model: str = Field(
46+
default="jina-embeddings-v2-base-code", alias="JINA_API_MODEL"
47+
)
48+
jina_api_dimensions: int | None = Field(default=None, alias="JINA_API_DIMENSIONS")
49+
4350
# Voyage AI
4451
voyage_api_key: str = Field(default="", alias="VOYAGE_API_KEY")
4552
voyage_model: str = Field(default="voyage-code-3", alias="VOYAGE_MODEL")

server/embeddings/factory.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ def get_embedding_provider() -> EmbeddingProvider:
1616
from server.embeddings.jina import JinaEmbeddingProvider
1717

1818
_provider = JinaEmbeddingProvider()
19+
elif name == "jina-api":
20+
from server.embeddings.jina_api import JinaApiEmbeddingProvider
21+
22+
_provider = JinaApiEmbeddingProvider()
1923
elif name == "voyage":
2024
from server.embeddings.voyage import VoyageEmbeddingProvider
2125

@@ -31,7 +35,7 @@ def get_embedding_provider() -> EmbeddingProvider:
3135
else:
3236
raise ValueError(
3337
f"Unknown EMBEDDINGS_PROVIDER {name!r}. "
34-
"Expected one of: jina, voyage, openai, ollama."
38+
"Expected one of: jina, jina-api, voyage, openai, ollama."
3539
)
3640
return _provider
3741

server/embeddings/jina_api.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
import logging
5+
6+
import httpx
7+
8+
from server.config import settings
9+
from server.embeddings.base import EmbeddingProvider
10+
11+
logger = logging.getLogger(__name__)
12+
13+
_API_URL = "https://api.jina.ai/v1/embeddings"  # endpoint every embedding request is POSTed to
14+
# Jina's hosted API accepts up to 2048 inputs per request; 128 keeps us
15+
# uniform with the OpenAI/Voyage providers.
16+
_BATCH_SIZE = 128  # number of inputs sent per request by _embed
17+
_BACKOFF_DELAYS = [10, 20, 30, 40]  # seconds to sleep before retrying the Nth 429 when the response gives no positive Retry-After
18+
# Conservative character cap (~8 k tokens at ~4 chars/token) to avoid
19+
# "Failed to encode text" 400s on models with limited context windows.
20+
_MAX_TEXT_CHARS = 32_000  # _sanitize truncates every input to this many characters
21+
22+
# Native output dimensions for known models. The jina-code-embeddings family
23+
# supports Matryoshka truncation via the `dimensions` API parameter —
24+
# override with JINA_API_DIMENSIONS to one of the supported sizes:
25+
# - jina-code-embeddings-0.5b: 64, 128, 256, 512, 896 (native)
26+
# https://jina.ai/models/jina-code-embeddings-0.5b
27+
# - jina-code-embeddings-1.5b: 128, 256, 512, 1024, 1536 (native)
28+
# https://jina.ai/models/jina-code-embeddings-1.5b
29+
# jina-embeddings-v2-base-code is fixed-size and does not support truncation.
30+
# https://jina.ai/models/jina-embeddings-v2-base-code
31+
_NATIVE_DIMENSIONS: dict[str, int] = {
32+
"jina-embeddings-v2-base-code": 768,
33+
"jina-code-embeddings-0.5b": 896,
34+
"jina-code-embeddings-1.5b": 1536,
35+
}
36+
37+
# Models that accept the `task` parameter (asymmetric retrieval). The v2 model
38+
# is single-mode and rejects `task`, so we omit it.
39+
_TASK_AWARE_PREFIXES = ("jina-code-embeddings-",)  # matched with str.startswith against the configured model name
40+
41+
# jina-code-embeddings models use a different task vocabulary than the generic
42+
# "retrieval.*" tasks accepted by other Jina models.
43+
_JINA_CODE_TASK_MAP = {
44+
"retrieval.passage": "nl2code.passage",
45+
"retrieval.query": "nl2code.query",
46+
}
47+
48+
49+
class JinaApiEmbeddingProvider(EmbeddingProvider):
    """Jina AI hosted embeddings — see https://jina.ai/embeddings/.

    Texts are sanitized, sent to the hosted endpoint in batches of
    ``_BATCH_SIZE`` and returned as one vector per input, in input order.
    Rate-limit responses (HTTP 429) are retried with back-off; a batch that
    is rejected with HTTP 400 is re-sent one text at a time so a single
    unencodable text cannot fail the whole batch.
    """

    def __init__(self) -> None:
        """Read the provider settings and open the HTTP client.

        Raises:
            RuntimeError: if ``JINA_API_KEY`` is empty, if
                ``JINA_API_DIMENSIONS`` is set to a non-positive value, or if
                the configured model has no known native size and no
                ``JINA_API_DIMENSIONS`` override was given.
        """
        if not settings.jina_api_key:
            raise RuntimeError(
                "JINA_API_KEY is not set but EMBEDDINGS_PROVIDER=jina-api."
            )
        self._api_key = settings.jina_api_key
        self._model = settings.jina_api_model
        self._dims_override = settings.jina_api_dimensions
        if self._dims_override is not None:
            # A zero/negative size can never match a real vector; fail at
            # start-up instead of at the first upsert.
            if self._dims_override <= 0:
                raise RuntimeError(
                    "JINA_API_DIMENSIONS must be a positive integer, "
                    f"got {self._dims_override!r}."
                )
            self._dims = self._dims_override
        elif self._model in _NATIVE_DIMENSIONS:
            self._dims = _NATIVE_DIMENSIONS[self._model]
        else:
            raise RuntimeError(
                f"Unknown Jina model {self._model!r} — set JINA_API_DIMENSIONS "
                "to declare the output size, or use a known model "
                f"({', '.join(sorted(_NATIVE_DIMENSIONS))})."
            )
        # Whether the request body may carry a `task` field at all.
        self._supports_task = self._model.startswith(_TASK_AWARE_PREFIXES)
        # Whether generic "retrieval.*" task names must be translated through
        # _JINA_CODE_TASK_MAP before being sent.
        self._uses_code_tasks = self._model.startswith("jina-code-embeddings-")
        self._client = httpx.AsyncClient(
            timeout=120.0,
            headers={
                "Authorization": f"Bearer {self._api_key}",
                "Content-Type": "application/json",
            },
        )

    @property
    def dimensions(self) -> int:
        """Vector size produced by this provider (override or native size)."""
        return self._dims

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Embed documents; returns one vector per text, in input order."""
        return await self._embed(texts, task="retrieval.passage")

    async def embed_query(self, text: str) -> list[float]:
        """Embed a single search query; returns ``[]`` if no vector came back."""
        vectors = await self._embed([text], task="retrieval.query")
        return vectors[0] if vectors else []

    def _sanitize(self, text: str) -> str:
        """Return *text* cleaned up so that the API's tokenizer accepts it.

        The result is never empty: a text that sanitizes down to nothing is
        replaced by ``"."``.
        """
        # Encode to UTF-8 replacing lone surrogates and other unencodable
        # code points, then decode back — this removes anything that would
        # cause Jina's tokenizer to return 400 "Failed to encode text".
        cleaned = text.encode("utf-8", errors="replace").decode("utf-8")
        # Drop control characters below U+0020 except tab, LF and CR.
        cleaned = "".join(ch for ch in cleaned if ch >= " " or ch in "\t\n\r")
        return cleaned[:_MAX_TEXT_CHARS].strip() or "."

    def _make_body(self, inputs: list[str], task: str) -> dict:
        """Build the JSON request body for *inputs*.

        ``task`` is included only for task-aware models (translated to the
        code-model vocabulary where needed); ``dimensions`` is included only
        when an explicit override is configured.
        """
        body: dict = {"model": self._model, "input": inputs}
        if self._supports_task:
            body["task"] = (
                _JINA_CODE_TASK_MAP.get(task, task) if self._uses_code_tasks else task
            )
        if self._dims_override is not None:
            body["dimensions"] = self._dims_override
        return body

    @staticmethod
    def _retry_delay(header_value: str, fallback: float) -> float:
        """Return the seconds to wait before retrying a rate-limited request.

        Uses the ``Retry-After`` value when it is a positive, finite number of
        seconds; otherwise (missing, zero, HTTP-date, garbage) uses *fallback*.
        """
        try:
            seconds = float(header_value)
        except (TypeError, ValueError):
            return fallback
        return seconds if 0 < seconds < float("inf") else fallback

    async def _post_with_retry(self, body: dict) -> dict:
        """POST *body* to the API, retrying on HTTP 429, and return the JSON.

        Makes at most ``len(_BACKOFF_DELAYS)`` attempts.

        Raises:
            httpx.HTTPStatusError: for any 4xx/5xx response, including a 429
                that is still returned on the final attempt.
        """
        max_attempts = max(1, len(_BACKOFF_DELAYS))
        for attempt in range(max_attempts):
            resp = await self._client.post(_API_URL, json=body)
            if resp.status_code != 429:
                break
            if attempt == max_attempts - 1:
                # No attempt left: surface the 429 below instead of sleeping
                # for a retry that will never happen.
                break
            wait = self._retry_delay(
                resp.headers.get("Retry-After", ""), _BACKOFF_DELAYS[attempt]
            )
            logger.warning(
                "Jina rate-limited (429) — retrying in %.0fs (attempt %d/%d)",
                wait,
                attempt + 1,
                max_attempts,
            )
            await asyncio.sleep(wait)
        if resp.status_code >= 400:
            logger.error("Jina API error %d: %s", resp.status_code, resp.text[:500])
        resp.raise_for_status()
        return resp.json()

    async def _embed_batch_with_fallback(
        self, batch: list[str], task: str
    ) -> list[list[float]]:
        """Embed one item at a time, halving on a 400, substituting '.' only as last resort.

        Only an HTTP 400 (the API rejecting the text itself) triggers
        truncation. Any other failure — 5xx, auth error, timeout — propagates,
        so a transient outage cannot cause a document to be silently indexed
        from a truncated prefix or a placeholder.
        """
        vectors: list[list[float]] = []
        for idx, text in enumerate(batch):
            candidate = text
            embedded = False
            while candidate:
                try:
                    data = await self._post_with_retry(
                        self._make_body([candidate], task)
                    )
                except httpx.HTTPStatusError as exc:
                    if exc.response.status_code != 400:
                        raise
                    half = len(candidate) // 2
                    if half < 64:
                        # Too short to be worth another attempt.
                        break
                    logger.warning(
                        "Text at batch index %d (len=%d) failed — retrying with first %d chars",
                        idx,
                        len(candidate),
                        half,
                    )
                    candidate = candidate[:half]
                    continue
                vectors.append(data["data"][0]["embedding"])
                if len(candidate) < len(text):
                    logger.info(
                        "Encoded truncated text at batch index %d (%d → %d chars)",
                        idx,
                        len(text),
                        len(candidate),
                    )
                embedded = True
                break
            if not embedded:
                logger.warning(
                    "Skipping unencodable text at batch index %d (original len=%d), using placeholder.",
                    idx,
                    len(text),
                )
                # Keep the output aligned with the input: one vector per text.
                data = await self._post_with_retry(self._make_body(["."], task))
                vectors.append(data["data"][0]["embedding"])
        return vectors

    async def _embed(self, texts: list[str], task: str) -> list[list[float]]:
        """Sanitize *texts*, embed them in batches and return all vectors.

        Raises:
            httpx.HTTPStatusError: for any API error other than a batch-level
                400, which is handled by the per-item fallback.
            ValueError: if the API returns a different number of vectors than
                inputs for a batch.
        """
        if not texts:
            return []
        sanitized = [self._sanitize(t) for t in texts]
        all_vectors: list[list[float]] = []
        for start in range(0, len(sanitized), _BATCH_SIZE):
            batch = sanitized[start : start + _BATCH_SIZE]
            try:
                data = await self._post_with_retry(self._make_body(batch, task))
            except httpx.HTTPStatusError as exc:
                # Check the real status code; matching "400" against the
                # exception message is not a reliable test.
                if exc.response.status_code != 400:
                    raise
                logger.warning(
                    "Batch of %d failed with 400 — retrying one-by-one", len(batch)
                )
                all_vectors.extend(
                    await self._embed_batch_with_fallback(batch, task)
                )
                continue
            batch_vectors = [item["embedding"] for item in data.get("data", [])]
            if len(batch_vectors) != len(batch):
                raise ValueError(
                    f"Jina returned {len(batch_vectors)} vectors for "
                    f"{len(batch)} inputs — response may be malformed"
                )
            all_vectors.extend(batch_vectors)
        return all_vectors

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

server/routes/reindex.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def register_http_routes(mcp: FastMCP) -> None:
2020

2121
@mcp.custom_route("/reindex", methods=["POST"])
2222
async def reindex(request: Request) -> StreamingResponse:
23-
"""POST /reindex/streamstreaming variant of /reindex, returns NDJSON.
23+
"""POST /reindex — reindex code symbols, returns NDJSON stream.
2424
2525
Emits progress frames while indexing, followed by a final summary frame:
2626
{"type": "progress", "phase": "discovery"|"upserting"|"cleanup",
@@ -33,7 +33,9 @@ async def reindex(request: Request) -> StreamingResponse:
3333
"""
3434
body: dict = {}
3535
if request.headers.get("content-type", "").startswith("application/json"):
36-
body = await request.json()
36+
raw = await request.body()
37+
if raw:
38+
body = json.loads(raw)
3739

3840
service: str | None = body.get("service")
3941
force: bool = bool(body.get("force", False))
@@ -87,7 +89,7 @@ async def run() -> None:
8789

8890
@mcp.custom_route("/reindex-history", methods=["POST"])
8991
async def reindex_history(request: Request) -> StreamingResponse:
90-
"""POST /reindex-history/streamstreaming variant of /reindex-history, returns NDJSON.
92+
"""POST /reindex-history — index git commit history, returns NDJSON stream.
9193
9294
Emits progress frames while indexing, followed by a final summary frame:
9395
{"type": "progress", "phase": "discovery"|"embedding"|"upserting",
@@ -100,14 +102,16 @@ async def reindex_history(request: Request) -> StreamingResponse:
100102
"""
101103
body: dict = {}
102104
if request.headers.get("content-type", "").startswith("application/json"):
103-
body = await request.json()
105+
raw = await request.body()
106+
if raw:
107+
body = json.loads(raw)
104108

105109
service: str | None = body.get("service")
106110
force: bool = bool(body.get("force", False))
107111

108112
pipeline = GitHistoryPipeline(get_commit_store())
109113
logger.info(
110-
"Reindex-history/stream started: service=%s force=%s",
114+
"Reindex-history started: service=%s force=%s",
111115
service or "ALL",
112116
force,
113117
)

0 commit comments

Comments
 (0)