Skip to content

Commit eca2212

Browse files
lexgeniusclaude
and committed
fix: bypass LiteLLM for Ollama embeddings to resolve 400 Bad Request
LiteLLM's Ollama embedding handler sends a malformed request to Ollama's /api/embed endpoint, causing a 400 Bad Request error on Ollama 0.18.x.

- Add `_ollama_embed()` to `LiteLLMEmbeddingWrapper` that calls Ollama's `/api/embed` directly via httpx, stripping the "ollama/" prefix from the model name (the root cause of the malformed request)
- Route `embed_query` and `embed_documents` through this helper when provider == "ollama", bypassing LiteLLM entirely
- Wrap `search_similarity_threshold` in try/except so an embedding failure returns [] instead of crashing the agent

Fixes #1425

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 69e1774 commit eca2212

2 files changed

Lines changed: 26 additions & 7 deletions

File tree

models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,13 +593,25 @@ def __init__(
593593
**kwargs: Any,
594594
):
595595
self.model_name = f"{provider}/{model}" if provider != "openai" else model
596+
self.provider = provider
596597
self.kwargs = kwargs
597598
self.a0_model_conf = model_config
598599

600+
def _ollama_embed(self, inputs: List[str]) -> List[List[float]]:
601+
import httpx
602+
api_base = self.kwargs.get("api_base", "http://host.docker.internal:11434").rstrip("/")
603+
model = self.model_name.split("/", 1)[-1] # strip "ollama/" prefix
604+
resp = httpx.post(f"{api_base}/api/embed", json={"model": model, "input": inputs}, timeout=60)
605+
resp.raise_for_status()
606+
return resp.json()["embeddings"]
607+
599608
def embed_documents(self, texts: List[str]) -> List[List[float]]:
600609
# Apply rate limiting if configured
601610
apply_rate_limiter_sync(self.a0_model_conf, " ".join(texts))
602611

612+
if self.provider == "ollama":
613+
return self._ollama_embed(texts)
614+
603615
resp = embedding(model=self.model_name, input=texts, **self.kwargs)
604616
return [
605617
item.get("embedding") if isinstance(item, dict) else item.embedding # type: ignore
def embed_query(self, text: str) -> List[float]:
    """Return the embedding vector for a single query string."""
    # Honor any configured rate limit before hitting the provider.
    apply_rate_limiter_sync(self.a0_model_conf, text)

    # LiteLLM's Ollama embedding handler sends a malformed request
    # (400 Bad Request on Ollama 0.18.x) — talk to Ollama directly.
    if self.provider == "ollama":
        return self._ollama_embed([text])[0]

    response = embedding(model=self.model_name, input=[text], **self.kwargs)
    first = response.data[0]  # type: ignore
    if isinstance(first, dict):
        return first.get("embedding")  # type: ignore
    return first.embedding  # type: ignore

plugins/_memory/helpers/memory.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -337,13 +337,17 @@ async def search_similarity_threshold(
337337
):
338338
comparator = Memory._get_comparator(filter) if filter else None
339339

340-
return await self.db.asearch(
341-
query,
342-
search_type="similarity_score_threshold",
343-
k=limit,
344-
score_threshold=threshold,
345-
filter=comparator,
346-
)
340+
try:
341+
return await self.db.asearch(
342+
query,
343+
search_type="similarity_score_threshold",
344+
k=limit,
345+
score_threshold=threshold,
346+
filter=comparator,
347+
)
348+
except Exception as e:
349+
PrintStyle(font_color="yellow").print(f"Memory search failed (embedding error): {e}")
350+
return []
347351

348352
async def delete_documents_by_query(
349353
self, query: str, threshold: float, filter: str = ""

0 commit comments

Comments
 (0)