agent0ai · PaoloC68 · Apr 3, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/models.py b/models.py
@@ -600,7 +600,21 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]:
         # Apply rate limiting if configured
         apply_rate_limiter_sync(self.a0_model_conf, " ".join(texts))
 
-        resp = embedding(model=self.model_name, input=texts, **self.kwargs)
+        embed_kwargs = {"encoding_format": "float", **self.kwargs}
+        ctx = int(max((self.a0_model_conf.ctx_length if self.a0_model_conf else 0) or 8192, 1000) * 0.80)
+        texts = [trim_to_tokens(t, ctx, "start", ellipsis="") for t in texts]
+        try:
+            resp = embedding(model=self.model_name, input=texts, **embed_kwargs)
+        except Exception as e:
+            if "input_tokens" in str(e) and "context" in str(e):
+                texts = [t[: len(t) // 2] for t in texts]
+                resp = embedding(model=self.model_name, input=texts, **embed_kwargs)
+                break
+            except Exception as e:
+                if getattr(e, "status_code", None) == 400 and any(len(t) > 10 for t in texts):
+                    texts = [t[: max(len(t) // 2, 10)] for t in texts]
+                else:
+                    raise
         return [
             item.get("embedding") if isinstance(item, dict) else item.embedding  # type: ignore
             for item in resp.data  # type: ignore
@@ -610,7 +624,18 @@ def embed_query(self, text: str) -> List[float]:
         # Apply rate limiting if configured
         apply_rate_limiter_sync(self.a0_model_conf, text)
 
-        resp = embedding(model=self.model_name, input=[text], **self.kwargs)
+        embed_kwargs = {"encoding_format": "float", **self.kwargs}
+        ctx = int(max((self.a0_model_conf.ctx_length if self.a0_model_conf else 0) or 8192, 1000) * 0.80)
+        text = trim_to_tokens(text, ctx, "start", ellipsis="")
+        while True:
+            try:
+                resp = embedding(model=self.model_name, input=[text], **embed_kwargs)
+                break
+            except Exception as e:
+                if getattr(e, "status_code", None) == 400 and len(text) > 10:
+                    text = text[: max(len(text) // 2, 10)]
+                else:
+                    raise
         item = resp.data[0]  # type: ignore
         return item.get("embedding") if isinstance(item, dict) else item.embedding  # type: ignore
 

diff --git a/plugins/_memory/extensions/python/message_loop_prompts_after/_50_recall_memories.py b/plugins/_memory/extensions/python/message_loop_prompts_after/_50_recall_memories.py
@@ -117,7 +117,7 @@ async def search_memories(self, log_item: log.LogItem, loop_data: LoopData, **kw
 
         # otherwise use the message and history as query
         else:
-            query = user_instruction + "\n\n" + history
+            query = (user_instruction + "\n\n" + history)[:4000]
 
         # if there is no query (or just dash by the LLM), do not continue
         if not query or len(query) <= 3: