@@ -205,16 +205,16 @@ def _extract_embeddings(payload: Any) -> List[List[float]]:
205205 return vectors
206206 raise RuntimeError ("Unexpected llama-cpp-python batch embedding response format" )
207207
def _embed_formatted_text(self, formatted: str) -> EmbedResult:
    """Embed a single already-formatted string via llama-cpp-python.

    Runs one `create_embedding` call on *formatted* and wraps the extracted
    dense vector in an :class:`EmbedResult`. Shared by the single-text
    `embed` path and the sequential fallback of `embed_batch`.
    """
    raw = self._llama.create_embedding(formatted)
    dense = self._extract_embedding(raw)
    return EmbedResult(dense_vector=dense)
208212 def embed (self , text : str , is_query : bool = False ) -> EmbedResult :
209213 formatted = self ._format_text (text , is_query = is_query )
210214
211- def _call () -> EmbedResult :
212- payload = self ._llama .create_embedding (formatted )
213- return EmbedResult (dense_vector = self ._extract_embedding (payload ))
214-
215215 try :
216216 result = self ._run_with_retry (
217- _call ,
217+ lambda : self . _embed_formatted_text ( formatted ) ,
218218 logger = logger ,
219219 operation_name = "local embedding" ,
220220 )
@@ -236,20 +236,35 @@ def embed_batch(self, texts: List[str], is_query: bool = False) -> List[EmbedRes
236236
237237 formatted = [self ._format_text (text , is_query = is_query ) for text in texts ]
238238
239- def _call () -> List [EmbedResult ]:
def _call_batch() -> List[EmbedResult]:
    """Embed every formatted text in a single llama call, one result per input."""
    response = self._llama.create_embedding(formatted)
    wrapped: List[EmbedResult] = []
    for dense in self._extract_embeddings(response):
        wrapped.append(EmbedResult(dense_vector=dense))
    return wrapped
244244
245245 try :
246246 results = self ._run_with_retry (
247- _call ,
247+ _call_batch ,
248248 logger = logger ,
249249 operation_name = "local batch embedding" ,
250250 )
251- except Exception as exc :
252- raise RuntimeError (f"Local batch embedding failed: { exc } " ) from exc
251+ except Exception as batch_exc :
252+ logger .warning (
253+ "Local batch embedding failed for model=%s (%s); falling back to sequential embedding" ,
254+ self .model_name ,
255+ batch_exc ,
256+ )
257+ try :
258+ results = [
259+ self ._run_with_retry (
260+ lambda formatted_text = text : self ._embed_formatted_text (formatted_text ),
261+ logger = logger ,
262+ operation_name = "local sequential batch embedding" ,
263+ )
264+ for text in formatted
265+ ]
266+ except Exception as exc :
267+ raise RuntimeError (f"Local batch embedding failed: { exc } " ) from exc
253268
254269 estimated_tokens = sum (self ._estimate_tokens (text ) for text in formatted )
255270 self .update_token_usage (
0 commit comments