Skip to content

Commit 980b24b

Browse files
committed
Revert "perf(model): batch inference in worker threads"
This reverts commit 0b8835a.
1 parent 2b757f4 commit 980b24b

1 file changed

Lines changed: 8 additions & 5 deletions

File tree

embeddings/src/model/local.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -999,11 +999,14 @@ impl OnnxEmbeddingModel {
999999
.map(|worker_texts| {
10001000
s.spawn(move || -> Result<Vec<Vec<f32>>, LibError> {
10011001
let mut embeddings = Vec::with_capacity(worker_texts.len());
1002-
// Process in batches of bs — same efficiency as batched path
1003-
for batch in worker_texts.chunks(bs) {
1004-
let embs =
1005-
Self::tokenize_and_infer(session, tokenizer, batch, max_input)
1006-
.map_err(|_| LibError::OnnxModelEvalFailed)?;
1002+
for text in worker_texts {
1003+
let embs = Self::tokenize_and_infer(
1004+
session,
1005+
tokenizer,
1006+
std::slice::from_ref(text),
1007+
max_input,
1008+
)
1009+
.map_err(|_| LibError::OnnxModelEvalFailed)?;
10071010
embeddings.extend(embs);
10081011
}
10091012
Ok(embeddings)

0 commit comments

Comments (0)