Skip to content

Commit 980b24b

Browse files
committed
Revert "perf(model): batch inference in worker threads"
This reverts commit 0b8835a.
1 parent 2b757f4 commit 980b24b

1 file changed

Lines changed: 8 additions & 5 deletions

File tree

embeddings/src/model/local.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -999,11 +999,14 @@ impl OnnxEmbeddingModel {
999999
.map(|worker_texts| {
10001000
s.spawn(move || -> Result<Vec<Vec<f32>>, LibError> {
10011001
let mut embeddings = Vec::with_capacity(worker_texts.len());
1002-
// Process in batches of bs — same efficiency as batched path
1003-
for batch in worker_texts.chunks(bs) {
1004-
let embs =
1005-
Self::tokenize_and_infer(session, tokenizer, batch, max_input)
1006-
.map_err(|_| LibError::OnnxModelEvalFailed)?;
1002+
for text in worker_texts {
1003+
let embs = Self::tokenize_and_infer(
1004+
session,
1005+
tokenizer,
1006+
std::slice::from_ref(text),
1007+
max_input,
1008+
)
1009+
.map_err(|_| LibError::OnnxModelEvalFailed)?;
10071010
embeddings.extend(embs);
10081011
}
10091012
Ok(embeddings)

0 commit comments

Comments (0)