Skip to content

Commit 8c75457

Browse files
committed
perf(core): tune fastembed auto threads
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent a3c912d commit 8c75457

File tree

2 files changed

+32
-8
lines changed

2 files changed

+32
-8
lines changed

src/basic_memory/repository/embedding_provider_factory.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,16 @@ def _available_cpu_count() -> int | None:
2626
process_cpu_count = getattr(os, "process_cpu_count", None)
2727
if callable(process_cpu_count):
2828
cpu_count = process_cpu_count()
29-
if cpu_count is not None and cpu_count > 0:
29+
if isinstance(cpu_count, int) and cpu_count > 0:
3030
return cpu_count
3131

3232
cpu_count = os.cpu_count()
3333
return cpu_count if cpu_count is not None and cpu_count > 0 else None
3434

3535

36-
def _resolve_fastembed_runtime_knobs(app_config: BasicMemoryConfig) -> tuple[int | None, int | None]:
36+
def _resolve_fastembed_runtime_knobs(
37+
app_config: BasicMemoryConfig,
38+
) -> tuple[int | None, int | None]:
3739
"""Resolve FastEmbed threads/parallel from explicit config or CPU-aware defaults."""
3840
configured_threads = app_config.semantic_embedding_threads
3941
configured_parallel = app_config.semantic_embedding_parallel
@@ -45,15 +47,15 @@ def _resolve_fastembed_runtime_knobs(app_config: BasicMemoryConfig) -> tuple[int
4547
return None, None
4648

4749
# Trigger: local laptops and cloud workers expose different CPU budgets.
48-
# Why: FastEmbed throughput wants enough ONNX threads to use the machine,
49-
# but the multiprocessing-style ``parallel`` fan-out can add a lot of
50-
# overhead for this workload and make full rebuilds slower instead of faster.
51-
# Outcome: when config leaves the knobs unset, each process uses a bounded
52-
# thread count and keeps FastEmbed on the simpler single-process path.
50+
# Why: full rebuilds got faster when FastEmbed used most, but not all, of
51+
# the available CPUs. Leaving a little headroom avoids starving the rest of
52+
# the pipeline while still giving ONNX enough threads to stay busy.
53+
# Outcome: when config leaves the knobs unset, each process reserves a small
54+
# CPU cushion and keeps FastEmbed on the simpler single-process path.
5355
if available_cpus <= 2:
5456
return available_cpus, 1
5557

56-
threads = min(8, available_cpus)
58+
threads = min(8, max(2, available_cpus - 2))
5759
return threads, 1
5860

5961

tests/repository/test_openai_provider.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,28 @@ def test_embedding_provider_factory_auto_tunes_fastembed_runtime_knobs_from_cpu_
282282

283283
provider = create_embedding_provider(config)
284284

285+
assert isinstance(provider, FastEmbedEmbeddingProvider)
286+
assert provider.threads == 6
287+
assert provider.parallel == 1
288+
289+
290+
def test_embedding_provider_factory_auto_tuning_caps_large_cpu_budgets(monkeypatch):
291+
"""Large workers should still leave some headroom and stop at the thread cap."""
292+
monkeypatch.setattr(embedding_provider_factory_module.os, "process_cpu_count", lambda: 16)
293+
monkeypatch.setattr(embedding_provider_factory_module.os, "cpu_count", lambda: 16)
294+
295+
config = BasicMemoryConfig(
296+
env="test",
297+
projects={"test-project": "/tmp/basic-memory-test"},
298+
default_project="test-project",
299+
semantic_search_enabled=True,
300+
semantic_embedding_provider="fastembed",
301+
semantic_embedding_threads=None,
302+
semantic_embedding_parallel=None,
303+
)
304+
305+
provider = create_embedding_provider(config)
306+
285307
assert isinstance(provider, FastEmbedEmbeddingProvider)
286308
assert provider.threads == 8
287309
assert provider.parallel == 1

0 commit comments

Comments (0)