refactor(core): address review nits

phernandez · phernandez · commit d59b97491279 · 2026-04-08T15:46:09.000-05:00
Signed-off-by: phernandez <paul@basicmachines.co>
diff --git a/src/basic_memory/repository/embedding_provider.py b/src/basic_memory/repository/embedding_provider.py
@@ -1,6 +1,6 @@
 """Embedding provider protocol for pluggable semantic backends."""
 
-from typing import Protocol
+from typing import Any, Protocol
 
 
 class EmbeddingProvider(Protocol):
@@ -16,3 +16,7 @@ async def embed_query(self, text: str) -> list[float]:
     async def embed_documents(self, texts: list[str]) -> list[list[float]]:
         """Embed a list of document chunks."""
         ...
+
+    def runtime_log_attrs(self) -> dict[str, Any]:
+        """Return provider-specific runtime settings suitable for startup logs."""
+        ...
diff --git a/src/basic_memory/repository/embedding_provider_factory.py b/src/basic_memory/repository/embedding_provider_factory.py
@@ -19,6 +19,7 @@
 
 _EMBEDDING_PROVIDER_CACHE: dict[ProviderCacheKey, EmbeddingProvider] = {}
 _EMBEDDING_PROVIDER_CACHE_LOCK = Lock()
+_FASTEMBED_MAX_THREADS = 8
 
 
 def _available_cpu_count() -> int | None:
@@ -55,7 +56,7 @@ def _resolve_fastembed_runtime_knobs(
     if available_cpus <= 2:
         return available_cpus, 1
 
-    threads = min(8, max(2, available_cpus - 2))
+    threads = min(_FASTEMBED_MAX_THREADS, max(2, available_cpus - 2))
     return threads, 1
 
 
diff --git a/src/basic_memory/repository/fastembed_provider.py b/src/basic_memory/repository/fastembed_provider.py
@@ -24,6 +24,15 @@ class FastEmbedEmbeddingProvider(EmbeddingProvider):
     def _effective_parallel(self) -> int | None:
         return self.parallel if self.parallel is not None and self.parallel > 1 else None
 
+    def runtime_log_attrs(self) -> dict[str, int | str | None]:
+        """Return the resolved runtime knobs that shape FastEmbed throughput."""
+        return {
+            "provider_batch_size": self.batch_size,
+            "threads": self.threads,
+            "configured_parallel": self.parallel,
+            "effective_parallel": self._effective_parallel(),
+        }
+
     def __init__(
         self,
         model_name: str = "bge-small-en-v1.5",
diff --git a/src/basic_memory/repository/openai_provider.py b/src/basic_memory/repository/openai_provider.py
@@ -34,6 +34,13 @@ def __init__(
         self._client: Any | None = None
         self._client_lock = asyncio.Lock()
 
+    def runtime_log_attrs(self) -> dict[str, int]:
+        """Return the request fan-out knobs that shape API embedding batches."""
+        return {
+            "provider_batch_size": self.batch_size,
+            "request_concurrency": self.request_concurrency,
+        }
+
     async def _get_client(self) -> Any:
         if self._client is not None:
             return self._client
diff --git a/src/basic_memory/repository/search_repository_base.py b/src/basic_memory/repository/search_repository_base.py
@@ -37,6 +37,7 @@
 HEADER_LINE_PATTERN = re.compile(r"^\s*#{1,6}\s+")
 BULLET_PATTERN = re.compile(r"^[\-\*]\s+")
 OVERSIZED_ENTITY_VECTOR_SHARD_SIZE = 256
+_SQLITE_MAX_PREPARE_WINDOW = 8
 
 
 @dataclass
@@ -800,7 +801,9 @@ async def _sync_entity_vectors_internal(
         batch_start = time.perf_counter()
         backend_name = type(self).__name__.removesuffix("SearchRepository").lower()
 
-        self._log_vector_sync_runtime_settings(backend_name=backend_name, entities_total=total_entities)
+        self._log_vector_sync_runtime_settings(
+            backend_name=backend_name, entities_total=total_entities
+        )
         logger.info(
             "Vector batch sync start: project_id={project_id} entities_total={entities_total} "
             "sync_batch_size={sync_batch_size} prepare_window_size={prepare_window_size}",
@@ -1006,102 +1009,104 @@ def emit_progress(entity_id: int) -> None:
                     for failed_entity_id in affected_entity_ids:
                         emit_progress(failed_entity_id)
 
-        # Trigger: this should never happen after all flushes succeed.
-        # Why: remaining jobs mean runtime tracking drifted from queued jobs.
-        # Outcome: fail-safe marks these entities as failed to avoid false positives.
-        if entity_runtime:
-            orphan_runtime_entities = sorted(entity_runtime.keys())
-            failed_entity_ids.update(orphan_runtime_entities)
-            synced_entity_ids.difference_update(orphan_runtime_entities)
-            deferred_entity_ids.difference_update(orphan_runtime_entities)
-            logger.warning(
-                "Vector batch sync left unfinished entities after flushes: "
-                "project_id={project_id} unfinished_entities={unfinished_entities}",
+            # Trigger: this should never happen after all flushes succeed.
+            # Why: remaining jobs mean runtime tracking drifted from queued jobs.
+            # Outcome: fail-safe marks these entities as failed to avoid false positives.
+            if entity_runtime:
+                orphan_runtime_entities = sorted(entity_runtime.keys())
+                failed_entity_ids.update(orphan_runtime_entities)
+                synced_entity_ids.difference_update(orphan_runtime_entities)
+                deferred_entity_ids.difference_update(orphan_runtime_entities)
+                logger.warning(
+                    "Vector batch sync left unfinished entities after flushes: "
+                    "project_id={project_id} unfinished_entities={unfinished_entities}",
+                    project_id=self.project_id,
+                    unfinished_entities=orphan_runtime_entities,
+                )
+                for failed_entity_id in orphan_runtime_entities:
+                    emit_progress(failed_entity_id)
+
+            # Keep result counters aligned with successful/failed terminal states.
+            synced_entity_ids.difference_update(failed_entity_ids)
+            deferred_entity_ids.difference_update(failed_entity_ids)
+            deferred_entity_ids.difference_update(synced_entity_ids)
+            result.failed_entity_ids = sorted(failed_entity_ids)
+            result.entities_failed = len(result.failed_entity_ids)
+            result.entities_deferred = len(deferred_entity_ids)
+            result.entities_synced = len(synced_entity_ids)
+
+            logger.info(
+                "Vector batch sync complete: project_id={project_id} entities_total={entities_total} "
+                "entities_synced={entities_synced} entities_failed={entities_failed} "
+                "entities_deferred={entities_deferred} "
+                "entities_skipped={entities_skipped} chunks_total={chunks_total} "
+                "chunks_skipped={chunks_skipped} embedding_jobs_total={embedding_jobs_total} "
+                "prepare_seconds_total={prepare_seconds_total:.3f} "
+                "queue_wait_seconds_total={queue_wait_seconds_total:.3f} "
+                "embed_seconds_total={embed_seconds_total:.3f} write_seconds_total={write_seconds_total:.3f}",
                 project_id=self.project_id,
-                unfinished_entities=orphan_runtime_entities,
+                entities_total=result.entities_total,
+                entities_synced=result.entities_synced,
+                entities_failed=result.entities_failed,
+                entities_deferred=result.entities_deferred,
+                entities_skipped=result.entities_skipped,
+                chunks_total=result.chunks_total,
+                chunks_skipped=result.chunks_skipped,
+                embedding_jobs_total=result.embedding_jobs_total,
+                prepare_seconds_total=result.prepare_seconds_total,
+                queue_wait_seconds_total=result.queue_wait_seconds_total,
+                embed_seconds_total=result.embed_seconds_total,
+                write_seconds_total=result.write_seconds_total,
             )
-            for failed_entity_id in orphan_runtime_entities:
-                emit_progress(failed_entity_id)
-
-        # Keep result counters aligned with successful/failed terminal states.
-        synced_entity_ids.difference_update(failed_entity_ids)
-        deferred_entity_ids.difference_update(failed_entity_ids)
-        deferred_entity_ids.difference_update(synced_entity_ids)
-        result.failed_entity_ids = sorted(failed_entity_ids)
-        result.entities_failed = len(result.failed_entity_ids)
-        result.entities_deferred = len(deferred_entity_ids)
-        result.entities_synced = len(synced_entity_ids)
-
-        logger.info(
-            "Vector batch sync complete: project_id={project_id} entities_total={entities_total} "
-            "entities_synced={entities_synced} entities_failed={entities_failed} "
-            "entities_deferred={entities_deferred} "
-            "entities_skipped={entities_skipped} chunks_total={chunks_total} "
-            "chunks_skipped={chunks_skipped} embedding_jobs_total={embedding_jobs_total} "
-            "prepare_seconds_total={prepare_seconds_total:.3f} "
-            "queue_wait_seconds_total={queue_wait_seconds_total:.3f} "
-            "embed_seconds_total={embed_seconds_total:.3f} write_seconds_total={write_seconds_total:.3f}",
-            project_id=self.project_id,
-            entities_total=result.entities_total,
-            entities_synced=result.entities_synced,
-            entities_failed=result.entities_failed,
-            entities_deferred=result.entities_deferred,
-            entities_skipped=result.entities_skipped,
-            chunks_total=result.chunks_total,
-            chunks_skipped=result.chunks_skipped,
-            embedding_jobs_total=result.embedding_jobs_total,
-            prepare_seconds_total=result.prepare_seconds_total,
-            queue_wait_seconds_total=result.queue_wait_seconds_total,
-            embed_seconds_total=result.embed_seconds_total,
-            write_seconds_total=result.write_seconds_total,
-        )
-        batch_total_seconds = time.perf_counter() - batch_start
-        metric_attrs = {
-            "backend": backend_name,
-            "skip_only_batch": result.embedding_jobs_total == 0,
-        }
-        telemetry.record_histogram(
-            "vector_sync_batch_total_seconds",
-            batch_total_seconds,
-            unit="s",
-            **metric_attrs,
-        )
-        telemetry.add_counter("vector_sync_entities_total", result.entities_total, **metric_attrs)
-        telemetry.add_counter(
-            "vector_sync_entities_skipped",
-            result.entities_skipped,
-            **metric_attrs,
-        )
-        telemetry.add_counter(
-            "vector_sync_entities_deferred",
-            result.entities_deferred,
-            **metric_attrs,
-        )
-        telemetry.add_counter(
-            "vector_sync_embedding_jobs_total",
-            result.embedding_jobs_total,
-            **metric_attrs,
-        )
-        telemetry.add_counter("vector_sync_chunks_total", result.chunks_total, **metric_attrs)
-        telemetry.add_counter(
-            "vector_sync_chunks_skipped",
-            result.chunks_skipped,
-            **metric_attrs,
-        )
-        if batch_span is not None:
-            batch_span.set_attributes(
-                {
-                    "backend": backend_name,
-                    "entities_synced": result.entities_synced,
-                    "entities_failed": result.entities_failed,
-                    "entities_deferred": result.entities_deferred,
-                    "entities_skipped": result.entities_skipped,
-                    "embedding_jobs_total": result.embedding_jobs_total,
-                    "chunks_total": result.chunks_total,
-                    "chunks_skipped": result.chunks_skipped,
-                    "batch_total_seconds": batch_total_seconds,
-                }
+            batch_total_seconds = time.perf_counter() - batch_start
+            metric_attrs = {
+                "backend": backend_name,
+                "skip_only_batch": result.embedding_jobs_total == 0,
+            }
+            telemetry.record_histogram(
+                "vector_sync_batch_total_seconds",
+                batch_total_seconds,
+                unit="s",
+                **metric_attrs,
+            )
+            telemetry.add_counter(
+                "vector_sync_entities_total", result.entities_total, **metric_attrs
+            )
+            telemetry.add_counter(
+                "vector_sync_entities_skipped",
+                result.entities_skipped,
+                **metric_attrs,
+            )
+            telemetry.add_counter(
+                "vector_sync_entities_deferred",
+                result.entities_deferred,
+                **metric_attrs,
+            )
+            telemetry.add_counter(
+                "vector_sync_embedding_jobs_total",
+                result.embedding_jobs_total,
+                **metric_attrs,
+            )
+            telemetry.add_counter("vector_sync_chunks_total", result.chunks_total, **metric_attrs)
+            telemetry.add_counter(
+                "vector_sync_chunks_skipped",
+                result.chunks_skipped,
+                **metric_attrs,
             )
+            if batch_span is not None:
+                batch_span.set_attributes(
+                    {
+                        "backend": backend_name,
+                        "entities_synced": result.entities_synced,
+                        "entities_failed": result.entities_failed,
+                        "entities_deferred": result.entities_deferred,
+                        "entities_skipped": result.entities_skipped,
+                        "embedding_jobs_total": result.embedding_jobs_total,
+                        "chunks_total": result.chunks_total,
+                        "chunks_skipped": result.chunks_skipped,
+                        "batch_total_seconds": batch_total_seconds,
+                    }
+                )
 
         return result
 
@@ -1113,7 +1118,10 @@ def _vector_prepare_window_size(self) -> int:
         # explode to the full embed batch size creates unnecessary write contention.
         # Outcome: local backends get a small bounded window, while Postgres keeps
         # its explicit higher concurrency override.
-        return max(1, min(self._semantic_embedding_sync_batch_size, 8))
+        return max(
+            1,
+            min(self._semantic_embedding_sync_batch_size, _SQLITE_MAX_PREPARE_WINDOW),
+        )
 
     @asynccontextmanager
     async def _prepare_entity_write_scope(self):
@@ -1223,14 +1231,18 @@ async def _prepare_entity_vector_jobs_window(
                     session, entity_ids
                 )
         except Exception as exc:
+            # Trigger: the shared read pass failed before we had entity-level diffs.
+            # Why: once the window-level read session breaks, we cannot safely
+            # distinguish one entity from another inside that window.
+            # Outcome: every entity in the window gets the same failure object.
             return [exc for _ in entity_ids]
 
         # Trigger: prepare now does one shared read pass per window instead of
         # paying the same select/join round-trips per entity.
         # Why: both SQLite and Postgres were still burning wall clock in read-side
         # fingerprint/orphan checks even when every entity ended up skipped.
-        # Outcome: we batch the reads once, then fan back out over entities while
-        # preserving input order in the gathered results.
+        # Outcome: we batch the reads once, close that shared read session, and
+        # then fan back out over entities while preserving input order.
         prepared_window = await asyncio.gather(
             *(
                 self._prepare_entity_vector_jobs_prefetched(
@@ -1264,7 +1276,8 @@ async def _prepare_entity_vector_jobs_prefetched(
         prepare_start = sync_start
         source_rows_count = len(source_rows)
 
-        if not source_rows:
+        async def _delete_entity_chunks_and_finish() -> _PreparedEntityVectorSync:
+            """Delete derived rows and return the empty prepare result."""
             async with self._prepare_entity_write_scope():
                 async with db.scoped_session(self.session_maker) as session:
                     await self._prepare_vector_session(session)
@@ -1279,22 +1292,13 @@ async def _prepare_entity_vector_jobs_prefetched(
                 prepare_seconds=prepare_seconds,
             )
 
+        if not source_rows:
+            return await _delete_entity_chunks_and_finish()
+
         chunk_records = self._build_chunk_records(source_rows)
         built_chunk_records_count = len(chunk_records)
         if not chunk_records:
-            async with self._prepare_entity_write_scope():
-                async with db.scoped_session(self.session_maker) as session:
-                    await self._prepare_vector_session(session)
-                    await self._delete_entity_chunks(session, entity_id)
-                    await session.commit()
-            prepare_seconds = time.perf_counter() - prepare_start
-            return _PreparedEntityVectorSync(
-                entity_id=entity_id,
-                sync_start=sync_start,
-                source_rows_count=source_rows_count,
-                embedding_jobs=[],
-                prepare_seconds=prepare_seconds,
-            )
+            return await _delete_entity_chunks_and_finish()
 
         current_entity_fingerprint = self._build_entity_fingerprint(chunk_records)
         current_embedding_model = self._embedding_model_key()
@@ -1607,27 +1611,25 @@ def _log_vector_sync_runtime_settings(self, *, backend_name: str, entities_total
         """
         assert self._embedding_provider is not None
 
-        from basic_memory.repository.fastembed_provider import FastEmbedEmbeddingProvider
-
         provider = self._embedding_provider
-        if isinstance(provider, FastEmbedEmbeddingProvider):
+        runtime_attrs = (
+            provider.runtime_log_attrs() if hasattr(provider, "runtime_log_attrs") else {}
+        )
+        if runtime_attrs:
             logger.info(
                 "Vector batch runtime settings: project_id={project_id} backend={backend} "
                 "entities_total={entities_total} provider={provider} model_name={model_name} "
-                "dimensions={dimensions} provider_batch_size={provider_batch_size} "
-                "sync_batch_size={sync_batch_size} threads={threads} "
-                "configured_parallel={configured_parallel} effective_parallel={effective_parallel}",
+                "dimensions={dimensions} sync_batch_size={sync_batch_size} "
+                "{runtime_attrs}",
                 project_id=self.project_id,
                 backend=backend_name,
                 entities_total=entities_total,
                 provider=type(provider).__name__,
                 model_name=provider.model_name,
                 dimensions=provider.dimensions,
-                provider_batch_size=provider.batch_size,
                 sync_batch_size=self._semantic_embedding_sync_batch_size,
-                threads=provider.threads,
-                configured_parallel=provider.parallel,
-                effective_parallel=provider._effective_parallel(),
+                runtime_attrs=" ".join(f"{key}={value}" for key, value in runtime_attrs.items()),
+                **runtime_attrs,
             )
             return
 
diff --git a/tests/repository/test_openai_provider.py b/tests/repository/test_openai_provider.py