Skip to content

Commit d15f6a8

Browse files
committed
Add batched vector sync orchestration across repositories
1 parent b8a3a14 commit d15f6a8

10 files changed

Lines changed: 526 additions & 127 deletions

File tree

src/basic_memory/config.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,11 @@ class BasicMemoryConfig(BaseSettings):
173173
description="Batch size for embedding generation.",
174174
gt=0,
175175
)
176+
semantic_embedding_sync_batch_size: int = Field(
177+
default=64,
178+
description="Batch size for vector sync orchestration flushes.",
179+
gt=0,
180+
)
176181
semantic_embedding_cache_dir: str | None = Field(
177182
default=None,
178183
description="Optional cache directory for FastEmbed model artifacts.",

src/basic_memory/db.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -128,8 +128,13 @@ async def _run_semantic_embedding_backfill(
128128
project_id=project_id,
129129
app_config=app_config,
130130
)
131-
for entity_id in entity_ids:
132-
await search_repository.sync_entity_vectors(entity_id)
131+
batch_result = await search_repository.sync_entity_vectors_batch(entity_ids)
132+
if batch_result.entities_failed > 0:
133+
logger.warning(
134+
"Automatic semantic embedding backfill encountered entity failures: "
135+
f"project={project_name}, failed={batch_result.entities_failed}, "
136+
f"failed_entity_ids={batch_result.failed_entity_ids}"
137+
)
133138

134139
logger.info(
135140
"Automatic semantic embedding backfill complete: "

src/basic_memory/repository/postgres_search_repository.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,9 @@ def __init__(
5858
self._semantic_enabled = self._app_config.semantic_search_enabled
5959
self._semantic_vector_k = self._app_config.semantic_vector_k
6060
self._semantic_min_similarity = self._app_config.semantic_min_similarity
61+
self._semantic_embedding_sync_batch_size = (
62+
self._app_config.semantic_embedding_sync_batch_size
63+
)
6164
self._embedding_provider = embedding_provider
6265
self._vector_dimensions = 384
6366
self._vector_tables_initialized = False

src/basic_memory/repository/search_repository.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,15 @@
77
"""
88

99
from datetime import datetime
10-
from typing import List, Optional, Protocol
10+
from typing import Any, Callable, List, Optional, Protocol
1111

1212
from sqlalchemy import Result
1313
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
1414

1515
from basic_memory.config import BasicMemoryConfig, ConfigManager, DatabaseBackend
1616
from basic_memory.repository.postgres_search_repository import PostgresSearchRepository
1717
from basic_memory.repository.search_index_row import SearchIndexRow
18+
from basic_memory.repository.search_repository_base import VectorSyncBatchResult
1819
from basic_memory.repository.sqlite_search_repository import SQLiteSearchRepository
1920
from basic_memory.schemas.search import SearchItemType, SearchRetrievalMode
2021

@@ -69,6 +70,14 @@ async def sync_entity_vectors(self, entity_id: int) -> None:
6970
"""Sync semantic vector chunks for an entity."""
7071
...
7172

73+
async def sync_entity_vectors_batch(
74+
self,
75+
entity_ids: list[int],
76+
progress_callback: Optional[Callable[[int, int, int], Any]] = None,
77+
) -> VectorSyncBatchResult:
78+
"""Sync semantic vector chunks for a batch of entities."""
79+
...
80+
7281
async def execute_query(self, query, params: dict) -> Result:
7382
"""Execute a raw SQL query."""
7483
...

0 commit comments

Comments (0)