|
1 | 1 | """Semantic search service regression tests for local SQLite search.""" |
2 | 2 |
|
| 3 | +from datetime import datetime |
| 4 | +from types import SimpleNamespace |
| 5 | +from unittest.mock import AsyncMock |
| 6 | + |
3 | 7 | import pytest |
4 | 8 |
|
| 9 | +from basic_memory.repository import EntityRepository |
| 10 | +from basic_memory.repository.search_repository_base import VectorSyncBatchResult |
5 | 11 | from basic_memory.repository.semantic_errors import ( |
6 | 12 | SemanticDependenciesMissingError, |
7 | 13 | SemanticSearchDisabledError, |
@@ -89,3 +95,125 @@ async def test_semantic_fts_mode_still_returns_observations(search_service, test |
89 | 95 |
|
90 | 96 | assert results |
91 | 97 | assert any(result.type == SearchItemType.OBSERVATION.value for result in results) |
| 98 | + |
| 99 | + |
@pytest.mark.asyncio
async def test_semantic_vector_sync_skips_embed_opt_out_and_clears_vectors(
    search_service, monkeypatch
):
    """A note carrying ``embed: False`` must be cleaned up, not re-embedded.

    The entity lookup is stubbed to return an opted-out entity. The service
    must then never await the repository's vector sync and must issue exactly
    two ``execute_query`` calls (presumably the stale-vector cleanup
    statements -- confirm against the service implementation).
    """
    # Stand-ins for the entity lookup result and the two repository entry points.
    opted_out_entity = SimpleNamespace(id=42, entity_metadata={"embed": False})
    lookup_stub = AsyncMock(return_value=opted_out_entity)
    vector_sync_stub = AsyncMock()
    query_stub = AsyncMock()

    sqlite_repo = _sqlite_repo(search_service)
    sqlite_repo._semantic_enabled = True

    monkeypatch.setattr(search_service.entity_repository, "find_by_id", lookup_stub)
    for attr_name, stub in (
        ("sync_entity_vectors", vector_sync_stub),
        ("execute_query", query_stub),
    ):
        monkeypatch.setattr(sqlite_repo, attr_name, stub)

    await search_service.sync_entity_vectors(42)

    # No embedding work, only the two cleanup queries.
    vector_sync_stub.assert_not_awaited()
    assert query_stub.await_count == 2
| 122 | + |
| 123 | + |
@pytest.mark.asyncio
async def test_semantic_vector_sync_resumes_when_embed_opt_out_removed(
    search_service, monkeypatch
):
    """An entity without the ``embed`` flag goes through regular vector sync.

    With empty ``entity_metadata`` the service must delegate to the
    repository's ``sync_entity_vectors`` exactly once for the entity id and
    must not issue any ``execute_query`` call.
    """
    # Entity whose metadata no longer contains an opt-out marker.
    eligible_entity = SimpleNamespace(id=42, entity_metadata={})
    lookup_stub = AsyncMock(return_value=eligible_entity)
    vector_sync_stub = AsyncMock()
    query_stub = AsyncMock()

    sqlite_repo = _sqlite_repo(search_service)
    sqlite_repo._semantic_enabled = True

    monkeypatch.setattr(search_service.entity_repository, "find_by_id", lookup_stub)
    for attr_name, stub in (
        ("sync_entity_vectors", vector_sync_stub),
        ("execute_query", query_stub),
    ):
        monkeypatch.setattr(sqlite_repo, attr_name, stub)

    await search_service.sync_entity_vectors(42)

    # Normal path: one delegated sync, zero direct queries.
    vector_sync_stub.assert_awaited_once_with(42)
    query_stub.assert_not_awaited()
| 146 | + |
| 147 | + |
@pytest.mark.asyncio
async def test_semantic_vector_sync_batch_skips_embed_opt_out_and_reports_skips(
    search_service, monkeypatch
):
    """Batch sync forwards only eligible ids and counts opted-out notes as skipped.

    Entity 41 carries ``embed: False`` and entity 42 has empty metadata. Only
    ``[42]`` may reach the repository batch sync. The returned result must
    report two entities in total, one synced and one skipped, and exactly two
    ``execute_query`` calls must be awaited (presumably cleanup for the
    opted-out entity -- confirm against the service implementation).
    """
    # One opted-out entity and one eligible entity, in request order.
    looked_up_entities = [
        SimpleNamespace(id=41, entity_metadata={"embed": False}),
        SimpleNamespace(id=42, entity_metadata={}),
    ]
    # What the repository reports for the single entity it actually receives.
    repository_outcome = VectorSyncBatchResult(
        entities_total=1,
        entities_synced=1,
        entities_failed=0,
    )
    batch_sync_stub = AsyncMock(return_value=repository_outcome)
    query_stub = AsyncMock()

    sqlite_repo = _sqlite_repo(search_service)
    sqlite_repo._semantic_enabled = True

    monkeypatch.setattr(
        search_service.entity_repository,
        "find_by_ids",
        AsyncMock(return_value=looked_up_entities),
    )
    monkeypatch.setattr(sqlite_repo, "sync_entity_vectors_batch", batch_sync_stub)
    monkeypatch.setattr(sqlite_repo, "execute_query", query_stub)

    result = await search_service.sync_entity_vectors_batch([41, 42])

    # The repository saw a single call carrying only the eligible id.
    batch_sync_stub.assert_awaited_once()
    forwarded_ids = batch_sync_stub.await_args.args[0]
    assert forwarded_ids == [42]

    # The service-level result accounts for the skipped entity as well.
    assert result.entities_total == 2
    assert result.entities_synced == 1
    assert result.entities_skipped == 1
    assert query_stub.await_count == 2
| 185 | + |
| 186 | + |
@pytest.mark.asyncio
async def test_embed_opt_out_note_still_participates_in_fts(
    search_service, session_maker, test_project
):
    """Opting a note out of embeddings must leave it findable via FTS.

    Creates an entity with ``embed: False`` in its metadata, indexes it with
    known content, then runs an FTS-mode query and expects the entity's id
    among the returned results.
    """
    # Persist a note that is explicitly excluded from semantic embedding.
    entity_fields = {
        "title": "FTS Opt Out",
        "note_type": "note",
        "entity_metadata": {"embed": False},
        "content_type": "text/markdown",
        "file_path": "test/fts-opt-out.md",
        "permalink": "test/fts-opt-out",
        "project_id": test_project.id,
        "created_at": datetime.now(),
        "updated_at": datetime.now(),
    }
    repo = EntityRepository(session_maker, project_id=test_project.id)
    entity = await repo.create(entity_fields)

    await search_service.index_entity(
        entity,
        content="This note should stay searchable through full text indexing.",
    )

    # Query strictly through the full-text path.
    fts_query = SearchQuery(
        text="stay searchable",
        retrieval_mode=SearchRetrievalMode.FTS,
    )
    results = await search_service.search(fts_query)

    hits_for_entity = [hit for hit in results if hit.entity_id == entity.id]
    assert hits_for_entity
0 commit comments