|
12 | 12 | from sqlalchemy.exc import IntegrityError |
13 | 13 |
|
14 | 14 | from basic_memory.config import BasicMemoryConfig |
15 | | -from basic_memory.file_utils import compute_checksum, has_frontmatter |
| 15 | +from basic_memory.file_utils import compute_checksum, has_frontmatter, remove_frontmatter |
16 | 16 | from basic_memory.markdown.schemas import EntityMarkdown |
17 | 17 | from basic_memory.indexing.models import ( |
18 | 18 | IndexedEntity, |
@@ -43,12 +43,19 @@ class _PreparedMarkdownFile: |
43 | 43 | class _PreparedEntity: |
44 | 44 | path: str |
45 | 45 | entity_id: int |
| 46 | + permalink: str | None |
46 | 47 | checksum: str |
47 | 48 | content_type: str | None |
48 | 49 | search_content: str | None |
49 | 50 | markdown_content: str | None = None |
50 | 51 |
|
51 | 52 |
|
@dataclass(slots=True)
class _PersistedMarkdownFile:
    """Pairing of a prepared markdown file with the entity row it was persisted as.

    Produced by ``_persist_markdown_file`` so callers can build index records
    from both the on-disk state and the stored entity.
    """

    # Prepared file state; may have been replaced during permalink reconciliation.
    prepared: _PreparedMarkdownFile
    # Entity row after upsert and the follow-up metadata update.
    entity: Entity
52 | 59 | class BatchIndexer: |
53 | 60 | """Index already-loaded files without assuming where they came from.""" |
54 | 61 |
|
@@ -118,6 +125,9 @@ async def index_files( |
118 | 125 | ) |
119 | 126 | error_by_path.update(markdown_errors) |
120 | 127 | prepared_entities.update(markdown_upserts) |
| 128 | + if existing_permalink_by_path is not None: |
| 129 | + for path, prepared_entity in markdown_upserts.items(): |
| 130 | + existing_permalink_by_path[path] = prepared_entity.permalink |
121 | 131 |
|
122 | 132 | regular_upserts, regular_errors = await self._run_bounded( |
123 | 133 | regular_paths, |
@@ -168,6 +178,57 @@ async def index_files( |
168 | 178 | search_indexed=search_indexed, |
169 | 179 | ) |
170 | 180 |
|
| 181 | + async def index_markdown_file( |
| 182 | + self, |
| 183 | + file: IndexInputFile, |
| 184 | + *, |
| 185 | + new: bool | None = None, |
| 186 | + existing_permalink_by_path: dict[str, str | None] | None = None, |
| 187 | + index_search: bool = True, |
| 188 | + ) -> IndexedEntity: |
| 189 | + """Index one markdown file using the same normalization and upsert path as batches.""" |
| 190 | + if not self._is_markdown(file): |
| 191 | + raise ValueError(f"index_markdown_file requires markdown input: {file.path}") |
| 192 | + |
| 193 | + prepared = await self._prepare_markdown_file(file) |
| 194 | + if existing_permalink_by_path is None: |
| 195 | + existing_permalink_by_path = { |
| 196 | + path: permalink |
| 197 | + for path, permalink in ( |
| 198 | + await self.entity_repository.get_file_path_to_permalink_map() |
| 199 | + ).items() |
| 200 | + } |
| 201 | + |
| 202 | + reserved_permalinks = { |
| 203 | + permalink |
| 204 | + for path, permalink in existing_permalink_by_path.items() |
| 205 | + if path != file.path and permalink |
| 206 | + } |
| 207 | + prepared = await self._normalize_markdown_file(prepared, reserved_permalinks) |
| 208 | + existing_permalink_by_path[file.path] = prepared.markdown.frontmatter.permalink |
| 209 | + |
| 210 | + persisted = await self._persist_markdown_file(prepared, is_new=new) |
| 211 | + existing_permalink_by_path[file.path] = persisted.entity.permalink |
| 212 | + await self._resolve_batch_relations([persisted.entity.id], max_concurrent=1) |
| 213 | + |
| 214 | + refreshed = await self.entity_repository.find_by_ids([persisted.entity.id]) |
| 215 | + if len(refreshed) != 1: # pragma: no cover |
| 216 | + raise ValueError(f"Failed to reload indexed entity for {file.path}") |
| 217 | + entity = refreshed[0] |
| 218 | + prepared_entity = self._build_prepared_entity(persisted.prepared, entity) |
| 219 | + |
| 220 | + if index_search: |
| 221 | + return await self._refresh_search_index(prepared_entity, entity) |
| 222 | + |
| 223 | + return IndexedEntity( |
| 224 | + path=prepared_entity.path, |
| 225 | + entity_id=entity.id, |
| 226 | + permalink=entity.permalink, |
| 227 | + checksum=prepared_entity.checksum, |
| 228 | + content_type=prepared_entity.content_type, |
| 229 | + markdown_content=prepared_entity.markdown_content, |
| 230 | + ) |
| 231 | + |
171 | 232 | # --- Preparation --- |
172 | 233 |
|
173 | 234 | async def _prepare_markdown_file(self, file: IndexInputFile) -> _PreparedMarkdownFile: |
@@ -320,34 +381,8 @@ def _reserve_batch_permalink( |
320 | 381 | # --- Persistence --- |
321 | 382 |
|
322 | 383 | async def _upsert_markdown_file(self, prepared: _PreparedMarkdownFile) -> _PreparedEntity: |
323 | | - existing = await self.entity_repository.get_by_file_path( |
324 | | - prepared.file.path, |
325 | | - load_relations=False, |
326 | | - ) |
327 | | - entity = await self.entity_service.upsert_entity_from_markdown( |
328 | | - Path(prepared.file.path), |
329 | | - prepared.markdown, |
330 | | - is_new=existing is None, |
331 | | - ) |
332 | | - updated = await self.entity_repository.update( |
333 | | - entity.id, |
334 | | - self._entity_metadata_updates(prepared.file, prepared.final_checksum), |
335 | | - ) |
336 | | - if updated is None: |
337 | | - raise ValueError(f"Failed to update markdown entity metadata for {prepared.file.path}") |
338 | | - |
339 | | - return _PreparedEntity( |
340 | | - path=prepared.file.path, |
341 | | - entity_id=updated.id, |
342 | | - checksum=prepared.final_checksum, |
343 | | - content_type=prepared.file.content_type, |
344 | | - search_content=( |
345 | | - prepared.markdown.content |
346 | | - if prepared.markdown.content is not None |
347 | | - else prepared.content |
348 | | - ), |
349 | | - markdown_content=prepared.content, |
350 | | - ) |
| 384 | + persisted = await self._persist_markdown_file(prepared) |
| 385 | + return self._build_prepared_entity(persisted.prepared, persisted.entity) |
351 | 386 |
|
352 | 387 | async def _upsert_regular_file(self, file: IndexInputFile) -> _PreparedEntity: |
353 | 388 | checksum = await self._resolve_checksum(file) |
@@ -405,6 +440,7 @@ async def _upsert_regular_file(self, file: IndexInputFile) -> _PreparedEntity: |
405 | 440 | return _PreparedEntity( |
406 | 441 | path=file.path, |
407 | 442 | entity_id=updated.id, |
| 443 | + permalink=updated.permalink, |
408 | 444 | checksum=checksum, |
409 | 445 | content_type=file.content_type, |
410 | 446 | search_content=None, |
@@ -495,6 +531,92 @@ async def _refresh_search_index( |
495 | 531 |
|
496 | 532 | # --- Helpers --- |
497 | 533 |
|
| 534 | + async def _persist_markdown_file( |
| 535 | + self, |
| 536 | + prepared: _PreparedMarkdownFile, |
| 537 | + *, |
| 538 | + is_new: bool | None = None, |
| 539 | + ) -> _PersistedMarkdownFile: |
| 540 | + existing = await self.entity_repository.get_by_file_path( |
| 541 | + prepared.file.path, |
| 542 | + load_relations=False, |
| 543 | + ) |
| 544 | + if is_new is None: |
| 545 | + is_new = existing is None |
| 546 | + entity = await self.entity_service.upsert_entity_from_markdown( |
| 547 | + Path(prepared.file.path), |
| 548 | + prepared.markdown, |
| 549 | + is_new=is_new, |
| 550 | + ) |
| 551 | + prepared = await self._reconcile_persisted_permalink(prepared, entity) |
| 552 | + updated = await self.entity_repository.update( |
| 553 | + entity.id, |
| 554 | + self._entity_metadata_updates(prepared.file, prepared.final_checksum), |
| 555 | + ) |
| 556 | + if updated is None: |
| 557 | + raise ValueError(f"Failed to update markdown entity metadata for {prepared.file.path}") |
| 558 | + return _PersistedMarkdownFile(prepared=prepared, entity=updated) |
| 559 | + |
| 560 | + async def _reconcile_persisted_permalink( |
| 561 | + self, |
| 562 | + prepared: _PreparedMarkdownFile, |
| 563 | + entity: Entity, |
| 564 | + ) -> _PreparedMarkdownFile: |
| 565 | + # Trigger: the source file started without frontmatter and sync is configured |
| 566 | + # to leave frontmatterless files alone. |
| 567 | + # Why: upsert may still assign a DB permalink even when disk content should stay untouched. |
| 568 | + # Outcome: skip reconciliation writes that would silently inject frontmatter. |
| 569 | + if ( |
| 570 | + self.app_config.disable_permalinks |
| 571 | + or ( |
| 572 | + not prepared.file_contains_frontmatter |
| 573 | + and not self.app_config.ensure_frontmatter_on_sync |
| 574 | + ) |
| 575 | + or entity.permalink is None |
| 576 | + or entity.permalink == prepared.markdown.frontmatter.permalink |
| 577 | + ): |
| 578 | + return prepared |
| 579 | + |
| 580 | + logger.debug( |
| 581 | + "Updating permalink after upsert conflict resolution", |
| 582 | + path=prepared.file.path, |
| 583 | + old_permalink=prepared.markdown.frontmatter.permalink, |
| 584 | + new_permalink=entity.permalink, |
| 585 | + ) |
| 586 | + prepared.markdown.frontmatter.metadata["permalink"] = entity.permalink |
| 587 | + write_result = await self.file_writer.write_frontmatter( |
| 588 | + IndexFrontmatterUpdate( |
| 589 | + path=prepared.file.path, |
| 590 | + metadata={"permalink": entity.permalink}, |
| 591 | + ) |
| 592 | + ) |
| 593 | + return _PreparedMarkdownFile( |
| 594 | + file=prepared.file, |
| 595 | + content=write_result.content, |
| 596 | + final_checksum=write_result.checksum, |
| 597 | + markdown=prepared.markdown, |
| 598 | + file_contains_frontmatter=prepared.file_contains_frontmatter, |
| 599 | + ) |
| 600 | + |
| 601 | + def _build_prepared_entity( |
| 602 | + self, |
| 603 | + prepared: _PreparedMarkdownFile, |
| 604 | + entity: Entity, |
| 605 | + ) -> _PreparedEntity: |
| 606 | + return _PreparedEntity( |
| 607 | + path=prepared.file.path, |
| 608 | + entity_id=entity.id, |
| 609 | + permalink=entity.permalink, |
| 610 | + checksum=prepared.final_checksum, |
| 611 | + content_type=prepared.file.content_type, |
| 612 | + search_content=( |
| 613 | + prepared.markdown.content |
| 614 | + if prepared.markdown.content is not None |
| 615 | + else remove_frontmatter(prepared.content) |
| 616 | + ), |
| 617 | + markdown_content=prepared.content, |
| 618 | + ) |
| 619 | + |
498 | 620 | async def _resolve_checksum(self, file: IndexInputFile) -> str: |
499 | 621 | if file.checksum is not None: |
500 | 622 | return file.checksum |
|
0 commit comments