Skip to content

Commit b3d5448

Browse files
authored
fix(sync): preserve canonical markdown in single-file sync (#746)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 4e53bb8 commit b3d5448

File tree

8 files changed

+696
-104
lines changed

8 files changed

+696
-104
lines changed

.claude/settings.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
"env": {
44
"CLAUDE_BASH_MAINTAIN_PROJECT_WORKING_DIR": "1",
55
"CLAUDE_CODE_DISABLE_FEEDBACK_SURVEY": "1",
6-
"DISABLE_TELEMETRY": "1",
76
"CLAUDE_CODE_NO_FLICKER": "1",
87
"CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING": "1"
98
},

src/basic_memory/indexing/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
IndexingBatchResult,
1313
IndexInputFile,
1414
IndexProgress,
15+
SyncedMarkdownFile,
1516
)
1617

1718
__all__ = [
@@ -25,5 +26,6 @@
2526
"IndexingBatchResult",
2627
"IndexInputFile",
2728
"IndexProgress",
29+
"SyncedMarkdownFile",
2830
"build_index_batches",
2931
]

src/basic_memory/indexing/batch_indexer.py

Lines changed: 151 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from sqlalchemy.exc import IntegrityError
1313

1414
from basic_memory.config import BasicMemoryConfig
15-
from basic_memory.file_utils import compute_checksum, has_frontmatter
15+
from basic_memory.file_utils import compute_checksum, has_frontmatter, remove_frontmatter
1616
from basic_memory.markdown.schemas import EntityMarkdown
1717
from basic_memory.indexing.models import (
1818
IndexedEntity,
@@ -43,12 +43,19 @@ class _PreparedMarkdownFile:
4343
class _PreparedEntity:
4444
path: str
4545
entity_id: int
46+
permalink: str | None
4647
checksum: str
4748
content_type: str | None
4849
search_content: str | None
4950
markdown_content: str | None = None
5051

5152

53+
@dataclass(slots=True)
class _PersistedMarkdownFile:
    """Pair a prepared markdown file with the entity row stored for it."""

    # Prepared file state as it stands once persistence has finished.
    prepared: _PreparedMarkdownFile
    # Entity row associated with that file.
    entity: Entity
57+
58+
5259
class BatchIndexer:
5360
"""Index already-loaded files without assuming where they came from."""
5461

@@ -118,6 +125,9 @@ async def index_files(
118125
)
119126
error_by_path.update(markdown_errors)
120127
prepared_entities.update(markdown_upserts)
128+
if existing_permalink_by_path is not None:
129+
for path, prepared_entity in markdown_upserts.items():
130+
existing_permalink_by_path[path] = prepared_entity.permalink
121131

122132
regular_upserts, regular_errors = await self._run_bounded(
123133
regular_paths,
@@ -168,6 +178,57 @@ async def index_files(
168178
search_indexed=search_indexed,
169179
)
170180

181+
async def index_markdown_file(
    self,
    file: IndexInputFile,
    *,
    new: bool | None = None,
    existing_permalink_by_path: dict[str, str | None] | None = None,
    index_search: bool = True,
) -> IndexedEntity:
    """Index a single markdown file via the shared normalize/upsert helpers.

    Args:
        file: Markdown input to index.
        new: Forwarded to persistence as ``is_new``; ``None`` leaves the
            decision to the persistence step.
        existing_permalink_by_path: Path-to-permalink map used to reserve
            permalinks held by other paths. Loaded from the entity
            repository when omitted. A caller-supplied map is updated in
            place with this file's permalink.
        index_search: When true, refresh the search index and return its
            result; otherwise build the ``IndexedEntity`` directly.

    Returns:
        The indexed entity description for ``file``.

    Raises:
        ValueError: If ``file`` is not markdown, or the persisted entity
            cannot be reloaded as exactly one row.
    """
    if not self._is_markdown(file):
        raise ValueError(f"index_markdown_file requires markdown input: {file.path}")

    prepared = await self._prepare_markdown_file(file)

    if existing_permalink_by_path is None:
        stored_permalinks = await self.entity_repository.get_file_path_to_permalink_map()
        # Work on a private copy so the repository's mapping is never mutated.
        existing_permalink_by_path = dict(stored_permalinks.items())

    # Permalinks owned by any *other* path must not be handed to this file.
    reserved_permalinks = set()
    for other_path, other_permalink in existing_permalink_by_path.items():
        if other_path != file.path and other_permalink:
            reserved_permalinks.add(other_permalink)

    prepared = await self._normalize_markdown_file(prepared, reserved_permalinks)
    existing_permalink_by_path[file.path] = prepared.markdown.frontmatter.permalink

    persisted = await self._persist_markdown_file(prepared, is_new=new)
    # Persistence may have settled on a different permalink; record the final one.
    existing_permalink_by_path[file.path] = persisted.entity.permalink
    entity_id = persisted.entity.id
    await self._resolve_batch_relations([entity_id], max_concurrent=1)

    reloaded = await self.entity_repository.find_by_ids([entity_id])
    if len(reloaded) != 1:  # pragma: no cover
        raise ValueError(f"Failed to reload indexed entity for {file.path}")
    (entity,) = reloaded
    prepared_entity = self._build_prepared_entity(persisted.prepared, entity)

    if not index_search:
        return IndexedEntity(
            path=prepared_entity.path,
            entity_id=entity.id,
            permalink=entity.permalink,
            checksum=prepared_entity.checksum,
            content_type=prepared_entity.content_type,
            markdown_content=prepared_entity.markdown_content,
        )

    return await self._refresh_search_index(prepared_entity, entity)
231+
171232
# --- Preparation ---
172233

173234
async def _prepare_markdown_file(self, file: IndexInputFile) -> _PreparedMarkdownFile:
@@ -320,34 +381,8 @@ def _reserve_batch_permalink(
320381
# --- Persistence ---
321382

322383
async def _upsert_markdown_file(self, prepared: _PreparedMarkdownFile) -> _PreparedEntity:
323-
existing = await self.entity_repository.get_by_file_path(
324-
prepared.file.path,
325-
load_relations=False,
326-
)
327-
entity = await self.entity_service.upsert_entity_from_markdown(
328-
Path(prepared.file.path),
329-
prepared.markdown,
330-
is_new=existing is None,
331-
)
332-
updated = await self.entity_repository.update(
333-
entity.id,
334-
self._entity_metadata_updates(prepared.file, prepared.final_checksum),
335-
)
336-
if updated is None:
337-
raise ValueError(f"Failed to update markdown entity metadata for {prepared.file.path}")
338-
339-
return _PreparedEntity(
340-
path=prepared.file.path,
341-
entity_id=updated.id,
342-
checksum=prepared.final_checksum,
343-
content_type=prepared.file.content_type,
344-
search_content=(
345-
prepared.markdown.content
346-
if prepared.markdown.content is not None
347-
else prepared.content
348-
),
349-
markdown_content=prepared.content,
350-
)
384+
persisted = await self._persist_markdown_file(prepared)
385+
return self._build_prepared_entity(persisted.prepared, persisted.entity)
351386

352387
async def _upsert_regular_file(self, file: IndexInputFile) -> _PreparedEntity:
353388
checksum = await self._resolve_checksum(file)
@@ -405,6 +440,7 @@ async def _upsert_regular_file(self, file: IndexInputFile) -> _PreparedEntity:
405440
return _PreparedEntity(
406441
path=file.path,
407442
entity_id=updated.id,
443+
permalink=updated.permalink,
408444
checksum=checksum,
409445
content_type=file.content_type,
410446
search_content=None,
@@ -495,6 +531,92 @@ async def _refresh_search_index(
495531

496532
# --- Helpers ---
497533

534+
async def _persist_markdown_file(
    self,
    prepared: _PreparedMarkdownFile,
    *,
    is_new: bool | None = None,
) -> _PersistedMarkdownFile:
    """Upsert a prepared markdown file and store its file metadata.

    Args:
        prepared: Normalized markdown file ready to be written to the database.
        is_new: Whether to treat the file as a new entity. When ``None`` the
            answer is derived from a file-path lookup in the entity repository.

    Returns:
        The (possibly permalink-reconciled) prepared file together with the
        entity returned by the metadata update.

    Raises:
        ValueError: If the metadata update returns no entity.
    """
    # Only hit the repository when the caller has not already told us whether
    # the file is new; the lookup result is not used for anything else.
    if is_new is None:
        existing = await self.entity_repository.get_by_file_path(
            prepared.file.path,
            load_relations=False,
        )
        is_new = existing is None

    entity = await self.entity_service.upsert_entity_from_markdown(
        Path(prepared.file.path),
        prepared.markdown,
        is_new=is_new,
    )
    # Reconciliation may rewrite the file, so it must run before the checksum
    # is stored in the metadata update below.
    prepared = await self._reconcile_persisted_permalink(prepared, entity)
    updated = await self.entity_repository.update(
        entity.id,
        self._entity_metadata_updates(prepared.file, prepared.final_checksum),
    )
    if updated is None:
        raise ValueError(f"Failed to update markdown entity metadata for {prepared.file.path}")
    return _PersistedMarkdownFile(prepared=prepared, entity=updated)
559+
560+
async def _reconcile_persisted_permalink(
    self,
    prepared: _PreparedMarkdownFile,
    entity: Entity,
) -> _PreparedMarkdownFile:
    """Bring the file's frontmatter permalink in line with the persisted entity.

    Returns ``prepared`` unchanged when no write is needed; otherwise writes
    the entity's permalink into the file's frontmatter and returns a new
    prepared file carrying the rewritten content and checksum.
    """
    # Permalinks are switched off entirely: nothing to reconcile.
    if self.app_config.disable_permalinks:
        return prepared

    # Trigger: the source file started without frontmatter and sync is configured
    # to leave frontmatterless files alone.
    # Why: upsert may still assign a DB permalink even when disk content should stay untouched.
    # Outcome: skip reconciliation writes that would silently inject frontmatter.
    if not prepared.file_contains_frontmatter and not self.app_config.ensure_frontmatter_on_sync:
        return prepared

    # No DB permalink, or file and DB already agree.
    if entity.permalink is None:
        return prepared
    if entity.permalink == prepared.markdown.frontmatter.permalink:
        return prepared

    logger.debug(
        "Updating permalink after upsert conflict resolution",
        path=prepared.file.path,
        old_permalink=prepared.markdown.frontmatter.permalink,
        new_permalink=entity.permalink,
    )
    # Keep the in-memory markdown in step with what is about to be written.
    prepared.markdown.frontmatter.metadata["permalink"] = entity.permalink
    frontmatter_update = IndexFrontmatterUpdate(
        path=prepared.file.path,
        metadata={"permalink": entity.permalink},
    )
    written = await self.file_writer.write_frontmatter(frontmatter_update)
    return _PreparedMarkdownFile(
        file=prepared.file,
        content=written.content,
        final_checksum=written.checksum,
        markdown=prepared.markdown,
        file_contains_frontmatter=prepared.file_contains_frontmatter,
    )
600+
601+
def _build_prepared_entity(
    self,
    prepared: _PreparedMarkdownFile,
    entity: Entity,
) -> _PreparedEntity:
    """Assemble the ``_PreparedEntity`` for a persisted markdown file.

    Identity fields (id, permalink) come from ``entity``; path, checksum,
    content type and raw markdown come from ``prepared``.
    """
    # Prefer the parsed markdown body; fall back to the raw content with its
    # frontmatter removed when no parsed body is available.
    search_text = prepared.markdown.content
    if search_text is None:
        search_text = remove_frontmatter(prepared.content)

    source_file = prepared.file
    return _PreparedEntity(
        path=source_file.path,
        entity_id=entity.id,
        permalink=entity.permalink,
        checksum=prepared.final_checksum,
        content_type=source_file.content_type,
        search_content=search_text,
        markdown_content=prepared.content,
    )
619+
498620
async def _resolve_checksum(self, file: IndexInputFile) -> str:
499621
if file.checksum is not None:
500622
return file.checksum

src/basic_memory/indexing/models.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
from dataclasses import dataclass, field
66
from datetime import datetime
7-
from typing import Any, Protocol
7+
from typing import Any, Protocol, TYPE_CHECKING
8+
9+
if TYPE_CHECKING: # pragma: no cover
10+
from basic_memory.models import Entity
811

912

1013
@dataclass(slots=True)
@@ -75,6 +78,19 @@ class IndexedEntity:
7578
markdown_content: str | None = None
7679

7780

81+
@dataclass(slots=True)
class SyncedMarkdownFile:
    """End-to-end outcome of syncing a single markdown file."""

    # Entity associated with the synced file.
    entity: Entity
    # Checksum reported for the file (presumably of ``markdown_content`` -- confirm with producer).
    checksum: str
    # Markdown text of the file after sync.
    markdown_content: str
    # Path of the synced file.
    file_path: str
    # Content type recorded for the file.
    content_type: str
    # Timestamp recorded for the file (NOTE(review): timezone handling not visible here).
    updated_at: datetime
    # File size (presumably in bytes -- confirm with producer).
    size: int
93+
7894
@dataclass(slots=True)
7995
class IndexingBatchResult:
8096
"""Outcome for one batch execution."""

src/basic_memory/services/file_service.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ async def read_file_content(self, path: FilePath) -> str:
273273
logger.warning("File not found", operation="read_file_content", path=str(full_path))
274274
raise
275275
except Exception as e:
276+
if isinstance(e, FileNotFoundError):
277+
logger.warning("File not found", operation="read_file", path=str(full_path))
278+
raise
276279
logger.exception("File read error", path=str(full_path), error=str(e))
277280
raise FileOperationError(f"Failed to read file: {e}")
278281

@@ -366,6 +369,9 @@ async def read_file(self, path: FilePath) -> Tuple[str, str]:
366369
)
367370
return content, checksum
368371

372+
except FileNotFoundError as e:
373+
logger.warning("File not found", operation="read_file", path=str(full_path))
374+
raise FileOperationError(f"Failed to read file: {e}") from e
369375
except Exception as e:
370376
logger.exception("File read error", path=str(full_path), error=str(e))
371377
raise FileOperationError(f"Failed to read file: {e}")

0 commit comments

Comments
 (0)