Skip to content

Commit 1bb42df

Browse files
committed
fix(sync): preserve frontmatterless markdown sync
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 8ef292d commit 1bb42df

File tree

4 files changed

+116
-4
lines changed

4 files changed

+116
-4
lines changed

src/basic_memory/indexing/batch_indexer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,8 +562,16 @@ async def _reconcile_persisted_permalink(
562562
prepared: _PreparedMarkdownFile,
563563
entity: Entity,
564564
) -> _PreparedMarkdownFile:
565+
# Trigger: the source file started without frontmatter and sync is configured
566+
# to leave frontmatterless files alone.
567+
# Why: upsert may still assign a DB permalink even when disk content should stay untouched.
568+
# Outcome: skip reconciliation writes that would silently inject frontmatter.
565569
if (
566570
self.app_config.disable_permalinks
571+
or (
572+
not prepared.file_contains_frontmatter
573+
and not self.app_config.ensure_frontmatter_on_sync
574+
)
567575
or entity.permalink is None
568576
or entity.permalink == prepared.markdown.frontmatter.permalink
569577
):

src/basic_memory/sync/sync_service.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from basic_memory import telemetry
1919
from basic_memory import db
2020
from basic_memory.config import BasicMemoryConfig, ConfigManager
21-
from basic_memory.file_utils import compute_checksum, remove_frontmatter
21+
from basic_memory.file_utils import ParseError, compute_checksum, remove_frontmatter
2222
from basic_memory.indexing import (
2323
BatchIndexer,
2424
IndexFileMetadata,
@@ -1126,9 +1126,18 @@ async def sync_one_markdown_file(
11261126
raise ValueError(f"Failed to update markdown entity metadata for {path}")
11271127

11281128
if index_search:
1129+
# Trigger: markdown may start with '---' as a thematic break or malformed
1130+
# frontmatter that the parser already treated as plain content.
1131+
# Why: one-file sync should not fail after the entity upsert just because
1132+
# strict frontmatter stripping rejects that exact text shape.
1133+
# Outcome: fall back to indexing the raw markdown content for these cases.
1134+
try:
1135+
search_content = remove_frontmatter(final_markdown_content)
1136+
except ParseError:
1137+
search_content = final_markdown_content
11291138
await self.search_service.index_entity_data(
11301139
updated_entity,
1131-
content=remove_frontmatter(final_markdown_content),
1140+
content=search_content,
11321141
)
11331142

11341143
logger.debug(

tests/indexing/test_batch_indexer.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -677,3 +677,61 @@ async def test_batch_indexer_strips_frontmatter_from_search_content_when_body_is
677677
args, kwargs = await_args
678678
assert args[0].id == entity.id
679679
assert kwargs["content"] == remove_frontmatter(persisted_content)
680+
681+
682+
@pytest.mark.asyncio
683+
async def test_batch_indexer_does_not_inject_frontmatter_when_sync_enforcement_is_disabled(
684+
app_config,
685+
entity_service,
686+
entity_repository,
687+
relation_repository,
688+
search_service,
689+
file_service,
690+
project_config,
691+
monkeypatch,
692+
):
693+
app_config.ensure_frontmatter_on_sync = False
694+
695+
created = await entity_service.create_entity_with_content(
696+
EntitySchema(
697+
title="Frontmatterless",
698+
directory="notes",
699+
content="# Frontmatterless\n\nOriginal content.\n",
700+
)
701+
)
702+
path = created.entity.file_path
703+
assert path is not None
704+
existing_permalink = created.entity.permalink
705+
assert existing_permalink is not None
706+
707+
original_content = "# Frontmatterless\n\nBody content.\n"
708+
await _create_file(project_config.home / path, original_content)
709+
710+
original_writer = file_service.update_frontmatter_with_result
711+
frontmatter_writer = AsyncMock(side_effect=original_writer)
712+
monkeypatch.setattr(file_service, "update_frontmatter_with_result", frontmatter_writer)
713+
714+
batch_indexer = _make_batch_indexer(
715+
app_config,
716+
entity_service,
717+
entity_repository,
718+
relation_repository,
719+
search_service,
720+
file_service,
721+
)
722+
723+
indexed = await batch_indexer.index_markdown_file(
724+
await _load_input(file_service, path),
725+
index_search=False,
726+
)
727+
728+
# Trigger: Windows persists CRLF for text files even when the test literal uses LF.
729+
# Why: this assertion cares about preserving a frontmatterless file, not about newline style.
730+
# Outcome: compare against the exact content stored on disk after sync.
731+
persisted_content = (project_config.home / path).read_bytes().decode("utf-8")
732+
entity = await entity_repository.get_by_file_path(path)
733+
assert entity is not None
734+
assert entity.permalink == existing_permalink
735+
assert frontmatter_writer.await_count == 0
736+
assert indexed.markdown_content == persisted_content
737+
assert await file_service.read_file_content(path) == persisted_content

tests/sync/test_sync_one_markdown_file.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,14 @@ async def test_sync_one_markdown_file_returns_original_content_when_no_rewrite_n
146146

147147
result = await sync_service.sync_one_markdown_file("notes/no-rewrite.md", index_search=False)
148148

149+
# Trigger: Windows persists CRLF for text files even when the test literal uses LF.
150+
# Why: this assertion cares about "no rewrite happened", not about pinning one newline style.
151+
# Outcome: compare against the on-disk bytes decoded as UTF-8, with no newline translation.
152+
persisted_content = file_path.read_bytes().decode("utf-8")
153+
149154
assert frontmatter_writer.await_count == 0
150-
assert result.markdown_content == original_content
151-
assert file_path.read_text(encoding="utf-8") == original_content
155+
assert result.markdown_content == persisted_content
156+
assert file_path.read_text(encoding="utf-8") == persisted_content
152157
assert result.checksum == await sync_service.file_service.compute_checksum(
153158
"notes/no-rewrite.md"
154159
)
@@ -216,3 +221,35 @@ async def test_sync_markdown_file_remains_tuple_compatible(sync_service, test_pr
216221
assert entity.file_path == "notes/compat.md"
217222
assert entity.permalink == f"{test_project.name}/notes/compat"
218223
assert checksum == await sync_service.file_service.compute_checksum("notes/compat.md")
224+
225+
226+
@pytest.mark.asyncio
227+
async def test_sync_one_markdown_file_indexes_thematic_break_content_without_frontmatter(
228+
sync_service,
229+
test_project,
230+
app_config,
231+
monkeypatch,
232+
):
233+
"""Leading thematic-break markdown should index as raw content when frontmatter is absent."""
234+
app_config.ensure_frontmatter_on_sync = False
235+
236+
original_content = "---\nBody content after a thematic break.\n"
237+
file_path = _write_markdown(
238+
Path(test_project.path),
239+
"notes/thematic-break.md",
240+
original_content,
241+
)
242+
243+
index_entity_data = AsyncMock()
244+
monkeypatch.setattr(sync_service.search_service, "index_entity_data", index_entity_data)
245+
246+
result = await sync_service.sync_one_markdown_file("notes/thematic-break.md")
247+
248+
persisted_content = file_path.read_bytes().decode("utf-8")
249+
250+
assert result.markdown_content == persisted_content
251+
assert file_path.read_text(encoding="utf-8") == persisted_content
252+
index_entity_data.assert_awaited_once_with(
253+
result.entity,
254+
content=persisted_content,
255+
)

0 commit comments

Comments
 (0)