Skip to content

Commit 8ef292d

Browse files
committed
fix(sync): hash one-file markdown from raw bytes
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent ee92812 commit 8ef292d

2 files changed

Lines changed: 15 additions & 6 deletions

File tree

src/basic_memory/sync/sync_service.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1075,9 +1075,18 @@ async def sync_one_markdown_file(
10751075
"""
10761076
logger.debug(f"Parsing markdown file, path: {path}, new: {new}")
10771077

1078-
initial_markdown_content = await self.file_service.read_file_content(path)
1078+
try:
1079+
initial_markdown_bytes = await self.file_service.read_file_bytes(path)
1080+
except FileOperationError as exc:
1081+
# Trigger: FileService wraps binary read failures in FileOperationError.
1082+
# Why: sync_file() treats bare FileNotFoundError as a deletion race and cleans up the DB row.
1083+
# Outcome: preserve that contract while still hashing the exact bytes we loaded.
1084+
if isinstance(exc.__cause__, FileNotFoundError):
1085+
raise exc.__cause__ from exc
1086+
raise
1087+
initial_markdown_content = initial_markdown_bytes.decode("utf-8")
10791088
file_metadata = await self.file_service.get_file_metadata(path)
1080-
initial_checksum = await compute_checksum(initial_markdown_content)
1089+
initial_checksum = await compute_checksum(initial_markdown_bytes)
10811090
indexed = await self.batch_indexer.index_markdown_file(
10821091
IndexInputFile(
10831092
path=path,
@@ -1086,7 +1095,7 @@ async def sync_one_markdown_file(
10861095
content_type=self.file_service.content_type(path),
10871096
last_modified=file_metadata.modified_at,
10881097
created_at=file_metadata.created_at,
1089-
content=initial_markdown_content.encode("utf-8"),
1098+
content=initial_markdown_bytes,
10901099
),
10911100
new=new,
10921101
index_search=False,

tests/sync/test_sync_one_markdown_file.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ async def test_sync_one_markdown_file_does_not_reread_for_initial_checksum_when_
160160
test_project,
161161
monkeypatch,
162162
):
163-
"""Initial checksum comes from the loaded content, not a second storage read."""
163+
"""Initial checksum comes from the loaded file bytes, not a second storage read."""
164164
original_content = dedent(
165165
f"""\
166166
---
@@ -174,7 +174,7 @@ async def test_sync_one_markdown_file_does_not_reread_for_initial_checksum_when_
174174
Body content.
175175
"""
176176
)
177-
_write_markdown(
177+
file_path = _write_markdown(
178178
Path(test_project.path),
179179
"notes/no-rewrite.md",
180180
original_content,
@@ -186,7 +186,7 @@ async def test_sync_one_markdown_file_does_not_reread_for_initial_checksum_when_
186186
result = await sync_service.sync_one_markdown_file("notes/no-rewrite.md", index_search=False)
187187

188188
checksum_spy.assert_not_awaited()
189-
assert result.checksum == await compute_checksum(original_content)
189+
assert result.checksum == await compute_checksum(file_path.read_bytes())
190190

191191

192192
@pytest.mark.asyncio

0 commit comments

Comments
 (0)