Skip to content

Commit b7497d7

Browse files
authored
fix: Use filesystem timestamps for entity sync instead of database operation time (#138) (#369)
Signed-off-by: Claude <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent d1431bd commit b7497d7

3 files changed

Lines changed: 101 additions & 5 deletions

File tree

src/basic_memory/sync/sync_service.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -547,6 +547,11 @@ async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optiona
547547
file_content = await self._read_file_async(file_path)
548548
file_contains_frontmatter = has_frontmatter(file_content)
549549

550+
# Get file timestamps for tracking modification times
551+
file_stats = self.file_service.file_stats(path)
552+
created = datetime.fromtimestamp(file_stats.st_ctime).astimezone()
553+
modified = datetime.fromtimestamp(file_stats.st_mtime).astimezone()
554+
550555
# entity markdown will always contain front matter, so it can be used to create/update the entity
551556
entity_markdown = await self.entity_parser.parse_file(path)
552557

@@ -585,8 +590,11 @@ async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optiona
585590
# after relation processing is complete
586591
final_checksum = await self._compute_checksum_async(path)
587592

588-
# set checksum
589-
await self.entity_repository.update(entity.id, {"checksum": final_checksum})
593+
# Update checksum and timestamps from file system
594+
# This ensures temporal ordering in search and recent activity uses actual file modification times
595+
await self.entity_repository.update(
596+
entity.id, {"checksum": final_checksum, "created_at": created, "updated_at": modified}
597+
)
590598

591599
logger.debug(
592600
f"Markdown sync completed: path={path}, entity_id={entity.id}, "
@@ -659,13 +667,18 @@ async def sync_regular_file(self, path: str, new: bool = True) -> Tuple[Optional
659667
# Re-raise if it's a different integrity error
660668
raise
661669
else:
670+
# Get file timestamps for updating modification time
671+
file_stats = self.file_service.file_stats(path)
672+
modified = datetime.fromtimestamp(file_stats.st_mtime).astimezone()
673+
662674
entity = await self.entity_repository.get_by_file_path(path)
663675
if entity is None: # pragma: no cover
664676
logger.error(f"Entity not found for existing file, path={path}")
665677
raise ValueError(f"Entity not found for existing file: {path}")
666678

679+
# Update checksum and modification time from file system
667680
updated = await self.entity_repository.update(
668-
entity.id, {"file_path": path, "checksum": checksum}
681+
entity.id, {"file_path": path, "checksum": checksum, "updated_at": modified}
669682
)
670683

671684
if updated is None: # pragma: no cover

tests/markdown/test_entity_parser_error_handling.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""Tests for entity parser error handling (issues #184 and #185)."""
22

33
import pytest
4-
from pathlib import Path
54
from textwrap import dedent
65

76
from basic_memory.markdown.entity_parser import EntityParser
@@ -213,4 +212,4 @@ async def test_parse_valid_file_still_works(tmp_path):
213212
assert result is not None
214213
assert result.frontmatter.title == "Valid File"
215214
assert result.frontmatter.type == "knowledge"
216-
assert result.frontmatter.tags == ["test", "valid"]
215+
assert result.frontmatter.tags == ["test", "valid"]

tests/sync/test_sync_service.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,90 @@ async def test_sync_preserves_timestamps(
645645
assert abs(entity_updated_epoch - file_stats.st_mtime) < tolerance # Allow tolerance difference
646646

647647

648+
@pytest.mark.asyncio
649+
async def test_sync_updates_timestamps_on_file_modification(
650+
sync_service: SyncService,
651+
project_config: ProjectConfig,
652+
entity_service: EntityService,
653+
):
654+
"""Test that sync updates entity timestamps when files are modified.
655+
656+
This test specifically validates that when an existing file is modified and re-synced,
657+
the updated_at timestamp in the database reflects the file's actual modification time,
658+
not the database operation time. This is critical for accurate temporal ordering in
659+
search and recent_activity queries.
660+
"""
661+
import time
662+
663+
project_dir = project_config.home
664+
665+
# Create initial file
666+
initial_content = """
667+
---
668+
type: knowledge
669+
---
670+
# Test File
671+
Initial content for timestamp test
672+
"""
673+
file_path = project_dir / "timestamp_test.md"
674+
await create_test_file(file_path, initial_content)
675+
676+
# Initial sync
677+
await sync_service.sync(project_config.home)
678+
679+
# Get initial entity and timestamps
680+
entity_before = await entity_service.get_by_permalink("timestamp-test")
681+
initial_updated_at = entity_before.updated_at
682+
683+
# Wait a bit to ensure filesystem timestamp changes
684+
time.sleep(0.1)
685+
686+
# Modify the file content
687+
modified_content = """
688+
---
689+
type: knowledge
690+
---
691+
# Test File
692+
Modified content for timestamp test
693+
694+
## Observations
695+
- [test] This was modified
696+
"""
697+
file_path.write_text(modified_content)
698+
699+
# Wait to ensure mtime is different
700+
time.sleep(0.1)
701+
702+
# Get the file's modification time after our changes
703+
file_stats_after_modification = file_path.stat()
704+
705+
# Re-sync the modified file
706+
await sync_service.sync(project_config.home)
707+
708+
# Get entity after re-sync
709+
entity_after = await entity_service.get_by_permalink("timestamp-test")
710+
711+
# Verify that updated_at changed
712+
assert entity_after.updated_at != initial_updated_at, (
713+
"updated_at should change when file is modified"
714+
)
715+
716+
# Verify that updated_at matches the file's modification time, not db operation time
717+
entity_updated_epoch = entity_after.updated_at.timestamp()
718+
file_mtime = file_stats_after_modification.st_mtime
719+
720+
# Allow 2s difference on Windows due to filesystem timing precision
721+
tolerance = 2 if os.name == "nt" else 1
722+
assert abs(entity_updated_epoch - file_mtime) < tolerance, (
723+
f"Entity updated_at ({entity_after.updated_at}) should match file mtime "
724+
f"({datetime.fromtimestamp(file_mtime)}) within {tolerance}s tolerance"
725+
)
726+
727+
# Verify the content was actually updated
728+
assert len(entity_after.observations) == 1
729+
assert entity_after.observations[0].content == "This was modified"
730+
731+
648732
@pytest.mark.asyncio
649733
async def test_file_move_updates_search_index(
650734
sync_service: SyncService,

0 commit comments

Comments
 (0)