Skip to content

Commit 6b110b2

Browse files
authored
fix: don't sync *.tmp files on watch (#31)
Fixes #30 Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 88d193e commit 6b110b2

File tree

2 files changed

+204
-13
lines changed

2 files changed

+204
-13
lines changed

src/basic_memory/sync/watch_service.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,15 @@
55
from pathlib import Path
66
from typing import List, Optional, Set
77

8+
from basic_memory.config import ProjectConfig
9+
from basic_memory.services.file_service import FileService
10+
from basic_memory.sync.sync_service import SyncService
811
from loguru import logger
912
from pydantic import BaseModel
1013
from rich.console import Console
1114
from watchfiles import awatch
1215
from watchfiles.main import FileChange, Change
1316

14-
from basic_memory.config import ProjectConfig
15-
from basic_memory.services.file_service import FileService
16-
from basic_memory.sync.sync_service import SyncService
17-
1817
WATCH_STATUS_JSON = "watch-status.json"
1918

2019

@@ -138,6 +137,10 @@ def filter_changes(self, change: Change, path: str) -> bool: # pragma: no cover
138137
if part.startswith("."):
139138
return False
140139

140+
# Skip temp files used in atomic operations
141+
if path.endswith(".tmp"):
142+
return False
143+
141144
return True
142145

143146
async def write_status(self):
@@ -161,6 +164,11 @@ async def handle_changes(self, directory: Path, changes: Set[FileChange]):
161164
for change, path in changes:
162165
# convert to relative path
163166
relative_path = str(Path(path).relative_to(directory))
167+
168+
# Skip .tmp files - they're temporary and shouldn't be synced
169+
if relative_path.endswith(".tmp"):
170+
continue
171+
164172
if change == Change.added:
165173
adds.append(relative_path)
166174
elif change == Change.deleted:
@@ -184,8 +192,8 @@ async def handle_changes(self, directory: Path, changes: Set[FileChange]):
184192
# We don't need to process directories, only the files inside them
185193
# This prevents errors when trying to compute checksums or read directories as files
186194
added_full_path = directory / added_path
187-
if added_full_path.is_dir():
188-
logger.debug("Skipping directory for move detection", path=added_path)
195+
if not added_full_path.exists() or added_full_path.is_dir():
196+
logger.debug("Skipping non-existent or directory path", path=added_path)
189197
processed.add(added_path)
190198
continue
191199

@@ -247,10 +255,12 @@ async def handle_changes(self, directory: Path, changes: Set[FileChange]):
247255
if path not in processed:
248256
# Skip directories - only process files
249257
full_path = directory / path
250-
if full_path.is_dir(): # pragma: no cover
251-
logger.debug("Skipping directory", path=path)
252-
processed.add(path)
253-
continue
258+
if not full_path.exists() or full_path.is_dir():
259+
logger.debug(
260+
"Skipping non-existent or directory path", path=path
261+
) # pragma: no cover
262+
processed.add(path) # pragma: no cover
263+
continue # pragma: no cover
254264

255265
logger.debug("Processing new file", path=path)
256266
entity, checksum = await self.sync_service.sync_file(path, new=True)
@@ -281,8 +291,8 @@ async def handle_changes(self, directory: Path, changes: Set[FileChange]):
281291
if path not in processed:
282292
# Skip directories - only process files
283293
full_path = directory / path
284-
if full_path.is_dir():
285-
logger.debug("Skipping directory", path=path)
294+
if not full_path.exists() or full_path.is_dir():
295+
logger.debug("Skipping non-existent or directory path", path=path)
286296
processed.add(path)
287297
continue
288298

@@ -341,4 +351,4 @@ async def handle_changes(self, directory: Path, changes: Set[FileChange]):
341351
duration_ms=duration_ms,
342352
)
343353

344-
await self.write_status()
354+
await self.write_status()

tests/sync/test_tmp_files.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
"""Test proper handling of .tmp files during sync."""
2+
3+
import asyncio
4+
from pathlib import Path
5+
6+
import pytest
7+
from watchfiles import Change
8+
9+
from basic_memory.sync.watch_service import WatchService
10+
11+
12+
async def create_test_file(path: Path, content: str = "test content") -> None:
13+
"""Create a test file with given content."""
14+
path.parent.mkdir(parents=True, exist_ok=True)
15+
path.write_text(content)
16+
17+
18+
@pytest.mark.asyncio
19+
async def test_temp_file_filter(watch_service):
20+
"""Test that .tmp files are correctly filtered out."""
21+
# Test filter_changes method directly
22+
tmp_path = str(watch_service.config.home / "test.tmp")
23+
assert not watch_service.filter_changes(Change.added, tmp_path)
24+
25+
# Test with valid file
26+
valid_path = str(watch_service.config.home / "test.md")
27+
assert watch_service.filter_changes(Change.added, valid_path)
28+
29+
30+
@pytest.mark.asyncio
31+
async def test_handle_tmp_files(watch_service, test_config, monkeypatch):
32+
"""Test handling of .tmp files during sync process."""
33+
project_dir = test_config.home
34+
35+
# Create a .tmp file - this simulates a file being written with write_file_atomic
36+
tmp_file = project_dir / "test.tmp"
37+
await create_test_file(tmp_file, "This is a temporary file")
38+
39+
# Create the target final file
40+
final_file = project_dir / "test.md"
41+
await create_test_file(final_file, "This is the final file")
42+
43+
# Setup changes that include both the .tmp and final file
44+
changes = {
45+
(Change.added, str(tmp_file)),
46+
(Change.added, str(final_file)),
47+
}
48+
49+
# Track sync_file calls
50+
sync_calls = []
51+
52+
# Mock sync_file to track calls
53+
original_sync_file = watch_service.sync_service.sync_file
54+
55+
async def mock_sync_file(path, new=True):
56+
sync_calls.append(path)
57+
return await original_sync_file(path, new)
58+
59+
monkeypatch.setattr(watch_service.sync_service, "sync_file", mock_sync_file)
60+
61+
# Handle changes
62+
await watch_service.handle_changes(project_dir, changes)
63+
64+
# Verify .tmp file was not processed
65+
assert "test.tmp" not in sync_calls
66+
assert "test.md" in sync_calls
67+
68+
# Verify only the final file got an entity
69+
tmp_entity = await watch_service.sync_service.entity_repository.get_by_file_path("test.tmp")
70+
final_entity = await watch_service.sync_service.entity_repository.get_by_file_path("test.md")
71+
72+
assert tmp_entity is None, "Temp file should not have an entity"
73+
assert final_entity is not None, "Final file should have an entity"
74+
75+
76+
@pytest.mark.asyncio
77+
async def test_atomic_write_tmp_file_handling(watch_service, test_config, monkeypatch):
78+
"""Test handling of file changes during atomic write operations."""
79+
project_dir = test_config.home
80+
81+
# This test simulates the full atomic write process:
82+
# 1. First a .tmp file is created
83+
# 2. Then the .tmp file is renamed to the final file
84+
# 3. Both events are processed by the watch service
85+
86+
# Setup file paths
87+
tmp_path = project_dir / "document.tmp"
88+
final_path = project_dir / "document.md"
89+
90+
# Create mockup of the atomic write process
91+
await create_test_file(tmp_path, "Content for document")
92+
93+
# First batch of changes - .tmp file created
94+
changes1 = {(Change.added, str(tmp_path))}
95+
96+
# Process first batch
97+
await watch_service.handle_changes(project_dir, changes1)
98+
99+
# Now "replace" the temp file with the final file
100+
tmp_path.rename(final_path)
101+
102+
# Second batch of changes - .tmp file deleted, final file added
103+
changes2 = {
104+
(Change.deleted, str(tmp_path)),
105+
(Change.added, str(final_path))
106+
}
107+
108+
# Process second batch
109+
await watch_service.handle_changes(project_dir, changes2)
110+
111+
# Verify only the final file is in the database
112+
tmp_entity = await watch_service.sync_service.entity_repository.get_by_file_path("document.tmp")
113+
final_entity = await watch_service.sync_service.entity_repository.get_by_file_path("document.md")
114+
115+
assert tmp_entity is None, "Temp file should not have an entity"
116+
assert final_entity is not None, "Final file should have an entity"
117+
118+
# Check events
119+
new_events = [e for e in watch_service.state.recent_events if e.action == "new"]
120+
assert len(new_events) == 1
121+
assert new_events[0].path == "document.md"
122+
123+
124+
@pytest.mark.asyncio
125+
async def test_rapid_atomic_writes(watch_service, test_config):
126+
"""Test handling of rapid atomic writes to the same destination."""
127+
project_dir = test_config.home
128+
129+
# This test simulates multiple rapid atomic writes to the same file:
130+
# 1. Several .tmp files are created one after another
131+
# 2. Each is then renamed to the same final file
132+
# 3. Events are batched and processed together
133+
134+
# Setup file paths
135+
tmp1_path = project_dir / "document.1.tmp"
136+
tmp2_path = project_dir / "document.2.tmp"
137+
final_path = project_dir / "document.md"
138+
139+
# Create multiple temp files that will be used in sequence
140+
await create_test_file(tmp1_path, "First version")
141+
await create_test_file(tmp2_path, "Second version")
142+
143+
# Simulate the first atomic write
144+
tmp1_path.rename(final_path)
145+
146+
# Brief pause to ensure file system registers the change
147+
await asyncio.sleep(0.1)
148+
149+
# Read content to verify
150+
content1 = final_path.read_text()
151+
assert content1 == "First version"
152+
153+
# Simulate the second atomic write
154+
tmp2_path.rename(final_path)
155+
156+
# Verify content was updated
157+
content2 = final_path.read_text()
158+
assert content2 == "Second version"
159+
160+
# Create a batch of changes that might arrive in mixed order
161+
changes = {
162+
(Change.added, str(tmp1_path)),
163+
(Change.deleted, str(tmp1_path)),
164+
(Change.added, str(tmp2_path)),
165+
(Change.deleted, str(tmp2_path)),
166+
(Change.added, str(final_path)),
167+
(Change.modified, str(final_path)),
168+
}
169+
170+
# Process all changes
171+
await watch_service.handle_changes(project_dir, changes)
172+
173+
# Verify only the final file is in the database
174+
final_entity = await watch_service.sync_service.entity_repository.get_by_file_path("document.md")
175+
assert final_entity is not None
176+
177+
# Also verify no tmp entities were created
178+
tmp1_entity = await watch_service.sync_service.entity_repository.get_by_file_path("document.1.tmp")
179+
tmp2_entity = await watch_service.sync_service.entity_repository.get_by_file_path("document.2.tmp")
180+
assert tmp1_entity is None
181+
assert tmp2_entity is None

0 commit comments

Comments
 (0)