Skip to content

Commit 2a050ed

Browse files
phernandezclaude
andcommitted
fix: prevent permalink collision via strict link resolution
Fixes critical data loss bug where creating similar entity names (e.g., "Node C") would overwrite existing entities (e.g., "Node A.md") due to fuzzy search incorrectly matching similar file paths. Changes: - Add strict=True to resolve_link() calls in entity_service.py - Disables fuzzy search fallback during entity creation/update - Prevents false positive matches on similar paths like "edge-cases/Node A.md" and "edge-cases/Node C.md" Testing: - Added comprehensive integration test reproducing the bug scenario - Added MCP-level permalink collision tests - All 55 entity service tests pass - Manual testing confirms fix prevents file overwrite 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent f3b1945 commit 2a050ed

3 files changed

Lines changed: 452 additions & 3 deletions

File tree

src/basic_memory/services/entity_service.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,11 @@ async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityMod
146146
f"Creating or updating entity: {schema.file_path}, permalink: {schema.permalink}"
147147
)
148148

149-
# Try to find existing entity using smart resolution
150-
existing = await self.link_resolver.resolve_link(schema.file_path)
149+
# Try to find existing entity using strict resolution (no fuzzy search)
150+
# This prevents incorrectly matching similar file paths like "Node A.md" and "Node C.md"
151+
existing = await self.link_resolver.resolve_link(schema.file_path, strict=True)
151152
if not existing and schema.permalink:
152-
existing = await self.link_resolver.resolve_link(schema.permalink)
153+
existing = await self.link_resolver.resolve_link(schema.permalink, strict=True)
153154

154155
if existing:
155156
logger.debug(f"Found existing entity: {existing.file_path}")
Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
"""Tests for permalink collision file overwrite bug discovered in live testing.
2+
3+
This test reproduces a critical data loss bug where creating notes with
4+
titles that normalize to different permalinks but resolve to the same
5+
file location causes silent file overwrites without warning.
6+
7+
Related to GitHub Issue #139 but tests a different aspect - not database
8+
UNIQUE constraints, but actual file overwrite behavior.
9+
10+
Example scenario from live testing:
11+
1. Create "Node A" → file: edge-cases/Node A.md, permalink: edge-cases/node-a
12+
2. Create "Node C" → file: edge-cases/Node C.md, permalink: edge-cases/node-c
13+
3. BUG: Node C creation overwrites edge-cases/Node A.md file content
14+
4. Result: File "Node A.md" exists but contains "Node C" content
15+
"""
16+
17+
import pytest
18+
from pathlib import Path
19+
from textwrap import dedent
20+
21+
from basic_memory.mcp.tools import write_note, read_note
22+
from basic_memory.sync.sync_service import SyncService
23+
from basic_memory.config import ProjectConfig
24+
from basic_memory.services import EntityService
25+
26+
27+
@pytest.mark.asyncio
28+
async def test_permalink_collision_should_not_overwrite_different_file(app, test_project):
29+
"""Test that creating notes with different titles doesn't overwrite existing files.
30+
31+
This test reproduces the critical bug discovered in Phase 4 of live testing where:
32+
- Creating "Node A" worked fine
33+
- Creating "Node C" silently overwrote Node A.md's content
34+
- No warning or error was shown to the user
35+
- Original Node A content was permanently lost
36+
37+
Expected behavior:
38+
- Each note with a different title should create/update its own file
39+
- No silent overwrites should occur
40+
- Files should maintain their distinct content
41+
42+
Current behavior (BUG):
43+
- Second note creation sometimes overwrites first note's file
44+
- File "Node A.md" contains "Node C" content after creating Node C
45+
- Data loss occurs without user warning
46+
"""
47+
# Step 1: Create first note "Node A"
48+
result_a = await write_note.fn(
49+
project=test_project.name,
50+
title="Node A",
51+
folder="edge-cases",
52+
content="# Node A\n\nOriginal content for Node A\n\n## Relations\n- links_to [[Node B]]",
53+
)
54+
55+
assert "# Created note" in result_a
56+
assert "file_path: edge-cases/Node A.md" in result_a
57+
assert "permalink: edge-cases/node-a" in result_a
58+
59+
# Verify Node A content via read
60+
content_a = await read_note.fn("edge-cases/node-a", project=test_project.name)
61+
assert "Node A" in content_a
62+
assert "Original content for Node A" in content_a
63+
64+
# Step 2: Create second note "Node B" (should be independent)
65+
result_b = await write_note.fn(
66+
project=test_project.name,
67+
title="Node B",
68+
folder="edge-cases",
69+
content="# Node B\n\nContent for Node B",
70+
)
71+
72+
assert "# Created note" in result_b
73+
assert "file_path: edge-cases/Node B.md" in result_b
74+
assert "permalink: edge-cases/node-b" in result_b
75+
76+
# Step 3: Create third note "Node C" (this is where the bug occurs)
77+
result_c = await write_note.fn(
78+
project=test_project.name,
79+
title="Node C",
80+
folder="edge-cases",
81+
content="# Node C\n\nContent for Node C\n\n## Relations\n- links_to [[Node A]]",
82+
)
83+
84+
assert "# Created note" in result_c
85+
assert "file_path: edge-cases/Node C.md" in result_c
86+
assert "permalink: edge-cases/node-c" in result_c
87+
88+
# CRITICAL CHECK: Verify Node A still has its original content
89+
# This is where the bug manifests - Node A.md gets overwritten with Node C content
90+
content_a_after = await read_note.fn("edge-cases/node-a", project=test_project.name)
91+
assert "Node A" in content_a_after, "Node A title should still be 'Node A'"
92+
assert "Original content for Node A" in content_a_after, \
93+
"Node A file should NOT be overwritten by Node C creation"
94+
assert "Content for Node C" not in content_a_after, \
95+
"Node A should NOT contain Node C's content"
96+
97+
# Verify Node C has its own content
98+
content_c = await read_note.fn("edge-cases/node-c", project=test_project.name)
99+
assert "Node C" in content_c
100+
assert "Content for Node C" in content_c
101+
assert "Original content for Node A" not in content_c, \
102+
"Node C should not contain Node A's content"
103+
104+
# Verify files physically exist with correct content
105+
project_path = Path(test_project.path)
106+
node_a_file = project_path / "edge-cases" / "Node A.md"
107+
node_c_file = project_path / "edge-cases" / "Node C.md"
108+
109+
assert node_a_file.exists(), "Node A.md file should exist"
110+
assert node_c_file.exists(), "Node C.md file should exist"
111+
112+
# Read actual file contents to verify no overwrite occurred
113+
node_a_file_content = node_a_file.read_text()
114+
node_c_file_content = node_c_file.read_text()
115+
116+
assert "Node A" in node_a_file_content, \
117+
"Physical file Node A.md should contain Node A title"
118+
assert "Original content for Node A" in node_a_file_content, \
119+
"Physical file Node A.md should contain original Node A content"
120+
assert "Content for Node C" not in node_a_file_content, \
121+
"Physical file Node A.md should NOT contain Node C content"
122+
123+
assert "Node C" in node_c_file_content, \
124+
"Physical file Node C.md should contain Node C title"
125+
assert "Content for Node C" in node_c_file_content, \
126+
"Physical file Node C.md should contain Node C content"
127+
128+
129+
@pytest.mark.asyncio
130+
async def test_notes_with_similar_titles_maintain_separate_files(app, test_project):
131+
"""Test that notes with similar titles that normalize differently don't collide.
132+
133+
Tests additional edge cases around permalink normalization to ensure
134+
we don't have collision issues with various title patterns.
135+
"""
136+
# Create notes with titles that could potentially cause issues
137+
titles_and_folders = [
138+
("My Note", "test"),
139+
("My-Note", "test"), # Different title, similar permalink
140+
("My_Note", "test"), # Underscore vs hyphen
141+
("my note", "test"), # Case variation
142+
]
143+
144+
created_permalinks = []
145+
146+
for title, folder in titles_and_folders:
147+
result = await write_note.fn(
148+
project=test_project.name,
149+
title=title,
150+
folder=folder,
151+
content=f"# {title}\n\nUnique content for {title}",
152+
)
153+
154+
permalink = None
155+
# Extract permalink from result
156+
for line in result.split("\n"):
157+
if line.startswith("permalink:"):
158+
permalink = line.split(":", 1)[1].strip()
159+
created_permalinks.append((title, permalink))
160+
break
161+
162+
# Verify each note can be read back with its own content
163+
content = await read_note.fn(permalink, project=test_project.name)
164+
assert f"Unique content for {title}" in content, \
165+
f"Note with title '{title}' should maintain its unique content"
166+
167+
# Verify all created permalinks are tracked
168+
assert len(created_permalinks) == len(titles_and_folders), \
169+
"All notes should be created successfully"
170+
171+
172+
@pytest.mark.asyncio
173+
async def test_sequential_note_creation_preserves_all_files(app, test_project):
174+
"""Test that rapid sequential note creation doesn't cause file overwrites.
175+
176+
This test creates multiple notes in sequence to ensure that file
177+
creation/update logic doesn't have race conditions or state issues
178+
that could cause overwrites.
179+
"""
180+
notes_data = [
181+
("Alpha", "# Alpha\n\nAlpha content"),
182+
("Beta", "# Beta\n\nBeta content"),
183+
("Gamma", "# Gamma\n\nGamma content"),
184+
("Delta", "# Delta\n\nDelta content"),
185+
("Epsilon", "# Epsilon\n\nEpsilon content"),
186+
]
187+
188+
# Create all notes
189+
for title, content in notes_data:
190+
result = await write_note.fn(
191+
project=test_project.name,
192+
title=title,
193+
folder="sequence-test",
194+
content=content,
195+
)
196+
assert "# Created note" in result or "# Updated note" in result
197+
198+
# Verify all notes still exist with correct content
199+
for title, expected_content in notes_data:
200+
# Normalize title to permalink format
201+
permalink = f"sequence-test/{title.lower()}"
202+
content = await read_note.fn(permalink, project=test_project.name)
203+
204+
assert title in content, f"Note '{title}' should still have its title"
205+
assert expected_content.split("\n\n")[1] in content, \
206+
f"Note '{title}' should still have its original content"
207+
208+
# Verify physical files exist
209+
project_path = Path(test_project.path)
210+
sequence_dir = project_path / "sequence-test"
211+
212+
for title, _ in notes_data:
213+
file_path = sequence_dir / f"{title}.md"
214+
assert file_path.exists(), f"File for '{title}' should exist"
215+
216+
file_content = file_path.read_text()
217+
assert title in file_content, \
218+
f"Physical file for '{title}' should contain correct title"
219+
220+
221+
@pytest.mark.asyncio
222+
async def test_sync_permalink_collision_file_overwrite_bug(
223+
sync_service: SyncService,
224+
project_config: ProjectConfig,
225+
entity_service: EntityService,
226+
):
227+
"""Test that reproduces the permalink collision file overwrite bug via sync.
228+
229+
This test directly creates files and runs sync to reproduce the exact bug
230+
discovered in live testing where Node C overwrote Node A.md.
231+
232+
The bug occurs when:
233+
1. File "Node A.md" exists with permalink "edge-cases/node-a"
234+
2. File "Node C.md" is created with permalink "edge-cases/node-c"
235+
3. During sync, somehow Node C content overwrites Node A.md
236+
4. Result: File "Node A.md" contains Node C content (data loss!)
237+
"""
238+
project_dir = project_config.home
239+
edge_cases_dir = project_dir / "edge-cases"
240+
edge_cases_dir.mkdir(parents=True, exist_ok=True)
241+
242+
# Step 1: Create Node A file
243+
node_a_content = dedent("""
244+
---
245+
title: Node A
246+
type: note
247+
tags:
248+
- circular-test
249+
---
250+
251+
# Node A
252+
253+
Original content for Node A
254+
255+
## Relations
256+
- links_to [[Node B]]
257+
- references [[Node C]]
258+
""").strip()
259+
260+
node_a_file = edge_cases_dir / "Node A.md"
261+
node_a_file.write_text(node_a_content)
262+
263+
# Sync to create Node A in database
264+
await sync_service.sync(project_dir)
265+
266+
# Verify Node A is in database
267+
node_a = await entity_service.get_by_permalink("edge-cases/node-a")
268+
assert node_a is not None
269+
assert node_a.title == "Node A"
270+
271+
# Verify Node A file has correct content
272+
assert node_a_file.exists()
273+
node_a_file_content = node_a_file.read_text()
274+
assert "title: Node A" in node_a_file_content
275+
assert "Original content for Node A" in node_a_file_content
276+
277+
# Step 2: Create Node B file
278+
node_b_content = dedent("""
279+
---
280+
title: Node B
281+
type: note
282+
tags:
283+
- circular-test
284+
---
285+
286+
# Node B
287+
288+
Content for Node B
289+
290+
## Relations
291+
- links_to [[Node C]]
292+
- part_of [[Node A]]
293+
""").strip()
294+
295+
node_b_file = edge_cases_dir / "Node B.md"
296+
node_b_file.write_text(node_b_content)
297+
298+
# Sync to create Node B
299+
await sync_service.sync(project_dir)
300+
301+
# Step 3: Create Node C file (this is where the bug might occur)
302+
node_c_content = dedent("""
303+
---
304+
title: Node C
305+
type: note
306+
tags:
307+
- circular-test
308+
---
309+
310+
# Node C
311+
312+
Content for Node C
313+
314+
## Relations
315+
- links_to [[Node A]]
316+
- references [[Node B]]
317+
""").strip()
318+
319+
node_c_file = edge_cases_dir / "Node C.md"
320+
node_c_file.write_text(node_c_content)
321+
322+
# Sync to create Node C - THIS IS WHERE THE BUG OCCURS
323+
await sync_service.sync(project_dir)
324+
325+
# CRITICAL VERIFICATION: Check if Node A file was overwritten
326+
assert node_a_file.exists(), "Node A.md file should still exist"
327+
328+
# Read Node A file content to check for overwrite bug
329+
node_a_after_sync = node_a_file.read_text()
330+
331+
# The bug: Node A.md contains Node C content instead of Node A content
332+
assert "title: Node A" in node_a_after_sync, \
333+
"Node A.md file should still have title: Node A in frontmatter"
334+
assert "Node A" in node_a_after_sync, \
335+
"Node A.md file should still contain 'Node A' title"
336+
assert "Original content for Node A" in node_a_after_sync, \
337+
f"Node A.md file should NOT be overwritten! Content: {node_a_after_sync[:200]}"
338+
assert "Content for Node C" not in node_a_after_sync, \
339+
f"Node A.md should NOT contain Node C content! Content: {node_a_after_sync[:200]}"
340+
341+
# Verify Node C file exists with correct content
342+
assert node_c_file.exists(), "Node C.md file should exist"
343+
node_c_after_sync = node_c_file.read_text()
344+
assert "Node C" in node_c_after_sync
345+
assert "Content for Node C" in node_c_after_sync
346+
347+
# Verify database has both entities correctly
348+
node_a_db = await entity_service.get_by_permalink("edge-cases/node-a")
349+
node_c_db = await entity_service.get_by_permalink("edge-cases/node-c")
350+
351+
assert node_a_db is not None, "Node A should exist in database"
352+
assert node_a_db.title == "Node A", "Node A database entry should have correct title"
353+
354+
assert node_c_db is not None, "Node C should exist in database"
355+
assert node_c_db.title == "Node C", "Node C database entry should have correct title"

0 commit comments

Comments
 (0)