Skip to content

Commit 42abc53

Browse files
phernandezclaude
andcommitted
fix: Resolve UNIQUE constraint violation in entity upsert with observations (#187)
## Problem Sync was failing with IntegrityError when attempting to update entities that already exist with the same file_path and project_id. This caused infinite retry loops in production, affecting files with observations. Error message: ``` Failed to sync file: error=Failed to create entity: (sqlite3.IntegrityError) UNIQUE constraint failed: entity.file_path, entity.project_id ``` ## Root Cause The upsert_entity method's file_path conflict handling was directly manipulating observation objects and entity attributes, causing SQLAlchemy session state conflicts when trying to merge entities with observations. ## Solution Changed the conflict resolution logic to use SQLAlchemy's merge() method properly: 1. Set the new entity's ID to match the existing entity 2. Clear observation IDs to force new INSERT operations 3. Use session.merge() to properly handle entity state 4. Re-query after commit to get properly loaded relationships This approach correctly handles the SQLAlchemy session lifecycle and avoids constraint violations when updating entities with observations. ## Testing - Added reproduction test for entities with observations - Added test for repeated sync attempts (infinite retry scenario) - All existing upsert tests pass (9/9) - All repository tests pass (108/108) ## Impact - Fixes UNIQUE constraint violations during sync - Prevents infinite retry loops - Works with existing circuit breaker mechanism 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 729a5a3 commit 42abc53

2 files changed

Lines changed: 150 additions & 15 deletions

File tree

src/basic_memory/repository/entity_repository.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -163,25 +163,28 @@ async def upsert_entity(self, entity: Entity) -> Entity:
163163

164164
if existing_entity:
165165
# File path conflict - update the existing entity
166-
for key, value in {
167-
"title": entity.title,
168-
"entity_type": entity.entity_type,
169-
"entity_metadata": entity.entity_metadata,
170-
"content_type": entity.content_type,
171-
"permalink": entity.permalink,
172-
"checksum": entity.checksum,
173-
"updated_at": entity.updated_at,
174-
}.items():
175-
setattr(existing_entity, key, value)
176-
177-
# Clear and re-add observations
178-
existing_entity.observations.clear()
166+
# Use merge to avoid session state conflicts
167+
# Set the ID to update existing entity
168+
entity.id = existing_entity.id
169+
170+
# Ensure observations reference the correct entity_id
179171
for obs in entity.observations:
180172
obs.entity_id = existing_entity.id
181-
existing_entity.observations.append(obs)
173+
# Clear any existing ID to force INSERT as new observation
174+
obs.id = None
175+
176+
# Merge the entity which will update the existing one
177+
merged_entity = await session.merge(entity)
182178

183179
await session.commit()
184-
return existing_entity
180+
181+
# Re-query to get proper relationships loaded
182+
final_result = await session.execute(
183+
select(Entity)
184+
.where(Entity.id == merged_entity.id)
185+
.options(*self.get_load_options())
186+
)
187+
return final_result.scalar_one()
185188

186189
else:
187190
# No file_path conflict - must be permalink conflict
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
"""Tests for issue #187 - UNIQUE constraint violation on file_path during sync."""
2+
3+
import pytest
4+
from datetime import datetime, timezone
5+
6+
from basic_memory.models.knowledge import Entity, Observation
7+
from basic_memory.repository.entity_repository import EntityRepository
8+
9+
10+
@pytest.mark.asyncio
11+
async def test_upsert_entity_with_observations_conflict(entity_repository: EntityRepository):
12+
"""Test upserting an entity that already exists with observations.
13+
14+
This reproduces issue #187 where sync fails with UNIQUE constraint violations
15+
when trying to update entities that already exist with observations.
16+
"""
17+
# Create initial entity with observations
18+
entity1 = Entity(
19+
project_id=entity_repository.project_id,
20+
title="Original Title",
21+
entity_type="note",
22+
permalink="debugging/backup-system/coderabbit-feedback-resolution",
23+
file_path="debugging/backup-system/CodeRabbit Feedback Resolution - Backup System Issues.md",
24+
content_type="text/markdown",
25+
created_at=datetime.now(timezone.utc),
26+
updated_at=datetime.now(timezone.utc),
27+
)
28+
29+
# Add observations to the entity
30+
obs1 = Observation(
31+
content="This is a test observation",
32+
category="testing",
33+
tags=["test"],
34+
)
35+
entity1.observations.append(obs1)
36+
37+
result1 = await entity_repository.upsert_entity(entity1)
38+
original_id = result1.id
39+
40+
# Verify entity was created with observations
41+
assert result1.id is not None
42+
assert len(result1.observations) == 1
43+
44+
# Now try to upsert the same file_path with different content/observations
45+
# This simulates a file being modified and re-synced
46+
entity2 = Entity(
47+
project_id=entity_repository.project_id,
48+
title="Updated Title",
49+
entity_type="note",
50+
permalink="debugging/backup-system/coderabbit-feedback-resolution", # Same permalink
51+
file_path="debugging/backup-system/CodeRabbit Feedback Resolution - Backup System Issues.md", # Same file_path
52+
content_type="text/markdown",
53+
created_at=datetime.now(timezone.utc),
54+
updated_at=datetime.now(timezone.utc),
55+
)
56+
57+
# Add different observations
58+
obs2 = Observation(
59+
content="This is an updated observation",
60+
category="updated",
61+
tags=["updated"],
62+
)
63+
obs3 = Observation(
64+
content="This is a second observation",
65+
category="second",
66+
tags=["second"],
67+
)
68+
entity2.observations.extend([obs2, obs3])
69+
70+
# This should UPDATE the existing entity, not fail with IntegrityError
71+
result2 = await entity_repository.upsert_entity(entity2)
72+
73+
# Should update existing entity (same ID)
74+
assert result2.id == original_id
75+
assert result2.title == "Updated Title"
76+
assert result2.file_path == entity1.file_path
77+
assert result2.permalink == entity1.permalink
78+
79+
# Observations should be updated
80+
assert len(result2.observations) == 2
81+
assert result2.observations[0].content == "This is an updated observation"
82+
assert result2.observations[1].content == "This is a second observation"
83+
84+
85+
@pytest.mark.asyncio
86+
async def test_upsert_entity_repeated_sync_same_file(entity_repository: EntityRepository):
87+
"""Test that syncing the same file multiple times doesn't cause IntegrityError.
88+
89+
This tests the specific scenario from issue #187 where files are being
90+
synced repeatedly and hitting UNIQUE constraint violations.
91+
"""
92+
file_path = "processes/Complete Process for Uploading New Training Videos.md"
93+
permalink = "processes/complete-process-for-uploading-new-training-videos"
94+
95+
# Create initial entity
96+
entity1 = Entity(
97+
project_id=entity_repository.project_id,
98+
title="Complete Process for Uploading New Training Videos",
99+
entity_type="note",
100+
permalink=permalink,
101+
file_path=file_path,
102+
content_type="text/markdown",
103+
checksum="abc123",
104+
created_at=datetime.now(timezone.utc),
105+
updated_at=datetime.now(timezone.utc),
106+
)
107+
108+
result1 = await entity_repository.upsert_entity(entity1)
109+
first_id = result1.id
110+
111+
# Simulate multiple sync attempts (like the infinite retry loop in the issue)
112+
for i in range(5):
113+
entity_new = Entity(
114+
project_id=entity_repository.project_id,
115+
title="Complete Process for Uploading New Training Videos",
116+
entity_type="note",
117+
permalink=permalink,
118+
file_path=file_path,
119+
content_type="text/markdown",
120+
checksum=f"def{456 + i}", # Different checksum each time
121+
created_at=datetime.now(timezone.utc),
122+
updated_at=datetime.now(timezone.utc),
123+
)
124+
125+
# Each upsert should succeed and update the existing entity
126+
result = await entity_repository.upsert_entity(entity_new)
127+
128+
# Should always return the same entity (updated)
129+
assert result.id == first_id
130+
assert result.checksum == entity_new.checksum
131+
assert result.file_path == file_path
132+
assert result.permalink == permalink

0 commit comments

Comments
 (0)