Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions src/basic_memory/repository/entity_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
from typing import List, Optional, Sequence, Union

from loguru import logger
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
Expand Down Expand Up @@ -163,25 +164,32 @@ async def upsert_entity(self, entity: Entity) -> Entity:

if existing_entity:
# File path conflict - update the existing entity
for key, value in {
"title": entity.title,
"entity_type": entity.entity_type,
"entity_metadata": entity.entity_metadata,
"content_type": entity.content_type,
"permalink": entity.permalink,
"checksum": entity.checksum,
"updated_at": entity.updated_at,
}.items():
setattr(existing_entity, key, value)

# Clear and re-add observations
existing_entity.observations.clear()
logger.debug(
f"Resolving file_path conflict for {entity.file_path}, "
f"entity_id={existing_entity.id}, observations={len(entity.observations)}"
)
# Use merge to avoid session state conflicts
# Set the ID to update existing entity
entity.id = existing_entity.id

# Ensure observations reference the correct entity_id
for obs in entity.observations:
obs.entity_id = existing_entity.id
existing_entity.observations.append(obs)
# Clear any existing ID to force INSERT as new observation
obs.id = None

# Merge the entity which will update the existing one
merged_entity = await session.merge(entity)

await session.commit()
return existing_entity

# Re-query to get proper relationships loaded
final_result = await session.execute(
select(Entity)
.where(Entity.id == merged_entity.id)
.options(*self.get_load_options())
)
return final_result.scalar_one()

else:
# No file_path conflict - must be permalink conflict
Expand Down
132 changes: 132 additions & 0 deletions tests/repository/test_entity_upsert_issue_187.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Tests for issue #187 - UNIQUE constraint violation on file_path during sync."""

import pytest
from datetime import datetime, timezone

from basic_memory.models.knowledge import Entity, Observation
from basic_memory.repository.entity_repository import EntityRepository


@pytest.mark.asyncio
async def test_upsert_entity_with_observations_conflict(entity_repository: EntityRepository):
"""Test upserting an entity that already exists with observations.

This reproduces issue #187 where sync fails with UNIQUE constraint violations
when trying to update entities that already exist with observations.
"""
# Create initial entity with observations
entity1 = Entity(
project_id=entity_repository.project_id,
title="Original Title",
entity_type="note",
permalink="debugging/backup-system/coderabbit-feedback-resolution",
file_path="debugging/backup-system/CodeRabbit Feedback Resolution - Backup System Issues.md",
content_type="text/markdown",
created_at=datetime.now(timezone.utc),
updated_at=datetime.now(timezone.utc),
)

# Add observations to the entity
obs1 = Observation(
content="This is a test observation",
category="testing",
tags=["test"],
)
entity1.observations.append(obs1)

result1 = await entity_repository.upsert_entity(entity1)
original_id = result1.id

# Verify entity was created with observations
assert result1.id is not None
assert len(result1.observations) == 1

# Now try to upsert the same file_path with different content/observations
# This simulates a file being modified and re-synced
entity2 = Entity(
project_id=entity_repository.project_id,
title="Updated Title",
entity_type="note",
permalink="debugging/backup-system/coderabbit-feedback-resolution", # Same permalink
file_path="debugging/backup-system/CodeRabbit Feedback Resolution - Backup System Issues.md", # Same file_path
content_type="text/markdown",
created_at=datetime.now(timezone.utc),
updated_at=datetime.now(timezone.utc),
)

# Add different observations
obs2 = Observation(
content="This is an updated observation",
category="updated",
tags=["updated"],
)
obs3 = Observation(
content="This is a second observation",
category="second",
tags=["second"],
)
entity2.observations.extend([obs2, obs3])

# This should UPDATE the existing entity, not fail with IntegrityError
result2 = await entity_repository.upsert_entity(entity2)

# Should update existing entity (same ID)
assert result2.id == original_id
assert result2.title == "Updated Title"
assert result2.file_path == entity1.file_path
assert result2.permalink == entity1.permalink

# Observations should be updated
assert len(result2.observations) == 2
assert result2.observations[0].content == "This is an updated observation"
assert result2.observations[1].content == "This is a second observation"


@pytest.mark.asyncio
async def test_upsert_entity_repeated_sync_same_file(entity_repository: EntityRepository):
"""Test that syncing the same file multiple times doesn't cause IntegrityError.

This tests the specific scenario from issue #187 where files are being
synced repeatedly and hitting UNIQUE constraint violations.
"""
file_path = "processes/Complete Process for Uploading New Training Videos.md"
permalink = "processes/complete-process-for-uploading-new-training-videos"

# Create initial entity
entity1 = Entity(
project_id=entity_repository.project_id,
title="Complete Process for Uploading New Training Videos",
entity_type="note",
permalink=permalink,
file_path=file_path,
content_type="text/markdown",
checksum="abc123",
created_at=datetime.now(timezone.utc),
updated_at=datetime.now(timezone.utc),
)

result1 = await entity_repository.upsert_entity(entity1)
first_id = result1.id

# Simulate multiple sync attempts (like the infinite retry loop in the issue)
for i in range(5):
entity_new = Entity(
project_id=entity_repository.project_id,
title="Complete Process for Uploading New Training Videos",
entity_type="note",
permalink=permalink,
file_path=file_path,
content_type="text/markdown",
checksum=f"def{456 + i}", # Different checksum each time
created_at=datetime.now(timezone.utc),
updated_at=datetime.now(timezone.utc),
)

# Each upsert should succeed and update the existing entity
result = await entity_repository.upsert_entity(entity_new)

# Should always return the same entity (updated)
assert result.id == first_id
assert result.checksum == entity_new.checksum
assert result.file_path == file_path
assert result.permalink == permalink
1 change: 1 addition & 0 deletions tests/sync/test_sync_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ async def test_sync_empty_directories(sync_service: SyncService, project_config:
assert (project_config.home).exists()


@pytest.mark.skip("flaky on Windows due to filesystem timing precision")
@pytest.mark.asyncio
async def test_sync_file_modified_during_sync(
sync_service: SyncService, project_config: ProjectConfig
Expand Down
Loading