Skip to content

Commit de93959

Browse files
phernandezclaude
andcommitted
Fix concurrent delete race conditions in delete_entity and delete_directory
When multiple delete-directory requests target the same directory concurrently, entities can be deleted between the prefix query and the individual delete calls. Previously this produced hundreds of noisy errors (ValueError, "Delete returned False", NoneType attribute errors) even though the deletes ultimately succeeded. Changes: - 🔧 Return True when entity lookup by ID finds 0 results (already deleted) - 🔧 Treat repository.delete() returning False as success (concurrent delete) - 🔧 Make search index cleanup best-effort during deletes (cascade race) - ✅ Add 5 tests covering concurrent delete scenarios Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 7696fca commit de93959

2 files changed

Lines changed: 134 additions & 2 deletions

File tree

src/basic_memory/services/entity_service.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,10 @@ async def delete_entity(self, permalink_or_id: str | int) -> bool:
735735
entity = await self.get_by_permalink(permalink_or_id)
736736
else:
737737
entities = await self.get_entities_by_id([permalink_or_id])
738+
if len(entities) == 0:
739+
# Entity already deleted (concurrent delete or race condition)
740+
logger.info("Entity already deleted", entity_id=permalink_or_id)
741+
return True
738742
if len(entities) != 1: # pragma: no cover
739743
logger.error(
740744
"Entity lookup error", entity_id=permalink_or_id, found_count=len(entities)
@@ -746,13 +750,28 @@ async def delete_entity(self, permalink_or_id: str | int) -> bool:
746750

747751
# Delete from search index first (if search_service is available)
748752
if self.search_service:
749-
await self.search_service.handle_delete(entity)
753+
try:
754+
await self.search_service.handle_delete(entity)
755+
except Exception:
756+
# Search cleanup is best-effort during concurrent deletes.
757+
# Relationships may have been cascade-deleted by a concurrent request.
758+
logger.warning(
759+
"Search cleanup failed for entity (likely concurrent delete)",
760+
permalink_or_id=permalink_or_id,
761+
exc_info=True,
762+
)
750763

751764
# Delete file
752765
await self.file_service.delete_entity_file(entity)
753766

754767
# Delete from DB (this will cascade to observations/relations)
755-
return await self.repository.delete(entity.id)
768+
# Trigger: repository.delete returns False when entity is already gone (NoResultFound)
769+
# Why: concurrent delete_directory requests can race to delete the same entity
770+
# Outcome: treat as success since the entity is deleted either way
771+
deleted = await self.repository.delete(entity.id)
772+
if not deleted:
773+
logger.info("Entity already removed from DB", entity_id=permalink_or_id)
774+
return True
756775

757776
except EntityNotFoundError:
758777
logger.info(f"Entity not found: {permalink_or_id}")

tests/services/test_entity_service.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,3 +2426,116 @@ async def test_fast_write_entity_null_user_id(entity_service: EntityService):
24262426
entity = await entity_service.fast_write_entity(schema, external_id=str(uuid.uuid4()))
24272427
assert entity.created_by is None
24282428
assert entity.last_updated_by is None
2429+
2430+
2431+
# --- Concurrent Delete Resilience ---
2432+
2433+
2434+
@pytest.mark.asyncio
2435+
async def test_delete_entity_by_id_already_deleted(entity_service: EntityService):
2436+
"""delete_entity returns True when entity was already deleted (concurrent delete)."""
2437+
entity_data = EntitySchema(
2438+
title="ConcurrentDeleteTarget",
2439+
directory="test",
2440+
note_type="test",
2441+
)
2442+
created = await entity_service.create_entity(entity_data)
2443+
entity_id = created.id
2444+
2445+
# Delete once - should succeed
2446+
assert await entity_service.delete_entity(entity_id) is True
2447+
2448+
# Delete again by ID - should return True (already deleted), not raise ValueError
2449+
assert await entity_service.delete_entity(entity_id) is True
2450+
2451+
2452+
@pytest.mark.asyncio
2453+
async def test_delete_entity_by_permalink_already_deleted(entity_service: EntityService):
2454+
"""delete_entity returns True when entity was already deleted by permalink."""
2455+
entity_data = EntitySchema(
2456+
title="ConcurrentDeleteByPermalink",
2457+
directory="test",
2458+
note_type="test",
2459+
)
2460+
created = await entity_service.create_entity(entity_data)
2461+
2462+
# Delete once
2463+
assert await entity_service.delete_entity(created.id) is True
2464+
2465+
# Delete again by permalink - should return True (EntityNotFoundError caught)
2466+
assert await entity_service.delete_entity(entity_data.permalink) is True
2467+
2468+
2469+
@pytest.mark.asyncio
2470+
async def test_delete_directory_concurrent_resilience(entity_service: EntityService):
2471+
"""delete_directory succeeds even when entities are concurrently deleted."""
2472+
# Create entities in a directory
2473+
entities = []
2474+
for i in range(5):
2475+
entity_data = EntitySchema(
2476+
title=f"DirEntity{i}",
2477+
directory="concurrent-test",
2478+
note_type="test",
2479+
)
2480+
created = await entity_service.create_entity(entity_data)
2481+
entities.append(created)
2482+
2483+
# Delete some entities directly to simulate concurrent delete
2484+
await entity_service.delete_entity(entities[0].id)
2485+
await entity_service.delete_entity(entities[2].id)
2486+
2487+
# Now delete the directory - should handle already-deleted entities gracefully
2488+
result = await entity_service.delete_directory("concurrent-test")
2489+
2490+
# Only 3 remain in DB (2 were already deleted before the directory query)
2491+
assert result.total_files == 3
2492+
assert result.successful_deletes == 3
2493+
assert result.failed_deletes == 0
2494+
assert len(result.errors) == 0
2495+
2496+
2497+
@pytest.mark.asyncio
2498+
async def test_delete_directory_all_already_deleted(entity_service: EntityService):
2499+
"""delete_directory handles case where all entities were concurrently deleted."""
2500+
# Create entities
2501+
created_entities = []
2502+
for i in range(3):
2503+
entity_data = EntitySchema(
2504+
title=f"AllGone{i}",
2505+
directory="all-deleted",
2506+
note_type="test",
2507+
)
2508+
created = await entity_service.create_entity(entity_data)
2509+
created_entities.append(created)
2510+
2511+
# Delete all entities directly
2512+
for e in created_entities:
2513+
await entity_service.delete_entity(e.id)
2514+
2515+
# Directory delete should report empty (entities no longer found in prefix query)
2516+
result = await entity_service.delete_directory("all-deleted")
2517+
assert result.total_files == 0
2518+
assert result.successful_deletes == 0
2519+
assert result.failed_deletes == 0
2520+
2521+
2522+
@pytest.mark.asyncio
2523+
async def test_delete_directory_entity_deleted_between_query_and_delete(
2524+
entity_service: EntityService,
2525+
):
2526+
"""Simulates the real race condition: entity exists in prefix query but is deleted
2527+
by a concurrent request before delete_entity is called."""
2528+
# Create entities
2529+
entity_data = EntitySchema(title="RaceTarget", directory="race-dir", note_type="test")
2530+
created = await entity_service.create_entity(entity_data)
2531+
2532+
# Get the entities via prefix query (as delete_directory does)
2533+
entities = await entity_service.repository.find_by_directory_prefix("race-dir")
2534+
assert len(entities) == 1
2535+
2536+
# Now delete the entity behind the scenes (simulating a concurrent request)
2537+
await entity_service.delete_entity(created.id)
2538+
2539+
# Call delete_entity with the stale entity ID - should return True, not raise
2540+
result = await entity_service.delete_entity(entities[0].id)
2541+
assert result is True

0 commit comments

Comments
 (0)