Skip to content

Commit 1ecbfc5

Browse files
groksrc and claude committed
perf: eliminate redundant DB queries in upsert_entity_from_markdown (#709)
- Pass entity object directly into update_entity_relations instead of re-fetching by file_path (saves 1 round-trip, ~300ms on cloud) - Replace final get_by_file_path with find_by_ids PK lookup - Add telemetry sub-spans to update_entity_and_observations and update_entity_relations for per-phase visibility in Logfire Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Drew Cain <groksrc@gmail.com>
1 parent cfa7000 commit 1ecbfc5

File tree

2 files changed

+405
-31
lines changed

2 files changed

+405
-31
lines changed

src/basic_memory/services/entity_service.py

Lines changed: 88 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -840,10 +840,22 @@ async def update_entity_and_observations(
840840
"""
841841
logger.debug(f"Updating entity and observations: {file_path}")
842842

843-
db_entity = await self.repository.get_by_file_path(file_path.as_posix())
843+
with telemetry.scope(
844+
"upsert.update.fetch_entity",
845+
domain="entity_service",
846+
action="upsert",
847+
phase="fetch_entity",
848+
):
849+
db_entity = await self.repository.get_by_file_path(file_path.as_posix())
844850

845851
# Clear observations for entity
846-
await self.observation_repository.delete_by_fields(entity_id=db_entity.id)
852+
with telemetry.scope(
853+
"upsert.update.delete_observations",
854+
domain="entity_service",
855+
action="upsert",
856+
phase="delete_observations",
857+
):
858+
await self.observation_repository.delete_by_fields(entity_id=db_entity.id)
847859

848860
# add new observations
849861
observations = [
@@ -857,7 +869,14 @@ async def update_entity_and_observations(
857869
)
858870
for obs in markdown.observations
859871
]
860-
await self.observation_repository.add_all(observations)
872+
with telemetry.scope(
873+
"upsert.update.insert_observations",
874+
domain="entity_service",
875+
action="upsert",
876+
phase="insert_observations",
877+
count=len(observations),
878+
):
879+
await self.observation_repository.add_all(observations)
861880

862881
# update values from markdown
863882
db_entity = entity_model_from_markdown(file_path, markdown, db_entity)
@@ -871,10 +890,16 @@ async def update_entity_and_observations(
871890
db_entity.last_updated_by = user_id
872891

873892
# update entity
874-
return await self.repository.update(
875-
db_entity.id,
876-
db_entity,
877-
)
893+
with telemetry.scope(
894+
"upsert.update.save_entity",
895+
domain="entity_service",
896+
action="upsert",
897+
phase="save_entity",
898+
):
899+
return await self.repository.update(
900+
db_entity.id,
901+
db_entity,
902+
)
878903

879904
async def upsert_entity_from_markdown(
880905
self,
@@ -888,20 +913,30 @@ async def upsert_entity_from_markdown(
888913
created = await self.create_entity_from_markdown(file_path, markdown)
889914
else:
890915
created = await self.update_entity_and_observations(file_path, markdown)
891-
return await self.update_entity_relations(created.file_path, markdown)
916+
# Pass entity directly — avoids redundant get_by_file_path inside update_entity_relations
917+
return await self.update_entity_relations(created, markdown)
892918

893919
async def update_entity_relations(
894920
self,
895-
path: str,
921+
entity: EntityModel,
896922
markdown: EntityMarkdown,
897923
) -> EntityModel:
898-
"""Update relations for entity"""
899-
logger.debug(f"Updating relations for entity: {path}")
924+
"""Update relations for entity.
900925
901-
db_entity = await self.repository.get_by_file_path(path)
926+
Accepts the entity object directly to avoid a redundant DB fetch.
927+
Only entity.id and entity.permalink are used from the passed-in object.
928+
"""
929+
entity_id = entity.id
930+
logger.debug(f"Updating relations for entity: {entity.file_path}")
902931

903932
# Clear existing relations first
904-
await self.relation_repository.delete_outgoing_relations_from_entity(db_entity.id)
933+
with telemetry.scope(
934+
"upsert.relations.delete_existing",
935+
domain="entity_service",
936+
action="upsert",
937+
phase="delete_relations",
938+
):
939+
await self.relation_repository.delete_outgoing_relations_from_entity(entity_id)
905940

906941
# Batch resolve all relation targets in parallel
907942
if markdown.relations:
@@ -916,7 +951,14 @@ async def update_entity_relations(
916951
]
917952

918953
# Execute all lookups in parallel
919-
resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)
954+
with telemetry.scope(
955+
"upsert.relations.resolve_links",
956+
domain="entity_service",
957+
action="upsert",
958+
phase="resolve_links",
959+
count=len(lookup_tasks),
960+
):
961+
resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)
920962

921963
# Process results and create relation records
922964
relations_to_add = []
@@ -935,7 +977,7 @@ async def update_entity_relations(
935977
# Create the relation
936978
relation = Relation(
937979
project_id=self.relation_repository.project_id,
938-
from_id=db_entity.id,
980+
from_id=entity_id,
939981
to_id=target_id,
940982
to_name=target_name,
941983
relation_type=rel.type,
@@ -945,22 +987,37 @@ async def update_entity_relations(
945987

946988
# Batch insert all relations
947989
if relations_to_add:
948-
try:
949-
await self.relation_repository.add_all(relations_to_add)
950-
except IntegrityError:
951-
# Some relations might be duplicates - fall back to individual inserts
952-
logger.debug("Batch relation insert failed, trying individual inserts")
953-
for relation in relations_to_add:
954-
try:
955-
await self.relation_repository.add(relation)
956-
except IntegrityError:
957-
# Unique constraint violation - relation already exists
958-
logger.debug(
959-
f"Skipping duplicate relation {relation.relation_type} from {db_entity.permalink}"
960-
)
961-
continue
962-
963-
return await self.repository.get_by_file_path(path)
990+
with telemetry.scope(
991+
"upsert.relations.insert_relations",
992+
domain="entity_service",
993+
action="upsert",
994+
phase="insert_relations",
995+
count=len(relations_to_add),
996+
):
997+
try:
998+
await self.relation_repository.add_all(relations_to_add)
999+
except IntegrityError:
1000+
# Some relations might be duplicates - fall back to individual inserts
1001+
logger.debug("Batch relation insert failed, trying individual inserts")
1002+
for relation in relations_to_add:
1003+
try:
1004+
await self.relation_repository.add(relation)
1005+
except IntegrityError:
1006+
# Unique constraint violation - relation already exists
1007+
logger.debug(
1008+
f"Skipping duplicate relation {relation.relation_type} from {entity.permalink}"
1009+
)
1010+
continue
1011+
1012+
# Reload entity with relations via PK lookup (faster than get_by_file_path string match)
1013+
with telemetry.scope(
1014+
"upsert.relations.reload_entity",
1015+
domain="entity_service",
1016+
action="upsert",
1017+
phase="reload_entity",
1018+
):
1019+
reloaded = await self.repository.find_by_ids([entity_id])
1020+
return reloaded[0]
9641021

9651022
async def edit_entity(
9661023
self,

0 commit comments

Comments
 (0)