Skip to content

Commit 367fcaa

Browse files
groksrcclaudephernandez
authored
perf: eliminate redundant DB queries in upsert_entity_from_markdown (#714)
Signed-off-by: Drew Cain <groksrc@gmail.com> Signed-off-by: phernandez <paul@basicmachines.co> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: phernandez <paul@basicmachines.co>
1 parent 69808b2 commit 367fcaa

File tree

4 files changed

+605
-63
lines changed

4 files changed

+605
-63
lines changed

src/basic_memory/repository/entity_repository.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,17 @@ async def get_by_id(self, entity_id: int) -> Optional[Entity]: # pragma: no cov
4545
async with db.scoped_session(self.session_maker) as session:
4646
return await self.select_by_id(session, entity_id)
4747

48-
async def get_by_external_id(self, external_id: str) -> Optional[Entity]:
48+
async def _find_one_by_query(self, query, *, load_relations: bool) -> Optional[Entity]:
49+
"""Return one entity row with optional eager loading."""
50+
if load_relations:
51+
return await self.find_one(query)
52+
53+
result = await self.execute_query(query, use_query_options=False)
54+
return result.scalars().one_or_none()
55+
56+
async def get_by_external_id(
57+
self, external_id: str, *, load_relations: bool = True
58+
) -> Optional[Entity]:
4959
"""Get entity by external UUID.
5060
5161
Args:
@@ -54,21 +64,21 @@ async def get_by_external_id(self, external_id: str) -> Optional[Entity]:
5464
Returns:
5565
Entity if found, None otherwise
5666
"""
57-
query = (
58-
self.select().where(Entity.external_id == external_id).options(*self.get_load_options())
59-
)
60-
return await self.find_one(query)
67+
query = self.select().where(Entity.external_id == external_id)
68+
return await self._find_one_by_query(query, load_relations=load_relations)
6169

62-
async def get_by_permalink(self, permalink: str) -> Optional[Entity]:
70+
async def get_by_permalink(
71+
self, permalink: str, *, load_relations: bool = True
72+
) -> Optional[Entity]:
6373
"""Get entity by permalink.
6474
6575
Args:
6676
permalink: Unique identifier for the entity
6777
"""
68-
query = self.select().where(Entity.permalink == permalink).options(*self.get_load_options())
69-
return await self.find_one(query)
78+
query = self.select().where(Entity.permalink == permalink)
79+
return await self._find_one_by_query(query, load_relations=load_relations)
7080

71-
async def get_by_title(self, title: str) -> Sequence[Entity]:
81+
async def get_by_title(self, title: str, *, load_relations: bool = True) -> Sequence[Entity]:
7282
"""Get entities by title, ordered by shortest path first.
7383
7484
When multiple entities share the same title (in different folders),
@@ -82,23 +92,20 @@ async def get_by_title(self, title: str) -> Sequence[Entity]:
8292
self.select()
8393
.where(Entity.title == title)
8494
.order_by(func.length(Entity.file_path), Entity.file_path)
85-
.options(*self.get_load_options())
8695
)
87-
result = await self.execute_query(query)
96+
result = await self.execute_query(query, use_query_options=load_relations)
8897
return list(result.scalars().all())
8998

90-
async def get_by_file_path(self, file_path: Union[Path, str]) -> Optional[Entity]:
99+
async def get_by_file_path(
100+
self, file_path: Union[Path, str], *, load_relations: bool = True
101+
) -> Optional[Entity]:
91102
"""Get entity by file_path.
92103
93104
Args:
94105
file_path: Path to the entity file (will be converted to string internally)
95106
"""
96-
query = (
97-
self.select()
98-
.where(Entity.file_path == Path(file_path).as_posix())
99-
.options(*self.get_load_options())
100-
)
101-
return await self.find_one(query)
107+
query = self.select().where(Entity.file_path == Path(file_path).as_posix())
108+
return await self._find_one_by_query(query, load_relations=load_relations)
102109

103110
# -------------------------------------------------------------------------
104111
# Lightweight methods for permalink resolution (no eager loading)

src/basic_memory/services/entity_service.py

Lines changed: 108 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,17 @@ async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityMod
242242

243243
# Try to find existing entity using strict resolution (no fuzzy search)
244244
# This prevents incorrectly matching similar file paths like "Node A.md" and "Node C.md"
245-
existing = await self.link_resolver.resolve_link(schema.file_path, strict=True)
245+
existing = await self.link_resolver.resolve_link(
246+
schema.file_path,
247+
strict=True,
248+
load_relations=False,
249+
)
246250
if not existing and schema.permalink:
247-
existing = await self.link_resolver.resolve_link(schema.permalink, strict=True)
251+
existing = await self.link_resolver.resolve_link(
252+
schema.permalink,
253+
strict=True,
254+
load_relations=False,
255+
)
248256

249257
if existing:
250258
logger.debug(f"Found existing entity: {existing.file_path}")
@@ -863,10 +871,22 @@ async def update_entity_and_observations(
863871
"""
864872
logger.debug(f"Updating entity and observations: {file_path}")
865873

866-
db_entity = await self.repository.get_by_file_path(file_path.as_posix())
874+
with telemetry.scope(
875+
"upsert.update.fetch_entity",
876+
domain="entity_service",
877+
action="upsert",
878+
phase="fetch_entity",
879+
):
880+
db_entity = await self.repository.get_by_file_path(file_path.as_posix())
867881

868882
# Clear observations for entity
869-
await self.observation_repository.delete_by_fields(entity_id=db_entity.id)
883+
with telemetry.scope(
884+
"upsert.update.delete_observations",
885+
domain="entity_service",
886+
action="upsert",
887+
phase="delete_observations",
888+
):
889+
await self.observation_repository.delete_by_fields(entity_id=db_entity.id)
870890

871891
# add new observations
872892
observations = [
@@ -880,7 +900,14 @@ async def update_entity_and_observations(
880900
)
881901
for obs in markdown.observations
882902
]
883-
await self.observation_repository.add_all(observations)
903+
with telemetry.scope(
904+
"upsert.update.insert_observations",
905+
domain="entity_service",
906+
action="upsert",
907+
phase="insert_observations",
908+
count=len(observations),
909+
):
910+
await self.observation_repository.add_all(observations)
884911

885912
# update values from markdown
886913
db_entity = entity_model_from_markdown(file_path, markdown, db_entity)
@@ -894,10 +921,16 @@ async def update_entity_and_observations(
894921
db_entity.last_updated_by = user_id
895922

896923
# update entity
897-
return await self.repository.update(
898-
db_entity.id,
899-
db_entity,
900-
)
924+
with telemetry.scope(
925+
"upsert.update.save_entity",
926+
domain="entity_service",
927+
action="upsert",
928+
phase="save_entity",
929+
):
930+
return await self.repository.update(
931+
db_entity.id,
932+
db_entity,
933+
)
901934

902935
async def upsert_entity_from_markdown(
903936
self,
@@ -911,20 +944,30 @@ async def upsert_entity_from_markdown(
911944
created = await self.create_entity_from_markdown(file_path, markdown)
912945
else:
913946
created = await self.update_entity_and_observations(file_path, markdown)
914-
return await self.update_entity_relations(created.file_path, markdown)
947+
# Pass entity directly — avoids redundant get_by_file_path inside update_entity_relations
948+
return await self.update_entity_relations(created, markdown)
915949

916950
async def update_entity_relations(
917951
self,
918-
path: str,
952+
entity: EntityModel,
919953
markdown: EntityMarkdown,
920954
) -> EntityModel:
921-
"""Update relations for entity"""
922-
logger.debug(f"Updating relations for entity: {path}")
955+
"""Update relations for entity.
923956
924-
db_entity = await self.repository.get_by_file_path(path)
957+
Accepts the entity object directly to avoid a redundant DB fetch.
958+
Only entity.id and entity.permalink are used from the passed-in object.
959+
"""
960+
entity_id = entity.id
961+
logger.debug(f"Updating relations for entity: {entity.file_path}")
925962

926963
# Clear existing relations first
927-
await self.relation_repository.delete_outgoing_relations_from_entity(db_entity.id)
964+
with telemetry.scope(
965+
"upsert.relations.delete_existing",
966+
domain="entity_service",
967+
action="upsert",
968+
phase="delete_relations",
969+
):
970+
await self.relation_repository.delete_outgoing_relations_from_entity(entity_id)
928971

929972
# Batch resolve all relation targets in parallel
930973
if markdown.relations:
@@ -934,12 +977,23 @@ async def update_entity_relations(
934977
# Use strict=True to disable fuzzy search - only exact matches should create resolved relations
935978
# This ensures forward references (links to non-existent entities) remain unresolved (to_id=NULL)
936979
lookup_tasks = [
937-
self.link_resolver.resolve_link(rel.target, strict=True)
980+
self.link_resolver.resolve_link(
981+
rel.target,
982+
strict=True,
983+
load_relations=False,
984+
)
938985
for rel in markdown.relations
939986
]
940987

941988
# Execute all lookups in parallel
942-
resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)
989+
with telemetry.scope(
990+
"upsert.relations.resolve_links",
991+
domain="entity_service",
992+
action="upsert",
993+
phase="resolve_links",
994+
count=len(lookup_tasks),
995+
):
996+
resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)
943997

944998
# Process results and create relation records
945999
relations_to_add = []
@@ -958,7 +1012,7 @@ async def update_entity_relations(
9581012
# Create the relation
9591013
relation = Relation(
9601014
project_id=self.relation_repository.project_id,
961-
from_id=db_entity.id,
1015+
from_id=entity_id,
9621016
to_id=target_id,
9631017
to_name=target_name,
9641018
relation_type=rel.type,
@@ -968,22 +1022,37 @@ async def update_entity_relations(
9681022

9691023
# Batch insert all relations
9701024
if relations_to_add:
971-
try:
972-
await self.relation_repository.add_all(relations_to_add)
973-
except IntegrityError:
974-
# Some relations might be duplicates - fall back to individual inserts
975-
logger.debug("Batch relation insert failed, trying individual inserts")
976-
for relation in relations_to_add:
977-
try:
978-
await self.relation_repository.add(relation)
979-
except IntegrityError:
980-
# Unique constraint violation - relation already exists
981-
logger.debug(
982-
f"Skipping duplicate relation {relation.relation_type} from {db_entity.permalink}"
983-
)
984-
continue
985-
986-
return await self.repository.get_by_file_path(path)
1025+
with telemetry.scope(
1026+
"upsert.relations.insert_relations",
1027+
domain="entity_service",
1028+
action="upsert",
1029+
phase="insert_relations",
1030+
count=len(relations_to_add),
1031+
):
1032+
try:
1033+
await self.relation_repository.add_all(relations_to_add)
1034+
except IntegrityError:
1035+
# Some relations might be duplicates - fall back to individual inserts
1036+
logger.debug("Batch relation insert failed, trying individual inserts")
1037+
for relation in relations_to_add:
1038+
try:
1039+
await self.relation_repository.add(relation)
1040+
except IntegrityError:
1041+
# Unique constraint violation - relation already exists
1042+
logger.debug(
1043+
f"Skipping duplicate relation {relation.relation_type} from {entity.permalink}"
1044+
)
1045+
continue
1046+
1047+
# Reload entity with relations via PK lookup (faster than get_by_file_path string match)
1048+
with telemetry.scope(
1049+
"upsert.relations.reload_entity",
1050+
domain="entity_service",
1051+
action="upsert",
1052+
phase="reload_entity",
1053+
):
1054+
reloaded = await self.repository.find_by_ids([entity_id])
1055+
return reloaded[0]
9871056

9881057
async def edit_entity(
9891058
self,
@@ -1040,7 +1109,11 @@ async def edit_entity_with_content(
10401109
action="edit",
10411110
phase="resolve_entity",
10421111
):
1043-
entity = await self.link_resolver.resolve_link(identifier, strict=True)
1112+
entity = await self.link_resolver.resolve_link(
1113+
identifier,
1114+
strict=True,
1115+
load_relations=False,
1116+
)
10441117
if not entity:
10451118
raise EntityNotFoundError(f"Entity not found: {identifier}")
10461119

0 commit comments

Comments
 (0)