Skip to content

Commit 69808b2

Browse files
authored
perf(core): reuse written note content after writes (#717)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 6f207c2 commit 69808b2

File tree

8 files changed

+250
-56
lines changed

8 files changed

+250
-56
lines changed

.github/workflows/test.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ concurrency:
66

77
on:
88
push:
9-
branches: [ "main" ]
109
pull_request:
1110
branches: [ "main" ]
1211

@@ -144,7 +143,6 @@ jobs:
144143
test-postgres-unit:
145144
name: Test Postgres Unit (Python ${{ matrix.python-version }})
146145
timeout-minutes: 30
147-
if: github.event_name != 'pull_request' || matrix.python-version == '3.12'
148146
strategy:
149147
fail-fast: false
150148
matrix:
@@ -202,7 +200,6 @@ jobs:
202200
test-postgres-integration:
203201
name: Test Postgres Integration (Python ${{ matrix.python-version }})
204202
timeout-minutes: 45
205-
if: github.event_name != 'pull_request' || matrix.python-version == '3.12'
206203
strategy:
207204
fail-fast: false
208205
matrix:

src/basic_memory/api/v2/routers/knowledge_router.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,13 @@ async def create_entity(
306306
):
307307
if fast:
308308
entity = await entity_service.fast_write_entity(data)
309+
written_content = None
310+
search_content = None
309311
else:
310-
entity = await entity_service.create_entity(data)
312+
write_result = await entity_service.create_entity_with_content(data)
313+
entity = write_result.entity
314+
written_content = write_result.content
315+
search_content = write_result.search_content
311316

312317
if fast:
313318
with telemetry.scope(
@@ -329,7 +334,7 @@ async def create_entity(
329334
action="create_entity",
330335
phase="search_index",
331336
):
332-
await search_service.index_entity(entity)
337+
await search_service.index_entity(entity, content=search_content)
333338
with telemetry.scope(
334339
"api.knowledge.create_entity.vector_sync",
335340
domain="knowledge",
@@ -352,8 +357,15 @@ async def create_entity(
352357
domain="knowledge",
353358
action="create_entity",
354359
phase="read_content",
360+
source="file" if fast else "memory",
355361
):
356-
content = await file_service.read_file_content(entity.file_path)
362+
if fast:
363+
content = await file_service.read_file_content(entity.file_path)
364+
else:
365+
# Non-fast writes already captured the markdown in memory. Reuse it here
366+
# instead of re-reading the file; format_on_save is the one config that can
367+
# still make the persisted file diverge because write_file only returns a checksum.
368+
content = written_content
357369
result = result.model_copy(update={"content": content})
358370

359371
logger.info(
@@ -421,18 +433,28 @@ async def update_entity_by_id(
421433
):
422434
if fast:
423435
entity = await entity_service.fast_write_entity(data, external_id=entity_id)
436+
written_content = None
437+
search_content = None
424438
response.status_code = 200 if existing else 201
425439
else:
426440
if existing:
427-
entity = await entity_service.update_entity(existing, data)
441+
write_result = await entity_service.update_entity_with_content(existing, data)
442+
entity = write_result.entity
443+
written_content = write_result.content
444+
search_content = write_result.search_content
428445
response.status_code = 200
429446
else:
430-
entity = await entity_service.create_entity(data)
447+
write_result = await entity_service.create_entity_with_content(data)
448+
entity = write_result.entity
449+
written_content = write_result.content
450+
search_content = write_result.search_content
431451
if entity.external_id != entity_id:
432452
entity = await entity_repository.update(
433453
entity.id,
434454
{"external_id": entity_id},
435455
)
456+
# external_id fixup only changes the DB row. The file content is unchanged,
457+
# so the markdown captured during the write remains valid downstream.
436458
if not entity:
437459
raise HTTPException(
438460
status_code=404,
@@ -461,7 +483,7 @@ async def update_entity_by_id(
461483
action="update_entity",
462484
phase="search_index",
463485
):
464-
await search_service.index_entity(entity)
486+
await search_service.index_entity(entity, content=search_content)
465487
with telemetry.scope(
466488
"api.knowledge.update_entity.vector_sync",
467489
domain="knowledge",
@@ -484,8 +506,15 @@ async def update_entity_by_id(
484506
domain="knowledge",
485507
action="update_entity",
486508
phase="read_content",
509+
source="file" if fast else "memory",
487510
):
488-
content = await file_service.read_file_content(entity.file_path)
511+
if fast:
512+
content = await file_service.read_file_content(entity.file_path)
513+
else:
514+
# Non-fast writes already captured the markdown in memory. Reuse it here
515+
# instead of re-reading the file; format_on_save is the one config that can
516+
# still make the persisted file diverge because write_file only returns a checksum.
517+
content = written_content
489518
result = result.model_copy(update={"content": content})
490519

491520
logger.info(
@@ -563,16 +592,21 @@ async def edit_entity_by_id(
563592
find_text=data.find_text,
564593
expected_replacements=data.expected_replacements,
565594
)
595+
written_content = None
596+
search_content = None
566597
else:
567598
identifier = entity.permalink or entity.file_path
568-
updated_entity = await entity_service.edit_entity(
599+
write_result = await entity_service.edit_entity_with_content(
569600
identifier=identifier,
570601
operation=data.operation,
571602
content=data.content,
572603
section=data.section,
573604
find_text=data.find_text,
574605
expected_replacements=data.expected_replacements,
575606
)
607+
updated_entity = write_result.entity
608+
written_content = write_result.content
609+
search_content = write_result.search_content
576610

577611
if fast:
578612
with telemetry.scope(
@@ -594,7 +628,7 @@ async def edit_entity_by_id(
594628
action="edit_entity",
595629
phase="search_index",
596630
):
597-
await search_service.index_entity(updated_entity)
631+
await search_service.index_entity(updated_entity, content=search_content)
598632
with telemetry.scope(
599633
"api.knowledge.edit_entity.vector_sync",
600634
domain="knowledge",
@@ -617,8 +651,15 @@ async def edit_entity_by_id(
617651
domain="knowledge",
618652
action="edit_entity",
619653
phase="read_content",
654+
source="file" if fast else "memory",
620655
):
621-
content = await file_service.read_file_content(updated_entity.file_path)
656+
if fast:
657+
content = await file_service.read_file_content(updated_entity.file_path)
658+
else:
659+
# Non-fast writes already captured the markdown in memory. Reuse it here
660+
# instead of re-reading the file; format_on_save is the one config that can
661+
# still make the persisted file diverge because write_file only returns a checksum.
662+
content = written_content
622663
result = result.model_copy(update={"content": content})
623664

624665
logger.info(

src/basic_memory/services/entity_service.py

Lines changed: 70 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Service for managing entities in the database."""
22

33
from collections.abc import Callable
4+
from dataclasses import dataclass
45
from datetime import datetime
56
from pathlib import Path
67
from typing import List, Optional, Sequence, Tuple, Union
@@ -50,6 +51,15 @@
5051
from basic_memory.utils import build_canonical_permalink
5152

5253

54+
@dataclass(frozen=True)
55+
class EntityWriteResult:
56+
"""Persisted entity plus the response/search content produced during this call."""
57+
58+
entity: EntityModel
59+
content: str
60+
search_content: str
61+
62+
5363
class EntityService(BaseService[EntityModel]):
5464
"""Service for managing entities in the database."""
5565

@@ -79,7 +89,7 @@ def __init__(
7989

8090
async def detect_file_path_conflicts(
8191
self, file_path: str, skip_check: bool = False
82-
) -> List[Entity]:
92+
) -> List[str]:
8393
"""Detect potential file path conflicts for a given file path.
8494
8595
This checks for entities with similar file paths that might cause conflicts:
@@ -93,28 +103,19 @@ async def detect_file_path_conflicts(
93103
skip_check: If True, skip the check and return empty list (optimization for bulk operations)
94104
95105
Returns:
96-
List of entities that might conflict with the given file path
106+
List of file paths that might conflict with the given file path
97107
"""
98108
if skip_check:
99109
return []
100110

101111
from basic_memory.utils import detect_potential_file_conflicts
102112

103-
conflicts = []
104-
105-
# Get all existing file paths
106-
all_entities = await self.repository.find_all()
107-
existing_paths = [entity.file_path for entity in all_entities]
113+
# Load only file paths. Conflict detection is on the hot write path and
114+
# does not need observations or relations.
115+
existing_paths = await self.repository.get_all_file_paths()
108116

109117
# Use the enhanced conflict detection utility
110-
conflicting_paths = detect_potential_file_conflicts(file_path, existing_paths)
111-
112-
# Find the entities corresponding to conflicting paths
113-
for entity in all_entities:
114-
if entity.file_path in conflicting_paths:
115-
conflicts.append(entity)
116-
117-
return conflicts
118+
return detect_potential_file_conflicts(file_path, existing_paths)
118119

119120
async def resolve_permalink(
120121
self,
@@ -143,8 +144,7 @@ async def resolve_permalink(
143144
)
144145
if conflicts:
145146
logger.warning(
146-
f"Detected potential file path conflicts for '{file_path_str}': "
147-
f"{[entity.file_path for entity in conflicts]}"
147+
f"Detected potential file path conflicts for '{file_path_str}': {conflicts}"
148148
)
149149

150150
# If markdown has explicit permalink, try to validate it
@@ -255,6 +255,10 @@ async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityMod
255255

256256
async def create_entity(self, schema: EntitySchema) -> EntityModel:
257257
"""Create a new entity and write to filesystem."""
258+
return (await self.create_entity_with_content(schema)).entity
259+
260+
async def create_entity_with_content(self, schema: EntitySchema) -> EntityWriteResult:
261+
"""Create a new entity and return both the entity row and written markdown."""
258262
logger.debug(f"Creating entity: {schema.title}")
259263

260264
# Get file path and ensure it's a Path object
@@ -328,10 +332,23 @@ async def create_entity(self, schema: EntitySchema) -> EntityModel:
328332
action="create",
329333
phase="update_checksum",
330334
):
331-
return await self.repository.update(entity.id, {"checksum": checksum})
335+
updated = await self.repository.update(entity.id, {"checksum": checksum})
336+
if not updated: # pragma: no cover
337+
raise ValueError(f"Failed to update entity checksum after create: {entity.id}")
338+
return EntityWriteResult(
339+
entity=updated,
340+
content=final_content,
341+
search_content=remove_frontmatter(final_content),
342+
)
332343

333344
async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> EntityModel:
334345
"""Update an entity's content and metadata."""
346+
return (await self.update_entity_with_content(entity, schema)).entity
347+
348+
async def update_entity_with_content(
349+
self, entity: EntityModel, schema: EntitySchema
350+
) -> EntityWriteResult:
351+
"""Update an entity and return both the entity row and written markdown."""
335352
logger.debug(
336353
f"Updating entity with permalink: {entity.permalink} content-type: {schema.content_type}"
337354
)
@@ -444,8 +461,14 @@ async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> Enti
444461
phase="update_checksum",
445462
):
446463
entity = await self.repository.update(entity.id, {"checksum": checksum})
464+
if not entity: # pragma: no cover
465+
raise ValueError(f"Failed to update entity checksum after update: {file_path}")
447466

448-
return entity
467+
return EntityWriteResult(
468+
entity=entity,
469+
content=final_content,
470+
search_content=remove_frontmatter(final_content),
471+
)
449472

450473
async def fast_write_entity(
451474
self,
@@ -988,6 +1011,27 @@ async def edit_entity(
9881011
EntityNotFoundError: If the entity cannot be found
9891012
ValueError: If required parameters are missing for the operation or replacement count doesn't match expected
9901013
"""
1014+
return (
1015+
await self.edit_entity_with_content(
1016+
identifier=identifier,
1017+
operation=operation,
1018+
content=content,
1019+
section=section,
1020+
find_text=find_text,
1021+
expected_replacements=expected_replacements,
1022+
)
1023+
).entity
1024+
1025+
async def edit_entity_with_content(
1026+
self,
1027+
identifier: str,
1028+
operation: str,
1029+
content: str,
1030+
section: Optional[str] = None,
1031+
find_text: Optional[str] = None,
1032+
expected_replacements: int = 1,
1033+
) -> EntityWriteResult:
1034+
"""Edit an entity and return both the entity row and written markdown."""
9911035
logger.debug(f"Editing entity: {identifier}, operation: {operation}")
9921036

9931037
with telemetry.scope(
@@ -1055,8 +1099,14 @@ async def edit_entity(
10551099
phase="update_checksum",
10561100
):
10571101
entity = await self.repository.update(entity.id, {"checksum": checksum})
1102+
if not entity: # pragma: no cover
1103+
raise ValueError(f"Failed to update entity checksum after edit: {file_path}")
10581104

1059-
return entity
1105+
return EntityWriteResult(
1106+
entity=entity,
1107+
content=new_content,
1108+
search_content=remove_frontmatter(new_content),
1109+
)
10601110

10611111
def apply_edit_operation(
10621112
self,

test-int/conftest.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,7 @@ async def _reset_postgres_integration_schema(engine) -> None:
178178
await conn.execute(text(f"DROP TABLE IF EXISTS {table_name} CASCADE"))
179179

180180
await conn.execute(
181-
text(
182-
f"TRUNCATE TABLE {', '.join(_postgres_reset_tables())} "
183-
"RESTART IDENTITY CASCADE"
184-
)
181+
text(f"TRUNCATE TABLE {', '.join(_postgres_reset_tables())} RESTART IDENTITY CASCADE")
185182
)
186183

187184

tests/api/v2/test_knowledge_router.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ async def test_update_entity_by_id(
355355
response = await client.put(
356356
f"{v2_project_url}/knowledge/entities/{original_external_id}",
357357
json=update_data,
358+
params={"fast": False},
358359
)
359360

360361
assert response.status_code == 200
@@ -363,6 +364,8 @@ async def test_update_entity_by_id(
363364
# V2 update must return external_id field
364365
assert updated_entity.external_id is not None
365366
assert updated_entity.api_version == "v2"
367+
assert updated_entity.content is not None
368+
assert "Updated content via V2" in updated_entity.content
366369

367370
# Verify file was updated
368371
file_path = file_service.get_entity_path(updated_entity)
@@ -532,6 +535,7 @@ async def test_edit_entity_by_id_append(
532535
response = await client.patch(
533536
f"{v2_project_url}/knowledge/entities/{original_external_id}",
534537
json=edit_data,
538+
params={"fast": False},
535539
)
536540

537541
assert response.status_code == 200
@@ -540,6 +544,8 @@ async def test_edit_entity_by_id_append(
540544
# V2 patch must return external_id field
541545
assert edited_entity.external_id is not None
542546
assert edited_entity.api_version == "v2"
547+
assert edited_entity.content is not None
548+
assert "Appended content" in edited_entity.content
543549

544550
# Verify file has both original and appended content
545551
file_path = file_service.get_entity_path(edited_entity)

0 commit comments

Comments
 (0)