Skip to content

Commit 1bd1aad

Browse files
committed
fix(core): keep response markdown while indexing body content
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 8ad2a6b commit 1bd1aad

File tree

4 files changed

+147
-13
lines changed

src/basic_memory/api/v2/routers/knowledge_router.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,10 +307,12 @@ async def create_entity(
307307
if fast:
308308
entity = await entity_service.fast_write_entity(data)
309309
written_content = None
310+
search_content = None
310311
else:
311312
write_result = await entity_service.create_entity_with_content(data)
312313
entity = write_result.entity
313314
written_content = write_result.content
315+
search_content = write_result.search_content
314316

315317
if fast:
316318
with telemetry.scope(
@@ -332,7 +334,7 @@ async def create_entity(
332334
action="create_entity",
333335
phase="search_index",
334336
):
335-
await search_service.index_entity(entity, content=written_content)
337+
await search_service.index_entity(entity, content=search_content)
336338
with telemetry.scope(
337339
"api.knowledge.create_entity.vector_sync",
338340
domain="knowledge",
@@ -360,6 +362,9 @@ async def create_entity(
360362
if fast:
361363
content = await file_service.read_file_content(entity.file_path)
362364
else:
365+
# Non-fast writes already captured the markdown in memory. Reuse it here
366+
# instead of re-reading the file; format_on_save is the one config that can
367+
# still make the persisted file diverge because write_file only returns a checksum.
363368
content = written_content
364369
result = result.model_copy(update={"content": content})
365370

@@ -429,17 +434,20 @@ async def update_entity_by_id(
429434
if fast:
430435
entity = await entity_service.fast_write_entity(data, external_id=entity_id)
431436
written_content = None
437+
search_content = None
432438
response.status_code = 200 if existing else 201
433439
else:
434440
if existing:
435441
write_result = await entity_service.update_entity_with_content(existing, data)
436442
entity = write_result.entity
437443
written_content = write_result.content
444+
search_content = write_result.search_content
438445
response.status_code = 200
439446
else:
440447
write_result = await entity_service.create_entity_with_content(data)
441448
entity = write_result.entity
442449
written_content = write_result.content
450+
search_content = write_result.search_content
443451
if entity.external_id != entity_id:
444452
entity = await entity_repository.update(
445453
entity.id,
@@ -475,7 +483,7 @@ async def update_entity_by_id(
475483
action="update_entity",
476484
phase="search_index",
477485
):
478-
await search_service.index_entity(entity, content=written_content)
486+
await search_service.index_entity(entity, content=search_content)
479487
with telemetry.scope(
480488
"api.knowledge.update_entity.vector_sync",
481489
domain="knowledge",
@@ -503,6 +511,9 @@ async def update_entity_by_id(
503511
if fast:
504512
content = await file_service.read_file_content(entity.file_path)
505513
else:
514+
# Non-fast writes already captured the markdown in memory. Reuse it here
515+
# instead of re-reading the file; format_on_save is the one config that can
516+
# still make the persisted file diverge because write_file only returns a checksum.
506517
content = written_content
507518
result = result.model_copy(update={"content": content})
508519

@@ -582,6 +593,7 @@ async def edit_entity_by_id(
582593
expected_replacements=data.expected_replacements,
583594
)
584595
written_content = None
596+
search_content = None
585597
else:
586598
identifier = entity.permalink or entity.file_path
587599
write_result = await entity_service.edit_entity_with_content(
@@ -594,6 +606,7 @@ async def edit_entity_by_id(
594606
)
595607
updated_entity = write_result.entity
596608
written_content = write_result.content
609+
search_content = write_result.search_content
597610

598611
if fast:
599612
with telemetry.scope(
@@ -615,7 +628,7 @@ async def edit_entity_by_id(
615628
action="edit_entity",
616629
phase="search_index",
617630
):
618-
await search_service.index_entity(updated_entity, content=written_content)
631+
await search_service.index_entity(updated_entity, content=search_content)
619632
with telemetry.scope(
620633
"api.knowledge.edit_entity.vector_sync",
621634
domain="knowledge",
@@ -643,6 +656,9 @@ async def edit_entity_by_id(
643656
if fast:
644657
content = await file_service.read_file_content(updated_entity.file_path)
645658
else:
659+
# Non-fast writes already captured the markdown in memory. Reuse it here
660+
# instead of re-reading the file; format_on_save is the one config that can
661+
# still make the persisted file diverge because write_file only returns a checksum.
646662
content = written_content
647663
result = result.model_copy(update={"content": content})
648664

src/basic_memory/services/entity_service.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,11 @@
5353

5454
@dataclass(frozen=True)
5555
class EntityWriteResult:
56-
"""Persisted entity plus the markdown written during this call."""
56+
"""Persisted entity plus the response/search content produced during this call."""
5757

5858
entity: EntityModel
5959
content: str
60+
search_content: str
6061

6162

6263
class EntityService(BaseService[EntityModel]):
@@ -334,7 +335,11 @@ async def create_entity_with_content(self, schema: EntitySchema) -> EntityWriteR
334335
updated = await self.repository.update(entity.id, {"checksum": checksum})
335336
if not updated: # pragma: no cover
336337
raise ValueError(f"Failed to update entity checksum after create: {entity.id}")
337-
return EntityWriteResult(entity=updated, content=final_content)
338+
return EntityWriteResult(
339+
entity=updated,
340+
content=final_content,
341+
search_content=remove_frontmatter(final_content),
342+
)
338343

339344
async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> EntityModel:
340345
"""Update an entity's content and metadata."""
@@ -459,7 +464,11 @@ async def update_entity_with_content(
459464
if not entity: # pragma: no cover
460465
raise ValueError(f"Failed to update entity checksum after update: {file_path}")
461466

462-
return EntityWriteResult(entity=entity, content=final_content)
467+
return EntityWriteResult(
468+
entity=entity,
469+
content=final_content,
470+
search_content=remove_frontmatter(final_content),
471+
)
463472

464473
async def fast_write_entity(
465474
self,
@@ -1093,7 +1102,11 @@ async def edit_entity_with_content(
10931102
if not entity: # pragma: no cover
10941103
raise ValueError(f"Failed to update entity checksum after edit: {file_path}")
10951104

1096-
return EntityWriteResult(entity=entity, content=new_content)
1105+
return EntityWriteResult(
1106+
entity=entity,
1107+
content=new_content,
1108+
search_content=remove_frontmatter(new_content),
1109+
)
10971110

10981111
def apply_edit_operation(
10991112
self,

tests/api/v2/test_knowledge_router_telemetry.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,15 @@ async def test_create_entity_emits_root_and_nested_spans(monkeypatch) -> None:
5959
monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span)
6060

6161
entity = _fake_entity()
62+
response_content = "---\ntitle: Telemetry Entity\ntype: note\npermalink: notes/test\n---\n\ntelemetry content"
6263

6364
class FakeEntityService:
6465
async def create_entity_with_content(self, data):
65-
return SimpleNamespace(entity=entity, content="telemetry content")
66+
return SimpleNamespace(
67+
entity=entity,
68+
content=response_content,
69+
search_content="telemetry content",
70+
)
6671

6772
class FakeSearchService:
6873
async def index_entity(self, entity, content=None):
@@ -95,7 +100,7 @@ async def read_file_content(self, path):
95100
fast=False,
96101
)
97102

98-
assert result.content == "telemetry content"
103+
assert result.content == response_content
99104
_assert_names_in_order(
100105
[name for name, _ in spans],
101106
[
@@ -114,10 +119,17 @@ async def test_update_entity_emits_root_and_nested_spans(monkeypatch) -> None:
114119
monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span)
115120

116121
entity = _fake_entity()
122+
response_content = (
123+
"---\ntitle: Telemetry Entity\ntype: note\npermalink: notes/test\n---\n\nupdated telemetry content"
124+
)
117125

118126
class FakeEntityService:
119127
async def update_entity_with_content(self, existing, data):
120-
return SimpleNamespace(entity=entity, content="updated telemetry content")
128+
return SimpleNamespace(
129+
entity=entity,
130+
content=response_content,
131+
search_content="updated telemetry content",
132+
)
121133

122134
class FakeSearchService:
123135
async def index_entity(self, entity, content=None):
@@ -158,7 +170,7 @@ async def read_file_content(self, path):
158170
fast=False,
159171
)
160172

161-
assert result.content == "updated telemetry content"
173+
assert result.content == response_content
162174
_assert_names_in_order(
163175
[name for name, _ in spans],
164176
[
@@ -178,10 +190,17 @@ async def test_edit_entity_emits_root_and_nested_spans(monkeypatch) -> None:
178190
monkeypatch.setattr(knowledge_router_module.telemetry, "span", fake_span)
179191

180192
entity = _fake_entity()
193+
response_content = (
194+
"---\ntitle: Telemetry Entity\ntype: note\npermalink: notes/test\n---\n\nedited telemetry content"
195+
)
181196

182197
class FakeEntityService:
183198
async def edit_entity_with_content(self, **kwargs):
184-
return SimpleNamespace(entity=entity, content="edited telemetry content")
199+
return SimpleNamespace(
200+
entity=entity,
201+
content=response_content,
202+
search_content="edited telemetry content",
203+
)
185204

186205
class FakeSearchService:
187206
async def index_entity(self, entity, content=None):
@@ -214,7 +233,7 @@ async def read_file_content(self, path):
214233
fast=False,
215234
)
216235

217-
assert result.content == "edited telemetry content"
236+
assert result.content == response_content
218237
_assert_names_in_order(
219238
[name for name, _ in spans],
220239
[
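(new test file — path not captured in this extract; the hunk below starts at `@@ -0,0 +1,86 @@`, i.e. a newly added file, separate from tests/api/v2/test_knowledge_router_telemetry.py)

Lines changed: 86 additions & 0 deletions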
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Tests for EntityWriteResult content variants."""
2+
3+
import pytest
4+
5+
from basic_memory.file_utils import remove_frontmatter
6+
from basic_memory.schemas import Entity as EntitySchema
7+
8+
9+
@pytest.mark.asyncio
10+
async def test_create_entity_with_content_returns_full_and_search_content(
11+
entity_service, file_service
12+
) -> None:
13+
result = await entity_service.create_entity_with_content(
14+
EntitySchema(
15+
title="Create Write Result",
16+
directory="notes",
17+
note_type="note",
18+
content="Create body content",
19+
)
20+
)
21+
22+
file_path = file_service.get_entity_path(result.entity)
23+
file_content, _ = await file_service.read_file(file_path)
24+
25+
assert result.content == file_content
26+
assert result.search_content == remove_frontmatter(file_content)
27+
assert result.search_content == "Create body content"
28+
29+
30+
@pytest.mark.asyncio
31+
async def test_update_entity_with_content_returns_full_and_search_content(
32+
entity_service, file_service
33+
) -> None:
34+
created = await entity_service.create_entity(
35+
EntitySchema(
36+
title="Update Write Result",
37+
directory="notes",
38+
note_type="note",
39+
content="Original body content",
40+
)
41+
)
42+
43+
result = await entity_service.update_entity_with_content(
44+
created,
45+
EntitySchema(
46+
title="Update Write Result",
47+
directory="notes",
48+
note_type="note",
49+
content="Updated body content",
50+
),
51+
)
52+
53+
file_path = file_service.get_entity_path(result.entity)
54+
file_content, _ = await file_service.read_file(file_path)
55+
56+
assert result.content == file_content
57+
assert result.search_content == remove_frontmatter(file_content)
58+
assert result.search_content == "Updated body content"
59+
60+
61+
@pytest.mark.asyncio
62+
async def test_edit_entity_with_content_returns_full_and_search_content(
63+
entity_service, file_service
64+
) -> None:
65+
created = await entity_service.create_entity(
66+
EntitySchema(
67+
title="Edit Write Result",
68+
directory="notes",
69+
note_type="note",
70+
content="Original body content",
71+
)
72+
)
73+
74+
result = await entity_service.edit_entity_with_content(
75+
identifier=created.permalink,
76+
operation="find_replace",
77+
content="Edited body content",
78+
find_text="Original body content",
79+
)
80+
81+
file_path = file_service.get_entity_path(result.entity)
82+
file_content, _ = await file_service.read_file(file_path)
83+
84+
assert result.content == file_content
85+
assert result.search_content == remove_frontmatter(file_content)
86+
assert result.search_content == "Edited body content"

0 commit comments

Comments
 (0)