Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions src/basic_memory/api/v2/routers/knowledge_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,11 @@ async def create_entity(
):
if fast:
entity = await entity_service.fast_write_entity(data)
written_content = None
else:
entity = await entity_service.create_entity(data)
write_result = await entity_service.create_entity_with_content(data)
entity = write_result.entity
written_content = write_result.content

if fast:
with telemetry.scope(
Expand All @@ -329,7 +332,7 @@ async def create_entity(
action="create_entity",
phase="search_index",
):
await search_service.index_entity(entity)
await search_service.index_entity(entity, content=written_content)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep search indexing on normalized note content

The non-fast create path now calls search_service.index_entity(entity, content=written_content), but written_content is full serialized markdown from dump_frontmatter(...) in create_entity_with_content, while the previous path indexed read_entity_content(...) output (content normalized by the markdown processor). This change makes YAML/frontmatter tokens part of indexed content, which can materially degrade search relevance by injecting metadata noise into every note; pass normalized body content (or keep the existing read/normalize path) before indexing.

Useful? React with 👍 / 👎.

with telemetry.scope(
"api.knowledge.create_entity.vector_sync",
domain="knowledge",
Expand All @@ -352,8 +355,12 @@ async def create_entity(
domain="knowledge",
action="create_entity",
phase="read_content",
source="file" if fast else "memory",
):
content = await file_service.read_file_content(entity.file_path)
if fast:
content = await file_service.read_file_content(entity.file_path)
else:
content = written_content
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Return persisted content after formatter rewrites

For non-fast writes, the response content now comes from in-memory written_content instead of reading the saved file. FileService.write_file can rewrite the file when format_on_save is enabled, so this branch can return pre-format text that no longer matches what was persisted, causing API clients to observe stale content immediately after a successful write; use the persisted file content (or formatter output) for the response body in this branch.

Useful? React with 👍 / 👎.

result = result.model_copy(update={"content": content})

logger.info(
Expand Down Expand Up @@ -421,13 +428,18 @@ async def update_entity_by_id(
):
if fast:
entity = await entity_service.fast_write_entity(data, external_id=entity_id)
written_content = None
response.status_code = 200 if existing else 201
else:
if existing:
entity = await entity_service.update_entity(existing, data)
write_result = await entity_service.update_entity_with_content(existing, data)
entity = write_result.entity
written_content = write_result.content
response.status_code = 200
else:
entity = await entity_service.create_entity(data)
write_result = await entity_service.create_entity_with_content(data)
entity = write_result.entity
written_content = write_result.content
if entity.external_id != entity_id:
entity = await entity_repository.update(
entity.id,
Expand Down Expand Up @@ -461,7 +473,7 @@ async def update_entity_by_id(
action="update_entity",
phase="search_index",
):
await search_service.index_entity(entity)
await search_service.index_entity(entity, content=written_content)
with telemetry.scope(
"api.knowledge.update_entity.vector_sync",
domain="knowledge",
Expand All @@ -484,8 +496,12 @@ async def update_entity_by_id(
domain="knowledge",
action="update_entity",
phase="read_content",
source="file" if fast else "memory",
):
content = await file_service.read_file_content(entity.file_path)
if fast:
content = await file_service.read_file_content(entity.file_path)
else:
content = written_content
result = result.model_copy(update={"content": content})

logger.info(
Expand Down Expand Up @@ -563,16 +579,19 @@ async def edit_entity_by_id(
find_text=data.find_text,
expected_replacements=data.expected_replacements,
)
written_content = None
else:
identifier = entity.permalink or entity.file_path
updated_entity = await entity_service.edit_entity(
write_result = await entity_service.edit_entity_with_content(
identifier=identifier,
operation=data.operation,
content=data.content,
section=data.section,
find_text=data.find_text,
expected_replacements=data.expected_replacements,
)
updated_entity = write_result.entity
written_content = write_result.content

if fast:
with telemetry.scope(
Expand All @@ -594,7 +613,7 @@ async def edit_entity_by_id(
action="edit_entity",
phase="search_index",
):
await search_service.index_entity(updated_entity)
await search_service.index_entity(updated_entity, content=written_content)
with telemetry.scope(
"api.knowledge.edit_entity.vector_sync",
domain="knowledge",
Expand All @@ -617,8 +636,12 @@ async def edit_entity_by_id(
domain="knowledge",
action="edit_entity",
phase="read_content",
source="file" if fast else "memory",
):
content = await file_service.read_file_content(updated_entity.file_path)
if fast:
content = await file_service.read_file_content(updated_entity.file_path)
else:
content = written_content
result = result.model_copy(update={"content": content})

logger.info(
Expand Down
77 changes: 57 additions & 20 deletions src/basic_memory/services/entity_service.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Service for managing entities in the database."""

from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Sequence, Tuple, Union
Expand Down Expand Up @@ -50,6 +51,14 @@
from basic_memory.utils import build_canonical_permalink


@dataclass(frozen=True)
class EntityWriteResult:
"""Persisted entity plus the markdown written during this call."""

entity: EntityModel
content: str


class EntityService(BaseService[EntityModel]):
"""Service for managing entities in the database."""

Expand Down Expand Up @@ -79,7 +88,7 @@ def __init__(

async def detect_file_path_conflicts(
self, file_path: str, skip_check: bool = False
) -> List[Entity]:
) -> List[str]:
"""Detect potential file path conflicts for a given file path.

This checks for entities with similar file paths that might cause conflicts:
Expand All @@ -93,28 +102,19 @@ async def detect_file_path_conflicts(
skip_check: If True, skip the check and return empty list (optimization for bulk operations)

Returns:
List of entities that might conflict with the given file path
List of file paths that might conflict with the given file path
"""
if skip_check:
return []

from basic_memory.utils import detect_potential_file_conflicts

conflicts = []

# Get all existing file paths
all_entities = await self.repository.find_all()
existing_paths = [entity.file_path for entity in all_entities]
# Load only file paths. Conflict detection is on the hot write path and
# does not need observations or relations.
existing_paths = await self.repository.get_all_file_paths()

# Use the enhanced conflict detection utility
conflicting_paths = detect_potential_file_conflicts(file_path, existing_paths)

# Find the entities corresponding to conflicting paths
for entity in all_entities:
if entity.file_path in conflicting_paths:
conflicts.append(entity)

return conflicts
return detect_potential_file_conflicts(file_path, existing_paths)

async def resolve_permalink(
self,
Expand Down Expand Up @@ -143,8 +143,7 @@ async def resolve_permalink(
)
if conflicts:
logger.warning(
f"Detected potential file path conflicts for '{file_path_str}': "
f"{[entity.file_path for entity in conflicts]}"
f"Detected potential file path conflicts for '{file_path_str}': {conflicts}"
)

# If markdown has explicit permalink, try to validate it
Expand Down Expand Up @@ -255,6 +254,10 @@ async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityMod

async def create_entity(self, schema: EntitySchema) -> EntityModel:
"""Create a new entity and write to filesystem."""
return (await self.create_entity_with_content(schema)).entity

async def create_entity_with_content(self, schema: EntitySchema) -> EntityWriteResult:
"""Create a new entity and return both the entity row and written markdown."""
logger.debug(f"Creating entity: {schema.title}")

# Get file path and ensure it's a Path object
Expand Down Expand Up @@ -328,10 +331,19 @@ async def create_entity(self, schema: EntitySchema) -> EntityModel:
action="create",
phase="update_checksum",
):
return await self.repository.update(entity.id, {"checksum": checksum})
updated = await self.repository.update(entity.id, {"checksum": checksum})
if not updated: # pragma: no cover
raise ValueError(f"Failed to update entity checksum after create: {entity.id}")
return EntityWriteResult(entity=updated, content=final_content)

async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> EntityModel:
"""Update an entity's content and metadata."""
return (await self.update_entity_with_content(entity, schema)).entity

async def update_entity_with_content(
self, entity: EntityModel, schema: EntitySchema
) -> EntityWriteResult:
"""Update an entity and return both the entity row and written markdown."""
logger.debug(
f"Updating entity with permalink: {entity.permalink} content-type: {schema.content_type}"
)
Expand Down Expand Up @@ -444,8 +456,10 @@ async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> Enti
phase="update_checksum",
):
entity = await self.repository.update(entity.id, {"checksum": checksum})
if not entity: # pragma: no cover
raise ValueError(f"Failed to update entity checksum after update: {file_path}")

return entity
return EntityWriteResult(entity=entity, content=final_content)

async def fast_write_entity(
self,
Expand Down Expand Up @@ -988,6 +1002,27 @@ async def edit_entity(
EntityNotFoundError: If the entity cannot be found
ValueError: If required parameters are missing for the operation or replacement count doesn't match expected
"""
return (
await self.edit_entity_with_content(
identifier=identifier,
operation=operation,
content=content,
section=section,
find_text=find_text,
expected_replacements=expected_replacements,
)
).entity

async def edit_entity_with_content(
self,
identifier: str,
operation: str,
content: str,
section: Optional[str] = None,
find_text: Optional[str] = None,
expected_replacements: int = 1,
) -> EntityWriteResult:
"""Edit an entity and return both the entity row and written markdown."""
logger.debug(f"Editing entity: {identifier}, operation: {operation}")

with telemetry.scope(
Expand Down Expand Up @@ -1055,8 +1090,10 @@ async def edit_entity(
phase="update_checksum",
):
entity = await self.repository.update(entity.id, {"checksum": checksum})
if not entity: # pragma: no cover
raise ValueError(f"Failed to update entity checksum after edit: {file_path}")

return entity
return EntityWriteResult(entity=entity, content=new_content)

def apply_edit_operation(
self,
Expand Down
6 changes: 6 additions & 0 deletions tests/api/v2/test_knowledge_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ async def test_update_entity_by_id(
response = await client.put(
f"{v2_project_url}/knowledge/entities/{original_external_id}",
json=update_data,
params={"fast": False},
)

assert response.status_code == 200
Expand All @@ -363,6 +364,8 @@ async def test_update_entity_by_id(
# V2 update must return external_id field
assert updated_entity.external_id is not None
assert updated_entity.api_version == "v2"
assert updated_entity.content is not None
assert "Updated content via V2" in updated_entity.content

# Verify file was updated
file_path = file_service.get_entity_path(updated_entity)
Expand Down Expand Up @@ -532,6 +535,7 @@ async def test_edit_entity_by_id_append(
response = await client.patch(
f"{v2_project_url}/knowledge/entities/{original_external_id}",
json=edit_data,
params={"fast": False},
)

assert response.status_code == 200
Expand All @@ -540,6 +544,8 @@ async def test_edit_entity_by_id_append(
# V2 patch must return external_id field
assert edited_entity.external_id is not None
assert edited_entity.api_version == "v2"
assert edited_entity.content is not None
assert "Appended content" in edited_entity.content

# Verify file has both original and appended content
file_path = file_service.get_entity_path(edited_entity)
Expand Down
27 changes: 15 additions & 12 deletions tests/api/v2/test_knowledge_router_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ async def test_create_entity_emits_root_and_nested_spans(monkeypatch) -> None:
entity = _fake_entity()

class FakeEntityService:
async def create_entity(self, data):
return entity
async def create_entity_with_content(self, data):
return SimpleNamespace(entity=entity, content="telemetry content")

class FakeSearchService:
async def index_entity(self, entity):
async def index_entity(self, entity, content=None):
assert content == "telemetry content"
return None

class FakeTaskScheduler:
Expand All @@ -74,7 +75,7 @@ def schedule(self, *args, **kwargs):

class FakeFileService:
async def read_file_content(self, path):
return "telemetry content"
raise AssertionError("non-fast create should not re-read file content")

result = await knowledge_router_module.create_entity(
project_id="project-123",
Expand Down Expand Up @@ -115,11 +116,12 @@ async def test_update_entity_emits_root_and_nested_spans(monkeypatch) -> None:
entity = _fake_entity()

class FakeEntityService:
async def update_entity(self, existing, data):
return entity
async def update_entity_with_content(self, existing, data):
return SimpleNamespace(entity=entity, content="updated telemetry content")

class FakeSearchService:
async def index_entity(self, entity):
async def index_entity(self, entity, content=None):
assert content == "updated telemetry content"
return None

class FakeEntityRepository:
Expand All @@ -132,7 +134,7 @@ def schedule(self, *args, **kwargs):

class FakeFileService:
async def read_file_content(self, path):
return "updated telemetry content"
raise AssertionError("non-fast update should not re-read file content")

response = Response()
result = await knowledge_router_module.update_entity_by_id(
Expand Down Expand Up @@ -178,11 +180,12 @@ async def test_edit_entity_emits_root_and_nested_spans(monkeypatch) -> None:
entity = _fake_entity()

class FakeEntityService:
async def edit_entity(self, **kwargs):
return entity
async def edit_entity_with_content(self, **kwargs):
return SimpleNamespace(entity=entity, content="edited telemetry content")

class FakeSearchService:
async def index_entity(self, entity):
async def index_entity(self, entity, content=None):
assert content == "edited telemetry content"
return None

class FakeEntityRepository:
Expand All @@ -195,7 +198,7 @@ def schedule(self, *args, **kwargs):

class FakeFileService:
async def read_file_content(self, path):
return "edited telemetry content"
raise AssertionError("non-fast edit should not re-read file content")

result = await knowledge_router_module.edit_entity_by_id(
data=EditEntityRequest(operation="append", content="edited telemetry content"),
Expand Down
Loading