Skip to content

Commit 1639bff

Browse files
committed
feat: add failure-focused telemetry details
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent f0b4437 commit 1639bff

9 files changed

Lines changed: 272 additions & 50 deletions

docs/logfire-instrumentation-strategy.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ Avoid full automatic SQL span firehose by default.
348348
- [x] Phase 1: Bootstrap and config gating
349349
- [x] Phase 2: Root spans for entrypoints and primary operations
350350
- [x] Phase 3: Child spans for sync, search, and routing
351-
- [ ] Phase 4: Failure-focused detail and final verification
351+
- [x] Phase 4: Failure-focused detail and final verification
352352

353353
## Recommended Rollout Phases
354354

src/basic_memory/mcp/async_client.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from httpx import ASGITransport, AsyncClient, Timeout
66
from loguru import logger
77

8+
from basic_memory import telemetry
89
from basic_memory.api.app import app as fastapi_app
910
from basic_memory.config import ConfigManager, ProjectMode
1011

@@ -43,21 +44,26 @@ def _asgi_client(timeout: Timeout) -> AsyncClient:
4344

4445
async def _resolve_cloud_token(config) -> str:
    """Resolve a cloud auth token, preferring the API key over OAuth.

    Resolution order:
      1. ``config.cloud_api_key`` when it is set.
      2. A valid OAuth token obtained through ``CLIAuth``.

    The whole lookup runs inside a telemetry span so credential failures are
    observable, and a failure is logged before raising.

    Raises:
        RuntimeError: when neither credential source yields a token.
    """
    with telemetry.span(
        "routing.resolve_cloud_credentials",
        has_api_key=bool(config.cloud_api_key),
    ):
        # A configured API key short-circuits the OAuth flow entirely.
        api_key = config.cloud_api_key
        if api_key:
            return api_key

        # Function-local import keeps the CLI auth machinery off the
        # module-import path (matches the original's placement).
        from basic_memory.cli.auth import CLIAuth

        oauth = CLIAuth(client_id=config.cloud_client_id, authkit_domain=config.cloud_domain)
        oauth_token = await oauth.get_valid_token()
        if oauth_token:
            return oauth_token

        # No credentials at all: surface via log + span before failing loudly.
        logger.error("Cloud routing requested but no credentials were available")
        raise RuntimeError(
            "Cloud routing requested but no credentials found. "
            "Run 'bm cloud api-key save <key>' or 'bm cloud login' first."
        )
6167

6268

6369
@asynccontextmanager

src/basic_memory/mcp/project_context.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,27 @@ async def _resolve_default_project_from_api() -> Optional[str]:
6464
return None
6565

6666

67+
def _canonicalize_project_name(
    project_name: Optional[str],
    config: BasicMemoryConfig,
) -> Optional[str]:
    """Return the configured project name when the identifier matches by permalink.

    Project routing happens before API validation, so we normalize explicit inputs
    here to keep local/cloud routing aligned with the database's case-insensitive
    project resolver.
    """
    if project_name is None:
        return None

    target_permalink = generate_permalink(project_name)
    # First configured project whose permalink matches wins; if nothing matches,
    # pass the caller's identifier through untouched.
    canonical = next(
        (
            configured
            for configured in config.projects
            if generate_permalink(configured) == target_permalink
        ),
        None,
    )
    return canonical if canonical is not None else project_name
87+
6788
async def resolve_project_parameter(
6889
project: Optional[str] = None,
6990
allow_discovery: bool = False,
@@ -95,11 +116,12 @@ async def resolve_project_parameter(
95116
requested_project=project,
96117
allow_discovery=allow_discovery,
97118
):
119+
config = ConfigManager().config
120+
98121
# Load config for any values not explicitly provided.
99122
# ConfigManager reads from the local config file, which doesn't exist in cloud mode.
100123
# When it returns None, fall back to querying the projects API for the is_default flag.
101124
if default_project is None:
102-
config = ConfigManager().config
103125
default_project = config.default_project
104126

105127
if default_project is None:
@@ -110,7 +132,7 @@ async def resolve_project_parameter(
110132
default_project=default_project,
111133
)
112134
result = resolver.resolve(project=project, allow_discovery=allow_discovery)
113-
return result.project
135+
return _canonicalize_project_name(result.project, config)
114136

115137

116138
async def get_project_names(client: AsyncClient, headers: HeaderTypes | None = None) -> List[str]:

src/basic_memory/mcp/tools/write_note.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,9 @@ async def write_note(
250250
title, entity.permalink, active_project.name
251251
)
252252

253-
logger.debug(f"Entity exists, updating instead permalink={entity.permalink}")
253+
logger.debug(
254+
f"Entity exists, updating instead permalink={entity.permalink}"
255+
)
254256
try:
255257
if not entity.permalink:
256258
raise ValueError(

src/basic_memory/sync/sync_service.py

Lines changed: 76 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
# Circuit breaker configuration
3939
MAX_CONSECUTIVE_FAILURES = 3
40+
SLOW_FILE_SYNC_WARNING_MS = 500
4041

4142

4243
@dataclass
@@ -650,12 +651,14 @@ async def sync_file(
650651
logger.warning(f"Skipping file due to repeated failures: {path}")
651652
return None, None
652653

654+
start_time = time.time()
655+
is_markdown = self.file_service.is_markdown(path)
656+
file_kind = "markdown" if is_markdown else "regular"
657+
653658
try:
654-
logger.debug(
655-
f"Syncing file path={path} is_new={new} is_markdown={self.file_service.is_markdown(path)}"
656-
)
659+
logger.debug(f"Syncing file path={path} is_new={new} is_markdown={is_markdown}")
657660

658-
if self.file_service.is_markdown(path):
661+
if is_markdown:
659662
entity, checksum = await self.sync_markdown_file(path, new)
660663
else:
661664
entity, checksum = await self.sync_regular_file(path, new)
@@ -680,33 +683,63 @@ async def sync_file(
680683
logger.debug(
681684
f"File sync completed, path={path}, entity_id={entity.id}, checksum={checksum[:8]}"
682685
)
686+
duration_ms = int((time.time() - start_time) * 1000)
687+
if duration_ms >= SLOW_FILE_SYNC_WARNING_MS:
688+
logger.warning(
689+
f"Slow file sync detected: path={path}, file_kind={file_kind}, duration_ms={duration_ms}"
690+
)
683691
return entity, checksum
684692

685693
except FileNotFoundError:
686694
# File exists in database but not on filesystem
687695
# This indicates a database/filesystem inconsistency - treat as deletion
688-
logger.warning(
689-
f"File not found during sync, treating as deletion: path={path}. "
690-
"This may indicate a race condition or manual file deletion."
691-
)
692-
await self.handle_delete(path)
696+
with telemetry.span(
697+
"sync.file.failure",
698+
failure_type="file_not_found",
699+
path=path,
700+
file_kind=file_kind,
701+
is_new=new,
702+
is_fatal=False,
703+
):
704+
logger.warning(
705+
f"File not found during sync, treating as deletion: path={path}. "
706+
"This may indicate a race condition or manual file deletion."
707+
)
708+
await self.handle_delete(path)
693709
return None, None
694710

695711
except Exception as e:
712+
failure_type = type(e).__name__
696713
# Check if this is a fatal error (or caused by one)
697714
# Fatal errors like project deletion should terminate sync immediately
698715
if isinstance(e, SyncFatalError) or isinstance(
699716
e.__cause__, SyncFatalError
700717
): # pragma: no cover
701-
logger.error(f"Fatal sync error encountered, terminating sync: path={path}")
718+
with telemetry.span(
719+
"sync.file.failure",
720+
failure_type=failure_type,
721+
path=path,
722+
file_kind=file_kind,
723+
is_new=new,
724+
is_fatal=True,
725+
):
726+
logger.error(f"Fatal sync error encountered, terminating sync: path={path}")
702727
raise
703728

704729
# Otherwise treat as recoverable file-level error
705730
error_msg = str(e)
706-
logger.error(f"Failed to sync file: path={path}, error={error_msg}")
731+
with telemetry.span(
732+
"sync.file.failure",
733+
failure_type=failure_type,
734+
path=path,
735+
file_kind=file_kind,
736+
is_new=new,
737+
is_fatal=False,
738+
):
739+
logger.error(f"Failed to sync file: path={path}, error={error_msg}")
707740

708-
# Record failure for circuit breaker
709-
await self._record_failure(path, error_msg)
741+
# Record failure for circuit breaker
742+
await self._record_failure(path, error_msg)
710743

711744
return None, None
712745

@@ -1100,24 +1133,36 @@ async def resolve_relations(self, entity_id: int | None = None):
11001133
# update search index only on successful resolution
11011134
await self.search_service.index_entity(resolved_entity)
11021135
except IntegrityError:
1103-
# IntegrityError means a relation with this (from_id, to_id, relation_type)
1104-
# already exists. The UPDATE was rolled back, so our unresolved relation
1105-
# (to_id=NULL) still exists in the database. We delete it because:
1106-
# 1. It's redundant - a resolved relation already captures this relationship
1107-
# 2. If we don't delete it, future syncs will try to resolve it again
1108-
# and get the same IntegrityError
1109-
logger.debug(
1110-
"Deleting duplicate unresolved relation "
1111-
f"relation_id={relation.id} "
1112-
f"from_id={relation.from_id} "
1113-
f"to_name={relation.to_name} "
1114-
f"resolved_to_id={resolved_entity.id}"
1115-
)
1116-
try:
1117-
await self.relation_repository.delete(relation.id)
1118-
except Exception as e:
1119-
# Log but don't fail - the relation may have been deleted already
1120-
logger.debug(f"Could not delete duplicate relation {relation.id}: {e}")
1136+
with telemetry.span(
1137+
"sync.relation.resolve_conflict",
1138+
relation_id=relation.id,
1139+
relation_type=relation.relation_type,
1140+
):
1141+
# IntegrityError means a relation with this (from_id, to_id, relation_type)
1142+
# already exists. The UPDATE was rolled back, so our unresolved relation
1143+
# (to_id=NULL) still exists in the database. We delete it because:
1144+
# 1. It's redundant - a resolved version already captures this relationship
1145+
# 2. If we don't delete it, future syncs will try to resolve it again
1146+
# and get the same IntegrityError
1147+
logger.debug(
1148+
"Deleting duplicate unresolved relation "
1149+
f"relation_id={relation.id} "
1150+
f"from_id={relation.from_id} "
1151+
f"to_name={relation.to_name} "
1152+
f"resolved_to_id={resolved_entity.id}"
1153+
)
1154+
try:
1155+
await self.relation_repository.delete(relation.id)
1156+
except Exception as e:
1157+
with telemetry.span(
1158+
"sync.relation.cleanup_failure",
1159+
relation_id=relation.id,
1160+
relation_type=relation.relation_type,
1161+
):
1162+
# Log but don't fail - the relation may have been deleted already
1163+
logger.debug(
1164+
f"Could not delete duplicate relation {relation.id}: {e}"
1165+
)
11211166

11221167
async def _quick_count_files(self, directory: Path) -> int:
11231168
"""Fast file count using find command.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Telemetry coverage for async client auth failures."""
2+
3+
from __future__ import annotations
4+
5+
import importlib
6+
from contextlib import contextmanager
7+
from types import SimpleNamespace
8+
9+
import pytest
10+
11+
async_client_module = importlib.import_module("basic_memory.mcp.async_client")
12+
13+
14+
@pytest.mark.asyncio
async def test_resolve_cloud_token_emits_failure_span(monkeypatch) -> None:
    """The no-credentials failure path emits exactly one span and one error log."""
    recorded_spans: list[tuple[str, dict]] = []
    logged_errors: list[str] = []

    class FakeAuth:
        """Stand-in for CLIAuth that never produces a token."""

        def __init__(self, client_id: str, authkit_domain: str) -> None:
            self.client_id = client_id
            self.authkit_domain = authkit_domain

        async def get_valid_token(self):
            # Force the OAuth fallback to fail so the error branch runs.
            return None

    @contextmanager
    def fake_span(name: str, **attrs):
        # Capture span name + attributes instead of emitting real telemetry.
        recorded_spans.append((name, attrs))
        yield

    monkeypatch.setattr(async_client_module.telemetry, "span", fake_span)
    monkeypatch.setattr("basic_memory.cli.auth.CLIAuth", FakeAuth)
    monkeypatch.setattr(async_client_module.logger, "error", logged_errors.append)

    # Config with no API key so both credential sources come up empty.
    config = SimpleNamespace(
        cloud_api_key=None,
        cloud_client_id="client-123",
        cloud_domain="auth.example.com",
    )

    with pytest.raises(RuntimeError, match="no credentials found"):
        await async_client_module._resolve_cloud_token(config)

    assert recorded_spans == [("routing.resolve_cloud_credentials", {"has_api_key": False})]
    assert logged_errors == ["Cloud routing requested but no credentials were available"]

tests/mcp/test_project_context.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,26 @@ async def test_uses_explicit_project_when_no_env(config_manager, monkeypatch):
9494
assert await resolve_project_parameter(project="explicit-project") == "explicit-project"
9595

9696

97+
@pytest.mark.asyncio
98+
async def test_canonicalizes_case_insensitive_project_reference(
99+
config_manager, config_home, monkeypatch
100+
):
101+
from basic_memory.config import ProjectEntry
102+
from basic_memory.mcp.project_context import resolve_project_parameter
103+
104+
cfg = config_manager.load_config()
105+
project_name = "Personal-Project"
106+
project_path = config_home / "personal-project"
107+
project_path.mkdir(parents=True, exist_ok=True)
108+
cfg.projects[project_name] = ProjectEntry(path=str(project_path))
109+
config_manager.save_config(cfg)
110+
111+
monkeypatch.delenv("BASIC_MEMORY_MCP_PROJECT", raising=False)
112+
113+
assert await resolve_project_parameter(project="personal-project") == project_name
114+
assert await resolve_project_parameter(project="PERSONAL-PROJECT") == project_name
115+
116+
97117
@pytest.mark.asyncio
98118
async def test_uses_default_project(config_manager, config_home, monkeypatch):
99119
from basic_memory.mcp.project_context import resolve_project_parameter

tests/mcp/test_tool_telemetry.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ async def test_write_note_emits_root_operation_and_project_context(
6060
)
6161
]
6262
assert contexts[0] == {
63+
"route_mode": "local_asgi",
64+
"workspace_id": None,
65+
}
66+
assert contexts[1] == {
6367
"project_name": test_project.name,
6468
"workspace_id": None,
6569
"tool_name": "write_note",
@@ -104,6 +108,10 @@ async def test_read_note_emits_root_operation_and_project_context(
104108
)
105109
]
106110
assert contexts[0] == {
111+
"route_mode": "local_asgi",
112+
"workspace_id": None,
113+
}
114+
assert contexts[1] == {
107115
"project_name": test_project.name,
108116
"workspace_id": None,
109117
"tool_name": "read_note",
@@ -155,6 +163,10 @@ async def test_search_notes_emits_root_operation_and_project_context(
155163
)
156164
assert ("api.request.search",) == (operations[1][0],)
157165
assert contexts[0] == {
166+
"route_mode": "local_asgi",
167+
"workspace_id": None,
168+
}
169+
assert contexts[1] == {
158170
"project_name": test_project.name,
159171
"workspace_id": None,
160172
"tool_name": "search_notes",

0 commit comments

Comments
 (0)