Skip to content

Commit 5d5efa0

Browse files
phernandez and claude committed
fix: format schema_infer and schema_diff as markdown text (#28)
schema_infer and schema_diff returned raw Pydantic models in text mode, causing LLMs to render field names as "undefined". Add text formatters (_format_inference_report, _format_drift_report) matching the existing _format_validation_report pattern. CLI paths are unaffected — they always use output_format="json". Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 4f12182 commit 5d5efa0

2 files changed

Lines changed: 117 additions & 11 deletions

File tree

src/basic_memory/mcp/tools/schema.py

Lines changed: 107 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from basic_memory.mcp.project_context import get_project_client
1313
from basic_memory.mcp.server import mcp
14-
from basic_memory.schemas.schema import ValidationReport, InferenceReport, DriftReport
14+
from basic_memory.schemas.schema import DriftReport, InferenceReport, ValidationReport
1515

1616

1717
def _format_validation_report(report: ValidationReport) -> str:
@@ -44,6 +44,108 @@ def _format_validation_report(report: ValidationReport) -> str:
4444
return "\n".join(lines)
4545

4646

47+
def _format_inference_report(report: InferenceReport) -> str:
    """Build the markdown text returned for a schema inference result.

    The output starts with a header naming the note type and the number of
    notes analyzed, followed by up to three sections: a suggested schema
    rendered as a fenced YAML frontmatter snippet, a bullet list of field
    frequencies, and a list of excluded fields. A section is omitted when
    its data on the report is empty.

    Returning markdown instead of the raw model keeps LLM clients from
    rendering field names as "undefined".
    """
    note_type = report.note_type
    out: list[str] = [
        f"# Schema Inference: {note_type}",
        "",
        f"Notes analyzed: {report.notes_analyzed}",
        "",
    ]

    # Suggested schema, shown as a ready-to-copy frontmatter block.
    schema = report.suggested_schema
    if schema:
        out += [
            "## Suggested Schema",
            "",
            "```yaml",
            "---",
            f"title: {note_type.title()}",
            "type: schema",
            f"entity: {note_type}",
            "version: 1",
            "schema:",
        ]
        out.extend(f" {key}: {definition}" for key, definition in schema.items())
        out += ["---", "```", ""]

    # One bullet per observed field: share of notes, raw counts, and
    # whether the field is in the suggested required set.
    frequencies = report.field_frequencies
    if frequencies:
        out += ["## Field Frequencies", ""]
        for entry in frequencies:
            share = f"{entry.percentage:.0%}"
            if entry.name in report.suggested_required:
                marker = "required"
            else:
                marker = "optional"
            # Show at most three sample values; omit the suffix when none exist.
            preview = ", ".join(entry.sample_values[:3]) if entry.sample_values else ""
            suffix = f" (e.g. {preview})" if preview else ""
            out.append(
                f"- **{entry.name}** ({entry.source}) — {share} "
                f"({entry.count}/{entry.total}) [{marker}]{suffix}"
            )
        out.append("")

    # Fields left out of the suggestion.
    skipped = report.excluded
    if skipped:
        out += ["## Excluded (below threshold)", ""]
        out.extend(f"- {field}" for field in skipped)
        out.append("")

    return "\n".join(out)
100+
101+
102+
def _format_drift_report(report: DriftReport) -> str:
    """Build the markdown text returned for a schema drift result.

    The output starts with a header naming the note type. When the report
    has no new fields, no dropped fields and no cardinality changes, a
    single "no drift" line follows and nothing else. Otherwise each
    non-empty category gets its own section of bullets.

    Returning markdown instead of the raw model keeps LLM clients from
    rendering field names as "undefined".
    """
    out: list[str] = [f"# Schema Drift: {report.note_type}", ""]

    added = report.new_fields
    removed = report.dropped_fields
    cardinality = report.cardinality_changes

    # Guard clause: nothing to report.
    if not (added or removed or cardinality):
        out.append("No drift detected — schema matches actual usage.")
        return "\n".join(out)

    # New and dropped fields share one bullet layout, so render them
    # through the same loop in a fixed order.
    field_sections = (
        ("## New Fields (in notes but not in schema)", added),
        ("## Dropped Fields (in schema but rare in notes)", removed),
    )
    for heading, entries in field_sections:
        if not entries:
            continue
        out += [heading, ""]
        for entry in entries:
            share = f"{entry.percentage:.0%}"
            out.append(
                f"- **{entry.name}** ({entry.source}) — {share} ({entry.count}/{entry.total})"
            )
        out.append("")

    # Cardinality changes are emitted as-is, one bullet each.
    if cardinality:
        out += ["## Cardinality Changes", ""]
        out.extend(f"- {change}" for change in cardinality)
        out.append("")

    return "\n".join(out)
147+
148+
47149
def _no_notes_guidance(note_type: str, tool_name: str) -> str:
48150
"""Build guidance string when no notes of a given type exist.
49151
@@ -217,7 +319,7 @@ async def schema_infer(
217319
workspace: Optional[str] = None,
218320
output_format: Literal["text", "json"] = "text",
219321
context: Context | None = None,
220-
) -> InferenceReport | str | dict:
322+
) -> str | dict:
221323
"""Analyze existing notes and suggest a schema definition.
222324
223325
Examines observation categories and relation types across all notes
@@ -305,7 +407,7 @@ async def schema_infer(
305407
if output_format == "json":
306408
return result.model_dump(mode="json", exclude_none=True)
307409

308-
return result
410+
return _format_inference_report(result)
309411

310412
except Exception as e:
311413
logger.error(f"Schema inference failed: {e}, project: {active_project.name}")
@@ -331,7 +433,7 @@ async def schema_diff(
331433
workspace: Optional[str] = None,
332434
output_format: Literal["text", "json"] = "text",
333435
context: Context | None = None,
334-
) -> DriftReport | str | dict:
436+
) -> str | dict:
335437
"""Detect drift between a schema definition and actual note usage.
336438
337439
Compares the existing schema for a note type against how notes of
@@ -393,7 +495,7 @@ async def schema_diff(
393495
if output_format == "json":
394496
return result.model_dump(mode="json", exclude_none=True)
395497

396-
return result
498+
return _format_drift_report(result)
397499

398500
except Exception as e:
399501
logger.error(f"Schema diff failed: {e}, project: {active_project.name}")

tests/mcp/test_tool_schema.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
from basic_memory.mcp.tools.schema import schema_validate, schema_infer, schema_diff
1414
from basic_memory.mcp.tools.write_note import write_note
15-
from basic_memory.schemas.schema import InferenceReport, DriftReport
1615

1716

1817
# --- Helpers ---
@@ -221,9 +220,12 @@ async def test_schema_infer(app, test_project, sync_service):
221220
project=test_project.name,
222221
)
223222

224-
assert isinstance(result, InferenceReport)
225-
assert result.note_type == "person"
226-
assert result.notes_analyzed >= 3
223+
assert isinstance(result, str)
224+
assert "Schema Inference: person" in result
225+
assert "Notes analyzed: 3" in result
226+
assert "Field Frequencies" in result
227+
assert "**name**" in result
228+
assert "**role**" in result
227229

228230

229231
@pytest.mark.asyncio
@@ -260,8 +262,10 @@ async def test_schema_diff(app, test_project, sync_service):
260262
project=test_project.name,
261263
)
262264

263-
assert isinstance(result, DriftReport)
264-
assert result.note_type == "person"
265+
assert isinstance(result, str)
266+
assert "Schema Drift: person" in result
267+
# Dave has a "hobby" field not in the schema, so drift should be detected
268+
assert "**hobby**" in result
265269

266270

267271
# --- write_note metadata → schema workflow ---

0 commit comments

Comments
 (0)