1111
1212from basic_memory .mcp .project_context import get_project_client
1313from basic_memory .mcp .server import mcp
14- from basic_memory .schemas .schema import ValidationReport , InferenceReport , DriftReport
14+ from basic_memory .schemas .schema import DriftReport , InferenceReport , ValidationReport
1515
1616
1717def _format_validation_report (report : ValidationReport ) -> str :
@@ -44,6 +44,108 @@ def _format_validation_report(report: ValidationReport) -> str:
4444 return "\n " .join (lines )
4545
4646
def _format_inference_report(report: InferenceReport) -> str:
    """Render an InferenceReport as readable markdown.

    Without this formatter the LLM receives raw JSON and renders
    field names as "undefined".

    Args:
        report: Inference result. The attributes read here are ``note_type``,
            ``notes_analyzed``, ``suggested_schema``, ``suggested_required``,
            ``field_frequencies`` and ``excluded``.

    Returns:
        Markdown text: a header with the note type and the number of notes
        analyzed, followed (each only when non-empty) by a fenced YAML schema
        suggestion, a per-field frequency list, and the excluded field names.
    """
    # --- Header ---
    lines: list[str] = [
        f"# Schema Inference: {report.note_type}",
        "",
        f"Notes analyzed: {report.notes_analyzed}",
        "",
    ]

    # --- Suggested schema YAML ---
    if report.suggested_schema:
        lines += [
            "## Suggested Schema",
            "",
            "```yaml",
            "---",
            f"title: {report.note_type.title()}",
            "type: schema",
            f"entity: {report.note_type}",
            "version: 1",
            "schema:",
        ]
        # Two-space indent keeps every field nested under the `schema:` key;
        # without it the fields would parse as top-level YAML keys.
        lines += [
            f"  {field_name}: {field_def}"
            for field_name, field_def in report.suggested_schema.items()
        ]
        lines += ["---", "```", ""]

    # --- Field frequency list ---
    if report.field_frequencies:
        # Build the lookup once instead of scanning the list for every field.
        required_names = set(report.suggested_required or ())
        lines += ["## Field Frequencies", ""]
        for freq in report.field_frequencies:
            # `.0%` multiplies by 100, so percentage is assumed to be a
            # 0-1 fraction — TODO confirm against the inference engine.
            pct = f"{freq.percentage:.0%}"
            req_marker = "required" if freq.name in required_names else "optional"
            # Show at most three samples; str() guards against non-string
            # values, which would make str.join raise TypeError.
            samples = ", ".join(str(value) for value in (freq.sample_values or [])[:3])
            sample_str = f" (e.g. {samples})" if samples else ""
            lines.append(
                f"- **{freq.name}** ({freq.source}) — {pct} ({freq.count}/{freq.total}) "
                f"[{req_marker}]{sample_str}"
            )
        lines.append("")

    # --- Excluded fields ---
    if report.excluded:
        lines += ["## Excluded (below threshold)", ""]
        lines += [f"- {name}" for name in report.excluded]
        lines.append("")

    return "\n".join(lines)
100+
101+
def _format_drift_report(report: DriftReport) -> str:
    """Render a DriftReport as readable markdown.

    Without this formatter the LLM receives raw JSON and renders
    field names as "undefined".

    Args:
        report: Drift result. The attributes read here are ``note_type``,
            ``new_fields``, ``dropped_fields`` and ``cardinality_changes``.

    Returns:
        Markdown text: a header, then either a single "no drift" line when all
        three collections are empty, or one section per non-empty collection.
    """
    # --- Header ---
    lines: list[str] = [f"# Schema Drift: {report.note_type}", ""]

    has_drift = report.new_fields or report.dropped_fields or report.cardinality_changes
    if not has_drift:
        lines.append("No drift detected — schema matches actual usage.")
        return "\n".join(lines)

    # --- New / dropped fields ---
    # Both sections use the same bullet layout; only the heading differs,
    # so they are rendered by one loop rather than two copies of it.
    field_sections = (
        ("## New Fields (in notes but not in schema)", report.new_fields),
        ("## Dropped Fields (in schema but rare in notes)", report.dropped_fields),
    )
    for heading, fields in field_sections:
        if not fields:
            continue
        lines += [heading, ""]
        # `.0%` multiplies by 100, so percentage is assumed to be a
        # 0-1 fraction — TODO confirm against the drift engine.
        lines += [
            f"- **{field.name}** ({field.source}) — {field.percentage:.0%} "
            f"({field.count}/{field.total})"
            for field in fields
        ]
        lines.append("")

    # --- Cardinality changes ---
    if report.cardinality_changes:
        lines += ["## Cardinality Changes", ""]
        lines += [f"- {change}" for change in report.cardinality_changes]
        lines.append("")

    return "\n".join(lines)
147+
148+
47149def _no_notes_guidance (note_type : str , tool_name : str ) -> str :
48150 """Build guidance string when no notes of a given type exist.
49151
@@ -217,7 +319,7 @@ async def schema_infer(
217319 workspace : Optional [str ] = None ,
218320 output_format : Literal ["text" , "json" ] = "text" ,
219321 context : Context | None = None ,
220- ) -> InferenceReport | str | dict :
322+ ) -> str | dict :
221323 """Analyze existing notes and suggest a schema definition.
222324
223325 Examines observation categories and relation types across all notes
@@ -305,7 +407,7 @@ async def schema_infer(
305407 if output_format == "json" :
306408 return result .model_dump (mode = "json" , exclude_none = True )
307409
308- return result
410+ return _format_inference_report ( result )
309411
310412 except Exception as e :
311413 logger .error (f"Schema inference failed: { e } , project: { active_project .name } " )
@@ -331,7 +433,7 @@ async def schema_diff(
331433 workspace : Optional [str ] = None ,
332434 output_format : Literal ["text" , "json" ] = "text" ,
333435 context : Context | None = None ,
334- ) -> DriftReport | str | dict :
436+ ) -> str | dict :
335437 """Detect drift between a schema definition and actual note usage.
336438
337439 Compares the existing schema for a note type against how notes of
@@ -393,7 +495,7 @@ async def schema_diff(
393495 if output_format == "json" :
394496 return result .model_dump (mode = "json" , exclude_none = True )
395497
396- return result
498+ return _format_drift_report ( result )
397499
398500 except Exception as e :
399501 logger .error (f"Schema diff failed: { e } , project: { active_project .name } " )
0 commit comments