Skip to content

Commit 8451f2b

Browse files
phernandez and claude authored
feat: add frontmatter validation to schema system (#597)
Signed-off-by: phernandez <paul@basicmachines.co> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee03975 commit 8451f2b

8 files changed

Lines changed: 527 additions & 4 deletions

File tree

docs/specs/SPEC-SCHEMA-IMPL.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class SchemaDefinition:
6464
version: int # Schema version
6565
fields: list[SchemaField] # Parsed fields
6666
validation_mode: str # "warn" | "strict" | "off"
67+
frontmatter_fields: list[SchemaField] # From settings.frontmatter (default: [])
6768

6869

6970
def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
@@ -145,14 +146,16 @@ class ValidationResult:
145146
async def validate_note(
146147
note: Note,
147148
schema: SchemaDefinition,
149+
frontmatter: dict | None = None,
148150
) -> ValidationResult:
149151
"""Validate a note against a schema definition.
150152
151153
Mapping rules:
152-
- field: string → observation [field] exists
153-
- field?(array): type → multiple [field] observations
154-
- field?: EntityType → relation 'field [[...]]' exists
155-
- field?(enum): [v] → observation [field] value ∈ enum values
154+
- field: string → observation [field] exists
155+
- field?(array): type → multiple [field] observations
156+
- field?: EntityType → relation 'field [[...]]' exists
157+
- field?(enum): [v] → observation [field] value ∈ enum values
158+
- settings.frontmatter field → frontmatter key presence/value
156159
"""
157160
```
158161

docs/specs/SPEC-SCHEMA.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ authors to learn.
7373
| `field?(array): EntityType` | Multiple `field` relations | `- authored [[Book]]` (×N) |
7474
| `tags` | Frontmatter `tags` array | `tags: [startups, essays]` |
7575
| `field?(enum): [values]` | Observation `[field] value` where value ∈ set | `- [status] active` |
76+
| `settings.frontmatter` field | Frontmatter key presence/value | `tags: [python, ai]` |
7677

7778
### Key Insight
7879

@@ -99,6 +100,9 @@ schema:
99100
expertise?(array): string, areas of knowledge
100101
settings:
101102
validation: warn # warn | strict | off
103+
frontmatter:
104+
tags?(array): string, note categories
105+
status?(enum): [draft, review, published]
102106
---
103107
104108
# Person
@@ -230,6 +234,32 @@ $ bm schema validate people/ada-lovelace.md
230234
"Unmatched" items are informational — observations and relations the schema doesn't cover.
231235
They're valid. Schemas are a subset, not a straitjacket.
232236

237+
### Frontmatter Validation
238+
239+
Schema notes can declare validation rules for frontmatter keys under `settings.frontmatter`
240+
using the same Picoschema syntax as the `schema` block:
241+
242+
```yaml
243+
settings:
244+
validation: warn
245+
frontmatter:
246+
tags?(array): string
247+
status?(enum): [draft, review, published]
248+
```
249+
250+
- Frontmatter rules use the same Picoschema key syntax (`?` for optional, `(enum)`, `(array)`)
251+
- Only available on schema notes (inline schemas skip frontmatter validation)
252+
- Checks key presence (required vs optional) and enum value membership
253+
- Frontmatter keys that are not declared under `settings.frontmatter` are silently ignored
254+
- Missing required frontmatter keys produce a warning (or error in strict mode)
255+
256+
Example output for a missing required frontmatter key:
257+
258+
```
259+
⚠ Person schema validation:
260+
- Missing required frontmatter key: status
261+
```
262+
233263
### Batch Validation
234264

235265
```

src/basic_memory/api/v2/routers/schema_router.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ async def search_fn(query: str) -> list[dict]:
115115
schema_def,
116116
_entity_observations(entity),
117117
_entity_relations(entity),
118+
frontmatter=frontmatter,
118119
)
119120
results.append(_to_note_validation_response(result))
120121

@@ -149,6 +150,7 @@ async def search_fn(query: str) -> list[dict]:
149150
schema_def,
150151
_entity_observations(entity),
151152
_entity_relations(entity),
153+
frontmatter=frontmatter,
152154
)
153155
results.append(_to_note_validation_response(result))
154156

src/basic_memory/schema/parser.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ class SchemaDefinition:
4949
version: int # Schema version
5050
fields: list[SchemaField] # Parsed fields
5151
validation_mode: str # "warn" | "strict" | "off"
52+
frontmatter_fields: list[SchemaField] = field(default_factory=list) # From settings.frontmatter
5253

5354

5455
# --- Built-in scalar types ---
@@ -228,9 +229,19 @@ def parse_schema_note(frontmatter: dict) -> SchemaDefinition:
228229

229230
fields = parse_picoschema(schema_dict)
230231

232+
# --- Frontmatter validation rules ---
233+
# Trigger: settings.frontmatter is a dict of Picoschema field declarations
234+
# Why: allows schema notes to validate frontmatter keys (tags, status, etc.)
235+
# Outcome: frontmatter_fields populated using same parser as schema fields
236+
frontmatter_dict = settings.get("frontmatter") if isinstance(settings, dict) else None
237+
frontmatter_fields = (
238+
parse_picoschema(frontmatter_dict) if isinstance(frontmatter_dict, dict) else []
239+
)
240+
231241
return SchemaDefinition(
232242
entity=entity,
233243
version=version,
234244
fields=fields,
235245
validation_mode=validation_mode,
246+
frontmatter_fields=frontmatter_fields,
236247
)

src/basic_memory/schema/validator.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def validate_note(
5656
schema: SchemaDefinition,
5757
observations: list[ObservationData],
5858
relations: list[RelationData],
59+
frontmatter: dict | None = None,
5960
) -> ValidationResult:
6061
"""Validate a note against a schema definition.
6162
@@ -64,6 +65,7 @@ def validate_note(
6465
schema: The resolved SchemaDefinition to validate against.
6566
observations: List of ObservationData from the note's observations.
6667
relations: List of RelationData from the note's relations.
68+
frontmatter: The note's frontmatter dict for settings.frontmatter validation.
6769
6870
Returns:
6971
A ValidationResult with per-field results, unmatched items, and warnings/errors.
@@ -113,6 +115,33 @@ def validate_note(
113115
else:
114116
result.warnings.append(msg)
115117

118+
# --- Validate frontmatter fields ---
119+
# Trigger: schema has frontmatter_fields and caller provided frontmatter dict
120+
# Why: settings.frontmatter rules validate metadata keys like tags, status
121+
# Outcome: frontmatter fields produce the same FieldResult/warning/error as content fields
122+
if frontmatter is not None and schema.frontmatter_fields:
123+
for fm_field in schema.frontmatter_fields:
124+
field_result = _validate_frontmatter_field(fm_field, frontmatter)
125+
result.field_results.append(field_result)
126+
127+
if field_result.status == "missing" and fm_field.required:
128+
msg = f"Missing required frontmatter key: {fm_field.name}"
129+
if schema.validation_mode == "strict":
130+
result.errors.append(msg)
131+
result.passed = False
132+
else:
133+
result.warnings.append(msg)
134+
135+
elif field_result.status == "enum_mismatch":
136+
msg = field_result.message or (
137+
f"Frontmatter key '{fm_field.name}' has invalid enum value"
138+
)
139+
if schema.validation_mode == "strict":
140+
result.errors.append(msg)
141+
result.passed = False
142+
else:
143+
result.warnings.append(msg)
144+
116145
# --- Collect unmatched observations ---
117146
for category, values in obs_by_category.items():
118147
if category not in matched_categories:
@@ -227,6 +256,61 @@ def _validate_enum_field(
227256
)
228257

229258

259+
# --- Frontmatter Field Validation ---
260+
261+
262+
def _validate_frontmatter_field(
    schema_field: SchemaField,
    frontmatter: dict,
) -> FieldResult:
    """Check one frontmatter key against its schema field declaration.

    Args:
        schema_field: The field declaration to validate against.
        frontmatter: The note's frontmatter mapping.

    Returns:
        A FieldResult with status "missing" when the key is absent or maps
        to None, "enum_mismatch" when an enum field holds a value outside
        ``schema_field.enum_values``, and "present" otherwise. For non-enum
        fields a list value yields one string per item; any other value
        yields a single stringified entry.
    """
    key = schema_field.name
    raw = frontmatter.get(key)

    # An absent key and an explicit None are both reported as missing.
    if raw is None:
        return FieldResult(
            field=schema_field,
            status="missing",
            message=f"Missing frontmatter key: {key}",
        )

    # --- Non-enum fields: presence only ---
    if not schema_field.is_enum:
        # Lists contribute one string per item; scalars a single string.
        items = raw if isinstance(raw, list) else [raw]
        return FieldResult(
            field=schema_field,
            status="present",
            values=[str(item) for item in items],
        )

    # --- Enum fields: membership check ---
    # The value is stringified as a whole before the check (a list is not
    # inspected item by item).
    text = str(raw)
    if text in schema_field.enum_values:
        return FieldResult(
            field=schema_field,
            status="present",
            values=[text],
        )

    allowed = ", ".join(schema_field.enum_values)
    return FieldResult(
        field=schema_field,
        status="enum_mismatch",
        values=[text],
        message=f"Frontmatter key '{key}' has invalid value: "
        f"{text} (allowed: {allowed})",
    )
312+
313+
230314
# --- Helper Functions ---
231315

232316

0 commit comments

Comments
 (0)