Skip to content

Commit f0335b9

Browse files
phernandez and claude committed
fix: handle quoted picoschema enum strings in YAML frontmatter (#612)
The picoschema enum-with-description syntax `[val1, val2], description` is invalid YAML, so users must quote it to make YAML parse it as a single string. This commit adds `_parse_enum_string()`, which extracts the enum values and the optional description from the resulting string value (e.g., "[active, blocked], current state").

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 66effb0 commit f0335b9

2 files changed

Lines changed: 73 additions & 4 deletions

File tree

src/basic_memory/schema/parser.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
EntityName as type (capitalized) # entity reference
1515
"""
1616

17+
import re
1718
from dataclasses import dataclass, field
1819

1920

@@ -124,6 +125,31 @@ def _is_entity_ref_type(type_str: str) -> bool:
124125
return len(type_str) > 0 and type_str[0].isupper()
125126

126127

128+
# --- Enum String Parsing ---
129+
130+
131+
def _parse_enum_string(value: str) -> tuple[list[str], str | None]:
132+
"""Parse a string-typed enum value into enum values and optional description.
133+
134+
When picoschema enum values are quoted in YAML frontmatter (required when a
135+
description follows the list), YAML parses the whole thing as a string. This
136+
function extracts the enum values and description from that string.
137+
138+
Examples:
139+
"[active, blocked, done], current state" -> (['active', 'blocked', 'done'], 'current state')
140+
"[active, blocked]" -> (['active', 'blocked'], None)
141+
"active" -> (['active'], None)
142+
"""
143+
# Match bracketed list with optional trailing description
144+
m = re.match(r"\[([^\]]+)\](?:\s*,\s*(.+))?", value)
145+
if m:
146+
items = [item.strip() for item in m.group(1).split(",")]
147+
description = m.group(2).strip() if m.group(2) else None
148+
return items, description
149+
# Plain string — single enum value
150+
return [value.strip()], None
151+
152+
127153
# --- Main Parser ---
128154

129155

@@ -147,18 +173,25 @@ def parse_picoschema(yaml_dict: dict) -> list[SchemaField]:
147173
name, required, is_array, is_enum, is_object = _parse_field_key(key)
148174

149175
# --- Enum fields ---
150-
# Trigger: value is a list (e.g., [active, inactive])
151-
# Why: enums declare allowed values directly as a YAML list
176+
# Trigger: value is a list or a string containing bracketed enum values
177+
# Why: enums declare allowed values directly as a YAML list, or as a quoted
178+
# string when a description follows (e.g., "[a, b], desc" must be quoted
179+
# in YAML to avoid parse errors)
152180
# Outcome: SchemaField with is_enum=True and enum_values populated
153181
if is_enum:
154-
enum_values = value if isinstance(value, list) else [str(value)]
182+
description = None
183+
if isinstance(value, list):
184+
enum_values = [str(v) for v in value]
185+
else:
186+
enum_values, description = _parse_enum_string(str(value))
155187
fields.append(
156188
SchemaField(
157189
name=name,
158190
type="enum",
159191
required=required,
160192
is_enum=True,
161-
enum_values=[str(v) for v in enum_values],
193+
enum_values=enum_values,
194+
description=description,
162195
)
163196
)
164197
continue

tests/schema/test_parser.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
parse_schema_note,
99
_parse_field_key,
1010
_parse_type_and_description,
11+
_parse_enum_string,
1112
_is_entity_ref_type,
1213
SCALAR_TYPES,
1314
)
@@ -105,6 +106,26 @@ def test_empty_string(self):
105106
assert _is_entity_ref_type("") is False
106107

107108

109+
# --- _parse_enum_string ---
110+
111+
112+
class TestParseEnumString:
    """Checks for ``_parse_enum_string`` on quoted picoschema enum strings."""

    def test_bracketed_list_with_description(self):
        """A bracketed list followed by ', text' yields the values and the text."""
        enum_values, description = _parse_enum_string(
            "[active, blocked, done, abandoned], current state"
        )
        assert enum_values == ["active", "blocked", "done", "abandoned"]
        assert description == "current state"

    def test_bracketed_list_without_description(self):
        """A bare bracketed list yields the values and no description."""
        enum_values, description = _parse_enum_string("[active, blocked]")
        assert enum_values == ["active", "blocked"]
        assert description is None

    def test_plain_string(self):
        """A string without brackets is treated as a single enum value."""
        enum_values, description = _parse_enum_string("active")
        assert enum_values == ["active"]
        assert description is None
127+
128+
108129
# --- parse_picoschema ---
109130

110131

@@ -153,6 +174,21 @@ def test_enum_values_coerced_to_string(self):
153174
fields = parse_picoschema({"year?(enum)": [2020, 2021, 2022]})
154175
assert fields[0].enum_values == ["2020", "2021", "2022"]
155176

177+
def test_enum_string_with_brackets_and_description(self):
    """Quoted picoschema enum string parsed from YAML frontmatter."""
    # The value is what YAML hands back for a quoted "[...], description" entry.
    schema = {"status?(enum)": "[active, blocked, done, abandoned], current state"}
    status_field = parse_picoschema(schema)[0]
    assert status_field.is_enum is True
    assert status_field.enum_values == ["active", "blocked", "done", "abandoned"]
    assert status_field.description == "current state"
185+
186+
def test_enum_string_with_brackets_no_description(self):
    """Quoted bracketed enum string with no trailing description."""
    schema = {"status?(enum)": "[active, blocked]"}
    status_field = parse_picoschema(schema)[0]
    assert status_field.is_enum is True
    assert status_field.enum_values == ["active", "blocked"]
    assert status_field.description is None
191+
156192
def test_object_field(self):
157193
fields = parse_picoschema(
158194
{

0 commit comments

Comments
 (0)