Skip to content

Commit 236ae26

Browse files
phernandez and claude committed
fix: coerce list frontmatter values to strings for title and type fields
YAML block sequence syntax can cause PyYAML to parse scalar fields like `title` and `type` as lists instead of strings. Downstream code calls .strip()/.casefold() on these values, crashing with "'list' object has no attribute 'strip'".

Add _coerce_to_string() helper that joins list items with ", " and apply it in entity_parser.parse_markdown_content() and entity_service.fast_edit_entity() where these fields are extracted.

Fixes basic-memory-cloud#376

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent bd5923a commit 236ae26

3 files changed

Lines changed: 174 additions & 69 deletions

File tree

src/basic_memory/markdown/entity_parser.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,22 @@ def normalize_frontmatter_value(value: Any) -> Any:
8888
return value
8989

9090

91+
def _coerce_to_string(value: Any) -> str:
    """Coerce a frontmatter value to a string.

    YAML can parse scalar-looking fields as lists when the author uses block
    sequence syntax. For fields like ``title`` and ``type`` that *must* be
    strings, this helper converts sequences to a comma-separated string and
    any other non-string type via ``str()``.

    Args:
        value: The raw frontmatter value (string, list/tuple, or any scalar).

    Returns:
        ``value`` unchanged when it is already a string; the ``", "``-joined
        items when it is a list or tuple (nested sequences are flattened and
        ``None`` entries are skipped); ``str(value)`` for everything else.
    """
    if isinstance(value, str):
        return value

    if isinstance(value, (list, tuple)):

        def _flatten(items: Any) -> Any:
            # Yield one string per scalar leaf, depth-first, in document order.
            for item in items:
                if item is None:
                    # A blank sequence entry ("- ") parses as None; emitting
                    # the literal text "None" into a title is never wanted.
                    continue
                if isinstance(item, (list, tuple)):
                    # Nested sequences would otherwise leak their Python repr
                    # (e.g. "['a', 'b']") into the resulting string.
                    yield from _flatten(item)
                else:
                    yield item if isinstance(item, str) else str(item)

        return ", ".join(_flatten(value))

    return str(value)
105+
106+
91107
def normalize_frontmatter_metadata(metadata: dict) -> dict:
92108
"""Normalize all values in frontmatter metadata dict.
93109
@@ -248,14 +264,21 @@ async def parse_markdown_content(
248264
# Normalize frontmatter values
249265
metadata = normalize_frontmatter_metadata(post.metadata)
250266

251-
# Ensure required fields have defaults
267+
# Ensure required string fields are always strings.
268+
# YAML can parse these as lists when authors use block sequence syntax
269+
# (e.g. "title:\n - My Title"), causing 'list' has no attribute 'strip'
270+
# downstream. See basic-memory-cloud#376.
252271
title = metadata.get("title")
272+
if title is not None:
273+
title = _coerce_to_string(title)
253274
if not title or title == "None":
254275
metadata["title"] = file_path.stem
255276
else:
256277
metadata["title"] = title
257278

258279
note_type = metadata.get("type")
280+
if note_type is not None:
281+
note_type = _coerce_to_string(note_type)
259282
metadata["type"] = note_type if note_type is not None else "note"
260283

261284
tags = parse_tags(metadata.get("tags", [])) # pyright: ignore

src/basic_memory/services/entity_service.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919
dump_frontmatter,
2020
)
2121
from basic_memory.markdown import EntityMarkdown
22-
from basic_memory.markdown.entity_parser import EntityParser, normalize_frontmatter_metadata
22+
from basic_memory.markdown.entity_parser import (
23+
EntityParser,
24+
_coerce_to_string,
25+
normalize_frontmatter_metadata,
26+
)
2327
from basic_memory.markdown.utils import entity_model_from_markdown, schema_to_markdown
2428
from basic_memory.models import Entity as EntityModel
2529
from basic_memory.models import Observation, Relation
@@ -501,10 +505,11 @@ async def fast_edit_entity(
501505
if has_frontmatter(new_content):
502506
content_frontmatter = parse_frontmatter(new_content)
503507

508+
# Coerce to string — YAML may parse these as lists (cloud#376)
504509
if "title" in content_frontmatter:
505-
update_data["title"] = content_frontmatter["title"]
510+
update_data["title"] = _coerce_to_string(content_frontmatter["title"])
506511
if "type" in content_frontmatter:
507-
update_data["note_type"] = content_frontmatter["type"]
512+
update_data["note_type"] = _coerce_to_string(content_frontmatter["type"])
508513

509514
if "permalink" in content_frontmatter:
510515
content_markdown = self._build_frontmatter_markdown(

tests/markdown/test_date_frontmatter_parsing.py

Lines changed: 142 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -5,65 +5,68 @@
55
but later code expects strings and calls .strip() on them, causing AttributeError.
66
"""
77

8+
from textwrap import dedent
9+
810
import pytest
11+
912
from basic_memory.markdown.entity_parser import EntityParser
1013

1114

1215
@pytest.fixture
1316
def test_file_with_date(tmp_path):
1417
"""Create a test file with date fields in frontmatter."""
1518
test_file = tmp_path / "test_note.md"
16-
content = """---
17-
title: Test Note
18-
date: 2025-10-24
19-
created: 2025-10-24
20-
tags:
21-
- python
22-
- testing
23-
---
24-
25-
# Test Content
26-
27-
This file has date fields in frontmatter that PyYAML will parse as datetime.date objects.
28-
"""
29-
test_file.write_text(content)
19+
test_file.write_text(dedent("""\
20+
---
21+
title: Test Note
22+
date: 2025-10-24
23+
created: 2025-10-24
24+
tags:
25+
- python
26+
- testing
27+
---
28+
29+
# Test Content
30+
31+
This file has date fields in frontmatter that PyYAML will parse as datetime.date objects.
32+
"""))
3033
return test_file
3134

3235

3336
@pytest.fixture
3437
def test_file_with_date_in_tags(tmp_path):
3538
"""Create a test file with a date value in tags (edge case)."""
3639
test_file = tmp_path / "test_note_date_tags.md"
37-
content = """---
38-
title: Test Note with Date Tags
39-
tags: 2025-10-24
40-
---
40+
test_file.write_text(dedent("""\
41+
---
42+
title: Test Note with Date Tags
43+
tags: 2025-10-24
44+
---
4145
42-
# Test Content
46+
# Test Content
4347
44-
This file has a date value as tags, which will be parsed as datetime.date.
45-
"""
46-
test_file.write_text(content)
48+
This file has a date value as tags, which will be parsed as datetime.date.
49+
"""))
4750
return test_file
4851

4952

5053
@pytest.fixture
5154
def test_file_with_dates_in_tag_list(tmp_path):
5255
"""Create a test file with dates in a tag list (edge case)."""
5356
test_file = tmp_path / "test_note_dates_in_list.md"
54-
content = """---
55-
title: Test Note with Dates in Tags List
56-
tags:
57-
- valid-tag
58-
- 2025-10-24
59-
- another-tag
60-
---
61-
62-
# Test Content
63-
64-
This file has date values mixed into tags list.
65-
"""
66-
test_file.write_text(content)
57+
test_file.write_text(dedent("""\
58+
---
59+
title: Test Note with Dates in Tags List
60+
tags:
61+
- valid-tag
62+
- 2025-10-24
63+
- another-tag
64+
---
65+
66+
# Test Content
67+
68+
This file has date values mixed into tags list.
69+
"""))
6770
return test_file
6871

6972

@@ -129,6 +132,83 @@ async def test_parse_file_with_dates_in_tag_list(test_file_with_dates_in_tag_lis
129132
assert "2025-10-24" in tags
130133

131134

135+
@pytest.mark.asyncio
136+
async def test_parse_file_with_list_frontmatter_fields(tmp_path):
137+
"""Test that list values in expected-string frontmatter fields are coerced to strings.
138+
139+
Reproduces basic-memory-cloud#376 where a markdown file has YAML list values
140+
in frontmatter fields like 'title' or 'type' that downstream code expects
141+
to be strings, causing 'list' object has no attribute 'strip'.
142+
"""
143+
test_file = tmp_path / "test_list_fields.md"
144+
test_file.write_text(dedent("""\
145+
---
146+
title:
147+
- Week 2 Discussion Post
148+
- Alternate Title
149+
tags:
150+
- coursework
151+
- sie-571
152+
type:
153+
- note
154+
- assignment
155+
some_field:
156+
- item1
157+
- item2
158+
---
159+
160+
# Content
161+
162+
Some body text.
163+
"""))
164+
165+
parser = EntityParser(tmp_path)
166+
entity_markdown = await parser.parse_file(test_file)
167+
168+
# title must always be a string, even when YAML parses it as a list
169+
title = entity_markdown.frontmatter.title
170+
assert isinstance(title, str), f"Expected str, got {type(title)}"
171+
assert "Week 2 Discussion Post" in title
172+
173+
# type must always be a string
174+
note_type = entity_markdown.frontmatter.type
175+
assert isinstance(note_type, str), f"Expected str, got {type(note_type)}"
176+
177+
# tags should still be a list (they're explicitly handled)
178+
tags = entity_markdown.frontmatter.tags
179+
assert isinstance(tags, list)
180+
assert "coursework" in tags
181+
182+
# arbitrary list fields in metadata are preserved as lists
183+
some_field = entity_markdown.frontmatter.metadata.get("some_field")
184+
assert isinstance(some_field, list)
185+
assert some_field == ["item1", "item2"]
186+
187+
# Verify title is safe for .strip() and .casefold() (the actual crash sites)
188+
assert title.strip().casefold()
189+
190+
191+
@pytest.mark.asyncio
192+
async def test_parse_file_with_list_title_single_item(tmp_path):
193+
"""Test that a single-item list title is coerced to a plain string."""
194+
test_file = tmp_path / "test_single_list_title.md"
195+
test_file.write_text(dedent("""\
196+
---
197+
title:
198+
- My Single Title
199+
---
200+
201+
# Content
202+
"""))
203+
204+
parser = EntityParser(tmp_path)
205+
entity_markdown = await parser.parse_file(test_file)
206+
207+
title = entity_markdown.frontmatter.title
208+
assert isinstance(title, str)
209+
assert title == "My Single Title"
210+
211+
132212
@pytest.mark.asyncio
133213
async def test_parse_file_with_various_yaml_types(tmp_path):
134214
"""Test that various YAML types in frontmatter don't cause errors.
@@ -138,24 +218,24 @@ async def test_parse_file_with_various_yaml_types(tmp_path):
138218
when code expects strings and calls .strip().
139219
"""
140220
test_file = tmp_path / "test_yaml_types.md"
141-
content = """---
142-
title: Test YAML Types
143-
date: 2025-10-24
144-
priority: 1
145-
completed: true
146-
tags:
147-
- python
148-
- testing
149-
metadata:
150-
author: Test User
151-
version: 1.0
152-
---
153-
154-
# Test Content
155-
156-
This file has various YAML types that need to be normalized.
157-
"""
158-
test_file.write_text(content)
221+
test_file.write_text(dedent("""\
222+
---
223+
title: Test YAML Types
224+
date: 2025-10-24
225+
priority: 1
226+
completed: true
227+
tags:
228+
- python
229+
- testing
230+
metadata:
231+
author: Test User
232+
version: 1.0
233+
---
234+
235+
# Test Content
236+
237+
This file has various YAML types that need to be normalized.
238+
"""))
159239

160240
parser = EntityParser(tmp_path)
161241
entity_markdown = await parser.parse_file(test_file)
@@ -202,20 +282,17 @@ async def test_parse_file_with_datetime_objects(tmp_path):
202282
with time components (as parsed by PyYAML), ensuring they're converted to ISO format strings.
203283
"""
204284
test_file = tmp_path / "test_datetime.md"
285+
test_file.write_text(dedent("""\
286+
---
287+
title: Test Datetime
288+
created_at: 2025-10-24 14:30:00
289+
updated_at: 2025-10-24T00:00:00
290+
---
205291
206-
# YAML datetime strings that PyYAML will parse as datetime objects
207-
# Format: YYYY-MM-DD HH:MM:SS or YYYY-MM-DDTHH:MM:SS
208-
content = """---
209-
title: Test Datetime
210-
created_at: 2025-10-24 14:30:00
211-
updated_at: 2025-10-24T00:00:00
212-
---
292+
# Test Content
213293
214-
# Test Content
215-
216-
This file has datetime values in frontmatter that PyYAML will parse as datetime objects.
217-
"""
218-
test_file.write_text(content)
294+
This file has datetime values in frontmatter that PyYAML will parse as datetime objects.
295+
"""))
219296

220297
parser = EntityParser(tmp_path)
221298
entity_markdown = await parser.parse_file(test_file)

0 commit comments

Comments
 (0)