Skip to content

Commit d1431bd

Browse files
phernandez and claude authored
fix: Handle YAML parsing errors and missing entity_type in markdown files (#368)
Signed-off-by: phernandez <paul@basicmachines.co> Co-authored-by: Claude <noreply@anthropic.com>
1 parent 171bef7 commit d1431bd

2 files changed

Lines changed: 239 additions & 4 deletions

File tree

src/basic_memory/markdown/entity_parser.py

Lines changed: 23 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
import dateparser
1212
import frontmatter
13+
import yaml
14+
from loguru import logger
1315
from markdown_it import MarkdownIt
1416

1517
from basic_memory.markdown.plugins import observation_plugin, relation_plugin
@@ -111,18 +113,35 @@ def get_file_path(self, path):
111113
return self.base_path / path
112114

113115
async def parse_file_content(self, absolute_path, file_content):
114-
post = frontmatter.loads(file_content)
116+
# Parse frontmatter with proper error handling for malformed YAML (issue #185)
117+
try:
118+
post = frontmatter.loads(file_content)
119+
except yaml.YAMLError as e:
120+
# Log the YAML parsing error with file context
121+
logger.warning(
122+
f"Failed to parse YAML frontmatter in {absolute_path}: {e}. "
123+
f"Treating file as plain markdown without frontmatter."
124+
)
125+
# Create a post with no frontmatter - treat entire content as markdown
126+
post = frontmatter.Post(file_content, metadata={})
127+
115128
# Extract file stat info
116129
file_stats = absolute_path.stat()
117130
metadata = post.metadata
131+
132+
# Ensure required fields have defaults (issue #184)
118133
metadata["title"] = post.metadata.get("title", absolute_path.stem)
119-
metadata["type"] = post.metadata.get("type", "note")
134+
# Handle type - use default if missing OR explicitly set to None/null
135+
entity_type = post.metadata.get("type")
136+
metadata["type"] = entity_type if entity_type is not None else "note"
137+
120138
tags = parse_tags(post.metadata.get("tags", [])) # pyright: ignore
121139
if tags:
122140
metadata["tags"] = tags
123-
# frontmatter
141+
142+
# frontmatter - use metadata with defaults applied
124143
entity_frontmatter = EntityFrontmatter(
125-
metadata=post.metadata,
144+
metadata=metadata,
126145
)
127146
entity_content = parse(post.content)
128147
return EntityMarkdown(
Lines changed: 216 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,216 @@
1+
"""Tests for entity parser error handling (issues #184 and #185)."""
2+
3+
import pytest
4+
from pathlib import Path
5+
from textwrap import dedent
6+
7+
from basic_memory.markdown.entity_parser import EntityParser
8+
9+
10+
@pytest.mark.asyncio
async def test_parse_file_with_malformed_yaml_frontmatter(tmp_path):
    """Frontmatter with an inline-commented block sequence must not raise (issue #185).

    The fixture is modelled on the frontmatter from the production report, where
    block sequence entries made YAML parsing fail. Depending on the YAML loader
    the frontmatter is either parsed normally or rejected; in the latter case
    the parser is expected to fall back to treating the file as plain markdown.
    Both outcomes are acceptable, but each must produce concrete values, so the
    assertions below pin what is true on either path instead of only checking
    for non-None results.
    """
    # Create a file whose frontmatter mixes block sequence entries with a comment
    test_file = tmp_path / "malformed.md"
    content = dedent(
        """
        ---
        title: Group Chat Texts
        tags:
        - family # Line 5, column 7 - this syntax can fail in certain YAML contexts
        - messages
        type: note
        ---
        # Group Chat Texts

        Content here
        """
    ).strip()
    test_file.write_text(content)

    # Parse the file - should not raise YAMLError
    parser = EntityParser(tmp_path)
    result = await parser.parse_file(test_file)

    assert result is not None
    # Declared title when the YAML parsed, filename stem when the fallback ran
    assert result.frontmatter.title in ("Group Chat Texts", "malformed")
    # "note" is both the declared type and the default, so it holds on both paths
    assert result.frontmatter.type == "note"
    # The markdown body must survive whichever path was taken
    assert "Content here" in result.content
45+
46+
47+
@pytest.mark.asyncio
async def test_parse_file_with_completely_invalid_yaml(tmp_path):
    """Unparseable YAML frontmatter falls back to defaults instead of raising (issue #185).

    The frontmatter contains an unterminated flow sequence, so YAML parsing
    fails outright and the file is expected to be handled as plain markdown.
    """
    raw_markdown = dedent(
        """
        ---
        title: Invalid YAML
        this is: [not, valid, yaml
        missing: closing bracket
        ---
        # Content

        This file has broken YAML frontmatter.
        """
    ).strip()
    md_path = tmp_path / "broken_yaml.md"
    md_path.write_text(raw_markdown)

    # Parsing must complete without propagating the YAML error
    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    fm = parsed.frontmatter
    assert fm.title == "broken_yaml"  # Default from filename
    assert fm.type == "note"  # Default type
    # With frontmatter parsing failed, the whole file is kept as content
    assert "# Content" in parsed.content
79+
80+
81+
@pytest.mark.asyncio
async def test_parse_file_without_entity_type(tmp_path):
    """Frontmatter lacking a ``type`` key is given the default type (issue #184).

    Covers the reported NOT NULL constraint failure caused by a missing
    entity_type.
    """
    raw_markdown = dedent(
        """
        ---
        title: The Invisible Weight of Mental Habits
        ---
        # The Invisible Weight of Mental Habits

        An article about mental habits.
        """
    ).strip()
    md_path = tmp_path / "no_type.md"
    md_path.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    fm = parsed.frontmatter
    assert fm.type == "note"  # Default type applied
    assert fm.title == "The Invisible Weight of Mental Habits"
109+
110+
111+
@pytest.mark.asyncio
async def test_parse_file_with_empty_frontmatter(tmp_path):
    """An empty frontmatter block yields default title and type (issue #184)."""
    raw_markdown = dedent(
        """
        ---
        ---
        # Content

        This file has empty frontmatter.
        """
    ).strip()
    md_path = tmp_path / "empty_frontmatter.md"
    md_path.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    fm = parsed.frontmatter
    assert fm.type == "note"  # Default type
    assert fm.title == "empty_frontmatter"  # Default from filename
135+
136+
137+
@pytest.mark.asyncio
async def test_parse_file_without_frontmatter(tmp_path):
    """A markdown file with no frontmatter block at all gets defaults (issue #184)."""
    raw_markdown = dedent(
        """
        # Just Content

        This file has no frontmatter at all.
        """
    ).strip()
    md_path = tmp_path / "no_frontmatter.md"
    md_path.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    fm = parsed.frontmatter
    assert fm.type == "note"  # Default type
    assert fm.title == "no_frontmatter"  # Default from filename
159+
160+
161+
@pytest.mark.asyncio
async def test_parse_file_with_null_entity_type(tmp_path):
    """An explicit ``type: null`` is replaced by the default type (issue #184)."""
    raw_markdown = dedent(
        """
        ---
        title: Test File
        type: null
        ---
        # Content
        """
    ).strip()
    md_path = tmp_path / "null_type.md"
    md_path.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    fm = parsed.frontmatter
    # The default must apply even though the key is present with a null value
    assert fm.type == "note"  # Default type applied
    assert fm.title == "Test File"
185+
186+
187+
@pytest.mark.asyncio
async def test_parse_valid_file_still_works(tmp_path):
    """Well-formed frontmatter keeps its declared title, type and tags."""
    raw_markdown = dedent(
        """
        ---
        title: Valid File
        type: knowledge
        tags:
        - test
        - valid
        ---
        # Valid File

        This is a properly formatted file.
        """
    ).strip()
    md_path = tmp_path / "valid.md"
    md_path.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(md_path)
    assert parsed is not None

    # Declared values must come through untouched by the default handling
    fm = parsed.frontmatter
    assert fm.title == "Valid File"
    assert fm.type == "knowledge"
    assert fm.tags == ["test", "valid"]

0 commit comments

Comments
 (0)