1+ """Tests for entity parser error handling (issues #184 and #185)."""
2+
3+ import pytest
4+ from pathlib import Path
5+ from textwrap import dedent
6+
7+ from basic_memory .markdown .entity_parser import EntityParser
8+
9+
@pytest.mark.asyncio
async def test_parse_file_with_malformed_yaml_frontmatter(tmp_path):
    """Parse a note whose frontmatter has a commented block sequence (issue #185).

    Frontmatter of this shape was reported to make YAML parsing fail in
    production. ``parse_file`` must not raise for it. Whether the YAML loader
    accepts this exact input is not pinned down by this test, so two outcomes
    are accepted:

    * the frontmatter parses and the declared title is used, or
    * the frontmatter is rejected, the file is treated as plain markdown and
      the title falls back to the file stem.

    In both cases the type must be ``"note"``: it is declared in the
    frontmatter and is also the fallback type the other tests in this module
    expect.
    """
    # Write a note whose tag list carries an inline comment on its first entry.
    test_file = tmp_path / "malformed.md"
    content = dedent(
        """
        ---
        title: Group Chat Texts
        tags:
        - family # Line 5, column 7 - this syntax can fail in certain YAML contexts
        - messages
        type: note
        ---
        # Group Chat Texts

        Content here
        """
    ).strip()
    test_file.write_text(content)

    # Parsing must complete without a YAML error escaping.
    parser = EntityParser(tmp_path)
    result = await parser.parse_file(test_file)

    assert result is not None
    frontmatter = result.frontmatter
    # Either the declared title (YAML accepted) or the file stem (YAML rejected);
    # anything else means the parser produced an unexpected title.
    assert frontmatter.title in ("Group Chat Texts", "malformed")
    # Declared value and fallback value coincide, so the type is unambiguous.
    assert frontmatter.type == "note"
45+
46+
@pytest.mark.asyncio
async def test_parse_file_with_completely_invalid_yaml(tmp_path):
    """Frontmatter that is not valid YAML must fall back to defaults (issue #185).

    The flow sequence in the frontmatter below is never closed, which is the
    extreme case of YAML that cannot be loaded at all. Parsing the file is
    still expected to succeed.
    """
    broken_note = tmp_path / "broken_yaml.md"
    raw_markdown = dedent(
        """
        ---
        title: Invalid YAML
        this is: [not, valid, yaml
        missing: closing bracket
        ---
        # Content

        This file has broken YAML frontmatter.
        """
    ).strip()
    broken_note.write_text(raw_markdown)

    # No exception may escape parse_file for this input.
    parsed = await EntityParser(tmp_path).parse_file(broken_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    assert metadata.title == "broken_yaml"  # taken from the file name
    assert metadata.type == "note"  # fallback type
    # The markdown heading must still be present in the parsed content.
    assert "# Content" in parsed.content
79+
80+
@pytest.mark.asyncio
async def test_parse_file_without_entity_type(tmp_path):
    """A note that declares no ``type`` must be given the default one (issue #184).

    A missing entity type previously surfaced as a NOT NULL constraint error;
    the parsed frontmatter therefore has to carry a type even when the file
    does not declare one.
    """
    untyped_note = tmp_path / "no_type.md"
    raw_markdown = dedent(
        """
        ---
        title: The Invisible Weight of Mental Habits
        ---
        # The Invisible Weight of Mental Habits

        An article about mental habits.
        """
    ).strip()
    untyped_note.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(untyped_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    # Type is filled in by default; the declared title is kept as written.
    assert metadata.type == "note"
    assert metadata.title == "The Invisible Weight of Mental Habits"
109+
110+
@pytest.mark.asyncio
async def test_parse_file_with_empty_frontmatter(tmp_path):
    """An empty frontmatter block must yield default title and type (issue #184)."""
    empty_meta_note = tmp_path / "empty_frontmatter.md"
    raw_markdown = dedent(
        """
        ---
        ---
        # Content

        This file has empty frontmatter.
        """
    ).strip()
    empty_meta_note.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(empty_meta_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    assert metadata.type == "note"  # fallback type
    assert metadata.title == "empty_frontmatter"  # taken from the file name
135+
136+
@pytest.mark.asyncio
async def test_parse_file_without_frontmatter(tmp_path):
    """A plain markdown file with no frontmatter must yield defaults (issue #184)."""
    plain_note = tmp_path / "no_frontmatter.md"
    raw_markdown = dedent(
        """
        # Just Content

        This file has no frontmatter at all.
        """
    ).strip()
    plain_note.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(plain_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    assert metadata.type == "note"  # fallback type
    assert metadata.title == "no_frontmatter"  # taken from the file name
159+
160+
@pytest.mark.asyncio
async def test_parse_file_with_null_entity_type(tmp_path):
    """An explicit ``type: null`` must be replaced by the default type (issue #184)."""
    null_type_note = tmp_path / "null_type.md"
    raw_markdown = dedent(
        """
        ---
        title: Test File
        type: null
        ---
        # Content
        """
    ).strip()
    null_type_note.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(null_type_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    # A declared-but-null type is treated like a missing one.
    assert metadata.type == "note"
    assert metadata.title == "Test File"
185+
186+
@pytest.mark.asyncio
async def test_parse_valid_file_still_works(tmp_path):
    """Well-formed frontmatter must still be read back exactly as declared."""
    valid_note = tmp_path / "valid.md"
    raw_markdown = dedent(
        """
        ---
        title: Valid File
        type: knowledge
        tags:
        - test
        - valid
        ---
        # Valid File

        This is a properly formatted file.
        """
    ).strip()
    valid_note.write_text(raw_markdown)

    parsed = await EntityParser(tmp_path).parse_file(valid_note)

    assert parsed is not None
    metadata = parsed.frontmatter
    # Every declared field comes through unchanged; no defaults are applied.
    assert metadata.title == "Valid File"
    assert metadata.type == "knowledge"
    assert metadata.tags == ["test", "valid"]
0 commit comments