-
Notifications
You must be signed in to change notification settings - Fork 187
Expand file tree
/
Copy pathtest_entity_parser_error_handling.py
More file actions
216 lines (177 loc) · 6.37 KB
/
test_entity_parser_error_handling.py
File metadata and controls
216 lines (177 loc) · 6.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
"""Tests for entity parser error handling (issues #184 and #185)."""
import pytest
from pathlib import Path
from textwrap import dedent
from basic_memory.markdown.entity_parser import EntityParser
@pytest.mark.asyncio
async def test_parse_file_with_malformed_yaml_frontmatter(tmp_path):
    """Parsing must not raise on block-sequence frontmatter (issue #185).

    Regression test for a production error in which block sequence entries
    made YAML parsing fail. Whether or not the YAML is accepted, the parse
    result is expected to carry a populated title and type.
    """
    markdown = dedent(
        """
        ---
        title: Group Chat Texts
        tags:
        - family # Line 5, column 7 - this syntax can fail in certain YAML contexts
        - messages
        type: note
        ---
        # Group Chat Texts
        Content here
        """
    ).strip()
    note_path = tmp_path / "malformed.md"
    note_path.write_text(markdown)

    # A YAMLError escaping from parse_file would fail the test at this call.
    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    # Kept loose on purpose: the values may come from the YAML itself or from
    # the fallback defaults, so only their presence is checked.
    frontmatter = parsed.frontmatter
    assert frontmatter.title is not None
    assert frontmatter.type is not None
@pytest.mark.asyncio
async def test_parse_file_with_completely_invalid_yaml(tmp_path):
    """Unparseable YAML frontmatter yields defaults, not an error (issue #185).

    Covers the extreme case in which the frontmatter cannot be parsed as YAML
    at all (an unclosed flow sequence).
    """
    broken_markdown = dedent(
        """
        ---
        title: Invalid YAML
        this is: [not, valid, yaml
        missing: closing bracket
        ---
        # Content
        This file has broken YAML frontmatter.
        """
    ).strip()
    note_path = tmp_path / "broken_yaml.md"
    note_path.write_text(broken_markdown)

    # Any exception raised while parsing fails the test at this call.
    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    frontmatter = parsed.frontmatter
    # With the frontmatter unusable, the title is derived from the file name
    # and the type takes its default value.
    assert frontmatter.title == "broken_yaml"
    assert frontmatter.type == "note"

    # Since frontmatter parsing failed, the body should hold the whole file.
    assert "# Content" in parsed.content
@pytest.mark.asyncio
async def test_parse_file_without_entity_type(tmp_path):
    """A missing ``type`` key in the frontmatter is filled with a default (issue #184).

    Regression test for the NOT NULL constraint error that occurred when
    entity_type was absent.
    """
    markdown = dedent(
        """
        ---
        title: The Invisible Weight of Mental Habits
        ---
        # The Invisible Weight of Mental Habits
        An article about mental habits.
        """
    ).strip()
    note_path = tmp_path / "no_type.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    frontmatter = parsed.frontmatter
    # The frontmatter declares only a title, so the type must be the default.
    assert frontmatter.type == "note"
    assert frontmatter.title == "The Invisible Weight of Mental Habits"
@pytest.mark.asyncio
async def test_parse_file_with_empty_frontmatter(tmp_path):
    """An empty frontmatter block results in default metadata (issue #184)."""
    # The two delimiter lines enclose nothing at all.
    markdown = dedent(
        """
        ---
        ---
        # Content
        This file has empty frontmatter.
        """
    ).strip()
    note_path = tmp_path / "empty_frontmatter.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    frontmatter = parsed.frontmatter
    assert frontmatter.type == "note"  # default type
    assert frontmatter.title == "empty_frontmatter"  # taken from the file name
@pytest.mark.asyncio
async def test_parse_file_without_frontmatter(tmp_path):
    """A plain markdown file with no frontmatter gets default metadata (issue #184)."""
    # No frontmatter delimiters anywhere in the file.
    markdown = dedent(
        """
        # Just Content
        This file has no frontmatter at all.
        """
    ).strip()
    note_path = tmp_path / "no_frontmatter.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    frontmatter = parsed.frontmatter
    assert frontmatter.type == "note"  # default type
    assert frontmatter.title == "no_frontmatter"  # taken from the file name
@pytest.mark.asyncio
async def test_parse_file_with_null_entity_type(tmp_path):
    """An explicit ``type: null`` is replaced by the default type (issue #184)."""
    markdown = dedent(
        """
        ---
        title: Test File
        type: null
        ---
        # Content
        """
    ).strip()
    note_path = tmp_path / "null_type.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    frontmatter = parsed.frontmatter
    # The default must win even though the key is present with a null value.
    assert frontmatter.type == "note"
    assert frontmatter.title == "Test File"
@pytest.mark.asyncio
async def test_parse_valid_file_still_works(tmp_path):
    """Well-formed frontmatter is still parsed with its declared values."""
    markdown = dedent(
        """
        ---
        title: Valid File
        type: knowledge
        tags:
        - test
        - valid
        ---
        # Valid File
        This is a properly formatted file.
        """
    ).strip()
    note_path = tmp_path / "valid.md"
    note_path.write_text(markdown)

    parsed = await EntityParser(tmp_path).parse_file(note_path)

    assert parsed is not None

    # Every declared field must come through as written, not as a default.
    frontmatter = parsed.frontmatter
    assert frontmatter.title == "Valid File"
    assert frontmatter.type == "knowledge"
    assert frontmatter.tags == ["test", "valid"]