Skip to content

Commit 89db4a7

Browse files
claude[bot] and jope-bm committed
fix: Fix observation parsing to exclude markdown and wiki links
The is_observation() function incorrectly identified markdown links [text](url) and wiki links [[text]] as observations because its bracket detection was too simple: any content that started with "[" and contained "]" was treated as having a category.

This change:
- Uses a regex to properly validate the observation format: [category] content
- Ensures only proper observations, with a category followed by whitespace and content, are detected
- Maintains support for tag-only observations (#tag)
- Fixes false positives from links at the beginning of a line

Fixes #247

Co-authored-by: jope-bm <jope-bm@users.noreply.github.com>
1 parent 2cd2a62 commit 89db4a7

1 file changed

Lines changed: 14 additions & 18 deletions

File tree

src/basic_memory/markdown/plugins.py

Lines changed: 14 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,56 +8,52 @@
88
# Observation handling functions
99
def is_observation(token: Token) -> bool:
1010
"""Check if token looks like our observation format."""
11+
import re
1112
if token.type != "inline": # pragma: no cover
1213
return False
13-
1414
content = token.content.strip()
1515
if not content: # pragma: no cover
1616
return False
17-
1817
# if it's a markdown_task, return false
1918
if content.startswith("[ ]") or content.startswith("[x]") or content.startswith("[-]"):
2019
return False
21-
22-
has_category = content.startswith("[") and "]" in content
20+
# Check for proper observation format: [category] content
21+
match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
2322
has_tags = "#" in content
24-
return has_category or has_tags
23+
return bool(match) or has_tags
2524

2625

2726
def parse_observation(token: Token) -> Dict[str, Any]:
2827
"""Extract observation parts from token."""
29-
# Strip bullet point if present
28+
import re
3029
content = token.content.strip()
31-
32-
# Parse [category]
30+
31+
# Parse [category] with regex
32+
match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
3333
category = None
34-
if content.startswith("["):
35-
end = content.find("]")
36-
if end != -1:
37-
category = content[1:end].strip() or None # Convert empty to None
38-
content = content[end + 1 :].strip()
39-
34+
if match:
35+
category = match.group(1).strip()
36+
content = match.group(2).strip()
37+
4038
# Parse (context)
4139
context = None
4240
if content.endswith(")"):
4341
start = content.rfind("(")
4442
if start != -1:
4543
context = content[start + 1 : -1].strip()
4644
content = content[:start].strip()
47-
45+
4846
# Extract tags and keep original content
4947
tags = []
5048
parts = content.split()
5149
for part in parts:
5250
if part.startswith("#"):
53-
# Handle multiple #tags stuck together
5451
if "#" in part[1:]:
55-
# Split on # but keep non-empty tags
5652
subtags = [t for t in part.split("#") if t]
5753
tags.extend(subtags)
5854
else:
5955
tags.append(part[1:])
60-
56+
6157
return {
6258
"category": category,
6359
"content": content,

0 commit comments

Comments
 (0)