fix: exclude markdown and wiki links from observation parsing

claude[bot] · jope-bm · claude[bot] · commit 89db4a71fb1f · 2025-08-22T20:40:38.000Z
The is_observation() function was incorrectly identifying markdown links ([text](url)) and wiki links ([[text]]) as observations because its bracket detection was too simple: any content that started with "[" and contained "]" was treated as having a category.

This change:
- Uses a regex to properly validate the observation format: [category] content
- Ensures that only proper observations, with a category followed by a space and content, are detected
- Maintains support for tag-only observations (#tag)
- Fixes false positives from links at the beginning of a line

Fixes #247

Co-authored-by: jope-bm <jope-bm@users.noreply.github.com>
diff --git a/src/basic_memory/markdown/plugins.py b/src/basic_memory/markdown/plugins.py
@@ -8,56 +8,52 @@
 # Observation handling functions
 def is_observation(token: Token) -> bool:
     """Check if token looks like our observation format."""
+    import re
     if token.type != "inline":  # pragma: no cover
         return False
-
     content = token.content.strip()
     if not content:  # pragma: no cover
         return False
-
     # if it's a markdown_task, return false
     if content.startswith("[ ]") or content.startswith("[x]") or content.startswith("[-]"):
         return False
-
-    has_category = content.startswith("[") and "]" in content
+    # Check for proper observation format: [category] content
+    match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
     has_tags = "#" in content
-    return has_category or has_tags
+    return bool(match) or has_tags
 
 
 def parse_observation(token: Token) -> Dict[str, Any]:
     """Extract observation parts from token."""
-    # Strip bullet point if present
+    import re
     content = token.content.strip()
-
-    # Parse [category]
+    
+    # Parse [category] with regex
+    match = re.match(r"^\[([^\[\]()]+)\]\s+(.+)", content)
     category = None
-    if content.startswith("["):
-        end = content.find("]")
-        if end != -1:
-            category = content[1:end].strip() or None  # Convert empty to None
-            content = content[end + 1 :].strip()
-
+    if match:
+        category = match.group(1).strip()
+        content = match.group(2).strip()
+    
     # Parse (context)
     context = None
     if content.endswith(")"):
         start = content.rfind("(")
         if start != -1:
             context = content[start + 1 : -1].strip()
             content = content[:start].strip()
-
+    
     # Extract tags and keep original content
     tags = []
     parts = content.split()
     for part in parts:
         if part.startswith("#"):
-            # Handle multiple #tags stuck together
             if "#" in part[1:]:
-                # Split on # but keep non-empty tags
                 subtags = [t for t in part.split("#") if t]
                 tags.extend(subtags)
             else:
                 tags.append(part[1:])
-
+    
     return {
         "category": category,
         "content": content,