fix: [BUG] '#' character accumulation in markdown frontmatter tags property (#79)

github-actions[bot] · web-flow · commit 6c19c9edf513 · 2025-04-05T23:07:53.000-05:00
diff --git a/src/basic_memory/services/entity_service.py b/src/basic_memory/services/entity_service.py
@@ -315,7 +315,7 @@ async def update_entity_relations(
             except IntegrityError:
                 # Unique constraint violation - relation already exists
                 logger.debug(
-                    f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}, type: {rel.type}"
+                    f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}"
                 )
                 continue
 
diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py
@@ -138,15 +138,23 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
 
     Returns:
         A list of tag strings, or an empty list if no tags
+    
+    Note:
+        Leading '#' characters are stripped from each tag (after trimming
+        whitespace) so they do not accumulate when tags are re-parsed.
     """
     if tags is None:
         return []
 
+    # Process list of tags
     if isinstance(tags, list):
-        return tags
+        # First strip whitespace, then strip leading '#' characters to prevent accumulation
+        return [tag.strip().lstrip('#') for tag in tags if tag and tag.strip()]
 
+    # Process comma-separated string of tags
     if isinstance(tags, str):
-        return [tag.strip() for tag in tags.split(",") if tag.strip()]
+        # Split by comma, strip whitespace, then strip leading '#' characters
+        return [tag.strip().lstrip('#') for tag in tags.split(",") if tag and tag.strip()]
 
     # For any other type, try to convert to string and parse
     try:  # pragma: no cover
diff --git a/tests/utils/test_parse_tags.py b/tests/utils/test_parse_tags.py
@@ -0,0 +1,51 @@
+"""Tests for parse_tags utility function."""
+
+from typing import List, Union
+
+import pytest
+
+from basic_memory.utils import parse_tags
+
+
+@pytest.mark.parametrize(
+    "input_tags,expected",
+    [
+        # None input
+        (None, []),
+        # List inputs
+        ([], []),
+        (["tag1", "tag2"], ["tag1", "tag2"]),
+        (["tag1", "", "tag2"], ["tag1", "tag2"]),  # Empty tags are filtered
+        ([" tag1 ", " tag2 "], ["tag1", "tag2"]),  # Whitespace is stripped
+        # String inputs
+        ("", []),
+        ("tag1", ["tag1"]),
+        ("tag1,tag2", ["tag1", "tag2"]),
+        ("tag1, tag2", ["tag1", "tag2"]),  # Whitespace after comma is stripped
+        ("tag1,,tag2", ["tag1", "tag2"]),  # Empty tags are filtered
+        # Tags with leading '#' characters - these should be stripped
+        (["#tag1", "##tag2"], ["tag1", "tag2"]),
+        ("#tag1,##tag2", ["tag1", "tag2"]),
+        (["tag1", "#tag2", "##tag3"], ["tag1", "tag2", "tag3"]),
+        # Mixed whitespace and '#' characters
+        ([" #tag1 ", " ##tag2 "], ["tag1", "tag2"]),
+        (" #tag1 , ##tag2 ", ["tag1", "tag2"]),
+    ],
+)
+def test_parse_tags(
+    input_tags: Union[List[str], str, None], expected: List[str]
+) -> None:
+    """Test tag parsing with various input formats."""
+    result = parse_tags(input_tags)
+    assert result == expected
+
+
+def test_parse_tags_special_case() -> None:
+    """Test parsing from non-string, non-list types."""
+    # Test with custom object that has __str__ method
+    class TagObject:
+        def __str__(self) -> str:
+            return "tag1,tag2"
+
+    result = parse_tags(TagObject())  # pyright: ignore [reportArgumentType]
+    assert result == ["tag1", "tag2"]

Original file line number	Diff line number	Diff line change
`@@ -315,7 +315,7 @@ async def update_entity_relations(`
`315`	`315`	`except IntegrityError:`
`316`	`316`	`# Unique constraint violation - relation already exists`
`317`	`317`	`logger.debug(`
`318`		`- f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}, type: {rel.type}"`
	`318`	`+ f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}"`
`319`	`319`	`)`
`320`	`320`	`continue`
`321`	`321`