diff --git a/src/basic_memory/services/entity_service.py b/src/basic_memory/services/entity_service.py
index 793552ba1..7ce7085b2 100644
--- a/src/basic_memory/services/entity_service.py
+++ b/src/basic_memory/services/entity_service.py
@@ -315,7 +315,7 @@ async def update_entity_relations(
             except IntegrityError:
                 # Unique constraint violation - relation already exists
                 logger.debug(
-                    f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}, type: {rel.type}"
+                    f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}"
                 )
                 continue
 
diff --git a/src/basic_memory/utils.py b/src/basic_memory/utils.py
index 18caa98af..2b9fb49a4 100644
--- a/src/basic_memory/utils.py
+++ b/src/basic_memory/utils.py
@@ -138,15 +138,26 @@ def parse_tags(tags: Union[List[str], str, None]) -> List[str]:
 
     Returns:
         A list of tag strings, or an empty list if no tags
+
+    Note:
+        Surrounding whitespace and leading '#' characters are stripped from
+        each tag so they do not accumulate when tags are processed multiple
+        times. Tags that are empty after stripping (e.g. "#") are dropped.
     """
     if tags is None:
         return []
 
+    # Process list of tags
     if isinstance(tags, list):
-        return tags
+        # Strip whitespace and leading '#' characters, then drop tags left empty
+        cleaned = (tag.strip().lstrip("#").strip() for tag in tags if tag)
+        return [tag for tag in cleaned if tag]
 
+    # Process comma-separated string of tags
     if isinstance(tags, str):
-        return [tag.strip() for tag in tags.split(",") if tag.strip()]
+        # Split by comma, strip whitespace and leading '#', then drop tags left empty
+        cleaned = (tag.strip().lstrip("#").strip() for tag in tags.split(","))
+        return [tag for tag in cleaned if tag]
 
     # For any other type, try to convert to string and parse
     try:  # pragma: no cover
diff --git a/tests/utils/test_parse_tags.py b/tests/utils/test_parse_tags.py
new file mode 100644
index 000000000..7ec86cfd8
--- /dev/null
+++ b/tests/utils/test_parse_tags.py
@@ -0,0 +1,57 @@
+"""Tests for parse_tags utility function."""
+
+from typing import List, Union
+
+import pytest
+
+from basic_memory.utils import parse_tags
+
+
+@pytest.mark.parametrize(
+    "input_tags,expected",
+    [
+        # None input
+        (None, []),
+        # List inputs
+        ([], []),
+        (["tag1", "tag2"], ["tag1", "tag2"]),
+        (["tag1", "", "tag2"], ["tag1", "tag2"]),  # Empty tags are filtered
+        ([" tag1 ", " tag2 "], ["tag1", "tag2"]),  # Whitespace is stripped
+        # String inputs
+        ("", []),
+        ("tag1", ["tag1"]),
+        ("tag1,tag2", ["tag1", "tag2"]),
+        ("tag1, tag2", ["tag1", "tag2"]),  # Whitespace after comma is stripped
+        ("tag1,,tag2", ["tag1", "tag2"]),  # Empty tags are filtered
+        # Tags with leading '#' characters - these should be stripped
+        (["#tag1", "##tag2"], ["tag1", "tag2"]),
+        ("#tag1,##tag2", ["tag1", "tag2"]),
+        (["tag1", "#tag2", "##tag3"], ["tag1", "tag2", "tag3"]),
+        # Mixed whitespace and '#' characters
+        ([" #tag1 ", " ##tag2 "], ["tag1", "tag2"]),
+        (" #tag1 , ##tag2 ", ["tag1", "tag2"]),
+        # Tags that are empty once '#' is stripped are dropped
+        (["#", "##", "tag1"], ["tag1"]),
+        ("#,tag1, ## ", ["tag1"]),
+        # Whitespace between '#' and the tag name is stripped
+        (["# tag1"], ["tag1"]),
+        ("# tag1,#  tag2", ["tag1", "tag2"]),
+    ],
+)
+def test_parse_tags(
+    input_tags: Union[List[str], str, None], expected: List[str]
+) -> None:
+    """Test tag parsing with various input formats."""
+    result = parse_tags(input_tags)
+    assert result == expected
+
+
+def test_parse_tags_special_case() -> None:
+    """Test parsing from non-string, non-list types."""
+    # Test with custom object that has __str__ method
+    class TagObject:
+        def __str__(self) -> str:
+            return "tag1,tag2"
+
+    result = parse_tags(TagObject())  # pyright: ignore [reportArgumentType]
+    assert result == ["tag1", "tag2"]