Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/basic_memory/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ class BasicMemoryConfig(BaseSettings):
description="Whether to sync changes in real time. default (True)",
)

kebab_filenames: bool = Field(
default=False,
description="Format for generated filenames. False preserves spaces and special chars, True converts them to hyphens for consistency with permalinks",
)

# API connection configuration
api_url: Optional[str] = Field(
default=None,
Expand Down
19 changes: 19 additions & 0 deletions src/basic_memory/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import hashlib
from pathlib import Path
import re
from typing import Any, Dict, Union

import yaml
Expand Down Expand Up @@ -233,3 +234,21 @@ async def update_frontmatter(path: FilePath, updates: Dict[str, Any]) -> str:
error=str(e),
)
raise FileError(f"Failed to update frontmatter: {e}")


def sanitize_for_filename(text: str, replacement: str = "-") -> str:
    """
    Sanitize a string so it is safe to use as a note filename.

    Path separators (both POSIX and Windows) and other characters that are
    problematic on common filesystems are substituted with *replacement*,
    runs of consecutive substitutions are collapsed into a single one, and
    any leading/trailing replacement characters are trimmed.
    """
    # One pass covers slashes, backslashes, and the other unsafe characters.
    cleaned = re.sub(r'[/\\<>:"|?*]', replacement, text)

    # Collapse repeated replacement characters into one.
    cleaned = re.sub(f"{re.escape(replacement)}+", replacement, cleaned)

    return cleaned.strip(replacement)
28 changes: 26 additions & 2 deletions src/basic_memory/schemas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

from pydantic import BaseModel, BeforeValidator, Field, model_validator

from basic_memory.config import ConfigManager
from basic_memory.file_utils import sanitize_for_filename
from basic_memory.utils import generate_permalink


Expand Down Expand Up @@ -190,13 +192,35 @@ class Entity(BaseModel):
default="text/markdown",
)

@property
def safe_title(self) -> str:
    """
    A sanitized version of the title that is safe for use on the filesystem.

    For example, a title of "Coupon Enable/Disable Feature" should create the
    file "Coupon Enable-Disable Feature.md" instead of a file named
    "Disable Feature.md" beneath a "Coupon Enable" directory.

    POSIX and Windows path separators, plus a few other characters unsafe in
    filenames, are replaced. When the kebab_filenames config option is True,
    the result is additionally transformed the same way permalinks are
    (e.g. "Coupon Enable/Disable Feature" -> "coupon-enable-disable-feature").
    """
    title = sanitize_for_filename(self.title)

    # Honor the user's filename-style preference from the app config.
    if ConfigManager().config.kebab_filenames:
        title = generate_permalink(file_path=title, split_extension=False)

    return title
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'll need to do the same for project names. A project named "Hi/There" will fail to be found by the remove_project tool. Even if escaped, the project name when passed to the FastApi endpoint will confuse FastApi and return 404.


@property
def file_path(self):
    """Get the file path for this entity, using the filesystem-safe title.

    Uses ``safe_title`` (rather than the raw title) so that path separators
    and other unsafe characters in the title cannot create unintended
    directories or invalid filenames. Markdown entities get a ``.md``
    extension; other content types keep the bare name.
    """
    safe_title = self.safe_title
    if self.content_type == "text/markdown":
        return f"{self.folder}/{safe_title}.md" if self.folder else f"{safe_title}.md"
    else:
        return f"{self.folder}/{safe_title}" if self.folder else safe_title

@property
def permalink(self) -> Permalink:
Expand Down
104 changes: 58 additions & 46 deletions src/basic_memory/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __str__(self) -> str: ...
logging.getLogger("opentelemetry.sdk.metrics._internal.instrument").setLevel(logging.ERROR)


def generate_permalink(file_path: Union[Path, str, PathLike]) -> str:
def generate_permalink(file_path: Union[Path, str, PathLike], split_extension: bool = True) -> str:
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most modifications in this file are simply lint fixes, applied automatically on save in my IDE.

The main changes are:

  • adding the optional split_extension parameter
  • retaining the file extension in a variable
  • appending the extension back into the returned string, when applicable

Doing so allows this function to be reused, keeping things DRY, rather than repeating almost the entire function for use when generating titles.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re: lint fixes. As long as ruff is happy.

"""Generate a stable permalink from a file path.

Args:
Expand All @@ -51,53 +51,59 @@ def generate_permalink(file_path: Union[Path, str, PathLike]) -> str:
# Convert Path to string if needed
path_str = Path(str(file_path)).as_posix()

# Remove extension
base = os.path.splitext(path_str)[0]
# Remove extension (for now, possibly)
(base, extension) = os.path.splitext(path_str)

# Check if we have CJK characters that should be preserved
# CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
# CJK ranges: \u4e00-\u9fff (CJK Unified Ideographs), \u3000-\u303f (CJK symbols),
# \u3400-\u4dbf (CJK Extension A), \uff00-\uffef (Fullwidth forms)
has_cjk_chars = any(
'\u4e00' <= char <= '\u9fff' or
'\u3000' <= char <= '\u303f' or
'\u3400' <= char <= '\u4dbf' or
'\uff00' <= char <= '\uffef'
"\u4e00" <= char <= "\u9fff"
or "\u3000" <= char <= "\u303f"
or "\u3400" <= char <= "\u4dbf"
or "\uff00" <= char <= "\uffef"
for char in base
)

if has_cjk_chars:
# For text with CJK characters, selectively transliterate only Latin accented chars
result = ""
for char in base:
if ('\u4e00' <= char <= '\u9fff' or
'\u3000' <= char <= '\u303f' or
'\u3400' <= char <= '\u4dbf'):
if (
"\u4e00" <= char <= "\u9fff"
or "\u3000" <= char <= "\u303f"
or "\u3400" <= char <= "\u4dbf"
):
# Preserve CJK ideographs and symbols
result += char
elif ('\uff00' <= char <= '\uffef'):
elif "\uff00" <= char <= "\uffef":
# Remove Chinese fullwidth punctuation entirely (like ,!?)
continue
else:
# Transliterate Latin accented characters to ASCII
result += unidecode(char)

# Insert hyphens between CJK and Latin character transitions
# Match: CJK followed by Latin letter/digit, or Latin letter/digit followed by CJK
result = re.sub(r'([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])([a-zA-Z0-9])', r'\1-\2', result)
result = re.sub(r'([a-zA-Z0-9])([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])', r'\1-\2', result)

result = re.sub(
r"([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])([a-zA-Z0-9])", r"\1-\2", result
)
result = re.sub(
r"([a-zA-Z0-9])([\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf])", r"\1-\2", result
)

# Insert dash between camelCase
result = re.sub(r"([a-z0-9])([A-Z])", r"\1-\2", result)

# Convert ASCII letters to lowercase, preserve CJK
lower_text = "".join(c.lower() if c.isascii() and c.isalpha() else c for c in result)

# Replace underscores with hyphens
text_with_hyphens = lower_text.replace("_", "-")

# Remove apostrophes entirely (don't replace with hyphens)
text_no_apostrophes = text_with_hyphens.replace("'", "")

# Replace unsafe chars with hyphens, but preserve CJK characters
clean_text = re.sub(
r"[^a-z0-9\u4e00-\u9fff\u3000-\u303f\u3400-\u4dbf/\-]", "-", text_no_apostrophes
Expand Down Expand Up @@ -129,7 +135,13 @@ def generate_permalink(file_path: Union[Path, str, PathLike]) -> str:
segments = clean_text.split("/")
clean_segments = [s.strip("-") for s in segments]

return "/".join(clean_segments)
return_val = "/".join(clean_segments)

# Append file extension back, if necessary
if not split_extension and extension:
return_val += extension

return return_val


def setup_logging(
Expand Down Expand Up @@ -229,79 +241,79 @@ def normalize_newlines(multiline: str) -> str:
Returns:
A string with normalized newlines native to the platform.
"""
return re.sub(r'\r\n?|\n', os.linesep, multiline)
return re.sub(r"\r\n?|\n", os.linesep, multiline)


def normalize_file_path_for_comparison(file_path: str) -> str:
    """Normalize a file path for conflict detection.

    This function normalizes file paths to help detect potential conflicts:
    - Converts to lowercase for case-insensitive comparison
    - Normalizes Unicode characters
    - Handles path separators consistently

    Args:
        file_path: The file path to normalize

    Returns:
        Normalized file path for comparison purposes
    """
    import unicodedata

    # Case-insensitive comparison: fold everything to lowercase.
    lowered = file_path.lower()

    # NFD normalization so visually-identical Unicode forms compare equal.
    decomposed = unicodedata.normalize("NFD", lowered)

    # Use forward slashes only, then squash any duplicate separators.
    unified = decomposed.replace("\\", "/")
    return re.sub(r"/+", "/", unified)


def detect_potential_file_conflicts(file_path: str, existing_paths: List[str]) -> List[str]:
    """Detect potential conflicts between a file path and existing paths.

    This function checks for various types of conflicts:
    - Case sensitivity differences
    - Unicode normalization differences
    - Path separator differences
    - Permalink generation conflicts

    Args:
        file_path: The file path to check
        existing_paths: List of existing file paths to check against

    Returns:
        List of existing paths that might conflict with the given file path
    """
    # Compute the comparison keys for the input once, outside the loop.
    normalized_input = normalize_file_path_for_comparison(file_path)
    input_permalink = generate_permalink(file_path)

    conflicts: List[str] = []
    for candidate in existing_paths:
        # An identical path is not a conflict with itself.
        if candidate == file_path:
            continue

        # Case/Unicode/separator-insensitive path collision.
        if normalize_file_path_for_comparison(candidate) == normalized_input:
            conflicts.append(candidate)
        # Otherwise, two distinct paths mapping to one permalink also collide.
        elif generate_permalink(candidate) == input_permalink:
            conflicts.append(candidate)

    return conflicts


Expand Down Expand Up @@ -336,13 +348,13 @@ def validate_project_path(path: str, project_path: Path) -> bool:

def ensure_timezone_aware(dt: datetime) -> datetime:
"""Ensure a datetime is timezone-aware using system timezone.

If the datetime is naive, convert it to timezone-aware using the system's local timezone.
If it's already timezone-aware, return it unchanged.

Args:
dt: The datetime to ensure is timezone-aware

Returns:
A timezone-aware datetime
"""
Expand All @@ -351,4 +363,4 @@ def ensure_timezone_aware(dt: datetime) -> datetime:
return dt.astimezone()
else:
# Already timezone-aware
return dt
return dt
64 changes: 64 additions & 0 deletions test-int/mcp/test_write_note_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

import pytest
from fastmcp import Client
from unittest.mock import patch

from basic_memory.config import ConfigManager


@pytest.mark.asyncio
Expand Down Expand Up @@ -282,3 +285,64 @@ async def test_write_note_preserve_frontmatter(mcp_server, app):
assert "# Created note" in response_text
assert "file_path: test/Frontmatter Note.md" in response_text
assert "permalink: test/frontmatter-note" in response_text


@pytest.mark.asyncio
async def test_write_note_kebab_filenames_basic(mcp_server):
    """Test note creation with kebab_filenames=True and invalid filename characters."""

    config = ConfigManager().config
    curr_config_val = config.kebab_filenames
    config.kebab_filenames = True

    # try/finally guarantees the config flag is restored even when an
    # assertion fails; otherwise kebab_filenames=True leaks into later tests.
    try:
        with patch.object(ConfigManager, "config", config):
            async with Client(mcp_server) as client:
                result = await client.call_tool(
                    "write_note",
                    {
                        "title": "My Note: With/Invalid|Chars?",
                        "folder": "my-folder",
                        "content": "Testing kebab-case and invalid characters.",
                        "tags": "kebab,invalid,filename",
                    },
                )

            assert len(result.content) == 1
            response_text = result.content[0].text

            # File path and permalink should be kebab-case and sanitized
            assert "file_path: my-folder/my-note-with-invalid-chars.md" in response_text
            assert "permalink: my-folder/my-note-with-invalid-chars" in response_text
    finally:
        # Restore original config value
        config.kebab_filenames = curr_config_val


@pytest.mark.asyncio
async def test_write_note_kebab_filenames_repeat_invalid(mcp_server):
    """Test note creation with multiple invalid and repeated characters."""

    config = ConfigManager().config
    curr_config_val = config.kebab_filenames
    config.kebab_filenames = True

    # try/finally guarantees the config flag is restored even when an
    # assertion fails; otherwise kebab_filenames=True leaks into later tests.
    try:
        with patch.object(ConfigManager, "config", config):
            async with Client(mcp_server) as client:
                result = await client.call_tool(
                    "write_note",
                    {
                        "title": 'Crazy<>:"|?*Note/Name',
                        "folder": "my-folder",
                        "content": "Should be fully kebab-case and safe.",
                        "tags": "crazy,filename,test",
                    },
                )

            assert len(result.content) == 1
            response_text = result.content[0].text

            assert "file_path: my-folder/crazy-note-name.md" in response_text
            assert "permalink: my-folder/crazy-note-name" in response_text
    finally:
        # Restore original config value
        config.kebab_filenames = curr_config_val
Loading
Loading