Skip to content

Commit 2934176

Browse files
authored
fix: utf8 for all file reads/write/open instead of default platform encoding (#91)
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent ac89eb4 commit 2934176

15 files changed

Lines changed: 54 additions & 52 deletions

installer/installer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def update_claude_config():
4444

4545
# Load existing config or create new
4646
if config_path.exists():
47-
config = json.loads(config_path.read_text())
47+
config = json.loads(config_path.read_text(encoding="utf-8"))
4848
else:
4949
config = {"mcpServers": {}}
5050

src/basic_memory/cli/commands/import_memory_json.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ async def process_memory_json(
3838
read_task = progress.add_task("Reading memory.json...", total=None)
3939

4040
# First pass - collect entities and relations
41-
with open(json_path) as f:
41+
with open(json_path, encoding="utf-8") as f:
4242
lines = f.readlines()
4343
progress.update(read_task, total=len(lines))
4444

src/basic_memory/markdown/entity_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ async def parse_file(self, path: Path | str) -> EntityMarkdown:
104104
absolute_path = self.base_path / path
105105

106106
# Parse frontmatter and content using python-frontmatter
107-
file_content = absolute_path.read_text()
107+
file_content = absolute_path.read_text(encoding="utf-8")
108108
return await self.parse_file_content(absolute_path, file_content)
109109

110110
async def parse_file_content(self, absolute_path, file_content):

src/basic_memory/sync/sync_service.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
"""Service for syncing files between filesystem and database."""
22

33
import os
4-
5-
from dataclasses import dataclass
6-
from dataclasses import field
4+
import time
5+
from dataclasses import dataclass, field
76
from datetime import datetime
87
from pathlib import Path
98
from typing import Dict, Optional, Set, Tuple
@@ -18,7 +17,6 @@
1817
from basic_memory.repository import EntityRepository, RelationRepository
1918
from basic_memory.services import EntityService, FileService
2019
from basic_memory.services.search_service import SearchService
21-
import time
2220

2321

2422
@dataclass
@@ -237,7 +235,7 @@ async def sync_markdown_file(self, path: str, new: bool = True) -> Tuple[Optiona
237235
logger.debug(f"Parsing markdown file, path: {path}, new: {new}")
238236

239237
file_path = self.entity_parser.base_path / path
240-
file_content = file_path.read_text()
238+
file_content = file_path.read_text(encoding="utf-8")
241239
file_contains_frontmatter = has_frontmatter(file_content)
242240

243241
# entity markdown will always contain front matter, so it can be used to create/update the entity

tests/api/test_resource_router.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,9 @@
22

33
import json
44
from datetime import datetime, timezone
5+
from pathlib import Path
56

67
import pytest
7-
from pathlib import Path
88

99
from basic_memory.schemas import EntityResponse
1010

@@ -346,7 +346,7 @@ async def test_put_resource_new_file(client, test_config, entity_repository, sea
346346
assert full_path.exists()
347347

348348
# Verify file content
349-
file_content = full_path.read_text()
349+
file_content = full_path.read_text(encoding="utf-8")
350350
assert json.loads(file_content) == canvas_data
351351

352352
# Verify entity was created in DB
@@ -420,7 +420,7 @@ async def test_put_resource_update_existing(client, test_config, entity_reposito
420420
assert response.status_code == 200
421421

422422
# Verify file was updated
423-
updated_content = full_path.read_text()
423+
updated_content = full_path.read_text(encoding="utf-8")
424424
assert json.loads(updated_content) == updated_data
425425

426426
# Verify entity was updated

tests/cli/test_import_chatgpt.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
"""Tests for import_chatgpt command."""
22

33
import json
4+
45
import pytest
56
from typer.testing import CliRunner
67

7-
from basic_memory.cli.app import import_app, app
8+
from basic_memory.cli.app import app, import_app
89
from basic_memory.cli.commands import import_chatgpt
910
from basic_memory.config import config
1011
from basic_memory.markdown import EntityParser, MarkdownProcessor
@@ -144,7 +145,7 @@ def sample_conversation_with_hidden():
144145
def sample_chatgpt_json(tmp_path, sample_conversation):
145146
"""Create a sample ChatGPT JSON file."""
146147
json_file = tmp_path / "conversations.json"
147-
with open(json_file, "w") as f:
148+
with open(json_file, "w", encoding="utf-8") as f:
148149
json.dump([sample_conversation], f)
149150
return json_file
150151

@@ -167,7 +168,7 @@ async def test_process_chatgpt_json(tmp_path, sample_chatgpt_json):
167168
assert conv_path.exists()
168169

169170
# Check content formatting
170-
content = conv_path.read_text()
171+
content = conv_path.read_text(encoding="utf-8")
171172
assert "# Test Conversation" in content
172173
assert "### User" in content
173174
assert "Hello, this is a test message" in content
@@ -183,14 +184,14 @@ async def test_process_code_blocks(tmp_path, sample_conversation_with_code):
183184

184185
# Create test file
185186
json_file = tmp_path / "code_test.json"
186-
with open(json_file, "w") as f:
187+
with open(json_file, "w", encoding="utf-8") as f:
187188
json.dump([sample_conversation_with_code], f)
188189

189190
await import_chatgpt.process_chatgpt_json(json_file, tmp_path, processor)
190191

191192
# Check content
192193
conv_path = tmp_path / "20250111-code-test.md"
193-
content = conv_path.read_text()
194+
content = conv_path.read_text(encoding="utf-8")
194195
assert "```python" in content
195196
assert "def hello():" in content
196197
assert "```" in content
@@ -204,7 +205,7 @@ async def test_hidden_messages(tmp_path, sample_conversation_with_hidden):
204205

205206
# Create test file
206207
json_file = tmp_path / "hidden_test.json"
207-
with open(json_file, "w") as f:
208+
with open(json_file, "w", encoding="utf-8") as f:
208209
json.dump([sample_conversation_with_hidden], f)
209210

210211
results = await import_chatgpt.process_chatgpt_json(json_file, tmp_path, processor)
@@ -214,7 +215,7 @@ async def test_hidden_messages(tmp_path, sample_conversation_with_hidden):
214215

215216
# Check content
216217
conv_path = tmp_path / "20250111-hidden-test.md"
217-
content = conv_path.read_text()
218+
content = conv_path.read_text(encoding="utf-8")
218219
assert "Visible message" in content
219220
assert "Hidden message" not in content
220221

tests/cli/test_import_claude_conversations.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Tests for import_claude command (chat conversations)."""
22

33
import json
4+
45
import pytest
56
from typer.testing import CliRunner
67

@@ -44,7 +45,7 @@ def sample_conversation():
4445
def sample_conversations_json(tmp_path, sample_conversation):
4546
"""Create a sample conversations.json file."""
4647
json_file = tmp_path / "conversations.json"
47-
with open(json_file, "w") as f:
48+
with open(json_file, "w", encoding="utf-8") as f:
4849
json.dump([sample_conversation], f)
4950
return json_file
5051

@@ -65,7 +66,7 @@ async def test_process_chat_json(tmp_path, sample_conversations_json):
6566
# Check conversation file
6667
conv_path = tmp_path / "20250105-test-conversation.md"
6768
assert conv_path.exists()
68-
content = conv_path.read_text()
69+
content = conv_path.read_text(encoding="utf-8")
6970

7071
# Check content formatting
7172
assert "### Human" in content
@@ -156,7 +157,7 @@ def test_import_conversation_with_attachments(tmp_path):
156157
}
157158

158159
json_file = tmp_path / "with_attachments.json"
159-
with open(json_file, "w") as f:
160+
with open(json_file, "w", encoding="utf-8") as f:
160161
json.dump([conversation], f)
161162

162163
# Set up environment
@@ -168,7 +169,7 @@ def test_import_conversation_with_attachments(tmp_path):
168169

169170
# Check attachment formatting
170171
conv_path = tmp_path / "conversations/20250105-test-with-attachments.md"
171-
content = conv_path.read_text()
172+
content = conv_path.read_text(encoding="utf-8")
172173
assert "**Attachment: test.txt**" in content
173174
assert "```" in content
174175
assert "Test file content" in content

tests/cli/test_import_claude_projects.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Tests for import_claude_projects command."""
22

33
import json
4+
45
import pytest
56
from typer.testing import CliRunner
67

@@ -43,7 +44,7 @@ def sample_project():
4344
def sample_projects_json(tmp_path, sample_project):
4445
"""Create a sample projects.json file."""
4546
json_file = tmp_path / "projects.json"
46-
with open(json_file, "w") as f:
47+
with open(json_file, "w", encoding="utf-8") as f:
4748
json.dump([sample_project], f)
4849
return json_file
4950

@@ -70,14 +71,14 @@ async def test_process_projects_json(tmp_path, sample_projects_json):
7071
# Check document files
7172
doc1 = project_dir / "docs/test-document.md"
7273
assert doc1.exists()
73-
content1 = doc1.read_text()
74+
content1 = doc1.read_text(encoding="utf-8")
7475
assert "# Test Document" in content1
7576
assert "This is test content" in content1
7677

7778
# Check prompt template
7879
prompt = project_dir / "prompt-template.md"
7980
assert prompt.exists()
80-
prompt_content = prompt.read_text()
81+
prompt_content = prompt.read_text(encoding="utf-8")
8182
assert "# Test Prompt" in prompt_content
8283
assert "This is a test prompt" in prompt_content
8384

@@ -160,7 +161,7 @@ def test_import_project_without_prompt(tmp_path):
160161
}
161162

162163
json_file = tmp_path / "no_prompt.json"
163-
with open(json_file, "w") as f:
164+
with open(json_file, "w", encoding="utf-8") as f:
164165
json.dump([project], f)
165166

166167
# Set up environment

tests/cli/test_import_memory_json.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Tests for import_memory_json command."""
22

33
import json
4+
45
import pytest
56
from typer.testing import CliRunner
67

@@ -35,7 +36,7 @@ def sample_entities():
3536
def sample_json_file(tmp_path, sample_entities):
3637
"""Create a sample memory.json file."""
3738
json_file = tmp_path / "memory.json"
38-
with open(json_file, "w") as f:
39+
with open(json_file, "w", encoding="utf-8") as f:
3940
for entity in sample_entities:
4041
f.write(json.dumps(entity) + "\n")
4142
return json_file
@@ -55,7 +56,7 @@ async def test_process_memory_json(tmp_path, sample_json_file):
5556
# Check file was created
5657
entity_file = tmp_path / "test/test_entity.md"
5758
assert entity_file.exists()
58-
content = entity_file.read_text()
59+
content = entity_file.read_text(encoding="utf-8")
5960
assert "Test observation 1" in content
6061
assert "Test observation 2" in content
6162
assert "test_relation [[related_entity]]" in content
@@ -120,7 +121,7 @@ def test_import_json_command_handle_old_format(tmp_path):
120121
]
121122

122123
json_file = tmp_path / "old_format.json"
123-
with open(json_file, "w") as f:
124+
with open(json_file, "w", encoding="utf-8") as f:
124125
for item in old_format:
125126
f.write(json.dumps(item) + "\n")
126127

tests/markdown/test_markdown_processor.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@
88

99
import pytest
1010

11-
from basic_memory.markdown.markdown_processor import MarkdownProcessor, DirtyFileError
11+
from basic_memory.markdown.markdown_processor import DirtyFileError, MarkdownProcessor
1212
from basic_memory.markdown.schemas import (
13-
EntityMarkdown,
1413
EntityFrontmatter,
14+
EntityMarkdown,
1515
Observation,
1616
Relation,
1717
)
@@ -41,7 +41,7 @@ async def test_write_new_minimal_file(markdown_processor: MarkdownProcessor, tmp
4141
await markdown_processor.write_file(path, markdown)
4242

4343
# Read back and verify
44-
content = path.read_text()
44+
content = path.read_text(encoding="utf-8")
4545
assert "---" in content # Has frontmatter
4646
assert "type: note" in content
4747
assert "permalink: test" in content
@@ -90,7 +90,7 @@ async def test_write_new_file_with_content(markdown_processor: MarkdownProcessor
9090
await markdown_processor.write_file(path, markdown)
9191

9292
# Read back and verify
93-
content = path.read_text()
93+
content = path.read_text(encoding="utf-8")
9494

9595
# Check content preserved exactly
9696
assert "# Custom Title" in content
@@ -169,7 +169,7 @@ async def test_dirty_file_detection(markdown_processor: MarkdownProcessor, tmp_p
169169
checksum = await markdown_processor.write_file(path, initial)
170170

171171
# Modify file directly
172-
path.write_text(path.read_text() + "\nModified!")
172+
path.write_text(path.read_text(encoding="utf-8") + "\nModified!")
173173

174174
# Try to update with old checksum
175175
update = EntityMarkdown(

0 commit comments

Comments
 (0)