From b8a7280189e9d6f03a8d8581e001c1ce21191811 Mon Sep 17 00:00:00 2001 From: Hal Wine Date: Sun, 11 Aug 2024 13:11:35 -0700 Subject: [PATCH] Specify file encoding as UTF-8 Windows will default to cp1252, even though all modern text editors use utf-8. --- joplin2obsidian/exporter.py | 5 ++++- joplin2obsidian/parser.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/joplin2obsidian/exporter.py b/joplin2obsidian/exporter.py index 6f35b2d..e4bb3ad 100644 --- a/joplin2obsidian/exporter.py +++ b/joplin2obsidian/exporter.py @@ -21,6 +21,9 @@ def export_md(self, reader: Reader): md_handler = MdHandler(md) dest_md = self.root_dir / md.relative_to(reader.dir) dest_md.parent.mkdir(parents=True, exist_ok=True) - with dest_md.open('w') as f: + # open() defaults to locale.getencoding(), which is cp1252 on + # Windows for historical reasons. AFAIK, modern Windows text editors + # use UTF-8 everywhere. + with dest_md.open('w', encoding="utf-8") as f: for res_line in md_handler.replace_iter(): f.write(res_line) diff --git a/joplin2obsidian/parser.py b/joplin2obsidian/parser.py index 02c8488..1fe9eaf 100644 --- a/joplin2obsidian/parser.py +++ b/joplin2obsidian/parser.py @@ -9,7 +9,9 @@ def __init__(self, file_path: Path): def replace_iter(self): - with self.file_path.open('r') as f: + # open() defaults to locale.getencoding(), which is cp1252 on Windows + # for historical reasons. AFAIK, modern Windows text editors use UTF-8. + with self.file_path.open('r', encoding="utf-8") as f: for line in f.readlines(): yield self.src_re.sub(self._sub_group, line)