Skip to content

Commit 2a901e3

Browse files
simonw and claude authored
Fix Windows Unicode encoding errors when writing HTML files (#7)
* Fix Windows Unicode encoding errors when writing HTML files

  On Windows, the default file encoding is cp1252, which cannot encode Unicode characters like emojis. This caused UnicodeEncodeError when writing HTML files containing emoji characters. Fixed by explicitly specifying encoding="utf-8" for all read_text() and write_text() calls on HTML files.

* Fix Windows test encoding issues

  Add encoding="utf-8" to all read_text() and write_text() calls in tests to ensure proper handling of UTF-8 encoded HTML files on Windows.

* Set PYTHONUTF8 environment variable for tests

  For Windows, refs:
  - #6

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent c420f74 commit 2a901e3

3 files changed

Lines changed: 40 additions & 24 deletions

File tree

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,5 +24,7 @@ jobs:
2424
run: |
2525
pip install -e . --group dev
2626
- name: Run tests
27+
env:
28+
PYTHONUTF8: "1"
2729
run: |
2830
python -m pytest

src/claude_code_transcripts/__init__.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -755,13 +755,13 @@ def inject_gist_preview_js(output_dir):
755755
"""Inject gist preview JavaScript into all HTML files in the output directory."""
756756
output_dir = Path(output_dir)
757757
for html_file in output_dir.glob("*.html"):
758-
content = html_file.read_text()
758+
content = html_file.read_text(encoding="utf-8")
759759
# Insert the gist preview JS before the closing </body> tag
760760
if "</body>" in content:
761761
content = content.replace(
762762
"</body>", f"<script>{GIST_PREVIEW_JS}</script>\n</body>"
763763
)
764-
html_file.write_text(content)
764+
html_file.write_text(content, encoding="utf-8")
765765

766766

767767
def create_gist(output_dir, public=False):
@@ -894,7 +894,9 @@ def generate_html(json_path, output_dir, github_repo=None):
894894
pagination_html=pagination_html,
895895
messages_html="".join(messages_html),
896896
)
897-
(output_dir / f"page-{page_num:03d}.html").write_text(page_content)
897+
(output_dir / f"page-{page_num:03d}.html").write_text(
898+
page_content, encoding="utf-8"
899+
)
898900
print(f"Generated page-{page_num:03d}.html")
899901

900902
# Calculate overall stats and collect all commits for timeline
@@ -977,7 +979,7 @@ def generate_html(json_path, output_dir, github_repo=None):
977979
index_items_html="".join(index_items),
978980
)
979981
index_path = output_dir / "index.html"
980-
index_path.write_text(index_content)
982+
index_path.write_text(index_content, encoding="utf-8")
981983
print(
982984
f"Generated {index_path.resolve()} ({total_convs} prompts, {total_pages} pages)"
983985
)
@@ -1308,7 +1310,9 @@ def generate_html_from_session_data(session_data, output_dir, github_repo=None):
13081310
pagination_html=pagination_html,
13091311
messages_html="".join(messages_html),
13101312
)
1311-
(output_dir / f"page-{page_num:03d}.html").write_text(page_content)
1313+
(output_dir / f"page-{page_num:03d}.html").write_text(
1314+
page_content, encoding="utf-8"
1315+
)
13121316
click.echo(f"Generated page-{page_num:03d}.html")
13131317

13141318
# Calculate overall stats and collect all commits for timeline
@@ -1391,7 +1395,7 @@ def generate_html_from_session_data(session_data, output_dir, github_repo=None):
13911395
index_items_html="".join(index_items),
13921396
)
13931397
index_path = output_dir / "index.html"
1394-
index_path.write_text(index_content)
1398+
index_path.write_text(index_content, encoding="utf-8")
13951399
click.echo(
13961400
f"Generated {index_path.resolve()} ({total_convs} prompts, {total_pages} pages)"
13971401
)

tests/test_generate_html.py

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -66,23 +66,23 @@ def test_generates_index_html(self, output_dir, snapshot_html):
6666
fixture_path = Path(__file__).parent / "sample_session.json"
6767
generate_html(fixture_path, output_dir, github_repo="example/project")
6868

69-
index_html = (output_dir / "index.html").read_text()
69+
index_html = (output_dir / "index.html").read_text(encoding="utf-8")
7070
assert index_html == snapshot_html
7171

7272
def test_generates_page_001_html(self, output_dir, snapshot_html):
7373
"""Test page-001.html generation."""
7474
fixture_path = Path(__file__).parent / "sample_session.json"
7575
generate_html(fixture_path, output_dir, github_repo="example/project")
7676

77-
page_html = (output_dir / "page-001.html").read_text()
77+
page_html = (output_dir / "page-001.html").read_text(encoding="utf-8")
7878
assert page_html == snapshot_html
7979

8080
def test_generates_page_002_html(self, output_dir, snapshot_html):
8181
"""Test page-002.html generation (continuation page)."""
8282
fixture_path = Path(__file__).parent / "sample_session.json"
8383
generate_html(fixture_path, output_dir, github_repo="example/project")
8484

85-
page_html = (output_dir / "page-002.html").read_text()
85+
page_html = (output_dir / "page-002.html").read_text(encoding="utf-8")
8686
assert page_html == snapshot_html
8787

8888
def test_github_repo_autodetect(self, sample_session):
@@ -346,16 +346,16 @@ def test_injects_js_into_html_files(self, output_dir):
346346
"""Test that JS is injected before </body> tag."""
347347
# Create test HTML files
348348
(output_dir / "index.html").write_text(
349-
"<html><body><h1>Test</h1></body></html>"
349+
"<html><body><h1>Test</h1></body></html>", encoding="utf-8"
350350
)
351351
(output_dir / "page-001.html").write_text(
352-
"<html><body><p>Page 1</p></body></html>"
352+
"<html><body><p>Page 1</p></body></html>", encoding="utf-8"
353353
)
354354

355355
inject_gist_preview_js(output_dir)
356356

357-
index_content = (output_dir / "index.html").read_text()
358-
page_content = (output_dir / "page-001.html").read_text()
357+
index_content = (output_dir / "index.html").read_text(encoding="utf-8")
358+
page_content = (output_dir / "page-001.html").read_text(encoding="utf-8")
359359

360360
# Check JS was injected
361361
assert GIST_PREVIEW_JS in index_content
@@ -368,11 +368,13 @@ def test_injects_js_into_html_files(self, output_dir):
368368
def test_skips_files_without_body(self, output_dir):
369369
"""Test that files without </body> are not modified."""
370370
original_content = "<html><head><title>Test</title></head></html>"
371-
(output_dir / "fragment.html").write_text(original_content)
371+
(output_dir / "fragment.html").write_text(original_content, encoding="utf-8")
372372

373373
inject_gist_preview_js(output_dir)
374374

375-
assert (output_dir / "fragment.html").read_text() == original_content
375+
assert (output_dir / "fragment.html").read_text(
376+
encoding="utf-8"
377+
) == original_content
376378

377379
def test_handles_empty_directory(self, output_dir):
378380
"""Test that empty directories don't cause errors."""
@@ -389,8 +391,12 @@ def test_creates_gist_successfully(self, output_dir, monkeypatch):
389391
import click
390392

391393
# Create test HTML files
392-
(output_dir / "index.html").write_text("<html><body>Index</body></html>")
393-
(output_dir / "page-001.html").write_text("<html><body>Page</body></html>")
394+
(output_dir / "index.html").write_text(
395+
"<html><body>Index</body></html>", encoding="utf-8"
396+
)
397+
(output_dir / "page-001.html").write_text(
398+
"<html><body>Page</body></html>", encoding="utf-8"
399+
)
394400

395401
# Mock subprocess.run to simulate successful gh gist create
396402
mock_result = subprocess.CompletedProcess(
@@ -425,7 +431,9 @@ def test_raises_on_gh_cli_error(self, output_dir, monkeypatch):
425431
import click
426432

427433
# Create test HTML file
428-
(output_dir / "index.html").write_text("<html><body>Test</body></html>")
434+
(output_dir / "index.html").write_text(
435+
"<html><body>Test</body></html>", encoding="utf-8"
436+
)
429437

430438
# Mock subprocess.run to simulate gh error
431439
def mock_run(*args, **kwargs):
@@ -448,7 +456,9 @@ def test_raises_on_gh_not_found(self, output_dir, monkeypatch):
448456
import click
449457

450458
# Create test HTML file
451-
(output_dir / "index.html").write_text("<html><body>Test</body></html>")
459+
(output_dir / "index.html").write_text(
460+
"<html><body>Test</body></html>", encoding="utf-8"
461+
)
452462

453463
# Mock subprocess.run to simulate gh not found
454464
def mock_run(*args, **kwargs):
@@ -533,7 +543,7 @@ def mock_run(*args, **kwargs):
533543
assert result.exit_code == 0
534544
assert (output_dir / "index.html").exists()
535545
# Verify JS was injected
536-
index_content = (output_dir / "index.html").read_text()
546+
index_content = (output_dir / "index.html").read_text(encoding="utf-8")
537547
assert "gistpreview.github.io" in index_content
538548

539549

@@ -621,13 +631,13 @@ def test_long_text_in_continuation_appears_in_index(self, output_dir):
621631

622632
# Write the session to a temp file
623633
session_file = output_dir / "test_session.json"
624-
session_file.write_text(json.dumps(session_data))
634+
session_file.write_text(json.dumps(session_data), encoding="utf-8")
625635

626636
# Generate HTML
627637
generate_html(session_file, output_dir)
628638

629639
# Read the index.html
630-
index_html = (output_dir / "index.html").read_text()
640+
index_html = (output_dir / "index.html").read_text(encoding="utf-8")
631641

632642
# The long text summary should appear in the index
633643
# This is the bug: currently it doesn't because the continuation
@@ -942,7 +952,7 @@ def test_jsonl_generates_html(self, output_dir, snapshot_html):
942952
fixture_path = Path(__file__).parent / "sample_session.jsonl"
943953
generate_html(fixture_path, output_dir)
944954

945-
index_html = (output_dir / "index.html").read_text()
955+
index_html = (output_dir / "index.html").read_text(encoding="utf-8")
946956
assert "hello world" in index_html.lower()
947957
assert index_html == snapshot_html
948958

@@ -968,7 +978,7 @@ def test_gets_first_user_message_if_no_summary(self, tmp_path):
968978
def test_returns_no_summary_for_empty_file(self, tmp_path):
969979
"""Test handling empty or invalid files."""
970980
jsonl_file = tmp_path / "empty.jsonl"
971-
jsonl_file.write_text("")
981+
jsonl_file.write_text("", encoding="utf-8")
972982
summary = get_session_summary(jsonl_file)
973983
assert summary == "(no summary)"
974984

0 commit comments

Comments
 (0)