Skip to content

Commit fcdd66d

Browse files
ricaskew and claude committed
fix(toml): swap uiri/toml encoder for tomli_w (issue #439 residual)
The uiri/toml encoder raises IndexError on strings containing certain
characters (notably real control characters like chr(27)/ANSI escape).
Issue #439 identified this class of bugs and proposed switching to
tomli-w. Decode was migrated to stdlib tomllib on 3.11+ previously, but
encode still routed through toml.dumps and still crashed.

Reproducer on main:

    >>> benedict({"color": "\033[31m"}).to_toml()
    IndexError: list index out of range

Change: TOMLSerializer.encode() now calls tomli_w.dumps(). Decode path
untouched (tomllib on 3.11+, toml on 3.10). toml stays in the [toml]
extra guarded by python_version < '3.11' for the 3.10 decode fallback;
tomli-w is added unconditionally for encode.

Regression tests cover:
- ANSI control character (chr(27)) encode + round-trip — was crashing
- Issue #439's literal-backslash examples — guard against regression
- Round-trip on 7 tricky values (control chars, tabs, unicode, quotes)
- Nested dict with embedded control chars
- Direct serializer encode/decode path

tests/serializers/test_toml_serializer.py replaces the prior TODO stubs
with 5 real tests. test_io_dict_toml's "extra not installed" test
patches tomli_w_installed (the encode dependency) instead of
toml_installed.

API note: tomli_w.dumps kwargs differ from toml.dumps (no `encoder=`
param; gains `multiline_strings` and `indent`). Callers of
`.to_toml(**kwargs)` passing uiri-specific kwargs will hit TypeError and
should migrate to tomli-w's kwarg surface.

Full suite: 800 tests pass (1 pre-existing skip).

Fixes the encode-side failure mode documented in #439.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 253e40d commit fcdd66d

5 files changed

Lines changed: 85 additions & 11 deletions

File tree

benedict/serializers/toml.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
except ModuleNotFoundError:
66
toml_installed = False
77

8+
try:
9+
import tomli_w
10+
11+
tomli_w_installed = True
12+
except ModuleNotFoundError:
13+
tomli_w_installed = False
14+
815
try:
916
# python >= 3.11
1017
import tomllib
@@ -40,6 +47,6 @@ def decode(self, s: str, **kwargs: Any) -> Any:
4047
return data
4148

4249
def encode(self, d: Any, **kwargs: Any) -> str:
43-
require_toml(installed=toml_installed)
44-
data = toml.dumps(dict(d), **kwargs)
50+
require_toml(installed=tomli_w_installed)
51+
data = tomli_w.dumps(dict(d), **kwargs)
4552
return data

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ s3 = [
138138
"boto3 >= 1.24.89, < 2.0.0",
139139
]
140140
toml = [
141-
"toml >= 0.10.2, < 1.0.0",
141+
"toml >= 0.10.2, < 1.0.0; python_version < '3.11'",
142+
"tomli-w >= 1.0.0, < 2.0.0",
142143
]
143144
xls = [
144145
"openpyxl >= 3.0.0, < 4.0.0",

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ python-slugify == 8.0.4
1212
pyyaml == 6.0.3
1313
requests == 2.33.1
1414
toml == 0.10.2
15+
tomli-w == 1.2.0
1516
typing_extensions >= 4.14.1
1617
urllib3 >= 2.6.3
1718
useful-types == 0.2.1

tests/dicts/io/test_io_dict_toml.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def test_to_toml_file(self) -> None:
184184
self.assertFileExists(filepath)
185185
self.assertEqual(d, IODict.from_toml(filepath))
186186

187-
@patch("benedict.serializers.toml.toml_installed", False)
187+
@patch("benedict.serializers.toml.tomli_w_installed", False)
188188
def test_to_toml_with_extra_not_installed(self) -> None:
189189
d = IODict(
190190
{
tests/serializers/test_toml_serializer.py

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,82 @@
11
import unittest
22

3-
# from benedict.serializers import TOMLSerializer
3+
from benedict import benedict
4+
from benedict.dicts.io import IODict
5+
from benedict.serializers import TOMLSerializer
46

57

68
class toml_serializer_test_case(unittest.TestCase):
79
"""
810
This class describes a toml serializer test case.
11+
12+
Regression coverage for issue #439 — the uiri/toml encoder crashes
13+
on certain strings. These tests pin the encode path to a library
14+
that handles them correctly and guard against regression.
915
"""
1016

11-
def test_decode_toml(self) -> None:
12-
# TODO
13-
pass
17+
def test_encode_ansi_control_character(self):
18+
"""Scenario 1 — falsification clause #1.
19+
20+
`benedict({"color": "\033[31m"}).to_toml()` must not raise. On
21+
baseline (uiri/toml) this raises IndexError in the encoder.
22+
"""
23+
payload = {"color": "\033[31m"}
24+
encoded = benedict(payload).to_toml()
25+
self.assertIsInstance(encoded, str)
26+
self.assertGreater(len(encoded), 0)
27+
# Round-trip: decoded value must equal the original string.
28+
decoded = IODict.from_toml(encoded)
29+
self.assertEqual(decoded["color"], "\033[31m")
30+
31+
def test_encode_issue_439_literal_examples(self):
32+
"""Scenario 2 — regression guard for issue #439's cited examples.
33+
34+
These pass on baseline (literal backslashes, not control chars).
35+
Kept so the encoder swap does not silently regress them.
36+
"""
37+
payload = {
38+
"reset": "\\033\\[00;00m",
39+
"lightblue": "\\033\\[01;30m",
40+
}
41+
encoded = benedict(payload).to_toml()
42+
self.assertIsInstance(encoded, str)
43+
decoded = IODict.from_toml(encoded)
44+
self.assertEqual(decoded["reset"], "\\033\\[00;00m")
45+
self.assertEqual(decoded["lightblue"], "\\033\\[01;30m")
46+
47+
def test_roundtrip_control_chars_and_unicode(self):
48+
"""Scenario 4 — round-trip integrity across tricky values."""
49+
payload = {
50+
"ansi_red": "\033[31m",
51+
"ansi_reset": "\033[0m",
52+
"bell": "\x07",
53+
"tab_and_newline": "a\tb\nc",
54+
"unicode_emoji": "benedict 🎩",
55+
"backslash": "path\\to\\file",
56+
"quotes": 'he said "hi"',
57+
}
58+
encoded = benedict(payload).to_toml()
59+
decoded = IODict.from_toml(encoded)
60+
for key, value in payload.items():
61+
self.assertEqual(decoded[key], value, f"round-trip mismatch for {key!r}")
62+
63+
def test_encode_nested_dict(self):
64+
"""Structural coverage — nested dicts still encode correctly."""
65+
payload = {
66+
"section": {
67+
"key": "value",
68+
"control": "\033[31m",
69+
}
70+
}
71+
encoded = benedict(payload).to_toml()
72+
decoded = IODict.from_toml(encoded)
73+
self.assertEqual(decoded["section"]["key"], "value")
74+
self.assertEqual(decoded["section"]["control"], "\033[31m")
1475

15-
def test_encode_toml(self) -> None:
16-
# TODO
17-
pass
76+
def test_serializer_decode_roundtrip(self):
77+
"""Direct serializer-level round-trip (bypasses IODict convenience layer)."""
78+
serializer = TOMLSerializer()
79+
payload = {"color": "\033[31m", "count": 42}
80+
encoded = serializer.encode(payload)
81+
decoded = serializer.decode(encoded)
82+
self.assertEqual(decoded, payload)

0 commit comments

Comments
 (0)