Skip to content
6 changes: 3 additions & 3 deletions zon-format/src/zon/core/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from .exceptions import ZonDecodeError
from ..schema.inference import TypeInferrer
from .utils import parse_value
from .utils import parse_key, parse_value

class ZonDecoder:
"""Decodes ZON format strings into Python data structures.
Expand Down Expand Up @@ -409,7 +409,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any:
if match:
key_str = match.group(1)
val_str = pair[match.end(1):]
key = parse_value(key_str)
key = parse_key(key_str)
val = self._parse_zon_node(val_str, depth + 1)
obj[key] = val
continue
Expand All @@ -424,7 +424,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any:
key_str = pair[:colon_pos].strip()
val_str = pair[colon_pos + 1:].strip()

key = parse_value(key_str)
key = parse_key(key_str)
val = self._parse_zon_node(val_str, depth + 1)
obj[key] = val

Expand Down
21 changes: 14 additions & 7 deletions zon-format/src/zon/core/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,17 @@ def _analyze_optimal_sparse_mode(self, values: List[Any]) -> SparseMode:
if len(values) < 5:
return SparseMode.NONE

is_numeric = True
# Only int columns are eligible for delta encoding. Float delta
# encoding cannot satisfy the spec §2.3 MUST round-trip requirement:
# prev + (cur - prev) in IEEE-754 does not preserve the original
# double's bit pattern for arbitrary floats.
is_int_only = True
for val in values:
if not isinstance(val, (int, float)) or isinstance(val, bool):
is_numeric = False
if not isinstance(val, int) or isinstance(val, bool):
is_int_only = False
break
if is_numeric:

if is_int_only:
return SparseMode.DELTA

return SparseMode.NONE
Expand Down Expand Up @@ -534,7 +538,9 @@ def _write_dictionary_table(
lines: List[str] = []

for col, values in dictionaries.items():
lines.append(f"{col}[{len(values)}]:{','.join(values)}")
# Quote dictionary values that contain special characters
formatted_values = [self._format_value(v) for v in values]
lines.append(f"{col}[{len(values)}]:{','.join(formatted_values)}")

dict_cols = list(dictionaries.keys())
regular_cols = [c for c in cols if c not in dictionaries]
Expand Down Expand Up @@ -650,7 +656,8 @@ def _format_zon_node(self, val: Any, visited: Optional[set] = None) -> str:
for k in keys:
v = val[k]
k_str = str(k)
if re.search(r'[,:\{\}\[\]"]', k_str):
# Quote keys with special chars OR boolean/null keywords
if re.search(r'[,:\{\}\[\]"]', k_str) or re.match(r'^(true|false|t|f|null|none|nil)$', k_str, re.IGNORECASE):
k_str = json.dumps(k_str)

v_str = self._format_zon_node(v, visited.copy())
Expand Down
27 changes: 27 additions & 0 deletions zon-format/src/zon/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,33 @@ def quote_string(s: str) -> str:
zon_str = inner.replace('\\"', '""')
return f'"{zon_str}"'

def parse_key(val: str) -> str:
    """Parse a ZON dictionary key string.

    Unlike parse_value, this does NOT convert boolean keywords (t, f, true,
    false) or null keywords to Python types. Keys are always strings.

    Args:
        val: The key string to parse.

    Returns:
        The parsed key as a string. A key that opens with a double quote but
        is not a well-formed quoted string (unterminated, a lone quote, or
        containing an invalid escape) is returned trimmed but otherwise
        unchanged instead of raising.
    """
    trimmed = val.strip()

    # Bare (unquoted) keys are taken literally, keywords included.
    if not trimmed.startswith('"'):
        return trimmed

    # First attempt: the key is a valid JSON string literal.
    try:
        return json.loads(trimmed)
    except json.JSONDecodeError:
        pass

    # Second attempt: ZON-style quoting, where a doubled quote ("") inside
    # the literal stands for one literal quote. Requiring two characters
    # keeps a lone '"' from acting as both the opening and closing quote.
    if len(trimmed) >= 2 and trimmed.endswith('"'):
        json_str = trimmed[1:-1].replace('""', '\\"')
        try:
            return json.loads(f'"{json_str}"')
        except json.JSONDecodeError:
            # Still malformed (e.g. an invalid backslash escape): fall
            # through to the same best-effort result as an unterminated
            # quote rather than leaking a JSON error to the caller.
            pass

    return trimmed


def parse_value(val: str) -> Any:
"""Parse a ZON value string into the appropriate Python type.

Expand Down
139 changes: 139 additions & 0 deletions zon-format/tests/unit/test_boolean_keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Tests for boolean-like dictionary keys.

This module tests the fix for a bug where dictionary keys like "f", "t",
"true", "false", "null" were incorrectly parsed as boolean/null values
instead of being preserved as strings.

"""

import unittest
import zon


class TestBooleanLikeKeys(unittest.TestCase):
    """Round-trip checks for dict keys that look like boolean/null keywords."""

    def _roundtrip(self, payload):
        """Encode and decode *payload*, assert equality, return the result."""
        restored = zon.decode(zon.encode(payload))
        self.assertEqual(restored, payload)
        return restored

    def test_single_char_f_key(self):
        """The key 'f' must survive as a string, not turn into False."""
        restored = self._roundtrip({"f": 1})
        self.assertIn("f", restored)
        self.assertNotIn(False, restored)

    def test_single_char_t_key(self):
        """The key 't' must survive as a string, not turn into True."""
        restored = self._roundtrip({"t": 1})
        self.assertIn("t", restored)
        self.assertNotIn(True, restored)

    def test_nested_f_key(self):
        """A deeply nested key 'f' must not turn into False."""
        restored = self._roundtrip({"a": {"b": {"c": {"d": {"e": {"f": 1}}}}}})
        # The innermost mapping must hold the string "f", not the bool False.
        innermost = restored["a"]["b"]["c"]["d"]["e"]
        self.assertIn("f", innermost)
        self.assertNotIn(False, innermost)

    def test_true_key(self):
        """The key 'true' must not turn into True."""
        restored = self._roundtrip({"true": 1})
        self.assertIn("true", restored)

    def test_false_key(self):
        """The key 'false' must not turn into False."""
        restored = self._roundtrip({"false": 1})
        self.assertIn("false", restored)

    def test_null_key(self):
        """The key 'null' must not turn into None."""
        restored = self._roundtrip({"null": 1})
        self.assertIn("null", restored)
        self.assertNotIn(None, restored)

    def test_none_key(self):
        """The key 'none' must not turn into None."""
        restored = self._roundtrip({"none": 1})
        self.assertIn("none", restored)

    def test_nil_key(self):
        """The key 'nil' must not turn into None."""
        restored = self._roundtrip({"nil": 1})
        self.assertIn("nil", restored)

    def test_case_insensitive_keys(self):
        """Upper- and mixed-case keyword spellings also stay strings."""
        spellings = (
            "F",
            "T",
            "True",
            "False",
            "TRUE",
            "FALSE",
            "NULL",
            "NONE",
            "Null",
        )
        for spelling in spellings:
            data = {spelling: 1}
            with self.subTest(data=data):
                self._roundtrip(data)

    def test_multiple_boolean_like_keys(self):
        """Several keyword-like keys can coexist in one dict."""
        self._roundtrip({"t": 1, "f": 2, "true": 3, "false": 4, "null": 5})

    def test_boolean_like_keys_with_boolean_values(self):
        """Keyword-like keys paired with real boolean/None values."""
        payload = {"t": True, "f": False, "null": None}
        restored = self._roundtrip(payload)
        # Keys come back as strings...
        for key in payload:
            self.assertIn(key, restored)
        # ...while the values keep their bool/None identity.
        for key, expected in payload.items():
            self.assertIs(restored[key], expected)

    def test_in_table_context(self):
        """Keyword-like keys used as columns of tabular data."""
        rows = [
            {"f": 1, "t": 2, "value": "a"},
            {"f": 3, "t": 4, "value": "b"},
        ]
        self._roundtrip(rows)


# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
34 changes: 32 additions & 2 deletions zon-format/tests/unit/test_delta.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import unittest
from zon import ZonEncoder, ZonDecoder

Expand Down Expand Up @@ -107,12 +108,41 @@ def test_deep_nesting(self):
data = [
{'a': {'b': {'c': {'d': {'e': 1}}}}}
]

encoded = self.encoder.encode(data)
self.assertIn('a.b.c.d.e', encoded)

decoded = self.decoder.decode(encoded)
self.assertEqual(decoded, data)

def test_float_column_roundtrip_is_lossless(self):
    """Float columns must round-trip bit-exactly (spec §2.3 MUST).

    Covers multiple precision regimes so a partial fix (e.g. round-to-N)
    cannot sneak through.
    """
    data = [
        {'v': 1865.43},     # benchmark regression case
        {'v': 3579.16},     # benchmark regression case
        {'v': math.pi},     # 17-sig-digit irrational
        {'v': math.e},      # 17-sig-digit irrational
        {'v': 0.1 + 0.2},   # classic non-terminating binary: 0.30000000000000004
        {'v': -42.5},       # negative, crosses zero in deltas
        {'v': 1e-10},       # small exponent
        {'v': 1e15},        # large exponent
    ]

    decoded = self.decoder.decode(self.encoder.encode(data))

    # zip() stops at the shorter input, so without this check a decoder that
    # dropped rows (or returned nothing) would pass the loop below vacuously.
    self.assertEqual(
        len(decoded),
        len(data),
        f"row count changed in roundtrip: {len(data)} -> {len(decoded)}",
    )

    for original, got in zip(data, decoded):
        # repr(float) is the shortest string that round-trips to the same
        # double, so for the finite values used here repr equality implies
        # bit equality. It also catches a float coming back as an int.
        self.assertEqual(
            repr(original['v']),
            repr(got['v']),
            f"float roundtrip lost precision: {original['v']!r} -> {got['v']!r}",
        )


# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading