Review notes (text before the first "diff --git" header is ignored by git apply):
- Line breaks and indentation were reconstructed from the hunk line counts and
  Python syntax. Whitespace in unchanged context lines and in whitespace-only
  removed lines is a best guess; apply with `git apply --ignore-whitespace`.
- parse_key() in utils.py was hardened: a lone '"' is no longer decoded as an
  empty key, and an undecodable quoted key is returned verbatim instead of
  raising json.JSONDecodeError. The utils.py hunk header was updated to match;
  the post-image blob hash on its "index" line is therefore stale.

diff --git a/zon-format/src/zon/core/decoder.py b/zon-format/src/zon/core/decoder.py
index f6cf525..7744e5d 100644
--- a/zon-format/src/zon/core/decoder.py
+++ b/zon-format/src/zon/core/decoder.py
@@ -13,7 +13,7 @@
 )
 from .exceptions import ZonDecodeError
 from ..schema.inference import TypeInferrer
-from .utils import parse_value
+from .utils import parse_key, parse_value
 
 class ZonDecoder:
     """Decodes ZON format strings into Python data structures.
@@ -409,7 +409,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any:
                 if match:
                     key_str = match.group(1)
                     val_str = pair[match.end(1):]
-                    key = parse_value(key_str)
+                    key = parse_key(key_str)
                     val = self._parse_zon_node(val_str, depth + 1)
                     obj[key] = val
                     continue
@@ -424,7 +424,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any:
 
                 key_str = pair[:colon_pos].strip()
                 val_str = pair[colon_pos + 1:].strip()
-                key = parse_value(key_str)
+                key = parse_key(key_str)
                 val = self._parse_zon_node(val_str, depth + 1)
                 obj[key] = val
 
diff --git a/zon-format/src/zon/core/encoder.py b/zon-format/src/zon/core/encoder.py
index 498222d..e985dc1 100644
--- a/zon-format/src/zon/core/encoder.py
+++ b/zon-format/src/zon/core/encoder.py
@@ -173,13 +173,17 @@ def _analyze_optimal_sparse_mode(self, values: List[Any]) -> SparseMode:
         if len(values) < 5:
             return SparseMode.NONE
 
-        is_numeric = True
+        # Only int columns are eligible for delta encoding. Float delta
+        # encoding cannot satisfy the spec §2.3 MUST round-trip requirement:
+        # prev + (cur - prev) in IEEE-754 does not preserve the original
+        # double's bit pattern for arbitrary floats.
+        is_int_only = True
         for val in values:
-            if not isinstance(val, (int, float)) or isinstance(val, bool):
-                is_numeric = False
+            if not isinstance(val, int) or isinstance(val, bool):
+                is_int_only = False
                 break
-        
-        if is_numeric:
+
+        if is_int_only:
             return SparseMode.DELTA
 
         return SparseMode.NONE
@@ -534,7 +538,9 @@ def _write_dictionary_table(
         lines: List[str] = []
 
         for col, values in dictionaries.items():
-            lines.append(f"{col}[{len(values)}]:{','.join(values)}")
+            # Quote dictionary values that contain special characters
+            formatted_values = [self._format_value(v) for v in values]
+            lines.append(f"{col}[{len(values)}]:{','.join(formatted_values)}")
 
         dict_cols = list(dictionaries.keys())
         regular_cols = [c for c in cols if c not in dictionaries]
@@ -650,7 +656,8 @@ def _format_zon_node(self, val: Any, visited: Optional[set] = None) -> str:
             for k in keys:
                 v = val[k]
                 k_str = str(k)
-                if re.search(r'[,:\{\}\[\]"]', k_str):
+                # Quote keys with special chars OR boolean/null keywords
+                if re.search(r'[,:\{\}\[\]"]', k_str) or re.match(r'^(true|false|t|f|null|none|nil)$', k_str, re.IGNORECASE):
                     k_str = json.dumps(k_str)
 
                 v_str = self._format_zon_node(v, visited.copy())
diff --git a/zon-format/src/zon/core/utils.py b/zon-format/src/zon/core/utils.py
index 9e0b932..7b18c86 100644
--- a/zon-format/src/zon/core/utils.py
+++ b/zon-format/src/zon/core/utils.py
@@ -31,6 +31,40 @@ def quote_string(s: str) -> str:
     zon_str = inner.replace('\\"', '""')
     return f'"{zon_str}"'
 
+def parse_key(val: str) -> str:
+    """Parse a ZON dictionary key string.
+
+    Unlike parse_value, this does NOT convert boolean keywords (t, f, true, false)
+    or null keywords to Python types. Keys are always strings.
+
+    A quoted key that cannot be decoded is returned verbatim (quotes included).
+
+    Args:
+        val: The key string to parse
+
+    Returns:
+        The parsed key as a string
+    """
+    trimmed = val.strip()
+
+    # Handle quoted strings. A lone '"' is not a quoted string: without the
+    # length guard the doubled-quote fallback below would slice it to '' and
+    # decode it as an empty key.
+    if len(trimmed) >= 2 and trimmed.startswith('"') and trimmed.endswith('"'):
+        try:
+            return json.loads(trimmed)
+        except json.JSONDecodeError:
+            # ZON writes an embedded quote as "" (see quote_string); map it back.
+            json_str = trimmed[1:-1].replace('""', '\\"')
+            try:
+                return json.loads(f'"{json_str}"')
+            except json.JSONDecodeError:
+                # Undecodable: fall through and keep the raw text.
+                pass
+
+    return trimmed
+
+
 def parse_value(val: str) -> Any:
     """Parse a ZON value string into the appropriate Python type.
 
diff --git a/zon-format/tests/unit/test_boolean_keys.py b/zon-format/tests/unit/test_boolean_keys.py
new file mode 100644
index 0000000..3e8fb6b
--- /dev/null
+++ b/zon-format/tests/unit/test_boolean_keys.py
@@ -0,0 +1,139 @@
+"""Tests for boolean-like dictionary keys.
+
+This module tests the fix for a bug where dictionary keys like "f", "t",
+"true", "false", "null" were incorrectly parsed as boolean/null values
+instead of being preserved as strings.
+
+"""
+
+import unittest
+import zon
+
+
+class TestBooleanLikeKeys(unittest.TestCase):
+    """Test that boolean-like strings are preserved as dictionary keys."""
+
+    def test_single_char_f_key(self):
+        """Key 'f' should not become False."""
+        data = {"f": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("f", decoded)
+        self.assertNotIn(False, decoded)
+
+    def test_single_char_t_key(self):
+        """Key 't' should not become True."""
+        data = {"t": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("t", decoded)
+        self.assertNotIn(True, decoded)
+
+    def test_nested_f_key(self):
+        """Nested key 'f' should not become False."""
+        data = {"a": {"b": {"c": {"d": {"e": {"f": 1}}}}}}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        # Verify the innermost key is string "f", not False
+        inner = decoded["a"]["b"]["c"]["d"]["e"]
+        self.assertIn("f", inner)
+        self.assertNotIn(False, inner)
+
+    def test_true_key(self):
+        """Key 'true' should not become True."""
+        data = {"true": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("true", decoded)
+
+    def test_false_key(self):
+        """Key 'false' should not become False."""
+        data = {"false": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("false", decoded)
+
+    def test_null_key(self):
+        """Key 'null' should not become None."""
+        data = {"null": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("null", decoded)
+        self.assertNotIn(None, decoded)
+
+    def test_none_key(self):
+        """Key 'none' should not become None."""
+        data = {"none": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("none", decoded)
+
+    def test_nil_key(self):
+        """Key 'nil' should not become None."""
+        data = {"nil": 1}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        self.assertIn("nil", decoded)
+
+    def test_case_insensitive_keys(self):
+        """Case variants should also be preserved as strings."""
+        test_cases = [
+            {"F": 1},
+            {"T": 1},
+            {"True": 1},
+            {"False": 1},
+            {"TRUE": 1},
+            {"FALSE": 1},
+            {"NULL": 1},
+            {"NONE": 1},
+            {"Null": 1},
+        ]
+        for data in test_cases:
+            with self.subTest(data=data):
+                encoded = zon.encode(data)
+                decoded = zon.decode(encoded)
+                self.assertEqual(decoded, data)
+
+    def test_multiple_boolean_like_keys(self):
+        """Multiple boolean-like keys in same dict."""
+        data = {"t": 1, "f": 2, "true": 3, "false": 4, "null": 5}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+
+    def test_boolean_like_keys_with_boolean_values(self):
+        """Boolean-like keys with actual boolean values."""
+        data = {"t": True, "f": False, "null": None}
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+        # Keys should be strings
+        self.assertIn("t", decoded)
+        self.assertIn("f", decoded)
+        self.assertIn("null", decoded)
+        # Values should be booleans/None
+        self.assertIs(decoded["t"], True)
+        self.assertIs(decoded["f"], False)
+        self.assertIs(decoded["null"], None)
+
+    def test_in_table_context(self):
+        """Boolean-like keys in tabular data."""
+        data = [
+            {"f": 1, "t": 2, "value": "a"},
+            {"f": 3, "t": 4, "value": "b"},
+        ]
+        encoded = zon.encode(data)
+        decoded = zon.decode(encoded)
+        self.assertEqual(decoded, data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/zon-format/tests/unit/test_delta.py b/zon-format/tests/unit/test_delta.py
index 47c8168..e966de9 100644
--- a/zon-format/tests/unit/test_delta.py
+++ b/zon-format/tests/unit/test_delta.py
@@ -1,3 +1,4 @@
+import math
 import unittest
 from zon import ZonEncoder, ZonDecoder
 
@@ -107,12 +108,41 @@ def test_deep_nesting(self):
         data = [
             {'a': {'b': {'c': {'d': {'e': 1}}}}}
         ]
-        
+
         encoded = self.encoder.encode(data)
         self.assertIn('a.b.c.d.e', encoded)
-        
+
         decoded = self.decoder.decode(encoded)
         self.assertEqual(decoded, data)
 
+    def test_float_column_roundtrip_is_lossless(self):
+        """Float columns must round-trip bit-exactly (spec §2.3 MUST).
+
+        Covers multiple precision regimes so a partial fix (e.g. round-to-N)
+        cannot sneak through.
+        """
+        data = [
+            {'v': 1865.43},        # benchmark regression case
+            {'v': 3579.16},        # benchmark regression case
+            {'v': math.pi},        # 17-sig-digit irrational
+            {'v': math.e},         # 17-sig-digit irrational
+            {'v': 0.1 + 0.2},      # classic non-terminating binary: 0.30000000000000004
+            {'v': -42.5},          # negative, crosses zero in deltas
+            {'v': 1e-10},          # small exponent
+            {'v': 1e15},           # large exponent
+        ]
+
+        decoded = self.decoder.decode(self.encoder.encode(data))
+
+        for original, got in zip(data, decoded):
+            # repr(float) is the shortest string that round-trips to the same
+            # double, so repr equality is equivalent to bit equality.
+            self.assertEqual(
+                repr(original['v']),
+                repr(got['v']),
+                f"float roundtrip lost precision: {original['v']!r} -> {got['v']!r}",
+            )
+
+
 if __name__ == "__main__":
     unittest.main()