From d3b605e7b85eb18a6a19c5e48be6af21158f7ad3 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Sun, 12 Apr 2026 22:42:27 +0100 Subject: [PATCH 1/7] Fix boolean-like dictionary keys being converted to booleans The encoder now quotes keys that match boolean/null keywords (t, f, true, false, null, none, nil) to prevent the decoder from misinterpreting them. The decoder now uses a new parse_key() function for dictionary keys that preserves them as strings, unlike parse_value() which converts keywords. This fixes a critical round-trip bug where {"f": 1} would decode as {False: 1}. Co-Authored-By: Claude Opus 4.5 --- zon-format/src/zon/core/decoder.py | 6 +- zon-format/src/zon/core/encoder.py | 3 +- zon-format/src/zon/core/utils.py | 27 ++++ zon-format/tests/unit/test_boolean_keys.py | 140 +++++++++++++++++++++ 4 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 zon-format/tests/unit/test_boolean_keys.py diff --git a/zon-format/src/zon/core/decoder.py b/zon-format/src/zon/core/decoder.py index f6cf525..7744e5d 100644 --- a/zon-format/src/zon/core/decoder.py +++ b/zon-format/src/zon/core/decoder.py @@ -13,7 +13,7 @@ ) from .exceptions import ZonDecodeError from ..schema.inference import TypeInferrer -from .utils import parse_value +from .utils import parse_key, parse_value class ZonDecoder: """Decodes ZON format strings into Python data structures. 
@@ -409,7 +409,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any: if match: key_str = match.group(1) val_str = pair[match.end(1):] - key = parse_value(key_str) + key = parse_key(key_str) val = self._parse_zon_node(val_str, depth + 1) obj[key] = val continue @@ -424,7 +424,7 @@ def _parse_zon_node(self, text: str, depth: int = 0) -> Any: key_str = pair[:colon_pos].strip() val_str = pair[colon_pos + 1:].strip() - key = parse_value(key_str) + key = parse_key(key_str) val = self._parse_zon_node(val_str, depth + 1) obj[key] = val diff --git a/zon-format/src/zon/core/encoder.py b/zon-format/src/zon/core/encoder.py index 498222d..5dade68 100644 --- a/zon-format/src/zon/core/encoder.py +++ b/zon-format/src/zon/core/encoder.py @@ -650,7 +650,8 @@ def _format_zon_node(self, val: Any, visited: Optional[set] = None) -> str: for k in keys: v = val[k] k_str = str(k) - if re.search(r'[,:\{\}\[\]"]', k_str): + # Quote keys with special chars OR boolean/null keywords + if re.search(r'[,:\{\}\[\]"]', k_str) or re.match(r'^(true|false|t|f|null|none|nil)$', k_str, re.IGNORECASE): k_str = json.dumps(k_str) v_str = self._format_zon_node(v, visited.copy()) diff --git a/zon-format/src/zon/core/utils.py b/zon-format/src/zon/core/utils.py index 9e0b932..7b18c86 100644 --- a/zon-format/src/zon/core/utils.py +++ b/zon-format/src/zon/core/utils.py @@ -31,6 +31,33 @@ def quote_string(s: str) -> str: zon_str = inner.replace('\\"', '""') return f'"{zon_str}"' +def parse_key(val: str) -> str: + """Parse a ZON dictionary key string. + + Unlike parse_value, this does NOT convert boolean keywords (t, f, true, false) + or null keywords to Python types. Keys are always strings. 
+ + Args: + val: The key string to parse + + Returns: + The parsed key as a string + """ + trimmed = val.strip() + + # Handle quoted strings + if trimmed.startswith('"'): + try: + return json.loads(trimmed) + except json.JSONDecodeError: + if trimmed.endswith('"'): + inner = trimmed[1:-1] + json_str = inner.replace('""', '\\"') + return json.loads(f'"{json_str}"') + + return trimmed + + def parse_value(val: str) -> Any: """Parse a ZON value string into the appropriate Python type. diff --git a/zon-format/tests/unit/test_boolean_keys.py b/zon-format/tests/unit/test_boolean_keys.py new file mode 100644 index 0000000..e6d3756 --- /dev/null +++ b/zon-format/tests/unit/test_boolean_keys.py @@ -0,0 +1,140 @@ +"""Tests for boolean-like dictionary keys. + +This module tests the fix for a bug where dictionary keys like "f", "t", +"true", "false", "null" were incorrectly parsed as boolean/null values +instead of being preserved as strings. + +See: https://github.com/ZON-Format/ZON/issues/XXX +""" + +import unittest +import zon + + +class TestBooleanLikeKeys(unittest.TestCase): + """Test that boolean-like strings are preserved as dictionary keys.""" + + def test_single_char_f_key(self): + """Key 'f' should not become False.""" + data = {"f": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("f", decoded) + self.assertNotIn(False, decoded) + + def test_single_char_t_key(self): + """Key 't' should not become True.""" + data = {"t": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("t", decoded) + self.assertNotIn(True, decoded) + + def test_nested_f_key(self): + """Nested key 'f' should not become False.""" + data = {"a": {"b": {"c": {"d": {"e": {"f": 1}}}}}} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + # Verify the innermost key is string "f", not False + inner = decoded["a"]["b"]["c"]["d"]["e"] + 
self.assertIn("f", inner) + self.assertNotIn(False, inner) + + def test_true_key(self): + """Key 'true' should not become True.""" + data = {"true": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("true", decoded) + + def test_false_key(self): + """Key 'false' should not become False.""" + data = {"false": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("false", decoded) + + def test_null_key(self): + """Key 'null' should not become None.""" + data = {"null": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("null", decoded) + self.assertNotIn(None, decoded) + + def test_none_key(self): + """Key 'none' should not become None.""" + data = {"none": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("none", decoded) + + def test_nil_key(self): + """Key 'nil' should not become None.""" + data = {"nil": 1} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + self.assertIn("nil", decoded) + + def test_case_insensitive_keys(self): + """Case variants should also be preserved as strings.""" + test_cases = [ + {"F": 1}, + {"T": 1}, + {"True": 1}, + {"False": 1}, + {"TRUE": 1}, + {"FALSE": 1}, + {"NULL": 1}, + {"NONE": 1}, + {"Null": 1}, + ] + for data in test_cases: + with self.subTest(data=data): + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + + def test_multiple_boolean_like_keys(self): + """Multiple boolean-like keys in same dict.""" + data = {"t": 1, "f": 2, "true": 3, "false": 4, "null": 5} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + + def test_boolean_like_keys_with_boolean_values(self): + """Boolean-like keys with actual boolean values.""" + data = {"t": True, "f": 
False, "null": None} + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + # Keys should be strings + self.assertIn("t", decoded) + self.assertIn("f", decoded) + self.assertIn("null", decoded) + # Values should be booleans/None + self.assertIs(decoded["t"], True) + self.assertIs(decoded["f"], False) + self.assertIs(decoded["null"], None) + + def test_in_table_context(self): + """Boolean-like keys in tabular data.""" + data = [ + {"f": 1, "t": 2, "value": "a"}, + {"f": 3, "t": 4, "value": "b"}, + ] + encoded = zon.encode(data) + decoded = zon.decode(encoded) + self.assertEqual(decoded, data) + + +if __name__ == "__main__": + unittest.main() From 5b5c7f83f3b7ae59d5ec916a71154105702c44e5 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Sun, 12 Apr 2026 23:10:02 +0100 Subject: [PATCH 2/7] Fix dictionary header values not being quoted Values in dictionary compression headers containing commas, colons, or other special characters were not being quoted, causing data corruption on decode (values truncated at delimiter). Now uses _format_value() for consistent quoting of dictionary values. 
Co-Authored-By: Claude Opus 4.5 --- zon-format/src/zon/core/encoder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/zon-format/src/zon/core/encoder.py b/zon-format/src/zon/core/encoder.py index 5dade68..47f4d5b 100644 --- a/zon-format/src/zon/core/encoder.py +++ b/zon-format/src/zon/core/encoder.py @@ -534,7 +534,9 @@ def _write_dictionary_table( lines: List[str] = [] for col, values in dictionaries.items(): - lines.append(f"{col}[{len(values)}]:{','.join(values)}") + # Quote dictionary values that contain special characters + formatted_values = [self._format_value(v) for v in values] + lines.append(f"{col}[{len(values)}]:{','.join(formatted_values)}") dict_cols = list(dictionaries.keys()) regular_cols = [c for c in cols if c not in dictionaries] From c377beeab017e3fb818204d1d5beacf71e7eb782 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Mon, 13 Apr 2026 00:45:18 +0100 Subject: [PATCH 3/7] Remove placeholder issue link from test docstring --- zon-format/tests/unit/test_boolean_keys.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zon-format/tests/unit/test_boolean_keys.py b/zon-format/tests/unit/test_boolean_keys.py index e6d3756..3e8fb6b 100644 --- a/zon-format/tests/unit/test_boolean_keys.py +++ b/zon-format/tests/unit/test_boolean_keys.py @@ -4,7 +4,6 @@ "true", "false", "null" were incorrectly parsed as boolean/null values instead of being preserved as strings. 
-See: https://github.com/ZON-Format/ZON/issues/XXX """ import unittest From a0019ce5fda451b845928c754ae0d60ab948d2b7 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Tue, 14 Apr 2026 23:03:32 +0100 Subject: [PATCH 4/7] 5 failing tests --- zon-format/benchmarks/data/wtq-00080.json | 128 +++++++++++++ zon-format/benchmarks/data/wtq-00085.json | 135 ++++++++++++++ zon-format/benchmarks/data/wtq-00113.json | 128 +++++++++++++ zon-format/benchmarks/data/wtq-00162.json | 218 ++++++++++++++++++++++ zon-format/benchmarks/data/wtq-00168.json | 203 ++++++++++++++++++++ 5 files changed, 812 insertions(+) create mode 100644 zon-format/benchmarks/data/wtq-00080.json create mode 100644 zon-format/benchmarks/data/wtq-00085.json create mode 100644 zon-format/benchmarks/data/wtq-00113.json create mode 100644 zon-format/benchmarks/data/wtq-00162.json create mode 100644 zon-format/benchmarks/data/wtq-00168.json diff --git a/zon-format/benchmarks/data/wtq-00080.json b/zon-format/benchmarks/data/wtq-00080.json new file mode 100644 index 0000000..72abe09 --- /dev/null +++ b/zon-format/benchmarks/data/wtq-00080.json @@ -0,0 +1,128 @@ +[ + { + "Year": "1996", + "Driver": "Karim Hirji", + "Co-driver": "Frank Nekusa", + "Car": "Toyota Celica ST 185", + "Notes": "Run under FIA rules as the Great Lakes Rally, jointly with Club Automobile du Burundi" + }, + { + "Year": "1997", + "Driver": "Chipper Adams", + "Co-driver": "Justin Beyendeza", + "Car": "Toyota Supra", + "Notes": "Run as the Pearl of Africa Uganda Rally - ARC Candidate event" + }, + { + "Year": "1998", + "Driver": "Charles Muhangi", + "Co-driver": "Steven Byaruhanga", + "Car": "Subaru Impreza", + "Notes": "" + }, + { + "Year": "1999", + "Driver": "Chipper Adams", + "Co-driver": "Justin Beyendeza", + "Car": "Toyota Supra", + "Notes": "" + }, + { + "Year": "2000", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": "" + }, + { + "Year": "2001", + "Driver": "-", + "Co-driver": 
"-", + "Car": "-", + "Notes": "Event not run" + }, + { + "Year": "2002", + "Driver": "Johnny Gemmel", + "Co-driver": "Robert Paisley", + "Car": "Subaru Impreza WRX", + "Notes": "" + }, + { + "Year": "2003", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": "" + }, + { + "Year": "2004", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": "" + }, + { + "Year": "2005", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2006", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N8", + "Notes": "Crew run under Kenyan Licence" + }, + { + "Year": "2007", + "Driver": "Conrad Rautenbach", + "Co-driver": "Peter Marsh", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2008", + "Driver": "Jamie Whyte", + "Co-driver": "Phil Archenoul", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2009", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N8", + "Notes": "Price granted posthumously" + }, + { + "Year": "2010", + "Driver": "Jamie Whyte", + "Co-driver": "Phil Archenoul", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2011", + "Driver": "Ponsiano Lwakataka", + "Co-driver": "Musa Nsubuga", + "Car": "Subaru Impreza N8", + "Notes": "" + }, + { + "Year": "2012", + "Driver": "Mohammed Essa", + "Co-driver": "Greg Stead", + "Car": "Subaru Impreza N12", + "Notes": "" + }, + { + "Year": "2013", + "Driver": "Jas Mangat", + "Co-driver": "Gihan de Silva", + "Car": "Mitsubishi Lancer Evo X", + "Notes": "" + } +] diff --git a/zon-format/benchmarks/data/wtq-00085.json b/zon-format/benchmarks/data/wtq-00085.json new file mode 100644 index 0000000..2bffd1a --- /dev/null +++ b/zon-format/benchmarks/data/wtq-00085.json @@ -0,0 +1,135 @@ +[ + { + "Region": "Netherlands", + "Date": "April 19, 2004", + "Label": 
"Magik Muzik", + "Format": "vinyl, 12\"", + "Catalog": "Magik Muzik 817-5" + }, + { + "Region": "Netherlands", + "Date": "April 19, 2004", + "Label": "Magik Muzik", + "Format": "CD, Maxi", + "Catalog": "Magik Muzik 817-2" + }, + { + "Region": "Netherlands", + "Date": "April 19, 2004", + "Label": "Magik Muzik", + "Format": "CD, Single", + "Catalog": "Magik Muzik 817-1" + }, + { + "Region": "Germany", + "Date": "May, 2004", + "Label": "Kontor Records", + "Format": "CD, Maxi", + "Catalog": "Kontor375" + }, + { + "Region": "Germany", + "Date": "April 8, 2004", + "Label": "Kontor Records", + "Format": "vinyl, 12\"", + "Catalog": "Kontor375" + }, + { + "Region": "Australia", + "Date": "April, 2004", + "Label": "Bang On!", + "Format": "vinyl, 12\"", + "Catalog": "BANG 056" + }, + { + "Region": "Australia", + "Date": "May, 2004", + "Label": "Bang On!", + "Format": "CD, Maxi", + "Catalog": "BANG0096" + }, + { + "Region": "United Kingdom", + "Date": "April 30, 2004", + "Label": "Nebula", + "Format": "CD, Maxi, Enhanced", + "Catalog": "NEBCD058" + }, + { + "Region": "United Kingdom", + "Date": "April 30, 2004", + "Label": "Nebula", + "Format": "CD, Single, Promo", + "Catalog": "NEBCDDJ058" + }, + { + "Region": "United Kingdom", + "Date": "April 30, 2004", + "Label": "Nebula", + "Format": "vinyl, 12\", Promo", + "Catalog": "NEBDJ058" + }, + { + "Region": "United Kingdom", + "Date": "April 30, 2004", + "Label": "Nebula", + "Format": "CD, Maxi", + "Catalog": "NEBT058" + }, + { + "Region": "Switzerland", + "Date": "June, 2004", + "Label": "Sirup", + "Format": "CD, Maxi", + "Catalog": "SIR021-1CD" + }, + { + "Region": "Switzerland", + "Date": "June, 2004", + "Label": "Sirup", + "Format": "CD, Maxi", + "Catalog": "MV-SIR903572" + }, + { + "Region": "Switzerland", + "Date": "June, 2004", + "Label": "Sirup", + "Format": "vinyl, 12\"", + "Catalog": "SIR021-6" + }, + { + "Region": "United States", + "Date": "2004", + "Label": "Nettwerk America", + "Format": "vinyl, 12\"", + "Catalog": 
"0 6700 33227 1 3" + }, + { + "Region": "United States", + "Date": "May, 2004", + "Label": "Nettwerk America", + "Format": "CD, Single", + "Catalog": "0 6700 33227 2 0" + }, + { + "Region": "France", + "Date": "2004", + "Label": "Independence Records", + "Format": "vinyl, 12\"", + "Catalog": "IR 0408" + }, + { + "Region": "Italy", + "Date": "June 5, 2004", + "Label": "Media Records", + "Format": "vinyl, 12\"", + "Catalog": "MR 2013" + }, + { + "Region": "Scandinavia", + "Date": "2004", + "Label": "Playground Music Scandinavia", + "Format": "CD, Maxi", + "Catalog": "Magik Muzik 817-2" + } +] diff --git a/zon-format/benchmarks/data/wtq-00113.json b/zon-format/benchmarks/data/wtq-00113.json new file mode 100644 index 0000000..72abe09 --- /dev/null +++ b/zon-format/benchmarks/data/wtq-00113.json @@ -0,0 +1,128 @@ +[ + { + "Year": "1996", + "Driver": "Karim Hirji", + "Co-driver": "Frank Nekusa", + "Car": "Toyota Celica ST 185", + "Notes": "Run under FIA rules as the Great Lakes Rally, jointly with Club Automobile du Burundi" + }, + { + "Year": "1997", + "Driver": "Chipper Adams", + "Co-driver": "Justin Beyendeza", + "Car": "Toyota Supra", + "Notes": "Run as the Pearl of Africa Uganda Rally - ARC Candidate event" + }, + { + "Year": "1998", + "Driver": "Charles Muhangi", + "Co-driver": "Steven Byaruhanga", + "Car": "Subaru Impreza", + "Notes": "" + }, + { + "Year": "1999", + "Driver": "Chipper Adams", + "Co-driver": "Justin Beyendeza", + "Car": "Toyota Supra", + "Notes": "" + }, + { + "Year": "2000", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": "" + }, + { + "Year": "2001", + "Driver": "-", + "Co-driver": "-", + "Car": "-", + "Notes": "Event not run" + }, + { + "Year": "2002", + "Driver": "Johnny Gemmel", + "Co-driver": "Robert Paisley", + "Car": "Subaru Impreza WRX", + "Notes": "" + }, + { + "Year": "2003", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": 
"" + }, + { + "Year": "2004", + "Driver": "Charlie Lubega", + "Co-driver": "Abed Musa", + "Car": "Mitsubishi Lancer Evo 4", + "Notes": "" + }, + { + "Year": "2005", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2006", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N8", + "Notes": "Crew run under Kenyan Licence" + }, + { + "Year": "2007", + "Driver": "Conrad Rautenbach", + "Co-driver": "Peter Marsh", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2008", + "Driver": "Jamie Whyte", + "Co-driver": "Phil Archenoul", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2009", + "Driver": "Riyaz Kurji", + "Co-driver": "Sayed Kadri", + "Car": "Subaru Impreza N8", + "Notes": "Price granted posthumously" + }, + { + "Year": "2010", + "Driver": "Jamie Whyte", + "Co-driver": "Phil Archenoul", + "Car": "Subaru Impreza N10", + "Notes": "" + }, + { + "Year": "2011", + "Driver": "Ponsiano Lwakataka", + "Co-driver": "Musa Nsubuga", + "Car": "Subaru Impreza N8", + "Notes": "" + }, + { + "Year": "2012", + "Driver": "Mohammed Essa", + "Co-driver": "Greg Stead", + "Car": "Subaru Impreza N12", + "Notes": "" + }, + { + "Year": "2013", + "Driver": "Jas Mangat", + "Co-driver": "Gihan de Silva", + "Car": "Mitsubishi Lancer Evo X", + "Notes": "" + } +] diff --git a/zon-format/benchmarks/data/wtq-00162.json b/zon-format/benchmarks/data/wtq-00162.json new file mode 100644 index 0000000..2c2ae16 --- /dev/null +++ b/zon-format/benchmarks/data/wtq-00162.json @@ -0,0 +1,218 @@ +[ + { + "Year": "1997", + "Competition": "Central American Games", + "Venue": "San Pedro Sula, Honduras", + "Position": "2nd", + "Event": "10,000 m", + "Notes": "" + }, + { + "Year": "2001", + "Competition": "Central American and Caribbean Championships", + "Venue": "Guatemala City, Guatemala", + "Position": "6th", + "Event": "5000 m", + "Notes": "14:57.04" + }, + { + "Year": "2001", + 
"Competition": "Central American and Caribbean Championships", + "Venue": "Guatemala City, Guatemala", + "Position": "3rd", + "Event": "10,000 m", + "Notes": "30:53.52" + }, + { + "Year": "2001", + "Competition": "Central American Games", + "Venue": "Guatemala City, Guatemala", + "Position": "1st", + "Event": "5000 m", + "Notes": "14:28.91 A" + }, + { + "Year": "2001", + "Competition": "Central American Games", + "Venue": "Guatemala City, Guatemala", + "Position": "2nd", + "Event": "10,000 m", + "Notes": "30:43.86 A" + }, + { + "Year": "2002", + "Competition": "Central American Championships", + "Venue": "San José, Costa Rica", + "Position": "2nd", + "Event": "5000 m", + "Notes": "14:30.14" + }, + { + "Year": "2002", + "Competition": "Central American Championships", + "Venue": "San José, Costa Rica", + "Position": "1st", + "Event": "10,000 m", + "Notes": "30:05.31" + }, + { + "Year": "2003", + "Competition": "Central American Championships", + "Venue": "Guatemala City, Guatemala", + "Position": "1st", + "Event": "5000 m", + "Notes": "15:19.35" + }, + { + "Year": "2003", + "Competition": "Central American Championships", + "Venue": "Guatemala City, Guatemala", + "Position": "1st", + "Event": "10,000 m", + "Notes": "30:16.25" + }, + { + "Year": "2003", + "Competition": "Pan American Games", + "Venue": "Santo Domingo, Dominican Republic", + "Position": "–", + "Event": "5000 m", + "Notes": "DNF" + }, + { + "Year": "2003", + "Competition": "Pan American Games", + "Venue": "Santo Domingo, Dominican Republic", + "Position": "7th", + "Event": "10,000 m", + "Notes": "30:26.61" + }, + { + "Year": "2004", + "Competition": "Olympic Games", + "Venue": "Athens, Greece", + "Position": "64th", + "Event": "Marathon", + "Notes": "2:27:13" + }, + { + "Year": "2005", + "Competition": "Central American Championships", + "Venue": "San José, Costa Rica", + "Position": "1st", + "Event": "10,000 m", + "Notes": "30:11.54" + }, + { + "Year": "2006", + "Competition": "Ibero-American 
Championships", + "Venue": "Ponce, Puerto Rico", + "Position": "8th", + "Event": "5000 m", + "Notes": "14:31.55" + }, + { + "Year": "2006", + "Competition": "Central American and Caribbean Games", + "Venue": "Cartagena, Colombia", + "Position": "6th", + "Event": "5000 m", + "Notes": "14:27.54" + }, + { + "Year": "2006", + "Competition": "Central American and Caribbean Games", + "Venue": "Cartagena, Colombia", + "Position": "4th", + "Event": "10,000 m", + "Notes": "30:09.31" + }, + { + "Year": "2007", + "Competition": "Central American Championships", + "Venue": "San José, Costa Rica", + "Position": "1st", + "Event": "5000 m", + "Notes": "14:42.08" + }, + { + "Year": "2007", + "Competition": "Central American Championships", + "Venue": "San José, Costa Rica", + "Position": "1st", + "Event": "10,000 m", + "Notes": "29:53.61" + }, + { + "Year": "2007", + "Competition": "NACAC Championships", + "Venue": "San Salvador, El Salvador", + "Position": "1st", + "Event": "5000 m", + "Notes": "14:33.31" + }, + { + "Year": "2007", + "Competition": "NACAC Championships", + "Venue": "San Salvador, El Salvador", + "Position": "1st", + "Event": "10,000 m", + "Notes": "29:42.11" + }, + { + "Year": "2007", + "Competition": "Pan American Games", + "Venue": "Rio de Janeiro, Brazil", + "Position": "2nd", + "Event": "Marathon", + "Notes": "2:14:27" + }, + { + "Year": "2008", + "Competition": "Olympic Games", + "Venue": "Beijing, PR China", + "Position": "35th", + "Event": "Marathon", + "Notes": "2:20:15" + }, + { + "Year": "2009", + "Competition": "Central American Championships", + "Venue": "Guatemala City, Guatemala", + "Position": "1st", + "Event": "5000 m", + "Notes": "14:55.44" + }, + { + "Year": "2010", + "Competition": "Central American and Caribbean Games", + "Venue": "Mayagüez, Puerto Rico", + "Position": "1st", + "Event": "Marathon", + "Notes": "2:21:35" + }, + { + "Year": "2010", + "Competition": "Central American Championships", + "Venue": "Guatemala City, Guatemala", + 
"Position": "1st", + "Event": "10,000 m", + "Notes": "32:06.26" + }, + { + "Year": "2011", + "Competition": "Pan American Games", + "Venue": "Guadalajara, Mexico", + "Position": "5th", + "Event": "Marathon", + "Notes": "2:20:27 SB" + }, + { + "Year": "2012", + "Competition": "Olympic Games", + "Venue": "London, United Kingdom", + "Position": "38th", + "Event": "Marathon", + "Notes": "2:18:23" + } +] diff --git a/zon-format/benchmarks/data/wtq-00168.json b/zon-format/benchmarks/data/wtq-00168.json new file mode 100644 index 0000000..78b0601 --- /dev/null +++ b/zon-format/benchmarks/data/wtq-00168.json @@ -0,0 +1,203 @@ +[ + { + "Year": "1987", + "Competition": "European Junior Championships", + "Venue": "Birmingham, United Kingdom", + "Position": "–", + "Event": "20 km walk", + "Notes": "DQ" + }, + { + "Year": "1990", + "Competition": "European Championships", + "Venue": "Split, Yugoslavia", + "Position": "4th", + "Event": "20 km walk", + "Notes": "1:23.47" + }, + { + "Year": "1991", + "Competition": "World Championships", + "Venue": "Tokyo, Japan", + "Position": "10th", + "Event": "20 km walk", + "Notes": "1:21:32" + }, + { + "Year": "1991", + "Competition": "World Championships", + "Venue": "Tokyo, Japan", + "Position": "–", + "Event": "50 km walk", + "Notes": "DNF" + }, + { + "Year": "1992", + "Competition": "Olympic Games", + "Venue": "Barcelona, Spain", + "Position": "–", + "Event": "20 km walk", + "Notes": "DNF" + }, + { + "Year": "1992", + "Competition": "Olympic Games", + "Venue": "Barcelona, Spain", + "Position": "–", + "Event": "50 km walk", + "Notes": "DQ" + }, + { + "Year": "1993", + "Competition": "World Indoor Championships", + "Venue": "Toronto, Canada", + "Position": "2nd", + "Event": "5000 m walk", + "Notes": "18:35.91" + }, + { + "Year": "1993", + "Competition": "World Championships", + "Venue": "Stuttgart, Germany", + "Position": "–", + "Event": "50 km walk", + "Notes": "DQ" + }, + { + "Year": "1994", + "Competition": "European Championships", + 
"Venue": "Helsinki, Finland", + "Position": "–", + "Event": "50 km walk", + "Notes": "DQ" + }, + { + "Year": "1994", + "Competition": "European Championships", + "Venue": "Helsinki, Finland", + "Position": "5th", + "Event": "50 km walk", + "Notes": "3:45:57" + }, + { + "Year": "1995", + "Competition": "World Championships", + "Venue": "Gothenburg, Sweden", + "Position": "3rd", + "Event": "50 km walk", + "Notes": "3:45.57" + }, + { + "Year": "1996", + "Competition": "Olympic Games", + "Venue": "Atlanta, United States", + "Position": "8th", + "Event": "20 km walk", + "Notes": "1:21:13" + }, + { + "Year": "1996", + "Competition": "Olympic Games", + "Venue": "Atlanta, United States", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:43:30" + }, + { + "Year": "1997", + "Competition": "World Championships", + "Venue": "Athens, Greece", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:44:46" + }, + { + "Year": "1998", + "Competition": "European Championships", + "Venue": "Budapest, Hungary", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:43:51" + }, + { + "Year": "1999", + "Competition": "World Race Walking Cup", + "Venue": "Mézidon-Canon, France", + "Position": "4th", + "Event": "20 km walk", + "Notes": "1:20:52" + }, + { + "Year": "1999", + "Competition": "World Championships", + "Venue": "Seville, Spain", + "Position": "–", + "Event": "50 km walk", + "Notes": "DQ" + }, + { + "Year": "2000", + "Competition": "European Race Walking Cup", + "Venue": "Eisenhüttenstadt, Germany", + "Position": "1st", + "Event": "20 km walk", + "Notes": "1:18:29" + }, + { + "Year": "2000", + "Competition": "Olympic Games", + "Venue": "Sydney, Australia", + "Position": "1st", + "Event": "20 km walk", + "Notes": "1:18:59 (OR)" + }, + { + "Year": "2000", + "Competition": "Olympic Games", + "Venue": "Sydney, Australia", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:42:22" + }, + { + "Year": "2001", + "Competition": "World Championships", + "Venue": 
"Edmonton, Canada", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:42.08" + }, + { + "Year": "2001", + "Competition": "Goodwill Games", + "Venue": "Brisbane, Australia", + "Position": "2nd", + "Event": "20,000 m walk", + "Notes": "1:19:52.0" + }, + { + "Year": "2002", + "Competition": "European Championships", + "Venue": "Munich, Germany", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:36:39 (WR)" + }, + { + "Year": "2003", + "Competition": "World Championships", + "Venue": "Paris, France", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:36:03" + }, + { + "Year": "2004", + "Competition": "Olympic Games", + "Venue": "Athens, Greece", + "Position": "1st", + "Event": "50 km walk", + "Notes": "3:38:46" + } +] + From d05f41dc79c3c999e55ef70cee66401de9c66f84 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Sun, 19 Apr 2026 14:12:04 +0100 Subject: [PATCH 5/7] Add test_roundtrip_benchmarks.py to assert roundtrip accuracy-- 5 of my tests failing +1 existing one. --- .../tests/unit/test_roundtrip_benchmarks.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 zon-format/tests/unit/test_roundtrip_benchmarks.py diff --git a/zon-format/tests/unit/test_roundtrip_benchmarks.py b/zon-format/tests/unit/test_roundtrip_benchmarks.py new file mode 100644 index 0000000..b90edb6 --- /dev/null +++ b/zon-format/tests/unit/test_roundtrip_benchmarks.py @@ -0,0 +1,53 @@ +""" +Roundtrip tests for all benchmark datasets. + +Validates that ZON encoding is lossless by checking encode -> decode -> compare. 
+""" +import json +import unittest +from pathlib import Path + +import zon + + +BENCHMARKS_DATA_DIR = Path(__file__).parent.parent.parent / 'benchmarks' / 'data' + + +def get_json_files(): + """Return all JSON files in benchmarks/data except questions.""" + return sorted( + f for f in BENCHMARKS_DATA_DIR.glob('*.json') + if 'questions' not in f.name + ) + + +class TestRoundtripBenchmarks(unittest.TestCase): + """Test roundtrip encoding for all benchmark datasets.""" + pass + + +def _make_test(filepath): + def test_roundtrip(self): + with open(filepath) as f: + original = json.load(f) + + encoded = zon.encode(original) + decoded = zon.decode(encoded) + + orig_json = json.dumps(original, sort_keys=True) + dec_json = json.dumps(decoded, sort_keys=True) + + self.assertEqual( + orig_json, dec_json, + f"Roundtrip failed for {filepath.name}" + ) + return test_roundtrip + + +for filepath in get_json_files(): + test_name = f'test_roundtrip_{filepath.stem.replace("-", "_")}' + setattr(TestRoundtripBenchmarks, test_name, _make_test(filepath)) + + +if __name__ == '__main__': + unittest.main() From 6824b8dc131bffc520a34e9deb2c74ed87a12fab Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Sun, 19 Apr 2026 14:43:51 +0100 Subject: [PATCH 6/7] Revert "Add add failure scenatios" --- zon-format/benchmarks/data/wtq-00080.json | 128 ---------- zon-format/benchmarks/data/wtq-00085.json | 135 ----------- zon-format/benchmarks/data/wtq-00113.json | 128 ---------- zon-format/benchmarks/data/wtq-00162.json | 218 ------------------ zon-format/benchmarks/data/wtq-00168.json | 203 ---------------- .../tests/unit/test_roundtrip_benchmarks.py | 53 ----- 6 files changed, 865 deletions(-) delete mode 100644 zon-format/benchmarks/data/wtq-00080.json delete mode 100644 zon-format/benchmarks/data/wtq-00085.json delete mode 100644 zon-format/benchmarks/data/wtq-00113.json delete mode 100644 zon-format/benchmarks/data/wtq-00162.json delete mode 100644 zon-format/benchmarks/data/wtq-00168.json delete 
mode 100644 zon-format/tests/unit/test_roundtrip_benchmarks.py diff --git a/zon-format/benchmarks/data/wtq-00080.json b/zon-format/benchmarks/data/wtq-00080.json deleted file mode 100644 index 72abe09..0000000 --- a/zon-format/benchmarks/data/wtq-00080.json +++ /dev/null @@ -1,128 +0,0 @@ -[ - { - "Year": "1996", - "Driver": "Karim Hirji", - "Co-driver": "Frank Nekusa", - "Car": "Toyota Celica ST 185", - "Notes": "Run under FIA rules as the Great Lakes Rally, jointly with Club Automobile du Burundi" - }, - { - "Year": "1997", - "Driver": "Chipper Adams", - "Co-driver": "Justin Beyendeza", - "Car": "Toyota Supra", - "Notes": "Run as the Pearl of Africa Uganda Rally - ARC Candidate event" - }, - { - "Year": "1998", - "Driver": "Charles Muhangi", - "Co-driver": "Steven Byaruhanga", - "Car": "Subaru Impreza", - "Notes": "" - }, - { - "Year": "1999", - "Driver": "Chipper Adams", - "Co-driver": "Justin Beyendeza", - "Car": "Toyota Supra", - "Notes": "" - }, - { - "Year": "2000", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2001", - "Driver": "-", - "Co-driver": "-", - "Car": "-", - "Notes": "Event not run" - }, - { - "Year": "2002", - "Driver": "Johnny Gemmel", - "Co-driver": "Robert Paisley", - "Car": "Subaru Impreza WRX", - "Notes": "" - }, - { - "Year": "2003", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2004", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2005", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2006", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N8", - "Notes": "Crew run under Kenyan Licence" - }, - { - "Year": "2007", - "Driver": "Conrad Rautenbach", - "Co-driver": "Peter Marsh", - "Car": "Subaru 
Impreza N10", - "Notes": "" - }, - { - "Year": "2008", - "Driver": "Jamie Whyte", - "Co-driver": "Phil Archenoul", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2009", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N8", - "Notes": "Price granted posthumously" - }, - { - "Year": "2010", - "Driver": "Jamie Whyte", - "Co-driver": "Phil Archenoul", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2011", - "Driver": "Ponsiano Lwakataka", - "Co-driver": "Musa Nsubuga", - "Car": "Subaru Impreza N8", - "Notes": "" - }, - { - "Year": "2012", - "Driver": "Mohammed Essa", - "Co-driver": "Greg Stead", - "Car": "Subaru Impreza N12", - "Notes": "" - }, - { - "Year": "2013", - "Driver": "Jas Mangat", - "Co-driver": "Gihan de Silva", - "Car": "Mitsubishi Lancer Evo X", - "Notes": "" - } -] diff --git a/zon-format/benchmarks/data/wtq-00085.json b/zon-format/benchmarks/data/wtq-00085.json deleted file mode 100644 index 2bffd1a..0000000 --- a/zon-format/benchmarks/data/wtq-00085.json +++ /dev/null @@ -1,135 +0,0 @@ -[ - { - "Region": "Netherlands", - "Date": "April 19, 2004", - "Label": "Magik Muzik", - "Format": "vinyl, 12\"", - "Catalog": "Magik Muzik 817-5" - }, - { - "Region": "Netherlands", - "Date": "April 19, 2004", - "Label": "Magik Muzik", - "Format": "CD, Maxi", - "Catalog": "Magik Muzik 817-2" - }, - { - "Region": "Netherlands", - "Date": "April 19, 2004", - "Label": "Magik Muzik", - "Format": "CD, Single", - "Catalog": "Magik Muzik 817-1" - }, - { - "Region": "Germany", - "Date": "May, 2004", - "Label": "Kontor Records", - "Format": "CD, Maxi", - "Catalog": "Kontor375" - }, - { - "Region": "Germany", - "Date": "April 8, 2004", - "Label": "Kontor Records", - "Format": "vinyl, 12\"", - "Catalog": "Kontor375" - }, - { - "Region": "Australia", - "Date": "April, 2004", - "Label": "Bang On!", - "Format": "vinyl, 12\"", - "Catalog": "BANG 056" - }, - { - "Region": "Australia", - "Date": "May, 2004", - "Label": 
"Bang On!", - "Format": "CD, Maxi", - "Catalog": "BANG0096" - }, - { - "Region": "United Kingdom", - "Date": "April 30, 2004", - "Label": "Nebula", - "Format": "CD, Maxi, Enhanced", - "Catalog": "NEBCD058" - }, - { - "Region": "United Kingdom", - "Date": "April 30, 2004", - "Label": "Nebula", - "Format": "CD, Single, Promo", - "Catalog": "NEBCDDJ058" - }, - { - "Region": "United Kingdom", - "Date": "April 30, 2004", - "Label": "Nebula", - "Format": "vinyl, 12\", Promo", - "Catalog": "NEBDJ058" - }, - { - "Region": "United Kingdom", - "Date": "April 30, 2004", - "Label": "Nebula", - "Format": "CD, Maxi", - "Catalog": "NEBT058" - }, - { - "Region": "Switzerland", - "Date": "June, 2004", - "Label": "Sirup", - "Format": "CD, Maxi", - "Catalog": "SIR021-1CD" - }, - { - "Region": "Switzerland", - "Date": "June, 2004", - "Label": "Sirup", - "Format": "CD, Maxi", - "Catalog": "MV-SIR903572" - }, - { - "Region": "Switzerland", - "Date": "June, 2004", - "Label": "Sirup", - "Format": "vinyl, 12\"", - "Catalog": "SIR021-6" - }, - { - "Region": "United States", - "Date": "2004", - "Label": "Nettwerk America", - "Format": "vinyl, 12\"", - "Catalog": "0 6700 33227 1 3" - }, - { - "Region": "United States", - "Date": "May, 2004", - "Label": "Nettwerk America", - "Format": "CD, Single", - "Catalog": "0 6700 33227 2 0" - }, - { - "Region": "France", - "Date": "2004", - "Label": "Independence Records", - "Format": "vinyl, 12\"", - "Catalog": "IR 0408" - }, - { - "Region": "Italy", - "Date": "June 5, 2004", - "Label": "Media Records", - "Format": "vinyl, 12\"", - "Catalog": "MR 2013" - }, - { - "Region": "Scandinavia", - "Date": "2004", - "Label": "Playground Music Scandinavia", - "Format": "CD, Maxi", - "Catalog": "Magik Muzik 817-2" - } -] diff --git a/zon-format/benchmarks/data/wtq-00113.json b/zon-format/benchmarks/data/wtq-00113.json deleted file mode 100644 index 72abe09..0000000 --- a/zon-format/benchmarks/data/wtq-00113.json +++ /dev/null @@ -1,128 +0,0 @@ -[ - { - "Year": 
"1996", - "Driver": "Karim Hirji", - "Co-driver": "Frank Nekusa", - "Car": "Toyota Celica ST 185", - "Notes": "Run under FIA rules as the Great Lakes Rally, jointly with Club Automobile du Burundi" - }, - { - "Year": "1997", - "Driver": "Chipper Adams", - "Co-driver": "Justin Beyendeza", - "Car": "Toyota Supra", - "Notes": "Run as the Pearl of Africa Uganda Rally - ARC Candidate event" - }, - { - "Year": "1998", - "Driver": "Charles Muhangi", - "Co-driver": "Steven Byaruhanga", - "Car": "Subaru Impreza", - "Notes": "" - }, - { - "Year": "1999", - "Driver": "Chipper Adams", - "Co-driver": "Justin Beyendeza", - "Car": "Toyota Supra", - "Notes": "" - }, - { - "Year": "2000", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2001", - "Driver": "-", - "Co-driver": "-", - "Car": "-", - "Notes": "Event not run" - }, - { - "Year": "2002", - "Driver": "Johnny Gemmel", - "Co-driver": "Robert Paisley", - "Car": "Subaru Impreza WRX", - "Notes": "" - }, - { - "Year": "2003", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2004", - "Driver": "Charlie Lubega", - "Co-driver": "Abed Musa", - "Car": "Mitsubishi Lancer Evo 4", - "Notes": "" - }, - { - "Year": "2005", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2006", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N8", - "Notes": "Crew run under Kenyan Licence" - }, - { - "Year": "2007", - "Driver": "Conrad Rautenbach", - "Co-driver": "Peter Marsh", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2008", - "Driver": "Jamie Whyte", - "Co-driver": "Phil Archenoul", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2009", - "Driver": "Riyaz Kurji", - "Co-driver": "Sayed Kadri", - "Car": "Subaru Impreza N8", - "Notes": "Price granted 
posthumously" - }, - { - "Year": "2010", - "Driver": "Jamie Whyte", - "Co-driver": "Phil Archenoul", - "Car": "Subaru Impreza N10", - "Notes": "" - }, - { - "Year": "2011", - "Driver": "Ponsiano Lwakataka", - "Co-driver": "Musa Nsubuga", - "Car": "Subaru Impreza N8", - "Notes": "" - }, - { - "Year": "2012", - "Driver": "Mohammed Essa", - "Co-driver": "Greg Stead", - "Car": "Subaru Impreza N12", - "Notes": "" - }, - { - "Year": "2013", - "Driver": "Jas Mangat", - "Co-driver": "Gihan de Silva", - "Car": "Mitsubishi Lancer Evo X", - "Notes": "" - } -] diff --git a/zon-format/benchmarks/data/wtq-00162.json b/zon-format/benchmarks/data/wtq-00162.json deleted file mode 100644 index 2c2ae16..0000000 --- a/zon-format/benchmarks/data/wtq-00162.json +++ /dev/null @@ -1,218 +0,0 @@ -[ - { - "Year": "1997", - "Competition": "Central American Games", - "Venue": "San Pedro Sula, Honduras", - "Position": "2nd", - "Event": "10,000 m", - "Notes": "" - }, - { - "Year": "2001", - "Competition": "Central American and Caribbean Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "6th", - "Event": "5000 m", - "Notes": "14:57.04" - }, - { - "Year": "2001", - "Competition": "Central American and Caribbean Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "3rd", - "Event": "10,000 m", - "Notes": "30:53.52" - }, - { - "Year": "2001", - "Competition": "Central American Games", - "Venue": "Guatemala City, Guatemala", - "Position": "1st", - "Event": "5000 m", - "Notes": "14:28.91 A" - }, - { - "Year": "2001", - "Competition": "Central American Games", - "Venue": "Guatemala City, Guatemala", - "Position": "2nd", - "Event": "10,000 m", - "Notes": "30:43.86 A" - }, - { - "Year": "2002", - "Competition": "Central American Championships", - "Venue": "San José, Costa Rica", - "Position": "2nd", - "Event": "5000 m", - "Notes": "14:30.14" - }, - { - "Year": "2002", - "Competition": "Central American Championships", - "Venue": "San José, Costa Rica", - "Position": 
"1st", - "Event": "10,000 m", - "Notes": "30:05.31" - }, - { - "Year": "2003", - "Competition": "Central American Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "1st", - "Event": "5000 m", - "Notes": "15:19.35" - }, - { - "Year": "2003", - "Competition": "Central American Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "1st", - "Event": "10,000 m", - "Notes": "30:16.25" - }, - { - "Year": "2003", - "Competition": "Pan American Games", - "Venue": "Santo Domingo, Dominican Republic", - "Position": "–", - "Event": "5000 m", - "Notes": "DNF" - }, - { - "Year": "2003", - "Competition": "Pan American Games", - "Venue": "Santo Domingo, Dominican Republic", - "Position": "7th", - "Event": "10,000 m", - "Notes": "30:26.61" - }, - { - "Year": "2004", - "Competition": "Olympic Games", - "Venue": "Athens, Greece", - "Position": "64th", - "Event": "Marathon", - "Notes": "2:27:13" - }, - { - "Year": "2005", - "Competition": "Central American Championships", - "Venue": "San José, Costa Rica", - "Position": "1st", - "Event": "10,000 m", - "Notes": "30:11.54" - }, - { - "Year": "2006", - "Competition": "Ibero-American Championships", - "Venue": "Ponce, Puerto Rico", - "Position": "8th", - "Event": "5000 m", - "Notes": "14:31.55" - }, - { - "Year": "2006", - "Competition": "Central American and Caribbean Games", - "Venue": "Cartagena, Colombia", - "Position": "6th", - "Event": "5000 m", - "Notes": "14:27.54" - }, - { - "Year": "2006", - "Competition": "Central American and Caribbean Games", - "Venue": "Cartagena, Colombia", - "Position": "4th", - "Event": "10,000 m", - "Notes": "30:09.31" - }, - { - "Year": "2007", - "Competition": "Central American Championships", - "Venue": "San José, Costa Rica", - "Position": "1st", - "Event": "5000 m", - "Notes": "14:42.08" - }, - { - "Year": "2007", - "Competition": "Central American Championships", - "Venue": "San José, Costa Rica", - "Position": "1st", - "Event": "10,000 m", - "Notes": "29:53.61" 
- }, - { - "Year": "2007", - "Competition": "NACAC Championships", - "Venue": "San Salvador, El Salvador", - "Position": "1st", - "Event": "5000 m", - "Notes": "14:33.31" - }, - { - "Year": "2007", - "Competition": "NACAC Championships", - "Venue": "San Salvador, El Salvador", - "Position": "1st", - "Event": "10,000 m", - "Notes": "29:42.11" - }, - { - "Year": "2007", - "Competition": "Pan American Games", - "Venue": "Rio de Janeiro, Brazil", - "Position": "2nd", - "Event": "Marathon", - "Notes": "2:14:27" - }, - { - "Year": "2008", - "Competition": "Olympic Games", - "Venue": "Beijing, PR China", - "Position": "35th", - "Event": "Marathon", - "Notes": "2:20:15" - }, - { - "Year": "2009", - "Competition": "Central American Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "1st", - "Event": "5000 m", - "Notes": "14:55.44" - }, - { - "Year": "2010", - "Competition": "Central American and Caribbean Games", - "Venue": "Mayagüez, Puerto Rico", - "Position": "1st", - "Event": "Marathon", - "Notes": "2:21:35" - }, - { - "Year": "2010", - "Competition": "Central American Championships", - "Venue": "Guatemala City, Guatemala", - "Position": "1st", - "Event": "10,000 m", - "Notes": "32:06.26" - }, - { - "Year": "2011", - "Competition": "Pan American Games", - "Venue": "Guadalajara, Mexico", - "Position": "5th", - "Event": "Marathon", - "Notes": "2:20:27 SB" - }, - { - "Year": "2012", - "Competition": "Olympic Games", - "Venue": "London, United Kingdom", - "Position": "38th", - "Event": "Marathon", - "Notes": "2:18:23" - } -] diff --git a/zon-format/benchmarks/data/wtq-00168.json b/zon-format/benchmarks/data/wtq-00168.json deleted file mode 100644 index 78b0601..0000000 --- a/zon-format/benchmarks/data/wtq-00168.json +++ /dev/null @@ -1,203 +0,0 @@ -[ - { - "Year": "1987", - "Competition": "European Junior Championships", - "Venue": "Birmingham, United Kingdom", - "Position": "–", - "Event": "20 km walk", - "Notes": "DQ" - }, - { - "Year": "1990", - 
"Competition": "European Championships", - "Venue": "Split, Yugoslavia", - "Position": "4th", - "Event": "20 km walk", - "Notes": "1:23.47" - }, - { - "Year": "1991", - "Competition": "World Championships", - "Venue": "Tokyo, Japan", - "Position": "10th", - "Event": "20 km walk", - "Notes": "1:21:32" - }, - { - "Year": "1991", - "Competition": "World Championships", - "Venue": "Tokyo, Japan", - "Position": "–", - "Event": "50 km walk", - "Notes": "DNF" - }, - { - "Year": "1992", - "Competition": "Olympic Games", - "Venue": "Barcelona, Spain", - "Position": "–", - "Event": "20 km walk", - "Notes": "DNF" - }, - { - "Year": "1992", - "Competition": "Olympic Games", - "Venue": "Barcelona, Spain", - "Position": "–", - "Event": "50 km walk", - "Notes": "DQ" - }, - { - "Year": "1993", - "Competition": "World Indoor Championships", - "Venue": "Toronto, Canada", - "Position": "2nd", - "Event": "5000 m walk", - "Notes": "18:35.91" - }, - { - "Year": "1993", - "Competition": "World Championships", - "Venue": "Stuttgart, Germany", - "Position": "–", - "Event": "50 km walk", - "Notes": "DQ" - }, - { - "Year": "1994", - "Competition": "European Championships", - "Venue": "Helsinki, Finland", - "Position": "–", - "Event": "50 km walk", - "Notes": "DQ" - }, - { - "Year": "1994", - "Competition": "European Championships", - "Venue": "Helsinki, Finland", - "Position": "5th", - "Event": "50 km walk", - "Notes": "3:45:57" - }, - { - "Year": "1995", - "Competition": "World Championships", - "Venue": "Gothenburg, Sweden", - "Position": "3rd", - "Event": "50 km walk", - "Notes": "3:45.57" - }, - { - "Year": "1996", - "Competition": "Olympic Games", - "Venue": "Atlanta, United States", - "Position": "8th", - "Event": "20 km walk", - "Notes": "1:21:13" - }, - { - "Year": "1996", - "Competition": "Olympic Games", - "Venue": "Atlanta, United States", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:43:30" - }, - { - "Year": "1997", - "Competition": "World Championships", - 
"Venue": "Athens, Greece", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:44:46" - }, - { - "Year": "1998", - "Competition": "European Championships", - "Venue": "Budapest, Hungary", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:43:51" - }, - { - "Year": "1999", - "Competition": "World Race Walking Cup", - "Venue": "Mézidon-Canon, France", - "Position": "4th", - "Event": "20 km walk", - "Notes": "1:20:52" - }, - { - "Year": "1999", - "Competition": "World Championships", - "Venue": "Seville, Spain", - "Position": "–", - "Event": "50 km walk", - "Notes": "DQ" - }, - { - "Year": "2000", - "Competition": "European Race Walking Cup", - "Venue": "Eisenhüttenstadt, Germany", - "Position": "1st", - "Event": "20 km walk", - "Notes": "1:18:29" - }, - { - "Year": "2000", - "Competition": "Olympic Games", - "Venue": "Sydney, Australia", - "Position": "1st", - "Event": "20 km walk", - "Notes": "1:18:59 (OR)" - }, - { - "Year": "2000", - "Competition": "Olympic Games", - "Venue": "Sydney, Australia", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:42:22" - }, - { - "Year": "2001", - "Competition": "World Championships", - "Venue": "Edmonton, Canada", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:42.08" - }, - { - "Year": "2001", - "Competition": "Goodwill Games", - "Venue": "Brisbane, Australia", - "Position": "2nd", - "Event": "20,000 m walk", - "Notes": "1:19:52.0" - }, - { - "Year": "2002", - "Competition": "European Championships", - "Venue": "Munich, Germany", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:36:39 (WR)" - }, - { - "Year": "2003", - "Competition": "World Championships", - "Venue": "Paris, France", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:36:03" - }, - { - "Year": "2004", - "Competition": "Olympic Games", - "Venue": "Athens, Greece", - "Position": "1st", - "Event": "50 km walk", - "Notes": "3:38:46" - } -] - diff --git a/zon-format/tests/unit/test_roundtrip_benchmarks.py 
b/zon-format/tests/unit/test_roundtrip_benchmarks.py deleted file mode 100644 index b90edb6..0000000 --- a/zon-format/tests/unit/test_roundtrip_benchmarks.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Roundtrip tests for all benchmark datasets. - -Validates that ZON encoding is lossless by checking encode -> decode -> compare. -""" -import json -import unittest -from pathlib import Path - -import zon - - -BENCHMARKS_DATA_DIR = Path(__file__).parent.parent.parent / 'benchmarks' / 'data' - - -def get_json_files(): - """Return all JSON files in benchmarks/data except questions.""" - return sorted( - f for f in BENCHMARKS_DATA_DIR.glob('*.json') - if 'questions' not in f.name - ) - - -class TestRoundtripBenchmarks(unittest.TestCase): - """Test roundtrip encoding for all benchmark datasets.""" - pass - - -def _make_test(filepath): - def test_roundtrip(self): - with open(filepath) as f: - original = json.load(f) - - encoded = zon.encode(original) - decoded = zon.decode(encoded) - - orig_json = json.dumps(original, sort_keys=True) - dec_json = json.dumps(decoded, sort_keys=True) - - self.assertEqual( - orig_json, dec_json, - f"Roundtrip failed for {filepath.name}" - ) - return test_roundtrip - - -for filepath in get_json_files(): - test_name = f'test_roundtrip_{filepath.stem.replace("-", "_")}' - setattr(TestRoundtripBenchmarks, test_name, _make_test(filepath)) - - -if __name__ == '__main__': - unittest.main() From 2df1b3114bb3ae35c40847a9196ed9dd536b9528 Mon Sep 17 00:00:00 2001 From: Toomas Ormisson Date: Sun, 19 Apr 2026 14:57:08 +0100 Subject: [PATCH 7/7] Exclude float columns from delta encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delta encoding violated spec §2.3's MUST round-trip requirement for float columns: prev + (cur - prev) in IEEE-754 does not recover the original double's bit pattern for arbitrary values, and round(diff, 10) at the encode step compounded the loss. Benchmark data exposed this as e.g. 
1865.43 decoding to 1865.4299999999994. Restrict SparseMode.DELTA eligibility to int-only columns. Float columns now fall through to standard value encoding, which round-trips exactly via Python's shortest-round-trip str(float). Int delta encoding (the common case: IDs, counts, timestamps) is unchanged. Regression test covers multiple precision regimes — benchmark values, math.pi/math.e, 0.1+0.2, extreme exponents, negatives — so a future round-to-N workaround cannot sneak through. Co-Authored-By: Claude Opus 4.7 (1M context) --- zon-format/src/zon/core/encoder.py | 14 +++++++----- zon-format/tests/unit/test_delta.py | 34 +++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/zon-format/src/zon/core/encoder.py b/zon-format/src/zon/core/encoder.py index 47f4d5b..e985dc1 100644 --- a/zon-format/src/zon/core/encoder.py +++ b/zon-format/src/zon/core/encoder.py @@ -173,13 +173,17 @@ def _analyze_optimal_sparse_mode(self, values: List[Any]) -> SparseMode: if len(values) < 5: return SparseMode.NONE - is_numeric = True + # Only int columns are eligible for delta encoding. Float delta + # encoding cannot satisfy the spec §2.3 MUST round-trip requirement: + # prev + (cur - prev) in IEEE-754 does not preserve the original + # double's bit pattern for arbitrary floats. 
+ is_int_only = True for val in values: - if not isinstance(val, (int, float)) or isinstance(val, bool): - is_numeric = False + if not isinstance(val, int) or isinstance(val, bool): + is_int_only = False break - - if is_numeric: + + if is_int_only: return SparseMode.DELTA return SparseMode.NONE diff --git a/zon-format/tests/unit/test_delta.py b/zon-format/tests/unit/test_delta.py index 47c8168..e966de9 100644 --- a/zon-format/tests/unit/test_delta.py +++ b/zon-format/tests/unit/test_delta.py @@ -1,3 +1,4 @@ +import math import unittest from zon import ZonEncoder, ZonDecoder @@ -107,12 +108,41 @@ def test_deep_nesting(self): data = [ {'a': {'b': {'c': {'d': {'e': 1}}}}} ] - + encoded = self.encoder.encode(data) self.assertIn('a.b.c.d.e', encoded) - + decoded = self.decoder.decode(encoded) self.assertEqual(decoded, data) + def test_float_column_roundtrip_is_lossless(self): + """Float columns must round-trip bit-exactly (spec §2.3 MUST). + + Covers multiple precision regimes so a partial fix (e.g. round-to-N) + cannot sneak through. + """ + data = [ + {'v': 1865.43}, # benchmark regression case + {'v': 3579.16}, # benchmark regression case + {'v': math.pi}, # 17-sig-digit irrational + {'v': math.e}, # 17-sig-digit irrational + {'v': 0.1 + 0.2}, # classic non-terminating binary: 0.30000000000000004 + {'v': -42.5}, # negative, crosses zero in deltas + {'v': 1e-10}, # small exponent + {'v': 1e15}, # large exponent + ] + + decoded = self.decoder.decode(self.encoder.encode(data)) + + for original, got in zip(data, decoded): + # repr(float) is the shortest string that round-trips to the same + # double, so repr equality is equivalent to bit equality. + self.assertEqual( + repr(original['v']), + repr(got['v']), + f"float roundtrip lost precision: {original['v']!r} -> {got['v']!r}", + ) + + if __name__ == "__main__": unittest.main()