Skip to content

Commit 425d626

Browse files
authored
fix: decode tuple/frozenset payloads with non-finite floats (#9365)
Python's `json.dumps` emits bare `NaN`/`Infinity`/`-Infinity` for non-finite floats inside embedded tuple/frozenset payloads (e.g. `text/plain+tuple:[NaN]`). The outer JSON still parses because those tokens live inside a JSON string, but `formatTuplePayload` and `formatFrozensetPayload` used strict `JSON.parse` and threw. Swap both to `jsonParseWithSpecialChar` (the same parser we already use at the outer level), move `formatSetPayload` onto the same parser, fall back to the raw payload when the parsed result is not an array, and render non-finite elements as `float('nan')` / `float('inf')` / `-float('inf')`, matching the scalar-key form.
1 parent a845f3c commit 425d626

3 files changed

Lines changed: 159 additions & 12 deletions

File tree

frontend/src/components/editor/output/JsonOutput.tsx

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { memo, useState } from "react";
1818
import type { OutputMessage } from "@/core/kernel/messages";
1919
import { cn } from "@/utils/cn";
2020
import { copyToClipboard } from "@/utils/copy";
21+
import { jsonParseWithSpecialChar } from "@/utils/json/json-parser";
2122
import { isUrl } from "@/utils/urls";
2223
import { useTheme } from "../../../theme/useTheme";
2324
import { logNever } from "../../../utils/assertNever";
@@ -386,14 +387,41 @@ function renderLeaf(leaf: string, render: LeafRenderer): React.ReactNode {
386387
// See `_key_formatter` in marimo/_output/formatters/structures.py.
387388
const KEY_ENCODED_PREFIX = "text/plain+";
388389

390+
// Format elements for a Python collection literal. Non-finite floats
391+
// (NaN / Infinity / -Infinity) parse as JS `number` via
392+
// `jsonParseWithSpecialChar`; `JSON.stringify` on those returns `null`,
393+
// so render them as the same `float(...)` literals we use for scalar
394+
// float keys (see `decodeKeyForCopy`).
395+
/**
 * Format the elements of a parsed JSON list for use inside a Python
 * collection literal, joined with `", "`.
 *
 * Non-finite numbers (`NaN`, `Infinity`, `-Infinity`) are rendered as
 * `float('nan')` / `float('inf')` / `-float('inf')` because
 * `JSON.stringify` would turn them into `null`. The same substitution is
 * applied at any nesting depth, so a nested list such as `[[NaN, 1], 2]`
 * no longer degrades to `[null,1], 2`.
 *
 * Nested arrays and objects keep `JSON.stringify`'s compact layout (no
 * spaces), so parsed-JSON input containing only finite values renders
 * exactly as it did before.
 *
 * @param items - Elements of the parsed payload; assumed to be plain
 *   parsed-JSON values (no `undefined`, functions, or `toJSON` objects).
 * @returns The formatted elements, without surrounding brackets.
 */
function formatCollectionItems(items: unknown[]): string {
  const formatItem = (value: unknown): string => {
    if (typeof value === "number" && !Number.isFinite(value)) {
      if (Number.isNaN(value)) {
        return "float('nan')";
      }
      return value > 0 ? "float('inf')" : "-float('inf')";
    }
    if (Array.isArray(value)) {
      // Recurse instead of delegating to JSON.stringify, which would
      // silently replace nested non-finite numbers with `null`.
      const nested = value.map((entry) => formatItem(entry));
      return `[${nested.join(",")}]`;
    }
    if (typeof value === "object" && value !== null) {
      const fields = Object.entries(value).map(
        ([key, entry]) => `${JSON.stringify(key)}:${formatItem(entry)}`,
      );
      return `{${fields.join(",")}}`;
    }
    // JSON.stringify yields `undefined` for values JSON cannot represent;
    // mirror its in-array convention of emitting `null` for those.
    return JSON.stringify(value) ?? "null";
  };

  return items.map((item) => formatItem(item)).join(", ");
}
408+
389409
// Format a JSON-list payload as a Python tuple literal. 1-element tuples
390410
// need a trailing comma — `(1)` is just `1` in Python, `(1,)` is the tuple.
411+
// Uses `jsonParseWithSpecialChar` so bare `NaN`/`Infinity`/`-Infinity`
412+
// emitted by Python's json.dumps round-trip cleanly.
391413
function formatTuplePayload(jsonList: string): string {
392-
const items = JSON.parse(jsonList) as unknown[];
393-
const inner = items.map((x) => JSON.stringify(x)).join(", ");
414+
const items = jsonParseWithSpecialChar<unknown[]>(jsonList);
415+
// `jsonParseWithSpecialChar` returns `{}` when both parse passes fail;
416+
// fall back to the raw payload so a malformed wire form doesn't crash
417+
// rendering/copy. Matches the defensive pattern in `formatSetPayload`.
418+
if (!Array.isArray(items)) {
419+
return jsonList;
420+
}
394421
if (items.length === 0) {
395422
return "()";
396423
}
424+
const inner = formatCollectionItems(items);
397425
if (items.length === 1) {
398426
return `(${inner},)`;
399427
}
@@ -403,29 +431,31 @@ function formatTuplePayload(jsonList: string): string {
403431
// Format a JSON-list payload as a Python frozenset literal. Empty → `frozenset()`
404432
// rather than `frozenset({})` (which reads like a dict).
405433
/**
 * Render a JSON-list payload as a Python `frozenset` literal.
 *
 * The payload is decoded with `jsonParseWithSpecialChar` so bare
 * `NaN` / `Infinity` / `-Infinity` tokens are accepted. If the decoded
 * value is not an array, the raw payload is returned unchanged.
 *
 * @param jsonList - The embedded payload string (expected to be a JSON list).
 * @returns `frozenset()` for an empty list, otherwise `frozenset({...})`.
 */
function formatFrozensetPayload(jsonList: string): string {
  const parsed = jsonParseWithSpecialChar<unknown[]>(jsonList);
  if (Array.isArray(parsed)) {
    // Empty renders as `frozenset()`; `frozenset({})` would read like a dict.
    return parsed.length > 0
      ? `frozenset({${formatCollectionItems(parsed)}})`
      : "frozenset()";
  }
  return jsonList;
}
413444

414445
// Format a JSON-list payload as a Python set literal. Empty → `set()`
415446
// (not `{}`, which is a dict literal in Python).
416447
/**
 * Render a JSON-list payload as a Python `set` literal.
 *
 * The payload is decoded with `jsonParseWithSpecialChar`. A decoded value
 * that is not an array is treated as the legacy wire form and returned
 * verbatim.
 *
 * @param jsonList - The embedded payload string.
 * @returns `set()` for an empty list, otherwise `{...}`.
 */
function formatSetPayload(jsonList: string): string {
  const parsed = jsonParseWithSpecialChar<unknown[]>(jsonList);
  if (Array.isArray(parsed)) {
    // `{}` is a dict literal in Python, so the empty set is spelled `set()`.
    return parsed.length > 0
      ? `{${formatCollectionItems(parsed)}}`
      : "set()";
  }
  // Back-compat: older wire form was `text/plain+set:{1, 2, 3}` (Python
  // set-literal string, not JSON). Pass it through as-is rather than crash.
  return jsonList;
}
430460

431461
// Renderers for decoded non-string keys. Visual affordances match Python:

frontend/src/components/editor/output/__tests__/json-output.test.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,44 @@ describe("getCopyValue with encoded non-string keys", () => {
417417
`);
418418
});
419419

420+
it("parses tuple/frozenset payloads containing bare NaN/Infinity", () => {
421+
// Python's json.dumps emits bare `NaN`/`Infinity` inside the embedded
422+
// tuple/frozenset payload strings (JSON spec violation, but ECMA-262-
423+
// friendly via the fallback in jsonParseWithSpecialChar). The outer
424+
// JSON stays strict because those tokens live inside a JSON string
425+
// key/value. Regression for tuple-key payloads that previously broke
426+
// the frontend's `JSON.parse` and threw.
427+
const value = {
428+
"text/plain+tuple:[NaN]": "tn",
429+
"text/plain+tuple:[Infinity, -Infinity]": "ti",
430+
k: "text/plain+frozenset:[Infinity, 1]",
431+
};
432+
expect(getCopyValue(value)).toMatchInlineSnapshot(`
433+
"{
434+
(float('nan'),): "tn",
435+
(float('inf'), -float('inf')): "ti",
436+
"k": frozenset({float('inf'), 1})
437+
}"
438+
`);
439+
});
440+
441+
it("falls back to the raw payload for malformed tuple/frozenset", () => {
442+
// `jsonParseWithSpecialChar` returns `{}` on parse failure rather
443+
// than throwing; without an `Array.isArray` guard, the formatters
444+
// would crash on `.length`/`.map`. Pass the raw payload through so
445+
// a malformed wire form doesn't break the whole render.
446+
const value = {
447+
"text/plain+tuple:not a json list": "t",
448+
k: "text/plain+frozenset:also broken",
449+
};
450+
expect(getCopyValue(value)).toMatchInlineSnapshot(`
451+
"{
452+
not a json list: "t",
453+
"k": also broken
454+
}"
455+
`);
456+
});
457+
420458
it("unescapes string keys that looked encoded", () => {
421459
const value = {
422460
"text/plain+str:text/plain+int:2": "hello",

tests/_output/formatters/test_structures.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,85 @@ def test_format_structure_dict_plain_string_keys_unchanged() -> None:
671671
)
672672

673673

674+
def _reject_non_finite(literal: str) -> float:
675+
# `parse_constant` fires for bare `NaN`, `Infinity`, and `-Infinity`.
676+
# Python's `json.loads` accepts these by default (non-spec) — passing a
677+
# raising hook makes the test match the JS `JSON.parse` behavior we
678+
# actually care about.
679+
raise AssertionError(f"outer JSON contained bare {literal!r}")
680+
681+
682+
def test_format_structure_tuple_key_with_nan_outer_json_is_strict() -> None:
    """Tuple keys holding non-finite floats must leave the outer JSON strict.

    The bare `NaN` / `Infinity` tokens are expected only inside the
    embedded tuple payload *string*, never at the outer JSON level, so a
    strict outer parse (as JS `JSON.parse` performs) succeeds; the
    frontend then decodes the embedded payload with
    `jsonParseWithSpecialChar`. Previously the bare token appeared at the
    top level and broke the outer parse.
    """
    StructuresFormatter().register()

    nan_key = (float("nan"),)
    inf_key = (float("inf"), -float("inf"))
    _, data = get_and_format({nan_key: "n", inf_key: "i"})

    # `json.loads` alone tolerates bare NaN/Infinity; the raising
    # `parse_constant` hook makes it as strict as JS `JSON.parse`.
    parsed = json.loads(data, parse_constant=_reject_non_finite)

    expected = {
        "text/plain+tuple:[NaN]": "n",
        "text/plain+tuple:[Infinity, -Infinity]": "i",
    }
    assert parsed == expected
706+
707+
708+
def test_format_structure_frozenset_value_with_nan_outer_json_is_strict() -> (
    None
):
    """Frozenset values with non-finite floats parse strictly at the outer level."""
    StructuresFormatter().register()

    _, data = get_and_format({"k": frozenset({float("inf"), 1})})

    # The outer parse must succeed under JS-`JSON.parse`-like strictness.
    parsed = json.loads(data, parse_constant=_reject_non_finite)

    prefix = "text/plain+frozenset:"
    encoded = parsed["k"]
    assert encoded.startswith(prefix)

    # The embedded payload carries a bare `Infinity`. Python's permissive
    # `json.loads` is fine here: only the embedded content is inspected.
    payload = json.loads(encoded[len(prefix) :])
    assert set(payload) == {1, float("inf")}
724+
725+
726+
def test_format_structure_frozenset_key_with_nan_outer_json_is_strict() -> (
    None
):
    """Frozenset keys with non-finite floats parse strictly at the outer level."""
    StructuresFormatter().register()

    nan_set = frozenset({float("nan")})
    _, data = get_and_format({nan_set: "v"})

    # Strict outer parse: the bare `NaN` lives inside the key string only.
    parsed = json.loads(data, parse_constant=_reject_non_finite)

    # Exactly one key is expected, and it must be the encoded frozenset.
    assert list(parsed) == ["text/plain+frozenset:[NaN]"]
737+
738+
739+
def test_format_structure_tuple_value_with_nan_is_strict_json() -> None:
    """Tuple values with non-finite floats are emitted as scalar sentinels.

    Tuple values don't hit the tuple-encoder path because `flatten`
    recurses into tuples before leaf formatting, so each float is handled
    by `_leaf_formatter` and comes out as its own `text/plain+float:`
    sentinel string.
    """
    StructuresFormatter().register()

    structure = [(float("nan"), float("inf"))]
    expected = [("text/plain+float:nan", "text/plain+float:inf")]

    assert format_structure(structure) == expected
751+
752+
674753
def test_format_structure_bigint() -> None:
675754
bigint = 2**64
676755
assert format_structure([bigint]) == ([f"text/plain+bigint:{bigint}"])

0 commit comments

Comments
 (0)