Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 196c111

Browse files
committed
fix: Implement safe JSON decoding in Polars compiler
1 parent 6d42400 commit 196c111

File tree

2 files changed

+61
-6
lines changed

2 files changed

+61
-6
lines changed

bigframes/core/compile/polars/compiler.py

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import dataclasses
1717
import functools
1818
import itertools
19+
import json
1920
from typing import cast, Literal, Optional, Sequence, Tuple, Type, TYPE_CHECKING
2021

2122
import pandas as pd
@@ -429,13 +430,68 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
429430
@compile_op.register(json_ops.JSONDecode)
def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr:
    """Compile a ``JSONDecode`` op into a Polars expression.

    Args:
        op: The ``JSONDecode`` op; ``op.to_type`` is the requested result
            dtype and ``op.safe`` selects null-on-failure semantics.
        input: Expression yielding the JSON text to decode.

    Returns:
        An expression producing values of the Polars dtype that
        ``_bigframes_dtype_to_polars_dtype`` maps ``op.to_type`` to. In safe
        mode, elements that cannot be parsed or converted become null.
    """
    assert isinstance(op, json_ops.JSONDecode)
    target_dtype = _bigframes_dtype_to_polars_dtype(op.to_type)
    if not op.safe:
        return input.str.json_decode(target_dtype)

    # Polars does not support safe JSON decoding (returning null on failure),
    # so every element is decoded in Python via map_elements.

    # NOTE(review): assumes INT_DTYPE is a signed 64-bit integer -- confirm.
    # Values outside this range become null instead of overflowing the column.
    int64_min, int64_max = -(2**63), 2**63 - 1

    def to_int(decoded):
        # bool is a subclass of int, so it must be rejected before the int check.
        if isinstance(decoded, bool):
            return None
        if isinstance(decoded, float):
            # Only whole-valued floats (e.g. 3.0) convert; inf/nan report False.
            if not decoded.is_integer():
                return None
            decoded = int(decoded)
        elif isinstance(decoded, str):
            try:
                decoded = int(decoded)
            except ValueError:
                return None
        elif not isinstance(decoded, int):
            return None
        if int64_min <= decoded <= int64_max:
            return decoded
        return None

    def to_float(decoded):
        if isinstance(decoded, bool):
            return None
        if isinstance(decoded, (int, float, str)):
            try:
                return float(decoded)
            except (ValueError, OverflowError):
                # ValueError: non-numeric string.
                # OverflowError: integer too large to represent as a float.
                return None
        return None

    def to_bool(decoded):
        if isinstance(decoded, bool):
            return decoded
        if isinstance(decoded, str):
            # Case-insensitive "true"/"false"; any other string maps to null.
            return {"true": True, "false": False}.get(decoded.lower())
        return None

    def to_str(decoded):
        return decoded if isinstance(decoded, str) else None

    # Pick the converter once rather than re-comparing dtypes for every element.
    to_type = op.to_type
    if to_type == bigframes.dtypes.INT_DTYPE:
        convert = to_int
    elif to_type == bigframes.dtypes.FLOAT_DTYPE:
        convert = to_float
    elif to_type == bigframes.dtypes.BOOL_DTYPE:
        convert = to_bool
    elif to_type == bigframes.dtypes.STRING_DTYPE:
        convert = to_str
    else:
        # Any other target type receives the decoded value unchanged.
        convert = None

    def safe_decode(val):
        if val is None:
            return None
        try:
            decoded = json.loads(val)
        except (ValueError, TypeError, RecursionError):
            # Malformed JSON, non-text input, or pathologically deep nesting.
            return None
        if decoded is None or convert is None:
            return decoded
        return convert(decoded)

    return input.map_elements(safe_decode, return_dtype=target_dtype)
439495

440496
@compile_op.register(arr_ops.ToArrayOp)
441497
def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr:

tests/unit/test_series_polars.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4142,7 +4142,6 @@ def test_json_astype_others_raise_error(data, to_type):
41424142
bf_series.astype(to_type, errors="raise").to_pandas()
41434143

41444144

4145-
@pytest.mark.skip(reason="AssertionError: Series NA mask are different")
41464145
@pytest.mark.parametrize(
41474146
("data", "to_type"),
41484147
[

0 commit comments

Comments
 (0)