Skip to content

Commit 5536f73

Browse files
timsaucerclaude
andcommitted
Improve error messages, tests, and API hygiene from PR review
- Provide actionable error message for invalid explain format strings - Remove recursions param from deprecated unnest_column (use unnest_columns) - Add null-handling test case for sort_by to verify nulls-last behavior - Add format-specific assertions to explain tests (TREE, PGJSON, GRAPHVIZ) - Add deep recursion test for unnest_columns with depth > 1 - Add multi-expression window test to verify variadic *exprs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d62faef commit 5536f73

File tree

3 files changed

+53
-19
lines changed

3 files changed

+53
-19
lines changed

crates/core/src/dataframe.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,12 @@ impl PyDataFrame {
823823
let explain_format = match format {
824824
Some(f) => f
825825
.parse::<datafusion::common::format::ExplainFormat>()
826-
.map_err(|e| PyDataFusionError::Common(e.to_string()))?,
826+
.map_err(|_| {
827+
PyDataFusionError::Common(format!(
828+
"Invalid explain format: '{}'. Valid options: indent, tree, pgjson, graphviz",
829+
f
830+
))
831+
})?,
827832
None => datafusion::common::format::ExplainFormat::Indent,
828833
};
829834
let opts = datafusion::logical_expr::ExplainOption::default()

python/datafusion/dataframe.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,14 +1529,9 @@ def unnest_column(
15291529
self,
15301530
column: str,
15311531
preserve_nulls: bool = True,
1532-
recursions: list[tuple[str, str, int]] | None = None,
15331532
) -> DataFrame:
15341533
"""See :py:func:`unnest_columns`."""
1535-
return DataFrame(
1536-
self.df.unnest_column(
1537-
column, preserve_nulls=preserve_nulls, recursions=recursions
1538-
)
1539-
)
1534+
return DataFrame(self.df.unnest_column(column, preserve_nulls=preserve_nulls))
15401535

15411536
def unnest_columns(
15421537
self,

python/tests/test_dataframe.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3637,31 +3637,32 @@ def test_distinct_on():
36373637

36383638

36393639
@pytest.mark.parametrize(
3640-
"input_values",
3640+
("input_values", "expected"),
36413641
[
3642-
[3, 1, 2],
3643-
[1, 2, 3],
3642+
([3, 1, 2], [1, 2, 3]),
3643+
([1, 2, 3], [1, 2, 3]),
3644+
([3, None, 1, 2], [1, 2, 3, None]),
36443645
],
36453646
)
3646-
def test_sort_by(input_values):
3647+
def test_sort_by(input_values, expected):
36473648
"""sort_by always sorts ascending with nulls last regardless of input order."""
36483649
ctx = SessionContext()
36493650
df = ctx.from_pydict({"a": input_values})
36503651
result = df.sort_by(column("a")).collect()[0]
3651-
assert result.column(0).to_pylist() == [1, 2, 3]
3652+
assert result.column(0).to_pylist() == expected
36523653

36533654

36543655
@pytest.mark.parametrize(
3655-
("fmt", "verbose", "analyze"),
3656+
("fmt", "verbose", "analyze", "expected_substring"),
36563657
[
3657-
(None, False, False),
3658-
("TREE", False, False),
3659-
("INDENT", True, True),
3660-
("PGJSON", False, False),
3661-
("GRAPHVIZ", False, False),
3658+
(None, False, False, None),
3659+
("TREE", False, False, "---"),
3660+
("INDENT", True, True, None),
3661+
("PGJSON", False, False, '"Plan"'),
3662+
("GRAPHVIZ", False, False, "digraph"),
36623663
],
36633664
)
3664-
def test_explain_with_format(capsys, fmt, verbose, analyze):
3665+
def test_explain_with_format(capsys, fmt, verbose, analyze, expected_substring):
36653666
from datafusion import ExplainFormat
36663667

36673668
ctx = SessionContext()
@@ -3670,6 +3671,8 @@ def test_explain_with_format(capsys, fmt, verbose, analyze):
36703671
df.explain(verbose=verbose, analyze=analyze, format=explain_fmt)
36713672
captured = capsys.readouterr()
36723673
assert "plan_type" in captured.out
3674+
if expected_substring is not None:
3675+
assert expected_substring in captured.out
36733676

36743677

36753678
def test_window():
@@ -3695,3 +3698,34 @@ def test_unnest_columns_with_recursions():
36953698
# With explicit recursion options
36963699
result = df.unnest_columns("a", recursions=[("a", "a", 1)]).collect()[0]
36973700
assert result.column(0).to_pylist() == [1, 2, 3]
3701+
3702+
3703+
def test_unnest_columns_with_deep_recursion():
3704+
ctx = SessionContext()
3705+
# Nested list of lists — requires depth > 1 to fully flatten
3706+
df = ctx.from_pydict({"a": [[[1, 2], [3]], [[4]]], "b": ["x", "y"]})
3707+
# Depth 1 unnests the outer list, leaving inner lists intact
3708+
result = df.unnest_columns("a", recursions=[("a", "a", 1)]).collect()[0]
3709+
assert result.column(0).to_pylist() == [[1, 2], [3], [4]]
3710+
# Depth 2 fully flattens
3711+
result = df.unnest_columns("a", recursions=[("a", "a", 2)]).collect()[0]
3712+
assert result.column(0).to_pylist() == [1, 2, 3, 4]
3713+
3714+
3715+
def test_window_multiple_expressions():
3716+
ctx = SessionContext()
3717+
df = ctx.from_pydict({"a": [1, 2, 3], "b": ["x", "x", "y"]})
3718+
result = (
3719+
df.window(
3720+
f.row_number(partition_by=[column("b")], order_by=[column("a")]).alias(
3721+
"rn"
3722+
),
3723+
f.rank(partition_by=[column("b")], order_by=[column("a")]).alias("rnk"),
3724+
)
3725+
.sort(column("a").sort(ascending=True))
3726+
.collect()[0]
3727+
)
3728+
assert "rn" in result.schema.names
3729+
assert "rnk" in result.schema.names
3730+
assert result.column(result.schema.get_field_index("rn")).to_pylist() == [1, 2, 1]
3731+
assert result.column(result.schema.get_field_index("rnk")).to_pylist() == [1, 2, 1]

0 commit comments

Comments
 (0)