Skip to content

Commit bf2c5a5

Browse files
Carson Jones
authored and committed
Fix PySpark struct() to accept a list of columns (#17189)
The struct() function now unwraps a single list or set argument, matching the PySpark API behavior and the existing array() function in this codebase.
1 parent 89ed9a1 commit bf2c5a5

File tree

2 files changed: +11 additions, -1 deletion

duckdb/experimental/spark/sql/functions.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,9 @@ def _inner_expr_or_val(val: Union[Column, str]) -> Union[Column, str]:
122122
return val.expr if isinstance(val, Column) else val
123123

124124

125-
def struct(*cols: Column) -> Column: # noqa: D103
125+
def struct(*cols: Union["ColumnOrName", list["ColumnOrName"], tuple["ColumnOrName", ...]]) -> Column: # noqa: D103
126+
if len(cols) == 1 and isinstance(cols[0], (list, set)):
127+
cols = cols[0]
126128
return Column(FunctionExpression("struct_pack", *[_inner_expr_or_val(x) for x in cols]))
127129

128130

tests/fast/spark/test_spark_column.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,14 @@ def test_struct_column(self, spark):
3535
):
3636
df = df.withColumn("struct", "yes")
3737

38+
def test_struct_column_with_list(self, spark):
39+
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age"))
40+
res = df.select(struct([df.age, df.name]).alias("struct")).collect()
41+
assert res == [
42+
Row(struct=Row(age=2, name="Alice")),
43+
Row(struct=Row(age=5, name="Bob")),
44+
]
45+
3846
def test_array_column(self, spark):
3947
df = spark.createDataFrame([Row(a=1, b=2, c=3, d=4)], ["a", "b", "c", "d"])
4048

0 commit comments

Comments
 (0)