From 4ed6fbad778f183251258c1c93a3605e8136da39 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 26 Mar 2026 10:36:45 +0100 Subject: [PATCH 01/10] feat: {Expr, Series}.struct.unnest() --- docs/api-reference/expr_struct.md | 1 + docs/api-reference/series_struct.md | 1 + narwhals/_arrow/series_struct.py | 10 +++++ narwhals/_compliant/any_namespace.py | 3 +- narwhals/_compliant/expr.py | 32 ++++++++++++- narwhals/_compliant/typing.py | 2 +- narwhals/_duckdb/expr_struct.py | 31 +++++++++++++ narwhals/_ibis/expr_struct.py | 33 +++++++++++++- narwhals/_pandas_like/series_struct.py | 12 +++++ narwhals/_polars/series.py | 7 ++- narwhals/_polars/utils.py | 1 + narwhals/_spark_like/expr_struct.py | 33 +++++++++++++- narwhals/expr_struct.py | 29 ++++++++++++ narwhals/series_struct.py | 37 ++++++++++++++- tests/expr_and_series/struct_/unnest_test.py | 47 ++++++++++++++++++++ 15 files changed, 272 insertions(+), 7 deletions(-) create mode 100644 tests/expr_and_series/struct_/unnest_test.py diff --git a/docs/api-reference/expr_struct.md b/docs/api-reference/expr_struct.md index bfc093ed8a..2a00bd185e 100644 --- a/docs/api-reference/expr_struct.md +++ b/docs/api-reference/expr_struct.md @@ -5,5 +5,6 @@ options: members: - field + - unnest show_source: false show_bases: false diff --git a/docs/api-reference/series_struct.md b/docs/api-reference/series_struct.md index 638376dad3..cbdcb302ed 100644 --- a/docs/api-reference/series_struct.md +++ b/docs/api-reference/series_struct.md @@ -5,5 +5,6 @@ options: members: - field + - unnest show_source: false show_bases: false diff --git a/narwhals/_arrow/series_struct.py b/narwhals/_arrow/series_struct.py index 906725ba7b..5f520e370b 100644 --- a/narwhals/_arrow/series_struct.py +++ b/narwhals/_arrow/series_struct.py @@ -2,15 +2,25 @@ from typing import TYPE_CHECKING +import pyarrow as pa import pyarrow.compute as pc from narwhals._arrow.utils import ArrowSeriesNamespace from narwhals._compliant.any_namespace import StructNamespace if TYPE_CHECKING: + from narwhals._arrow.dataframe import ArrowDataFrame from narwhals._arrow.series import ArrowSeries class ArrowSeriesStructNamespace(ArrowSeriesNamespace, StructNamespace["ArrowSeries"]): def field(self, name: str) -> ArrowSeries: return self.with_native(pc.struct_field(self.native, name)).alias(name) + + def unnest(self) -> ArrowDataFrame: + from narwhals._arrow.dataframe import ArrowDataFrame + + native = self.native + struct_type: pa.StructType = native.type + table = pa.table({n: pc.struct_field(native, n) for n in struct_type.names}) + return ArrowDataFrame.from_native(table, context=self.compliant) diff --git a/narwhals/_compliant/any_namespace.py b/narwhals/_compliant/any_namespace.py index 5bf1868b87..abb0d2ed10 100644 --- a/narwhals/_compliant/any_namespace.py +++ b/narwhals/_compliant/any_namespace.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, ClassVar, Protocol, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeVar from narwhals._utils import CompliantT_co, _StoresCompliant @@ -115,3 +115,4 @@ class StructNamespace(_StoresCompliant[CompliantT_co], Protocol[CompliantT_co]): _accessor: ClassVar[Accessor] = "struct" def field(self, name: str) -> CompliantT_co: ... + def unnest(self) -> Any: ... diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index f72b6fb1d3..3f25dcb2e2 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -43,7 +43,13 @@ from narwhals._compliant.namespace import CompliantNamespace, EagerNamespace from narwhals._compliant.series import CompliantSeries - from narwhals._compliant.typing import AliasNames, EvalNames, EvalSeries + from narwhals._compliant.typing import ( + AliasNames, + EagerDataFrameAny, + EagerSeriesAny, + EvalNames, + EvalSeries, + ) from narwhals._expression_parsing import ExprMetadata from narwhals._typing import NoDefault from narwhals._utils import Implementation, Version, _LimitedContext @@ -1184,3 +1190,27 @@ def field(self, name: str) -> EagerExprT: return self.compliant._reuse_series_namespace("struct", "field", name=name).alias( name ) + + def unnest(self) -> EagerExprT: + def inner(df: EagerDataFrameAny) -> list[EagerSeriesAny]: + result: list[EagerSeriesAny] = [] + for series in self.compliant(df): + unnested_df: EagerDataFrameAny = series.struct.unnest() + result.extend( + unnested_df.get_column(col_name) for col_name in unnested_df.columns + ) + return result + + def evaluate_output_names(df: EagerDataFrameAny) -> Sequence[str]: + return [ + field.name + for series in self.compliant(df) + for field in series.dtype.fields + ] + + return self.compliant._from_callable( + inner, + evaluate_output_names=evaluate_output_names, + alias_output_names=None, + context=self.compliant, + ) diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index 52d583bb53..d4d460670c 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -150,7 +150,7 @@ class ScalarKwargs(TypedDict, total=False): EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound=EagerSeriesAny, covariant=True) # NOTE: `pyright` gives false (8) positives if this uses `EagerDataFrameAny`? -EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any, Any]") +EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrameAny") LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny) LazyExprT_contra = TypeVar("LazyExprT_contra", bound=LazyExprAny, contravariant=True) diff --git a/narwhals/_duckdb/expr_struct.py b/narwhals/_duckdb/expr_struct.py index ac91d399d9..488c9201d6 100644 --- a/narwhals/_duckdb/expr_struct.py +++ b/narwhals/_duckdb/expr_struct.py @@ -7,6 +7,9 @@ from narwhals._duckdb.utils import F, lit if TYPE_CHECKING: + from duckdb import Expression + + from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr @@ -17,3 +20,31 @@ def field(self, name: str) -> DuckDBExpr: return self.compliant._with_elementwise( lambda expr: F("struct_extract", expr, lit(name)) ).alias(name) + + def unnest(self) -> DuckDBExpr: + compliant = self.compliant + + def func(df: DuckDBLazyFrame) -> list[Expression]: + schema = df.schema + return [ + F("struct_extract", native_expr, lit(field.name)).alias(field.name) + for native_expr, name in zip( + compliant(df), compliant._evaluate_output_names(df) + ) + for field in schema[name].fields # pyright: ignore[reportAttributeAccessIssue] + ] + + def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]: + schema = df.schema + return [ + field.name + for name in compliant._evaluate_output_names(df) + for field in schema[name].fields # pyright: ignore[reportAttributeAccessIssue] + ] + + return compliant.__class__( + func, + evaluate_output_names=evaluate_output_names, + alias_output_names=None, + version=compliant._version, + ) diff --git a/narwhals/_ibis/expr_struct.py b/narwhals/_ibis/expr_struct.py index 25eedf04fc..12fc723911 100644 --- a/narwhals/_ibis/expr_struct.py +++ b/narwhals/_ibis/expr_struct.py @@ -1,13 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import StructNamespace if TYPE_CHECKING: import ibis.expr.types as ir + from ibis.expr.datatypes import Struct as StructDtype + from narwhals._ibis.dataframe import IbisLazyFrame from narwhals._ibis.expr import IbisExpr @@ -17,3 +19,32 @@ def func(expr: ir.StructColumn) -> ir.Column: return expr[name] return self.compliant._with_callable(func).alias(name) + + def unnest(self) -> IbisExpr: + compliant = self.compliant + + def func(df: IbisLazyFrame) -> list[ir.Column]: + schema = df.schema + return [ + cast("ir.StructColumn", native_expr)[field.name].name(field.name) + for native_expr, name in zip( + compliant(df), compliant._evaluate_output_names(df) + ) + for field in cast("StructDtype", schema[name]).fields + ] + + def evaluate_output_names(df: IbisLazyFrame) -> list[str]: + schema = df.schema + return [ + field.name + for name in compliant._evaluate_output_names(df) + for field in cast("StructDtype", schema[name]).fields + ] + + return compliant.__class__( + func, + evaluate_output_names=evaluate_output_names, + alias_output_names=None, + version=compliant._version, + implementation=compliant._implementation, + ) diff --git a/narwhals/_pandas_like/series_struct.py b/narwhals/_pandas_like/series_struct.py index dc80997533..9e4cd87a5e 100644 --- a/narwhals/_pandas_like/series_struct.py +++ b/narwhals/_pandas_like/series_struct.py @@ -6,6 +6,9 @@ from narwhals._pandas_like.utils import PandasLikeSeriesNamespace if TYPE_CHECKING: + import pyarrow as pa + + from narwhals._pandas_like.dataframe import PandasLikeDataFrame from narwhals._pandas_like.series import PandasLikeSeries @@ -14,3 +17,12 @@ class PandasLikeSeriesStructNamespace( ): def field(self, name: str) -> PandasLikeSeries: return self.with_native(self.native.struct.field(name)).alias(name) + + def unnest(self) -> PandasLikeDataFrame: + from narwhals._pandas_like.dataframe import PandasLikeDataFrame + + native = self.native + pa_type: pa.StructType = native.dtype.pyarrow_dtype + ns = self.implementation.to_native_namespace() + result = ns.DataFrame({name: native.struct.field(name) for name in pa_type.names}) + return PandasLikeDataFrame.from_native(result, context=self.compliant) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index b77bba4eb9..d23ed2af87 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -846,4 +846,9 @@ def contains(self, item: NonNestedLiteral) -> PolarsSeries: class PolarsSeriesStructNamespace( PolarsSeriesNamespace, PolarsStructNamespace[PolarsSeries, pl.Series] -): ... +): + def unnest(self) -> PolarsDataFrame: + from narwhals._polars.dataframe import PolarsDataFrame + + result = self.native.struct.unnest() + return PolarsDataFrame(result, version=self.compliant._version) diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 1011f7ce93..6c17d54c9e 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -378,3 +378,4 @@ def len(self) -> CompliantT: ... class PolarsStructNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]): _accessor: ClassVar[Accessor] = "struct" field: Method[CompliantT] + unnest: Method[CompliantT] diff --git a/narwhals/_spark_like/expr_struct.py b/narwhals/_spark_like/expr_struct.py index ac5202535e..f63112e936 100644 --- a/narwhals/_spark_like/expr_struct.py +++ b/narwhals/_spark_like/expr_struct.py @@ -1,13 +1,15 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import StructNamespace if TYPE_CHECKING: + from sqlframe.base import types as native_dtypes from sqlframe.base.column import Column + from narwhals._spark_like.dataframe import SparkLikeLazyFrame from narwhals._spark_like.expr import SparkLikeExpr @@ -19,3 +21,32 @@ def func(expr: Column) -> Column: return expr.getField(name) return self.compliant._with_elementwise(func).alias(name) + + def unnest(self) -> SparkLikeExpr: + compliant = self.compliant + + def func(df: SparkLikeLazyFrame) -> list[Column]: + schema = df.schema + return [ + native_expr.getField(field.name).alias(field.name) + for native_expr, name in zip( + compliant(df), compliant._evaluate_output_names(df) + ) + for field in cast("native_dtypes.StructType", schema[name]).fields + ] + + def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]: + schema = df.schema + return [ + field.name + for name in compliant._evaluate_output_names(df) + for field in cast("native_dtypes.StructType", schema[name]).fields + ] + + return compliant.__class__( + func, + evaluate_output_names=evaluate_output_names, + alias_output_names=None, + version=compliant._version, + implementation=compliant._implementation, + ) diff --git a/narwhals/expr_struct.py b/narwhals/expr_struct.py index 7d734732f9..d1c7bed9d3 100644 --- a/narwhals/expr_struct.py +++ b/narwhals/expr_struct.py @@ -45,3 +45,32 @@ def field(self, name: str) -> ExprT: return self._expr._append_node( ExprNode(ExprKind.ELEMENTWISE, "struct.field", name=name) ) + + def unnest(self) -> ExprT: + r"""Expand the struct column into individual fields as separate columns. + + Each field of the struct becomes a separate column in the result. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> df_native = pl.DataFrame( + ... {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]} + ... ) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("user").struct.unnest()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 2) | + | ┌─────┬──────┐ | + | │ id ┆ name │ | + | │ --- ┆ --- │ | + | │ i64 ┆ str │ | + | ╞═════╪══════╡ | + | │ 0 ┆ john │ | + | │ 1 ┆ jane │ | + | └─────┴──────┘ | + └──────────────────┘ + """ + return self._expr._append_node(ExprNode(ExprKind.ELEMENTWISE, "struct.unnest")) diff --git a/narwhals/series_struct.py b/narwhals/series_struct.py index 5a2851d6f0..5282eb505b 100644 --- a/narwhals/series_struct.py +++ b/narwhals/series_struct.py @@ -1,9 +1,12 @@ from __future__ import annotations -from typing import Generic +from typing import TYPE_CHECKING, Any, Generic from narwhals.typing import SeriesT +if TYPE_CHECKING: + from narwhals.dataframe import DataFrame + class SeriesStructNamespace(Generic[SeriesT]): def __init__(self, series: SeriesT) -> None: @@ -28,3 +31,35 @@ def field(self, name: str) -> SeriesT: return self._narwhals_series._with_compliant( self._narwhals_series._compliant_series.struct.field(name) ) + + def unnest(self) -> DataFrame[Any]: + r"""Convert this struct Series to a DataFrame with a separate column for each field. + + Each field of the struct becomes a column in the resulting DataFrame. + + Examples: + >>> import polars as pl + >>> import narwhals as nw + >>> s_native = pl.Series( + ... [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}] + ... ) + >>> s = nw.from_native(s_native, series_only=True) + >>> s.struct.unnest() + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (2, 2) | + | ┌─────┬──────┐ | + | │ id ┆ name │ | + | │ --- ┆ --- │ | + | │ i64 ┆ str │ | + | ╞═════╪══════╡ | + | │ 0 ┆ john │ | + | │ 1 ┆ jane │ | + | └─────┴──────┘ | + └──────────────────┘ + """ + return self._narwhals_series._dataframe( + self._narwhals_series._compliant_series.struct.unnest(), + level=self._narwhals_series._level, + ) diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py new file mode 100644 index 0000000000..20d806f462 --- /dev/null +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pytest + +import narwhals as nw +from tests.utils import ( + PANDAS_VERSION, + PYARROW_VERSION, + Constructor, + ConstructorEager, + assert_equal_data, +) + + +def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: + if any(backend in str(constructor) for backend in ("dask", "sqlframe")): + request.applymarker(pytest.mark.xfail) + + if "pandas" in str(constructor) and ( + PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) + ): + pytest.skip() + + data = {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]} + dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) + df = nw.from_native(constructor(data)).select(user=nw.col("user").cast(dtype)) + + result = df.select(nw.col("user").struct.unnest()) + expected = {"id": [0, 1], "name": ["john", "jane"]} + assert_equal_data(result, expected) + + +def test_unnest_series(constructor_eager: ConstructorEager) -> None: + if "pandas" in str(constructor_eager) and ( + PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) + ): + pytest.skip() + + data = {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]} + dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) + df = nw.from_native(constructor_eager(data), eager_only=True).select( + user=nw.col("user").cast(dtype) + ) + + result = df.get_column("user").struct.unnest() + expected = {"id": [0, 1], "name": ["john", "jane"]} + assert_equal_data(result, expected) From cbdca2c95eeb08c2bac999d93056d16f5f9853b7 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Thu, 26 Mar 2026 10:42:31 +0100 Subject: [PATCH 02/10] improve polars typing --- narwhals/_polars/expr.py | 3 ++- narwhals/_polars/utils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index 9a60d21e2d..d952c35e58 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -515,4 +515,5 @@ def contains(self, item: Any) -> PolarsExpr: class PolarsExprStructNamespace( PolarsExprNamespace, PolarsStructNamespace[PolarsExpr, pl.Expr] -): ... +): + unnest: Method[PolarsExpr] diff --git a/narwhals/_polars/utils.py b/narwhals/_polars/utils.py index 6c17d54c9e..96a59bb075 100644 --- a/narwhals/_polars/utils.py +++ b/narwhals/_polars/utils.py @@ -378,4 +378,4 @@ def len(self) -> CompliantT: ... class PolarsStructNamespace(PolarsAnyNamespace[CompliantT, NativeT_co]): _accessor: ClassVar[Accessor] = "struct" field: Method[CompliantT] - unnest: Method[CompliantT] + unnest: Method[Any] From d78ebb24d0d3b087075f73cbaa531b4566c6ef31 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 27 Mar 2026 08:59:38 +0100 Subject: [PATCH 03/10] fixup typing --- narwhals/_compliant/expr.py | 10 +++++----- narwhals/_compliant/typing.py | 2 +- narwhals/_duckdb/expr_struct.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/narwhals/_compliant/expr.py b/narwhals/_compliant/expr.py index 3f25dcb2e2..22e1fb7c94 100644 --- a/narwhals/_compliant/expr.py +++ b/narwhals/_compliant/expr.py @@ -1192,9 +1192,11 @@ def field(self, name: str) -> EagerExprT: ) def unnest(self) -> EagerExprT: + compliant = self.compliant + def inner(df: EagerDataFrameAny) -> list[EagerSeriesAny]: result: list[EagerSeriesAny] = [] - for series in self.compliant(df): + for series in compliant(df): unnested_df: EagerDataFrameAny = series.struct.unnest() result.extend( unnested_df.get_column(col_name) for col_name in unnested_df.columns @@ -1203,14 +1205,12 @@ def inner(df: EagerDataFrameAny) -> list[EagerSeriesAny]: def evaluate_output_names(df: EagerDataFrameAny) -> Sequence[str]: return [ - field.name - for series in self.compliant(df) - for field in series.dtype.fields + field.name for series in compliant(df) for field in series.dtype.fields ] return self.compliant._from_callable( inner, evaluate_output_names=evaluate_output_names, alias_output_names=None, - context=self.compliant, + context=compliant, ) diff --git a/narwhals/_compliant/typing.py b/narwhals/_compliant/typing.py index d4d460670c..52d583bb53 100644 --- a/narwhals/_compliant/typing.py +++ b/narwhals/_compliant/typing.py @@ -150,7 +150,7 @@ class ScalarKwargs(TypedDict, total=False): EagerSeriesT_co = TypeVar("EagerSeriesT_co", bound=EagerSeriesAny, covariant=True) # NOTE: `pyright` gives false (8) positives if this uses `EagerDataFrameAny`? -EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrameAny") +EagerDataFrameT = TypeVar("EagerDataFrameT", bound="EagerDataFrame[Any, Any, Any, Any]") LazyExprT = TypeVar("LazyExprT", bound=LazyExprAny) LazyExprT_contra = TypeVar("LazyExprT_contra", bound=LazyExprAny, contravariant=True) diff --git a/narwhals/_duckdb/expr_struct.py b/narwhals/_duckdb/expr_struct.py index 488c9201d6..a4c27b3040 100644 --- a/narwhals/_duckdb/expr_struct.py +++ b/narwhals/_duckdb/expr_struct.py @@ -31,7 +31,7 @@ def func(df: DuckDBLazyFrame) -> list[Expression]: for native_expr, name in zip( compliant(df), compliant._evaluate_output_names(df) ) - for field in schema[name].fields # pyright: ignore[reportAttributeAccessIssue] + for field in schema[name].fields # type: ignore[attr-defined] ] def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]: @@ -39,7 +39,7 @@ def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]: return [ field.name for name in compliant._evaluate_output_names(df) - for field in schema[name].fields # pyright: ignore[reportAttributeAccessIssue] + for field in schema[name].fields # type: ignore[attr-defined] ] return compliant.__class__( From 2d7c4191ad76fde6ed7f14583d83e8f34d22a530 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 27 Mar 2026 09:30:24 +0100 Subject: [PATCH 04/10] casting to narwhals Struct --- narwhals/_duckdb/expr_struct.py | 7 ++++--- narwhals/_ibis/expr_struct.py | 6 +++--- narwhals/_spark_like/expr_struct.py | 6 +++--- tests/expr_and_series/struct_/unnest_test.py | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/narwhals/_duckdb/expr_struct.py b/narwhals/_duckdb/expr_struct.py index a4c27b3040..002539928f 100644 --- a/narwhals/_duckdb/expr_struct.py +++ b/narwhals/_duckdb/expr_struct.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from narwhals._compliant import LazyExprNamespace from narwhals._compliant.any_namespace import StructNamespace @@ -11,6 +11,7 @@ from narwhals._duckdb.dataframe import DuckDBLazyFrame from narwhals._duckdb.expr import DuckDBExpr + from narwhals.dtypes import Struct class DuckDBExprStructNamespace( @@ -31,7 +32,7 @@ def func(df: DuckDBLazyFrame) -> list[Expression]: for native_expr, name in zip( compliant(df), compliant._evaluate_output_names(df) ) - for field in schema[name].fields # type: ignore[attr-defined] + for field in cast("Struct", schema[name]).fields ] def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]: @@ -39,7 +40,7 @@ def evaluate_output_names(df: DuckDBLazyFrame) -> list[str]: return [ field.name for name in compliant._evaluate_output_names(df) - for field in schema[name].fields # type: ignore[attr-defined] + for field in cast("Struct", schema[name]).fields ] return compliant.__class__( diff --git a/narwhals/_ibis/expr_struct.py b/narwhals/_ibis/expr_struct.py index 12fc723911..f92cdeaba0 100644 --- a/narwhals/_ibis/expr_struct.py +++ b/narwhals/_ibis/expr_struct.py @@ -7,10 +7,10 @@ if TYPE_CHECKING: import ibis.expr.types as ir - from ibis.expr.datatypes import Struct as StructDtype from narwhals._ibis.dataframe import IbisLazyFrame from narwhals._ibis.expr import IbisExpr + from narwhals.dtypes import Struct class IbisExprStructNamespace(LazyExprNamespace["IbisExpr"], StructNamespace["IbisExpr"]): @@ -30,7 +30,7 @@ def func(df: IbisLazyFrame) -> list[ir.Column]: for native_expr, name in zip( compliant(df), compliant._evaluate_output_names(df) ) - for field in cast("StructDtype", schema[name]).fields + for field in cast("Struct", schema[name]).fields ] def evaluate_output_names(df: IbisLazyFrame) -> list[str]: @@ -38,7 +38,7 @@ def evaluate_output_names(df: IbisLazyFrame) -> list[str]: return [ field.name for name in compliant._evaluate_output_names(df) - for field in cast("StructDtype", schema[name]).fields + for field in cast("Struct", schema[name]).fields ] return compliant.__class__( diff --git a/narwhals/_spark_like/expr_struct.py b/narwhals/_spark_like/expr_struct.py index f63112e936..35ab65ee38 100644 --- a/narwhals/_spark_like/expr_struct.py +++ b/narwhals/_spark_like/expr_struct.py @@ -6,11 +6,11 @@ from narwhals._compliant.any_namespace import StructNamespace if TYPE_CHECKING: - from sqlframe.base import types as native_dtypes from sqlframe.base.column import Column from narwhals._spark_like.dataframe import SparkLikeLazyFrame from narwhals._spark_like.expr import SparkLikeExpr + from narwhals.dtypes import Struct class SparkLikeExprStructNamespace( @@ -32,7 +32,7 @@ def func(df: SparkLikeLazyFrame) -> list[Column]: for native_expr, name in zip( compliant(df), compliant._evaluate_output_names(df) ) - for field in cast("native_dtypes.StructType", schema[name]).fields + for field in cast("Struct", schema[name]).fields ] def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]: @@ -40,7 +40,7 @@ def evaluate_output_names(df: SparkLikeLazyFrame) -> list[str]: return [ field.name for name in compliant._evaluate_output_names(df) - for field in cast("native_dtypes.StructType", schema[name]).fields + for field in cast("Struct", schema[name]).fields ] return compliant.__class__( diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py index 20d806f462..9ce7f7c3f0 100644 --- a/tests/expr_and_series/struct_/unnest_test.py +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -13,7 +13,7 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(backend in str(constructor) for backend in ("dask", "sqlframe")): + if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor) and ( From fb9f32b029146b488d4292ae347e8983fd2ba20f Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Fri, 27 Mar 2026 15:48:20 +0100 Subject: [PATCH 05/10] pyspark manual conversion --- narwhals/_spark_like/expr_struct.py | 2 +- tests/expr_and_series/struct_/unnest_test.py | 84 ++++++++++++++++++-- 2 files changed, 78 insertions(+), 8 deletions(-) diff --git a/narwhals/_spark_like/expr_struct.py b/narwhals/_spark_like/expr_struct.py index 35ab65ee38..12df7f6485 100644 --- a/narwhals/_spark_like/expr_struct.py +++ b/narwhals/_spark_like/expr_struct.py @@ -22,7 +22,7 @@ def func(expr: Column) -> Column: return self.compliant._with_elementwise(func).alias(name) - def unnest(self) -> SparkLikeExpr: + def unnest(self) -> SparkLikeExpr: # pragma: no cover compliant = self.compliant def func(df: SparkLikeLazyFrame) -> list[Column]: diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py index 9ce7f7c3f0..c56e3c8c2a 100644 --- a/tests/expr_and_series/struct_/unnest_test.py +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -1,5 +1,8 @@ from __future__ import annotations +from datetime import datetime +from typing import TYPE_CHECKING, cast + import pytest import narwhals as nw @@ -11,9 +14,47 @@ assert_equal_data, ) +if TYPE_CHECKING: + from narwhals._native import NativeSQLFrame + +data = { + "user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}], + "psw": [ + {"hash": "fake-hash-1", "ts": datetime(2026, 1, 1, 0, 0)}, + {"hash": "fake-hash-2", "ts": datetime(2026, 1, 2, 0, 0)}, + ], +} + +user_dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) +psw_dtype = nw.Struct({"hash": nw.String(), "ts": nw.Datetime()}) + + +def _spark_to_struct(native_df: NativeSQLFrame) -> NativeSQLFrame: # pragma: no cover + """Convert pyspark MAP columns to proper struct columns. + + PySpark natively maps dict input to MAP, so we need to + reconstruct the struct columns with the correct types before casting. + """ + _tmp_nw_compliant_frame = nw.from_native(native_df)._compliant_frame + F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] + T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 + + return native_df.withColumns( + { + "user": F.struct( + F.col("user.id").cast(T.IntegerType()).alias("id"), + F.col("user.name").cast(T.StringType()).alias("name"), + ), + "psw": F.struct( + F.col("psw.hash").cast(T.StringType()).alias("hash"), + F.col("psw.ts").cast(T.TimestampType()).alias("ts"), + ), + } + ) + def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(backend in str(constructor) for backend in ("dask",)): + if any(backend in str(constructor) for backend in ("dask", "sqlframe")): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor) and ( @@ -21,25 +62,54 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) - ): pytest.skip() - data = {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]} - dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) - df = nw.from_native(constructor(data)).select(user=nw.col("user").cast(dtype)) + native_df = constructor(data) + if "spark" in str(constructor): # pragma: no cover + native_df = _spark_to_struct(cast("NativeSQLFrame", native_df)) + + df = nw.from_native(native_df).select(user=nw.col("user").cast(user_dtype)) result = df.select(nw.col("user").struct.unnest()) expected = {"id": [0, 1], "name": ["john", "jane"]} assert_equal_data(result, expected) +def test_unnest_expr_multi( + request: pytest.FixtureRequest, constructor: Constructor +) -> None: + if any(backend in str(constructor) for backend in ("dask", "sqlframe")): + request.applymarker(pytest.mark.xfail) + + if "pandas" in str(constructor) and ( + PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) + ): + pytest.skip() + + native_df = constructor(data) + if "spark" in str(constructor): # pragma: no cover + native_df = _spark_to_struct(cast("NativeSQLFrame", native_df)) + + df = nw.from_native(native_df).select( + user=nw.col("user").cast(user_dtype), psw=nw.col("psw").cast(psw_dtype) + ) + + result = df.select(nw.col("user", "psw").struct.unnest()) + expected = { + "id": [0, 1], + "name": ["john", "jane"], + "hash": ["fake-hash-1", "fake-hash-2"], + "ts": [datetime(2026, 1, 1, 0, 0), datetime(2026, 1, 2, 0, 0)], + } + assert_equal_data(result, expected) + + def test_unnest_series(constructor_eager: ConstructorEager) -> None: if "pandas" in str(constructor_eager) and ( PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) ): pytest.skip() - data = {"user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}]} - dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) df = nw.from_native(constructor_eager(data), eager_only=True).select( - user=nw.col("user").cast(dtype) + user=nw.col("user").cast(user_dtype) ) result = df.get_column("user").struct.unnest() From 36aeeb7ac58af3b78cb55922277f6acfae492296 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 16:42:18 +0200 Subject: [PATCH 06/10] simplify pyspark tests via nw.struct --- narwhals/_spark_like/expr_struct.py | 2 +- narwhals/_spark_like/utils.py | 14 ++--- tests/expr_and_series/cast_test.py | 40 ++----------- tests/expr_and_series/struct_/unnest_test.py | 60 ++++---------------- 4 files changed, 24 insertions(+), 92 deletions(-) diff --git a/narwhals/_spark_like/expr_struct.py b/narwhals/_spark_like/expr_struct.py index 12df7f6485..35ab65ee38 100644 --- a/narwhals/_spark_like/expr_struct.py +++ b/narwhals/_spark_like/expr_struct.py @@ -22,7 +22,7 @@ def func(expr: Column) -> Column: return self.compliant._with_elementwise(func).alias(name) - def unnest(self) -> SparkLikeExpr: # pragma: no cover + def unnest(self) -> SparkLikeExpr: compliant = self.compliant def func(df: SparkLikeLazyFrame) -> list[Column]: diff --git a/narwhals/_spark_like/utils.py b/narwhals/_spark_like/utils.py index e0e0c8c857..d7ac768928 100644 --- a/narwhals/_spark_like/utils.py +++ b/narwhals/_spark_like/utils.py @@ -2,7 +2,7 @@ import operator from collections.abc import Callable -from functools import lru_cache +from functools import lru_cache, partial from importlib import import_module from operator import attrgetter from types import ModuleType @@ -182,15 +182,13 @@ def narwhals_to_native_dtype( # noqa: C901 return native.ArrayType( elementType=narwhals_to_native_dtype(dtype.inner, version, native, session) ) - if isinstance_or_issubclass(dtype, dtypes.Struct): # pragma: no cover + if isinstance_or_issubclass(dtype, dtypes.Struct): + to_native = partial( + narwhals_to_native_dtype, version=version, spark_types=native, session=session + ) return native.StructType( fields=[ - native.StructField( - name=field.name, - dataType=narwhals_to_native_dtype( - field.dtype, version, native, session - ), - ) + native.StructField(name=field.name, dataType=to_native(field.dtype)) for field in dtype.fields ] ) diff --git a/tests/expr_and_series/cast_test.py b/tests/expr_and_series/cast_test.py index cfadaff347..e5e878888a 100644 --- a/tests/expr_and_series/cast_test.py +++ b/tests/expr_and_series/cast_test.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime, time, timedelta, timezone -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING import pytest @@ -19,7 +19,6 @@ if TYPE_CHECKING: from collections.abc import Mapping - from narwhals._native import NativeSQLFrame from narwhals.typing import NonNestedDType DATA = { @@ -270,7 +269,7 @@ def test_cast_datetime_utc( def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(backend in str(constructor) for backend in ("dask", "cudf", "sqlframe")): + if any(backend in str(constructor) for backend in ("dask", "cudf")): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor): @@ -278,40 +277,13 @@ def test_cast_struct(request: pytest.FixtureRequest, constructor: Constructor) - pytest.skip() pytest.importorskip("pyarrow") - data = { - "a": [{"movie ": "Cars", "rating": 4.5}, {"movie ": "Toy Story", "rating": 4.9}] - } + data = {"movie": ["Cars", "Toy Story"], "rating": [4.5, 4.9]} native_df = constructor(data) - # NOTE: This branch needs to be rewritten to **not depend** on private `SparkLikeLazyFrame` properties - if "spark" in str(constructor): # pragma: no cover - # Special handling for pyspark as it natively maps the input to - # a column of type MAP - native_ldf = cast("NativeSQLFrame", native_df) - _tmp_nw_compliant_frame = nw.from_native(native_ldf)._compliant_frame - F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] - T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 - - native_ldf = native_ldf.withColumn( - "a", - F.struct( - F.col("a.movie ").cast(T.StringType()).alias("movie "), - F.col("a.rating").cast(T.DoubleType()).alias("rating"), - ), - ) - assert nw.from_native(native_ldf).collect_schema() == nw.Schema( - { - "a": nw.Struct( - [nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float64())] - ) - } - ) - native_df = native_ldf - - dtype = nw.Struct([nw.Field("movie ", nw.String()), nw.Field("rating", nw.Float32())]) - result = nw.from_native(native_df).select(nw.col("a").cast(dtype)).lazy().collect() - assert result.schema == {"a": dtype} + dtype = nw.Struct([nw.Field("movie", nw.String()), nw.Field("rating", nw.Float32())]) + result = nw.from_native(native_df).select(a=nw.struct("movie", "rating").cast(dtype)) + assert result.collect_schema() == {"a": dtype} def test_raise_if_polars_dtype(constructor: Constructor) -> None: diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py index c56e3c8c2a..7d7ef9cfaf 100644 --- a/tests/expr_and_series/struct_/unnest_test.py +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -1,7 +1,6 @@ from __future__ import annotations from datetime import datetime -from typing import TYPE_CHECKING, cast import pytest @@ -14,47 +13,22 @@ assert_equal_data, ) -if TYPE_CHECKING: - from narwhals._native import NativeSQLFrame - data = { - "user": [{"id": 0, "name": "john"}, {"id": 1, "name": "jane"}], - "psw": [ - {"hash": "fake-hash-1", "ts": datetime(2026, 1, 1, 0, 0)}, - {"hash": "fake-hash-2", "ts": datetime(2026, 1, 2, 0, 0)}, - ], + "id": [0, 1], + "name": ["john", "jane"], + "hash": ["fake-hash-1", "fake-hash-2"], + "ts": [datetime(2026, 1, 1, 0, 0), datetime(2026, 1, 2, 0, 0)], } user_dtype = nw.Struct({"id": nw.Int16(), "name": nw.String()}) psw_dtype = nw.Struct({"hash": nw.String(), "ts": nw.Datetime()}) - -def _spark_to_struct(native_df: NativeSQLFrame) -> NativeSQLFrame: # pragma: no cover - """Convert pyspark MAP columns to proper struct columns. - - PySpark natively maps dict input to MAP, so we need to - reconstruct the struct columns with the correct types before casting. - """ - _tmp_nw_compliant_frame = nw.from_native(native_df)._compliant_frame - F = _tmp_nw_compliant_frame._F # type: ignore[attr-defined] - T = _tmp_nw_compliant_frame._native_dtypes # type: ignore[attr-defined] # noqa: N806 - - return native_df.withColumns( - { - "user": F.struct( - F.col("user.id").cast(T.IntegerType()).alias("id"), - F.col("user.name").cast(T.StringType()).alias("name"), - ), - "psw": F.struct( - F.col("psw.hash").cast(T.StringType()).alias("hash"), - F.col("psw.ts").cast(T.TimestampType()).alias("ts"), - ), - } - ) +user_expr = nw.struct("id", "name").cast(user_dtype).alias("user") +psw_expr = nw.struct("hash", "ts").cast(psw_dtype).alias("user") def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(backend in str(constructor) for backend in ("dask", "sqlframe")): + if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor) and ( @@ -62,11 +36,7 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) - ): pytest.skip() - native_df = constructor(data) - if "spark" in str(constructor): # pragma: no cover - native_df = _spark_to_struct(cast("NativeSQLFrame", native_df)) - - df = nw.from_native(native_df).select(user=nw.col("user").cast(user_dtype)) + df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) result = df.select(nw.col("user").struct.unnest()) expected = {"id": [0, 1], "name": ["john", "jane"]} @@ -76,7 +46,7 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) - def test_unnest_expr_multi( request: pytest.FixtureRequest, constructor: Constructor ) -> None: - if any(backend in str(constructor) for backend in ("dask", "sqlframe")): + if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) if "pandas" in str(constructor) and ( @@ -84,13 +54,7 @@ def test_unnest_expr_multi( ): pytest.skip() - native_df = constructor(data) - if "spark" in str(constructor): # pragma: no cover - native_df = _spark_to_struct(cast("NativeSQLFrame", native_df)) - - df = nw.from_native(native_df).select( - user=nw.col("user").cast(user_dtype), psw=nw.col("psw").cast(psw_dtype) - ) + df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) result = df.select(nw.col("user", "psw").struct.unnest()) expected = { @@ -108,9 +72,7 @@ def test_unnest_series(constructor_eager: ConstructorEager) -> None: ): pytest.skip() - df = nw.from_native(constructor_eager(data), eager_only=True).select( - user=nw.col("user").cast(user_dtype) - ) + df = nw.from_native(constructor_eager(data), eager_only=True).select(user=user_expr) result = df.get_column("user").struct.unnest() expected = {"id": [0, 1], "name": ["john", "jane"]} From 81e7be5158f180ff89681c21c4ac6277e49f38ab Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 21:55:26 +0200 Subject: [PATCH 07/10] fixup old pyarrow versions --- narwhals/_arrow/series_struct.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/narwhals/_arrow/series_struct.py b/narwhals/_arrow/series_struct.py index 5f520e370b..c91a547770 100644 --- a/narwhals/_arrow/series_struct.py +++ b/narwhals/_arrow/series_struct.py @@ -22,5 +22,13 @@ def unnest(self) -> ArrowDataFrame: native = self.native struct_type: pa.StructType = native.type - table = pa.table({n: pc.struct_field(native, n) for n in struct_type.names}) + + # NOTE: struct_type.names is not available until pyarrow 18.0.0 + n_fields = struct_type.num_fields + table = pa.table( + { + struct_type.field(idx).name: pc.struct_field(native, idx) + for idx in range(n_fields) + } + ) return ArrowDataFrame.from_native(table, context=self.compliant) From e73ddd4b37920731e4987675e25afe2c69d07745 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 22:23:29 +0200 Subject: [PATCH 08/10] patch polars until possible --- narwhals/_polars/expr.py | 16 +++++++++++++++- tests/expr_and_series/struct_/unnest_test.py | 7 +++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/narwhals/_polars/expr.py b/narwhals/_polars/expr.py index eaca8fcacf..8c70924214 100644 --- a/narwhals/_polars/expr.py +++ b/narwhals/_polars/expr.py @@ -522,4 +522,18 @@ def contains(self, item: Any) -> PolarsExpr: class PolarsExprStructNamespace( PolarsExprNamespace, PolarsStructNamespace[PolarsExpr, pl.Expr] ): - unnest: Method[PolarsExpr] + def unnest(self) -> PolarsExpr: + native = self.native + pl_version = self._expr._backend_version + if pl_version >= (1, 10, 0): + result = native.struct.unnest() + elif pl_version >= (0, 20, 30): # pragma: no cover + result = native.struct.field("*") + else: # pragma: no cover + found = ".".join(f"{d}" for d in pl_version) + msg = ( + "`Expr.struct.unnest` is only available in 'polars>=0.20.30',\n" + f"found version {found!r}." + ) + raise NotImplementedError(msg) + return self.compliant._with_native(result) diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py index 7d7ef9cfaf..ebdb24f433 100644 --- a/tests/expr_and_series/struct_/unnest_test.py +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -7,6 +7,7 @@ import narwhals as nw from tests.utils import ( PANDAS_VERSION, + POLARS_VERSION, PYARROW_VERSION, Constructor, ConstructorEager, @@ -36,6 +37,9 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) - ): pytest.skip() + if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 30): + pytest.skip() + df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) result = df.select(nw.col("user").struct.unnest()) @@ -54,6 +58,9 @@ def test_unnest_expr_multi( ): pytest.skip() + if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 30): + pytest.skip() + df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) result = df.select(nw.col("user", "psw").struct.unnest()) From 34af1de0622d3b66ab252df1b7a9e7751a40a267 Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 22:42:07 +0200 Subject: [PATCH 09/10] skip old duckdb due to struct builder --- tests/expr_and_series/struct_/unnest_test.py | 35 ++++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/expr_and_series/struct_/unnest_test.py b/tests/expr_and_series/struct_/unnest_test.py index ebdb24f433..fe7cfcd6b6 100644 --- a/tests/expr_and_series/struct_/unnest_test.py +++ b/tests/expr_and_series/struct_/unnest_test.py @@ -6,6 +6,7 @@ import narwhals as nw from tests.utils import ( + DUCKDB_VERSION, PANDAS_VERSION, POLARS_VERSION, PYARROW_VERSION, @@ -14,6 +15,19 @@ assert_equal_data, ) + +def skip_if_old_version(constructor: Constructor | ConstructorEager) -> None: + if ( + ( + "pandas" in str(constructor) + and (PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0)) + ) + or ("polars" in str(constructor) and POLARS_VERSION < (0, 20, 30)) + or ("duckdb" in str(constructor) and DUCKDB_VERSION < (1, 3)) + ): + pytest.skip() + + data = { "id": [0, 1], "name": ["john", "jane"], @@ -32,13 +46,7 @@ def test_unnest_expr(request: pytest.FixtureRequest, constructor: Constructor) - if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) - if "pandas" in str(constructor) and ( - PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) - ): - pytest.skip() - - if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 30): - pytest.skip() + skip_if_old_version(constructor) df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) @@ -53,13 +61,7 @@ def test_unnest_expr_multi( if any(backend in str(constructor) for backend in ("dask",)): request.applymarker(pytest.mark.xfail) - if "pandas" in str(constructor) and ( - PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) - ): - pytest.skip() - - if "polars" in str(constructor) and POLARS_VERSION < (0, 20, 30): - pytest.skip() + skip_if_old_version(constructor) df = nw.from_native(constructor(data)).select(user=user_expr, psw=psw_expr) @@ -74,10 +76,7 @@ def test_unnest_expr_multi( def test_unnest_series(constructor_eager: ConstructorEager) -> None: - if "pandas" in str(constructor_eager) and ( - PANDAS_VERSION < (2, 2, 0) or PYARROW_VERSION == (0, 0, 0) - ): - pytest.skip() + skip_if_old_version(constructor_eager) df = nw.from_native(constructor_eager(data), eager_only=True).select(user=user_expr) From ca453953d89c27ded6a4b28502425f024668525b Mon Sep 17 00:00:00 2001 From: FBruzzesi Date: Tue, 14 Apr 2026 22:49:09 +0200 Subject: [PATCH 10/10] fixup pandas with old pyarrow --- narwhals/_pandas_like/series_struct.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/narwhals/_pandas_like/series_struct.py b/narwhals/_pandas_like/series_struct.py index 9e4cd87a5e..bf9e26fef1 100644 --- a/narwhals/_pandas_like/series_struct.py +++ b/narwhals/_pandas_like/series_struct.py @@ -22,7 +22,16 @@ def unnest(self) -> PandasLikeDataFrame: from narwhals._pandas_like.dataframe import PandasLikeDataFrame native = self.native - pa_type: pa.StructType = native.dtype.pyarrow_dtype + struct_type: pa.StructType = native.dtype.pyarrow_dtype + + # NOTE: struct_type.names is not available until pyarrow 18.0.0 + n_fields = struct_type.num_fields ns = self.implementation.to_native_namespace() - result = ns.DataFrame({name: native.struct.field(name) for name in pa_type.names}) + + result = ns.DataFrame( + { + struct_type.field(idx).name: native.struct.field(idx) + for idx in range(n_fields) + } + ) return PandasLikeDataFrame.from_native(result, context=self.compliant)