Skip to content

Commit a662e18

Browse files
timsaucer and claude committed
Add doctest examples and fix docstring style for new scalar functions
Replace Args/Returns sections with doctest Examples blocks for arrow_metadata, get_field, union_extract, union_tag, and version to match existing codebase conventions. Simplify row to alias-style docstring with See Also reference. Document that arrow_cast accepts both str and Expr for data_type. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0f50a31 commit a662e18

File tree

1 file changed

+75
-31
lines changed

1 file changed

+75
-31
lines changed

python/datafusion/functions.py

Lines changed: 75 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -2637,6 +2637,8 @@ def arrow_typeof(arg: Expr) -> Expr:
26372637
def arrow_cast(expr: Expr, data_type: Expr | str) -> Expr:
26382638
"""Casts an expression to a specified data type.
26392639
2640+
The ``data_type`` can be a string or an ``Expr``.
2641+
26402642
Examples:
26412643
>>> ctx = dfn.SessionContext()
26422644
>>> df = ctx.from_pydict({"a": [1]})
@@ -2657,12 +2659,26 @@ def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
26572659
If called with one argument, returns a Map of all metadata key-value pairs.
26582660
If called with two arguments, returns the value for the specified metadata key.
26592661
2660-
Args:
2661-
expr: An expression whose metadata to retrieve.
2662-
key: Optional metadata key to look up. Can be a string or an Expr.
2662+
Examples:
2663+
>>> import pyarrow as pa
2664+
>>> field = pa.field("val", pa.int64(), metadata={"k": "v"})
2665+
>>> schema = pa.schema([field])
2666+
>>> batch = pa.RecordBatch.from_arrays([pa.array([1])], schema=schema)
2667+
>>> ctx = dfn.SessionContext()
2668+
>>> df = ctx.create_dataframe([[batch]])
2669+
>>> result = df.select(
2670+
... dfn.functions.arrow_metadata(dfn.col("val")).alias("meta")
2671+
... )
2672+
>>> ("k", "v") in result.collect_column("meta")[0].as_py()
2673+
True
26632674
2664-
Returns:
2665-
A Map of metadata or a specific metadata value.
2675+
>>> result = df.select(
2676+
... dfn.functions.arrow_metadata(
2677+
... dfn.col("val"), key="k"
2678+
... ).alias("meta_val")
2679+
... )
2680+
>>> result.collect_column("meta_val")[0].as_py()
2681+
'v'
26662682
"""
26672683
if key is None:
26682684
return Expr(f.arrow_metadata(expr.expr))
@@ -2674,12 +2690,20 @@ def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr:
26742690
def get_field(expr: Expr, name: Expr | str) -> Expr:
26752691
"""Extracts a field from a struct or map by name.
26762692
2677-
Args:
2678-
expr: A struct or map expression.
2679-
name: The field name to extract.
2680-
2681-
Returns:
2682-
The value of the named field.
2693+
Examples:
2694+
>>> ctx = dfn.SessionContext()
2695+
>>> df = ctx.from_pydict({"a": [1], "b": [2]})
2696+
>>> df = df.with_column(
2697+
... "s",
2698+
... dfn.functions.named_struct(
2699+
... [("x", dfn.col("a")), ("y", dfn.col("b"))]
2700+
... ),
2701+
... )
2702+
>>> result = df.select(
2703+
... dfn.functions.get_field(dfn.col("s"), "x").alias("x_val")
2704+
... )
2705+
>>> result.collect_column("x_val")[0].as_py()
2706+
1
26832707
"""
26842708
if isinstance(name, str):
26852709
name = Expr.string_literal(name)
@@ -2692,12 +2716,22 @@ def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
26922716
Returns the value of the named field if it is the currently selected
26932717
variant, otherwise returns NULL.
26942718
2695-
Args:
2696-
union_expr: A union-typed expression.
2697-
field_name: The name of the field to extract.
2698-
2699-
Returns:
2700-
The extracted value or NULL.
2719+
Examples:
2720+
>>> import pyarrow as pa
2721+
>>> ctx = dfn.SessionContext()
2722+
>>> types = pa.array([0, 1, 0], type=pa.int8())
2723+
>>> offsets = pa.array([0, 0, 1], type=pa.int32())
2724+
>>> arr = pa.UnionArray.from_dense(
2725+
... types, offsets, [pa.array([1, 2]), pa.array(["hi"])],
2726+
... ["int", "str"], [0, 1],
2727+
... )
2728+
>>> batch = pa.RecordBatch.from_arrays([arr], names=["u"])
2729+
>>> df = ctx.create_dataframe([[batch]])
2730+
>>> result = df.select(
2731+
... dfn.functions.union_extract(dfn.col("u"), "int").alias("val")
2732+
... )
2733+
>>> result.collect_column("val").to_pylist()
2734+
[1, None, 2]
27012735
"""
27022736
if isinstance(field_name, str):
27032737
field_name = Expr.string_literal(field_name)
@@ -2707,34 +2741,44 @@ def union_extract(union_expr: Expr, field_name: Expr | str) -> Expr:
27072741
def union_tag(union_expr: Expr) -> Expr:
27082742
"""Returns the tag (active field name) of a union type.
27092743
2710-
Args:
2711-
union_expr: A union-typed expression.
2712-
2713-
Returns:
2714-
The name of the currently selected field in the union.
2744+
Examples:
2745+
>>> import pyarrow as pa
2746+
>>> ctx = dfn.SessionContext()
2747+
>>> types = pa.array([0, 1, 0], type=pa.int8())
2748+
>>> offsets = pa.array([0, 0, 1], type=pa.int32())
2749+
>>> arr = pa.UnionArray.from_dense(
2750+
... types, offsets, [pa.array([1, 2]), pa.array(["hi"])],
2751+
... ["int", "str"], [0, 1],
2752+
... )
2753+
>>> batch = pa.RecordBatch.from_arrays([arr], names=["u"])
2754+
>>> df = ctx.create_dataframe([[batch]])
2755+
>>> result = df.select(
2756+
... dfn.functions.union_tag(dfn.col("u")).alias("tag")
2757+
... )
2758+
>>> result.collect_column("tag").to_pylist()
2759+
['int', 'str', 'int']
27152760
"""
27162761
return Expr(f.union_tag(union_expr.expr))
27172762

27182763

27192764
def version() -> Expr:
27202765
"""Returns the DataFusion version string.
27212766
2722-
Returns:
2723-
A string describing the DataFusion version.
2767+
Examples:
2768+
>>> ctx = dfn.SessionContext()
2769+
>>> df = ctx.from_pydict({"a": [1]})
2770+
>>> result = df.select(dfn.functions.version().alias("v"))
2771+
>>> "Apache DataFusion" in result.collect_column("v")[0].as_py()
2772+
True
27242773
"""
27252774
return Expr(f.version())
27262775

27272776

27282777
def row(*args: Expr) -> Expr:
27292778
"""Returns a struct with the given arguments.
27302779
2731-
This is an alias for :py:func:`struct`.
2732-
2733-
Args:
2734-
args: The expressions to include in the struct.
2735-
2736-
Returns:
2737-
A struct expression.
2780+
See Also:
2781+
This is an alias for :py:func:`struct`.
27382782
"""
27392783
return struct(*args)
27402784

0 commit comments

Comments
 (0)