Skip to content

Commit fc76525

Browse files
timsaucer and claude committed
Improve map function docstrings
- Add examples for all three map() calling conventions
- Use clearer descriptions instead of jargon (no "zipped" or "variadic")
- Break map_keys/map_values/map_extract/map_entries examples into two steps: create the map column first, then call the function

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1688aef commit fc76525

File tree

1 file changed

+36
-13
lines changed

1 file changed

+36
-13
lines changed

python/datafusion/functions.py

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3389,19 +3389,41 @@ def map(*args: Any) -> Expr:
33893389
Supports three calling conventions:
33903390
33913391
- ``map({"a": 1, "b": 2})`` — from a Python dictionary.
3392-
- ``map([keys], [values])`` — two lists that get zipped.
3393-
- ``map(k1, v1, k2, v2, ...)`` — variadic key-value pairs.
3392+
- ``map([keys], [values])`` — from a list of keys and a list of
3393+
their associated values. Both lists must be the same length.
3394+
- ``map(k1, v1, k2, v2, ...)`` — from alternating keys and their
3395+
associated values.
33943396
33953397
Keys and values that are not already :py:class:`~datafusion.expr.Expr`
33963398
are automatically converted to literal expressions.
33973399
33983400
Examples:
3401+
From a dictionary:
3402+
33993403
>>> ctx = dfn.SessionContext()
34003404
>>> df = ctx.from_pydict({"a": [1]})
34013405
>>> result = df.select(
34023406
... dfn.functions.map({"a": 1, "b": 2}).alias("m"))
34033407
>>> result.collect_column("m")[0].as_py()
34043408
[('a', 1), ('b', 2)]
3409+
3410+
From two lists:
3411+
3412+
>>> df = ctx.from_pydict({"key": ["x", "y"], "val": [10, 20]})
3413+
>>> df = df.select(
3414+
... dfn.functions.map(
3415+
... [dfn.col("key")], [dfn.col("val")]
3416+
... ).alias("m"))
3417+
>>> df.collect_column("m")[0].as_py()
3418+
[('x', 10)]
3419+
3420+
From alternating keys and values:
3421+
3422+
>>> df = ctx.from_pydict({"a": [1]})
3423+
>>> result = df.select(
3424+
... dfn.functions.map("x", 1, "y", 2).alias("m"))
3425+
>>> result.collect_column("m")[0].as_py()
3426+
[('x', 1), ('y', 2)]
34053427
"""
34063428
if len(args) == 1 and isinstance(args[0], dict):
34073429
key_list = list(args[0].keys())
@@ -3440,10 +3462,10 @@ def map_keys(map: Expr) -> Expr:
34403462
Examples:
34413463
>>> ctx = dfn.SessionContext()
34423464
>>> df = ctx.from_pydict({"a": [1]})
3465+
>>> df = df.select(
3466+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34433467
>>> result = df.select(
3444-
... dfn.functions.map_keys(
3445-
... dfn.functions.map({"x": 1, "y": 2})
3446-
... ).alias("keys"))
3468+
... dfn.functions.map_keys(dfn.col("m")).alias("keys"))
34473469
>>> result.collect_column("keys")[0].as_py()
34483470
['x', 'y']
34493471
"""
@@ -3456,10 +3478,10 @@ def map_values(map: Expr) -> Expr:
34563478
Examples:
34573479
>>> ctx = dfn.SessionContext()
34583480
>>> df = ctx.from_pydict({"a": [1]})
3481+
>>> df = df.select(
3482+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34593483
>>> result = df.select(
3460-
... dfn.functions.map_values(
3461-
... dfn.functions.map({"x": 1, "y": 2})
3462-
... ).alias("vals"))
3484+
... dfn.functions.map_values(dfn.col("m")).alias("vals"))
34633485
>>> result.collect_column("vals")[0].as_py()
34643486
[1, 2]
34653487
"""
@@ -3472,10 +3494,11 @@ def map_extract(map: Expr, key: Expr) -> Expr:
34723494
Examples:
34733495
>>> ctx = dfn.SessionContext()
34743496
>>> df = ctx.from_pydict({"a": [1]})
3497+
>>> df = df.select(
3498+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34753499
>>> result = df.select(
34763500
... dfn.functions.map_extract(
3477-
... dfn.functions.map({"x": 1, "y": 2}),
3478-
... dfn.lit("x"),
3501+
... dfn.col("m"), dfn.lit("x")
34793502
... ).alias("val"))
34803503
>>> result.collect_column("val")[0].as_py()
34813504
[1]
@@ -3489,10 +3512,10 @@ def map_entries(map: Expr) -> Expr:
34893512
Examples:
34903513
>>> ctx = dfn.SessionContext()
34913514
>>> df = ctx.from_pydict({"a": [1]})
3515+
>>> df = df.select(
3516+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34923517
>>> result = df.select(
3493-
... dfn.functions.map_entries(
3494-
... dfn.functions.map({"x": 1, "y": 2})
3495-
... ).alias("entries"))
3518+
... dfn.functions.map_entries(dfn.col("m")).alias("entries"))
34963519
>>> result.collect_column("entries")[0].as_py()
34973520
[{'key': 'x', 'value': 1}, {'key': 'y', 'value': 2}]
34983521
"""

0 commit comments

Comments
 (0)