Skip to content

Commit 3b55137

Browse files
timsaucer and claude committed
Improve map function docstrings
- Add examples for all three map() calling conventions
- Use clearer descriptions instead of jargon (no "zipped" or "variadic")
- Break map_keys/map_values/map_extract/map_entries examples into two steps: create the map column first, then call the function

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 76abad5 commit 3b55137

File tree

1 file changed

+36
-13
lines changed

1 file changed

+36
-13
lines changed

python/datafusion/functions.py

Lines changed: 36 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3354,19 +3354,41 @@ def map(*args: Any) -> Expr:
33543354
Supports three calling conventions:
33553355
33563356
- ``map({"a": 1, "b": 2})`` — from a Python dictionary.
3357-
- ``map([keys], [values])`` — two lists that get zipped.
3358-
- ``map(k1, v1, k2, v2, ...)`` — variadic key-value pairs.
3357+
- ``map([keys], [values])`` — from a list of keys and a list of
3358+
their associated values. Both lists must be the same length.
3359+
- ``map(k1, v1, k2, v2, ...)`` — from alternating keys and their
3360+
associated values.
33593361
33603362
Keys and values that are not already :py:class:`~datafusion.expr.Expr`
33613363
are automatically converted to literal expressions.
33623364
33633365
Examples:
3366+
From a dictionary:
3367+
33643368
>>> ctx = dfn.SessionContext()
33653369
>>> df = ctx.from_pydict({"a": [1]})
33663370
>>> result = df.select(
33673371
... dfn.functions.map({"a": 1, "b": 2}).alias("m"))
33683372
>>> result.collect_column("m")[0].as_py()
33693373
[('a', 1), ('b', 2)]
3374+
3375+
From two lists:
3376+
3377+
>>> df = ctx.from_pydict({"key": ["x", "y"], "val": [10, 20]})
3378+
>>> df = df.select(
3379+
... dfn.functions.map(
3380+
... [dfn.col("key")], [dfn.col("val")]
3381+
... ).alias("m"))
3382+
>>> df.collect_column("m")[0].as_py()
3383+
[('x', 10)]
3384+
3385+
From alternating keys and values:
3386+
3387+
>>> df = ctx.from_pydict({"a": [1]})
3388+
>>> result = df.select(
3389+
... dfn.functions.map("x", 1, "y", 2).alias("m"))
3390+
>>> result.collect_column("m")[0].as_py()
3391+
[('x', 1), ('y', 2)]
33703392
"""
33713393
if len(args) == 1 and isinstance(args[0], dict):
33723394
key_list = list(args[0].keys())
@@ -3405,10 +3427,10 @@ def map_keys(map: Expr) -> Expr:
34053427
Examples:
34063428
>>> ctx = dfn.SessionContext()
34073429
>>> df = ctx.from_pydict({"a": [1]})
3430+
>>> df = df.select(
3431+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34083432
>>> result = df.select(
3409-
... dfn.functions.map_keys(
3410-
... dfn.functions.map({"x": 1, "y": 2})
3411-
... ).alias("keys"))
3433+
... dfn.functions.map_keys(dfn.col("m")).alias("keys"))
34123434
>>> result.collect_column("keys")[0].as_py()
34133435
['x', 'y']
34143436
"""
@@ -3421,10 +3443,10 @@ def map_values(map: Expr) -> Expr:
34213443
Examples:
34223444
>>> ctx = dfn.SessionContext()
34233445
>>> df = ctx.from_pydict({"a": [1]})
3446+
>>> df = df.select(
3447+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34243448
>>> result = df.select(
3425-
... dfn.functions.map_values(
3426-
... dfn.functions.map({"x": 1, "y": 2})
3427-
... ).alias("vals"))
3449+
... dfn.functions.map_values(dfn.col("m")).alias("vals"))
34283450
>>> result.collect_column("vals")[0].as_py()
34293451
[1, 2]
34303452
"""
@@ -3437,10 +3459,11 @@ def map_extract(map: Expr, key: Expr) -> Expr:
34373459
Examples:
34383460
>>> ctx = dfn.SessionContext()
34393461
>>> df = ctx.from_pydict({"a": [1]})
3462+
>>> df = df.select(
3463+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34403464
>>> result = df.select(
34413465
... dfn.functions.map_extract(
3442-
... dfn.functions.map({"x": 1, "y": 2}),
3443-
... dfn.lit("x"),
3466+
... dfn.col("m"), dfn.lit("x")
34443467
... ).alias("val"))
34453468
>>> result.collect_column("val")[0].as_py()
34463469
[1]
@@ -3454,10 +3477,10 @@ def map_entries(map: Expr) -> Expr:
34543477
Examples:
34553478
>>> ctx = dfn.SessionContext()
34563479
>>> df = ctx.from_pydict({"a": [1]})
3480+
>>> df = df.select(
3481+
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
34573482
>>> result = df.select(
3458-
... dfn.functions.map_entries(
3459-
... dfn.functions.map({"x": 1, "y": 2})
3460-
... ).alias("entries"))
3483+
... dfn.functions.map_entries(dfn.col("m")).alias("entries"))
34613484
>>> result.collect_column("entries")[0].as_py()
34623485
[{'key': 'x', 'value': 1}, {'key': 'y', 'value': 2}]
34633486
"""

0 commit comments

Comments
 (0)