Skip to content

Commit dbe4d2e

Browse files
timsaucer and claude committed
Remove map() in favor of make_map(), fix docstrings, add validation
- Remove the map() function that shadowed the Python builtin; make_map() is now the sole entry point for creating map expressions
- Fix map_extract/element_at docstrings: missing keys return [None], not an empty list (matches actual upstream behavior)
- Add length validation for the two-list calling convention
- Update all tests and docstring examples accordingly

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fc76525 commit dbe4d2e

File tree

2 files changed

+43
-44
lines changed

2 files changed

+43
-44
lines changed

python/datafusion/functions.py

Lines changed: 24 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -3383,15 +3383,15 @@ def empty(array: Expr) -> Expr:
33833383
# map functions
33843384

33853385

3386-
def map(*args: Any) -> Expr:
3386+
def make_map(*args: Any) -> Expr:
33873387
"""Returns a map expression.
33883388
33893389
Supports three calling conventions:
33903390
3391-
- ``map({"a": 1, "b": 2})`` — from a Python dictionary.
3392-
- ``map([keys], [values])`` — from a list of keys and a list of
3391+
- ``make_map({"a": 1, "b": 2})`` — from a Python dictionary.
3392+
- ``make_map([keys], [values])`` — from a list of keys and a list of
33933393
their associated values. Both lists must be the same length.
3394-
- ``map(k1, v1, k2, v2, ...)`` — from alternating keys and their
3394+
- ``make_map(k1, v1, k2, v2, ...)`` — from alternating keys and their
33953395
associated values.
33963396
33973397
Keys and values that are not already :py:class:`~datafusion.expr.Expr`
@@ -3403,15 +3403,15 @@ def map(*args: Any) -> Expr:
34033403
>>> ctx = dfn.SessionContext()
34043404
>>> df = ctx.from_pydict({"a": [1]})
34053405
>>> result = df.select(
3406-
... dfn.functions.map({"a": 1, "b": 2}).alias("m"))
3406+
... dfn.functions.make_map({"a": 1, "b": 2}).alias("m"))
34073407
>>> result.collect_column("m")[0].as_py()
34083408
[('a', 1), ('b', 2)]
34093409
34103410
From two lists:
34113411
34123412
>>> df = ctx.from_pydict({"key": ["x", "y"], "val": [10, 20]})
34133413
>>> df = df.select(
3414-
... dfn.functions.map(
3414+
... dfn.functions.make_map(
34153415
... [dfn.col("key")], [dfn.col("val")]
34163416
... ).alias("m"))
34173417
>>> df.collect_column("m")[0].as_py()
@@ -3421,7 +3421,7 @@ def map(*args: Any) -> Expr:
34213421
34223422
>>> df = ctx.from_pydict({"a": [1]})
34233423
>>> result = df.select(
3424-
... dfn.functions.map("x", 1, "y", 2).alias("m"))
3424+
... dfn.functions.make_map("x", 1, "y", 2).alias("m"))
34253425
>>> result.collect_column("m")[0].as_py()
34263426
[('x', 1), ('y', 2)]
34273427
"""
@@ -3433,37 +3433,34 @@ def map(*args: Any) -> Expr:
34333433
and isinstance(args[0], list)
34343434
and isinstance(args[1], list)
34353435
):
3436+
if len(args[0]) != len(args[1]):
3437+
msg = "make_map requires key and value lists to be the same length"
3438+
raise ValueError(msg)
34363439
key_list = args[0]
34373440
value_list = args[1]
34383441
elif len(args) >= 2 and len(args) % 2 == 0: # noqa: PLR2004
34393442
key_list = list(args[0::2])
34403443
value_list = list(args[1::2])
34413444
else:
3442-
msg = "map expects a dict, two lists, or an even number of key-value arguments"
3445+
msg = (
3446+
"make_map expects a dict, two lists, or an even number of "
3447+
"key-value arguments"
3448+
)
34433449
raise ValueError(msg)
34443450

34453451
key_exprs = [k if isinstance(k, Expr) else Expr.literal(k) for k in key_list]
34463452
val_exprs = [v if isinstance(v, Expr) else Expr.literal(v) for v in value_list]
34473453
return Expr(f.make_map([k.expr for k in key_exprs], [v.expr for v in val_exprs]))
34483454

34493455

3450-
def make_map(*args: Any) -> Expr:
3451-
"""Returns a map expression.
3452-
3453-
See Also:
3454-
This is an alias for :py:func:`map`.
3455-
"""
3456-
return map(*args)
3457-
3458-
34593456
def map_keys(map: Expr) -> Expr:
34603457
"""Returns a list of all keys in the map.
34613458
34623459
Examples:
34633460
>>> ctx = dfn.SessionContext()
34643461
>>> df = ctx.from_pydict({"a": [1]})
34653462
>>> df = df.select(
3466-
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
3463+
... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
34673464
>>> result = df.select(
34683465
... dfn.functions.map_keys(dfn.col("m")).alias("keys"))
34693466
>>> result.collect_column("keys")[0].as_py()
@@ -3479,7 +3476,7 @@ def map_values(map: Expr) -> Expr:
34793476
>>> ctx = dfn.SessionContext()
34803477
>>> df = ctx.from_pydict({"a": [1]})
34813478
>>> df = df.select(
3482-
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
3479+
... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
34833480
>>> result = df.select(
34843481
... dfn.functions.map_values(dfn.col("m")).alias("vals"))
34853482
>>> result.collect_column("vals")[0].as_py()
@@ -3489,13 +3486,15 @@ def map_values(map: Expr) -> Expr:
34893486

34903487

34913488
def map_extract(map: Expr, key: Expr) -> Expr:
3492-
"""Returns the value for the given key in the map, or an empty list if absent.
3489+
"""Return the value for a given key in the map.
3490+
3491+
Returns ``[None]`` if the key is absent.
34933492
34943493
Examples:
34953494
>>> ctx = dfn.SessionContext()
34963495
>>> df = ctx.from_pydict({"a": [1]})
34973496
>>> df = df.select(
3498-
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
3497+
... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
34993498
>>> result = df.select(
35003499
... dfn.functions.map_extract(
35013500
... dfn.col("m"), dfn.lit("x")
@@ -3513,7 +3512,7 @@ def map_entries(map: Expr) -> Expr:
35133512
>>> ctx = dfn.SessionContext()
35143513
>>> df = ctx.from_pydict({"a": [1]})
35153514
>>> df = df.select(
3516-
... dfn.functions.map({"x": 1, "y": 2}).alias("m"))
3515+
... dfn.functions.make_map({"x": 1, "y": 2}).alias("m"))
35173516
>>> result = df.select(
35183517
... dfn.functions.map_entries(dfn.col("m")).alias("entries"))
35193518
>>> result.collect_column("entries")[0].as_py()
@@ -3523,7 +3522,9 @@ def map_entries(map: Expr) -> Expr:
35233522

35243523

35253524
def element_at(map: Expr, key: Expr) -> Expr:
3526-
"""Returns the value for the given key in the map, or an empty list if absent.
3525+
"""Return the value for a given key in the map.
3526+
3527+
Returns ``[None]`` if the key is absent.
35273528
35283529
See Also:
35293530
This is an alias for :py:func:`map_extract`.

python/tests/test_functions.py

Lines changed: 19 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -673,7 +673,7 @@ def test_map_from_dict():
673673
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
674674
df = ctx.create_dataframe([[batch]])
675675

676-
result = df.select(f.map({"x": 1, "y": 2}).alias("m")).collect()[0].column(0)
676+
result = df.select(f.make_map({"x": 1, "y": 2}).alias("m")).collect()[0].column(0)
677677
assert result[0].as_py() == [("x", 1), ("y", 2)]
678678

679679

@@ -683,7 +683,7 @@ def test_map_from_dict_with_expr_values():
683683
df = ctx.create_dataframe([[batch]])
684684

685685
result = (
686-
df.select(f.map({"x": literal(1), "y": literal(2)}).alias("m"))
686+
df.select(f.make_map({"x": literal(1), "y": literal(2)}).alias("m"))
687687
.collect()[0]
688688
.column(0)
689689
)
@@ -701,7 +701,7 @@ def test_map_from_two_lists():
701701
)
702702
df = ctx.create_dataframe([[batch]])
703703

704-
m = f.map([column("keys")], [column("vals")])
704+
m = f.make_map([column("keys")], [column("vals")])
705705
result = df.select(f.map_keys(m).alias("k")).collect()[0].column(0)
706706
for i, expected in enumerate(["k1", "k2", "k3"]):
707707
assert result[i].as_py() == [expected]
@@ -716,7 +716,7 @@ def test_map_from_variadic_pairs():
716716
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
717717
df = ctx.create_dataframe([[batch]])
718718

719-
result = df.select(f.map("x", 1, "y", 2).alias("m")).collect()[0].column(0)
719+
result = df.select(f.make_map("x", 1, "y", 2).alias("m")).collect()[0].column(0)
720720
assert result[0].as_py() == [("x", 1), ("y", 2)]
721721

722722

@@ -726,33 +726,31 @@ def test_map_variadic_with_exprs():
726726
df = ctx.create_dataframe([[batch]])
727727

728728
result = (
729-
df.select(f.map(literal("x"), literal(1), literal("y"), literal(2)).alias("m"))
729+
df.select(
730+
f.make_map(literal("x"), literal(1), literal("y"), literal(2)).alias("m")
731+
)
730732
.collect()[0]
731733
.column(0)
732734
)
733735
assert result[0].as_py() == [("x", 1), ("y", 2)]
734736

735737

736-
def test_map_odd_args_raises():
737-
with pytest.raises(ValueError, match="map expects"):
738-
f.map("x", 1, "y")
738+
def test_make_map_odd_args_raises():
739+
with pytest.raises(ValueError, match="make_map expects"):
740+
f.make_map("x", 1, "y")
739741

740742

741-
def test_make_map_is_alias():
742-
ctx = SessionContext()
743-
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
744-
df = ctx.create_dataframe([[batch]])
745-
746-
result = df.select(f.make_map({"x": 1, "y": 2}).alias("m")).collect()[0].column(0)
747-
assert result[0].as_py() == [("x", 1), ("y", 2)]
743+
def test_make_map_mismatched_lengths():
744+
with pytest.raises(ValueError, match="same length"):
745+
f.make_map(["a", "b"], [1])
748746

749747

750748
def test_map_keys():
751749
ctx = SessionContext()
752750
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
753751
df = ctx.create_dataframe([[batch]])
754752

755-
m = f.map({"x": 1, "y": 2})
753+
m = f.make_map({"x": 1, "y": 2})
756754
result = df.select(f.map_keys(m).alias("keys")).collect()[0].column(0)
757755
assert result[0].as_py() == ["x", "y"]
758756

@@ -762,7 +760,7 @@ def test_map_values():
762760
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
763761
df = ctx.create_dataframe([[batch]])
764762

765-
m = f.map({"x": 1, "y": 2})
763+
m = f.make_map({"x": 1, "y": 2})
766764
result = df.select(f.map_values(m).alias("vals")).collect()[0].column(0)
767765
assert result[0].as_py() == [1, 2]
768766

@@ -772,7 +770,7 @@ def test_map_extract():
772770
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
773771
df = ctx.create_dataframe([[batch]])
774772

775-
m = f.map({"x": 1, "y": 2})
773+
m = f.make_map({"x": 1, "y": 2})
776774
result = (
777775
df.select(f.map_extract(m, literal("x")).alias("val")).collect()[0].column(0)
778776
)
@@ -784,7 +782,7 @@ def test_map_extract_missing_key():
784782
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
785783
df = ctx.create_dataframe([[batch]])
786784

787-
m = f.map({"x": 1})
785+
m = f.make_map({"x": 1})
788786
result = (
789787
df.select(f.map_extract(m, literal("z")).alias("val")).collect()[0].column(0)
790788
)
@@ -796,7 +794,7 @@ def test_map_entries():
796794
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
797795
df = ctx.create_dataframe([[batch]])
798796

799-
m = f.map({"x": 1, "y": 2})
797+
m = f.make_map({"x": 1, "y": 2})
800798
result = df.select(f.map_entries(m).alias("entries")).collect()[0].column(0)
801799
assert result[0].as_py() == [
802800
{"key": "x", "value": 1},
@@ -809,7 +807,7 @@ def test_element_at():
809807
batch = pa.RecordBatch.from_arrays([pa.array([1])], names=["a"])
810808
df = ctx.create_dataframe([[batch]])
811809

812-
m = f.map({"a": 10, "b": 20})
810+
m = f.make_map({"a": 10, "b": 20})
813811
result = (
814812
df.select(f.element_at(m, literal("b")).alias("val")).collect()[0].column(0)
815813
)

0 commit comments

Comments
 (0)