Skip to content

Commit a7c7de4

Browse files
timsaucer and claude committed
Add unit tests for new array/list functions and aliases
Tests cover all functions and aliases added in the previous commit: array_any_value, array_distance, array_max, array_min, array_reverse, arrays_zip, string_to_array, gen_series, generate_series, array_contains, list_contains, list_empty, list_pop_back, list_pop_front, list_has, list_has_all, list_has_any, and list_* aliases for the new functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b9d5f65 commit a7c7de4

File tree

1 file changed

+189
-0
lines changed

1 file changed

+189
-0
lines changed

python/tests/test_functions.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,192 @@ def test_coalesce(df):
14691469
assert result.column(0) == pa.array(
14701470
["Hello", "fallback", "!"], type=pa.string_view()
14711471
)
1472+
1473+
1474+
def test_array_any_value():
    """Check array_any_value on lists with a leading null, only nulls, and no nulls."""
    session = SessionContext()
    frame = session.from_pydict(
        {"a": [[None, 2, 3], [None, None, None], [1, 2, 3]]}
    )
    picked = f.array_any_value(column("a")).alias("v")
    batches = frame.select(picked).collect()
    values = batches[0].column(0).to_pylist()
    assert values[0] == 2
    assert values[1] is None
    assert values[2] == 1
1482+
1483+
1484+
def test_list_any_value():
    """Check that list_any_value returns 5 for the list [None, 5]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[None, 5]]})
    picked = f.list_any_value(column("a")).alias("v")
    batches = frame.select(picked).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 5
1489+
1490+
1491+
def test_array_distance():
    """Check that array_distance of [1.0, 2.0] and [1.0, 4.0] is approximately 2.0."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1.0, 2.0]], "b": [[1.0, 4.0]]})
    distance_expr = f.array_distance(column("a"), column("b")).alias("v")
    batches = frame.select(distance_expr).collect()
    distance = batches[0].column(0)[0].as_py()
    assert distance == pytest.approx(2.0)
1496+
1497+
1498+
def test_list_distance():
    """Check that list_distance of [3.0, 0.0] and [0.0, 4.0] is approximately 5.0."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[3.0, 0.0]], "b": [[0.0, 4.0]]})
    distance_expr = f.list_distance(column("a"), column("b")).alias("v")
    batches = frame.select(distance_expr).collect()
    distance = batches[0].column(0)[0].as_py()
    assert distance == pytest.approx(5.0)
1503+
1504+
1505+
def test_array_max():
    """Check that array_max returns the largest element of each input list."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 5, 3], [10, 2]]})
    max_expr = f.array_max(column("a")).alias("v")
    batches = frame.select(max_expr).collect()
    maxima = batches[0].column(0).to_pylist()
    assert maxima == [5, 10]
1511+
1512+
1513+
def test_list_max():
    """Check that list_max returns 9 for the list [7, 2, 9]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[7, 2, 9]]})
    max_expr = f.list_max(column("a")).alias("v")
    batches = frame.select(max_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 9
1518+
1519+
1520+
def test_array_min():
    """Check that array_min returns the smallest element of each input list."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 5, 3], [10, 2]]})
    min_expr = f.array_min(column("a")).alias("v")
    batches = frame.select(min_expr).collect()
    minima = batches[0].column(0).to_pylist()
    assert minima == [1, 2]
1526+
1527+
1528+
def test_list_min():
    """Check that list_min returns 2 for the list [7, 2, 9]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[7, 2, 9]]})
    min_expr = f.list_min(column("a")).alias("v")
    batches = frame.select(min_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == 2
1533+
1534+
1535+
def test_array_reverse():
    """Check that array_reverse reverses the element order of each input list."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3], [4, 5]]})
    reversed_expr = f.array_reverse(column("a")).alias("v")
    batches = frame.select(reversed_expr).collect()
    reversed_lists = batches[0].column(0).to_pylist()
    assert reversed_lists == [[3, 2, 1], [5, 4]]
1541+
1542+
1543+
def test_list_reverse():
    """Check that list_reverse turns [10, 20, 30] into [30, 20, 10]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[10, 20, 30]]})
    reversed_expr = f.list_reverse(column("a")).alias("v")
    batches = frame.select(reversed_expr).collect()
    first_cell = batches[0].column(0)[0]
    assert first_cell.as_py() == [30, 20, 10]
1548+
1549+
1550+
def test_arrays_zip():
    """Check that arrays_zip pairs elements of two lists into c0/c1 structs."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
    zipped_expr = f.arrays_zip(column("a"), column("b")).alias("v")
    batches = frame.select(zipped_expr).collect()
    zipped = batches[0].column(0)[0].as_py()
    expected = [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
    assert zipped == expected
1556+
1557+
1558+
def test_list_zip():
    """Check that list_zip pairs elements of two lists into c0/c1 structs."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
    zipped_expr = f.list_zip(column("a"), column("b")).alias("v")
    batches = frame.select(zipped_expr).collect()
    zipped = batches[0].column(0)[0].as_py()
    expected = [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
    assert zipped == expected
1564+
1565+
1566+
def test_string_to_array():
    """Check that string_to_array splits "hello,world,foo" on commas."""
    session = SessionContext()
    frame = session.from_pydict({"a": ["hello,world,foo"]})
    split_expr = f.string_to_array(column("a"), literal(","), literal(""))
    batches = frame.select(split_expr.alias("v")).collect()
    parts = batches[0].column(0)[0].as_py()
    assert parts == ["hello", "world", "foo"]
1573+
1574+
1575+
def test_string_to_list():
    """Check that string_to_list splits "a-b-c" on hyphens."""
    session = SessionContext()
    frame = session.from_pydict({"a": ["a-b-c"]})
    split_expr = f.string_to_list(column("a"), literal("-"), literal(""))
    batches = frame.select(split_expr.alias("v")).collect()
    parts = batches[0].column(0)[0].as_py()
    assert parts == ["a", "b", "c"]
1582+
1583+
1584+
def test_gen_series():
    """Check that gen_series(1, 5, 1) produces [1, 2, 3, 4, 5]."""
    session = SessionContext()
    # A one-row frame gives the literal-only expression a row to evaluate on.
    frame = session.from_pydict({"a": [0]})
    series_expr = f.gen_series(literal(1), literal(5), literal(1))
    batches = frame.select(series_expr.alias("v")).collect()
    series = batches[0].column(0)[0].as_py()
    assert series == [1, 2, 3, 4, 5]
1591+
1592+
1593+
def test_generate_series():
    """Check that generate_series(1, 3, 1) produces [1, 2, 3]."""
    session = SessionContext()
    # A one-row frame gives the literal-only expression a row to evaluate on.
    frame = session.from_pydict({"a": [0]})
    series_expr = f.generate_series(literal(1), literal(3), literal(1))
    batches = frame.select(series_expr.alias("v")).collect()
    series = batches[0].column(0)[0].as_py()
    assert series == [1, 2, 3]
1600+
1601+
1602+
def test_array_contains():
    """Check that array_contains reports True for 2 in [1, 2, 3]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    contains_expr = f.array_contains(column("a"), literal(2)).alias("v")
    batches = frame.select(contains_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1607+
1608+
1609+
def test_list_contains():
    """Check that list_contains reports False for 99 in [1, 2, 3]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    contains_expr = f.list_contains(column("a"), literal(99)).alias("v")
    batches = frame.select(contains_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is False
1614+
1615+
1616+
def test_list_empty():
    """Check that list_empty is True for [] and False for [1, 2]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[], [1, 2]]})
    empty_expr = f.list_empty(column("a")).alias("v")
    batches = frame.select(empty_expr).collect()
    flags = batches[0].column(0).to_pylist()
    assert flags == [True, False]
1622+
1623+
1624+
def test_list_pop_back():
    """Check that list_pop_back drops the last element of [1, 2, 3]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    popped_expr = f.list_pop_back(column("a")).alias("v")
    batches = frame.select(popped_expr).collect()
    remaining = batches[0].column(0)[0].as_py()
    assert remaining == [1, 2]
1629+
1630+
1631+
def test_list_pop_front():
    """Check that list_pop_front drops the first element of [1, 2, 3]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    popped_expr = f.list_pop_front(column("a")).alias("v")
    batches = frame.select(popped_expr).collect()
    remaining = batches[0].column(0)[0].as_py()
    assert remaining == [2, 3]
1636+
1637+
1638+
def test_list_has():
    """Check that list_has reports True for 2 in [1, 2, 3]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    has_expr = f.list_has(column("a"), literal(2)).alias("v")
    batches = frame.select(has_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1643+
1644+
1645+
def test_list_has_all():
    """Check that list_has_all is True when [1, 2, 3] is probed with [1, 2]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    needles = f.make_array(literal(1), literal(2))
    has_all_expr = f.list_has_all(column("a"), needles).alias("v")
    batches = frame.select(has_all_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True
1652+
1653+
1654+
def test_list_has_any():
    """Check that list_has_any is True when [1, 2, 3] is probed with [5, 2]."""
    session = SessionContext()
    frame = session.from_pydict({"a": [[1, 2, 3]]})
    needles = f.make_array(literal(5), literal(2))
    has_any_expr = f.list_has_any(column("a"), needles).alias("v")
    batches = frame.select(has_any_expr).collect()
    found = batches[0].column(0)[0].as_py()
    assert found is True

0 commit comments

Comments
 (0)