Skip to content

Commit 0d14831

Browse files
timsaucer and claude committed
Add unit tests for greatest, least, nvl2, and ifnull functions
Tests cover multiple data types (integers, strings), null handling (all-null, partial-null), multiple arguments, and ifnull/nvl equivalence. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c8dc245 commit 0d14831

File tree

1 file changed

+162
-0
lines changed

1 file changed

+162
-0
lines changed

python/tests/test_functions.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
14351435
assert result.column(0) == pa.array(
14361436
["Hello", "fallback", "!"], type=pa.string_view()
14371437
)
1438+
1439+
1440+
def test_greatest(df):
    """Exercise ``f.greatest`` on integer, partially-null, and string inputs.

    The ``df`` fixture is requested but not used; every scenario builds its
    own dataframe from an in-memory record batch.
    """
    session = SessionContext()

    # Integer columns whose final row is null in every column.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two arguments: the all-null row is expected to stay null.
    two_col_expr = f.greatest(column("a"), column("b")).alias("greatest_ab")
    result = int_frame.select(two_col_expr).collect()[0]
    expected_two = pa.array([3, 5, None], type=pa.int64())
    assert result.column(0) == expected_two

    # Three arguments.
    three_col_expr = f.greatest(column("a"), column("b"), column("c")).alias(
        "greatest_abc"
    )
    result = int_frame.select(three_col_expr).collect()[0]
    expected_three = pa.array([3, 8, None], type=pa.int64())
    assert result.column(0) == expected_three

    # Partial nulls: each row has exactly one non-null value, which is the
    # value the test expects back.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.greatest(column("x"), column("y")).alias("g")
    result = partial_frame.select(partial_expr).collect()[0]
    assert result.column(0) == pa.array([5, 10], type=pa.int64())

    # String columns: compared as Python lists rather than as Arrow arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.greatest(column("s1"), column("s2")).alias("g")
    result = text_frame.select(text_expr).collect()[0]
    assert result.column(0).to_pylist() == ["banana", "cherry"]
1491+
1492+
1493+
def test_least(df):
    """Exercise ``f.least`` on integer, partially-null, and string inputs.

    The ``df`` fixture is requested but not used; every scenario builds its
    own dataframe from an in-memory record batch.
    """
    session = SessionContext()

    # Integer columns whose final row is null in every column.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two arguments: the all-null row is expected to stay null.
    two_col_expr = f.least(column("a"), column("b")).alias("least_ab")
    result = int_frame.select(two_col_expr).collect()[0]
    expected_two = pa.array([1, 2, None], type=pa.int64())
    assert result.column(0) == expected_two

    # Three arguments.
    three_col_expr = f.least(column("a"), column("b"), column("c")).alias(
        "least_abc"
    )
    result = int_frame.select(three_col_expr).collect()[0]
    expected_three = pa.array([1, 2, None], type=pa.int64())
    assert result.column(0) == expected_three

    # Partial nulls: each row has exactly one non-null value, which is the
    # value the test expects back.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.least(column("x"), column("y")).alias("l")
    result = partial_frame.select(partial_expr).collect()[0]
    assert result.column(0) == pa.array([5, 10], type=pa.int64())

    # String columns: compared as Python lists rather than as Arrow arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.least(column("s1"), column("s2")).alias("l")
    result = text_frame.select(text_expr).collect()[0]
    assert result.column(0).to_pylist() == ["apple", "apricot"]
1542+
1543+
1544+
def test_nvl2(df):
    """Exercise ``f.nvl2`` with integer and string columns.

    The assertions expect the second argument where the first is non-null
    and the third argument where the first is null. The ``df`` fixture is
    requested but not used.
    """
    session = SessionContext()

    # Column "a" drives the choice between "b" and "c" on each row.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
            pa.array([100, 200, 300, 400]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    int_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    result = int_frame.select(int_expr).collect()[0]
    expected_ints = pa.array([100, 20, 300, 40], type=pa.int64())
    assert result.column(0) == expected_ints

    # Same selection logic with string columns, compared as Python lists.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["x", None]),
            pa.array(["not_null", "not_null"]),
            pa.array(["is_null", "is_null"]),
        ],
        names=["a", "b", "c"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    result = text_frame.select(text_expr).collect()[0]
    assert result.column(0).to_pylist() == ["not_null", "is_null"]
1576+
1577+
1578+
def test_ifnull(df):
    """Exercise ``f.ifnull`` and check that it agrees with ``f.nvl``.

    The assertions expect the first argument where it is non-null and the
    second argument otherwise. The ``df`` fixture is requested but not used.
    """
    session = SessionContext()

    # Column "a" has nulls in rows 0 and 2; "b" supplies the fallbacks.
    batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
        ],
        names=["a", "b"],
    )
    frame = session.create_dataframe([[batch]])

    ifnull_expr = f.ifnull(column("a"), column("b")).alias("result")
    result = frame.select(ifnull_expr).collect()[0]
    expected = pa.array([10, 1, 30, 4], type=pa.int64())
    assert result.column(0) == expected

    # The same inputs through nvl must yield an equal column.
    nvl_expr = f.nvl(column("a"), column("b")).alias("nvl_result")
    result_nvl = frame.select(nvl_expr).collect()[0]
    assert result.column(0) == result_nvl.column(0)

0 commit comments

Comments
 (0)