@@ -1435,3 +1435,165 @@ def test_coalesce(df):
14351435 assert result .column (0 ) == pa .array (
14361436 ["Hello" , "fallback" , "!" ], type = pa .string_view ()
14371437 )
1438+
1439+
def test_greatest(df):
    """Exercise ``f.greatest`` on integer, partially-null and string columns.

    The ``df`` fixture argument is accepted but not used; every case builds
    its own data through a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Three integer columns; the last row is null in all of them.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two-argument form: the all-null row is expected to stay null.
    two_arg = f.greatest(column("a"), column("b")).alias("greatest_ab")
    two_arg_out = int_frame.select(two_arg).collect()[0]
    assert two_arg_out.column(0) == pa.array([3, 5, None], type=pa.int64())

    # Three-argument form.
    three_arg = f.greatest(column("a"), column("b"), column("c")).alias(
        "greatest_abc"
    )
    three_arg_out = int_frame.select(three_arg).collect()[0]
    assert three_arg_out.column(0) == pa.array([3, 8, None], type=pa.int64())

    # Each row has exactly one null; the non-null value is the expected result.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.greatest(column("x"), column("y")).alias("g")
    partial_out = partial_frame.select(partial_expr).collect()[0]
    assert partial_out.column(0) == pa.array([5, 10], type=pa.int64())

    # String inputs; compared as Python lists rather than as Arrow arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.greatest(column("s1"), column("s2")).alias("g")
    text_out = text_frame.select(text_expr).collect()[0]
    assert text_out.column(0).to_pylist() == ["banana", "cherry"]
1491+
1492+
def test_least(df):
    """Exercise ``f.least`` on integer, partially-null and string columns.

    The ``df`` fixture argument is accepted but not used; every case builds
    its own data through a fresh ``SessionContext``.
    """
    session = SessionContext()

    # Three integer columns; the last row is null in all of them.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([1, 5, None]),
            pa.array([3, 2, None]),
            pa.array([2, 8, None]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    # Two-argument form: the all-null row is expected to stay null.
    two_arg = f.least(column("a"), column("b")).alias("least_ab")
    two_arg_out = int_frame.select(two_arg).collect()[0]
    assert two_arg_out.column(0) == pa.array([1, 2, None], type=pa.int64())

    # Three-argument form.
    three_arg = f.least(column("a"), column("b"), column("c")).alias("least_abc")
    three_arg_out = int_frame.select(three_arg).collect()[0]
    assert three_arg_out.column(0) == pa.array([1, 2, None], type=pa.int64())

    # Each row has exactly one null; the non-null value is the expected result.
    partial_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 10]),
            pa.array([5, None]),
        ],
        names=["x", "y"],
    )
    partial_frame = session.create_dataframe([[partial_batch]])
    partial_expr = f.least(column("x"), column("y")).alias("l")
    partial_out = partial_frame.select(partial_expr).collect()[0]
    assert partial_out.column(0) == pa.array([5, 10], type=pa.int64())

    # String inputs; compared as Python lists rather than as Arrow arrays.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["apple", "cherry"]),
            pa.array(["banana", "apricot"]),
        ],
        names=["s1", "s2"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.least(column("s1"), column("s2")).alias("l")
    text_out = text_frame.select(text_expr).collect()[0]
    assert text_out.column(0).to_pylist() == ["apple", "apricot"]
1542+
1543+
def test_nvl2(df):
    """Exercise ``f.nvl2`` with integer and string columns.

    The expectation checked here is: take the second argument where the
    first is not null, otherwise take the third. The ``df`` fixture argument
    is accepted but not used.
    """
    session = SessionContext()

    # "a" drives the choice; "b" and "c" are the two candidate outputs.
    int_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
            pa.array([100, 200, 300, 400]),
        ],
        names=["a", "b", "c"],
    )
    int_frame = session.create_dataframe([[int_batch]])

    int_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    int_out = int_frame.select(int_expr).collect()[0]
    # Rows 0 and 2 have a null "a" and so pick "c"; rows 1 and 3 pick "b".
    assert int_out.column(0) == pa.array([100, 20, 300, 40], type=pa.int64())

    # Same rule with string data; compared as Python lists.
    text_batch = pa.RecordBatch.from_arrays(
        [
            pa.array(["x", None]),
            pa.array(["not_null", "not_null"]),
            pa.array(["is_null", "is_null"]),
        ],
        names=["a", "b", "c"],
    )
    text_frame = session.create_dataframe([[text_batch]])
    text_expr = f.nvl2(column("a"), column("b"), column("c")).alias("result")
    text_out = text_frame.select(text_expr).collect()[0]
    assert text_out.column(0).to_pylist() == ["not_null", "is_null"]
1576+
1577+
def test_ifnull(df):
    """Exercise ``f.ifnull`` and confirm it agrees with ``f.nvl``.

    The expectation checked here is: keep the first argument where it is
    not null, otherwise use the second. The ``df`` fixture argument is
    accepted but not used.
    """
    session = SessionContext()
    source_batch = pa.RecordBatch.from_arrays(
        [
            pa.array([None, 1, None, 4]),
            pa.array([10, 20, 30, 40]),
        ],
        names=["a", "b"],
    )
    frame = session.create_dataframe([[source_batch]])

    # Rows 0 and 2 have a null "a" and fall back to "b".
    ifnull_expr = f.ifnull(column("a"), column("b")).alias("result")
    result = frame.select(ifnull_expr).collect()[0]
    assert result.column(0) == pa.array([10, 1, 30, 4], type=pa.int64())

    # The same inputs through nvl must produce an equal column.
    nvl_expr = f.nvl(column("a"), column("b")).alias("nvl_result")
    result_nvl = frame.select(nvl_expr).collect()[0]
    assert result.column(0) == result_nvl.column(0)
0 commit comments