Add unit tests for greatest, least, nvl2, and ifnull functions

timsaucer · claude · timsaucer · commit 0d148319f33c · 2026-03-29T19:23:08.000-04:00
Tests cover multiple data types (integers, strings), null handling
(all-null, partial-null), multiple arguments, and ifnull/nvl equivalence.

Co-Authored-By: Claude Opus 4.6 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test greatest with two columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b")).alias("greatest_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 5, None], type=pa.int64())
+
+    # Test greatest with three columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 8, None], type=pa.int64())
+
+    # Test greatest with nulls mixed in (partial nulls)
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
+        0
+    ]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test greatest with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(
+        f.greatest(column("s1"), column("s2")).alias("g")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["banana", "cherry"]
+
+
+def test_least(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test least with two columns
+    result = df_test.select(
+        f.least(column("a"), column("b")).alias("least_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with three columns
+    result = df_test.select(
+        f.least(column("a"), column("b"), column("c")).alias("least_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with partial nulls
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.least(column("x"), column("y")).alias("l")).collect()[0]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test least with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(f.least(column("s1"), column("s2")).alias("l")).collect()[
+        0
+    ]
+    assert result.column(0).to_pylist() == ["apple", "apricot"]
+
+
+def test_nvl2(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 1, None, 4]),
+            pa.array([10, 20, 30, 40]),
+            pa.array([100, 200, 300, 400]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # nvl2 returns b when a is not null, c when a is null
+    result = df_test.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0) == pa.array([100, 20, 300, 40], type=pa.int64())
+
+    # Test with string columns
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["x", None]),
+            pa.array(["not_null", "not_null"]),
+            pa.array(["is_null", "is_null"]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["not_null", "is_null"]
+
+
+def test_ifnull(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 1, None, 4]),
+            pa.array([10, 20, 30, 40]),
+        ],
+        names=["a", "b"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # ifnull returns a when a is not null, b when a is null (same as nvl)
+    result = df_test.select(
+        f.ifnull(column("a"), column("b")).alias("result")
+    ).collect()[0]
+    assert result.column(0) == pa.array([10, 1, 30, 4], type=pa.int64())
+
+    # Verify ifnull matches nvl behavior
+    result_nvl = df_test.select(
+        f.nvl(column("a"), column("b")).alias("nvl_result")
+    ).collect()[0]
+    assert result.column(0) == result_nvl.column(0)