Skip to content

Commit 020e205

Browse files
committed
test: add test cases for create_match_filter
1 parent 011b98b commit 020e205

File tree

1 file changed

+66
-2
lines changed

1 file changed

+66
-2
lines changed

tests/table/test_upsert.py

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323

2424
from pyiceberg.catalog import Catalog
2525
from pyiceberg.exceptions import NoSuchTableError
26-
from pyiceberg.expressions import AlwaysTrue, And, EqualTo, Reference
27-
from pyiceberg.expressions.literals import LongLiteral
26+
from pyiceberg.expressions import AlwaysTrue, And, BooleanExpression, EqualTo, In, IsNaN, IsNull, Or, Reference
27+
from pyiceberg.expressions.literals import DoubleLiteral, LongLiteral
2828
from pyiceberg.io.pyarrow import schema_to_pyarrow
2929
from pyiceberg.schema import Schema
3030
from pyiceberg.table import Table, UpsertResult
@@ -443,6 +443,70 @@ def test_create_match_filter_single_condition() -> None:
443443
)
444444

445445

446+
@pytest.mark.parametrize(
    "data, expected",
    [
        # Rows carry only "x" ("y" is declared by the schema but never appears in
        # the rows), so the join columns derived from the row keys are just ["x"].
        # Expected: null check OR NaN check OR membership in the remaining values.
        pytest.param(
            [{"x": 1.0}, {"x": 2.0}, {"x": None}, {"x": 4.0}, {"x": float("nan")}],
            Or(
                left=IsNull(term=Reference(name="x")),
                right=Or(
                    left=IsNaN(term=Reference(name="x")),
                    right=In(Reference(name="x"), {DoubleLiteral(1.0), DoubleLiteral(2.0), DoubleLiteral(4.0)}),
                ),
            ),
            id="single-column",
        ),
        # Both "x" and "y" are join columns. Expected: one And(...) per input row,
        # where a None value maps to IsNull, a NaN value maps to IsNaN and any
        # other value maps to EqualTo; the per-row predicates are combined with Or.
        pytest.param(
            [
                {"x": 1.0, "y": 9.0},
                {"x": 2.0, "y": None},
                {"x": None, "y": 7.0},
                {"x": 4.0, "y": float("nan")},
                {"x": float("nan"), "y": 0.0},
            ],
            Or(
                left=Or(
                    # row 1: plain values on both columns
                    left=And(
                        left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(1.0)),
                        right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(9.0)),
                    ),
                    # row 2: y is None
                    right=And(
                        left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(2.0)),
                        right=IsNull(term=Reference(name="y")),
                    ),
                ),
                right=Or(
                    # row 3: x is None
                    left=And(
                        left=IsNull(term=Reference(name="x")),
                        right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(7.0)),
                    ),
                    right=Or(
                        # row 4: y is NaN
                        left=And(
                            left=EqualTo(term=Reference(name="x"), literal=DoubleLiteral(4.0)),
                            right=IsNaN(term=Reference(name="y")),
                        ),
                        # row 5: x is NaN
                        right=And(
                            left=IsNaN(term=Reference(name="x")),
                            right=EqualTo(term=Reference(name="y"), literal=DoubleLiteral(0.0)),
                        ),
                    ),
                ),
            ),
            id="multi-column",
        ),
    ],
)
def test_create_match_filter_with_nulls(data: list[dict], expected: BooleanExpression) -> None:
    """Check the filter built by ``create_match_filter`` when join columns hold None and NaN.

    Args:
        data: Row dictionaries loaded into a two-column (``x``, ``y``) float64
            Arrow table; the keys that occur in the rows become the join columns.
        expected: The exact expression tree the resulting filter must equal.
    """
    float_fields = [pa.field(column, pa.float64()) for column in ("x", "y")]
    arrow_table = pa.Table.from_pylist(data, schema=pa.schema(float_fields))

    # Only columns that actually occur in the rows are joined on; sorting keeps
    # the column order deterministic.
    seen_columns: set[str] = set()
    for row in data:
        seen_columns.update(row)
    join_cols = sorted(seen_columns)

    actual = create_match_filter(arrow_table, join_cols)

    assert actual == expected
508+
509+
446510
def test_upsert_with_duplicate_rows_in_table(catalog: Catalog) -> None:
447511
identifier = "default.test_upsert_with_duplicate_rows_in_table"
448512

0 commit comments

Comments
 (0)