Skip to content

Commit e605971

Browse files
committed
refactor: Remove redundant imports in upsert filter tests
1 parent 8123826 commit e605971

File tree

1 file changed

+1
-16
lines changed

1 file changed

+1
-16
lines changed

tests/table/test_upsert.py

Lines changed: 1 addition & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -905,8 +905,8 @@ def test_coarse_match_filter_composite_key() -> None:
905905
"""
906906
Test that create_coarse_match_filter produces efficient In() predicates for composite keys.
907907
"""
908+
from pyiceberg.expressions import And, Or
908909
from pyiceberg.table.upsert_util import create_coarse_match_filter, create_match_filter
909-
from pyiceberg.expressions import Or, And, In
910910

911911
# Create a table with composite key that has overlapping values
912912
# (1, 'x'), (2, 'y'), (1, 'z') - exact filter should have 3 conditions
@@ -1180,7 +1180,6 @@ def test_is_numeric_type(dtype: pa.DataType, expected_numeric: bool) -> None:
11801180
def test_coarse_match_filter_small_dataset_uses_in_filter() -> None:
11811181
"""Test that small datasets (< 10,000 unique keys) use In() filter."""
11821182
from pyiceberg.expressions import In
1183-
11841183
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
11851184

11861185
# Create a dataset with 100 unique keys (well below threshold)
@@ -1201,7 +1200,6 @@ def test_coarse_match_filter_small_dataset_uses_in_filter() -> None:
12011200
def test_coarse_match_filter_threshold_boundary_uses_in_filter() -> None:
12021201
"""Test that datasets at threshold - 1 (9,999 unique keys) still use In() filter."""
12031202
from pyiceberg.expressions import In
1204-
12051203
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
12061204

12071205
# Create a dataset with exactly threshold - 1 unique keys
@@ -1221,7 +1219,6 @@ def test_coarse_match_filter_threshold_boundary_uses_in_filter() -> None:
12211219
def test_coarse_match_filter_above_threshold_uses_optimized_filter() -> None:
12221220
"""Test that datasets >= 10,000 unique keys use optimized filter strategy."""
12231221
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1224-
12251222
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
12261223

12271224
# Create a dense dataset (consecutive IDs) with exactly threshold unique keys
@@ -1243,7 +1240,6 @@ def test_coarse_match_filter_above_threshold_uses_optimized_filter() -> None:
12431240
def test_coarse_match_filter_large_dataset() -> None:
12441241
"""Test that large datasets (100,000 unique keys) use optimized filter."""
12451242
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1246-
12471243
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
12481244

12491245
# Create a dense dataset with 100,000 unique keys
@@ -1269,7 +1265,6 @@ def test_coarse_match_filter_large_dataset() -> None:
12691265
def test_coarse_match_filter_dense_ids_use_range_filter() -> None:
12701266
"""Test that dense IDs (density > 10%) use range filter."""
12711267
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1272-
12731268
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
12741269

12751270
# Create dense IDs: all values from 0 to N-1 (100% density)
@@ -1290,7 +1285,6 @@ def test_coarse_match_filter_dense_ids_use_range_filter() -> None:
12901285
def test_coarse_match_filter_moderately_dense_ids_use_range_filter() -> None:
12911286
"""Test that moderately dense IDs (50% density) use range filter."""
12921287
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1293-
12941288
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
12951289

12961290
# Create IDs: 0, 2, 4, 6, ... (every other number) - 50% density
@@ -1330,7 +1324,6 @@ def test_coarse_match_filter_sparse_ids_use_always_true() -> None:
13301324
def test_coarse_match_filter_density_boundary_at_10_percent() -> None:
13311325
"""Test exact 10% boundary density behavior."""
13321326
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1333-
13341327
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
13351328

13361329
# Create IDs at exactly ~10% density
@@ -1379,7 +1372,6 @@ def test_coarse_match_filter_very_sparse_ids() -> None:
13791372
def test_coarse_match_filter_empty_dataset_returns_always_false() -> None:
13801373
"""Test that empty dataset returns AlwaysFalse."""
13811374
from pyiceberg.expressions import AlwaysFalse
1382-
13831375
from pyiceberg.table.upsert_util import create_coarse_match_filter
13841376

13851377
schema = pa.schema([pa.field("id", pa.int64()), pa.field("value", pa.int64())])
@@ -1393,7 +1385,6 @@ def test_coarse_match_filter_empty_dataset_returns_always_false() -> None:
13931385
def test_coarse_match_filter_single_value_dataset() -> None:
13941386
"""Test that single value dataset uses In() or EqualTo() with single value."""
13951387
from pyiceberg.expressions import In
1396-
13971388
from pyiceberg.table.upsert_util import create_coarse_match_filter
13981389

13991390
schema = pa.schema([pa.field("id", pa.int64()), pa.field("value", pa.int64())])
@@ -1418,7 +1409,6 @@ def test_coarse_match_filter_single_value_dataset() -> None:
14181409
def test_coarse_match_filter_negative_numbers_range() -> None:
14191410
"""Test that negative number IDs produce correct min/max range."""
14201411
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1421-
14221412
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
14231413

14241414
# Create dense negative IDs: -10000 to -1
@@ -1441,7 +1431,6 @@ def test_coarse_match_filter_negative_numbers_range() -> None:
14411431
def test_coarse_match_filter_mixed_sign_numbers_range() -> None:
14421432
"""Test that mixed sign IDs (-500 to 500) produce correct range spanning zero."""
14431433
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1444-
14451434
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
14461435

14471436
# Create IDs spanning zero: -5000 to 4999
@@ -1464,7 +1453,6 @@ def test_coarse_match_filter_mixed_sign_numbers_range() -> None:
14641453
def test_coarse_match_filter_float_range_filter() -> None:
14651454
"""Test that float IDs use range filter correctly."""
14661455
from pyiceberg.expressions import GreaterThanOrEqual, LessThanOrEqual
1467-
14681456
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
14691457

14701458
# Create dense float IDs
@@ -1508,7 +1496,6 @@ def test_coarse_match_filter_non_numeric_column_skips_range_filter() -> None:
15081496

15091497
def test_coarse_match_filter_composite_key_small_dataset() -> None:
15101498
"""Test that composite key with small dataset uses And(In(), In())."""
1511-
from pyiceberg.expressions import In
15121499

15131500
from pyiceberg.table.upsert_util import create_coarse_match_filter
15141501

@@ -1531,7 +1518,6 @@ def test_coarse_match_filter_composite_key_small_dataset() -> None:
15311518

15321519
def test_coarse_match_filter_composite_key_large_numeric_column() -> None:
15331520
"""Test composite key where one column has >10k unique numeric values."""
1534-
from pyiceberg.expressions import GreaterThanOrEqual, In, LessThanOrEqual
15351521

15361522
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
15371523

@@ -1557,7 +1543,6 @@ def test_coarse_match_filter_composite_key_large_numeric_column() -> None:
15571543

15581544
def test_coarse_match_filter_composite_key_mixed_types() -> None:
15591545
"""Test composite key with mixed numeric and string columns with large dataset."""
1560-
from pyiceberg.expressions import In
15611546

15621547
from pyiceberg.table.upsert_util import LARGE_FILTER_THRESHOLD, create_coarse_match_filter
15631548

0 commit comments

Comments
 (0)