2727from pyarrow import dataset as pyarrow_dataset
2828
2929from pypaimon .manifest .schema .simple_stats import SimpleStats
30+ from pypaimon .table .row .generic_row import GenericRow
3031from pypaimon .table .row .internal_row import InternalRow
3132
3233
@@ -67,32 +68,31 @@ def test(self, record: InternalRow) -> bool:
6768 raise ValueError (f"Unsupported predicate method: { self .method } " )
6869
6970 def test_by_simple_stats (self , stat : SimpleStats , row_count : int ) -> bool :
70- return self .test_by_stats ({
71- "min_values" : stat .min_values .to_dict (),
72- "max_values" : stat .max_values .to_dict (),
73- "null_counts" : {
74- stat .min_values .fields [i ].name : stat .null_counts [i ] for i in range (len (stat .min_values .fields ))
75- },
76- "row_count" : row_count ,
77- })
78-
79- def test_by_stats (self , stat : Dict ) -> bool :
71+ """Test predicate against BinaryRow stats with denseIndexMapping like Java implementation."""
8072 if self .method == 'and' :
81- return all (p .test_by_stats (stat ) for p in self .literals )
73+ return all (p .test_by_simple_stats (stat , row_count ) for p in self .literals )
8274 if self .method == 'or' :
83- t = any (p .test_by_stats (stat ) for p in self .literals )
84- return t
75+ return any (p .test_by_simple_stats (stat , row_count ) for p in self .literals )
8576
86- null_count = stat ["null_counts" ][self .field ]
87- row_count = stat ["row_count" ]
77+ # Get null count using the mapped index
78+ null_count = stat .null_counts [self .index ] if stat .null_counts and self .index < len (
79+ stat .null_counts ) else 0
8880
8981 if self .method == 'isNull' :
9082 return null_count is not None and null_count > 0
9183 if self .method == 'isNotNull' :
9284 return null_count is None or row_count is None or null_count < row_count
9385
94- min_value = stat ["min_values" ][self .field ]
95- max_value = stat ["max_values" ][self .field ]
86+ if not isinstance (stat .min_values , GenericRow ):
87+ # Read this field's min/max by position (self.index); name-based lookup is only used in the GenericRow branch below
88+ min_value = stat .min_values .get_field (self .index )
89+ max_value = stat .max_values .get_field (self .index )
90+ else :
91+ # TODO transform partition to BinaryRow
92+ min_values = stat .min_values .to_dict ()
93+ max_values = stat .max_values .to_dict ()
94+ min_value = min_values [self .field ]
95+ max_value = max_values [self .field ]
9696
9797 if min_value is None or max_value is None or (null_count is not None and null_count == row_count ):
9898 # invalid stats, skip validation
@@ -164,7 +164,6 @@ def __init__(cls, name, bases, dct):
164164
165165
166166class Tester (ABC , metaclass = RegisterMeta ):
167-
168167 name = None
169168
170169 @abstractmethod
@@ -187,7 +186,6 @@ def test_by_arrow(self, val, literals) -> bool:
187186
188187
189188class Equal (Tester ):
190-
191189 name = 'equal'
192190
193191 def test_by_value (self , val , literals ) -> bool :
@@ -201,7 +199,6 @@ def test_by_arrow(self, val, literals) -> bool:
201199
202200
203201class NotEqual (Tester ):
204-
205202 name = "notEqual"
206203
207204 def test_by_value (self , val , literals ) -> bool :
@@ -215,7 +212,6 @@ def test_by_arrow(self, val, literals) -> bool:
215212
216213
217214class LessThan (Tester ):
218-
219215 name = "lessThan"
220216
221217 def test_by_value (self , val , literals ) -> bool :
@@ -229,7 +225,6 @@ def test_by_arrow(self, val, literals) -> bool:
229225
230226
231227class LessOrEqual (Tester ):
232-
233228 name = "lessOrEqual"
234229
235230 def test_by_value (self , val , literals ) -> bool :
@@ -243,7 +238,6 @@ def test_by_arrow(self, val, literals) -> bool:
243238
244239
245240class GreaterThan (Tester ):
246-
247241 name = "greaterThan"
248242
249243 def test_by_value (self , val , literals ) -> bool :
@@ -257,7 +251,6 @@ def test_by_arrow(self, val, literals) -> bool:
257251
258252
259253class GreaterOrEqual (Tester ):
260-
261254 name = "greaterOrEqual"
262255
263256 def test_by_value (self , val , literals ) -> bool :
@@ -271,7 +264,6 @@ def test_by_arrow(self, val, literals) -> bool:
271264
272265
273266class In (Tester ):
274-
275267 name = "in"
276268
277269 def test_by_value (self , val , literals ) -> bool :
@@ -285,7 +277,6 @@ def test_by_arrow(self, val, literals) -> bool:
285277
286278
287279class NotIn (Tester ):
288-
289280 name = "notIn"
290281
291282 def test_by_value (self , val , literals ) -> bool :
@@ -299,7 +290,6 @@ def test_by_arrow(self, val, literals) -> bool:
299290
300291
301292class Between (Tester ):
302-
303293 name = "between"
304294
305295 def test_by_value (self , val , literals ) -> bool :
@@ -313,7 +303,6 @@ def test_by_arrow(self, val, literals) -> bool:
313303
314304
315305class StartsWith (Tester ):
316-
317306 name = "startsWith"
318307
319308 def test_by_value (self , val , literals ) -> bool :
@@ -329,7 +318,6 @@ def test_by_arrow(self, val, literals) -> bool:
329318
330319
331320class EndsWith (Tester ):
332-
333321 name = "endsWith"
334322
335323 def test_by_value (self , val , literals ) -> bool :
@@ -343,7 +331,6 @@ def test_by_arrow(self, val, literals) -> bool:
343331
344332
345333class Contains (Tester ):
346-
347334 name = "contains"
348335
349336 def test_by_value (self , val , literals ) -> bool :
@@ -357,7 +344,6 @@ def test_by_arrow(self, val, literals) -> bool:
357344
358345
359346class IsNull (Tester ):
360-
361347 name = "isNull"
362348
363349 def test_by_value (self , val , literals ) -> bool :
@@ -371,7 +357,6 @@ def test_by_arrow(self, val, literals) -> bool:
371357
372358
373359class IsNotNull (Tester ):
374-
375360 name = "isNotNull"
376361
377362 def test_by_value (self , val , literals ) -> bool :
0 commit comments