Skip to content

Commit 889b25f

Browse files
author
Brian Hart
committed
Fix dtype mismatch in FileStatsLayoutReader for stat scalars
Use the stat's own dtype (e.g. u64 for NullCount) rather than the field dtype when constructing stat scalars in stats_ref. This fixes IS NULL pruning on nullable timestamp columns which previously failed with a dtype mismatch. Add regression test for is_null pruning on a nullable timestamp column. Signed-off-by: Brian Hart <brian@brainhart.dev>
1 parent 1838a7a commit 889b25f

1 file changed

Lines changed: 62 additions & 1 deletion

File tree

vortex-file/src/v2/file_stats_reader.rs

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,10 @@ impl StatsCatalog for FileStatsLayoutReader {
126126

127127
let stat_value = field_stats.get(stat)?.as_exact()?;
128128
let field_dtype = self.struct_fields.field_by_index(field_idx)?;
129-
let stat_scalar = Scalar::try_new(field_dtype, Some(stat_value)).ok()?;
129+
// Use the stat's own dtype rather than the field dtype. For example,
130+
// NullCount is always u64 regardless of the column type.
131+
let stat_dtype = stat.dtype(&field_dtype)?;
132+
let stat_scalar = Scalar::try_new(stat_dtype, Some(stat_value)).ok()?;
130133

131134
Some(lit(stat_scalar))
132135
}
@@ -209,16 +212,20 @@ mod tests {
209212

210213
use vortex_array::ArrayContext;
211214
use vortex_array::IntoArray as _;
215+
use vortex_array::arrays::PrimitiveArray;
212216
use vortex_array::arrays::StructArray;
217+
use vortex_array::arrays::datetime::TemporalData;
213218
use vortex_array::dtype::DType;
214219
use vortex_array::dtype::Nullability;
215220
use vortex_array::dtype::PType;
216221
use vortex_array::expr::get_item;
217222
use vortex_array::expr::gt;
223+
use vortex_array::expr::is_null;
218224
use vortex_array::expr::lit;
219225
use vortex_array::expr::root;
220226
use vortex_array::expr::stats::Precision;
221227
use vortex_array::expr::stats::Stat;
228+
use vortex_array::extension::datetime::TimeUnit;
222229
use vortex_array::scalar::ScalarValue;
223230
use vortex_array::scalar_fn::session::ScalarFnSession;
224231
use vortex_array::session::ArraySession;
@@ -337,4 +344,58 @@ mod tests {
337344
Ok(())
338345
})
339346
}
347+
348+
/// Regression test: `IS NULL` on a nullable timestamp column must not fail with a
349+
/// dtype mismatch. The bug was that `stats_ref` used the *field* dtype (timestamp)
350+
/// for the `NullCount` stat scalar instead of the stat's own dtype (u64).
351+
#[test]
352+
fn is_null_pruning_on_nullable_timestamp_column() -> VortexResult<()> {
353+
block_on(|handle| async {
354+
let ctx = ArrayContext::empty();
355+
let segments = Arc::new(TestSegments::default());
356+
let (ptr, eof) = SequenceId::root().split();
357+
358+
// Build a struct with a nullable timestamp column containing some nulls.
359+
let prim_array =
360+
PrimitiveArray::from_option_iter([Some(1_000_000i64), None, Some(3_000_000)])
361+
.into_array();
362+
let ts_data = TemporalData::new_timestamp(prim_array, TimeUnit::Microseconds, None);
363+
let ts_dtype = ts_data.dtype().clone();
364+
let ts_array = ts_data.into_array();
365+
366+
let struct_array = StructArray::from_fields([("deleted_at", ts_array)].as_slice())?;
367+
368+
let strategy = TableStrategy::new(
369+
Arc::new(FlatLayoutStrategy::default()),
370+
Arc::new(FlatLayoutStrategy::default()),
371+
);
372+
let layout = strategy
373+
.write_stream(
374+
ctx,
375+
segments.clone(),
376+
struct_array.into_array().to_array_stream().sequenced(ptr),
377+
eof,
378+
handle,
379+
)
380+
.await?;
381+
382+
let child = layout.new_reader("".into(), segments, &SESSION)?;
383+
384+
// File-level stats: 1 null in deleted_at.
385+
let mut stats = StatsSet::default();
386+
stats.set(Stat::NullCount, Precision::exact(ScalarValue::from(1u64)));
387+
let file_stats = FileStatistics::new(Arc::from([stats]), Arc::from([ts_dtype]));
388+
389+
let reader = FileStatsLayoutReader::new(child, file_stats, SESSION.clone());
390+
391+
// `is_null(deleted_at)` — should NOT panic or error due to dtype mismatch.
392+
let expr = is_null(get_item("deleted_at", root()));
393+
let mask = Mask::new_true(3);
394+
let result = reader.pruning_evaluation(&(0..3), &expr, mask)?.await?;
395+
// null_count is 1 (non-zero), so is_null is not falsified => not pruned.
396+
assert_eq!(result, Mask::new_true(3));
397+
398+
Ok(())
399+
})
400+
}
340401
}

0 commit comments

Comments
 (0)