Skip to content

Commit bb19d3e

Browse files
adriangbclaude
andcommitted
perf(parquet): skip per-chunk vals_in_chunk computation when all values are non-null
The chunker's per-chunk `partition_point` (arrow path) or `LevelDataRef::value_count` (non-arrow path) returns `chunk_size` by construction whenever the column has no nulls. The GKE bench showed ~+12–27% regressions on `list_primitive_non_null/*` and `string_non_null/*` consistent with that walk dominating: ~50 K chunks × a binary search through a 50 M-entry `non_null_indices` buffer means cold cache reads on every chunk. Compute a `ValueCountStrategy` once at `write_batch_internal` entry: - `AllPresent` — set when the arrow caller passed `non_null_indices.len() == num_levels`, or when the column has `max_def_level == 0`. The chunker uses `chunk_size` directly with no per-chunk work. - `Sorted(&[usize])` — arrow nullable path; binary-search the indices. - `DefLevelScan(max_def)` — non-arrow nullable path; def-level scan. For the bench's `list_primitive_non_null` (all-non-null lists with a 50 M-entry leaf), this drops the per-chunk binary search entirely; expected to bring those rows back near noise. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 403af94 commit bb19d3e

2 files changed

Lines changed: 55 additions & 9 deletions

File tree

parquet/src/column/writer/byte_budget_chunker.rs

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,25 @@ use crate::column::writer::encoder::ColumnValueEncoder;
3838
use crate::file::properties::WriterProperties;
3939
use crate::schema::types::ColumnDescriptor;
4040

41+
/// Strategy for counting how many values fall in a chunk's level range.
42+
/// Computed once per `write_batch_internal` call rather than per chunk so
43+
/// `partition_point` and `LevelDataRef::value_count` don't run when their
44+
/// answer is statically known to be `chunk_size`.
45+
#[derive(Clone, Copy)]
46+
pub(crate) enum ValueCountStrategy<'a> {
47+
/// Every level corresponds to a non-null value, so the answer is
48+
/// always `chunk_size`. Either the column has `max_def_level == 0`
49+
/// or the arrow caller's `non_null_indices.len() == num_levels`.
50+
AllPresent,
51+
/// Arrow nullable path: binary-search the sorted `non_null_indices`
52+
/// for the chunk's level range. O(log n) per chunk.
53+
Sorted(&'a [usize]),
54+
/// Non-arrow nullable path: scan the def-level slice for entries
55+
/// matching `max_def`. O(n) per chunk; only used when no sorted
56+
/// `value_indices` were supplied.
57+
DefLevelScan(i16),
58+
}
59+
4160
/// Per-column-open chunker that picks byte-budget-aware mini-batch sizes.
4261
pub(crate) struct ByteBudgetChunker {
4362
/// Configured data page byte limit for the column.
@@ -48,9 +67,6 @@ pub(crate) struct ByteBudgetChunker {
4867
/// decision short-circuit with no work for every numeric, bool, or
4968
/// narrow `FIXED_LEN_BYTE_ARRAY` column.
5069
static_always_fits: bool,
51-
/// Column's `max_def_level`, needed by `LevelDataRef::value_count` for
52-
/// the non-arrow path where we don't have a sorted `non_null_indices`.
53-
max_def_level: i16,
5470
}
5571

5672
impl ByteBudgetChunker {
@@ -75,7 +91,29 @@ impl ByteBudgetChunker {
7591
Self {
7692
page_byte_limit,
7793
static_always_fits,
78-
max_def_level: descr.max_def_level(),
94+
}
95+
}
96+
97+
/// Pick the cheapest strategy for `vals_in_chunk` queries for this
98+
/// `write_batch_internal` call. Computed once and reused per chunk so
99+
/// we don't repeat the check on every iteration.
100+
#[inline]
101+
pub(crate) fn value_count_strategy<'a>(
102+
descr: &ColumnDescriptor,
103+
value_indices: Option<&'a [usize]>,
104+
num_levels: usize,
105+
) -> ValueCountStrategy<'a> {
106+
match value_indices {
107+
// Arrow path. If every level has a non-null value, the gather
108+
// index is the trivial `0..num_levels` and we don't need to
109+
// walk it per chunk — `vals_in_chunk == chunk_size` by
110+
// construction.
111+
Some(idx) if idx.len() == num_levels => ValueCountStrategy::AllPresent,
112+
Some(idx) => ValueCountStrategy::Sorted(idx),
113+
// Non-arrow path. `max_def_level == 0` means the column has
114+
// no nullability, so again `vals_in_chunk == chunk_size`.
115+
None if descr.max_def_level() == 0 => ValueCountStrategy::AllPresent,
116+
None => ValueCountStrategy::DefLevelScan(descr.max_def_level()),
79117
}
80118
}
81119

@@ -108,6 +146,7 @@ impl ByteBudgetChunker {
108146
values: &E::Values,
109147
value_indices: Option<&[usize]>,
110148
chunk_def: LevelDataRef<'_>,
149+
strategy: ValueCountStrategy<'_>,
111150
values_offset: usize,
112151
chunk_size: usize,
113152
end_offset: usize,
@@ -116,11 +155,15 @@ impl ByteBudgetChunker {
116155
return chunk_size;
117156
}
118157
// Count how many values fall in this chunk's level range. The
119-
// arrow path passes a sorted `non_null_indices`, so this is one
120-
// binary search; otherwise we walk the def-level slice.
121-
let vals_in_chunk = match value_indices {
122-
Some(idx) => idx[values_offset..].partition_point(|&i| i < end_offset),
123-
None => chunk_def.value_count(chunk_size, self.max_def_level),
158+
// strategy was picked once per `write_batch_internal` call so
159+
// the common all-non-null case (every level has a value) skips
160+
// the per-chunk binary search and def-level scan entirely.
161+
let vals_in_chunk = match strategy {
162+
ValueCountStrategy::AllPresent => chunk_size,
163+
ValueCountStrategy::Sorted(idx) => {
164+
idx[values_offset..].partition_point(|&i| i < end_offset)
165+
}
166+
ValueCountStrategy::DefLevelScan(max_def) => chunk_def.value_count(chunk_size, max_def),
124167
};
125168
if vals_in_chunk == 0 {
126169
return chunk_size;

parquet/src/column/writer/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
566566
self.props.write_batch_size()
567567
};
568568
let chunker = ByteBudgetChunker::new(&self.descr, &self.props, base_batch_size);
569+
let value_count_strategy =
570+
ByteBudgetChunker::value_count_strategy(&self.descr, value_indices, num_levels);
569571
while levels_offset < num_levels {
570572
let mut end_offset = num_levels.min(levels_offset + base_batch_size);
571573

@@ -585,6 +587,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
585587
values,
586588
value_indices,
587589
chunk_def,
590+
value_count_strategy,
588591
values_offset,
589592
chunk_size,
590593
end_offset,

0 commit comments

Comments
 (0)