Skip to content

Commit 60c5585

Browse files
committed
feat: add --limit option to shuffle benchmark (default 1M rows)
1 parent 3873b95 commit 60c5585

1 file changed

Lines changed: 24 additions & 0 deletions

File tree

native/core/src/bin/shuffle_bench.rs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,6 +152,10 @@ struct Args {
152152
/// Write buffer size in bytes
153153
#[arg(long, default_value_t = 1048576)]
154154
write_buffer_size: usize,
155+
156+
/// Maximum number of rows to use (default: 1,000,000)
157+
#[arg(long, default_value_t = 1_000_000)]
158+
limit: usize,
155159
}
156160

157161
fn main() {
@@ -178,6 +182,26 @@ fn main() {
178182
};
179183
let load_elapsed = load_start.elapsed();
180184

185+
// Apply row limit
186+
let batches = {
187+
let mut limited = Vec::new();
188+
let mut rows_so_far = 0usize;
189+
for batch in batches {
190+
if rows_so_far >= args.limit {
191+
break;
192+
}
193+
let remaining = args.limit - rows_so_far;
194+
if batch.num_rows() <= remaining {
195+
rows_so_far += batch.num_rows();
196+
limited.push(batch);
197+
} else {
198+
limited.push(batch.slice(0, remaining));
199+
rows_so_far += remaining;
200+
}
201+
}
202+
limited
203+
};
204+
181205
let schema = batches[0].schema();
182206
let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
183207
let total_bytes: usize = batches.iter().map(|b| b.get_array_memory_size()).sum();

0 commit comments

Comments
 (0)