Skip to content

Commit a29c1c3

Browse files
committed
stashing debug stuff
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent ba17eee commit a29c1c3

12 files changed

Lines changed: 636 additions & 55 deletions

File tree

vortex-array/src/arrow/executor/list.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::sync::Arc;
5+
use std::sync::atomic::AtomicU64;
6+
use std::sync::atomic::Ordering;
57

68
use arrow_array::ArrayRef as ArrowArrayRef;
79
use arrow_array::GenericListArray;
@@ -31,6 +33,31 @@ use crate::dtype::NativePType;
3133
use crate::dtype::Nullability;
3234
use crate::vtable::ValidityHelper;
3335

36+
/// Debug counter: bumped each time `to_arrow_list` finds the input is already a `List`
/// and dispatches to `list_to_list`.
static LIST_TO_LIST_COUNT: AtomicU64 = AtomicU64::new(0);
37+
/// Debug counter: bumped on each ListView conversion in `to_arrow_list`, both when the
/// input is already a `ListView` and after another encoding was executed into a
/// `ListViewArray`.
static LIST_VIEW_ZCTL_COUNT: AtomicU64 = AtomicU64::new(0);
38+
/// Debug counter: bumped each time `to_arrow_list` has to execute the input into a
/// `ListViewArray` because it was neither a `List` nor a `ListView`.
static EXECUTE_LIST_VIEW_COUNT: AtomicU64 = AtomicU64::new(0);
39+
40+
/// Point-in-time copy of the list-to-Arrow conversion path counters.
///
/// Each field carries the value read from the matching process-wide static counter;
/// `Default` yields all zeros.
#[derive(Debug, Clone, Copy, Default)]
41+
pub struct ListArrowPathCounters {
42+
/// Value of `LIST_TO_LIST_COUNT` when the snapshot was taken.
pub list_to_list: u64,
43+
/// Value of `LIST_VIEW_ZCTL_COUNT` when the snapshot was taken.
pub list_view_zctl: u64,
44+
/// Value of `EXECUTE_LIST_VIEW_COUNT` when the snapshot was taken.
pub execute_list_view: u64,
45+
}
46+
47+
/// Set all three list conversion path counters back to zero.
///
/// Each counter is stored independently with `Ordering::Relaxed`, so the three stores
/// are not one atomic step: a concurrent reader may observe a partially reset state.
pub(crate) fn reset_list_arrow_path_counters() {
48+
LIST_TO_LIST_COUNT.store(0, Ordering::Relaxed);
49+
LIST_VIEW_ZCTL_COUNT.store(0, Ordering::Relaxed);
50+
EXECUTE_LIST_VIEW_COUNT.store(0, Ordering::Relaxed);
51+
}
52+
53+
/// Read the current values of the list conversion path counters.
///
/// Returns a [`ListArrowPathCounters`] built from three separate `Ordering::Relaxed`
/// loads; the fields are therefore not guaranteed to describe a single instant if
/// conversions are running concurrently.
pub(crate) fn list_arrow_path_counters() -> ListArrowPathCounters {
54+
ListArrowPathCounters {
55+
list_to_list: LIST_TO_LIST_COUNT.load(Ordering::Relaxed),
56+
list_view_zctl: LIST_VIEW_ZCTL_COUNT.load(Ordering::Relaxed),
57+
execute_list_view: EXECUTE_LIST_VIEW_COUNT.load(Ordering::Relaxed),
58+
}
59+
}
60+
3461
/// Convert a Vortex array into an Arrow GenericListArray.
3562
pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
3663
array: ArrayRef,
@@ -39,12 +66,14 @@ pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
3966
) -> VortexResult<ArrowArrayRef> {
4067
// If the Vortex array is already in List format, we can directly convert it.
4168
if let Some(array) = array.as_opt::<List>() {
69+
LIST_TO_LIST_COUNT.fetch_add(1, Ordering::Relaxed);
4270
return list_to_list::<O>(array, elements_field, ctx);
4371
}
4472

4573
// If the Vortex array is a ListViewArray, rebuild to ZCTL if needed and convert.
4674
let array = match array.try_into::<ListView>() {
4775
Ok(array) => {
76+
LIST_VIEW_ZCTL_COUNT.fetch_add(1, Ordering::Relaxed);
4877
let zctl = if array.is_zero_copy_to_list() {
4978
array
5079
} else {
@@ -59,12 +88,14 @@ pub(super) fn to_arrow_list<O: OffsetSizeTrait + NativePType>(
5988
// Note: arrow_cast::cast supports ListView → List (apache/arrow-rs#8735), but it
6089
// unconditionally uses take. Our rebuild uses a heuristic that picks list-by-list
6190
// for large lists, which avoids materializing a large index buffer.
91+
EXECUTE_LIST_VIEW_COUNT.fetch_add(1, Ordering::Relaxed);
6292
let list_view = array.execute::<ListViewArray>(ctx)?;
6393
let zctl = if list_view.is_zero_copy_to_list() {
6494
list_view
6595
} else {
6696
list_view.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
6797
};
98+
LIST_VIEW_ZCTL_COUNT.fetch_add(1, Ordering::Relaxed);
6899
list_view_zctl::<O>(zctl, elements_field, ctx)
69100
}
70101

vortex-array/src/arrow/executor/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ use arrow_schema::Field;
2525
use arrow_schema::FieldRef;
2626
use arrow_schema::Schema;
2727
use itertools::Itertools;
28+
pub(crate) use list::ListArrowPathCounters;
29+
pub(crate) use list::list_arrow_path_counters;
30+
pub(crate) use list::reset_list_arrow_path_counters;
2831
use vortex_error::VortexResult;
2932
use vortex_error::vortex_bail;
3033
use vortex_error::vortex_ensure;

vortex-array/src/arrow/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ mod null_buffer;
1515
mod record_batch;
1616

1717
pub use datum::*;
18+
pub(crate) use executor::ListArrowPathCounters;
19+
pub(crate) use executor::list_arrow_path_counters;
20+
pub(crate) use executor::reset_list_arrow_path_counters;
1821
pub use executor::*;
1922
pub use iter::*;
2023

vortex-array/src/debug.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::arrow::ListArrowPathCounters;
5+
use crate::arrow::list_arrow_path_counters;
6+
use crate::arrow::reset_list_arrow_path_counters;
7+
8+
/// Public entry point that zeroes the Arrow conversion path counters.
///
/// Thin wrapper over the crate-internal `reset_list_arrow_path_counters`.
pub fn reset_arrow_path_counters() {
9+
reset_list_arrow_path_counters();
10+
}
11+
12+
/// Public entry point returning the current Arrow conversion path counters.
///
/// Thin wrapper over the crate-internal `list_arrow_path_counters`.
pub fn arrow_path_counters() -> ListArrowPathCounters {
13+
list_arrow_path_counters()
14+
}

vortex-array/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pub mod builtins;
4141
mod canonical;
4242
mod columnar;
4343
pub mod compute;
44+
pub mod debug;
4445
pub mod display;
4546
pub mod dtype;
4647
mod executor;

vortex-bench/src/runner.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
//! Generic benchmark runner infrastructure to reduce boilerplate across engine-specific benchmarks.
55
6+
use std::env;
7+
use std::fmt;
68
use std::fs::File;
79
use std::future::Future;
810
use std::io::Write;
@@ -12,7 +14,11 @@ use std::time::Duration;
1214
use std::time::Instant;
1315

1416
use indicatif::ProgressBar;
17+
use vortex::array::debug::arrow_path_counters;
18+
use vortex::array::debug::reset_arrow_path_counters;
1519
use vortex::error::vortex_panic;
20+
use vortex::scan::debug::counters as scan_debug_counters;
21+
use vortex::scan::debug::reset_counters as reset_scan_debug_counters;
1622

1723
use crate::Benchmark;
1824
use crate::BenchmarkDataset;
@@ -45,6 +51,47 @@ use crate::measurements::QueryMeasurement;
4551
use crate::memory::BenchmarkMemoryTracker;
4652
use crate::url_scheme_to_storage;
4753

54+
/// Flattened view of the debug counters the benchmark runner logs per iteration.
///
/// The first three fields come from the Arrow path counters, the last one from the
/// scan debug counters.
#[derive(Debug, Clone, Copy, Default)]
55+
struct DebugCounterSnapshot {
56+
/// Copied from the `list_to_list` Arrow path counter.
list_to_list: u64,
57+
/// Copied from the `list_view_zctl` Arrow path counter.
list_view_zctl: u64,
58+
/// Copied from the `execute_list_view` Arrow path counter.
execute_list_view: u64,
59+
/// Copied from the scan counters' `legacy_stream_fallbacks` field.
// NOTE(review): exact meaning is defined in vortex-scan, not visible here.
legacy_stream_fallbacks: u64,
60+
}
61+
62+
/// Renders the snapshot as a single line of comma-separated `name=value` pairs,
/// in field declaration order, for use as a tracing field.
impl fmt::Display for DebugCounterSnapshot {
63+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64+
write!(
65+
f,
66+
"list_to_list={}, list_view_zctl={}, execute_list_view={}, legacy_stream_fallbacks={}",
67+
self.list_to_list,
68+
self.list_view_zctl,
69+
self.execute_list_view,
70+
self.legacy_stream_fallbacks
71+
)
72+
}
73+
}
74+
75+
/// Returns `true` when the `VORTEX_BENCH_DEBUG_COUNTERS` environment variable is set.
///
/// Only presence is checked, not the value. The environment is consulted on every call.
fn debug_counters_enabled() -> bool {
76+
env::var_os("VORTEX_BENCH_DEBUG_COUNTERS").is_some()
77+
}
78+
79+
/// Zero both groups of debug counters: the Arrow path counters and the scan counters.
fn reset_debug_counters() {
80+
reset_arrow_path_counters();
81+
reset_scan_debug_counters();
82+
}
83+
84+
/// Read the Arrow path counters and the scan counters and merge them into one
/// [`DebugCounterSnapshot`].
///
/// The two sources are read one after the other, so the result is not an atomic
/// snapshot across both.
fn snapshot_debug_counters() -> DebugCounterSnapshot {
85+
let arrow = arrow_path_counters();
86+
let scan = scan_debug_counters();
87+
DebugCounterSnapshot {
88+
list_to_list: arrow.list_to_list,
89+
list_view_zctl: arrow.list_view_zctl,
90+
execute_list_view: arrow.execute_list_view,
91+
legacy_stream_fallbacks: scan.legacy_stream_fallbacks,
92+
}
93+
}
94+
4895
/// Results from a benchmark run.
4996
pub struct BenchmarkResults {
5097
pub query_measurements: Vec<QueryMeasurement>,
@@ -130,6 +177,9 @@ impl SqlBenchmarkRunner {
130177
let mut row_count = None;
131178

132179
for _ in 0..iterations {
180+
if debug_counters_enabled() {
181+
reset_debug_counters();
182+
}
133183
let start = Instant::now();
134184
let (timing, result) = f();
135185
let elapsed = timing.unwrap_or_else(|| start.elapsed());
@@ -138,6 +188,15 @@ impl SqlBenchmarkRunner {
138188
if row_count.is_none() {
139189
row_count = Some(result.row_count());
140190
}
191+
192+
if debug_counters_enabled() {
193+
tracing::info!(
194+
query_idx,
195+
%format,
196+
counters = %snapshot_debug_counters(),
197+
"debug counters"
198+
);
199+
}
141200
}
142201

143202
let row_count = row_count.expect("iterations must be > 0");
@@ -363,6 +422,9 @@ impl SqlBenchmarkRunner {
363422
tracing::debug!(%format, query_idx, "Running query");
364423

365424
for _ in 0..iterations {
425+
if debug_counters_enabled() {
426+
reset_debug_counters();
427+
}
366428
let start = Instant::now();
367429
let (timing, result) = execute(query_idx, &ctx, query.as_str())
368430
.await
@@ -375,6 +437,15 @@ impl SqlBenchmarkRunner {
375437
if row_count.is_none() {
376438
row_count = Some(result.row_count());
377439
}
440+
441+
if debug_counters_enabled() {
442+
tracing::info!(
443+
query_idx,
444+
%format,
445+
counters = %snapshot_debug_counters(),
446+
"debug counters"
447+
);
448+
}
378449
}
379450

380451
let row_count = row_count.expect("iterations must be > 0");

vortex-scan/src/debug.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use crate::repeated_scan::ScanDebugCounters;
5+
use crate::repeated_scan::reset_scan_debug_counters;
6+
use crate::repeated_scan::scan_debug_counters;
7+
8+
/// Public entry point that resets the scan debug counters.
///
/// Thin wrapper over `repeated_scan::reset_scan_debug_counters`.
pub fn reset_counters() {
9+
reset_scan_debug_counters();
10+
}
11+
12+
/// Public entry point returning the current scan debug counters.
///
/// Thin wrapper over `repeated_scan::scan_debug_counters`.
pub fn counters() -> ScanDebugCounters {
13+
scan_debug_counters()
14+
}

vortex-scan/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
/// A heuristic for an ideal split size.
99
///
1010
/// We don't actually know if this is right, but it is probably a good estimate.
11-
const IDEAL_SPLIT_SIZE: u64 = 64 * 1024;
11+
const IDEAL_SPLIT_SIZE: u64 = 100_000;
1212

1313
pub mod api;
1414
pub mod arrow;
15+
pub mod debug;
1516
mod filter;
1617
pub mod row_mask;
1718
mod splits;

0 commit comments

Comments
 (0)