Skip to content

Commit cfe7f38

Browse files
authored
perf(cubeorchestrator): QueryResult - migrate to columnar storage (#10829)
QueryResult now stores data column-major (one buffer per column + row_count) instead of row-major (one Vec per row). Why: the Columnar branch had to re-pivot the row-major matrix on every call; with column-major storage that pivot disappears. `TransformedData::transform/columnar`: | cols / rows | 1 000 | 10 000 | 50 000 | 100 000 | | --- | --- | --- | --- | --- | | 8 | 115.5µs → 91.0µs (-21.3%) | 1.14ms → 882.9µs (-22.4%) | 6.14ms → 4.42ms (-28.0%) | 14.71ms → 8.91ms (-39.4%) | | 16 | 248.2µs → 195.3µs (-21.3%) | 2.50ms → 1.93ms (-22.6%) | 16.50ms → 9.79ms (-40.6%) | 44.91ms → 18.90ms (-57.9%) | | 32 | 504.5µs → 413.3µs (-18.1%) | 5.48ms → 3.99ms (-27.2%) | 29.46ms → 20.01ms (-32.1%) | 60.20ms → 40.13ms (-33.3%) | | 64 | 982.0µs → 819.1µs (-16.6%) | 12.69ms → 7.96ms (-37.3%) | 54.49ms → 39.82ms (-26.9%) | 116.82ms → 80.09ms (-31.4%) | `TransformedData::transform/compact`: | cols / rows | 1 000 | 10 000 | 50 000 | 100 000 | | --- | --- | --- | --- | --- | | 8 | 122.3µs → 116.1µs (-5.1%) | 1.19ms → 1.16ms (-2.8%) | 6.04ms → 5.76ms (-4.7%) | 11.96ms → 12.19ms (+1.9%) | | 16 | 253.2µs → 266.0µs (+5.1%) | 2.54ms → 2.61ms (+2.8%) | 12.86ms → 13.19ms (+2.5%) | 25.07ms → 28.73ms (+14.6%) | | 32 | 486.4µs → 502.1µs (+3.2%) | 4.96ms → 5.22ms (+5.1%) | 25.20ms → 28.14ms (+11.7%) | 50.49ms → 56.27ms (+11.4%) | | 64 | 912.5µs → 951.0µs (+4.2%) | 9.16ms → 9.61ms (+5.0%) | 46.23ms → 52.63ms (+13.8%) | 94.70ms → 105.23ms (+11.1%) | `TransformedData::transform/vanilla`: | cols / rows | 1 000 | 10 000 | 50 000 | 100 000 | | --- | --- | --- | --- | --- | | 8 | 221.7µs → 216.6µs (-2.3%) | 2.20ms → 2.20ms (-0.2%) | 11.23ms → 10.79ms (-3.9%) | 23.57ms → 23.09ms (-2.0%) | | 16 | 404.4µs → 406.3µs (+0.5%) | 3.88ms → 4.03ms (+3.8%) | 20.02ms → 21.27ms (+6.3%) | 40.63ms → 48.68ms (+19.8%) | | 32 | 755.5µs → 787.2µs (+4.2%) | 7.89ms → 7.86ms (-0.3%) | 39.16ms → 45.25ms (+15.6%) | 77.37ms → 91.59ms (+18.4%) | | 64 | 1.37ms → 1.44ms (+4.5%) | 14.52ms → 15.34ms (+5.7%) | 74.31ms → 88.02ms (+18.4%) | 159.19ms → 176.54ms (+10.9%) | `QueryResult::from_cubestore_fb` (Δ vs master; no saved master baseline to show absolutes) | cols / rows | 1 000 | 10 000 | 50 000 | 100 000 | | --- | --- | --- | --- | --- | | 8 | -9.3% | -8.4% | -11.0% | -9.2% | | 16 | -7.9% | -12.3% | -11.7% | -8.7% | | 32 | -9.4% | -4.2% | -3.2% | +9.4% | | 64 | -2.0% | -3.1% | +3.0% | +8.0% | `QueryResult::from_js_raw_data`: | combo | parse_only | parse_plus_build | | --- | --- | --- | | c08_r10000 | 1.82ms → 1.82ms (-0.2%) | 2.16ms → 1.73ms (-20.1%) | | c16_r10000 | 3.76ms → 3.77ms (+0.2%) | 4.39ms → 3.71ms (-15.7%) | | c16_r100000 | 37.86ms → 38.25ms (+1.0%) | 53.10ms → 38.73ms (-27.1%) | | c32_r100000 | 78.73ms → 79.77ms (+1.3%) | 106.57ms → 80.78ms (-24.2%) |
1 parent 6aa2699 commit cfe7f38

9 files changed

Lines changed: 557 additions & 389 deletions

File tree

packages/cubejs-backend-native/src/orchestrator.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -334,15 +334,22 @@ pub fn get_cubestore_result(mut cx: FunctionContext) -> JsResult<JsValue> {
334334
let result = cx.argument::<JsBox<Arc<QueryResult>>>(0)?;
335335

336336
let js_array = cx.execute_scoped(|mut cx| {
337-
let js_keys: Vec<Handle<JsString>> = result.members.iter().map(|k| cx.string(k)).collect();
337+
let js_keys: Vec<Handle<JsString>> =
338+
result.members().iter().map(|k| cx.string(k)).collect();
338339

339-
let js_array = JsArray::new(&mut cx, result.rows.len());
340+
let row_count = result.row_count();
341+
let columns: Vec<_> = (0..js_keys.len())
342+
.map(|i| result.column(i))
343+
.collect::<Result<_, _>>()
344+
.or_else(|err| cx.throw_error(err.to_string()))?;
345+
let js_array = JsArray::new(&mut cx, row_count);
340346

341-
for (i, row) in result.rows.iter().enumerate() {
347+
for row_idx in 0..row_count {
342348
let js_row = cx.execute_scoped(|mut cx| {
343349
let js_row = JsObject::new(&mut cx);
344350

345-
for (js_key, value) in js_keys.iter().zip(row.iter()) {
351+
for (col_idx, js_key) in js_keys.iter().enumerate() {
352+
let value = &columns[col_idx][row_idx];
346353
let js_value: Handle<'_, JsValue> = match value {
347354
DBResponsePrimitive::Null => cx.null().upcast(),
348355
// For compatibility, we convert all primitives to strings
@@ -355,7 +362,7 @@ pub fn get_cubestore_result(mut cx: FunctionContext) -> JsResult<JsValue> {
355362
Ok(js_row)
356363
})?;
357364

358-
js_array.set(&mut cx, i as u32, js_row)?;
365+
js_array.set(&mut cx, row_idx as u32, js_row)?;
359366
}
360367

361368
Ok(js_array)

rust/cube/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
target
22
.zed
3+
.claude

rust/cube/cubeorchestrator/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,7 @@ criterion = { version = "0.8", default-features = false, features = ["html_repor
2323
[[bench]]
2424
name = "transform"
2525
harness = false
26+
27+
[[bench]]
28+
name = "parser"
29+
harness = false
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#![allow(dead_code)]
2+
3+
use cubeorchestrator::query_result_transform::{ColumnarArray, DBResponsePrimitive};
4+
use cubeorchestrator::transport::JsRawColumnarData;
5+
6+
pub const ROW_COUNTS: &[usize] = &[1_000, 10_000, 50_000, 100_000];
7+
pub const COLUMN_COUNTS: &[usize] = &[8, 16, 32, 64];
8+
9+
#[derive(Clone)]
10+
pub struct TimeColumn {
11+
pub member: String,
12+
pub alias: String,
13+
}
14+
15+
/// Split a target column count into ~60% dimensions and ~40% measures.
16+
pub fn split_dim_measure(col_count: usize) -> (usize, usize) {
17+
let dim_count = (col_count * 6) / 10;
18+
let measure_count = col_count - dim_count;
19+
(dim_count, measure_count)
20+
}
21+
22+
pub fn make_member_aliases(prefix: &str, count: usize) -> Vec<(String, String)> {
23+
(0..count)
24+
.map(|i| {
25+
(
26+
format!("Sales.{}{}", prefix, i),
27+
format!("sales__{}{}", prefix, i),
28+
)
29+
})
30+
.collect()
31+
}
32+
33+
pub fn build_dataset(
34+
row_count: usize,
35+
dimensions: &[(String, String)],
36+
measures: &[(String, String)],
37+
time_dims: &[TimeColumn],
38+
) -> JsRawColumnarData {
39+
let total_cols = dimensions.len() + measures.len() + time_dims.len();
40+
let mut members = Vec::with_capacity(total_cols);
41+
let mut columns: Vec<ColumnarArray> = Vec::with_capacity(total_cols);
42+
43+
for (j, (_, alias)) in dimensions.iter().enumerate() {
44+
members.push(alias.clone());
45+
let mut col = ColumnarArray::with_capacity(row_count);
46+
for i in 0..row_count {
47+
col.push(DBResponsePrimitive::String(format!(
48+
"dim_{}_{}",
49+
j,
50+
i % 1000
51+
)));
52+
}
53+
columns.push(col);
54+
}
55+
for (j, (_, alias)) in measures.iter().enumerate() {
56+
members.push(alias.clone());
57+
let mut col = ColumnarArray::with_capacity(row_count);
58+
for i in 0..row_count {
59+
col.push(DBResponsePrimitive::Number(((i * (j + 1)) as f64) * 0.5));
60+
}
61+
columns.push(col);
62+
}
63+
for (j, td) in time_dims.iter().enumerate() {
64+
members.push(td.alias.clone());
65+
let mut col = ColumnarArray::with_capacity(row_count);
66+
for i in 0..row_count {
67+
// Format mirrors typical CubeStore output: ISO-8601 with millisecond
68+
// fractional and no timezone.
69+
let month = ((i + j) % 12) + 1;
70+
let day = ((i / 12) % 28) + 1;
71+
col.push(DBResponsePrimitive::String(format!(
72+
"2024-{:02}-{:02}T00:00:00.000",
73+
month, day
74+
)));
75+
}
76+
columns.push(col);
77+
}
78+
79+
JsRawColumnarData { members, columns }
80+
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
use std::hint::black_box;
2+
3+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
4+
use cubeorchestrator::query_message_parser::QueryResult;
5+
use cubeorchestrator::transport::JsRawColumnarData;
6+
use cubeshared::codegen::{
7+
HttpColumnValue, HttpColumnValueArgs, HttpCommand, HttpMessage, HttpMessageArgs, HttpResultSet,
8+
HttpResultSetArgs, HttpRow, HttpRowArgs,
9+
};
10+
use cubeshared::flatbuffers::FlatBufferBuilder;
11+
12+
#[path = "common/mod.rs"]
13+
mod common;
14+
use common::{build_dataset, make_member_aliases, split_dim_measure, COLUMN_COUNTS, ROW_COUNTS};
15+
16+
/// Build a FlatBuffer `HttpMessage` payload mirroring CubeStore's wire format
17+
/// for `from_cubestore_fb` to parse. Cells are 16-character strings to give
18+
/// a realistic per-cell allocation cost.
19+
fn build_cubestore_fb_message(num_rows: usize, num_columns: usize) -> Vec<u8> {
20+
let mut builder = FlatBufferBuilder::new();
21+
22+
let column_names: Vec<_> = (0..num_columns)
23+
.map(|i| builder.create_string(&format!("column_{:02}", i)))
24+
.collect();
25+
26+
let mut rows_vec = Vec::with_capacity(num_rows);
27+
for row_idx in 0..num_rows {
28+
let mut values_vec = Vec::with_capacity(num_columns);
29+
for col_idx in 0..num_columns {
30+
let value_str = builder.create_string(&format!("r{:08}_c{:04}", row_idx, col_idx));
31+
let col_value = HttpColumnValue::create(
32+
&mut builder,
33+
&HttpColumnValueArgs {
34+
string_value: Some(value_str),
35+
},
36+
);
37+
values_vec.push(col_value);
38+
}
39+
let values_vector = builder.create_vector(&values_vec);
40+
let row = HttpRow::create(
41+
&mut builder,
42+
&HttpRowArgs {
43+
values: Some(values_vector),
44+
},
45+
);
46+
rows_vec.push(row);
47+
}
48+
49+
let columns_vector = builder.create_vector(&column_names);
50+
let rows_vector = builder.create_vector(&rows_vec);
51+
let result_set = HttpResultSet::create(
52+
&mut builder,
53+
&HttpResultSetArgs {
54+
columns: Some(columns_vector),
55+
rows: Some(rows_vector),
56+
},
57+
);
58+
59+
let connection_id = builder.create_string("bench_connection");
60+
let message = HttpMessage::create(
61+
&mut builder,
62+
&HttpMessageArgs {
63+
message_id: 1,
64+
command_type: HttpCommand::HttpResultSet,
65+
command: Some(result_set.as_union_value()),
66+
connection_id: Some(connection_id),
67+
},
68+
);
69+
70+
builder.finish(message, None);
71+
builder.finished_data().to_vec()
72+
}
73+
74+
fn bench_from_cubestore_fb(c: &mut Criterion) {
75+
let mut group = c.benchmark_group("QueryResult::from_cubestore_fb");
76+
77+
for &col_count in COLUMN_COUNTS {
78+
for &row_count in ROW_COUNTS {
79+
let msg = build_cubestore_fb_message(row_count, col_count);
80+
let id = format!("c{:02}_r{}", col_count, row_count);
81+
group.throughput(Throughput::Elements((row_count * col_count) as u64));
82+
group.bench_with_input(BenchmarkId::from_parameter(id), &(), |b, _| {
83+
b.iter(|| {
84+
let result =
85+
QueryResult::from_cubestore_fb(black_box(&msg)).expect("from_cubestore_fb");
86+
black_box(result);
87+
});
88+
});
89+
}
90+
}
91+
92+
group.finish();
93+
}
94+
95+
fn bench_from_js_raw_data(c: &mut Criterion) {
96+
let mut group = c.benchmark_group("QueryResult::from_js_raw_data");
97+
98+
let combos: &[(usize, usize)] = &[(8, 10_000), (16, 10_000), (16, 100_000), (32, 100_000)];
99+
100+
for &(col_count, row_count) in combos {
101+
let (dim_count, measure_count) = split_dim_measure(col_count);
102+
let dimensions = make_member_aliases("dim", dim_count);
103+
let measures = make_member_aliases("measure", measure_count);
104+
105+
let dataset = build_dataset(row_count, &dimensions, &measures, &[]);
106+
let payload = serde_json::to_vec(&dataset).expect("to_vec");
107+
let payload_len = payload.len();
108+
109+
eprintln!(
110+
"from_js_raw_data: c{:02}_r{} payload_bytes={}",
111+
col_count, row_count, payload_len
112+
);
113+
114+
group.throughput(Throughput::Elements((row_count * col_count) as u64));
115+
116+
let id_param = format!("c{:02}_r{}", col_count, row_count);
117+
118+
// Parse only: serde_json::from_slice into the wire type.
119+
group.bench_with_input(BenchmarkId::new("parse_only", &id_param), &(), |b, _| {
120+
b.iter(|| {
121+
let parsed: JsRawColumnarData =
122+
serde_json::from_slice(black_box(&payload)).expect("from_slice");
123+
black_box(parsed);
124+
});
125+
});
126+
127+
// End-to-end: parse + build into QueryResult — what the Neon bridge does.
128+
group.bench_with_input(
129+
BenchmarkId::new("parse_plus_build", &id_param),
130+
&(),
131+
|b, _| {
132+
b.iter(|| {
133+
let parsed: JsRawColumnarData =
134+
serde_json::from_slice(black_box(&payload)).expect("from_slice");
135+
let built = QueryResult::from_js_raw_data(parsed).expect("from_js_raw_data");
136+
black_box(built);
137+
});
138+
},
139+
);
140+
}
141+
142+
group.finish();
143+
}
144+
145+
criterion_group!(benches, bench_from_cubestore_fb, bench_from_js_raw_data);
146+
criterion_main!(benches);

0 commit comments

Comments
 (0)