Skip to content

Commit 79f7a70

Browse files
authored
perf(cubeorchestrator): Improve compact transform (-70%, 3x) (cube-js#10830)
The compact transform was the only output path still doing dictionary lookups per cell, per row: for every member it re-walked annotation -> members_to_alias_map -> columns_pos before reading the cell. The other two paths (vanilla, columnar) already precompute a plan.

This change hoists those lookups into a plan built once per request (`build_compact_plan`), so the per-row materializer (`get_compact_row`) only does the bounds check and the `transform_value` call.

Bench on my Apple M3 Max, `TransformedData::transform/compact`, median wall time before → after (ms):

| cols / rows | 1 000         | 10 000        | 50 000        | 100 000        |
| ----------- | ------------- | ------------- | ------------- | -------------- |
| 8           | 0.505 → 0.135 | 5.11 → 1.35   | 25.6 → 6.89   | 51.4 → 13.9    |
| 16          | 1.07 → 0.275  | 10.5 → 2.72   | 52.5 → 13.9   | 104.5 → 28.3   |
| 32          | 2.07 → 0.528  | 20.6 → 5.36   | 101.4 → 27.7  | 203.0 → 55.2   |
| 64          | 4.14 → 1.02   | 39.9 → 10.3   | 199.1 → 51.7  | 401.4 → 101.7  |

Relative change in median wall time:

| cols / rows | 1 000  | 10 000 | 50 000 | 100 000 |
| ----------- | ------ | ------ | ------ | ------- |
| 8           | -73.4% | -73.6% | -72.9% | -72.9%  |
| 16          | -74.3% | -74.0% | -73.4% | -72.9%  |
| 32          | -74.5% | -73.9% | -73.2% | -72.3%  |
| 64          | -75.1% | -74.1% | -73.3% | -74.6%  |

Roughly 3.7-4.0x faster across the matrix. Same magnitude as the analogous hoist on the columnar branch.
1 parent 7f37faa commit 79f7a70

1 file changed

Lines changed: 98 additions & 61 deletions

File tree

rust/cube/cubeorchestrator/src/query_result_transform.rs

Lines changed: 98 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -322,54 +322,101 @@ pub fn get_members(
322322
Ok((members_map, members_arr))
323323
}
324324

325-
/// Convert DB response object to the compact output format.
326-
pub fn get_compact_row(
325+
/// One output cell in a compact row. Built once per request by
326+
/// [`build_compact_plan`] so the per-row materializer ([`get_compact_row`])
327+
/// only does the bounds check and [`transform_value`] call.
328+
pub(crate) enum CompactPlanEntry<'a> {
329+
/// Read `db_row[column_index]` and run [`transform_value`].
330+
Cell {
331+
column_index: usize,
332+
member_type: &'a str,
333+
},
334+
/// Constant value replicated across every row (the
335+
/// `compareDateRange` synthetic tail for [`QueryType::CompareDateRangeQuery`]).
336+
Constant(DBResponsePrimitive),
337+
}
338+
339+
pub struct CompactPlan<'a> {
340+
entries: Vec<CompactPlanEntry<'a>>,
341+
}
342+
343+
pub(crate) fn build_compact_plan<'a>(
344+
members: &[String],
327345
members_to_alias_map: &IndexMap<String, String>,
328-
annotation: &HashMap<String, ConfigItem>,
346+
annotation: &'a HashMap<String, ConfigItem>,
347+
columns_pos: &IndexMap<String, usize>,
329348
query_type: &QueryType,
330-
members: &[String],
331349
time_dimensions: Option<&Vec<QueryTimeDimension>>,
332-
db_row: &[DBResponseValue],
333-
columns_pos: &IndexMap<String, usize>,
334-
) -> Result<Vec<DBResponsePrimitive>> {
335-
let mut row: Vec<DBResponsePrimitive> = Vec::with_capacity(members.len());
350+
) -> Result<CompactPlan<'a>> {
351+
let mut entries: Vec<CompactPlanEntry<'a>> = Vec::with_capacity(members.len());
336352

337353
for m in members {
338354
if let Some(annotation_item) = annotation.get(m) {
339355
if let Some(alias) = members_to_alias_map.get(m) {
340-
if let Some(key) = columns_pos.get(alias) {
341-
if let Some(value) = db_row.get(*key) {
342-
let mtype = annotation_item.member_type.as_deref().unwrap_or("");
343-
row.push(transform_value(value.clone(), mtype));
344-
}
356+
if let Some(&column_index) = columns_pos.get(alias) {
357+
let member_type = annotation_item.member_type.as_deref().unwrap_or("");
358+
entries.push(CompactPlanEntry::Cell {
359+
column_index,
360+
member_type,
361+
});
345362
}
346363
}
347364
}
348365
}
349366

350367
match query_type {
351368
QueryType::CompareDateRangeQuery => {
352-
row.push(get_date_range_value(time_dimensions)?);
369+
entries.push(CompactPlanEntry::Constant(get_date_range_value(
370+
time_dimensions,
371+
)?));
353372
}
354373
QueryType::BlendingQuery => {
355374
let blending_key = get_blending_response_key(time_dimensions)?;
356-
357375
if let Some(alias) = members_to_alias_map.get(&blending_key) {
358-
if let Some(key) = columns_pos.get(alias) {
359-
if let Some(value) = db_row.get(*key) {
360-
let member_type = annotation.get(alias).map_or("", |annotation_item| {
361-
annotation_item.member_type.as_deref().unwrap_or("")
362-
});
363-
364-
row.push(transform_value(value.clone(), member_type));
365-
}
376+
if let Some(&column_index) = columns_pos.get(alias) {
377+
// Preserve the (likely-quirky) lookup at the original
378+
// `get_compact_row`: member_type comes from
379+
// `annotation[alias]`, not `annotation[member]`.
380+
let member_type = annotation
381+
.get(alias)
382+
.map_or("", |a| a.member_type.as_deref().unwrap_or(""));
383+
entries.push(CompactPlanEntry::Cell {
384+
column_index,
385+
member_type,
386+
});
366387
}
367388
}
368389
}
369390
_ => {}
370391
}
371392

372-
Ok(row)
393+
Ok(CompactPlan { entries })
394+
}
395+
396+
/// Convert DB response row to the compact output
397+
pub fn get_compact_row(
398+
plan: &CompactPlan<'_>,
399+
db_row: &[DBResponseValue],
400+
) -> Vec<DBResponsePrimitive> {
401+
let mut row: Vec<DBResponsePrimitive> = Vec::with_capacity(plan.entries.len());
402+
403+
for entry in &plan.entries {
404+
match entry {
405+
CompactPlanEntry::Cell {
406+
column_index,
407+
member_type,
408+
} => {
409+
if let Some(value) = db_row.get(*column_index) {
410+
row.push(transform_value(value.clone(), member_type));
411+
}
412+
}
413+
CompactPlanEntry::Constant(v) => {
414+
row.push(v.clone());
415+
}
416+
}
417+
}
418+
419+
row
373420
}
374421

375422
/// Per-column information that is constant across all rows for a given request.
@@ -791,21 +838,19 @@ impl TransformedData {
791838

792839
match res_type {
793840
Some(ResultType::Compact) => {
841+
let plan = build_compact_plan(
842+
&members,
843+
&members_to_alias_map,
844+
annotation,
845+
&cube_store_result.columns_pos,
846+
query_type,
847+
query.time_dimensions.as_ref(),
848+
)?;
794849
let dataset: Vec<_> = cube_store_result
795850
.rows
796851
.iter()
797-
.map(|row| {
798-
get_compact_row(
799-
&members_to_alias_map,
800-
annotation,
801-
query_type,
802-
&members,
803-
query.time_dimensions.as_ref(),
804-
row,
805-
&cube_store_result.columns_pos,
806-
)
807-
})
808-
.collect::<Result<Vec<_>>>()?;
852+
.map(|row| get_compact_row(&plan, row))
853+
.collect();
809854
Ok(TransformedData::Compact { members, dataset })
810855
}
811856
Some(ResultType::Columnar) => {
@@ -2788,15 +2833,15 @@ mod tests {
27882833
alias_to_member_name_map,
27892834
annotation,
27902835
)?;
2791-
let res = get_compact_row(
2836+
let plan = build_compact_plan(
2837+
&members,
27922838
&members_to_alias_map,
27932839
annotation,
2840+
&raw_data.columns_pos,
27942841
query_type,
2795-
&members,
27962842
Some(time_dimensions),
2797-
&raw_data.rows[0],
2798-
&raw_data.columns_pos,
27992843
)?;
2844+
let res = get_compact_row(&plan, &raw_data.rows[0]);
28002845

28012846
let members_map_expected = HashMap::from([
28022847
(
@@ -2837,15 +2882,15 @@ mod tests {
28372882
alias_to_member_name_map,
28382883
annotation,
28392884
)?;
2840-
let res = get_compact_row(
2885+
let plan = build_compact_plan(
2886+
&members,
28412887
&members_to_alias_map,
28422888
annotation,
2889+
&raw_data.columns_pos,
28432890
query_type,
2844-
&members,
28452891
Some(time_dimensions),
2846-
&raw_data.rows[0],
2847-
&raw_data.columns_pos,
28482892
)?;
2893+
let res = get_compact_row(&plan, &raw_data.rows[0]);
28492894

28502895
let members_map_expected = HashMap::from([
28512896
(
@@ -2886,15 +2931,15 @@ mod tests {
28862931
alias_to_member_name_map,
28872932
annotation,
28882933
)?;
2889-
let res = get_compact_row(
2934+
let plan = build_compact_plan(
2935+
&members,
28902936
&members_to_alias_map,
28912937
annotation,
2938+
&raw_data.columns_pos,
28922939
query_type,
2893-
&members,
28942940
Some(time_dimensions),
2895-
&raw_data.rows[0],
2896-
&raw_data.columns_pos,
28972941
)?;
2942+
let res = get_compact_row(&plan, &raw_data.rows[0]);
28982943

28992944
let members_map_expected = HashMap::from([
29002945
(
@@ -2922,15 +2967,7 @@ mod tests {
29222967
assert_eq!(res[i], members_map_expected.get(val).unwrap().clone());
29232968
}
29242969

2925-
let res = get_compact_row(
2926-
&members_to_alias_map,
2927-
annotation,
2928-
query_type,
2929-
&members,
2930-
Some(time_dimensions),
2931-
&raw_data.rows[1],
2932-
&raw_data.columns_pos,
2933-
)?;
2970+
let res = get_compact_row(&plan, &raw_data.rows[1]);
29342971

29352972
let members_map_expected = HashMap::from([
29362973
(
@@ -2984,15 +3021,15 @@ mod tests {
29843021
alias_to_member_name_map,
29853022
annotation,
29863023
)?;
2987-
let res = get_compact_row(
3024+
let plan = build_compact_plan(
3025+
&members,
29883026
&members_to_alias_map,
29893027
annotation,
3028+
&raw_data.columns_pos,
29903029
query_type,
2991-
&members,
29923030
Some(time_dimensions),
2993-
&raw_data.rows[0],
2994-
&raw_data.columns_pos,
29953031
)?;
3032+
let res = get_compact_row(&plan, &raw_data.rows[0]);
29963033

29973034
let members_map_expected = HashMap::from([
29983035
(

0 commit comments

Comments
 (0)