Skip to content

Commit 640bc61

Browse files
committed
fix(tesseract): multi_stage reduce_by/group_by no-ops on time dimensions
A bare reduce_by/group_by (or grain exclude/keep_only) entry referencing a time dimension never matched the projected TimeDimensionSymbol, whose full_name carries a granularity suffix (_day, _month, ...). The entry was silently ignored: the time dimension stayed in (or vanished from) the window PARTITION BY / the JOIN-model leaf grain, collapsing window aggregates to one-row partitions. The granularity-aware matching lives on MultiStageGrain itself (partition_dimensions): entries with an explicit granularity match strictly on full_name; bare entries match across granularities by unwrapping both sides to their base symbol. Both consumers — the window path's member_partition_by_logical and the JOIN-model's partition_filter — now delegate to it, removing the duplicated body and discharging the FIXME about the two copies drifting. Fixes #10854.
1 parent eabeda8 commit 640bc61

10 files changed

Lines changed: 278 additions & 49 deletions

rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -538,24 +538,7 @@ impl MultiStageMemberQueryPlanner {
538538
}
539539

540540
fn member_partition_by_logical(&self, grain: &MultiStageGrain) -> Vec<Rc<MemberSymbol>> {
541-
let dimensions = self.all_dimensions();
542-
let dimensions = if let Some(exclude) = &grain.exclude {
543-
dimensions
544-
.into_iter()
545-
.filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m)))
546-
.collect_vec()
547-
} else {
548-
dimensions
549-
};
550-
let dimensions = if let Some(keep_only) = &grain.keep_only {
551-
dimensions
552-
.into_iter()
553-
.filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m)))
554-
.collect_vec()
555-
} else {
556-
dimensions
557-
};
558-
dimensions
541+
grain.partition_dimensions(&self.all_dimensions())
559542
}
560543

561544
fn query_order_by(&self) -> Result<Vec<OrderByItem>, CubeError> {

rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -279,35 +279,6 @@ impl MultiStageQueryPlanner {
279279
}
280280
}
281281

282-
/// Applies the partition-shaping part of `grain` to a parent-state
283-
/// dimension list: `exclude` removes matching dims, then `keep_only`
284-
/// intersects what's left. `include` is appended outside this helper
285-
/// via `add_dimensions`.
286-
///
287-
/// FIXME: merge with `MultiStageMemberQueryPlanner::member_partition_by_logical`
288-
/// — both apply the same grain reshape on different inputs; keeping two
289-
/// copies invites silent drift when only one is updated.
290-
fn partition_filter(
291-
dims: &Vec<Rc<MemberSymbol>>,
292-
grain: &MultiStageGrain,
293-
) -> Vec<Rc<MemberSymbol>> {
294-
let dims: Vec<Rc<MemberSymbol>> = if let Some(exclude) = &grain.exclude {
295-
dims.iter()
296-
.filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m)))
297-
.cloned()
298-
.collect()
299-
} else {
300-
dims.clone()
301-
};
302-
if let Some(keep_only) = &grain.keep_only {
303-
dims.into_iter()
304-
.filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m)))
305-
.collect()
306-
} else {
307-
dims
308-
}
309-
}
310-
311282
/// Default child-generation path: for each measure or
312283
/// multi-stage-dimension dependency, recurses into
313284
/// `make_queries_descriptions` and adds the result as an input
@@ -556,8 +527,8 @@ impl MultiStageQueryPlanner {
556527
)
557528
{
558529
let grain = multi_stage_member.grain();
559-
let dims = Self::partition_filter(new_state.dimensions(), grain);
560-
let time_dims = Self::partition_filter(new_state.time_dimensions(), grain);
530+
let dims = grain.partition_dimensions(new_state.dimensions());
531+
let time_dims = grain.partition_dimensions(new_state.time_dimensions());
561532
new_state.set_dimensions(dims);
562533
new_state.set_time_dimensions(time_dims);
563534
}

rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,50 @@ pub struct MultiStageGrain {
6363
pub include: Option<Vec<Rc<MemberSymbol>>>,
6464
}
6565

66+
impl MultiStageGrain {
67+
// A grain entry with an explicit granularity (`created_at.month`) matches
68+
// only that granularity; a bare entry matches the dimension at any
69+
// granularity. Projected time dimensions carry a granularity suffix in
70+
// full_name, so bare entries need both sides unwrapped to their base
71+
// symbol — otherwise they silently match nothing, see cube-js/cube#10854.
72+
fn member_matches(dim: &Rc<MemberSymbol>, entry: &Rc<MemberSymbol>) -> bool {
73+
let entry_has_explicit_granularity = matches!(
74+
entry.as_ref(),
75+
MemberSymbol::TimeDimension(td) if td.has_granularity()
76+
);
77+
if entry_has_explicit_granularity {
78+
dim.has_member_in_reference_chain(entry)
79+
} else {
80+
let base = |x: &Rc<MemberSymbol>| match x.as_ref() {
81+
MemberSymbol::TimeDimension(td) => td.base_symbol().clone(),
82+
_ => x.clone(),
83+
};
84+
base(dim).has_member_in_reference_chain(&base(entry))
85+
}
86+
}
87+
88+
/// Applies the partition-shaping part of the grain to a dimension list:
89+
/// `exclude` removes matching dims, then `keep_only` intersects what's
90+
/// left. `include` is appended by callers via `add_dimensions`.
91+
pub fn partition_dimensions(&self, dims: &[Rc<MemberSymbol>]) -> Vec<Rc<MemberSymbol>> {
92+
let dims: Vec<Rc<MemberSymbol>> = if let Some(exclude) = &self.exclude {
93+
dims.iter()
94+
.filter(|d| !exclude.iter().any(|m| Self::member_matches(d, m)))
95+
.cloned()
96+
.collect()
97+
} else {
98+
dims.to_vec()
99+
};
100+
if let Some(keep_only) = &self.keep_only {
101+
dims.into_iter()
102+
.filter(|d| keep_only.iter().any(|m| Self::member_matches(d, m)))
103+
.collect()
104+
} else {
105+
dims
106+
}
107+
}
108+
}
109+
66110
#[derive(Clone)]
67111
pub struct MultiStageProperties {
68112
pub grain: MultiStageGrain,

rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,35 @@ cubes:
194194
- orders.status
195195
- orders.category
196196

197+
- name: amount_reduce_created_at
198+
type: sum
199+
sql: "{CUBE.total_amount}"
200+
multi_stage: true
201+
reduce_by:
202+
- orders.created_at
203+
204+
# Non-sum outer aggregation forces the JOIN-model path (not
205+
# window-eligible) — exercises the partition_filter site of
206+
# cube-js/cube#10854.
207+
- name: max_amount_reduce_created_at
208+
type: max
209+
sql: "{CUBE.total_amount}"
210+
multi_stage: true
211+
reduce_by:
212+
- orders.created_at
213+
214+
# grain.include also forces the JOIN-model path; bare time-dim
215+
# exclude via the grain directive.
216+
- name: amount_grain_exclude_created_at_include_category
217+
type: sum
218+
sql: "{CUBE.total_amount}"
219+
multi_stage: true
220+
grain:
221+
exclude:
222+
- orders.created_at
223+
include:
224+
- orders.category
225+
197226
- name: amount_group_by_status
198227
type: sum
199228
sql: "{CUBE.total_amount}"

rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,41 @@ async fn test_grain_keep_only_status_top_level() {
121121
insta::assert_snapshot!(result);
122122
}
123123
}
124+
125+
#[tokio::test(flavor = "multi_thread")]
126+
async fn test_group_by_time_dim_multiple_granularities() {
127+
let ctx = create_context();
128+
129+
// Regression test mirroring the BigECommerce postgres-driver case (see
130+
// packages/cubejs-testing-drivers/src/tests/testQueries.ts
131+
// "multi-stage group by time dimension"). The query projects two
132+
// granularities of the same time dim (month + week), while the measure
133+
// `amount_group_by_status_time` has `group_by: [status, created_at.month]`
134+
// — only the month granularity should stay in PARTITION BY.
135+
//
136+
// Expected: every (month, week) row reports the per-month grand total
137+
// (Jan=500, Feb=750, Mar=1000), NOT the per-week sum. If `group_by`
138+
// matched via TimeDimensionSymbol → base unwrapping, the week granularity
139+
// would also stay in PARTITION BY and the result would collapse to
140+
// per-(month, week) sums.
141+
let query = indoc! {r#"
142+
measures:
143+
- orders.amount_group_by_status_time
144+
time_dimensions:
145+
- dimension: orders.created_at
146+
granularity: month
147+
dateRange:
148+
- "2024-01-01"
149+
- "2024-03-31"
150+
- dimension: orders.created_at
151+
granularity: week
152+
order:
153+
- id: orders.created_at
154+
"#};
155+
156+
ctx.build_sql(query).unwrap();
157+
158+
if let Some(result) = ctx.try_execute_pg(query, SEED).await {
159+
insta::assert_snapshot!(result);
160+
}
161+
}

rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,104 @@ async fn test_reduce_by_with_time() {
343343
insta::assert_snapshot!(result);
344344
}
345345
}
346+
347+
#[tokio::test(flavor = "multi_thread")]
348+
async fn test_reduce_by_time_dim() {
349+
let ctx = create_context();
350+
351+
// Regression test for cube-js/cube#10854: `reduce_by: [<time_dim>]` was
352+
// silently ignored because `TimeDimensionSymbol::full_name` carries a
353+
// granularity suffix (defaulting to "_day" even with no granularity)
354+
// while the reduce_by entry resolves to a base DimensionSymbol without
355+
// the suffix — string equality at the comparison site failed and the
356+
// time dim stayed in PARTITION BY, collapsing each window to a one-row
357+
// partition.
358+
//
359+
// Expected: `amount_reduce_created_at` (sum reducing by created_at)
360+
// returns the per-status grand total across all months, not split per
361+
// (status, month). Status totals from seed: completed=1400, pending=650,
362+
// cancelled=200. With reduce_by working, every row for a given status
363+
// shows the same total regardless of month.
364+
let query = indoc! {r#"
365+
measures:
366+
- orders.amount_reduce_created_at
367+
dimensions:
368+
- orders.status
369+
time_dimensions:
370+
- dimension: orders.created_at
371+
granularity: month
372+
dateRange:
373+
- "2024-01-01"
374+
- "2024-03-31"
375+
order:
376+
- id: orders.status
377+
- id: orders.created_at
378+
"#};
379+
380+
ctx.build_sql(query).unwrap();
381+
382+
if let Some(result) = ctx.try_execute_pg(query, SEED).await {
383+
insta::assert_snapshot!(result);
384+
}
385+
}
386+
387+
// Same bug as test_reduce_by_time_dim, but on the JOIN-model path: a non-sum
388+
// outer aggregation is not window-eligible, so the grain is applied via
389+
// partition_filter on the leaf state instead of PARTITION BY. Expected: each
390+
// status row carries the per-status max across all months.
391+
#[tokio::test(flavor = "multi_thread")]
392+
async fn test_reduce_by_time_dim_join_model() {
393+
let ctx = create_context();
394+
395+
let query = indoc! {r#"
396+
measures:
397+
- orders.max_amount_reduce_created_at
398+
dimensions:
399+
- orders.status
400+
time_dimensions:
401+
- dimension: orders.created_at
402+
granularity: month
403+
dateRange:
404+
- "2024-01-01"
405+
- "2024-03-31"
406+
order:
407+
- id: orders.status
408+
- id: orders.created_at
409+
"#};
410+
411+
ctx.build_sql(query).unwrap();
412+
413+
if let Some(result) = ctx.try_execute_pg(query, SEED).await {
414+
insta::assert_snapshot!(result);
415+
}
416+
}
417+
418+
// Bare time-dim exclude spelled via the `grain:` directive, with a non-empty
419+
// include (which forces the JOIN-model path). Expected: per-status totals
420+
// independent of month.
421+
#[tokio::test(flavor = "multi_thread")]
422+
async fn test_grain_exclude_time_dim_join_model() {
423+
let ctx = create_context();
424+
425+
let query = indoc! {r#"
426+
measures:
427+
- orders.amount_grain_exclude_created_at_include_category
428+
dimensions:
429+
- orders.status
430+
time_dimensions:
431+
- dimension: orders.created_at
432+
granularity: month
433+
dateRange:
434+
- "2024-01-01"
435+
- "2024-03-31"
436+
order:
437+
- id: orders.status
438+
- id: orders.created_at
439+
"#};
440+
441+
ctx.build_sql(query).unwrap();
442+
443+
if let Some(result) = ctx.try_execute_pg(query, SEED).await {
444+
insta::assert_snapshot!(result);
445+
}
446+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs
3+
expression: result
4+
---
5+
orders__created_at_month | orders__created_at_week | orders__amount_group_by_status_time
6+
-------------------------+-------------------------+------------------------------------
7+
2024-01-01 00:00:00 | 2024-01-01 00:00:00 | 500.00
8+
2024-01-01 00:00:00 | 2024-01-08 00:00:00 | 500.00
9+
2024-01-01 00:00:00 | 2024-01-15 00:00:00 | 500.00
10+
2024-01-01 00:00:00 | 2024-01-22 00:00:00 | 500.00
11+
2024-02-01 00:00:00 | 2024-01-29 00:00:00 | 750.00
12+
2024-02-01 00:00:00 | 2024-02-05 00:00:00 | 750.00
13+
2024-02-01 00:00:00 | 2024-02-12 00:00:00 | 750.00
14+
2024-02-01 00:00:00 | 2024-02-19 00:00:00 | 750.00
15+
2024-03-01 00:00:00 | 2024-02-26 00:00:00 | 1000.00
16+
2024-03-01 00:00:00 | 2024-03-04 00:00:00 | 1000.00
17+
2024-03-01 00:00:00 | 2024-03-11 00:00:00 | 1000.00
18+
2024-03-01 00:00:00 | 2024-03-18 00:00:00 | 1000.00
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs
3+
expression: result
4+
---
5+
orders__status | orders__created_at_month | orders__amount_grain_exclude_created_at_include_category
6+
---------------+--------------------------+---------------------------------------------------------
7+
cancelled | 2024-01-01 00:00:00 | 200.00
8+
cancelled | 2024-02-01 00:00:00 | 200.00
9+
cancelled | 2024-03-01 00:00:00 | 200.00
10+
completed | 2024-01-01 00:00:00 | 1400.00
11+
completed | 2024-02-01 00:00:00 | 1400.00
12+
completed | 2024-03-01 00:00:00 | 1400.00
13+
pending | 2024-01-01 00:00:00 | 650.00
14+
pending | 2024-02-01 00:00:00 | 650.00
15+
pending | 2024-03-01 00:00:00 | 650.00
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
source: cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs
3+
expression: result
4+
---
5+
orders__status | orders__created_at_month | orders__amount_reduce_created_at
6+
---------------+--------------------------+---------------------------------
7+
cancelled | 2024-01-01 00:00:00 | 200.00
8+
cancelled | 2024-02-01 00:00:00 | 200.00
9+
cancelled | 2024-03-01 00:00:00 | 200.00
10+
completed | 2024-01-01 00:00:00 | 1400.00
11+
completed | 2024-02-01 00:00:00 | 1400.00
12+
completed | 2024-03-01 00:00:00 | 1400.00
13+
pending | 2024-01-01 00:00:00 | 650.00
14+
pending | 2024-02-01 00:00:00 | 650.00
15+
pending | 2024-03-01 00:00:00 | 650.00
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs
3+
expression: result
4+
---
5+
orders__status | orders__created_at_month | orders__max_amount_reduce_created_at
6+
---------------+--------------------------+-------------------------------------
7+
cancelled | 2024-01-01 00:00:00 | 200.00
8+
cancelled | 2024-02-01 00:00:00 | 200.00
9+
cancelled | 2024-03-01 00:00:00 | 200.00
10+
completed | 2024-01-01 00:00:00 | 1400.00
11+
completed | 2024-02-01 00:00:00 | 1400.00
12+
completed | 2024-03-01 00:00:00 | 1400.00
13+
pending | 2024-01-01 00:00:00 | 650.00
14+
pending | 2024-02-01 00:00:00 | 650.00
15+
pending | 2024-03-01 00:00:00 | 650.00

0 commit comments

Comments
 (0)