Skip to content

Commit b647dd5

Browse files
authored
fix(tesseract): tautological FK-aggregate join for filtered PK counts (cube-js#10762)
When a filtered count measure of shape `type: count` + `sql: "{primary_key}"` + `filters: [...]` is reached through a 1->N edge with `CUBESQL_SQL_PUSH_DOWN=true`, the FK-aggregate rejoin onto the source cube emitted `ON keys.pk = keys.pk` — both sides referencing the inner `keys` subquery. The join cross-produced and the count over-included any row whose filters were satisfied anywhere in the source table. Root cause: the `Cube` arm of `AggregateMultipliedSubquerySource` built the right-hand side of the join condition as `Expr::Member(MemberExpression::new(dim))`, deferring resolution to whatever rendering context was active at SQL emission. By emission time, the outer factory's `render_references` had been populated by the schema-resolution pass, mapping the PK dim to `(keys, <alias>)` (since the `keys` subquery — the join root — projects it). That mapping then intercepted both sides of the `ON` clause via `RenderReferencesSqlNode`. Fix: clone `context_factory`, register `pk_cube.name() -> pk_cube_alias` in `cube_name_references`, and bind a `VisitorContext` snapshot (pre-pollution) to the join's right-hand side via `Expr::new_member_with_context`. The dimension renders through `AutoPrefixSqlNode` against `pk_cube_alias`, preserving full SQL semantics for custom-SQL or composite primary keys. The `MeasureSubquery` arm in the same file already used an explicit reference to `pk_cube_alias` and was unaffected. Adds a regression test exercising the bug shape (`customers.active_count` filtered count on PK, queried by `orders.status` to force the multiplied path) with both a structural tautology assertion over every `ON` predicate and an `insta` snapshot of the generated SQL.
1 parent d5da121 commit b647dd5

4 files changed

Lines changed: 102 additions & 1 deletion

File tree

rust/cube/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/aggregate_multiplied_subquery.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use super::super::{LogicalNodeProcessor, ProcessableNode, PushDownBuilderContext};
22
use crate::logical_plan::{AggregateMultipliedSubquery, AggregateMultipliedSubquerySource};
33
use crate::physical_plan::ReferencesBuilder;
4+
use crate::physical_plan::VisitorContext;
45
use crate::physical_plan::{
56
Expr, From, JoinBuilder, JoinCondition, MemberExpression, QualifiedColumnName, Select,
67
SelectBuilder,
@@ -59,6 +60,24 @@ impl<'a> LogicalNodeProcessor<'a, AggregateMultipliedSubquery>
5960

6061
match &aggregate_multiplied_subquery.source {
6162
AggregateMultipliedSubquerySource::Cube(cube) => {
63+
// Bind a dedicated VisitorContext to the join's right-hand side
64+
// so that primary-key dimensions render against `pk_cube_alias`
65+
// (the source cube join). Without it, the outer factory's
66+
// render_references — populated later for the SELECT — map
67+
// these dimensions to the inner `keys` subquery alias, and
68+
// both sides of the ON clause collapse to `keys.<pk> = keys.<pk>`.
69+
// Clone the parent factory rather than rebuilding from context so
70+
// that any state already added above (currently none, but this
71+
// makes the lineage explicit for future maintenance) is preserved.
72+
let mut join_context_factory = context_factory.clone();
73+
join_context_factory
74+
.add_cube_name_reference(cube.cube().name().clone(), pk_cube_alias.clone());
75+
let join_visitor_context = Rc::new(VisitorContext::new(
76+
query_tools.clone(),
77+
&join_context_factory,
78+
None,
79+
));
80+
6281
let conditions = primary_keys_dimensions
6382
.iter()
6483
.map(|dim| -> Result<_, CubeError> {
@@ -67,7 +86,10 @@ impl<'a> LogicalNodeProcessor<'a, AggregateMultipliedSubquery>
6786
Some(keys_query_alias.clone()),
6887
alias_in_keys_query,
6988
));
70-
let pk_cube_expr = Expr::Member(MemberExpression::new(dim.clone()));
89+
let pk_cube_expr = Expr::new_member_with_context(
90+
dim.clone(),
91+
join_visitor_context.clone(),
92+
);
7193
Ok(vec![(keys_query_ref, pk_cube_expr)])
7294
})
7395
.collect::<Result<Vec<_>, _>>()?;

rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_fact.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ cubes:
5050
sql: lifetime_value
5151
filters:
5252
- sql: "{regions.name} = 'East'"
53+
- name: active_count
54+
type: count
55+
sql: "{id}"
56+
filters:
57+
- sql: "{name} LIKE 'A%'"
5358

5459
- name: orders
5560
sql: "SELECT * FROM orders"

rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_fact.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,71 @@ async fn test_multiplied_aggregate_hub_sum_measure() {
487487
}
488488
}
489489

490+
#[tokio::test(flavor = "multi_thread")]
491+
async fn test_multiplied_aggregate_filtered_count_on_pk() {
492+
let ctx = create_context();
493+
494+
// customers.active_count (type: count, sql: "{CUBE}.id", filters: [...])
495+
// grouped by orders.status.
496+
// customers→orders is one_to_many, so customers is multiplied → AggregateMultipliedSubquery
497+
// The measure references only customers → source = Cube branch.
498+
// Regression: the Cube branch used to render the right side of the FK-aggregate
499+
// rejoin via Expr::Member, which the outer factory's render_references mapped
500+
// back to the inner `keys` subquery alias — yielding a tautological
501+
// `ON keys.<pk> = keys.<pk>` that cross-joined the source table and over-counted.
502+
let query = indoc! {"
503+
measures:
504+
- customers.active_count
505+
dimensions:
506+
- orders.status
507+
order:
508+
- id: orders.status
509+
"};
510+
511+
let sql = ctx.build_sql(query).unwrap();
512+
513+
// The buggy output contained `keys.customers__id = keys.customers__id`. Flatten
514+
// the SQL so the assertion survives any future line-wrapping in the planner's
515+
// formatter, then walk every ON predicate and assert no tautological equality
516+
// survives. The snapshot below is the airtight guard; this is a targeted
517+
// structural sanity check pinned to the bug shape.
518+
let flat = sql.replace('\n', " ");
519+
let flat_upper = flat.to_ascii_uppercase();
520+
let mut cursor = 0;
521+
while let Some(rel) = flat_upper[cursor..].find(" ON ") {
522+
let on_end = cursor + rel + 4;
523+
let after = &flat[on_end..];
524+
let after_upper = &flat_upper[on_end..];
525+
let bound = [
526+
" GROUP BY ",
527+
" ORDER BY ",
528+
" LEFT JOIN ",
529+
" INNER JOIN ",
530+
" RIGHT JOIN ",
531+
" UNION ",
532+
]
533+
.iter()
534+
.filter_map(|kw| after_upper.find(kw))
535+
.min()
536+
.unwrap_or(after.len());
537+
for chunk in after[..bound].split(" AND ") {
538+
if let Some((lhs, rhs)) = chunk.split_once('=') {
539+
let lhs = lhs.trim().trim_matches('(').trim();
540+
let rhs = rhs.trim().trim_matches(')').trim();
541+
assert!(
542+
lhs != rhs,
543+
"tautological join condition `{lhs} = {rhs}` in:\n{sql}"
544+
);
545+
}
546+
}
547+
cursor = on_end;
548+
}
549+
550+
if let Some(result) = ctx.try_execute_pg(query, SEED).await {
551+
insta::assert_snapshot!(result);
552+
}
553+
}
554+
490555
#[tokio::test(flavor = "multi_thread")]
491556
async fn test_multiplied_aggregate_with_measure_subquery() {
492557
let ctx = create_context();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
source: cubesqlplanner/src/tests/integration/multi_fact.rs
3+
expression: result
4+
---
5+
orders__status | customers__active_count
6+
---------------+------------------------
7+
completed | 1
8+
pending | 1
9+
NULL | 0

0 commit comments

Comments
 (0)