Skip to content

Commit 8ef4b54

Browse files
authored
feat(cubesql): Support FULL and RIGHT joins with non-push-to-Cube SQL push down (#11008)
* feat(cubesql): Support FULL and RIGHT joins with non-push-to-Cube SQL push down
* fix(tesseract): Decouple join types from SQL push down
1 parent 6336388 commit 8ef4b54

11 files changed

Lines changed: 286 additions & 7 deletions

File tree

packages/cubejs-schema-compiler/src/adapter/BaseQuery.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4631,6 +4631,10 @@ export class BaseQuery {
46314631
series_bounds_cast: '{{ expr }}',
46324632
bool_param_cast: '{{ expr }}',
46334633
number_param_cast: '{{ expr }}',
4634+
// Tesseract uses its own join type templates, decoupled from `join_types`,
4635+
// which is used by the SQL API push down. FULL is opt-in per dialect via `join_types_full`.
4636+
join_types_inner: 'INNER',
4637+
join_types_left: 'LEFT',
46344638
},
46354639
filters: {
46364640
equals: '{{ column }} = {{ value }}{{ is_null_check }}',
@@ -4661,6 +4665,8 @@ export class BaseQuery {
46614665
join_types: {
46624666
inner: 'INNER',
46634667
left: 'LEFT',
4668+
right: 'RIGHT',
4669+
full: 'FULL',
46644670
},
46654671
window_frame_types: {
46664672
rows: 'ROWS',

packages/cubejs-schema-compiler/src/adapter/CubeStoreQuery.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,11 @@ export class CubeStoreQuery extends BaseQuery {
358358
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
359359
templates.expressions.wrap_segment_select = 'IF({{ expr }}, 1, 0)';
360360
templates.expressions.wrap_segment_filter = '{{ expr }} = 1';
361+
// CubeStore has no native FULL OUTER JOIN (it is emulated via LEFT JOIN chains), and its
362+
// distributed join executor assumes the left-most table is the split root, so RIGHT/FULL
363+
// across partitioned tables is unsafe. Don't push those join types down to CubeStore.
364+
delete templates.join_types.full;
365+
delete templates.join_types.right;
361366
return templates;
362367
}
363368
}

packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ export class MysqlQuery extends BaseQuery {
195195
templates.types.timestamp = 'DATETIME';
196196
delete templates.types.interval;
197197
templates.types.binary = 'BLOB';
198+
// MySQL has no FULL OUTER JOIN
199+
delete templates.join_types.full;
198200

199201
templates.expressions.concat_strings = 'CONCAT({{ strings | join(\',\' ) }})';
200202

packages/cubejs-schema-compiler/src/adapter/SnowflakeQuery.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ export class SnowflakeQuery extends BaseQuery {
119119
templates.expressions.like = '{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}{% if default_escape %} ESCAPE \'\\\\\'{% endif %}';
120120
templates.expressions.ilike = '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}{% if default_escape %} ESCAPE \'\\\\\'{% endif %}';
121121
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
122-
templates.join_types.full = 'FULL';
122+
templates.tesseract.join_types_full = 'FULL';
123123
delete templates.types.interval;
124124
return templates;
125125
}

rust/cube/cubesqlplanner/cubesqlplanner/src/planner/sql_templates/plan.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,9 +447,9 @@ impl PlanSqlTemplates {
447447
join_type: &JoinType,
448448
) -> Result<String, CubeError> {
449449
let join_type = match join_type {
450-
JoinType::Full => self.render.get_template("join_types/full")?,
451-
JoinType::Inner => self.render.get_template("join_types/inner")?,
452-
JoinType::Left => self.render.get_template("join_types/left")?,
450+
JoinType::Full => self.render.get_template("tesseract/join_types_full")?,
451+
JoinType::Inner => self.render.get_template("tesseract/join_types_inner")?,
452+
JoinType::Left => self.render.get_template("tesseract/join_types_left")?,
453453
};
454454
self.render.render_template(
455455
"statements/join",
@@ -500,7 +500,7 @@ impl PlanSqlTemplates {
500500
}
501501

502502
pub fn supports_full_join(&self) -> bool {
503-
self.render.contains_template("join_types/full")
503+
self.render.contains_template("tesseract/join_types_full")
504504
}
505505

506506
pub fn supports_is_not_distinct_from(&self) -> bool {

rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_sql_templates_render.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,12 @@ impl MockSqlTemplatesRender {
461461
// Join types - based on the `join_types` templates in BaseQuery.js
462462
templates.insert("join_types/inner".to_string(), "INNER".to_string());
463463
templates.insert("join_types/left".to_string(), "LEFT".to_string());
464+
// Tesseract join types (decoupled from SQL API push down join_types)
465+
templates.insert(
466+
"tesseract/join_types_inner".to_string(),
467+
"INNER".to_string(),
468+
);
469+
templates.insert("tesseract/join_types_left".to_string(), "LEFT".to_string());
464470

465471
// Window frame types - based on the `window_frame_types` templates in BaseQuery.js
466472
templates.insert("window_frame_types/rows".to_string(), "ROWS".to_string());

rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3459,6 +3459,7 @@ impl WrappedSelectNode {
34593459
}
34603460

34613461
match join_type {
3462+
// Right/Full are only generated on the non-push-to-Cube path
34623463
JoinType::Inner | JoinType::Left => {
34633464
// Do nothing
34643465
}
@@ -3596,6 +3597,7 @@ impl WrappedSelectNode {
35963597
};
35973598

35983599
let join_type = match join_type {
3600+
// Right/Full are only generated on the non-push-to-Cube path
35993601
JoinType::Left => generator.get_sql_templates().left_join()?,
36003602
JoinType::Inner => generator.get_sql_templates().inner_join()?,
36013603
_ => {
@@ -3868,6 +3870,8 @@ impl WrappedSelectNode {
38683870
let join_type_sql = match join_type {
38693871
JoinType::Left => generator.get_sql_templates().left_join()?,
38703872
JoinType::Inner => generator.get_sql_templates().inner_join()?,
3873+
JoinType::Right => generator.get_sql_templates().right_join()?,
3874+
JoinType::Full => generator.get_sql_templates().full_join()?,
38713875
_ => {
38723876
return Err(CubeError::internal(format!(
38733877
"Unsupported join type for join subquery: {join_type:?}"

rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/join.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::{
1313
WrappedSelectJoinJoinType, WrappedSelectPushToCube, WrapperReplacerContextAliasToCube,
1414
WrapperReplacerContextGroupedSubqueries,
1515
},
16+
transport::MetaContext,
1617
var, var_iter, var_list_iter,
1718
};
1819

@@ -21,7 +22,7 @@ use datafusion::{
2122
logical_plan::Column,
2223
prelude::JoinType,
2324
};
24-
use egg::{Id, Subst};
25+
use egg::{Id, Subst, Var};
2526
use itertools::Itertools;
2627

2728
impl WrapperRules {
@@ -263,6 +264,7 @@ impl WrapperRules {
263264
"?left_on",
264265
"?right_on",
265266
"?in_join_type",
267+
"?input_data_source",
266268
"?out_join_expr",
267269
"?out_join_type",
268270
"?out_grouped_subqueries",
@@ -481,6 +483,7 @@ impl WrapperRules {
481483
"?left_push_to_cube",
482484
"?right_on",
483485
"?in_join_type",
486+
"?input_data_source",
484487
"?out_join_expr",
485488
"?out_join_type",
486489
"?out_grouped_subqueries",
@@ -982,12 +985,42 @@ impl WrapperRules {
982985
result_expr
983986
}
984987

988+
/// Whether a join subquery with `join_type` can be pushed down to `data_source_var`.
989+
///
990+
/// Inner/Left are allowed on both paths. Right/Full are only allowed on the non-push-to-Cube
991+
/// path (`push_to_cube == false`), i.e. when both sides become standalone subqueries joined
992+
/// together — there the outer-join semantics map directly to SQL. On the push-to-Cube path
993+
/// the join is folded inside the Cube query alongside its grouping/measures, where NULL-extended
994+
/// outer rows are not validated, so Right/Full are refused there.
995+
/// Other join types are refused; allowed ones must still pass `can_rewrite_template`.
996+
fn is_subquery_join_type_supported(
997+
egraph: &CubeEGraph,
998+
subst: &mut Subst,
999+
meta: &MetaContext,
1000+
data_source_var: Var,
1001+
join_type: &JoinType,
1002+
push_to_cube: bool,
1003+
) -> bool {
1004+
let template = match join_type { // template key that the data source has to provide
1005+
JoinType::Inner => "join_types/inner",
1006+
JoinType::Left => "join_types/left",
1007+
JoinType::Right if !push_to_cube => "join_types/right",
1008+
JoinType::Full if !push_to_cube => "join_types/full",
1009+
_ => return false, // Right/Full with push_to_cube, or any other join type
1010+
};
1011+
let Ok(data_source) = Self::get_data_source(egraph, subst, data_source_var) else {
1012+
return false; // data source could not be resolved: refuse the push down
1013+
};
1014+
Self::can_rewrite_template(&data_source, meta, template) // final answer is the template check result
1015+
}
1016+
9851017
fn transform_ungrouped_join_grouped(
9861018
&self,
9871019
left_members_var: &'static str,
9881020
left_on_var: &'static str,
9891021
right_on_var: &'static str,
9901022
in_join_type_var: &'static str,
1023+
input_data_source_var: &'static str,
9911024
out_join_expr_var: &'static str,
9921025
out_join_type_var: &'static str,
9931026
out_grouped_subqueries_var: &'static str,
@@ -998,11 +1031,14 @@ impl WrapperRules {
9981031
let right_on_var = var!(right_on_var);
9991032

10001033
let in_join_type_var = var!(in_join_type_var);
1034+
let input_data_source_var = var!(input_data_source_var);
10011035

10021036
let out_join_expr_var = var!(out_join_expr_var);
10031037
let out_join_type_var = var!(out_join_type_var);
10041038
let out_grouped_subqueries_var = var!(out_grouped_subqueries_var);
10051039

1040+
let meta = self.meta_context.clone();
1041+
10061042
// Only left is allowed to be ungrouped query, so right would be a subquery join for left ungrouped CubeScan
10071043
// It means we don't care about just a "single cube" in LHS, and there's essentially no cubes by this moment in RHS
10081044

@@ -1020,6 +1056,19 @@ impl WrapperRules {
10201056
for in_join_type in
10211057
var_list_iter!(egraph[subst[in_join_type_var]], JoinJoinType).cloned()
10221058
{
1059+
// Left is an ungrouped CubeScan pushed to Cube, so this is always the
1060+
// push-to-Cube path: Right/Full are not supported here.
1061+
if !Self::is_subquery_join_type_supported(
1062+
egraph,
1063+
subst,
1064+
&meta,
1065+
input_data_source_var,
1066+
&in_join_type.0,
1067+
true,
1068+
) {
1069+
return false;
1070+
}
1071+
10231072
if !Self::are_join_members_supported(
10241073
egraph,
10251074
subst[left_members_var],
@@ -1217,6 +1266,7 @@ impl WrapperRules {
12171266
left_push_to_cube_var: &'static str,
12181267
right_on_var: &'static str,
12191268
in_join_type_var: &'static str,
1269+
input_data_source_var: &'static str,
12201270
out_join_expr_var: &'static str,
12211271
out_join_type_var: &'static str,
12221272
out_grouped_subqueries_var: &'static str,
@@ -1228,12 +1278,15 @@ impl WrapperRules {
12281278
let right_on_var = var!(right_on_var);
12291279

12301280
let in_join_type_var = var!(in_join_type_var);
1281+
let input_data_source_var = var!(input_data_source_var);
12311282

12321283
let out_join_expr_var = var!(out_join_expr_var);
12331284
let out_join_type_var = var!(out_join_type_var);
12341285
let out_grouped_subqueries_var = var!(out_grouped_subqueries_var);
12351286
let out_push_to_cube_var = var!(out_push_to_cube_var);
12361287

1288+
let meta = self.meta_context.clone();
1289+
12371290
move |egraph, subst| {
12381291
// We are going to generate join with grouped subquery
12391292
// TODO Do we have to check stuff like `transform_check_subquery_allowed` is checking:
@@ -1254,6 +1307,20 @@ impl WrapperRules {
12541307
)
12551308
.cloned()
12561309
{
1310+
// Right/Full are only supported on the non-push-to-Cube variant.
1311+
// `continue` rather than `return false` so the non-push variant of
1312+
// this eclass still gets a chance to match.
1313+
if !Self::is_subquery_join_type_supported(
1314+
egraph,
1315+
subst,
1316+
&meta,
1317+
input_data_source_var,
1318+
&in_join_type.0,
1319+
left_push_to_cube.0,
1320+
) {
1321+
continue;
1322+
}
1323+
12571324
// TODO what's a proper way to find table expression alias?
12581325
let Some(right_join_alias) = right_join_on
12591326
.iter()

rust/cubesql/cubesql/src/compile/test/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,8 @@ OFFSET {{ offset }}{% endif %}"#.to_string(),
756756
("expressions/between".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}BETWEEN {{ low }} AND {{ high }}".to_string()),
757757
("join_types/inner".to_string(), "INNER".to_string()),
758758
("join_types/left".to_string(), "LEFT".to_string()),
759+
("join_types/right".to_string(), "RIGHT".to_string()),
760+
("join_types/full".to_string(), "FULL".to_string()),
759761
("quotes/identifiers".to_string(), "\"".to_string()),
760762
("quotes/escape".to_string(), "\"\"".to_string()),
761763
("params/param".to_string(), "${{ param_index + 1 }}".to_string()),
@@ -780,6 +782,11 @@ OFFSET {{ offset }}{% endif %}"#.to_string(),
780782
("types/binary".to_string(), "BINARY".to_string()),
781783
]
782784
.into_iter().chain(custom_templates)
785+
.collect::<HashMap<_, _>>()
786+
.into_iter()
787+
// Custom template with an empty value removes the base template,
788+
// allowing tests to check behavior of data sources without it
789+
.filter(|(_, value)| !value.is_empty())
783790
.collect(),
784791
false,
785792
)

0 commit comments

Comments
 (0)