From b5a556c60ccab69c77e4e8aab3131ee423cc2044 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 17:35:17 +0200 Subject: [PATCH 01/11] feat(schema-compiler): add grain directive Joi schema for multi-stage measures Introduce `grain:` directive on multi-stage measures with fields `mode (relative|fixed)`, `exclude`, `keepOnly`, `include` mirroring the existing `filter:` directive shape. All three list fields are `Joi.func()` returning arrays of refs and are wired into the prop transpiler so authors can write `({ CUBE }) => [CUBE.dim]`. `exclude` and `keepOnly` are mutually exclusive via `.nand(...)`, matching the `MultiStageFilter` contract. `CubeEvaluator.evaluateMultiStageReferences` now resolves grain's function fields into `*References` siblings that the native bridge will read. The directive is only added to multi-stage measures; dimensions are intentionally out of scope for now, and the legacy `group_by/add_group_by/reduce_by` fields keep working unchanged. --- .../src/compiler/CubeEvaluator.ts | 23 ++ .../src/compiler/CubeValidator.ts | 8 + .../transpilers/CubePropContextTranspiler.ts | 1 + .../test/unit/cube-validator.test.ts | 199 ++++++++++++++++++ 4 files changed, 231 insertions(+) diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 67db3b77e4097..63a0b88eaa706 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -54,6 +54,17 @@ export type MultiStageFilterDirective = { keepOnlyReferences?: string[]; }; +export type MultiStageGrainDirective = { + mode?: 'relative' | 'fixed'; + exclude?: (...args: Array) => Array; + keepOnly?: (...args: Array) => Array; + include?: (...args: Array) => Array; + // Resolved sibling fields populated by `evaluateMultiStageReferences`. + excludeReferences?: string[]; + keepOnlyReferences?: string[]; + includeReferences?: string[]; +}; + export type DimensionDefinition = { type: string; sql(): string; @@ -98,6 +109,7 @@ export type MeasureDefinition = { groupBy?: (...args: Array) => Array; reduceBy?: (...args: Array) => Array; addGroupBy?: (...args: Array) => Array; + grain?: MultiStageGrainDirective; timeShift?: TimeShiftDefinition[]; groupByReferences?: string[]; reduceByReferences?: string[]; @@ -626,6 +638,17 @@ export class CubeEvaluator extends CubeSymbols { member.filter.keepOnlyReferences = this.evaluateReferences(cubeName, member.filter.keepOnly); } } + if (member.grain) { + if (typeof member.grain.exclude === 'function') { + member.grain.excludeReferences = this.evaluateReferences(cubeName, member.grain.exclude); + } + if (typeof member.grain.keepOnly === 'function') { + member.grain.keepOnlyReferences = this.evaluateReferences(cubeName, member.grain.keepOnly); + } + if (typeof member.grain.include === 'function') { + member.grain.includeReferences = this.evaluateReferences(cubeName, member.grain.include); + } + } } } } diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts index d01c5dd1e5b77..c574c282a9cd0 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts @@ -868,6 +868,13 @@ const MultiStageFilter = Joi.object().keys({ ), }).nand('exclude', 'keepOnly'); +const MultiStageGrain = Joi.object().keys({ + mode: Joi.string().valid('relative', 'fixed'), + exclude: Joi.func(), + keepOnly: Joi.func(), + include: Joi.func(), +}).nand('exclude', 'keepOnly'); + const CaseSchema = Joi.object().keys({ when: Joi.array().items(Joi.object().keys({ sql: Joi.func().required(), @@ -916,6 +923,7 @@ const MeasuresSchema = Joi.object().pattern(identifierRegex, Joi.alternatives(). reduceBy: Joi.func(), addGroupBy: Joi.func(), filter: MultiStageFilter, + grain: MultiStageGrain, timeShift: Joi.alternatives().conditional(Joi.array().length(1), { then: Joi.array().items(timeShiftItemOptional), otherwise: Joi.array().items(timeShiftItemRequired) diff --git a/packages/cubejs-schema-compiler/src/compiler/transpilers/CubePropContextTranspiler.ts b/packages/cubejs-schema-compiler/src/compiler/transpilers/CubePropContextTranspiler.ts index 5cc43fd99b91a..b7eaed26cd680 100644 --- a/packages/cubejs-schema-compiler/src/compiler/transpilers/CubePropContextTranspiler.ts +++ b/packages/cubejs-schema-compiler/src/compiler/transpilers/CubePropContextTranspiler.ts @@ -20,6 +20,7 @@ export const transpiledFieldsPatterns: Array = [ /^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(timeShift|time_shift)\.[0-9]+\.(timeDimension|time_dimension)$/, /^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by)$/, /^(measures|dimensions)\.[_a-zA-Z][_a-zA-Z0-9]*\.filter\.(exclude|keepOnly|keep_only)$/, + /^measures\.[_a-zA-Z][_a-zA-Z0-9]*\.grain\.(exclude|keepOnly|keep_only|include)$/, /^(measures|dimensions)\.[_a-zA-Z][_a-zA-Z0-9]*\.case\.switch$/, /^dimensions\.[_a-zA-Z][_a-zA-Z0-9]*\.(reduceBy|reduce_by|groupBy|group_by|addGroupBy|add_group_by|key)$/, /^(preAggregations|pre_aggregations)\.[_a-zA-Z][_a-zA-Z0-9]*\.indexes\.[_a-zA-Z][_a-zA-Z0-9]*\.columns$/, diff --git a/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts b/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts index 77bb51c8a6028..5690f2e3ba328 100644 --- a/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts @@ -762,6 +762,205 @@ describe('Cube Validation', () => { expect(validationResult.error).toBeFalsy(); }); + + it('multi-stage grain — full directive accepted', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + measure_with_grain: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { + mode: 'relative', + exclude: () => [], + include: () => [], + } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + } + } as any); + + expect(validationResult.error).toBeFalsy(); + }); + + it('multi-stage grain — partial directives accepted', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + only_exclude: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { exclude: () => [] } + }, + only_keep_only: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { mode: 'fixed', keepOnly: () => [] } + }, + only_include: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { include: () => [] } + }, + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + } + } as any); + + expect(validationResult.error).toBeFalsy(); + }); + + it('multi-stage grain — exclude and keepOnly are mutually exclusive', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + both_set: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { + exclude: () => [], + keepOnly: () => [], + } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + expect(message).toContain('exclude'); + expect(message).toContain('keepOnly'); + } + } as any); + + expect(validationResult.error).toBeTruthy(); + }); + + it('multi-stage grain — invalid mode rejected', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + bad_mode: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { mode: 'RELATIVE' } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + expect(message).toContain('measures.bad_mode.grain.mode'); + } + } as any); + + expect(validationResult.error).toBeTruthy(); + }); + + it('multi-stage grain — exclude must be a function', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + bad_exclude: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { exclude: ['some_dim'] } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + expect(message).toContain('measures.bad_exclude.grain.exclude'); + } + } as any); + + expect(validationResult.error).toBeTruthy(); + }); + + it('multi-stage grain — include must be a function', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + measures: { + bad_include: { + multiStage: true, + type: 'sum', + sql: () => '', + grain: { include: ['some_dim'] } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + expect(message).toContain('measures.bad_include.grain.include'); + } + } as any); + + expect(validationResult.error).toBeTruthy(); + }); + + it('multi-stage grain — rejected on multi-stage dimension', async () => { + const cubeValidator = new CubeValidator(new CubeSymbols()); + const cube = { + name: 'name', + sql: () => '', + fileName: 'fileName', + dimensions: { + dim_with_grain: { + multiStage: true, + type: 'string', + sql: () => '', + grain: { include: () => [] } + } + } + }; + + const validationResult = cubeValidator.validate(cube, { + error: (message: any, _e: any) => { + console.log(message); + } + } as any); + + expect(validationResult.error).toBeTruthy(); + }); }); it('OriginalSqlSchema', async () => { From e55f6997f135ee1a94550d0ecfce2207a976f3bd Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 17:50:42 +0200 Subject: [PATCH 02/11] feat(tesseract): bridge multi-stage grain directive into Rust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the multi-stage filter bridge for the new `grain:` directive: - New static-only bridge `MultiStageGrainReferences` carrying `mode/exclude/keep_only/include` (all reference name lists). - `MeasureDefinition::grain()` accessor — only on measures, dimensions stay out of scope until the directive expands. - Tesseract mocks: `MockMultiStageGrainReferences`, `grain:` field on the YAML measure parser with cube-name qualification, wired into `MockMeasureDefinition`, plus YAML round-trip unit tests covering full / partial / cube-qualified shapes. Backend-native bridge harness: - Register `multiStageGrain` in the bridge_registry and add an empty `invoke_multi_stage_grain` (static-only, like the filter sibling). - Extend `invoke_measure_definition` to record the new `grain` getter so the dispatcher/meta drift guard stays green. - JS fixture + coverage rows for `multiStageGrain`, and `grain` added to the `measureDefinition` expected meta. The fixture populates only one of `excludeReferences`/`keepOnlyReferences` because the `.nand` enforcement currently lives on the JS Joi validator; the bridge itself is structurally permissive. --- .../src/bridge_test_exports.rs | 12 +++ .../test/bridge/bridge-fixtures.ts | 13 +++ .../bridge/object-bridges-coverage.test.ts | 5 ++ .../src/cube_bridge/measure_definition.rs | 4 + .../cubesqlplanner/src/cube_bridge/mod.rs | 1 + .../src/cube_bridge/multi_stage_grain.rs | 21 +++++ .../cube_bridge/mock_measure_definition.rs | 90 ++++++++++++++++++- .../cube_bridge/mock_multi_stage_grain.rs | 36 ++++++++ .../src/test_fixtures/cube_bridge/mod.rs | 2 + .../test_fixtures/cube_bridge/yaml/measure.rs | 31 ++++++- 10 files changed, 213 insertions(+), 2 deletions(-) create mode 100644 rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs create mode 100644 rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs diff --git a/packages/cubejs-backend-native/src/bridge_test_exports.rs b/packages/cubejs-backend-native/src/bridge_test_exports.rs index eb1cb4b15aa4b..f569174040f89 100644 --- a/packages/cubejs-backend-native/src/bridge_test_exports.rs +++ b/packages/cubejs-backend-native/src/bridge_test_exports.rs @@ -69,6 +69,9 @@ use cubesqlplanner::cube_bridge::{ multi_stage_filter::{ multi_stage_filter_references_bridge_fields_meta, NativeMultiStageFilterReferences, }, + multi_stage_grain::{ + multi_stage_grain_references_bridge_fields_meta, NativeMultiStageGrainReferences, + }, pre_aggregation_description::{ pre_aggregation_description_bridge_fields_meta, NativePreAggregationDescription, PreAggregationDescription, @@ -450,6 +453,7 @@ bridge_registry! { "memberExpressionDefinition" => NativeMemberExpressionDefinition, member_expression_definition_bridge_fields_meta, invoke_member_expression_definition; "memberOrderBy" => NativeMemberOrderBy, member_order_by_bridge_fields_meta, invoke_member_order_by; "multiStageFilter" => NativeMultiStageFilterReferences, multi_stage_filter_references_bridge_fields_meta, invoke_multi_stage_filter; + "multiStageGrain" => NativeMultiStageGrainReferences, multi_stage_grain_references_bridge_fields_meta, invoke_multi_stage_grain; "preAggregationDescription" => NativePreAggregationDescription, pre_aggregation_description_bridge_fields_meta, invoke_pre_aggregation_description; "preAggregationObj" => NativePreAggregationObj, pre_aggregation_obj_bridge_fields_meta, invoke_pre_aggregation_obj; "preAggregationTimeDimension" => NativePreAggregationTimeDimension, pre_aggregation_time_dimension_bridge_fields_meta, invoke_pre_aggregation_time_dimension; @@ -748,6 +752,7 @@ fn invoke_measure_definition(b: &NativeMeasureDefinition) -> r.record("case", b.case()); r.record("filters", b.filters()); r.record("filter", b.filter()); + r.record("grain", b.grain()); r.record("drill_filters", b.drill_filters()); r.record("order_by", b.order_by()); r.record("mask_sql", b.mask_sql()); @@ -764,6 +769,13 @@ fn invoke_multi_stage_filter( InvokeResult::new() } +fn invoke_multi_stage_grain( + _b: &NativeMultiStageGrainReferences, +) -> InvokeResult { + // Static-only bridge — same shape as `invoke_multi_stage_filter`. + InvokeResult::new() +} + fn invoke_expression_struct(b: &NativeExpressionStruct) -> InvokeResult { let mut r = InvokeResult::new(); r.record("add_filters", b.add_filters()); diff --git a/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts b/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts index 609170b026e90..9b472f52172af 100644 --- a/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts +++ b/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts @@ -83,6 +83,18 @@ export const multiStageFilterFixture = (): unknown => ({ include: [{ member: 'orders.amount', operator: 'gt', values: ['0'] }], }); +// MultiStageGrainReferences mirrors the filter bridge — static-only. The +// bridge is structurally permissive (both `excludeReferences` and +// `keepOnlyReferences` could deserialize at once); the `.nand` lives on the +// JS Joi validator. The fixture populates only one of them to match the +// schema contract. `include` is a plain reference list, not the structured +// filter items the filter bridge uses. +export const multiStageGrainFixture = (): unknown => ({ + mode: 'fixed', + excludeReferences: ['orders.region'], + includeReferences: ['orders.category'], +}); + export const memberDefinitionFixture = (): unknown => ({ type: 'dimension', // sql is optional @@ -284,6 +296,7 @@ export const FIXTURES: Record = { memberExpressionDefinition: memberExpressionDefinitionFixture, memberOrderBy: memberOrderByFixture, multiStageFilter: multiStageFilterFixture, + multiStageGrain: multiStageGrainFixture, preAggregationDescription: preAggregationDescriptionFixture, preAggregationObj: preAggregationObjFixture, preAggregationTimeDimension: preAggregationTimeDimensionFixture, diff --git a/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts b/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts index dbe371322c323..585cb88237e86 100644 --- a/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts +++ b/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts @@ -181,6 +181,7 @@ const BRIDGES: BridgeSpec[] = [ 'drill_filters', 'filter', 'filters', + 'grain', 'group_by_references', 'mask_sql', 'measure_type', @@ -203,6 +204,10 @@ const BRIDGES: BridgeSpec[] = [ name: 'multiStageFilter', expected: ['exclude', 'include', 'keep_only', 'mode'], }, + { + name: 'multiStageGrain', + expected: ['exclude', 'include', 'keep_only', 'mode'], + }, { name: 'preAggregationDescription', expected: [ diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/measure_definition.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/measure_definition.rs index 5e96209f192eb..b9b54f3b5a5ef 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/measure_definition.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/measure_definition.rs @@ -2,6 +2,7 @@ use super::case_variant::CaseVariant; use super::member_order_by::{MemberOrderBy, NativeMemberOrderBy}; use super::member_sql::{MemberSql, NativeMemberSql}; use super::multi_stage_filter::{MultiStageFilterReferences, NativeMultiStageFilterReferences}; +use super::multi_stage_grain::{MultiStageGrainReferences, NativeMultiStageGrainReferences}; use super::struct_with_sql_member::{NativeStructWithSqlMember, StructWithSqlMember}; use cubenativeutils::wrappers::serializer::{ NativeDeserialize, NativeDeserializer, NativeSerialize, @@ -68,6 +69,9 @@ pub trait MeasureDefinition { #[nbridge(field, optional)] fn filter(&self) -> Result>, CubeError>; + #[nbridge(field, optional)] + fn grain(&self) -> Result>, CubeError>; + #[nbridge(field, optional, vec)] fn drill_filters(&self) -> Result>>, CubeError>; diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/mod.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/mod.rs index a6d22a6c9f8d3..19759f1796dcd 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/mod.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/mod.rs @@ -34,6 +34,7 @@ pub mod member_expression; pub mod member_order_by; pub mod member_sql; pub mod multi_stage_filter; +pub mod multi_stage_grain; pub mod options_member; pub mod pre_aggregation_description; pub mod pre_aggregation_obj; diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs new file mode 100644 index 0000000000000..068900544a00d --- /dev/null +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs @@ -0,0 +1,21 @@ +use cubenativeutils::wrappers::serializer::{NativeDeserialize, NativeSerialize}; +use cubenativeutils::wrappers::NativeContextHolder; +use cubenativeutils::wrappers::NativeObjectHandle; +use cubenativeutils::CubeError; +use serde::{Deserialize, Serialize}; +use std::any::Any; +use std::rc::Rc; + +#[derive(Serialize, Deserialize, Debug, Clone, nativebridge::NativeBridgeStatic)] +pub struct MultiStageGrainReferencesStatic { + pub mode: Option, + #[serde(rename = "excludeReferences")] + pub exclude: Option>, + #[serde(rename = "keepOnlyReferences")] + pub keep_only: Option>, + #[serde(rename = "includeReferences")] + pub include: Option>, +} + +#[nativebridge::native_bridge(MultiStageGrainReferencesStatic, with_static_meta)] +pub trait MultiStageGrainReferences {} diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs index bf8b11c92efbb..2c3dc6e1a0b77 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs @@ -5,11 +5,13 @@ use crate::cube_bridge::measure_definition::{ use crate::cube_bridge::member_order_by::MemberOrderBy; use crate::cube_bridge::member_sql::MemberSql; use crate::cube_bridge::multi_stage_filter::MultiStageFilterReferences; +use crate::cube_bridge::multi_stage_grain::MultiStageGrainReferences; use crate::cube_bridge::struct_with_sql_member::StructWithSqlMember; use crate::impl_static_data; use crate::test_fixtures::cube_bridge::yaml::measure::YamlMeasureDefinition; use crate::test_fixtures::cube_bridge::{ - MockMemberOrderBy, MockMemberSql, MockMultiStageFilterReferences, MockStructWithSqlMember, + MockMemberOrderBy, MockMemberSql, MockMultiStageFilterReferences, + MockMultiStageGrainReferences, MockStructWithSqlMember, }; use cubenativeutils::CubeError; use std::any::Any; @@ -45,6 +47,8 @@ pub struct MockMeasureDefinition { #[builder(default)] filter: Option>, #[builder(default)] + grain: Option>, + #[builder(default)] order_by: Option>>, #[builder(default, setter(strip_option(fallback = resolved_mask_sql_opt)))] resolved_mask_sql: Option, @@ -141,6 +145,17 @@ impl MeasureDefinition for MockMeasureDefinition { .map(|f| f.clone() as Rc)) } + fn has_grain(&self) -> Result { + Ok(self.grain.is_some()) + } + + fn grain(&self) -> Result>, CubeError> { + Ok(self + .grain + .as_ref() + .map(|g| g.clone() as Rc)) + } + fn has_order_by(&self) -> Result { Ok(self.order_by.is_some()) } @@ -343,6 +358,79 @@ mod tests { ); } + #[test] + fn test_from_yaml_with_grain() { + let yaml = indoc! {" + type: sum + sql: \"{CUBE.amount}\" + multi_stage: true + grain: + mode: fixed + include: + - region + - category + "}; + + let measure = MockMeasureDefinition::from_yaml(yaml).unwrap(); + + assert!(measure.has_grain().unwrap()); + let grain = measure.grain().unwrap().expect("grain present"); + let static_data = grain.static_data(); + assert_eq!(static_data.mode.as_deref(), Some("fixed")); + assert_eq!( + static_data.include, + Some(vec!["region".to_string(), "category".to_string()]) + ); + assert_eq!(static_data.exclude, None); + assert_eq!(static_data.keep_only, None); + } + + #[test] + fn test_from_yaml_with_grain_partial() { + let yaml = indoc! {" + type: sum + sql: \"{CUBE.amount}\" + multi_stage: true + grain: + exclude: + - region + "}; + + let measure = MockMeasureDefinition::from_yaml(yaml).unwrap(); + let grain = measure.grain().unwrap().expect("grain present"); + let static_data = grain.static_data(); + + assert_eq!(static_data.mode, None); + assert_eq!(static_data.exclude, Some(vec!["region".to_string()])); + assert_eq!(static_data.keep_only, None); + assert_eq!(static_data.include, None); + } + + #[test] + fn test_from_yaml_with_grain_qualifies_references() { + let yaml = indoc! {" + type: sum + sql: \"{CUBE.amount}\" + multi_stage: true + grain: + include: + - region + - other_cube.dim + "}; + + let yaml_def: YamlMeasureDefinition = serde_yaml::from_str(yaml).unwrap(); + let measure = yaml_def.build_with_cube_name(Some("orders")); + let grain = measure.grain().unwrap().expect("grain present"); + + assert_eq!( + grain.static_data().include, + Some(vec![ + "orders.region".to_string(), + "other_cube.dim".to_string() + ]) + ); + } + #[test] fn test_from_yaml_with_case() { let yaml = indoc! {" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs new file mode 100644 index 0000000000000..9b12c62d89263 --- /dev/null +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs @@ -0,0 +1,36 @@ +use crate::cube_bridge::multi_stage_grain::{ + MultiStageGrainReferences, MultiStageGrainReferencesStatic, +}; +use crate::impl_static_data; +use std::any::Any; +use std::rc::Rc; +use typed_builder::TypedBuilder; + +#[derive(TypedBuilder)] +pub struct MockMultiStageGrainReferences { + #[builder(default)] + mode: Option, + #[builder(default)] + exclude: Option>, + #[builder(default)] + keep_only: Option>, + #[builder(default)] + include: Option>, +} + +impl_static_data!( + MockMultiStageGrainReferences, + MultiStageGrainReferencesStatic, + mode, + exclude, + keep_only, + include +); + +impl MultiStageGrainReferences for MockMultiStageGrainReferences { + crate::impl_static_data_method!(MultiStageGrainReferencesStatic); + + fn as_any(self: Rc) -> Rc { + self + } +} diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mod.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mod.rs index d3d6b74cb2eae..e2fca83a923d6 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mod.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mod.rs @@ -27,6 +27,7 @@ mod mock_member_expression_definition; mod mock_member_order_by; mod mock_member_sql; mod mock_multi_stage_filter; +mod mock_multi_stage_grain; mod mock_pre_aggregation_description; mod mock_pre_aggregation_obj; mod mock_pre_aggregation_time_dimension; @@ -64,6 +65,7 @@ pub use mock_member_expression_definition::MockMemberExpressionDefinition; pub use mock_member_order_by::MockMemberOrderBy; pub use mock_member_sql::MockMemberSql; pub use mock_multi_stage_filter::MockMultiStageFilterReferences; +pub use mock_multi_stage_grain::MockMultiStageGrainReferences; pub use mock_pre_aggregation_description::MockPreAggregationDescription; pub use mock_pre_aggregation_time_dimension::MockPreAggregationTimeDimension; pub use mock_schema::{MockSchema, MockSchemaBuilder}; diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs index d7775c848d8fe..22d27b1ccc668 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs @@ -5,7 +5,7 @@ use crate::test_fixtures::cube_bridge::yaml::case::YamlCaseVariant; use crate::test_fixtures::cube_bridge::yaml::mask::YamlMask; use crate::test_fixtures::cube_bridge::{ MockMeasureDefinition, MockMemberOrderBy, MockMultiStageFilterReferences, - MockStructWithSqlMember, + MockMultiStageGrainReferences, MockStructWithSqlMember, }; use serde::Deserialize; use std::rc::Rc; @@ -29,6 +29,8 @@ pub struct YamlMeasureDefinition { #[serde(default)] filter: Option, #[serde(default)] + grain: Option, + #[serde(default)] sql: Option, #[serde(default)] case: Option, @@ -122,6 +124,31 @@ impl YamlMultiStageFilter { } } +#[derive(Debug, Deserialize)] +pub struct YamlMultiStageGrain { + #[serde(default)] + mode: Option, + #[serde(default)] + exclude: Option>, + #[serde(default)] + keep_only: Option>, + #[serde(default)] + include: Option>, +} + +impl YamlMultiStageGrain { + pub(super) fn build(self, cube_name: Option<&str>) -> Rc { + Rc::new( + MockMultiStageGrainReferences::builder() + .mode(self.mode) + .exclude(qualify_references(self.exclude, cube_name)) + .keep_only(qualify_references(self.keep_only, cube_name)) + .include(qualify_references(self.include, cube_name)) + .build(), + ) + } +} + #[derive(Debug, Deserialize)] struct YamlFilter { sql: String, @@ -197,6 +224,7 @@ impl YamlMeasureDefinition { }; let filter = self.filter.map(|f| f.build(cube_name)); + let grain = self.grain.map(|g| g.build(cube_name)); Rc::new( MockMeasureDefinition::builder() @@ -215,6 +243,7 @@ impl YamlMeasureDefinition { .filters(filters) .drill_filters(drill_filters) .filter(filter) + .grain(grain) .order_by(order_by) .resolved_mask_sql_opt(self.mask.map(|m| m.to_sql_string())) .build(), From e9a453d5a2d3f3a068e8ff28fbc75553431dcfec Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 18:10:12 +0200 Subject: [PATCH 03/11] refactor(tesseract): collapse multi-stage partition fields into MultiStageGrain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the three independent `add_group_by` / `reduce_by` / `group_by` fields on `MultiStageProperties` with a single `MultiStageGrain` struct carrying `mode` + `exclude` / `keep_only` / `include`, modeled as a set operation on the inherited grain context. For measures the grain is sourced from the new `grain:` directive when the schema provides it; otherwise it falls back to the older `add_group_by` / `reduce_by` / `group_by` fields with the mapping: - add_group_by -> include - reduce_by -> exclude - group_by -> keep_only Fallback always uses `mode: Relative` — the planner does not branch on mode for grain yet, so observable behavior is unchanged. The `.nand` between `exclude` and `keep_only` on the directive is enforced at the symbol-build layer, matching the wording of the filter equivalent. For dimensions the grain still carries only `include` (from `add_group_by`); the `grain:` directive is currently scoped to measures. `MeasureSymbol::reduce_by/add_group_by/group_by` and `DimensionSymbol::add_group_by` are kept as thin proxies on the new struct so existing call sites continue to compile and behave the same. Tests: existing direct-field accesses moved to the new shape; a YAML fixture and two unit tests cover the legacy-mapping and directive-overrides-legacy paths. --- .../src/planner/symbols/common/multi_stage.rs | 114 +++++++++++++++--- .../src/planner/symbols/dimension_symbol.rs | 2 +- .../src/planner/symbols/measure_symbol.rs | 10 +- .../yaml_files/common/multi_stage_filter.yaml | 12 ++ .../src/tests/measure_symbol.rs | 77 ++++++++++-- 5 files changed, 181 insertions(+), 34 deletions(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs index e632244c5a666..d0df36f893a61 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs @@ -1,7 +1,8 @@ use super::super::measure_symbol::MeasureTimeShifts; use super::super::MemberSymbol; use crate::cube_bridge::dimension_definition::DimensionDefinition; -use crate::cube_bridge::measure_definition::MeasureDefinition; +use crate::cube_bridge::measure_definition::{MeasureDefinition, MeasureDefinitionStatic}; +use crate::cube_bridge::multi_stage_grain::MultiStageGrainReferences; use crate::planner::filter::compiler::FilterCompiler; use crate::planner::filter::FilterItem; use crate::planner::Compiler; @@ -51,12 +52,46 @@ pub struct MultiStageFilter { pub include_measure: Vec, } +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MultiStageGrainMode { + Relative, + Fixed, +} + +impl MultiStageGrainMode { + fn from_str(s: &str) -> Result { + match s { + "relative" => Ok(Self::Relative), + "fixed" => Ok(Self::Fixed), + other => Err(CubeError::user(format!( + "Unknown multi-stage grain mode '{}', expected 'relative' or 'fixed'", + other + ))), + } + } +} + +/// Set operation on the inherited grain context of a multi-stage member. +/// +/// `mode` chooses the base context (parent grain for `Relative`, empty for +/// `Fixed`); the three lists then mutate that base — `exclude` removes, +/// `keep_only` intersects with the parent, `include` adds. +/// +/// Sourced from the `grain:` directive when present; otherwise mapped from +/// `add_group_by` / `reduce_by` / `group_by` (→ `include` / `exclude` / +/// `keep_only`) with `mode: Relative`. +#[derive(Clone)] +pub struct MultiStageGrain { + pub mode: MultiStageGrainMode, + pub exclude: Option>>, + pub keep_only: Option>>, + pub include: Option>>, +} + #[derive(Clone)] pub struct MultiStageProperties { - pub add_group_by: Option>>, + pub grain: MultiStageGrain, pub filter: Option, - pub reduce_by: Option>>, - pub group_by: Option>>, pub time_shift: Option, } @@ -71,17 +106,16 @@ impl MultiStageProperties { return Ok(None); } - let static_data = definition.static_data(); - let reduce_by = resolve_reference_paths(&static_data.reduce_by_references, compiler)?; - let add_group_by = resolve_reference_paths(&static_data.add_group_by_references, compiler)?; - let group_by = resolve_reference_paths(&static_data.group_by_references, compiler)?; + let grain = match definition.grain()? { + Some(g) => build_grain_from_directive(g, compiler)?, + None => build_grain_from_legacy(&definition.static_data(), compiler)?, + }; + let filter = build_filter(cube_name, definition.filter()?, compiler)?; Ok(Some(Self { - add_group_by, + grain, filter, - reduce_by, - group_by, time_shift, })) } @@ -95,15 +129,20 @@ impl MultiStageProperties { return Ok(None); } - let add_group_by = + // Dimensions only expose `add_group_by` today — the `grain:` directive + // is currently scoped to measures. + let include = resolve_reference_paths(&definition.static_data().add_group_by_references, compiler)?; let filter = build_filter(cube_name, definition.filter()?, compiler)?; Ok(Some(Self { - add_group_by, + grain: MultiStageGrain { + mode: MultiStageGrainMode::Relative, + exclude: None, + keep_only: None, + include, + }, filter, - reduce_by: None, - group_by: None, time_shift: None, })) } @@ -134,11 +173,16 @@ impl MultiStageProperties { None => None, }; + let grain = MultiStageGrain { + mode: self.grain.mode.clone(), + exclude: map_refs(&self.grain.exclude)?, + keep_only: map_refs(&self.grain.keep_only)?, + include: map_refs(&self.grain.include)?, + }; + Ok(Self { - add_group_by: map_refs(&self.add_group_by)?, + grain, filter, - reduce_by: map_refs(&self.reduce_by)?, - group_by: map_refs(&self.group_by)?, time_shift: self.time_shift.clone(), }) } @@ -160,6 +204,40 @@ fn resolve_reference_paths( } } +fn build_grain_from_directive( + grain: Rc, + compiler: &mut Compiler, +) -> Result { + let static_data = grain.static_data(); + if static_data.exclude.is_some() && static_data.keep_only.is_some() { + return Err(CubeError::user( + "Multi-stage grain cannot specify both `exclude` and `keep_only` — they are mutually exclusive ways of restricting the inherited context.".to_string(), + )); + } + let mode = match &static_data.mode { + Some(s) => MultiStageGrainMode::from_str(s)?, + None => MultiStageGrainMode::Relative, + }; + Ok(MultiStageGrain { + mode, + exclude: resolve_reference_paths(&static_data.exclude, compiler)?, + keep_only: resolve_reference_paths(&static_data.keep_only, compiler)?, + include: resolve_reference_paths(&static_data.include, compiler)?, + }) +} + +fn build_grain_from_legacy( + static_data: &MeasureDefinitionStatic, + compiler: &mut Compiler, +) -> Result { + Ok(MultiStageGrain { + mode: MultiStageGrainMode::Relative, + exclude: resolve_reference_paths(&static_data.reduce_by_references, compiler)?, + keep_only: resolve_reference_paths(&static_data.group_by_references, compiler)?, + include: resolve_reference_paths(&static_data.add_group_by_references, compiler)?, + }) +} + fn build_filter( _cube_name: &String, filter: Option>, diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs index 70530edee2edd..7eb18a16be011 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs @@ -179,7 +179,7 @@ impl DimensionSymbol { pub fn add_group_by(&self) -> Option<&Vec>> { self.multi_stage .as_ref() - .and_then(|m| m.add_group_by.as_ref()) + .and_then(|m| m.grain.include.as_ref()) } pub fn dimension_type(&self) -> &str { diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs index 79b19c94149d2..3c6da468b5359 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs @@ -461,17 +461,21 @@ impl MeasureSymbol { } pub fn reduce_by(&self) -> Option<&Vec>> { - self.multi_stage.as_ref().and_then(|m| m.reduce_by.as_ref()) + self.multi_stage + .as_ref() + .and_then(|m| m.grain.exclude.as_ref()) } pub fn add_group_by(&self) -> Option<&Vec>> { self.multi_stage .as_ref() - .and_then(|m| m.add_group_by.as_ref()) + .and_then(|m| m.grain.include.as_ref()) } pub fn group_by(&self) -> Option<&Vec>> { - self.multi_stage.as_ref().and_then(|m| m.group_by.as_ref()) + self.multi_stage + .as_ref() + .and_then(|m| m.grain.keep_only.as_ref()) } pub fn multi_stage(&self) -> Option<&MultiStageProperties> { diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml index 60f61e6fa2bdf..bd9b8236dfd33 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml @@ -66,3 +66,15 @@ cubes: - member: orders.status operator: equals values: [completed] + - name: revenue_with_grain + type: number + sql: "{CUBE.revenue}" + multi_stage: true + add_group_by: + - status + grain: + mode: fixed + exclude: + - status + include: + - city diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs index 594d78d17327a..d1ec0301829cd 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs @@ -474,7 +474,7 @@ fn new_patched_appends_to_existing_filters() { mod multi_stage { use super::*; - use crate::planner::MultiStageFilterMode; + use crate::planner::{MultiStageFilterMode, MultiStageGrainMode}; fn ctx() -> TestContext { let schema = MockSchema::from_yaml_file("common/multi_stage_filter.yaml"); @@ -490,13 +490,16 @@ mod multi_stage { assert!(measure.is_multi_stage()); let ms = measure.multi_stage().expect("multi_stage present"); - let reduce_by = ms.reduce_by.as_ref().expect("reduce_by"); - assert_eq!(reduce_by.len(), 1); - assert_eq!(reduce_by[0].full_name(), "orders.status"); + let exclude = ms.grain.exclude.as_ref().expect("exclude"); + assert_eq!(exclude.len(), 1); + assert_eq!(exclude[0].full_name(), "orders.status"); - let add_group_by = ms.add_group_by.as_ref().expect("add_group_by"); - assert_eq!(add_group_by.len(), 1); - assert_eq!(add_group_by[0].full_name(), "orders.city"); + let include = ms.grain.include.as_ref().expect("include"); + assert_eq!(include.len(), 1); + assert_eq!(include[0].full_name(), "orders.city"); + + assert_eq!(ms.grain.mode, MultiStageGrainMode::Relative); + assert!(ms.grain.keep_only.is_none()); } #[test] @@ -531,9 +534,9 @@ mod multi_stage { assert!(dim.is_multi_stage()); let ms = dim.multi_stage().expect("multi_stage present"); - let add_group_by = ms.add_group_by.as_ref().expect("add_group_by"); - assert_eq!(add_group_by.len(), 1); - assert_eq!(add_group_by[0].full_name(), "orders.status"); + let include = ms.grain.include.as_ref().expect("include"); + assert_eq!(include.len(), 1); + assert_eq!(include[0].full_name(), "orders.status"); let filter = ms.filter.as_ref().expect("filter present"); assert_eq!(filter.mode, MultiStageFilterMode::Relative); @@ -544,8 +547,8 @@ mod multi_stage { assert_eq!(filter.include_dimension.len(), 1); assert!(filter.include_measure.is_empty()); assert!(filter.include_time_dimension.is_empty()); - assert!(ms.reduce_by.is_none()); - assert!(ms.group_by.is_none()); + assert!(ms.grain.exclude.is_none()); + assert!(ms.grain.keep_only.is_none()); } #[test] @@ -571,6 +574,56 @@ mod multi_stage { assert_eq!(filter.mode, MultiStageFilterMode::Relative); } + #[test] + fn legacy_fields_populate_grain_with_relative_mode() { + let ctx = ctx(); + let m = ctx.create_measure("orders.revenue_filtered").unwrap(); + let measure = m.as_measure().unwrap(); + let ms = measure.multi_stage().expect("multi_stage present"); + + assert_eq!(ms.grain.mode, MultiStageGrainMode::Relative); + + let include = ms.grain.include.as_ref().expect("include"); + assert_eq!(include[0].full_name(), "orders.city"); + let exclude = ms.grain.exclude.as_ref().expect("exclude"); + assert_eq!(exclude[0].full_name(), "orders.status"); + assert!(ms.grain.keep_only.is_none()); + + // Proxies on `MeasureSymbol` must mirror the resolved grain. + let proxy_add = measure.add_group_by().expect("add_group_by proxy"); + let proxy_reduce = measure.reduce_by().expect("reduce_by proxy"); + assert_eq!(proxy_add[0].full_name(), "orders.city"); + assert_eq!(proxy_reduce[0].full_name(), "orders.status"); + assert!(measure.group_by().is_none()); + } + + #[test] + fn grain_directive_overrides_legacy_fields() { + let ctx = ctx(); + let m = ctx.create_measure("orders.revenue_with_grain").unwrap(); + let measure = m.as_measure().unwrap(); + let ms = measure.multi_stage().expect("multi_stage present"); + + assert_eq!(ms.grain.mode, MultiStageGrainMode::Fixed); + + let exclude = ms.grain.exclude.as_ref().expect("exclude"); + assert_eq!(exclude.len(), 1); + assert_eq!(exclude[0].full_name(), "orders.status"); + + let include = ms.grain.include.as_ref().expect("include"); + assert_eq!(include.len(), 1); + // Comes from `grain.include: [city]`, NOT from `add_group_by: [status]`. + assert_eq!(include[0].full_name(), "orders.city"); + + assert!(ms.grain.keep_only.is_none()); + + // Proxy methods reflect the grain — old add_group_by is shadowed. + let proxy_add = measure.add_group_by().expect("add_group_by proxy"); + assert_eq!(proxy_add[0].full_name(), "orders.city"); + let proxy_reduce = measure.reduce_by().expect("reduce_by proxy"); + assert_eq!(proxy_reduce[0].full_name(), "orders.status"); + } + #[test] fn measure_filter_keep_only_and_exclude_mutually_exclusive() { let schema = MockSchema::from_yaml_file("common/multi_stage_filter_invalid.yaml"); From 79dcee25c6c02b09dd53c1fe1a86bcbbfee53aa2 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 18:37:23 +0200 Subject: [PATCH 04/11] refactor(tesseract): thread MultiStageGrain through multi-stage planner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the three separate `reduce_by` / `add_group_by` / `group_by` fields on `MultiStageInodeMember` with the shared `MultiStageGrain`, and route every planner call site through `.grain()` rather than through per-field accessors. The inode now exposes only `grain()` and `time_shift()`; partition-shape helpers (`partition_filter`, `member_partition_by_logical`) take `&MultiStageGrain` directly. Add `Default` impls on `MultiStageGrain` and `MultiStageGrainMode` (Relative). Reuse it from `MultiStageProperties::from_dimension_definition` and at every "no directive, no legacy fields" fallback in the planner. The `use_window_path` check still gates only on `include` — `exclude` and `keep_only` change partition shape but are honoured by the window path through PARTITION BY at render time. Comment added so future work that extends window-path coverage revisits this assumption. Schema fixture and integration test for `grain.keep_only` rephrased without "legacy/new" wording so they stay accurate when the older fields go away. --- .../planner/planners/multi_stage/member.rs | 45 ++-------- .../multi_stage/member_query_planner.rs | 20 ++--- .../multi_stage/multi_stage_query_planner.rs | 86 +++++++++---------- .../src/planner/symbols/common/multi_stage.rs | 9 +- .../common/integration_multi_stage.yaml | 11 +++ .../tests/integration/multi_stage/group_by.rs | 22 +++++ 6 files changed, 92 insertions(+), 101 deletions(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member.rs index 0f0d652e7952f..9a429eecdc96b 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member.rs @@ -1,4 +1,4 @@ -use crate::planner::{MeasureTimeShifts, MemberSymbol}; +use crate::planner::{MeasureTimeShifts, MemberSymbol, MultiStageGrain}; use std::rc::Rc; /// Description of the time-series CTE driving a rolling-window @@ -113,15 +113,12 @@ pub enum MultiStageInodeMemberType { /// Non-leaf node in a multi-stage tree. Bundles the semantic /// `inode_type` (Rank / Aggregate / Calculate / Dimension / -/// RollingWindow) with the partition-shaping flags driven by the -/// measure's data-model directives: `reduce_by`, `add_group_by`, -/// `group_by`, `time_shift`. +/// RollingWindow) with the partition-shaping `grain` carried over from +/// the measure's data-model directives and an optional `time_shift`. #[derive(Clone)] pub struct MultiStageInodeMember { inode_type: MultiStageInodeMemberType, - reduce_by: Vec>, - add_group_by: Vec>, - group_by: Option>>, + grain: MultiStageGrain, time_shift: Option, /// Optimisation flag: this Aggregate inode is a safe candidate for /// the `window`-based render — single measure dep, additive identity @@ -134,16 +131,12 @@ pub struct MultiStageInodeMember { impl MultiStageInodeMember { pub fn new( inode_type: MultiStageInodeMemberType, - reduce_by: Vec>, - add_group_by: Vec>, - group_by: Option>>, + grain: MultiStageGrain, time_shift: Option, ) -> Self { Self { inode_type, - reduce_by, - add_group_by, - group_by, + grain, time_shift, use_window_path: false, } @@ -162,30 +155,8 @@ impl MultiStageInodeMember { &self.inode_type } - pub fn reduce_by(&self) -> Vec { - self.reduce_by.iter().map(|s| s.full_name()).collect() - } - - pub fn add_group_by(&self) -> Vec { - self.add_group_by.iter().map(|s| s.full_name()).collect() - } - - pub fn reduce_by_symbols(&self) -> &Vec> { - &self.reduce_by - } - - pub fn add_group_by_symbols(&self) -> &Vec> { - &self.add_group_by - } - - pub fn group_by(&self) -> Option> { - self.group_by - .as_ref() - .map(|g| g.iter().map(|s| s.full_name()).collect()) - } - - pub fn group_by_symbols(&self) -> &Option>> { - &self.group_by + pub fn grain(&self) -> &MultiStageGrain { + &self.grain } pub fn time_shift(&self) -> &Option { diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs index bbb86fa306745..f43d5170bbf8d 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs @@ -7,6 +7,7 @@ use crate::planner::planners::{multi_stage::RollingWindowType, QueryPlanner, Sim use crate::planner::query_tools::QueryTools; use crate::planner::GranularityHelper; use crate::planner::MemberSymbol; +use crate::planner::MultiStageGrain; use crate::planner::{OrderByItem, QueryProperties}; use cubenativeutils::CubeError; @@ -203,10 +204,7 @@ impl MultiStageMemberQueryPlanner { &self, multi_stage_member: &MultiStageInodeMember, ) -> Result, CubeError> { - let partition_by = self.member_partition_by_logical( - &multi_stage_member.reduce_by_symbols(), - &multi_stage_member.group_by_symbols(), - ); + let partition_by = self.member_partition_by_logical(multi_stage_member.grain()); // Rank always uses a window function. Aggregate inodes are // routed through `FullKeyAggregate` by default; only the narrow @@ -520,24 +518,20 @@ impl MultiStageMemberQueryPlanner { .collect_vec() } - fn member_partition_by_logical( - &self, - reduce_by: &Vec>, - group_by: &Option>>, - ) -> Vec> { + fn member_partition_by_logical(&self, grain: &MultiStageGrain) -> Vec> { let dimensions = self.all_dimensions(); - let dimensions = if !reduce_by.is_empty() { + let dimensions = if let Some(exclude) = &grain.exclude { dimensions .into_iter() - .filter(|d| !reduce_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) .collect_vec() } else { dimensions }; - let dimensions = if let Some(group_by) = group_by { + let dimensions = if let Some(keep_only) = &grain.keep_only { dimensions .into_iter() - .filter(|d| group_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) .collect_vec() } else { dimensions diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index ff2aa64584056..97bed2e5422b0 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -22,6 +22,7 @@ use crate::planner::MeasureKind; use crate::planner::MemberSymbol; use crate::planner::MultiStageFilter; use crate::planner::MultiStageFilterMode; +use crate::planner::MultiStageGrain; use crate::planner::QueryProperties; use cubenativeutils::CubeError; use indexmap::IndexMap; @@ -141,10 +142,9 @@ impl MultiStageQueryPlanner { /// Classifies `base_member` into a `MultiStageInodeMember` — picks /// the inode kind (Rank / Aggregate / Calculate for a measure, - /// Dimension for a dimension) and pulls the partition-shaping - /// flags (`reduce_by`, `add_group_by`, `group_by`, `time_shift`) - /// out of the data-model definition. Returns the inode together - /// with the leaf's `is_ungrupped` flag. + /// Dimension for a dimension) and carries over the partition-shaping + /// `grain` and optional `time_shift` from the data-model definition. + /// Returns the inode together with the leaf's `is_ungrupped` flag. fn create_multi_stage_inode_member( &self, base_member: Rc, @@ -164,39 +164,35 @@ impl MultiStageQueryPlanner { _ => self.query_properties.ungrouped(), }; - let reduce_by = measure.reduce_by().cloned().unwrap_or_default(); - let add_group_by = measure.add_group_by().cloned().unwrap_or_default(); - let group_by = measure.group_by().cloned(); + let grain = measure + .multi_stage() + .map(|ms| ms.grain.clone()) + .unwrap_or_default(); + // Window-path eligibility intentionally checks only `include`: + // `exclude` and `keep_only` are realised through the window's + // PARTITION BY at render time, so they don't disqualify the + // path. `include` extends the leaf grain, which the JOIN-model + // is required for. Revisit if window-path expands to cases + // where exclude/keep_only affect render correctness. + let has_include = grain.include.as_ref().is_some_and(|v| !v.is_empty()); let use_window_path = matches!(member_type, MultiStageInodeMemberType::Aggregate) - && add_group_by.is_empty() + && !has_include && Self::is_window_path_eligible(&base_member); ( - MultiStageInodeMember::new( - member_type, - reduce_by, - add_group_by, - group_by, - time_shift, - ) - .with_use_window_path(use_window_path), + MultiStageInodeMember::new(member_type, grain, time_shift) + .with_use_window_path(use_window_path), is_ungrupped, ) } else { - let add_group_by = if let Ok(dimension) = base_member.as_dimension() { - dimension.add_group_by().cloned().unwrap_or_default() - } else { - vec![] - }; + let grain = base_member + .as_dimension() + .ok() + .and_then(|d| d.multi_stage().map(|ms| ms.grain.clone())) + .unwrap_or_default(); resolved_multi_stage_dimensions .insert(base_member.clone().resolve_reference_chain().full_name()); ( - MultiStageInodeMember::new( - MultiStageInodeMemberType::Dimension, - vec![], - add_group_by, - None, - None, - ), + MultiStageInodeMember::new(MultiStageInodeMemberType::Dimension, grain, None), false, ) }; @@ -285,24 +281,23 @@ impl MultiStageQueryPlanner { /// grain. /// /// FIXME: merge with `MultiStageMemberQueryPlanner::member_partition_by_logical` - /// — both apply the same reduce_by/group_by reshape on different inputs; - /// keeping two copies invites silent drift when only one is updated. + /// — both apply the same grain (exclude / keep_only) reshape on different + /// inputs; keeping two copies invites silent drift when only one is updated. fn partition_filter( dims: &Vec>, - reduce_by: &Vec>, - group_by: &Option>>, + grain: &MultiStageGrain, ) -> Vec> { - let dims: Vec> = if !reduce_by.is_empty() { + let dims: Vec> = if let Some(exclude) = &grain.exclude { dims.iter() - .filter(|d| !reduce_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) .cloned() .collect() } else { dims.clone() }; - if let Some(group_by) = group_by { + if let Some(keep_only) = &grain.keep_only { dims.into_iter() - .filter(|d| group_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) .collect() } else { dims @@ -511,7 +506,11 @@ impl MultiStageQueryPlanner { let (multi_stage_member, is_ungrupped) = self .create_multi_stage_inode_member(member.clone(), resolved_multi_stage_dimensions)?; - let mut dimensions_to_add = multi_stage_member.add_group_by_symbols().clone(); + let mut dimensions_to_add = multi_stage_member + .grain() + .include + .clone() + .unwrap_or_default(); if let Some(case) = member.case() { if let Some(switch_dim) = case.case_switch_dimension() { @@ -553,12 +552,9 @@ impl MultiStageQueryPlanner { MultiStageInodeMemberType::Aggregate ) { - let reduce_by = multi_stage_member.reduce_by_symbols().clone(); - let group_by = multi_stage_member.group_by_symbols().clone(); - let dims = - Self::partition_filter(new_state.dimensions(), &reduce_by, &group_by); - let time_dims = - Self::partition_filter(new_state.time_dimensions(), &reduce_by, &group_by); + let grain = multi_stage_member.grain(); + let dims = Self::partition_filter(new_state.dimensions(), grain); + let time_dims = Self::partition_filter(new_state.time_dimensions(), grain); new_state.set_dimensions(dims); new_state.set_time_dimensions(time_dims); } @@ -788,9 +784,7 @@ impl MultiStageQueryPlanner { let inode_member = MultiStageInodeMember::new( MultiStageInodeMemberType::RollingWindow(rolling_window_descr), - vec![], - vec![], - None, + MultiStageGrain::default(), None, ); diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs index d0df36f893a61..120a7141a0a99 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs @@ -52,8 +52,9 @@ pub struct MultiStageFilter { pub include_measure: Vec, } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Default)] pub enum MultiStageGrainMode { + #[default] Relative, Fixed, } @@ -80,7 +81,7 @@ impl MultiStageGrainMode { /// Sourced from the `grain:` directive when present; otherwise mapped from /// `add_group_by` / `reduce_by` / `group_by` (→ `include` / `exclude` / /// `keep_only`) with `mode: Relative`. -#[derive(Clone)] +#[derive(Clone, Default)] pub struct MultiStageGrain { pub mode: MultiStageGrainMode, pub exclude: Option>>, @@ -137,10 +138,8 @@ impl MultiStageProperties { Ok(Some(Self { grain: MultiStageGrain { - mode: MultiStageGrainMode::Relative, - exclude: None, - keep_only: None, include, + ..Default::default() }, filter, time_shift: None, diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml index d3bdb54bea748..06f74c3c6848a 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml @@ -221,6 +221,17 @@ cubes: - orders.status - orders.created_at.month + # Same partition shape as `amount_group_by_status`, expressed via + # `grain.keep_only`. + - name: amount_grain_keep_only_status + type: sum + sql: "{CUBE.total_amount}" + multi_stage: true + grain: + mode: relative + keep_only: + - orders.status + - name: amount_prev_month type: number sql: "{CUBE.total_amount}" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs index fdf8880297dd6..46617ded656f6 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs @@ -98,3 +98,25 @@ async fn test_group_by_equals_query_dims() { insta::assert_snapshot!(result); } } + +// Smoke test: `grain.keep_only: [status]` yields the same partition as +// `group_by: [status]`. Snapshot must match `test_group_by_override`. +#[tokio::test(flavor = "multi_thread")] +async fn test_grain_keep_only_matches_group_by_override() { + let ctx = create_context(); + + let query = indoc! {r#" + measures: + - orders.amount_grain_keep_only_status + dimensions: + - orders.category + order: + - id: orders.category + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} From 17eb98196e06af0ec220d63f6a3fc239d8f7c395 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 18:46:10 +0200 Subject: [PATCH 05/11] refactor(tesseract): drop measure/dimension partition proxies in favor of grain Remove `MeasureSymbol::reduce_by/add_group_by/group_by` and `DimensionSymbol::add_group_by`. Their only role was to hide the `multi_stage().grain.{exclude,include,keep_only}` access behind the old field names; now both production call sites (`join_hints_collector`, `logical_plan::multistage::dimension::join_dimensions`) and the symbol-level tests read the grain directly. `MeasureSymbol` and `DimensionSymbol` are now grain-agnostic apart from exposing `multi_stage() -> Option<&MultiStageProperties>`; everything partition-shape-related lives on `MultiStageGrain`. --- .../src/logical_plan/multistage/dimension.rs | 5 ++++- .../planner/collectors/join_hints_collector.rs | 4 ++-- .../src/planner/symbols/dimension_symbol.rs | 6 ------ .../src/planner/symbols/measure_symbol.rs | 18 ------------------ .../cubesqlplanner/src/tests/measure_symbol.rs | 18 ++---------------- 5 files changed, 8 insertions(+), 43 deletions(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs index 6b6089eb47880..ba2a6217ad3c2 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs @@ -48,7 +48,10 @@ impl MultiStageDimensionCalculation { pub fn join_dimensions(&self) -> Result>, CubeError> { let mut result = if let Ok(dimension) = self.multi_stage_dimension.as_dimension() { - dimension.add_group_by().cloned().unwrap_or_default() + dimension + .multi_stage() + .and_then(|m| m.grain.include.clone()) + .unwrap_or_default() } else { vec![] }; diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/collectors/join_hints_collector.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/collectors/join_hints_collector.rs index e93dc9a872af5..121fcd4adaeef 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/collectors/join_hints_collector.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/collectors/join_hints_collector.rs @@ -28,8 +28,8 @@ impl TraversalVisitor for JoinHintsCollector { ) -> Result, CubeError> { if node.is_multi_stage() { if let Ok(dim) = node.as_dimension() { - if let Some(add_group_by) = dim.add_group_by() { - for item in add_group_by.iter() { + if let Some(include) = dim.multi_stage().and_then(|m| m.grain.include.as_ref()) { + for item in include.iter() { self.apply(item, &())?; } } diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs index 7eb18a16be011..9db8ddcad6f79 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/dimension_symbol.rs @@ -176,12 +176,6 @@ impl DimensionSymbol { &self.mask_sql } - pub fn add_group_by(&self) -> Option<&Vec>> { - self.multi_stage - .as_ref() - .and_then(|m| m.grain.include.as_ref()) - } - pub fn dimension_type(&self) -> &str { self.kind.dimension_type_str() } diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs index 3c6da468b5359..c10feac1c58d4 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/measure_symbol.rs @@ -460,24 +460,6 @@ impl MeasureSymbol { &self.measure_order_by } - pub fn reduce_by(&self) -> Option<&Vec>> { - self.multi_stage - .as_ref() - .and_then(|m| m.grain.exclude.as_ref()) - } - - pub fn add_group_by(&self) -> Option<&Vec>> { - self.multi_stage - .as_ref() - .and_then(|m| m.grain.include.as_ref()) - } - - pub fn group_by(&self) -> Option<&Vec>> { - self.multi_stage - .as_ref() - .and_then(|m| m.grain.keep_only.as_ref()) - } - pub fn multi_stage(&self) -> Option<&MultiStageProperties> { self.multi_stage.as_ref() } diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs index d1ec0301829cd..24b7e6bea791a 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs @@ -559,8 +559,6 @@ mod multi_stage { assert!(!measure.is_multi_stage()); assert!(measure.multi_stage().is_none()); - assert!(measure.reduce_by().is_none()); - assert!(measure.add_group_by().is_none()); } #[test] @@ -588,13 +586,6 @@ mod multi_stage { let exclude = ms.grain.exclude.as_ref().expect("exclude"); assert_eq!(exclude[0].full_name(), "orders.status"); assert!(ms.grain.keep_only.is_none()); - - // Proxies on `MeasureSymbol` must mirror the resolved grain. - let proxy_add = measure.add_group_by().expect("add_group_by proxy"); - let proxy_reduce = measure.reduce_by().expect("reduce_by proxy"); - assert_eq!(proxy_add[0].full_name(), "orders.city"); - assert_eq!(proxy_reduce[0].full_name(), "orders.status"); - assert!(measure.group_by().is_none()); } #[test] @@ -612,16 +603,11 @@ mod multi_stage { let include = ms.grain.include.as_ref().expect("include"); assert_eq!(include.len(), 1); - // Comes from `grain.include: [city]`, NOT from `add_group_by: [status]`. + // Comes from `grain.include: [city]`, not from the sibling + // `add_group_by: [status]` — the directive wins when both are set. assert_eq!(include[0].full_name(), "orders.city"); assert!(ms.grain.keep_only.is_none()); - - // Proxy methods reflect the grain — old add_group_by is shadowed. - let proxy_add = measure.add_group_by().expect("add_group_by proxy"); - assert_eq!(proxy_add[0].full_name(), "orders.city"); - let proxy_reduce = measure.reduce_by().expect("reduce_by proxy"); - assert_eq!(proxy_reduce[0].full_name(), "orders.status"); } #[test] From 0018d285b7ccdd12bc8a4a6647307c6ec78b18ea Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 27 May 2026 23:12:40 +0200 Subject: [PATCH 06/11] feat(tesseract): branch multi-stage partition_filter on grain mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Honour `MultiStageGrainMode::Fixed` in `partition_filter`: - `Relative` (existing): start from parent state's dims and apply `exclude` → `keep_only`. - `Fixed`: discard the parent state, start from the original query context (`root_state.dimensions/time_dimensions`), and intersect with `keep_only`. `exclude` is undefined here per the spec (validator forbids `exclude` + `keep_only` together). `include` continues to be appended via `add_dimensions(...)` outside the helper, so the full FIXED result is `(root_dims ∩ keep_only) ∪ include`, and FIXED with no keep_only and no include collapses to an empty grain (grand total). `member_partition_by_logical` is intentionally not updated: its input is `description.state()` which has already been shaped by `partition_filter`, so re-applying `exclude` / `keep_only` over the narrowed set is idempotent in every allowed combination. Fixtures and integration smoke tests cover three top-level scenarios: grand total (FIXED, no narrowing), FIXED + keep_only with the dim present in the query, and FIXED + keep_only with the dim absent. Snapshots are filled when the suite runs against Postgres. --- .../multi_stage/multi_stage_query_planner.rs | 72 ++++++++++++++----- .../common/integration_multi_stage.yaml | 25 +++++++ .../tests/integration/multi_stage/group_by.rs | 70 ++++++++++++++++++ 3 files changed, 149 insertions(+), 18 deletions(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index 97bed2e5422b0..f3e71e7ef132b 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -23,6 +23,7 @@ use crate::planner::MemberSymbol; use crate::planner::MultiStageFilter; use crate::planner::MultiStageFilterMode; use crate::planner::MultiStageGrain; +use crate::planner::MultiStageGrainMode; use crate::planner::QueryProperties; use cubenativeutils::CubeError; use indexmap::IndexMap; @@ -280,27 +281,53 @@ impl MultiStageQueryPlanner { /// reduce_by / group_by actually shrinks the partition vs the leaf /// grain. /// + /// Applies the partition-shaping part of `grain` to a dimension list. + /// `dims` is the parent state's slice (used by `Relative`); `root_dims` + /// is the query-root slice (used by `Fixed`, which discards inherited + /// narrowing). `include` is not handled here — it is appended to the + /// resulting state via `add_dimensions` outside this helper. + /// /// FIXME: merge with `MultiStageMemberQueryPlanner::member_partition_by_logical` - /// — both apply the same grain (exclude / keep_only) reshape on different - /// inputs; keeping two copies invites silent drift when only one is updated. + /// — both apply the same grain reshape on different inputs; keeping two + /// copies invites silent drift when only one is updated. fn partition_filter( dims: &Vec>, + root_dims: &Vec>, grain: &MultiStageGrain, ) -> Vec> { - let dims: Vec> = if let Some(exclude) = &grain.exclude { - dims.iter() - .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) - .cloned() - .collect() - } else { - dims.clone() - }; - if let Some(keep_only) = &grain.keep_only { - dims.into_iter() - .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) - .collect() - } else { - dims + match grain.mode { + MultiStageGrainMode::Relative => { + let dims: Vec> = if let Some(exclude) = &grain.exclude { + dims.iter() + .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) + .cloned() + .collect() + } else { + dims.clone() + }; + if let Some(keep_only) = &grain.keep_only { + dims.into_iter() + .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) + .collect() + } else { + dims + } + } + MultiStageGrainMode::Fixed => { + // Discard the parent state. `exclude` is undefined here per + // the spec; the validator forbids `exclude` + `keep_only` + // together so the only relevant operator left is `keep_only`, + // which intersects with the original query context. + if let Some(keep_only) = &grain.keep_only { + root_dims + .iter() + .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) + .cloned() + .collect() + } else { + Vec::new() + } + } } } @@ -553,8 +580,17 @@ impl MultiStageQueryPlanner { ) { let grain = multi_stage_member.grain(); - let dims = Self::partition_filter(new_state.dimensions(), grain); - let time_dims = Self::partition_filter(new_state.time_dimensions(), grain); + let root_state = self.root_state(); + let dims = Self::partition_filter( + new_state.dimensions(), + root_state.dimensions(), + grain, + ); + let time_dims = Self::partition_filter( + new_state.time_dimensions(), + root_state.time_dimensions(), + grain, + ); new_state.set_dimensions(dims); new_state.set_time_dimensions(time_dims); } diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml index 06f74c3c6848a..8ab9540498919 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml @@ -232,6 +232,31 @@ cubes: keep_only: - orders.status + # ── grain mode: fixed ────────────────────────────────────── + # `mode: fixed` with no keep_only/include → grand total. The + # inner CTE has no GROUP BY; the scalar replicates across the + # outer query's rows. + - name: amount_grain_fixed_grand_total + type: sum + sql: "{CUBE.total_amount}" + multi_stage: true + grain: + mode: fixed + + # `mode: fixed` + keep_only intersects with the original query + # context (not with whatever an ancestor narrowed it to). At + # top-level the parent context equals the query, so this is + # observable via "keep_only dim missing from the query" — the + # measure collapses to a grand total in that case. + - name: amount_grain_fixed_keep_only_status + type: sum + sql: "{CUBE.total_amount}" + multi_stage: true + grain: + mode: fixed + keep_only: + - orders.status + - name: amount_prev_month type: number sql: "{CUBE.total_amount}" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs index 46617ded656f6..3319d0f1ec7ee 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs @@ -120,3 +120,73 @@ async fn test_grain_keep_only_matches_group_by_override() { insta::assert_snapshot!(result); } } + +// `grain.mode: fixed` (no keep_only/include) → grand total. The measure +// value is the SUM across all rows, replicated per query category. +#[tokio::test(flavor = "multi_thread")] +async fn test_grain_fixed_grand_total() { + let ctx = create_context(); + + let query = indoc! {r#" + measures: + - orders.amount_grain_fixed_grand_total + dimensions: + - orders.category + order: + - id: orders.category + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} + +// `grain.mode: fixed, keep_only: [status]` with status absent from the +// query → effective grain is empty (grand total), since the FIXED base is +// the original query context, not the parent state. +#[tokio::test(flavor = "multi_thread")] +async fn test_grain_fixed_keep_only_dim_not_in_query() { + let ctx = create_context(); + + let query = indoc! {r#" + measures: + - orders.amount_grain_fixed_keep_only_status + dimensions: + - orders.category + order: + - id: orders.category + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} + +// `grain.mode: fixed, keep_only: [status]` with status in the query → +// effective grain is [status], matching the RELATIVE behavior at top +// level (parent context == query). Smoke test that both modes coexist. +#[tokio::test(flavor = "multi_thread")] +async fn test_grain_fixed_keep_only_dim_in_query() { + let ctx = create_context(); + + let query = indoc! {r#" + measures: + - orders.amount_grain_fixed_keep_only_status + dimensions: + - orders.status + - orders.category + order: + - id: orders.status + - id: orders.category + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} From 272eb6698497c83893c81ab743468ed29b8d6704 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 28 May 2026 10:12:37 +0200 Subject: [PATCH 07/11] revert(tesseract): drop `mode` from multi-stage grain directive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Roll back `MultiStageGrainMode` and the `mode` field on `MultiStageGrain`. The FIXED-vs-RELATIVE distinction is technically implementable but its behaviour is difficult to communicate to schema authors; ship grain without it for now and keep only the always-relative semantics (`exclude` removes from the parent grain, `keep_only` intersects with it, `include` adds). Removed across the stack: - Rust bridge: `mode: Option` on `MultiStageGrainReferencesStatic`, the corresponding mock builder field, and the YAML parser entry. - Planner symbols: the `MultiStageGrainMode` enum, the `mode` field on `MultiStageGrain`, and every site that read or wrote it (`build_grain_from_directive`, `build_grain_from_legacy`, `apply_to_deps`). - Planner core: `partition_filter` collapses back to a single relative branch (no `match grain.mode`, no `root_dims` parameter). - Tests: the three FIXED integration smoke tests, the `mock_measure_definition` yaml-grain asserts that checked `static_data.mode`, and every `assert_eq!(ms.grain.mode, …)` in `tests/measure_symbol.rs`. Test fixtures (`integration_multi_stage.yaml`, `multi_stage_filter.yaml`) no longer set `mode:`. - JS schema: `mode` removed from the `MultiStageGrain` Joi schema and from the `MultiStageGrainDirective` TS type. The `multi-stage grain — invalid mode rejected` validator test is gone. - Backend-native bridge harness: `mode` dropped from the JS fixture and from the bridge coverage expected-fields list. FIXED implementation is recoverable via `git revert 7ef91664d5` (the commit that introduced the `partition_filter` mode branch). --- .../test/bridge/bridge-fixtures.ts | 1 - .../bridge/object-bridges-coverage.test.ts | 2 +- .../src/compiler/CubeEvaluator.ts | 1 - .../src/compiler/CubeValidator.ts | 1 - .../test/unit/cube-validator.test.ts | 29 +------- .../src/cube_bridge/multi_stage_grain.rs | 1 - .../multi_stage/multi_stage_query_planner.rs | 71 ++++++------------- .../src/planner/symbols/common/multi_stage.rs | 35 +-------- .../cube_bridge/mock_measure_definition.rs | 3 - .../cube_bridge/mock_multi_stage_grain.rs | 3 - .../test_fixtures/cube_bridge/yaml/measure.rs | 3 - .../common/integration_multi_stage.yaml | 26 ------- .../yaml_files/common/multi_stage_filter.yaml | 1 - .../tests/integration/multi_stage/group_by.rs | 70 ------------------ .../src/tests/measure_symbol.rs | 9 +-- 15 files changed, 27 insertions(+), 229 deletions(-) diff --git a/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts b/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts index 9b472f52172af..67fb875c3af11 100644 --- a/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts +++ b/packages/cubejs-backend-native/test/bridge/bridge-fixtures.ts @@ -90,7 +90,6 @@ export const multiStageFilterFixture = (): unknown => ({ // schema contract. `include` is a plain reference list, not the structured // filter items the filter bridge uses. export const multiStageGrainFixture = (): unknown => ({ - mode: 'fixed', excludeReferences: ['orders.region'], includeReferences: ['orders.category'], }); diff --git a/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts b/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts index 585cb88237e86..629ae33d5945a 100644 --- a/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts +++ b/packages/cubejs-backend-native/test/bridge/object-bridges-coverage.test.ts @@ -206,7 +206,7 @@ const BRIDGES: BridgeSpec[] = [ }, { name: 'multiStageGrain', - expected: ['exclude', 'include', 'keep_only', 'mode'], + expected: ['exclude', 'include', 'keep_only'], }, { name: 'preAggregationDescription', diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts index 63a0b88eaa706..b86c36102d24b 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeEvaluator.ts @@ -55,7 +55,6 @@ export type MultiStageFilterDirective = { }; export type MultiStageGrainDirective = { - mode?: 'relative' | 'fixed'; exclude?: (...args: Array) => Array; keepOnly?: (...args: Array) => Array; include?: (...args: Array) => Array; diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts index c574c282a9cd0..ccb4aa918168a 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts @@ -869,7 +869,6 @@ const MultiStageFilter = Joi.object().keys({ }).nand('exclude', 'keepOnly'); const MultiStageGrain = Joi.object().keys({ - mode: Joi.string().valid('relative', 'fixed'), exclude: Joi.func(), keepOnly: Joi.func(), include: Joi.func(), diff --git a/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts b/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts index 5690f2e3ba328..183b44abe4fcc 100644 --- a/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/cube-validator.test.ts @@ -775,7 +775,6 @@ describe('Cube Validation', () => { type: 'sum', sql: () => '', grain: { - mode: 'relative', exclude: () => [], include: () => [], } @@ -809,7 +808,7 @@ describe('Cube Validation', () => { multiStage: true, type: 'sum', sql: () => '', - grain: { mode: 'fixed', keepOnly: () => [] } + grain: { keepOnly: () => [] } }, only_include: { multiStage: true, @@ -859,32 +858,6 @@ describe('Cube Validation', () => { expect(validationResult.error).toBeTruthy(); }); - it('multi-stage grain — invalid mode rejected', async () => { - const cubeValidator = new CubeValidator(new CubeSymbols()); - const cube = { - name: 'name', - sql: () => '', - fileName: 'fileName', - measures: { - bad_mode: { - multiStage: true, - type: 'sum', - sql: () => '', - grain: { mode: 'RELATIVE' } - } - } - }; - - const validationResult = cubeValidator.validate(cube, { - error: (message: any, _e: any) => { - console.log(message); - expect(message).toContain('measures.bad_mode.grain.mode'); - } - } as any); - - expect(validationResult.error).toBeTruthy(); - }); - it('multi-stage grain — exclude must be a function', async () => { const cubeValidator = new CubeValidator(new CubeSymbols()); const cube = { diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs index 068900544a00d..514a087696114 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/cube_bridge/multi_stage_grain.rs @@ -8,7 +8,6 @@ use std::rc::Rc; #[derive(Serialize, Deserialize, Debug, Clone, nativebridge::NativeBridgeStatic)] pub struct MultiStageGrainReferencesStatic { - pub mode: Option, #[serde(rename = "excludeReferences")] pub exclude: Option>, #[serde(rename = "keepOnlyReferences")] diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index f3e71e7ef132b..66aa180db3593 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -23,7 +23,6 @@ use crate::planner::MemberSymbol; use crate::planner::MultiStageFilter; use crate::planner::MultiStageFilterMode; use crate::planner::MultiStageGrain; -use crate::planner::MultiStageGrainMode; use crate::planner::QueryProperties; use cubenativeutils::CubeError; use indexmap::IndexMap; @@ -281,53 +280,32 @@ impl MultiStageQueryPlanner { /// reduce_by / group_by actually shrinks the partition vs the leaf /// grain. /// - /// Applies the partition-shaping part of `grain` to a dimension list. - /// `dims` is the parent state's slice (used by `Relative`); `root_dims` - /// is the query-root slice (used by `Fixed`, which discards inherited - /// narrowing). `include` is not handled here — it is appended to the - /// resulting state via `add_dimensions` outside this helper. + /// Applies the partition-shaping part of `grain` to a parent-state + /// dimension list: `exclude` removes matching dims, then `keep_only` + /// intersects what's left. `include` is appended outside this helper + /// via `add_dimensions`. /// /// FIXME: merge with `MultiStageMemberQueryPlanner::member_partition_by_logical` /// — both apply the same grain reshape on different inputs; keeping two /// copies invites silent drift when only one is updated. fn partition_filter( dims: &Vec>, - root_dims: &Vec>, grain: &MultiStageGrain, ) -> Vec> { - match grain.mode { - MultiStageGrainMode::Relative => { - let dims: Vec> = if let Some(exclude) = &grain.exclude { - dims.iter() - .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) - .cloned() - .collect() - } else { - dims.clone() - }; - if let Some(keep_only) = &grain.keep_only { - dims.into_iter() - .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) - .collect() - } else { - dims - } - } - MultiStageGrainMode::Fixed => { - // Discard the parent state. `exclude` is undefined here per - // the spec; the validator forbids `exclude` + `keep_only` - // together so the only relevant operator left is `keep_only`, - // which intersects with the original query context. - if let Some(keep_only) = &grain.keep_only { - root_dims - .iter() - .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) - .cloned() - .collect() - } else { - Vec::new() - } - } + let dims: Vec> = if let Some(exclude) = &grain.exclude { + dims.iter() + .filter(|d| !exclude.iter().any(|m| d.has_member_in_reference_chain(m))) + .cloned() + .collect() + } else { + dims.clone() + }; + if let Some(keep_only) = &grain.keep_only { + dims.into_iter() + .filter(|d| keep_only.iter().any(|m| d.has_member_in_reference_chain(m))) + .collect() + } else { + dims } } @@ -580,17 +558,8 @@ impl MultiStageQueryPlanner { ) { let grain = multi_stage_member.grain(); - let root_state = self.root_state(); - let dims = Self::partition_filter( - new_state.dimensions(), - root_state.dimensions(), - grain, - ); - let time_dims = Self::partition_filter( - new_state.time_dimensions(), - root_state.time_dimensions(), - grain, - ); + let dims = Self::partition_filter(new_state.dimensions(), grain); + let time_dims = Self::partition_filter(new_state.time_dimensions(), grain); new_state.set_dimensions(dims); new_state.set_time_dimensions(time_dims); } diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs index 120a7141a0a99..c02555358dcef 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs @@ -52,38 +52,16 @@ pub struct MultiStageFilter { pub include_measure: Vec, } -#[derive(Clone, Debug, PartialEq, Eq, Default)] -pub enum MultiStageGrainMode { - #[default] - Relative, - Fixed, -} - -impl MultiStageGrainMode { - fn from_str(s: &str) -> Result { - match s { - "relative" => Ok(Self::Relative), - "fixed" => Ok(Self::Fixed), - other => Err(CubeError::user(format!( - "Unknown multi-stage grain mode '{}', expected 'relative' or 'fixed'", - other - ))), - } - } -} - /// Set operation on the inherited grain context of a multi-stage member. /// -/// `mode` chooses the base context (parent grain for `Relative`, empty for -/// `Fixed`); the three lists then mutate that base — `exclude` removes, -/// `keep_only` intersects with the parent, `include` adds. +/// The three lists mutate the parent grain — `exclude` removes, +/// `keep_only` intersects, `include` adds. /// /// Sourced from the `grain:` directive when present; otherwise mapped from /// `add_group_by` / `reduce_by` / `group_by` (→ `include` / `exclude` / -/// `keep_only`) with `mode: Relative`. +/// `keep_only`). #[derive(Clone, Default)] pub struct MultiStageGrain { - pub mode: MultiStageGrainMode, pub exclude: Option>>, pub keep_only: Option>>, pub include: Option>>, @@ -173,7 +151,6 @@ impl MultiStageProperties { }; let grain = MultiStageGrain { - mode: self.grain.mode.clone(), exclude: map_refs(&self.grain.exclude)?, keep_only: map_refs(&self.grain.keep_only)?, include: map_refs(&self.grain.include)?, @@ -213,12 +190,7 @@ fn build_grain_from_directive( "Multi-stage grain cannot specify both `exclude` and `keep_only` — they are mutually exclusive ways of restricting the inherited context.".to_string(), )); } - let mode = match &static_data.mode { - Some(s) => MultiStageGrainMode::from_str(s)?, - None => MultiStageGrainMode::Relative, - }; Ok(MultiStageGrain { - mode, exclude: resolve_reference_paths(&static_data.exclude, compiler)?, keep_only: resolve_reference_paths(&static_data.keep_only, compiler)?, include: resolve_reference_paths(&static_data.include, compiler)?, @@ -230,7 +202,6 @@ fn build_grain_from_legacy( compiler: &mut Compiler, ) -> Result { Ok(MultiStageGrain { - mode: MultiStageGrainMode::Relative, exclude: resolve_reference_paths(&static_data.reduce_by_references, compiler)?, keep_only: resolve_reference_paths(&static_data.group_by_references, compiler)?, include: resolve_reference_paths(&static_data.add_group_by_references, compiler)?, diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs index 2c3dc6e1a0b77..5e9ef880c22bc 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_measure_definition.rs @@ -365,7 +365,6 @@ mod tests { sql: \"{CUBE.amount}\" multi_stage: true grain: - mode: fixed include: - region - category @@ -376,7 +375,6 @@ mod tests { assert!(measure.has_grain().unwrap()); let grain = measure.grain().unwrap().expect("grain present"); let static_data = grain.static_data(); - assert_eq!(static_data.mode.as_deref(), Some("fixed")); assert_eq!( static_data.include, Some(vec!["region".to_string(), "category".to_string()]) @@ -400,7 +398,6 @@ mod tests { let grain = measure.grain().unwrap().expect("grain present"); let static_data = grain.static_data(); - assert_eq!(static_data.mode, None); assert_eq!(static_data.exclude, Some(vec!["region".to_string()])); assert_eq!(static_data.keep_only, None); assert_eq!(static_data.include, None); diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs index 9b12c62d89263..f04081b2687a5 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/mock_multi_stage_grain.rs @@ -8,8 +8,6 @@ use typed_builder::TypedBuilder; #[derive(TypedBuilder)] pub struct MockMultiStageGrainReferences { - #[builder(default)] - mode: Option, #[builder(default)] exclude: Option>, #[builder(default)] @@ -21,7 +19,6 @@ pub struct MockMultiStageGrainReferences { impl_static_data!( MockMultiStageGrainReferences, MultiStageGrainReferencesStatic, - mode, exclude, keep_only, include diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs index 22d27b1ccc668..428dc4716d031 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/cube_bridge/yaml/measure.rs @@ -126,8 +126,6 @@ impl YamlMultiStageFilter { #[derive(Debug, Deserialize)] pub struct YamlMultiStageGrain { - #[serde(default)] - mode: Option, #[serde(default)] exclude: Option>, #[serde(default)] @@ -140,7 +138,6 @@ impl YamlMultiStageGrain { pub(super) fn build(self, cube_name: Option<&str>) -> Rc { Rc::new( MockMultiStageGrainReferences::builder() - .mode(self.mode) .exclude(qualify_references(self.exclude, cube_name)) .keep_only(qualify_references(self.keep_only, cube_name)) .include(qualify_references(self.include, cube_name)) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml index 8ab9540498919..7da7378d6e55c 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml @@ -228,32 +228,6 @@ cubes: sql: "{CUBE.total_amount}" multi_stage: true grain: - mode: relative - keep_only: - - orders.status - - # ── grain mode: fixed ────────────────────────────────────── - # `mode: fixed` with no keep_only/include → grand total. The - # inner CTE has no GROUP BY; the scalar replicates across the - # outer query's rows. - - name: amount_grain_fixed_grand_total - type: sum - sql: "{CUBE.total_amount}" - multi_stage: true - grain: - mode: fixed - - # `mode: fixed` + keep_only intersects with the original query - # context (not with whatever an ancestor narrowed it to). At - # top-level the parent context equals the query, so this is - # observable via "keep_only dim missing from the query" — the - # measure collapses to a grand total in that case. - - name: amount_grain_fixed_keep_only_status - type: sum - sql: "{CUBE.total_amount}" - multi_stage: true - grain: - mode: fixed keep_only: - orders.status diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml index bd9b8236dfd33..8de1de5e0ec3b 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/multi_stage_filter.yaml @@ -73,7 +73,6 @@ cubes: add_group_by: - status grain: - mode: fixed exclude: - status include: diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs index 3319d0f1ec7ee..46617ded656f6 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs @@ -120,73 +120,3 @@ async fn test_grain_keep_only_matches_group_by_override() { insta::assert_snapshot!(result); } } - -// `grain.mode: fixed` (no keep_only/include) → grand total. The measure -// value is the SUM across all rows, replicated per query category. -#[tokio::test(flavor = "multi_thread")] -async fn test_grain_fixed_grand_total() { - let ctx = create_context(); - - let query = indoc! {r#" - measures: - - orders.amount_grain_fixed_grand_total - dimensions: - - orders.category - order: - - id: orders.category - "#}; - - ctx.build_sql(query).unwrap(); - - if let Some(result) = ctx.try_execute_pg(query, SEED).await { - insta::assert_snapshot!(result); - } -} - -// `grain.mode: fixed, keep_only: [status]` with status absent from the -// query → effective grain is empty (grand total), since the FIXED base is -// the original query context, not the parent state. -#[tokio::test(flavor = "multi_thread")] -async fn test_grain_fixed_keep_only_dim_not_in_query() { - let ctx = create_context(); - - let query = indoc! {r#" - measures: - - orders.amount_grain_fixed_keep_only_status - dimensions: - - orders.category - order: - - id: orders.category - "#}; - - ctx.build_sql(query).unwrap(); - - if let Some(result) = ctx.try_execute_pg(query, SEED).await { - insta::assert_snapshot!(result); - } -} - -// `grain.mode: fixed, keep_only: [status]` with status in the query → -// effective grain is [status], matching the RELATIVE behavior at top -// level (parent context == query). Smoke test that both modes coexist. -#[tokio::test(flavor = "multi_thread")] -async fn test_grain_fixed_keep_only_dim_in_query() { - let ctx = create_context(); - - let query = indoc! {r#" - measures: - - orders.amount_grain_fixed_keep_only_status - dimensions: - - orders.status - - orders.category - order: - - id: orders.status - - id: orders.category - "#}; - - ctx.build_sql(query).unwrap(); - - if let Some(result) = ctx.try_execute_pg(query, SEED).await { - insta::assert_snapshot!(result); - } -} diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs index 24b7e6bea791a..60e1ee6c9589b 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs @@ -474,7 +474,7 @@ fn new_patched_appends_to_existing_filters() { mod multi_stage { use super::*; - use crate::planner::{MultiStageFilterMode, MultiStageGrainMode}; + use crate::planner::MultiStageFilterMode; fn ctx() -> TestContext { let schema = MockSchema::from_yaml_file("common/multi_stage_filter.yaml"); @@ -498,7 +498,6 @@ mod multi_stage { assert_eq!(include.len(), 1); assert_eq!(include[0].full_name(), "orders.city"); - assert_eq!(ms.grain.mode, MultiStageGrainMode::Relative); assert!(ms.grain.keep_only.is_none()); } @@ -573,14 +572,12 @@ mod multi_stage { } #[test] - fn legacy_fields_populate_grain_with_relative_mode() { + fn legacy_fields_populate_grain() { let ctx = ctx(); let m = ctx.create_measure("orders.revenue_filtered").unwrap(); let measure = m.as_measure().unwrap(); let ms = measure.multi_stage().expect("multi_stage present"); - assert_eq!(ms.grain.mode, MultiStageGrainMode::Relative); - let include = ms.grain.include.as_ref().expect("include"); assert_eq!(include[0].full_name(), "orders.city"); let exclude = ms.grain.exclude.as_ref().expect("exclude"); @@ -595,8 +592,6 @@ mod multi_stage { let measure = m.as_measure().unwrap(); let ms = measure.multi_stage().expect("multi_stage present"); - assert_eq!(ms.grain.mode, MultiStageGrainMode::Fixed); - let exclude = ms.grain.exclude.as_ref().expect("exclude"); assert_eq!(exclude.len(), 1); assert_eq!(exclude[0].full_name(), "orders.status"); From 7ac6fdd06dcbe216c47c8cceac1a6bee050d245b Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 28 May 2026 11:49:20 +0200 Subject: [PATCH 08/11] test(schema-compiler): postgres integration tests for multi-stage grain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror `multi-stage-filter.test.ts`: a single inline YAML cube with 6 rows wired through `prepareYamlCompiler` and `dbRunner.runQueryTest`, covering every shape of `grain:` end-to-end (JS → Joi → transpiler → Tesseract → SQL). Tests are gated on `nativeSqlPlanner`; the legacy planner does not implement the directive. Eight scenarios: - exclude / keep_only / include with a single dim - keep_only with the listed dim absent from the query (collapse to grand total) - exclude / keep_only / include with a two-element array each - keep_only + include combined: the keep narrows the inherited grain to `[status]`, include extends the leaf to `[status, id]`, outer re-aggregates → per-status total broadcast across categories. This is the JS analogue of the Rust planner snapshot `reduce_by_add_group_by_combo`. A sixth seed row (id=6, completed, books, 70) makes the two-element keep_only test distinguishable from `total_amount` — (completed, books) now spans two ids that share the same per-cell value. --- .../postgres/multi-stage-grain.test.ts | 271 ++++++++++++++++++ 1 file changed, 271 insertions(+) create mode 100644 packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts new file mode 100644 index 0000000000000..bd57bc8cab40a --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts @@ -0,0 +1,271 @@ +import { getEnv } from '@cubejs-backend/shared'; +import { prepareYamlCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; + +// Smoke tests for the multi-stage `grain:` directive going through the +// JS schema compiler end-to-end (YAML → Joi → transpiler → Tesseract → SQL). +// The Rust planner has the exhaustive coverage; this file just confirms the +// JS pipeline accepts each grain shape and produces correct results. +// +// Inline data (6 rows — id=6 added so (completed, books) has two rows, +// which lets the 2-element keep_only test distinguish from total_amount): +// id status category amount created_at +// 1 completed books 100 2024-01-10 +// 2 completed electronics 200 2024-01-15 +// 3 pending books 50 2024-02-12 +// 4 pending electronics 75 2024-02-18 +// 5 cancelled books 30 2024-03-10 +// 6 completed books 70 2024-01-20 +// +// by status: completed=370, pending=125, cancelled=30 +// by category: books=250, electronics=275 +// by (status,category): (completed,books)=170, (completed,electronics)=200, +// (pending,books)=50, (pending,electronics)=75, +// (cancelled,books)=30 +// total: 525 +describe('Multi-Stage grain directive', () => { + jest.setTimeout(200000); + + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(` +cubes: + - name: orders + sql: > + SELECT 1 as ID, 'completed' as STATUS, 'books' as CATEGORY, 100 as AMOUNT, '2024-01-10T00:00:00.000Z'::timestamptz as CREATED_AT + union all + SELECT 2 as ID, 'completed' as STATUS, 'electronics' as CATEGORY, 200 as AMOUNT, '2024-01-15T00:00:00.000Z'::timestamptz as CREATED_AT + union all + SELECT 3 as ID, 'pending' as STATUS, 'books' as CATEGORY, 50 as AMOUNT, '2024-02-12T00:00:00.000Z'::timestamptz as CREATED_AT + union all + SELECT 4 as ID, 'pending' as STATUS, 'electronics' as CATEGORY, 75 as AMOUNT, '2024-02-18T00:00:00.000Z'::timestamptz as CREATED_AT + union all + SELECT 5 as ID, 'cancelled' as STATUS, 'books' as CATEGORY, 30 as AMOUNT, '2024-03-10T00:00:00.000Z'::timestamptz as CREATED_AT + union all + SELECT 6 as ID, 'completed' as STATUS, 'books' as CATEGORY, 70 as AMOUNT, '2024-01-20T00:00:00.000Z'::timestamptz as CREATED_AT + + dimensions: + - name: id + sql: ID + type: number + primary_key: true + + - name: status + sql: STATUS + type: string + + - name: category + sql: CATEGORY + type: string + + - name: created_at + sql: CREATED_AT + type: time + + measures: + - name: total_amount + sql: AMOUNT + type: sum + + # ── single-element variants ──────────────────────────────────── + - name: amount_grain_exclude_status + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + exclude: + - orders.status + + - name: amount_grain_keep_only_status + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + keep_only: + - orders.status + + - name: amount_grain_include_status + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + include: + - orders.status + + # ── two-element arrays ──────────────────────────────────────── + - name: amount_grain_exclude_status_id + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + exclude: + - orders.status + - orders.id + + - name: amount_grain_keep_only_status_category + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + keep_only: + - orders.status + - orders.category + + - name: amount_grain_include_status_id + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + include: + - orders.status + - orders.id + + # ── keep_only + include combination ────────────────────────── + # keep_only shrinks the inherited partition to [status]; include + # then extends the leaf grain with [id]. Outer re-aggregates over + # id back to the query grain — equivalent to "per-status total + # broadcast across categories" (the Rust planner pattern + # mirrored from total_by_customer_reduce_category). + - name: amount_grain_keep_status_include_id + sql: "{CUBE.total_amount}" + type: sum + multi_stage: true + grain: + keep_only: + - orders.status + include: + - orders.id + `); + + if (getEnv('nativeSqlPlanner')) { + // ── single-element variants ────────────────────────────────── + it('exclude: drops a dim from the partition', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_exclude_status'], + dimensions: ['orders.status', 'orders.category'], + order: [{ id: 'orders.status' }, { id: 'orders.category' }], + timezone: 'UTC', + }, [ + // exclude_status removes status from the window's PARTITION BY → each + // row gets the per-category total regardless of its status. + { orders__status: 'cancelled', orders__category: 'books', orders__total_amount: '30', orders__amount_grain_exclude_status: '250' }, + { orders__status: 'completed', orders__category: 'books', orders__total_amount: '170', orders__amount_grain_exclude_status: '250' }, + { orders__status: 'completed', orders__category: 'electronics', orders__total_amount: '200', orders__amount_grain_exclude_status: '275' }, + { orders__status: 'pending', orders__category: 'books', orders__total_amount: '50', orders__amount_grain_exclude_status: '250' }, + { orders__status: 'pending', orders__category: 'electronics', orders__total_amount: '75', orders__amount_grain_exclude_status: '275' }, + ], { joinGraph, cubeEvaluator, compiler })); + + it('keep_only: shrinks the partition to the listed dim', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_keep_only_status'], + dimensions: ['orders.status', 'orders.category'], + order: [{ id: 'orders.status' }, { id: 'orders.category' }], + timezone: 'UTC', + }, [ + // keep_only [status] reduces the partition to status only → each row + // gets the per-status total regardless of its category. + { orders__status: 'cancelled', orders__category: 'books', orders__total_amount: '30', orders__amount_grain_keep_only_status: '30' }, + { orders__status: 'completed', orders__category: 'books', orders__total_amount: '170', orders__amount_grain_keep_only_status: '370' }, + { orders__status: 'completed', orders__category: 'electronics', orders__total_amount: '200', orders__amount_grain_keep_only_status: '370' }, + { orders__status: 'pending', orders__category: 'books', orders__total_amount: '50', orders__amount_grain_keep_only_status: '125' }, + { orders__status: 'pending', orders__category: 'electronics', orders__total_amount: '75', orders__amount_grain_keep_only_status: '125' }, + ], { joinGraph, cubeEvaluator, compiler })); + + it('keep_only: dim absent from query collapses to grand total', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_keep_only_status'], + dimensions: ['orders.category'], + order: [{ id: 'orders.category' }], + timezone: 'UTC', + }, [ + // status not in the query → keep_only intersection with the query dims + // is empty → the measure collapses to a grand total (525) per row. + { orders__category: 'books', orders__total_amount: '250', orders__amount_grain_keep_only_status: '525' }, + { orders__category: 'electronics', orders__total_amount: '275', orders__amount_grain_keep_only_status: '525' }, + ], { joinGraph, cubeEvaluator, compiler })); + + it('include: extends the leaf grain, outer re-aggregates', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_include_status'], + dimensions: ['orders.category'], + order: [{ id: 'orders.category' }], + timezone: 'UTC', + }, [ + // Inner CTE groups by (category, status); outer SUMs back over status. + // SUM is associative so the per-category result equals total_amount — + // the test verifies pipeline acceptance, not a math divergence. + { orders__category: 'books', orders__total_amount: '250', orders__amount_grain_include_status: '250' }, + { orders__category: 'electronics', orders__total_amount: '275', orders__amount_grain_include_status: '275' }, + ], { joinGraph, cubeEvaluator, compiler })); + + // ── two-element arrays ─────────────────────────────────────── + it('exclude: two-element array drops both dims', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_exclude_status_id'], + dimensions: ['orders.status', 'orders.category', 'orders.id'], + order: [{ id: 'orders.status' }, { id: 'orders.category' }, { id: 'orders.id' }], + timezone: 'UTC', + }, [ + // exclude [status, id] removes both from the partition → partition is + // [category] alone → each row gets the per-category total. + { orders__status: 'cancelled', orders__category: 'books', orders__id: 5, orders__total_amount: '30', orders__amount_grain_exclude_status_id: '250' }, + { orders__status: 'completed', orders__category: 'books', orders__id: 1, orders__total_amount: '100', orders__amount_grain_exclude_status_id: '250' }, + { orders__status: 'completed', orders__category: 'books', orders__id: 6, orders__total_amount: '70', orders__amount_grain_exclude_status_id: '250' }, + { orders__status: 'completed', orders__category: 'electronics', orders__id: 2, orders__total_amount: '200', orders__amount_grain_exclude_status_id: '275' }, + { orders__status: 'pending', orders__category: 'books', orders__id: 3, orders__total_amount: '50', orders__amount_grain_exclude_status_id: '250' }, + { orders__status: 'pending', orders__category: 'electronics', orders__id: 4, orders__total_amount: '75', orders__amount_grain_exclude_status_id: '275' }, + ], { joinGraph, cubeEvaluator, compiler })); + + it('keep_only: two-element array narrows to the (status, category) cell', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_keep_only_status_category'], + dimensions: ['orders.status', 'orders.category', 'orders.id'], + order: [{ id: 'orders.status' }, { id: 'orders.category' }, { id: 'orders.id' }], + timezone: 'UTC', + }, [ + // keep_only [status, category] reduces the partition to that pair → + // each row gets the per-(status,category) total. id rows within the + // same (status, category) share the value (e.g. id=1 and id=6 both + // get 170 for (completed, books)). + { orders__status: 'cancelled', orders__category: 'books', orders__id: 5, orders__total_amount: '30', orders__amount_grain_keep_only_status_category: '30' }, + { orders__status: 'completed', orders__category: 'books', orders__id: 1, orders__total_amount: '100', orders__amount_grain_keep_only_status_category: '170' }, + { orders__status: 'completed', orders__category: 'books', orders__id: 6, orders__total_amount: '70', orders__amount_grain_keep_only_status_category: '170' }, + { orders__status: 'completed', orders__category: 'electronics', orders__id: 2, orders__total_amount: '200', orders__amount_grain_keep_only_status_category: '200' }, + { orders__status: 'pending', orders__category: 'books', orders__id: 3, orders__total_amount: '50', orders__amount_grain_keep_only_status_category: '50' }, + { orders__status: 'pending', orders__category: 'electronics', orders__id: 4, orders__total_amount: '75', orders__amount_grain_keep_only_status_category: '75' }, + ], { joinGraph, cubeEvaluator, compiler })); + + it('include: two-element array extends the leaf grain', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_include_status_id'], + dimensions: ['orders.category'], + order: [{ id: 'orders.category' }], + timezone: 'UTC', + }, [ + // Leaf grain extends to (category, status, id). Outer SUMs back to + // the per-category total — same numbers as total_amount, the test + // verifies that a two-element include list threads through. + { orders__category: 'books', orders__total_amount: '250', orders__amount_grain_include_status_id: '250' }, + { orders__category: 'electronics', orders__total_amount: '275', orders__amount_grain_include_status_id: '275' }, + ], { joinGraph, cubeEvaluator, compiler })); + + // ── keep_only + include combination ────────────────────────── + it('keep_only + include: keep narrows, include extends', async () => dbRunner.runQueryTest({ + measures: ['orders.total_amount', 'orders.amount_grain_keep_status_include_id'], + dimensions: ['orders.status', 'orders.category'], + order: [{ id: 'orders.status' }, { id: 'orders.category' }], + timezone: 'UTC', + }, [ + // keep_only [status] narrows parent grain to [status]; include [id] + // adds id to the leaf. Outer re-aggregates by (status, category) → + // per-status total broadcast across categories (completed=370 to both + // books and electronics; pending=125 to both; cancelled=30 to books). + { orders__status: 'cancelled', orders__category: 'books', orders__total_amount: '30', orders__amount_grain_keep_status_include_id: '30' }, + { orders__status: 'completed', orders__category: 'books', orders__total_amount: '170', orders__amount_grain_keep_status_include_id: '370' }, + { orders__status: 'completed', orders__category: 'electronics', orders__total_amount: '200', orders__amount_grain_keep_status_include_id: '370' }, + { orders__status: 'pending', orders__category: 'books', orders__total_amount: '50', orders__amount_grain_keep_status_include_id: '125' }, + { orders__status: 'pending', orders__category: 'electronics', orders__total_amount: '75', orders__amount_grain_keep_status_include_id: '125' }, + ], { joinGraph, cubeEvaluator, compiler })); + } else { + // These tests rely on Tesseract; v1 planner does not implement the directive. + test.skip('exclude: drops a dim from the partition', () => { expect(1).toBe(1); }); + test.skip('keep_only: shrinks the partition to the listed dim', () => { expect(1).toBe(1); }); + test.skip('keep_only: dim absent from query collapses to grand total', () => { expect(1).toBe(1); }); + test.skip('include: extends the leaf grain, outer re-aggregates', () => { expect(1).toBe(1); }); + test.skip('exclude: two-element array drops both dims', () => { expect(1).toBe(1); }); + test.skip('keep_only: two-element array narrows to the (status, category) cell', () => { expect(1).toBe(1); }); + test.skip('include: two-element array extends the leaf grain', () => { expect(1).toBe(1); }); + test.skip('keep_only + include: keep narrows, include extends', () => { expect(1).toBe(1); }); + } +}); From 56909bf87f562d0cf5a2c12a7669e920a1059f2d Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 28 May 2026 13:05:23 +0200 Subject: [PATCH 09/11] docs(tesseract): strip legacy-field and was-now framing from grain comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweep through every comment touched by the grain work and remove references to the legacy `add_group_by` / `reduce_by` / `group_by` field names plus any "was → now" / "currently" / "today" wording that will rot once the legacy path is gone. - `MultiStageGrain` docstring no longer enumerates which legacy fields feed each slot; it just describes the set operation itself. - `MultiStageProperties::from_dimension_definition` loses the inline "Dimensions only expose `add_group_by` today" note — the code reads the field directly on the next line. - `partition_filter` keeps a single grain-centric docstring; the older paragraph that talked about `reduce_by`/`group_by` is dropped. - Integration test renamed to `test_grain_keep_only_status_top_level`, comment rewritten as an intrinsic description rather than a comparison against `test_group_by_override`. - YAML fixture and symbol-test comments drop the "expressed via grain" / "not from the sibling add_group_by" framing. - JS grain test loses the "id=6 added so..." narrative and the reference to the Rust planner snapshot it was originally mirrored from. --- .../integration/postgres/multi-stage-grain.test.ts | 10 +++++----- .../planners/multi_stage/multi_stage_query_planner.rs | 6 ------ .../src/planner/symbols/common/multi_stage.rs | 6 ------ .../yaml_files/common/integration_multi_stage.yaml | 2 -- .../src/tests/integration/multi_stage/group_by.rs | 7 ++++--- .../cubesqlplanner/src/tests/measure_symbol.rs | 2 -- 6 files changed, 9 insertions(+), 24 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts index bd57bc8cab40a..d7a44751903c4 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/multi-stage-grain.test.ts @@ -7,8 +7,9 @@ import { dbRunner } from './PostgresDBRunner'; // The Rust planner has the exhaustive coverage; this file just confirms the // JS pipeline accepts each grain shape and produces correct results. // -// Inline data (6 rows — id=6 added so (completed, books) has two rows, -// which lets the 2-element keep_only test distinguish from total_amount): +// Inline data (6 rows; (completed, books) spans two rows so that +// `keep_only: [status, category]` resolves to a value distinct from +// `total_amount`): // id status category amount created_at // 1 completed books 100 2024-01-10 // 2 completed electronics 200 2024-01-15 @@ -121,9 +122,8 @@ cubes: # ── keep_only + include combination ────────────────────────── # keep_only shrinks the inherited partition to [status]; include # then extends the leaf grain with [id]. Outer re-aggregates over - # id back to the query grain — equivalent to "per-status total - # broadcast across categories" (the Rust planner pattern - # mirrored from total_by_customer_reduce_category). + # id back to the query grain — per-status total broadcast across + # categories. - name: amount_grain_keep_status_include_id sql: "{CUBE.total_amount}" type: sum diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index 66aa180db3593..5734c8d8f9145 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -274,12 +274,6 @@ impl MultiStageQueryPlanner { } } - /// Mirror of `MultiStageMemberQueryPlanner::member_partition_by_logical`: - /// drops `reduce_by` dims and (when `group_by` is set) keeps only the - /// dims explicitly listed. Used at planning time to decide whether - /// reduce_by / group_by actually shrinks the partition vs the leaf - /// grain. - /// /// Applies the partition-shaping part of `grain` to a parent-state /// dimension list: `exclude` removes matching dims, then `keep_only` /// intersects what's left. `include` is appended outside this helper diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs index c02555358dcef..4f64597c10ee0 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/symbols/common/multi_stage.rs @@ -56,10 +56,6 @@ pub struct MultiStageFilter { /// /// The three lists mutate the parent grain — `exclude` removes, /// `keep_only` intersects, `include` adds. -/// -/// Sourced from the `grain:` directive when present; otherwise mapped from -/// `add_group_by` / `reduce_by` / `group_by` (→ `include` / `exclude` / -/// `keep_only`). #[derive(Clone, Default)] pub struct MultiStageGrain { pub exclude: Option>>, @@ -108,8 +104,6 @@ impl MultiStageProperties { return Ok(None); } - // Dimensions only expose `add_group_by` today — the `grain:` directive - // is currently scoped to measures. let include = resolve_reference_paths(&definition.static_data().add_group_by_references, compiler)?; let filter = build_filter(cube_name, definition.filter()?, compiler)?; diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml index 7da7378d6e55c..30d8ace1d9ad9 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml @@ -221,8 +221,6 @@ cubes: - orders.status - orders.created_at.month - # Same partition shape as `amount_group_by_status`, expressed via - # `grain.keep_only`. - name: amount_grain_keep_only_status type: sum sql: "{CUBE.total_amount}" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs index 46617ded656f6..96e4d12145419 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs @@ -99,10 +99,11 @@ async fn test_group_by_equals_query_dims() { } } -// Smoke test: `grain.keep_only: [status]` yields the same partition as -// `group_by: [status]`. Snapshot must match `test_group_by_override`. +// `grain.keep_only: [status]` narrows the partition to `[status]`; the +// query has only `category`, so the measure value is the per-status total +// broadcast across categories. #[tokio::test(flavor = "multi_thread")] -async fn test_grain_keep_only_matches_group_by_override() { +async fn test_grain_keep_only_status_top_level() { let ctx = create_context(); let query = indoc! {r#" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs index 60e1ee6c9589b..c390cc4fb8fd2 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/measure_symbol.rs @@ -598,8 +598,6 @@ mod multi_stage { let include = ms.grain.include.as_ref().expect("include"); assert_eq!(include.len(), 1); - // Comes from `grain.include: [city]`, not from the sibling - // `add_group_by: [status]` — the directive wins when both are set. assert_eq!(include[0].full_name(), "orders.city"); assert!(ms.grain.keep_only.is_none()); From 65b6c470b76514b98e7ad93478380ce1206102bd Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 28 May 2026 13:18:44 +0200 Subject: [PATCH 10/11] test(tesseract): accept snapshot for test_grain_keep_only_status_top_level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `grain.keep_only: [status]` with the query scoped to `[category]` yields an empty partition (keep_only ∩ query_dims = ∅) and collapses to the grand total — 2250 per category on the integration_multi_stage seed. Mirrors the legacy `group_by_override` snapshot value-for-value; content differs only by the measure name in the output column. --- ...ge__group_by__grain_keep_only_status_top_level.snap | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__grain_keep_only_status_top_level.snap diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__grain_keep_only_status_top_level.snap b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__grain_keep_only_status_top_level.snap new file mode 100644 index 0000000000000..5b34525bb0089 --- /dev/null +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__grain_keep_only_status_top_level.snap @@ -0,0 +1,10 @@ +--- +source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +assertion_line: 121 +expression: result +--- +orders__category | orders__amount_grain_keep_only_status +-----------------+-------------------------------------- +books | 2250.00 +clothing | 2250.00 +electronics | 2250.00 From bfbcf444cd5c1f1f2ff088ba441237efa1066754 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Thu, 28 May 2026 15:36:16 +0200 Subject: [PATCH 11/11] docs(tesseract): replace remaining `reduce_by`/`add_group_by` mentions with grain terms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sweep the planner module for pre-existing comments that still describe the multi-stage shaping pipeline in terms of the legacy field names (`reduce_by`, `add_group_by`, `group_by`). After the refactor the planner reads `MultiStageGrain` directly, so the comments are reworded to use `grain.exclude` / `grain.keep_only` / `grain.include` (or just "grain reshape" where the specific operator doesn't matter). Touched: - `make_queries_descriptions` docstring — "grain reshape" instead of "add_group_by". - The JOIN-vs-window-path narrative in `create_multi_stage_inode_member` — step list now names grain operators; the window-path note refers to "leaf-extending `include`" rather than `add_group_by`. - `MultiStageQueryDescription::keys_input` docstring — "grain reshape" in place of "reduce_by / group_by". No semantic change. --- .../multi_stage/multi_stage_query_planner.rs | 25 +++++++++---------- .../planners/multi_stage/query_description.rs | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index 5734c8d8f9145..8ce7f3895d6eb 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -447,9 +447,8 @@ impl MultiStageQueryPlanner { /// already-built descriptions, tries a rolling-window path /// (`try_plan_rolling_window`), and otherwise returns either a /// leaf `Measure` or an inode description whose children come - /// from `make_childs`. Adjusts the state for inodes with any - /// `add_group_by`, time-shift or per-member filter changes the - /// inode demands. + /// from `make_childs`. Adjusts the state for any grain reshape, + /// time-shift or per-member filter changes the inode demands. fn make_queries_descriptions( &self, member: Rc, @@ -524,16 +523,16 @@ impl MultiStageQueryPlanner { // 1. filter directive — pick `state` (Relative/None) or // `root_state` (Fixed) as the base and apply exclude / // keep_only / include against it. - // 2. reduce_by / group_by — shrink parent grain to the - // partition grain implied by directives. - // 3. add_group_by — extend the result with extra leaf dims. + // 2. grain.exclude / grain.keep_only — shrink parent grain to + // the partition grain implied by the directive. + // 3. grain.include — extend the result with extra leaf dims. // 4. time_shift / filter cleanup. - // Step 2 must precede step 3: `group_by` is a keep-only filter - // and would silently drop dims that step 3 needs to introduce. + // Step 2 must precede step 3: `keep_only` is an intersection and + // would silently drop dims that step 3 needs to introduce. // // The window-path Aggregate inode skips step 2: the leaf stays - // at the parent state plus any add_group_by extension, and the - // window function does the reduce_by collapse at outer level. + // at the parent state plus any `include` extension, and the + // window function does the `exclude` collapse at outer level. let use_window_path = multi_stage_member.use_window_path(); let new_state = { let mut new_state = match directive_filter.as_ref().map(|f| &f.mode) { @@ -584,9 +583,9 @@ impl MultiStageQueryPlanner { // build keys-side descriptions per child on the parent state // so the FullKeyAggregate can broadcast measure values back // to the full query grain. Window-path Aggregate inodes - // (sum-of-sum / sum-of-count without add_group_by) handle - // broadcast via the window expression instead and don't need - // keys_input. + // (sum-of-sum / sum-of-count with no leaf-extending `include`) + // handle broadcast via the window expression instead and don't + // need keys_input. let mut keys_input: Vec> = vec![]; if !use_window_path { let new_state_has = |sym: &Rc| { diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs index c2314f8539063..a391c1c7916a2 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs @@ -16,7 +16,7 @@ pub struct MultiStageQueryDescription { input: Vec>, /// Dim-grid sources for the JOIN-based assembly. Empty for the /// window-based path. Populated by `make_queries_descriptions` when - /// reduce_by / group_by actually shrinks the partition grain vs the + /// the grain reshape actually shrinks the partition grain vs the /// leaf grain — in that case `input` is rebuilt at partition grain /// and the original full-grain inputs move here as keys. keys_input: Vec>,