diff --git a/packages/malloy/src/lang/ast/source-properties/join.ts b/packages/malloy/src/lang/ast/source-properties/join.ts index dff2c9ffd..544c282c2 100644 --- a/packages/malloy/src/lang/ast/source-properties/join.ts +++ b/packages/malloy/src/lang/ast/source-properties/join.ts @@ -28,6 +28,8 @@ import type { MatrixOperation, SourceDef, AccessModifierLabel, + Expr, + FieldUsage, } from '../../../model/malloy_types'; import {isSourceDef, isJoinable} from '../../../model/malloy_types'; import type {DynamicSpace} from '../field-space/dynamic-space'; @@ -239,6 +241,92 @@ export class ExpressionJoin extends Join { } } +export class UsingJoin extends Join { + elementType = 'joinUsing'; + joinType: JoinType = 'one'; + matrixOperation: MatrixOperation = 'left'; + + constructor( + readonly name: ModelEntryReference, + readonly sourceExpr: SourceQueryElement, + readonly usingFields: string[] + ) { + super({name, sourceExpr}); + } + + fixupJoinOn(_outer: FieldSpace, inStruct: JoinFieldDef): void { + // Generate an onExpression equivalence internally so Malloy's semantic + // engine understands the lineage of the fields. + // e.g. for `USING (id)`, we do `base_table.id = joined_table.id`. + // We construct a boolean expression: `this.name.id = id` + // Since there can be multiple fields, it becomes `(this.name.id = id) AND ...` + if (this.usingFields.length === 0) { + return; + } + + const conditions: Expr[] = []; + const fieldUsage: FieldUsage = []; + for (const field of this.usingFields) { + const left: Expr = { + node: 'field', + path: [this.name.refString, field], + at: this.location, + }; + const right: Expr = { + node: 'field', + path: [field], + at: this.location, + }; + conditions.push({node: '=', kids: {left, right}}); + fieldUsage.push({path: [this.name.refString, field]}, {path: [field]}); + } + + let onExpr: Expr = conditions[0]; + for (let i = 1; i < conditions.length; i++) { + onExpr = {node: 'and', kids: {left: onExpr, right: conditions[i]}}; + } + + inStruct.onExpression = onExpr; + inStruct.refSummary = mergeRefSummaries(inStruct.refSummary, { + fieldUsage, + }); + } + + getStructDef(parameterSpace: ParameterSpace): JoinFieldDef { + const source = this.sourceExpr.getSource(); + if (!source) { + this.sourceExpr.sqLog( + 'invalid-join-source', + 'Cannot create a source to join from' + ); + return ErrorFactory.joinDef; + } + const sourceDef = source.getSourceDef(parameterSpace); + let matrixOperation: MatrixOperation = 'left'; + if (this.inExperiment('join_types', true)) { + matrixOperation = this.matrixOperation; + } + + if (!isJoinable(sourceDef)) { + throw this.internalError(`Can't join struct type ${sourceDef.type}`); + } + const joinStruct: JoinFieldDef = { + ...sourceDef, + name: this.name.refString, + join: this.joinType, + matrixOperation, + usingFields: this.usingFields, + location: this.location, + }; + delete joinStruct.as; + if (this.note) { + joinStruct.annotation = this.note; + } + this.document()?.rememberToAddModelAnnotations(joinStruct); + return joinStruct; + } +} + export class JoinStatement extends DefinitionList implements QueryPropertyInterface diff --git a/packages/malloy/src/lang/grammar/MalloyLexer.g4 b/packages/malloy/src/lang/grammar/MalloyLexer.g4 index 4befbd5a1..362f83aa9 100644 --- a/packages/malloy/src/lang/grammar/MalloyLexer.g4 +++ b/packages/malloy/src/lang/grammar/MalloyLexer.g4 @@ -161,6 +161,7 @@ WHEN: W H E N ; WITH: W I T H ; YEAR: Y E A R S?; UNGROUPED: U N G R O U P E D; +USING: U S I N G; VIRTUAL: V I R T U A L; fragment SQ: '\''; diff --git a/packages/malloy/src/lang/grammar/MalloyParser.g4 b/packages/malloy/src/lang/grammar/MalloyParser.g4 index e86e75ff5..339e645c4 100644 --- a/packages/malloy/src/lang/grammar/MalloyParser.g4 +++ b/packages/malloy/src/lang/grammar/MalloyParser.g4 @@ -430,6 +430,7 @@ joinFrom joinDef : tags joinFrom matrixOperation? WITH fieldExpr # joinWith | tags joinFrom (matrixOperation? ON joinExpression)? # joinOn + | tags joinFrom matrixOperation? USING OPAREN fieldNameList CPAREN # joinUsing ; joinExpression: fieldExpr; diff --git a/packages/malloy/src/lang/malloy-to-ast.ts b/packages/malloy/src/lang/malloy-to-ast.ts index 8eb27ad86..f4609c20f 100644 --- a/packages/malloy/src/lang/malloy-to-ast.ts +++ b/packages/malloy/src/lang/malloy-to-ast.ts @@ -922,6 +922,26 @@ export class MalloyToAST return this.astAt(join, pcx); } + visitJoinUsing(pcx: parse.JoinUsingContext): ast.Join { + const {joinAs, joinFrom, notes} = this.getJoinFrom(pcx.joinFrom()); + const usingFields = pcx.fieldNameList().fieldName().map(getId); + const join = new ast.UsingJoin(joinAs, joinFrom, usingFields); + + const mop = pcx.matrixOperation()?.text.toLocaleLowerCase() || 'left'; + if (isMatrixOperation(mop)) { + join.matrixOperation = mop; + } else { + this.contextError( + pcx, + 'unknown-matrix-operation', + 'Internal Error: Unknown matrixOperation' + ); + } + + join.extendNote({notes: this.getNotes(pcx.tags()).concat(notes)}); + return this.astAt(join, pcx); + } + getFieldDef( pcx: parse.FieldDefContext, makeFieldDef: ast.FieldDeclarationConstructor diff --git a/packages/malloy/src/lang/test/source.spec.ts b/packages/malloy/src/lang/test/source.spec.ts index 8e8505236..519af4cb7 100644 --- a/packages/malloy/src/lang/test/source.spec.ts +++ b/packages/malloy/src/lang/test/source.spec.ts @@ -165,6 +165,11 @@ describe('source:', () => { 'source: y is a extend { join_many: x is b on astr = x.astr }' ).toTranslate(); }); + test('one using', () => { + expect( + 'source: x is a extend { join_one: b using (astr) }' + ).toTranslate(); + }); test('cross', () => { expect('source: nab is a extend { join_cross: b }').toTranslate(); }); diff --git a/packages/malloy/src/model/expression_compiler.ts b/packages/malloy/src/model/expression_compiler.ts index 515d71067..554992067 100644 --- a/packages/malloy/src/model/expression_compiler.ts +++ b/packages/malloy/src/model/expression_compiler.ts @@ -821,16 +821,10 @@ export function generateFieldFragment( } // The normal case - just generate the SQL reference - return sqlFullChildReference( - fieldRef.parent, - fieldRef.fieldDef.name, - fieldRef.parent.structDef.type === 'record' - ? { - result: resultSet, - field: fieldRef, - } - : undefined - ); + return sqlFullChildReference(fieldRef.parent, fieldRef.fieldDef.name, { + result: resultSet, + field: fieldRef.parent.structDef.type === 'record' ? fieldRef : undefined, + }); } } diff --git a/packages/malloy/src/model/field_instance.ts b/packages/malloy/src/model/field_instance.ts index bce72aded..7e424841f 100644 --- a/packages/malloy/src/model/field_instance.ts +++ b/packages/malloy/src/model/field_instance.ts @@ -113,16 +113,10 @@ export class FieldInstanceField implements FieldInstance { ); } - return sqlFullChildReference( - this.f.parent, - this.f.fieldDef.name, - this.f.parent.structDef.type === 'record' - ? { - result: this.parent, - field: this.f, - } - : undefined - ); + return sqlFullChildReference(this.f.parent, this.f.fieldDef.name, { + result: this.parent, + field: this.f.parent.structDef.type === 'record' ? this.f : undefined, + }); } private generateDistinctKeyExpression(): string { @@ -684,10 +678,14 @@ export class FieldInstanceResultRoot extends FieldInstanceResult { export function sqlFullChildReference( struct: QueryStruct, name: string, - expand: {result: FieldInstanceResult; field: QueryField} | undefined + expand: {result?: FieldInstanceResult; field?: QueryField} | undefined ): string { let parentRef = struct.getSQLIdentifier(); - if (expand && isAtomic(struct.structDef) && hasExpression(struct.structDef)) { + if ( + expand?.field && + isAtomic(struct.structDef) && + hasExpression(struct.structDef) + ) { if (!struct.parent) { throw new Error(`Cannot expand reference to ${name} without parent`); } @@ -697,11 +695,37 @@ export function sqlFullChildReference( ); } parentRef = FieldInstanceField.exprCompiler( - expand.result, + expand.result!, struct.parent, struct.structDef.e ); } + + // When a SQL dialect processes a USING (col) join, it combines the join column from both tables + // into a single coalesced column. In many dialects (like BigQuery), referencing this coalesced + // column with a table alias (e.g. `base_table.col` or `joined_table.col`) will result in an error + // because the column is considered to belong to the join result itself, not the individual tables. + // Therefore, if this field is part of an active USING join and we are referencing it from one + // of the tables involved in that join, we must strip the table alias (parentRef) and emit the + // column name completely unqualified (e.g., just `col`). + if (expand?.result) { + const rootResult = expand.result.root(); + for (const join of rootResult.joins.values()) { + const def = join.queryStruct.structDef; + if (isJoined(def) && def.usingFields?.includes(name)) { + // Check if the current struct generating the reference is either the joined table itself, + // or the left-hand-side table of the join (the parent). If `join.parent` is undefined, + // it implies the left-hand-side is the base table of the query. + if ( + struct === join.queryStruct || + (join.parent ? struct === join.parent.queryStruct : true) + ) { + parentRef = ''; + break; + } + } + } + } let refType: FieldReferenceType = 'table'; if (struct.structDef.type === 'record') { refType = 'record'; @@ -715,5 +739,8 @@ export function sqlFullChildReference( } const child = struct.getChildByName(name); const childType = child?.fieldDef.type || 'unknown'; + if (parentRef === '') { + return struct.dialect.sqlQuoteIdentifier(name); + } return struct.dialect.sqlFieldReference(parentRef, refType, name, childType); } diff --git a/packages/malloy/src/model/malloy_types.ts b/packages/malloy/src/model/malloy_types.ts index 4c806d0ff..6f13f1beb 100644 --- a/packages/malloy/src/model/malloy_types.ts +++ b/packages/malloy/src/model/malloy_types.ts @@ -1124,6 +1124,7 @@ export interface JoinBase { join: JoinType; matrixOperation?: MatrixOperation; onExpression?: Expr; + usingFields?: string[]; refSummary?: RefSummary; accessModifier?: NonDefaultAccessModifierLabel | undefined; } diff --git a/packages/malloy/src/model/query_query.ts b/packages/malloy/src/model/query_query.ts index f5ec7a093..b661b8b90 100644 --- a/packages/malloy/src/model/query_query.ts +++ b/packages/malloy/src/model/query_query.ts @@ -977,7 +977,12 @@ export class QueryQuery extends QueryField { if (qs.parent === undefined) { throw new Error('Expected joined struct to have a parent.'); } - if (qsDef.onExpression) { + + let isUsing = false; + if (qsDef.usingFields && qsDef.usingFields.length > 0) { + isUsing = true; + onCondition = `USING (${qsDef.usingFields.join(', ')})`; + } else if (qsDef.onExpression) { // Create a temporary field instance to generate the SQL const boolField = new QueryFieldBoolean( { @@ -993,9 +998,9 @@ export class QueryQuery extends QueryField { this.rootResult, undefined ); - onCondition = tempInstance.generateExpression(); + onCondition = `ON ${tempInstance.generateExpression()}`; } else { - onCondition = '1=1'; + onCondition = 'ON 1=1'; } let filters = ''; let conditions: string[] | undefined = undefined; @@ -1031,9 +1036,15 @@ export class QueryQuery extends QueryField { // } if (conditions !== undefined && conditions.length >= 1) { - filters = ` AND (${conditions.join(' AND ')})`; + if (isUsing) { + // Cannot use AND with a USING clause. We must push the extra join conditions + // down into a subquery on the joined table. + structSQL = `(SELECT * FROM ${structSQL} WHERE ${conditions.join(' AND ')})`; + } else { + filters = ` AND (${conditions.join(' AND ')})`; + } } - s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ON ${onCondition}${filters}\n`; + s += ` ${matrixOperation} JOIN ${structSQL} AS ${ji.alias}\n ${onCondition}${filters}\n`; } else { let select = `SELECT ${ji.alias}.*`; let joins = ''; @@ -1049,7 +1060,7 @@ export class QueryQuery extends QueryField { }\n${joins}\nWHERE ${conditions?.join(' AND ')}\n`; s += `${matrixOperation} JOIN (\n${indent(select)}) AS ${ ji.alias - }\n ON ${onCondition}\n`; + }\n ${onCondition}\n`; return s; } } else if (qsDef.type === 'array') { @@ -1072,11 +1083,10 @@ export class QueryQuery extends QueryField { // a join at the top level, and the name will exist. // ... not sure this is the right way to do this // ... the test for this is called "source repeated record containing an array" - arrayExpression = sqlFullChildReference( - qs.parent, - qsDef.name, - depth === 0 ? {result: this.rootResult, field: this} : undefined - ); + arrayExpression = sqlFullChildReference(qs.parent, qsDef.name, { + result: this.rootResult, + field: depth === 0 ? this : undefined, + }); } // we need to generate primary key. If parent has a primary key combine // console.log(ji.alias, fieldExpression, this.inNestedPipeline()); diff --git a/test/src/databases/all/join.spec.ts b/test/src/databases/all/join.spec.ts index 625a4e75b..735c01bc2 100644 --- a/test/src/databases/all/join.spec.ts +++ b/test/src/databases/all/join.spec.ts @@ -88,6 +88,46 @@ describe.each(runtimes.runtimeList)('%s', (databaseName, runtime) => { `).toMatchResult(joinModel, {model_count: 1416}); }); + it('model source refine in query join using', async () => { + // Tests the new USING syntax and ensures coalesced columns work without table prefix errors + await expect(` + run: aircraft extend { + join_one: aircraft_models using (aircraft_model_code) + } -> { + aggregate: + aircraft_count + aircraft_models.model_count + } + `).toMatchResult(joinModel, {model_count: 1416}); + }); + + it('model source refine join using', async () => { + await expect(` + source: my_aircraft is aircraft extend { + join_one: aircraft_models using (aircraft_model_code) + } + run: my_aircraft -> { + aggregate: + aircraft_count + aircraft_models.model_count + } + `).toMatchResult(joinModel, {model_count: 1416}); + }); + + it('join with multiple using fields', async () => { + // Tests USING syntax with multiple columns + await expect(` + source: table1 is ${databaseName}.sql("SELECT 1 as id, 'a' as code, 10 as val1") + source: table2 is ${databaseName}.sql("SELECT 1 as id, 'a' as code, 20 as val2") + + run: table1 extend { + join_one: table2 using (id, code) + } -> { + select: id, code, val1, table2.val2 + } + `).toMatchResult(testModel, {id: 1, code: 'a', val1: 10, val2: 20}); + }); + it('model: join fact table query', async () => { await expect(` run: aircraft_models extend {