From 1c49fa0eead9d9c7c0ef2ef72d6cc03a656c8d69 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Thu, 1 May 2025 13:27:16 -0600 Subject: [PATCH 1/5] things are *mostly* working, need a lot of cleanup --- CLAUDE.md | 4 +- package.json | 1 + packages/lib/src/compiler.ts | 980 ++++++++++++++---- packages/lib/src/parser.ts | 5 +- .../array-access.integration.test.ts | 318 +++++- 5 files changed, 1127 insertions(+), 181 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5f8d821..e003121 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -9,7 +9,7 @@ QueryLeaf is a SQL to MongoDB compiler / translator. - Format: `yarn format` (check: `yarn format:check`) - Run all tests: `yarn test` - Run individual package tests: `yarn test:lib`, `yarn test:cli`, `yarn test:server`, `yarn test:pg-server` -- Run single test: `cd packages/[package] && npx jest -t "test name"` or `npx jest path/to/test.test.ts -t "test name"` +- Run single test: `cd packages/[package] && yarn jest -t "test name"` or `yarn jest path/to/test.test.ts -t "test name"` - Integration tests: `yarn test:lib:integration` (requires Docker) - Documentation: `yarn docs:serve` (dev), `yarn docs:build` (build) @@ -24,4 +24,4 @@ QueryLeaf is a SQL to MongoDB compiler / translator. 
- Error handling with proper try/catch blocks and meaningful error messages - Use async/await for asynchronous code - Follow existing patterns for similar functionality -- Tests should cover both unit and integration cases \ No newline at end of file +- Tests should cover both unit and integration cases diff --git a/package.json b/package.json index 63f1b64..db7a3e1 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "test:unit": "bin/run-all test:unit", "test:integration": "bin/run-all test:integration", "test:lib": "yarn workspace @queryleaf/lib test", + "test:lib:integration": "yarn workspace @queryleaf/lib test:integration", "test:cli": "yarn workspace @queryleaf/cli test", "test:server": "yarn workspace @queryleaf/server test", "test:pg-server": "yarn workspace @queryleaf/postgres-server test", diff --git a/packages/lib/src/compiler.ts b/packages/lib/src/compiler.ts index 34255ed..a8d7a8f 100644 --- a/packages/lib/src/compiler.ts +++ b/packages/lib/src/compiler.ts @@ -249,9 +249,12 @@ export class SqlCompilerImpl implements SqlCompiler { log(`Processing field path for projection: ${fieldPath}`); + // Process and normalize the field path for array access + const { normalizedPath, hasArrayAccess, outputFieldName } = this.normalizeFieldPath(fieldPath); + // If the field path contains a period, check if it's a table alias reference or a nested field - if (fieldPath.includes('.')) { - const parts = fieldPath.split('.'); + if (normalizedPath.includes('.')) { + const parts = normalizedPath.split('.'); const prefix = parts[0]; // Check if the prefix is a table alias that we identified in the FROM clause @@ -260,24 +263,34 @@ export class SqlCompilerImpl implements SqlCompiler { const actualField = parts.slice(1).join('.'); log(`Identified table alias in projection: ${prefix} -> ${actualField}`); - // Add to projection with just the field name - no $first for regular fields - projection[actualField] = 1; + // For table aliases with array access, we need to 
do special handling + if (hasArrayAccess) { + // Build an array access expression for the aliased field + this.buildArrayAccessProjection(projection, actualField, outputFieldName); + } else { + // Regular aliased field - no array access + projection[actualField] = 1; + } } else { // This is a nested field - // For nested fields, create a name with underscores instead of dots - const fieldNameWithUnderscores = fieldPath.replace(/\./g, '_'); - - // Add to projection with the path-based name - use $ syntax for field reference - projection[fieldNameWithUnderscores] = `$${fieldPath}`; - log(`Added nested field to projection: ${fieldNameWithUnderscores} = $${fieldPath}`); + + // Check if we need to use array operators + if (hasArrayAccess) { + // Build a complex array access projection using the dot notation path + this.buildArrayAccessProjection(projection, normalizedPath, outputFieldName); + } else { + // Standard nested field without array access + projection[outputFieldName] = `$${normalizedPath}`; + log(`$Added nested field to projection: ${outputFieldName} = $${normalizedPath}`); + } // Also include the last part as a fallback const lastPart = parts[parts.length - 1]; - projection[lastPart] = 1; + projection[lastPart] = projection[outputFieldName]; } } else { // Regular field - projection[fieldPath] = 1; + projection[normalizedPath] = 1; } } @@ -363,9 +376,11 @@ export class SqlCompilerImpl implements SqlCompiler { col === '*' || (typeof col === 'object' && col.expr && col.expr.type === 'star') ); + log(`CURRENT TABLE ALIASES: `, JSON.stringify(this.currentTableAliases, null, 2)) // Process explicit columns first if (ast.columns && !hasStar) { for (const column of ast.columns) { + log(`Processing explicit column: `, JSON.stringify(column, null, 2)) if ( typeof column === 'object' && column.expr && @@ -376,6 +391,7 @@ export class SqlCompilerImpl implements SqlCompiler { const field = column.expr.column; // Get the output field name (use alias if provided) + // HEY 
CLAUDE. THIS IS WHERE WE NEED TO PROPERLY UNWIND THE OUTPUT NAME THE SAME WAY WE DID IN SELECTS const outputName = column.as || field; // If this field is from the joined table that we're currently processing @@ -388,6 +404,7 @@ export class SqlCompilerImpl implements SqlCompiler { } } + log(`HAS STAR FOR ${lookup.as}: ${hasStar}, joinFieldMapping: ${JSON.stringify(joinFieldMapping, null, 2)}`) // If we have a SELECT * or no explicit joined fields were found, // we need to promote ALL fields from the joined collection if (hasStar || Object.keys(joinFieldMapping).length === 0) { @@ -409,22 +426,6 @@ export class SqlCompilerImpl implements SqlCompiler { }, }); log(`Added $project to remove original nested object ${lookup.as}`); - } else { - // For explicit field selection, add fields stage to bring joined fields up to top level - aggregateCommand.pipeline.push({ - $addFields: joinFieldMapping, - }); - log( - `Added $addFields stage for explicit joined fields: ${JSON.stringify(joinFieldMapping, null, 2)}` - ); - - // Then exclude the nested joined document to prevent duplication - aggregateCommand.pipeline.push({ - $project: { - [lookup.as]: 0, - }, - }); - log(`Added $project stage to exclude nested joined document: ${lookup.as}`); } }); } @@ -454,6 +455,8 @@ export class SqlCompilerImpl implements SqlCompiler { // Add projection for SELECT columns if (ast.columns) { const projection: Record = {}; + // Track array access fields that need special handling + const arrayAccessFields: {field: string, path: string, index: number, subField?: string}[] = []; // For JOIN queries, we need to handle nested paths differently const isJoinQuery = ast.from && ast.from.length > 1; @@ -491,7 +494,15 @@ export class SqlCompilerImpl implements SqlCompiler { // If column has an alias, use it for projection log(`Found column alias: ${column.as} for field: ${fieldPath}`); - if (fieldPath.includes('.')) { + // First process the field path to handle array indexing notation + // This 
transforms items__ARRAY_0__name => items.0.name for MongoDB dot notation + const processedPath = this.processFieldName(fieldPath); + log(`$Processed aliased field path with array notation: ${fieldPath} -> ${processedPath}`); + + // Check if this field path contains array access notation + const arrayInfo = this.getArrayAccessInfo(processedPath); + + if (processedPath.includes('.')) { // If it's a table alias reference, extract actual field const parts = fieldPath.split('.'); const prefix = parts[0]; @@ -514,24 +525,172 @@ export class SqlCompilerImpl implements SqlCompiler { `Added aliased field to projection: ${column.as} = $${actualField}, including ${actualField}` ); } + } else if (arrayInfo.hasArrayAccess) { + // This is an array access notation + const firstArrayIndex = arrayInfo.arrayIndices[0]; + const indexValue = parseInt(parts[firstArrayIndex]); + + if (firstArrayIndex === 1) { + // Simple case: array is the first level, like actors.0.name + const arrayField = parts[0]; + const subPath = parts.slice(2).join('.'); + + // Add to list of array fields that need special handling + // TODO (@day): this might need some changes + arrayAccessFields.push({ + field: column.as, + path: processedPath, + index: indexValue, + subField: subPath || undefined + }); + + // Also add it to the projection so it's included + projection[column.as] = { + $getField: { + field: subPath || 'value', // Fallback to extract the whole value + input: { + $arrayElemAt: [`$${arrayField}`, indexValue] + } + } + }; + + log(`$Added array access field to alias: ${column.as} using $arrayElemAt operator`); + } else { + // More complex nested array case - use dot notation as fallback + projection[column.as] = `$${processedPath}`; + log(`$Added complex nested array field to alias: ${column.as} = $${processedPath}`); + } } else { // Nested field with alias - projection[column.as] = `$${fieldPath}`; - log(`Added aliased nested field to projection: ${column.as} = $${fieldPath}`); + 
projection[column.as] = `$${processedPath}`; + log(`$Added aliased nested field to projection: ${column.as} = $${processedPath}`); } } else { // Regular field with alias - projection[column.as] = `$${fieldPath}`; - log(`Added aliased field to projection: ${column.as} = $${fieldPath}`); + projection[column.as] = `$${processedPath}`; + log(`$Added aliased field to projection: ${column.as} = $${processedPath}`); } } else { // No alias, use standard projection - this.addFieldToProjection(projection, fieldPath); + // Check if this is an array access field without alias (like actors[0].name) + const processedPath = this.processFieldName(fieldPath); + const arrayInfo = this.getArrayAccessInfo(processedPath); + + if (arrayInfo.hasArrayAccess) { + // This is array access without alias + const parts = processedPath.split('.'); + const arrayField = parts[0]; + const indexValue = parseInt(parts[1]); + const subField = parts.slice(2).join('.'); + + // If this is a nested field in an array element, extract just the property name + const outputField = subField || parts[parts.length - 1]; + + // Add to list of array fields that need special handling + arrayAccessFields.push({ + field: outputField, + path: processedPath, + index: indexValue, + subField: subField || undefined + }); + + // Add to projection so it's included + projection[outputField] = { + $getField: { + field: subField || 'value', + input: { + $arrayElemAt: [`$${arrayField}`, indexValue] + } + } + }; + + log(`$Added array access field without alias: ${outputField} using path ${processedPath}`); + } else { + // Regular field without array access + this.addFieldToProjection(projection, fieldPath); + } } } - + + // Special handling for array access fields - we need to create field extraction + // expressions to flatten nested array elements to root level + if (arrayAccessFields.length > 0 && !isJoinQuery) { + const arrayFieldsProject: Record = {}; + + // Process each array access field + 
arrayAccessFields.forEach(({field, path, index, subField}) => { + const parts = path.split('.'); + const arrayField = parts[0]; + + // Extract specific fields from array elements + if (subField) { + // Check if the subField contains multiple nested levels or array indices + if (subField.includes('.')) { + // This is a complex nested path like addresses[0].details.street + // or addresses[0].details.pastAddresses[0].street + + // Start with the array element + let currentExpr: any = { + $arrayElemAt: [`$${arrayField}`, index] + }; + + // Build an expression dynamically based on the subField components + // First, normalize the path in case it contains any array indices + const normalizedSubField = this.processFieldName(subField); + const subParts = normalizedSubField.split('.'); + + // Process each part of the path + for (let i = 0; i < subParts.length; i++) { + const part = subParts[i]; + + // Check if this is a numeric array index + if (/^\d+$/.test(part)) { + // This part is a numeric array index, use $arrayElemAt + currentExpr = { + $arrayElemAt: [currentExpr, parseInt(part)] + }; + } else { + // This is a field name, use $getField + currentExpr = { + $getField: { + field: part, + input: currentExpr + } + }; + } + } + + // Set the final expression + arrayFieldsProject[field] = currentExpr; + log(`$Added complex nested field/array access for ${field}: ${path}.${subField}`); + } else { + // Simple case - just one level of nesting + // Use $getField to extract nested field from array element + arrayFieldsProject[field] = { + $getField: { + field: subField, + input: { + $arrayElemAt: [`$${arrayField}`, index] + } + } + }; + } + } else { + // Just extract the whole array element + arrayFieldsProject[field] = { + $arrayElemAt: [`$${arrayField}`, index] + }; + } + }); + + // Add the array fields as an $addFields stage instead of $project + // This preserves all the original fields while adding the array access fields + aggregateCommand.pipeline.push({ $addFields: 
arrayFieldsProject }); + log(`$Added array access fields using $addFields: ${JSON.stringify(arrayFieldsProject, null, 2)}`); + } + // For JOIN queries, we need a special handling - if (isJoinQuery) { + else if (isJoinQuery) { // Add detailed debugging for JOIN queries log('================ JOIN QUERY DEBUG ================'); log('JOIN query columns:', JSON.stringify(ast.columns, null, 2)); @@ -563,12 +722,37 @@ export class SqlCompilerImpl implements SqlCompiler { if (isMainTable) { // Fields from the main table can be accessed directly - renamedFieldsProject[outputName] = `$${field}`; - log(`Main table field mapping: ${outputName} = $${field}`); + // Handle field names with table prefixes in the field itself (e.g., m.title in field) + if (field.includes('.')) { + const fieldParts = field.split('.'); + const actualField = fieldParts[fieldParts.length - 1]; + renamedFieldsProject[outputName] = `$${fieldParts[1]}`; // Use main table field + log(`$Main table field with prefix: ${field} -> ${outputName} = $${fieldParts[1]}`); + } else { + // Simple field from main table + renamedFieldsProject[outputName] = `$${field}`; + log(`$Main table field mapping: ${outputName} = $${field}`); + } + + // If the output name has a table prefix and no alias was explicitly provided, + // we also add a version without the prefix for compatibility + if (outputName.includes('.') && !column.as) { + const outParts = outputName.split('.'); + const cleanName = outParts[outParts.length - 1]; + renamedFieldsProject[cleanName] = `$${field}`; + log(`$Added clean field name for compatibility: ${cleanName} = $${field}`); + } } else { // Fields from joined tables need the alias prefix - renamedFieldsProject[outputName] = `$${table}.${field}`; - log(`Joined table field mapping: ${outputName} = $${table}.${field}`); + if (field.includes('.')) { + const fieldParts = field.split('.'); + const actualField = fieldParts[fieldParts.length - 1]; + renamedFieldsProject[outputName] = 
`$${table}.${actualField}`; + log(`$Joined table field with prefix: ${field} -> ${outputName} = $${table}.${actualField}`); + } else { + renamedFieldsProject[outputName] = `$${table}.${field}`; + log(`$Joined table field mapping: ${outputName} = $${table}.${field}`); + } } } else { // Not a recognized alias, but still has a table prefix @@ -658,14 +842,27 @@ export class SqlCompilerImpl implements SqlCompiler { // For each column in the query for (const column of ast.columns) { if (typeof column === 'object' && column.expr) { - const table = column.expr.table; - const field = column.expr.column; + let table = column.expr.table; + let field = column.expr.column; + + // Special handling for array access notation without table reference + // When we have something like scenes[0].name directly, we need to treat it as a field on the main table + if (!table && field && field.includes('__ARRAY_')) { + // Assume it belongs to the main table + table = ast.from[0].as; + log(`$Processing array access field without table reference: ${field}, assigning to main table: ${table}`); + } log(`Processing JOIN column: table=${table}, field=${field}`); - if (table && field && this.currentTableAliases.has(table)) { - // Output field name (possibly aliased) - const outputField = column.as || field; + if ((table && field && this.currentTableAliases.has(table)) || + // Also handle fields without table references as belonging to the main table + (!table && field)) { + + const outputField = this.extractOutputField(field, column.as); + const processedField = this.processFieldName(field); + + log(`$Output field name: ${outputField} from ${field} (processed: ${processedField})`); // Create a path to the field, which could be in the root doc or nested // in a joined doc (like "o.product") @@ -674,14 +871,12 @@ export class SqlCompilerImpl implements SqlCompiler { let sourcePath; if (table === ast.from[0].as) { - // Field from main table can be accessed directly - sourcePath = `$${field}`; - 
log(`Main table field path: $${field}`); + const fieldWithoutTablePrefix = processedField.replace(`${table}.`, ''); + sourcePath = `$${fieldWithoutTablePrefix}`; + log(`$Main table field path: $${fieldWithoutTablePrefix}`); } else { - // Field from joined table (came from $lookup and $unwind) - // MongoDB dot notation for accessing nested document fields - sourcePath = `$${table}.${field}`; - log(`Joined table field path: $${table}.${field}`); + sourcePath = `$${table}.${processedField}`; + log(`$Joined table field path: ${sourcePath}`); } // Add this field mapping @@ -713,83 +908,47 @@ export class SqlCompilerImpl implements SqlCompiler { // Add the $addFields stage aggregateCommand.pipeline.push({ $addFields: addFieldsStage }); - // Direct fix for the product/price field coming from a JOIN - // We'll extract the exact fields we need from the joined document - const joinFieldMapping: Record = {}; - - // Loop through columns to specifically handle JOIN fields - ast.columns.forEach((column: any) => { - if ( - typeof column === 'object' && - column.expr && - column.expr.table && - column.expr.column - ) { - const table = column.expr.table; - const field = column.expr.column; - - // Only process fields from joined tables (not from main table) - if (this.currentTableAliases.has(table) && table !== ast.from[0].as) { - // This is a field coming from a joined table - const outputName = column.as || field; - - // Map it directly from the nested document to the top level - joinFieldMapping[outputName] = `$${table}.${field}`; - log(`Mapping joined field to top level: ${outputName} = $${table}.${field}`); - } - } - }); - - // Add the $addFields stage to bring JOIN fields to top level - if (Object.keys(joinFieldMapping).length > 0) { - log( - 'Adding $addFields stage for JOIN field mapping:', - JSON.stringify(joinFieldMapping, null, 2) - ); - aggregateCommand.pipeline.push({ - $addFields: joinFieldMapping, - }); - } - // Now we need to exclude the joined table objects since 
their fields are flattened // This makes the output match what SQL would normally return - const excludeJoinedDocs: Record = {}; + const includeFields: Record = {}; + const outputFields = Object.keys(addFieldsStage); // First, indicate that we want to keep everything - excludeJoinedDocs['_id'] = 1; - - // Set merged fields to be kept - for (const [field, _] of Object.entries(joinFieldMapping)) { - excludeJoinedDocs[field] = 1; - } - - // Include all base table fields (they're already at the root level) - for (const column of ast.columns) { - if (typeof column === 'object' && column.expr) { - const table = column.expr.table; - const field = column.expr.column; + includeFields['_id'] = 1; - if (table === ast.from[0].as || !table) { - // Main table field or direct field reference - const outputName = column.as || field; - excludeJoinedDocs[outputName] = 1; - } - } + for (const field of outputFields) { + log(`FIELD: `, field); + includeFields[field] = 1 } + // First add a projection to include only the fields we want + log( + 'Adding $project stage to include our fields:', + JSON.stringify(includeFields, null, 2) + ); + aggregateCommand.pipeline.push({ $project: includeFields }); + + // Then add a separate projection to exclude nested documents + // MongoDB doesn't allow mixing inclusion and exclusion in the same projection + const excludeJoinedDocsOnly: Record = {}; + // Now specifically exclude the nested documents to prevent duplication for (const fromItem of ast.from) { if (fromItem.as && fromItem.as !== ast.from[0].as) { // Exclude the joined document fields that were flattened - excludeJoinedDocs[fromItem.as] = 0; + excludeJoinedDocsOnly[fromItem.as] = 0; } } log( 'Adding $project stage to exclude nested docs:', - JSON.stringify(excludeJoinedDocs, null, 2) + JSON.stringify(excludeJoinedDocsOnly, null, 2) ); - aggregateCommand.pipeline.push({ $project: excludeJoinedDocs }); + + // Only add the exclusion stage if we have fields to exclude + if 
(Object.keys(excludeJoinedDocsOnly).length > 0) { + aggregateCommand.pipeline.push({ $project: excludeJoinedDocsOnly }); + } // After adding, print the full pipeline log( @@ -801,8 +960,8 @@ export class SqlCompilerImpl implements SqlCompiler { } } } - // For non-JOIN queries, use the standard projection - else if (Object.keys(projection).length > 0) { + // For non-JOIN queries with array access, we already added the projection stage earlier + else if (Object.keys(projection).length > 0 && arrayAccessFields.length === 0) { log('Standard projection stage:', JSON.stringify(projection, null, 2)); aggregateCommand.pipeline.push({ $project: projection }); } @@ -816,9 +975,14 @@ export class SqlCompilerImpl implements SqlCompiler { type: 'FIND', collection, filter: ast.where ? this.convertWhere(ast.where) : undefined, - projection: ast.columns ? this.convertColumns(ast.columns) : undefined, }; + // Set up projection + if (ast.columns) { + const projection = this.convertColumns(ast.columns); + findCommand.projection = projection; + } + // Handle LIMIT and OFFSET const { limit, skip } = this.extractLimitOffset(ast); if (limit !== undefined) findCommand.limit = limit; @@ -973,11 +1137,17 @@ export class SqlCompilerImpl implements SqlCompiler { // Process the field name to handle nested fields with dot notation fieldName = this.processFieldName(setItem.column); } - + + // After field name processing, check if it contains array access notation + const processedFieldName = this.processFieldName(fieldName); + const arrayInfo = this.getArrayAccessInfo(processedFieldName); + log( - `Setting UPDATE field: ${fieldName} = ${JSON.stringify(this.convertValue(setItem.value))}` + `$Setting UPDATE field: ${processedFieldName} = ${JSON.stringify(this.convertValue(setItem.value))}` ); - update[fieldName] = this.convertValue(setItem.value); + + // Use the processed field name with proper array indexing + update[processedFieldName] = this.convertValue(setItem.value); } }); @@ -1049,10 
+1219,10 @@ export class SqlCompilerImpl implements SqlCompiler { /** * Extract table name from FROM clause */ - private extractTableName(from: From | Dual): string { + private extractTableName(from: From): string { if (typeof from === 'string') { return from; - } else if (this.isFromType(from) && from.table) { + } else if (from.table) { return from.table; } throw new Error('Invalid FROM clause'); @@ -1281,6 +1451,8 @@ export class SqlCompilerImpl implements SqlCompiler { */ private convertColumns(columns: any[]): Record { const projection: Record = {}; + // Track parent fields to avoid path collisions + const parentFields = new Set(); log('Converting columns to projection:', JSON.stringify(columns, null, 2)); @@ -1297,6 +1469,8 @@ export class SqlCompilerImpl implements SqlCompiler { return {}; } + // First pass - process all fields + const fieldsToProject: string[] = []; columns.forEach((column) => { if (typeof column === 'object') { if ('expr' in column && column.expr) { @@ -1306,20 +1480,11 @@ export class SqlCompilerImpl implements SqlCompiler { let fieldName; if (column.expr.table && column.expr.column) { fieldName = `${column.expr.table}.${column.expr.column}`; - log(`Using table-prefixed field in projection: ${fieldName}`); + log(`$Using table-prefixed field in projection: ${fieldName}`); } else { fieldName = this.processFieldName(column.expr.column); } - - const outputField = column.as || fieldName; - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; - - // For nested fields, also include the parent field - if (fieldName.includes('.')) { - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; - } + fieldsToProject.push(fieldName); } else if (column.expr.type === 'column_ref' && column.expr.column) { // Handle column_ref with possible table let fieldName; @@ -1329,16 +1494,7 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.expr.column); } - - const 
outputField = column.as || fieldName; - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; - - // For nested fields, also include the parent field - if (fieldName.includes('.')) { - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; - } + fieldsToProject.push(fieldName); } else if ( column.expr.type === 'binary_expr' && column.expr.operator === '.' && @@ -1357,13 +1513,7 @@ export class SqlCompilerImpl implements SqlCompiler { } if (fieldName && column.expr.right.column) { fieldName += '.' + column.expr.right.column; - const outputField = column.as || fieldName; - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; - - // Also include the parent field - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; + fieldsToProject.push(fieldName); } } } else if ('type' in column && column.type === 'column_ref' && column.column) { @@ -1375,16 +1525,7 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.column); } - - const outputField = column.as || fieldName; - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; - - // For nested fields, also include the parent field - if (fieldName.includes('.')) { - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; - } + fieldsToProject.push(fieldName); } else if ('column' in column) { // Handle direct column with possible table let fieldName; @@ -1394,29 +1535,124 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.column); } - - const outputField = column.as || fieldName; - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; - - // For nested fields, also include the parent field - if (fieldName.includes('.')) { - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; - } + fieldsToProject.push(fieldName); } } else if (typeof column === 
'string') { const fieldName = this.processFieldName(column); - // For find queries, MongoDB projection uses 1 - projection[fieldName] = 1; + fieldsToProject.push(fieldName); + } + }); - // For nested fields, also include the parent field - if (fieldName.includes('.')) { - const parentField = fieldName.split('.')[0]; - projection[parentField] = 1; + // Handle array access fields - since MongoDB 4.4 doesn't allow including both a field and its subfields, + // we'll detect array paths and handle them specially + + // Track array access fields for special handling + const arrayAccessInfoMap = new Map(); + + fieldsToProject.forEach(fieldName => { + // Check for array access in the field path + const arrayInfo = this.getArrayAccessInfo(fieldName); + + if (arrayInfo.hasArrayAccess) { + // This is a field with array access like actors.0.name + log(`$Found array access in field: ${fieldName}`); + + // We need to use $slice and field projection for arrays to work around path collision issues + const parts = fieldName.split('.'); + const baseField = parts[0]; // e.g., "actors" + + if (parts.length >= 3 && /^\d+$/.test(parts[1])) { + // This is a path like "actors.0.name" - extract the parts: + const arrayField = parts[0]; // "actors" + const indexValue = parseInt(parts[1]); // 0 + const subField = parts.slice(2).join('.'); // "name" + + // Setting specific array element field without including the whole array + // If we need name & role from actors[0], this ensures we get both without + // path collision issues + projection[fieldName] = 1; + + // Store info for field name flattening + const outputField = subField || parts[parts.length - 1]; + arrayAccessInfoMap.set(outputField, { + path: fieldName, + fieldName: outputField, + arrayField, + index: indexValue, + subField: subField || undefined + }); + + // Mark this field as having been projected to avoid collisions + parentFields.add(baseField); + } else if (parts.length === 2 && /^\d+$/.test(parts[1])) { + // This is a path 
like "actors.0" - just projecting a specific array element + projection[fieldName] = 1; + + // Store info for field name flattening + arrayAccessInfoMap.set(parts[1], { + path: fieldName, + fieldName: parts[1], + arrayField: parts[0], + index: parseInt(parts[1]) + }); + + parentFields.add(baseField); } + } else if (fieldName.includes('.')) { + // Regular nested field + // Check if any parent has already been included + const parts = fieldName.split('.'); + const baseField = parts[0]; + + if (!parentFields.has(baseField)) { + projection[fieldName] = 1; + } else { + // Parent field already included, skip this to avoid path collision + log(`$Skipping field ${fieldName} to avoid path collision with parent ${baseField}`); + } + } else { + // Regular top-level field + projection[fieldName] = 1; } }); + + // For array access fields, we need to use MongoDB's aggregation operators directly in the projection + if (arrayAccessInfoMap.size > 0) { + log(`$Adding MongoDB operators for array access fields: ${JSON.stringify(Array.from(arrayAccessInfoMap.entries()))}`); + + // Add MongoDB's field extraction operators for array access fields + for (const [fieldName, info] of arrayAccessInfoMap.entries()) { + const { arrayField, index, subField } = info; + + // MongoDB's projection can use aggregation operators + if (subField) { + // Use $getField to extract nested field from array element + projection[fieldName] = { + $getField: { + field: subField, + input: { + $arrayElemAt: [`$${arrayField}`, index] + } + } + }; + } else { + // Extract the whole array element + projection[fieldName] = { $arrayElemAt: [`$${arrayField}`, index] }; + } + + // Remove the original dot notation field if it was added + const dotPath = `${arrayField}.${index}${subField ? '.' 
+ subField : ''}`; + if (projection[dotPath]) { + delete projection[dotPath]; + } + } + } log('Final projection:', JSON.stringify(projection, null, 2)); @@ -1459,6 +1695,369 @@ export class SqlCompilerImpl implements SqlCompiler { return processed; } + + /** + * Normalizes a field path by handling various array notation formats + * and returns information for further processing + */ + private normalizeFieldPath(fieldPath: string): { + normalizedPath: string; + hasArrayAccess: boolean; + outputFieldName: string; + arrayIndices: number[]; + } { + // First, process SQL-style array syntax (items__ARRAY_0__name) to MongoDB dot notation + const processedPath = this.processFieldName(fieldPath); + + // Convert underscore-number patterns to standard dot notation (addresses_0 -> addresses.0) + const underscoreArrayPattern = /(\w+)_(\d+)/g; + const normalizedPath = processedPath.replace(underscoreArrayPattern, '$1.$2'); + + if (normalizedPath !== processedPath) { + log(`$Converted underscore array path to dot notation: ${processedPath} -> ${normalizedPath}`); + } + + // Identify array indices + const parts = normalizedPath.split('.'); + const arrayIndices: number[] = []; + + parts.forEach((part, index) => { + if (/^\d+$/.test(part)) { + arrayIndices.push(index); + } + }); + + // Create a standardized output field name + // For array access and nested fields, we use underscores in the output field name + const outputFieldName = normalizedPath.includes('.') + ? 
normalizedPath.replace(/\./g, '_') + : normalizedPath; + + return { + normalizedPath, + hasArrayAccess: arrayIndices.length > 0, + outputFieldName, + arrayIndices + }; + } + + /** + * Get array access information from a normalized path + * @deprecated Use normalizeFieldPath instead + */ + private getArrayAccessInfo(fieldPath: string): { + hasArrayAccess: boolean; + arrayIndices: number[]; + parts: string[]; + } { + // For backward compatibility, we maintain this method but implement using normalizeFieldPath + const { hasArrayAccess, arrayIndices } = this.normalizeFieldPath(fieldPath); + const parts = fieldPath.split('.'); + + return { + hasArrayAccess, + arrayIndices, + parts + }; + } + + /** + * Builds a MongoDB projection expression for array access + * Handles simple and complex nested array access patterns + */ + private buildArrayAccessProjection( + projection: Record, + fieldPath: string, + outputFieldName: string + ): void { + // Check if the path has an underscore pattern like "addresses_0" that needs to be handled + // as array access instead of using the dot notation + const underscoreArrayPattern = /(\w+)_(\d+)/g; + let arrayPath = fieldPath; + + // If the path contains underscores followed by numbers, we need to + // handle it using $arrayElemAt for proper MongoDB array access + // Example: addresses_0.details.street => addresses.0.details.street + if (arrayPath.match(underscoreArrayPattern)) { + log(`$Handling underscore-based array notation: ${arrayPath}`); + + // First, normalize the path by replacing all underscore-number combinations with dot notation + arrayPath = arrayPath.replace(underscoreArrayPattern, '$1.$2'); + log(`$Normalized underscore array path: ${fieldPath} -> ${arrayPath}`); + + // Now process the normalized path + const parts = arrayPath.split('.'); + let currentExpr: any = null; + + // Start with the base object + let currentObj = `$${parts[0]}`; + let startIndex = 1; + + // Build a chain of $arrayElemAt and $getField operations + 
for (let i = startIndex; i < parts.length; i++) { + const part = parts[i]; + + if (/^\d+$/.test(part)) { + // This is a numeric index, use $arrayElemAt + const indexValue = parseInt(part); + + if (currentExpr === null) { + // This is the first operation in the chain + currentExpr = { + $arrayElemAt: [currentObj, indexValue] + }; + } else { + // Nest this operation in the previous one + currentExpr = { + $arrayElemAt: [currentExpr, indexValue] + }; + } + + log(`$Added array access at index ${indexValue}`); + } else { + // This is a field name, use $getField + if (currentExpr === null) { + // If we haven't created any expression yet, use direct path + if (i === 1) { + // We're at the first operation and it's a field + currentObj = `$${parts[0]}.${part}`; + } else { + // We need to build a getField expression + currentExpr = { + $getField: { + field: part, + input: currentObj + } + }; + } + } else { + // Nest this field access in the previous operation + currentExpr = { + $getField: { + field: part, + input: currentExpr + } + }; + } + + log(`$Added field access for ${part}`); + } + } + + // Set the final expression in the projection + if (currentExpr !== null) { + projection[outputFieldName] = currentExpr; + } else { + // If we didn't build an expression, use the path directly + projection[outputFieldName] = currentObj; + } + + log(`$Final array access expression for ${outputFieldName}: ${JSON.stringify(projection[outputFieldName], null, 2)}`); + return; + } + + // Continue with standard dot-notation array processing + const parts = fieldPath.split('.'); + const arrayInfo = this.getArrayAccessInfo(fieldPath); + + if (!arrayInfo.hasArrayAccess) { + // Not an array access field, use standard projection + projection[outputFieldName] = `$${fieldPath}`; + log(`$Added standard field to projection: ${outputFieldName} = $${fieldPath}`); + return; + } + + // Get the first array index position + const firstArrayIndex = arrayInfo.arrayIndices[0]; + const indexValue = 
parseInt(parts[firstArrayIndex]); + + // Simple case: array at the first level with potential nested fields + if (firstArrayIndex === 0) { + // Array is the root, like: 0.field.subfield + const arrayField = parts[0]; + const subPath = parts.slice(1).join('.'); + + this.handleSimpleArrayAccess(projection, arrayField, indexValue, subPath, outputFieldName); + } else if (firstArrayIndex === 1) { + // Array is the second level, like: field.0.subfield + const arrayField = parts[0]; + const subPath = parts.slice(2).join('.'); + + this.handleSimpleArrayAccess(projection, arrayField, indexValue, subPath, outputFieldName); + } else if (arrayInfo.arrayIndices.length === 1) { + // Only one array index, but it's deeper in the path + const prefix = parts.slice(0, firstArrayIndex).join('.'); + const indexValue = parseInt(parts[firstArrayIndex]); + const suffix = parts.slice(firstArrayIndex + 1).join('.'); + + // Build a nested expression with $arrayElemAt + if (suffix) { + projection[outputFieldName] = { + $getField: { + field: suffix, + input: { + $arrayElemAt: [{ + $getField: { + field: parts[firstArrayIndex - 1], + input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}` + } + }, indexValue] + } + } + }; + } else { + projection[outputFieldName] = { + $arrayElemAt: [{ + $getField: { + field: parts[firstArrayIndex - 1], + input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}` + } + }, indexValue] + }; + } + + log(`$Added complex nested array access to projection: ${outputFieldName}`); + } else { + // Multiple array indices - very complex case + // For this case, we'll fall back to the simple dot notation which works in some cases + projection[outputFieldName] = `$${fieldPath}`; + log(`$Using fallback dot notation for complex array access: ${outputFieldName} = $${fieldPath}`); + } + } + + /** + * Handles array access patterns at any level of nesting + * This supports patterns like: + * - actors.0.name + * - addresses.0.details.street + * - addresses.0.details.coords.0 + */ + 
private handleSimpleArrayAccess( + projection: Record, + arrayField: string, + indexValue: number, + subPath: string, + outputFieldName: string + ): void { + log(`$Processing array access: ${arrayField}[${indexValue}]${subPath ? '.' + subPath : ''} as ${outputFieldName}`); + + // Special handling for common complex patterns + // This pattern matches addresses[0].details.street and similar patterns + if (subPath && subPath.includes('.')) { + const pathParts = subPath.split('.'); + + // Special handling for nested object patterns like addresses[0].details.street + if (pathParts.length >= 2) { + let currentExpr: any = { + $arrayElemAt: [`$${arrayField}`, indexValue] + }; + + // Process each part of the path to build a nested expression + for (let i = 0; i < pathParts.length; i++) { + const part = pathParts[i]; + + if (/^\d+$/.test(part)) { + // This is an array index - another level of array access + currentExpr = { + $arrayElemAt: [currentExpr, parseInt(part)] + }; + } else { + // This is a field access + currentExpr = { + $getField: { + field: part, + input: currentExpr + } + }; + } + } + + // Set the fully built nested expression + projection[outputFieldName] = currentExpr; + log(`$Added optimized multi-level nested expression for ${outputFieldName}`); + return; + } + } + + if (subPath) { + // Check if the subPath contains nested fields + if (subPath.includes('.')) { + // Handle complex nested path inside array element + // e.g., actors.0.details.name needs nested $getField expressions + const subParts = subPath.split('.'); + + // Start with the array element access + let expr: any = { + $arrayElemAt: [`$${arrayField}`, indexValue] + }; + + // Build nested $getField expressions for each part + for (const part of subParts) { + if (part === '') continue; // Skip empty parts + + // Check if this part is a numeric index (another array access) + if (/^\d+$/.test(part)) { + // This is an array index within the nested path + expr = { + $arrayElemAt: [expr, parseInt(part)] 
+ }; + log(`$Adding nested array access at index ${part}`); + } else { + // This is a field name + expr = { + $getField: { + field: part, + input: expr + } + }; + log(`$Adding nested field access for ${part}`); + } + } + + // Store the complex expression in the projection + projection[outputFieldName] = expr; + log(`$Added complex nested field array access to projection: ${outputFieldName}`); + } else { + // Simple subPath with no further nesting + projection[outputFieldName] = { + $getField: { + field: subPath, + input: { + $arrayElemAt: [`$${arrayField}`, indexValue] + } + } + }; + log(`$Added array access with nested field to projection: ${outputFieldName}`); + } + } else { + // Just need the array element itself: items.0 + projection[outputFieldName] = { + $arrayElemAt: [`$${arrayField}`, indexValue] + }; + log(`$Added simple array access to projection: ${outputFieldName}`); + } + + // Debug log the final expression for this field + log(`$Final projection expression for ${outputFieldName}:`, JSON.stringify(projection[outputFieldName], null, 2)); + } + /** + * Check if a name is an actual table reference in the FROM clause + * + * This helps distinguish between table.column notation and nested field access + */ + private isActualTableReference(name: string, ast: any): boolean { + if (!ast.from || !Array.isArray(ast.from)) return false; + + // Check if the name appears as a table name or alias in the FROM clause + return ast.from.some((fromItem: any) => { + return ( + fromItem.table === name || + fromItem.as === name || + // Also match table references to aliases in the FROM clause + (typeof fromItem === 'object' && fromItem.as && fromItem.as === name) + ); + }); + } /** * Special handling for table references that might actually be nested fields @@ -1484,7 +2083,8 @@ export class SqlCompilerImpl implements SqlCompiler { column.expr && column.expr.type === 'column_ref' && column.expr.table && - column.expr.column + column.expr.column && + 
!this.isActualTableReference(column.expr.table, ast) ) { // This could be a nested field - convert table.column to a single column path column.expr.column = `${column.expr.table}.${column.expr.column}`; @@ -2091,4 +2691,34 @@ export class SqlCompilerImpl implements SqlCompiler { return conditions; } + + /** + * Process a field name to figure out what the output name should be + * - items__ARRAY_0__name => name + * - table.column => column + */ + private extractOutputField(field: string, as?: string): string { + // Process the field to handle array access notation first (converts __ARRAY_0__ to .0.) + const processedField = this.processFieldName(field); + + // Output field name (possibly aliased) + // If there's an alias, use it + // Otherwise, if the field has a table prefix or dots, use just the final part (excluding array indices) + let outputField; + if (as) { + // If there's an AS clause, use that for the output field name + outputField = as; + } else if (processedField.includes('.')) { + // For dot notation fields, use the last part (excluding array indices) + const parts = processedField.split('.'); + // Get the last non-numeric part (skipping array indices) + const lastNonNumericPart = parts.filter(part => isNaN(Number(part))).pop(); + outputField = lastNonNumericPart || field; + } else { + // Simple field without dots + outputField = field; + } + + return outputField; + } } diff --git a/packages/lib/src/parser.ts b/packages/lib/src/parser.ts index 7d63e45..8d2a5f3 100644 --- a/packages/lib/src/parser.ts +++ b/packages/lib/src/parser.ts @@ -1,4 +1,4 @@ -import { Parser as NodeSqlParser } from 'node-sql-parser'; +import { From, Parser as NodeSqlParser } from 'node-sql-parser'; import { SqlParser, SqlStatement } from './interfaces'; import debug from 'debug'; @@ -79,6 +79,8 @@ export class SqlParserImpl implements SqlParser { database: 'PostgreSQL', }); + log('Preprocessed AST: ', JSON.stringify(ast, null, 2)) + // Process the AST to properly handle nested 
fields const processedAst = this.postProcessAst(ast); @@ -249,6 +251,7 @@ export class SqlParserImpl implements SqlParser { // It's likely a nested field, not a table reference column.expr.column = `${column.expr.table}.${column.expr.column}`; column.expr.table = null; + log(`Setting table to null for likely nested field: ${column.expr.column}`) } } }); diff --git a/packages/lib/tests/integration/array-access.integration.test.ts b/packages/lib/tests/integration/array-access.integration.test.ts index 655940b..a6f9ae3 100644 --- a/packages/lib/tests/integration/array-access.integration.test.ts +++ b/packages/lib/tests/integration/array-access.integration.test.ts @@ -9,19 +9,34 @@ describe('Array Access Integration Tests', () => { }, 30000); // 30 second timeout for container startup afterAll(async () => { + // Make sure to close any outstanding connections + const queryLeaf = testSetup.getQueryLeaf(); + + // Clean up any resources that QueryLeaf might be using + if (typeof queryLeaf.close === 'function') { + await queryLeaf.close(); + } + + // Clean up test setup resources await testSetup.cleanup(); - }); + }, 10000); beforeEach(async () => { - // Add test data for array access + // Clean up collections before each test const db = testSetup.getDb(); await db.collection('order_items').deleteMany({}); + await db.collection('movies').deleteMany({}); + await db.collection('users').deleteMany({}); + await db.collection('directors').deleteMany({}); }); afterEach(async () => { - // Clean up test data + // Clean up collections after each test const db = testSetup.getDb(); await db.collection('order_items').deleteMany({}); + await db.collection('movies').deleteMany({}); + await db.collection('users').deleteMany({}); + await db.collection('directors').deleteMany({}); }); test('should handle array access syntax for nested field access in queries', async () => { @@ -198,4 +213,301 @@ describe('Array Access Integration Tests', () => { expect(orderIds).toContain('ORD-2001'); 
expect(orderIds).toContain('ORD-2002'); }); + + // NEW TESTS FOR DIRECT BRACKET NOTATION + + test('should support bracket notation for array access in SELECT', async () => { + // Arrange + const db = testSetup.getDb(); + await db.collection('movies').insertMany([ + { + title: 'The Matrix', + year: 1999, + actors: [ + { name: 'Keanu Reeves', role: 'Neo' }, + { name: 'Laurence Fishburne', role: 'Morpheus' }, + { name: 'Carrie-Anne Moss', role: 'Trinity' } + ] + }, + { + title: 'Inception', + year: 2010, + actors: [ + { name: 'Leonardo DiCaprio', role: 'Cobb' }, + { name: 'Joseph Gordon-Levitt', role: 'Arthur' }, + { name: 'Ellen Page', role: 'Ariadne' } + ] + } + ]); + + // Act - Test bracket notation syntax + const queryLeaf = testSetup.getQueryLeaf(); + // Explicit test of the bracket notation feature we want to implement + const sql = "SELECT title, actors[0].name AS lead_actor FROM movies"; + log('SQL being executed:', sql); + + // Debug: First check with direct MongoDB query to ensure test data is properly inserted + const directMovies = await db.collection('movies').find().toArray(); + log('Direct MongoDB query results for movies:', JSON.stringify(directMovies, null, 2)); + + const results = ensureArray(await queryLeaf.execute(sql)); + log('Bracket notation array access results:', JSON.stringify(results, null, 2)); + + // Extra debug info to help diagnose the issue + log('===== TEST DEBUG INFO ====='); + log('SQL Query:', sql); + log('Direct MongoDB data:', JSON.stringify(directMovies, null, 2)); + log('QueryLeaf result keys:', Object.keys(results[0] || {})); + log('The Matrix result:', JSON.stringify(results.find(m => m.title === 'The Matrix'), null, 2)); + + // Assert + expect(results).toHaveLength(2); + expect(results.find(m => m.title === 'The Matrix')?.lead_actor).toBe('Keanu Reeves'); + expect(results.find(m => m.title === 'Inception')?.lead_actor).toBe('Leonardo DiCaprio'); + }); + + test('should support bracket notation for array access in WHERE 
clause', async () => { + // Arrange + const db = testSetup.getDb(); + await db.collection('movies').insertMany([ + { + title: 'The Matrix', + year: 1999, + ratings: [8.5, 9.0, 7.5] + }, + { + title: 'Inception', + year: 2010, + ratings: [9.2, 8.8, 9.5] + } + ]); + + // Act - Test bracket notation in WHERE clause + const queryLeaf = testSetup.getQueryLeaf(); + const sql = "SELECT title, year FROM movies WHERE ratings[0] > 9.0"; + + const results = ensureArray(await queryLeaf.execute(sql)); + log('Bracket notation in WHERE results:', JSON.stringify(results, null, 2)); + + // Assert + expect(results).toHaveLength(1); + expect(results[0].title).toBe('Inception'); + }); + + test('should support multiple levels of array and object nesting with bracket notation', async () => { + // Arrange + const db = testSetup.getDb(); + await db.collection('users').insertMany([ + { + name: 'Alice', + addresses: [ + { + type: 'home', + details: { + street: '123 Main St', + coords: [40.7128, -74.0060] + } + }, + { + type: 'work', + details: { + street: '456 Market St', + coords: [37.7749, -122.4194] + } + } + ] + }, + { + name: 'Bob', + addresses: [ + { + type: 'home', + details: { + street: '789 Oak St', + coords: [39.9526, -75.1652] + } + } + ] + } + ]); + + // Act - Test complex nesting with bracket notation + const queryLeaf = testSetup.getQueryLeaf(); + const sql = "SELECT name, addresses[0].details.street AS home_street, addresses[0].details.coords[0] AS latitude FROM users"; + + // First, do a direct MongoDB query to see the exact structure + const directUserResults = await db.collection('users').find().toArray(); + log('Direct MongoDB query result for users:', JSON.stringify(directUserResults, null, 2)); + + const results = ensureArray(await queryLeaf.execute(sql)); + log('Complex nested bracket notation results:', JSON.stringify(results, null, 2)); + + // Assert + expect(results).toHaveLength(2); + const alice = results.find(u => u.name === 'Alice'); + const bob = results.find(u 
=> u.name === 'Bob'); + + expect(alice).toBeDefined(); + expect(alice?.home_street).toBe('123 Main St'); + expect(alice?.latitude).toBe(40.7128); + + expect(bob).toBeDefined(); + expect(bob?.home_street).toBe('789 Oak St'); + expect(bob?.latitude).toBe(39.9526); + }); + + test('should support bracket notation in UPDATE statements', async () => { + // Arrange + const db = testSetup.getDb(); + await db.collection('movies').insertOne({ + title: 'The Matrix', + year: 1999, + actors: [ + { name: 'Keanu Reeves', role: 'Neo' }, + { name: 'Laurence Fishburne', role: 'Morpheus' }, + { name: 'Carrie-Anne Moss', role: 'Trinity' } + ] + }); + + // Act - Test bracket notation in UPDATE statement + const queryLeaf = testSetup.getQueryLeaf(); + const updateSql = "UPDATE movies SET actors[0].role = 'The One' WHERE title = 'The Matrix'"; + + await queryLeaf.execute(updateSql); + + // Verify with a SELECT using bracket notation + const selectSql = "SELECT title, actors[0].name, actors[0].role FROM movies WHERE title = 'The Matrix'"; + log('SQL for verification after UPDATE:', selectSql); + const results = ensureArray(await queryLeaf.execute(selectSql)); + log('Bracket notation in UPDATE results:', JSON.stringify(results, null, 2)); + log('Result keys:', Object.keys(results[0] || {})); + + // Assert + expect(results).toHaveLength(1); + expect(results[0].name).toBe('Keanu Reeves'); + expect(results[0].role).toBe('The One'); + + // Double-check with a direct MongoDB query + const dbResult = await db.collection('movies').findOne({ title: 'The Matrix' }); + log('Direct MongoDB query after UPDATE:', JSON.stringify(dbResult, null, 2)); + expect(dbResult?.actors[0]?.role).toBe('The One'); + }); + + test('should support bracket notation with JOIN operations', async () => { + // Arrange + const db = testSetup.getDb(); + + // Insert movies with a director ID + await db.collection('movies').insertMany([ + { + title: 'The Matrix', + year: 1999, + directorId: 'director1', + scenes: [ + { name: 
'Rooftop Scene', duration: 12 }, + { name: 'Lobby Scene', duration: 8 } + ] + }, + { + title: 'Inception', + year: 2010, + directorId: 'director2', + scenes: [ + { name: 'Dream Level 1', duration: 15 }, + { name: 'Dream Level 2', duration: 10 } + ] + } + ]); + + // Insert directors + await db.collection('directors').insertMany([ + { + _id: 'director1', + name: 'Wachowski Sisters', + awards: ['Oscar Nomination', 'BAFTA Award'] + }, + { + _id: 'director2', + name: 'Christopher Nolan', + awards: ['Oscar Winner', 'Golden Globe'] + } + ]); + + // Act - Test bracket notation in JOIN query + const queryLeaf = testSetup.getQueryLeaf(); + const sql = ` + SELECT m.title, m.scenes[0].name AS first_scene, d.name AS director, d.awards[0] AS top_award + FROM movies m + JOIN directors d ON m.directorId = d._id + `; + log('JOIN SQL query:', sql); + + // Verify input data with direct MongoDB queries + const moviesData = await db.collection('movies').find().toArray(); + log('Direct MongoDB query - movies:', JSON.stringify(moviesData, null, 2)); + + const directorsData = await db.collection('directors').find().toArray(); + log('Direct MongoDB query - directors:', JSON.stringify(directorsData, null, 2)); + + // Let's first check what this specific MongoDB query would look like + // without the SQL translation + const movieCollection = db.collection('movies'); + const pipeline = [ + { + $lookup: { + from: 'directors', + localField: 'directorId', + foreignField: '_id', + as: 'director' + } + }, + { + $unwind: '$director' + }, + { + $project: { + 'title': 1, + 'first_scene': { $arrayElemAt: ['$scenes.name', 0] }, + 'director': '$director.name', + 'top_award': { $arrayElemAt: ['$director.awards', 0] } + } + } + ]; + + // Run the manual MongoDB aggregation for comparison + const manualResult = await movieCollection.aggregate(pipeline).toArray(); + log('Manual MongoDB aggregation result:', JSON.stringify(manualResult, null, 2)); + + const results = ensureArray(await queryLeaf.execute(sql)); 
+ log('Bracket notation in JOIN results:', JSON.stringify(results, null, 2)); + log('JOIN result keys:', Object.keys(results[0] || {})); + + // For debugging - add more detailed output + log('Results detailed dump:'); + results.forEach((r, i) => { + log(`- Result ${i}:`, JSON.stringify(r, null, 2)); + log(` Keys: ${Object.keys(r).join(', ')}`); + }); + + // Try different ways to find The Matrix + const matrixByTitle = results.find(r => r.title === 'The Matrix'); + const matrixById = results.find(r => r._id === 'director1'); + log('Matrix lookup by title:', matrixByTitle ? 'found' : 'not found'); + log('Matrix lookup by director id:', matrixById ? 'found' : 'not found'); + + // Assert + expect(results).toHaveLength(2); + + const matrix = results.find(r => r.title === 'The Matrix'); + expect(matrix).toBeDefined(); + expect(matrix?.first_scene).toBe('Rooftop Scene'); + expect(matrix?.director).toBe('Wachowski Sisters'); + expect(matrix?.top_award).toBe('Oscar Nomination'); + + const inception = results.find(r => r.title === 'Inception'); + expect(inception).toBeDefined(); + expect(inception?.first_scene).toBe('Dream Level 1'); + expect(inception?.director).toBe('Christopher Nolan'); + expect(inception?.top_award).toBe('Oscar Winner'); + }); }); From 7866452fd04e3068fd585476d2813c1f61d466f2 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Fri, 2 May 2025 14:54:39 -0600 Subject: [PATCH 2/5] fix alias tests --- packages/lib/src/compiler.ts | 133 ++++------------------------------- 1 file changed, 15 insertions(+), 118 deletions(-) diff --git a/packages/lib/src/compiler.ts b/packages/lib/src/compiler.ts index a8d7a8f..1761069 100644 --- a/packages/lib/src/compiler.ts +++ b/packages/lib/src/compiler.ts @@ -614,6 +614,7 @@ export class SqlCompilerImpl implements SqlCompiler { // Special handling for array access fields - we need to create field extraction // expressions to flatten nested array elements to root level + log(`arrayAccessFields: `, arrayAccessFields) if 
(arrayAccessFields.length > 0 && !isJoinQuery) { const arrayFieldsProject: Record = {}; @@ -697,120 +698,10 @@ export class SqlCompilerImpl implements SqlCompiler { log('JOIN query from:', JSON.stringify(ast.from, null, 2)); log('JOIN query where:', JSON.stringify(ast.where, null, 2)); - // Add the $lookup stages we've already configured - const lookupStages = aggregateCommand.pipeline.filter((stage) => '$lookup' in stage); - log('Current $lookup stages:', JSON.stringify(lookupStages, null, 2)); - // For JOIN queries, we need to handle the projection differently to flatten the results // First, we'll create a projection that preserves the table aliases in the pipeline const renamedFieldsProject: Record = {}; - // Process each column and create a flattened naming structure - for (const column of ast.columns) { - if (typeof column === 'object' && column.expr) { - const table = column.expr.table; - const field = column.expr.column; - - // The field name that will be used in the output - // If there's an alias, use it, otherwise use just the field name - const outputName = column.as || field; - - if (table) { - // Different handling based on whether it's from the main table or a joined table - if (this.currentTableAliases.has(table)) { - const isMainTable = table === ast.from[0].as; - - if (isMainTable) { - // Fields from the main table can be accessed directly - // Handle field names with table prefixes in the field itself (e.g., m.title in field) - if (field.includes('.')) { - const fieldParts = field.split('.'); - const actualField = fieldParts[fieldParts.length - 1]; - renamedFieldsProject[outputName] = `$${fieldParts[1]}`; // Use main table field - log(`$Main table field with prefix: ${field} -> ${outputName} = $${fieldParts[1]}`); - } else { - // Simple field from main table - renamedFieldsProject[outputName] = `$${field}`; - log(`$Main table field mapping: ${outputName} = $${field}`); - } - - // If the output name has a table prefix and no alias was explicitly 
provided, - // we also add a version without the prefix for compatibility - if (outputName.includes('.') && !column.as) { - const outParts = outputName.split('.'); - const cleanName = outParts[outParts.length - 1]; - renamedFieldsProject[cleanName] = `$${field}`; - log(`$Added clean field name for compatibility: ${cleanName} = $${field}`); - } - } else { - // Fields from joined tables need the alias prefix - if (field.includes('.')) { - const fieldParts = field.split('.'); - const actualField = fieldParts[fieldParts.length - 1]; - renamedFieldsProject[outputName] = `$${table}.${actualField}`; - log(`$Joined table field with prefix: ${field} -> ${outputName} = $${table}.${actualField}`); - } else { - renamedFieldsProject[outputName] = `$${table}.${field}`; - log(`$Joined table field mapping: ${outputName} = $${table}.${field}`); - } - } - } else { - // Not a recognized alias, but still has a table prefix - renamedFieldsProject[outputName] = `$${table}.${field}`; - } - } else if (column.expr.type === 'column_ref' && column.expr.column) { - // Handle case where column is a direct column reference without table - // For JOINS, we still need to know which table it belongs to - - // If no table specified, try to determine which table it belongs to - // For simplicity, assume it's from the main table - renamedFieldsProject[outputName] = `$${column.expr.column}`; - log(`Simple column mapping: ${outputName} = $${column.expr.column}`); - } else { - // No table prefix specified, assume it's from the main table - renamedFieldsProject[outputName] = `$${field}`; - } - } else if ( - column === '*' || - (typeof column === 'object' && column.expr && column.expr.type === 'star') - ) { - // For SELECT *, we need to merge all fields from all tables - // This is a more complex case that needs a special projection approach - - // For star queries in JOIN context, we need to use MongoDB's $mergeObjects - // to bring fields from joined documents up to the top level - - // First, create a 
base object with all fields from the main table - renamedFieldsProject['mainFields'] = '$$ROOT'; - - // Then, for each joined table, create a merge field - for (let i = 1; i < ast.from.length; i++) { - const joinedTable = ast.from[i].as || this.extractTableName(ast.from[i]); - // Use all fields from the joined table, directly available at the top level - // This preserves their original field names - renamedFieldsProject[joinedTable] = `$${joinedTable}`; - } - - // Use MongoDB's $replaceRoot to promote all fields to the root level - // This will be a separate stage after the projection - const mergeObjects = ['$mainFields']; - for (let i = 1; i < ast.from.length; i++) { - const joinedTable = ast.from[i].as || this.extractTableName(ast.from[i]); - mergeObjects.push(`$${joinedTable}`); - } - - // We will add the $replaceRoot stage after this projection - aggregateCommand.pipeline.push({ - $replaceRoot: { - newRoot: { - $mergeObjects: mergeObjects, - }, - }, - }); - - log('Added $replaceRoot stage for merging joined tables in SELECT *'); - } - } // Add a final stage to correctly handle JOIN results // We need the column values to be accessible directly at the top level, @@ -1478,23 +1369,25 @@ export class SqlCompilerImpl implements SqlCompiler { if ('column' in column.expr && column.expr.column) { // First check if the column has a table reference that might be an alias let fieldName; - if (column.expr.table && column.expr.column) { + if (column.expr.table && column.expr.column && this.currentTableAliases.has(column.expr.table)) { fieldName = `${column.expr.table}.${column.expr.column}`; log(`$Using table-prefixed field in projection: ${fieldName}`); } else { fieldName = this.processFieldName(column.expr.column); } - fieldsToProject.push(fieldName); + const outputName = this.extractOutputField(fieldName, column.as) + fieldsToProject.push(outputName); } else if (column.expr.type === 'column_ref' && column.expr.column) { // Handle column_ref with possible table let 
fieldName; - if (column.expr.table && column.expr.column) { + if (column.expr.table && column.expr.column && this.currentTableAliases.has(column.expr.table)) { fieldName = `${column.expr.table}.${column.expr.column}`; log(`Using table-prefixed field in column_ref projection: ${fieldName}`); } else { fieldName = this.processFieldName(column.expr.column); } - fieldsToProject.push(fieldName); + const outputName = this.extractOutputField(fieldName, column.as) + fieldsToProject.push(outputName); } else if ( column.expr.type === 'binary_expr' && column.expr.operator === '.' && @@ -1513,7 +1406,8 @@ export class SqlCompilerImpl implements SqlCompiler { } if (fieldName && column.expr.right.column) { fieldName += '.' + column.expr.right.column; - fieldsToProject.push(fieldName); + const outputName = this.extractOutputField(fieldName, column.as) + fieldsToProject.push(outputName); } } } else if ('type' in column && column.type === 'column_ref' && column.column) { @@ -1525,17 +1419,19 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.column); } - fieldsToProject.push(fieldName); + const outputName = this.extractOutputField(fieldName, column.as) + fieldsToProject.push(outputName); } else if ('column' in column) { // Handle direct column with possible table let fieldName; - if (column.table && column.column) { + if (column.table && column.column && this.currentTableAliases.has(column.table)) { fieldName = `${column.table}.${column.column}`; log(`Using table-prefixed field in direct column: ${fieldName}`); } else { fieldName = this.processFieldName(column.column); } - fieldsToProject.push(fieldName); + const outputName = this.extractOutputField(fieldName, column.as) + fieldsToProject.push(outputName); } } else if (typeof column === 'string') { const fieldName = this.processFieldName(column); @@ -1618,6 +1514,7 @@ export class SqlCompilerImpl implements SqlCompiler { } } else { // Regular top-level field + log(`Adding 
${fieldName} to projection`) projection[fieldName] = 1; } }); From ff35714174e99bc232b2a48f05efe2b95d34ebba Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Fri, 2 May 2025 15:26:04 -0600 Subject: [PATCH 3/5] fix tests --- packages/lib/src/compiler.ts | 573 +++++++++++++++-------------------- packages/lib/src/parser.ts | 4 +- 2 files changed, 249 insertions(+), 328 deletions(-) diff --git a/packages/lib/src/compiler.ts b/packages/lib/src/compiler.ts index 1761069..3968836 100644 --- a/packages/lib/src/compiler.ts +++ b/packages/lib/src/compiler.ts @@ -251,7 +251,7 @@ export class SqlCompilerImpl implements SqlCompiler { // Process and normalize the field path for array access const { normalizedPath, hasArrayAccess, outputFieldName } = this.normalizeFieldPath(fieldPath); - + // If the field path contains a period, check if it's a table alias reference or a nested field if (normalizedPath.includes('.')) { const parts = normalizedPath.split('.'); @@ -273,7 +273,7 @@ export class SqlCompilerImpl implements SqlCompiler { } } else { // This is a nested field - + // Check if we need to use array operators if (hasArrayAccess) { // Build a complex array access projection using the dot notation path @@ -376,11 +376,11 @@ export class SqlCompilerImpl implements SqlCompiler { col === '*' || (typeof col === 'object' && col.expr && col.expr.type === 'star') ); - log(`CURRENT TABLE ALIASES: `, JSON.stringify(this.currentTableAliases, null, 2)) + log(`CURRENT TABLE ALIASES: `, JSON.stringify(this.currentTableAliases, null, 2)); // Process explicit columns first if (ast.columns && !hasStar) { for (const column of ast.columns) { - log(`Processing explicit column: `, JSON.stringify(column, null, 2)) + log(`Processing explicit column: `, JSON.stringify(column, null, 2)); if ( typeof column === 'object' && column.expr && @@ -404,7 +404,9 @@ export class SqlCompilerImpl implements SqlCompiler { } } - log(`HAS STAR FOR ${lookup.as}: ${hasStar}, joinFieldMapping: 
${JSON.stringify(joinFieldMapping, null, 2)}`) + log( + `HAS STAR FOR ${lookup.as}: ${hasStar}, joinFieldMapping: ${JSON.stringify(joinFieldMapping, null, 2)}` + ); // If we have a SELECT * or no explicit joined fields were found, // we need to promote ALL fields from the joined collection if (hasStar || Object.keys(joinFieldMapping).length === 0) { @@ -456,7 +458,12 @@ export class SqlCompilerImpl implements SqlCompiler { if (ast.columns) { const projection: Record = {}; // Track array access fields that need special handling - const arrayAccessFields: {field: string, path: string, index: number, subField?: string}[] = []; + const arrayAccessFields: { + field: string; + path: string; + index: number; + subField?: string; + }[] = []; // For JOIN queries, we need to handle nested paths differently const isJoinQuery = ast.from && ast.from.length > 1; @@ -497,7 +504,9 @@ export class SqlCompilerImpl implements SqlCompiler { // First process the field path to handle array indexing notation // This transforms items__ARRAY_0__name => items.0.name for MongoDB dot notation const processedPath = this.processFieldName(fieldPath); - log(`$Processed aliased field path with array notation: ${fieldPath} -> ${processedPath}`); + log( + `$Processed aliased field path with array notation: ${fieldPath} -> ${processedPath}` + ); // Check if this field path contains array access notation const arrayInfo = this.getArrayAccessInfo(processedPath); @@ -529,36 +538,40 @@ export class SqlCompilerImpl implements SqlCompiler { // This is an array access notation const firstArrayIndex = arrayInfo.arrayIndices[0]; const indexValue = parseInt(parts[firstArrayIndex]); - + if (firstArrayIndex === 1) { // Simple case: array is the first level, like actors.0.name const arrayField = parts[0]; const subPath = parts.slice(2).join('.'); - + // Add to list of array fields that need special handling // TODO (@day): this might need some changes arrayAccessFields.push({ field: column.as, path: 
processedPath, index: indexValue, - subField: subPath || undefined + subField: subPath || undefined, }); - + // Also add it to the projection so it's included projection[column.as] = { $getField: { field: subPath || 'value', // Fallback to extract the whole value input: { - $arrayElemAt: [`$${arrayField}`, indexValue] - } - } + $arrayElemAt: [`$${arrayField}`, indexValue], + }, + }, }; - - log(`$Added array access field to alias: ${column.as} using $arrayElemAt operator`); + + log( + `$Added array access field to alias: ${column.as} using $arrayElemAt operator` + ); } else { // More complex nested array case - use dot notation as fallback projection[column.as] = `$${processedPath}`; - log(`$Added complex nested array field to alias: ${column.as} = $${processedPath}`); + log( + `$Added complex nested array field to alias: ${column.as} = $${processedPath}` + ); } } else { // Nested field with alias @@ -575,92 +588,94 @@ export class SqlCompilerImpl implements SqlCompiler { // Check if this is an array access field without alias (like actors[0].name) const processedPath = this.processFieldName(fieldPath); const arrayInfo = this.getArrayAccessInfo(processedPath); - + if (arrayInfo.hasArrayAccess) { // This is array access without alias const parts = processedPath.split('.'); const arrayField = parts[0]; const indexValue = parseInt(parts[1]); const subField = parts.slice(2).join('.'); - + // If this is a nested field in an array element, extract just the property name const outputField = subField || parts[parts.length - 1]; - + // Add to list of array fields that need special handling arrayAccessFields.push({ field: outputField, path: processedPath, index: indexValue, - subField: subField || undefined + subField: subField || undefined, }); - + // Add to projection so it's included projection[outputField] = { $getField: { field: subField || 'value', input: { - $arrayElemAt: [`$${arrayField}`, indexValue] - } - } + $arrayElemAt: [`$${arrayField}`, indexValue], + }, + }, 
}; - - log(`$Added array access field without alias: ${outputField} using path ${processedPath}`); + + log( + `$Added array access field without alias: ${outputField} using path ${processedPath}` + ); } else { // Regular field without array access this.addFieldToProjection(projection, fieldPath); } } } - + // Special handling for array access fields - we need to create field extraction // expressions to flatten nested array elements to root level - log(`arrayAccessFields: `, arrayAccessFields) + log(`arrayAccessFields: `, arrayAccessFields); if (arrayAccessFields.length > 0 && !isJoinQuery) { const arrayFieldsProject: Record = {}; - + // Process each array access field - arrayAccessFields.forEach(({field, path, index, subField}) => { + arrayAccessFields.forEach(({ field, path, index, subField }) => { const parts = path.split('.'); const arrayField = parts[0]; - + // Extract specific fields from array elements if (subField) { // Check if the subField contains multiple nested levels or array indices if (subField.includes('.')) { // This is a complex nested path like addresses[0].details.street // or addresses[0].details.pastAddresses[0].street - + // Start with the array element let currentExpr: any = { - $arrayElemAt: [`$${arrayField}`, index] + $arrayElemAt: [`$${arrayField}`, index], }; - + // Build an expression dynamically based on the subField components // First, normalize the path in case it contains any array indices const normalizedSubField = this.processFieldName(subField); const subParts = normalizedSubField.split('.'); - + // Process each part of the path for (let i = 0; i < subParts.length; i++) { const part = subParts[i]; - + // Check if this is a numeric array index if (/^\d+$/.test(part)) { // This part is a numeric array index, use $arrayElemAt currentExpr = { - $arrayElemAt: [currentExpr, parseInt(part)] + $arrayElemAt: [currentExpr, parseInt(part)], }; } else { // This is a field name, use $getField currentExpr = { $getField: { field: part, - 
input: currentExpr - } + input: currentExpr, + }, }; } } - + // Set the final expression arrayFieldsProject[field] = currentExpr; log(`$Added complex nested field/array access for ${field}: ${path}.${subField}`); @@ -671,25 +686,27 @@ export class SqlCompilerImpl implements SqlCompiler { $getField: { field: subField, input: { - $arrayElemAt: [`$${arrayField}`, index] - } - } + $arrayElemAt: [`$${arrayField}`, index], + }, + }, }; } } else { // Just extract the whole array element arrayFieldsProject[field] = { - $arrayElemAt: [`$${arrayField}`, index] + $arrayElemAt: [`$${arrayField}`, index], }; } }); - + // Add the array fields as an $addFields stage instead of $project // This preserves all the original fields while adding the array access fields aggregateCommand.pipeline.push({ $addFields: arrayFieldsProject }); - log(`$Added array access fields using $addFields: ${JSON.stringify(arrayFieldsProject, null, 2)}`); + log( + `$Added array access fields using $addFields: ${JSON.stringify(arrayFieldsProject, null, 2)}` + ); } - + // For JOIN queries, we need a special handling else if (isJoinQuery) { // Add detailed debugging for JOIN queries @@ -702,7 +719,6 @@ export class SqlCompilerImpl implements SqlCompiler { // First, we'll create a projection that preserves the table aliases in the pipeline const renamedFieldsProject: Record = {}; - // Add a final stage to correctly handle JOIN results // We need the column values to be accessible directly at the top level, // without requiring table alias prefixes @@ -735,44 +751,58 @@ export class SqlCompilerImpl implements SqlCompiler { if (typeof column === 'object' && column.expr) { let table = column.expr.table; let field = column.expr.column; - + // Special handling for array access notation without table reference // When we have something like scenes[0].name directly, we need to treat it as a field on the main table if (!table && field && field.includes('__ARRAY_')) { // Assume it belongs to the main table table = 
ast.from[0].as; - log(`$Processing array access field without table reference: ${field}, assigning to main table: ${table}`); + log( + `$Processing array access field without table reference: ${field}, assigning to main table: ${table}` + ); } log(`Processing JOIN column: table=${table}, field=${field}`); - if ((table && field && this.currentTableAliases.has(table)) || - // Also handle fields without table references as belonging to the main table - (!table && field)) { - + if ( + (table && field && this.currentTableAliases.has(table)) || + // Also handle fields without table references as belonging to the main table + (!table && field) + ) { const outputField = this.extractOutputField(field, column.as); - const processedField = this.processFieldName(field); - - log(`$Output field name: ${outputField} from ${field} (processed: ${processedField})`); - - // Create a path to the field, which could be in the root doc or nested - // in a joined doc (like "o.product") - // The key fix: Use proper MongoDB dot notation for accessing fields - // Fields from main table can be accessed directly, fields from joined tables need the alias prefix - let sourcePath; - - if (table === ast.from[0].as) { - const fieldWithoutTablePrefix = processedField.replace(`${table}.`, ''); - sourcePath = `$${fieldWithoutTablePrefix}`; - log(`$Main table field path: $${fieldWithoutTablePrefix}`); + const arrayInfo = this.normalizeFieldPath(field); + if (arrayInfo.hasArrayAccess) { + let fieldName = this.processFieldName(field); + if (table !== ast.from[0].as) { + fieldName = `${table}.${fieldName}`; + } + this.buildArrayAccessProjection(addFieldsStage, fieldName, outputField); } else { - sourcePath = `$${table}.${processedField}`; - log(`$Joined table field path: ${sourcePath}`); - } + const processedField = this.processFieldName(field); + + log( + `$Output field name: ${outputField} from ${field} (processed: ${processedField})` + ); + + // Create a path to the field, which could be in the root 
doc or nested + // in a joined doc (like "o.product") + // The key fix: Use proper MongoDB dot notation for accessing fields + // Fields from main table can be accessed directly, fields from joined tables need the alias prefix + let sourcePath; + + if (table === ast.from[0].as) { + const fieldWithoutTablePrefix = processedField.replace(`${table}.`, ''); + sourcePath = `$${fieldWithoutTablePrefix}`; + log(`$Main table field path: $${fieldWithoutTablePrefix}`); + } else { + sourcePath = `$${table}.${processedField}`; + log(`$Joined table field path: ${sourcePath}`); + } - // Add this field mapping - addFieldsStage[outputField] = sourcePath; - log(`JOIN: Creating flat field ${outputField} = ${sourcePath}`); + // Add this field mapping + addFieldsStage[outputField] = sourcePath; + log(`JOIN: Creating flat field ${outputField} = ${sourcePath}`); + } } else { log( `Skipped column - missing table alias or field: ${JSON.stringify(column, null, 2)}` @@ -809,7 +839,7 @@ export class SqlCompilerImpl implements SqlCompiler { for (const field of outputFields) { log(`FIELD: `, field); - includeFields[field] = 1 + includeFields[field] = 1; } // First add a projection to include only the fields we want @@ -818,11 +848,11 @@ export class SqlCompilerImpl implements SqlCompiler { JSON.stringify(includeFields, null, 2) ); aggregateCommand.pipeline.push({ $project: includeFields }); - + // Then add a separate projection to exclude nested documents // MongoDB doesn't allow mixing inclusion and exclusion in the same projection const excludeJoinedDocsOnly: Record = {}; - + // Now specifically exclude the nested documents to prevent duplication for (const fromItem of ast.from) { if (fromItem.as && fromItem.as !== ast.from[0].as) { @@ -835,7 +865,7 @@ export class SqlCompilerImpl implements SqlCompiler { 'Adding $project stage to exclude nested docs:', JSON.stringify(excludeJoinedDocsOnly, null, 2) ); - + // Only add the exclusion stage if we have fields to exclude if 
(Object.keys(excludeJoinedDocsOnly).length > 0) { aggregateCommand.pipeline.push({ $project: excludeJoinedDocsOnly }); @@ -1028,15 +1058,15 @@ export class SqlCompilerImpl implements SqlCompiler { // Process the field name to handle nested fields with dot notation fieldName = this.processFieldName(setItem.column); } - + // After field name processing, check if it contains array access notation const processedFieldName = this.processFieldName(fieldName); const arrayInfo = this.getArrayAccessInfo(processedFieldName); - + log( `$Setting UPDATE field: ${processedFieldName} = ${JSON.stringify(this.convertValue(setItem.value))}` ); - + // Use the processed field name with proper array indexing update[processedFieldName] = this.convertValue(setItem.value); } @@ -1362,6 +1392,15 @@ export class SqlCompilerImpl implements SqlCompiler { // First pass - process all fields const fieldsToProject: string[] = []; + // Track array access fields for special handling + const arrayAccessInfoMap = new Map< + string, + { + outputField: string; + fieldName: string; + } + >(); + columns.forEach((column) => { if (typeof column === 'object') { if ('expr' in column && column.expr) { @@ -1369,24 +1408,42 @@ export class SqlCompilerImpl implements SqlCompiler { if ('column' in column.expr && column.expr.column) { // First check if the column has a table reference that might be an alias let fieldName; - if (column.expr.table && column.expr.column && this.currentTableAliases.has(column.expr.table)) { + if ( + column.expr.table && + column.expr.column && + this.currentTableAliases.has(column.expr.table) + ) { fieldName = `${column.expr.table}.${column.expr.column}`; log(`$Using table-prefixed field in projection: ${fieldName}`); } else { fieldName = this.processFieldName(column.expr.column); } - const outputName = this.extractOutputField(fieldName, column.as) + const outputName = this.extractOutputField(fieldName, column.as); fieldsToProject.push(outputName); + + const arrayInfo = 
this.getArrayAccessInfo(fieldName); + + if (arrayInfo.hasArrayAccess) { + parentFields.add(arrayInfo.parts[0]); + arrayAccessInfoMap.set(outputName, { + outputField: outputName, + fieldName, + }); + } } else if (column.expr.type === 'column_ref' && column.expr.column) { // Handle column_ref with possible table let fieldName; - if (column.expr.table && column.expr.column && this.currentTableAliases.has(column.expr.table)) { + if ( + column.expr.table && + column.expr.column && + this.currentTableAliases.has(column.expr.table) + ) { fieldName = `${column.expr.table}.${column.expr.column}`; log(`Using table-prefixed field in column_ref projection: ${fieldName}`); } else { fieldName = this.processFieldName(column.expr.column); } - const outputName = this.extractOutputField(fieldName, column.as) + const outputName = this.extractOutputField(fieldName, column.as); fieldsToProject.push(outputName); } else if ( column.expr.type === 'binary_expr' && @@ -1406,7 +1463,7 @@ export class SqlCompilerImpl implements SqlCompiler { } if (fieldName && column.expr.right.column) { fieldName += '.' 
+ column.expr.right.column; - const outputName = this.extractOutputField(fieldName, column.as) + const outputName = this.extractOutputField(fieldName, column.as); fieldsToProject.push(outputName); } } @@ -1419,7 +1476,7 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.column); } - const outputName = this.extractOutputField(fieldName, column.as) + const outputName = this.extractOutputField(fieldName, column.as); fieldsToProject.push(outputName); } else if ('column' in column) { // Handle direct column with possible table @@ -1430,7 +1487,7 @@ export class SqlCompilerImpl implements SqlCompiler { } else { fieldName = this.processFieldName(column.column); } - const outputName = this.extractOutputField(fieldName, column.as) + const outputName = this.extractOutputField(fieldName, column.as); fieldsToProject.push(outputName); } } else if (typeof column === 'string') { @@ -1441,71 +1498,20 @@ export class SqlCompilerImpl implements SqlCompiler { // Handle array access fields - since MongoDB 4.4 doesn't allow including both a field and its subfields, // we'll detect array paths and handle them specially - - // Track array access fields for special handling - const arrayAccessInfoMap = new Map(); - - fieldsToProject.forEach(fieldName => { + + fieldsToProject.forEach((fieldName) => { // Check for array access in the field path const arrayInfo = this.getArrayAccessInfo(fieldName); - + if (arrayInfo.hasArrayAccess) { // This is a field with array access like actors.0.name log(`$Found array access in field: ${fieldName}`); - - // We need to use $slice and field projection for arrays to work around path collision issues - const parts = fieldName.split('.'); - const baseField = parts[0]; // e.g., "actors" - - if (parts.length >= 3 && /^\d+$/.test(parts[1])) { - // This is a path like "actors.0.name" - extract the parts: - const arrayField = parts[0]; // "actors" - const indexValue = parseInt(parts[1]); // 0 - const 
subField = parts.slice(2).join('.'); // "name" - - // Setting specific array element field without including the whole array - // If we need name & role from actors[0], this ensures we get both without - // path collision issues - projection[fieldName] = 1; - - // Store info for field name flattening - const outputField = subField || parts[parts.length - 1]; - arrayAccessInfoMap.set(outputField, { - path: fieldName, - fieldName: outputField, - arrayField, - index: indexValue, - subField: subField || undefined - }); - - // Mark this field as having been projected to avoid collisions - parentFields.add(baseField); - } else if (parts.length === 2 && /^\d+$/.test(parts[1])) { - // This is a path like "actors.0" - just projecting a specific array element - projection[fieldName] = 1; - - // Store info for field name flattening - arrayAccessInfoMap.set(parts[1], { - path: fieldName, - fieldName: parts[1], - arrayField: parts[0], - index: parseInt(parts[1]) - }); - - parentFields.add(baseField); - } } else if (fieldName.includes('.')) { // Regular nested field // Check if any parent has already been included const parts = fieldName.split('.'); const baseField = parts[0]; - + if (!parentFields.has(baseField)) { projection[fieldName] = 1; } else { @@ -1514,40 +1520,22 @@ export class SqlCompilerImpl implements SqlCompiler { } } else { // Regular top-level field - log(`Adding ${fieldName} to projection`) + log(`Adding ${fieldName} to projection`); projection[fieldName] = 1; } }); - + // For array access fields, we need to use MongoDB's aggregation operators directly in the projection if (arrayAccessInfoMap.size > 0) { - log(`$Adding MongoDB operators for array access fields: ${JSON.stringify(Array.from(arrayAccessInfoMap.entries()))}`); - + log( + `$Adding MongoDB operators for array access fields: ${JSON.stringify(Array.from(arrayAccessInfoMap.entries()))}` + ); + // Add MongoDB's field extraction operators for array access fields for (const [fieldName, info] of 
arrayAccessInfoMap.entries()) { - const { arrayField, index, subField } = info; - - // MongoDB's projection can use aggregation operators - if (subField) { - // Use $getField to extract nested field from array element - projection[fieldName] = { - $getField: { - field: subField, - input: { - $arrayElemAt: [`$${arrayField}`, index] - } - } - }; - } else { - // Extract the whole array element - projection[fieldName] = { $arrayElemAt: [`$${arrayField}`, index] }; - } - - // Remove the original dot notation field if it was added - const dotPath = `${arrayField}.${index}${subField ? '.' + subField : ''}`; - if (projection[dotPath]) { - delete projection[dotPath]; - } + const { outputField, fieldName } = info; + + this.buildArrayAccessProjection(projection, fieldName, outputField); } } @@ -1592,7 +1580,7 @@ export class SqlCompilerImpl implements SqlCompiler { return processed; } - + /** * Normalizes a field path by handling various array notation formats * and returns information for further processing @@ -1605,44 +1593,46 @@ export class SqlCompilerImpl implements SqlCompiler { } { // First, process SQL-style array syntax (items__ARRAY_0__name) to MongoDB dot notation const processedPath = this.processFieldName(fieldPath); - + // Convert underscore-number patterns to standard dot notation (addresses_0 -> addresses.0) const underscoreArrayPattern = /(\w+)_(\d+)/g; const normalizedPath = processedPath.replace(underscoreArrayPattern, '$1.$2'); - + if (normalizedPath !== processedPath) { - log(`$Converted underscore array path to dot notation: ${processedPath} -> ${normalizedPath}`); + log( + `$Converted underscore array path to dot notation: ${processedPath} -> ${normalizedPath}` + ); } - + // Identify array indices const parts = normalizedPath.split('.'); const arrayIndices: number[] = []; - + parts.forEach((part, index) => { if (/^\d+$/.test(part)) { arrayIndices.push(index); } }); - + // Create a standardized output field name // For array access and nested fields, we 
use underscores in the output field name - const outputFieldName = normalizedPath.includes('.') + const outputFieldName = normalizedPath.includes('.') ? normalizedPath.replace(/\./g, '_') : normalizedPath; - + return { normalizedPath, hasArrayAccess: arrayIndices.length > 0, outputFieldName, - arrayIndices + arrayIndices, }; } - + /** * Get array access information from a normalized path * @deprecated Use normalizeFieldPath instead */ - private getArrayAccessInfo(fieldPath: string): { + private getArrayAccessInfo(fieldPath: string): { hasArrayAccess: boolean; arrayIndices: number[]; parts: string[]; @@ -1650,178 +1640,104 @@ export class SqlCompilerImpl implements SqlCompiler { // For backward compatibility, we maintain this method but implement using normalizeFieldPath const { hasArrayAccess, arrayIndices } = this.normalizeFieldPath(fieldPath); const parts = fieldPath.split('.'); - + return { hasArrayAccess, arrayIndices, - parts + parts, }; } - + /** * Builds a MongoDB projection expression for array access * Handles simple and complex nested array access patterns */ private buildArrayAccessProjection( - projection: Record, - fieldPath: string, + projection: Record, + fieldPath: string, outputFieldName: string ): void { - // Check if the path has an underscore pattern like "addresses_0" that needs to be handled - // as array access instead of using the dot notation - const underscoreArrayPattern = /(\w+)_(\d+)/g; - let arrayPath = fieldPath; - - // If the path contains underscores followed by numbers, we need to - // handle it using $arrayElemAt for proper MongoDB array access - // Example: addresses_0.details.street => addresses.0.details.street - if (arrayPath.match(underscoreArrayPattern)) { - log(`$Handling underscore-based array notation: ${arrayPath}`); - - // First, normalize the path by replacing all underscore-number combinations with dot notation - arrayPath = arrayPath.replace(underscoreArrayPattern, '$1.$2'); - log(`$Normalized underscore array path: 
${fieldPath} -> ${arrayPath}`); - - // Now process the normalized path - const parts = arrayPath.split('.'); - let currentExpr: any = null; - - // Start with the base object - let currentObj = `$${parts[0]}`; - let startIndex = 1; - - // Build a chain of $arrayElemAt and $getField operations - for (let i = startIndex; i < parts.length; i++) { - const part = parts[i]; - - if (/^\d+$/.test(part)) { - // This is a numeric index, use $arrayElemAt - const indexValue = parseInt(part); - - if (currentExpr === null) { - // This is the first operation in the chain - currentExpr = { - $arrayElemAt: [currentObj, indexValue] - }; - } else { - // Nest this operation in the previous one - currentExpr = { - $arrayElemAt: [currentExpr, indexValue] - }; - } - - log(`$Added array access at index ${indexValue}`); - } else { - // This is a field name, use $getField - if (currentExpr === null) { - // If we haven't created any expression yet, use direct path - if (i === 1) { - // We're at the first operation and it's a field - currentObj = `$${parts[0]}.${part}`; - } else { - // We need to build a getField expression - currentExpr = { - $getField: { - field: part, - input: currentObj - } - }; - } - } else { - // Nest this field access in the previous operation - currentExpr = { - $getField: { - field: part, - input: currentExpr - } - }; - } - - log(`$Added field access for ${part}`); - } - } - - // Set the final expression in the projection - if (currentExpr !== null) { - projection[outputFieldName] = currentExpr; - } else { - // If we didn't build an expression, use the path directly - projection[outputFieldName] = currentObj; - } - - log(`$Final array access expression for ${outputFieldName}: ${JSON.stringify(projection[outputFieldName], null, 2)}`); - return; - } - // Continue with standard dot-notation array processing const parts = fieldPath.split('.'); const arrayInfo = this.getArrayAccessInfo(fieldPath); - + if (!arrayInfo.hasArrayAccess) { // Not an array access field, use 
standard projection projection[outputFieldName] = `$${fieldPath}`; log(`$Added standard field to projection: ${outputFieldName} = $${fieldPath}`); return; } - + // Get the first array index position const firstArrayIndex = arrayInfo.arrayIndices[0]; const indexValue = parseInt(parts[firstArrayIndex]); - + // Simple case: array at the first level with potential nested fields if (firstArrayIndex === 0) { + log(`firstArrayIndex === 0: ${outputFieldName}`); // Array is the root, like: 0.field.subfield const arrayField = parts[0]; const subPath = parts.slice(1).join('.'); - + this.handleSimpleArrayAccess(projection, arrayField, indexValue, subPath, outputFieldName); } else if (firstArrayIndex === 1) { + log(`firstArrayIndex === 1: ${outputFieldName}`); // Array is the second level, like: field.0.subfield const arrayField = parts[0]; const subPath = parts.slice(2).join('.'); - + this.handleSimpleArrayAccess(projection, arrayField, indexValue, subPath, outputFieldName); } else if (arrayInfo.arrayIndices.length === 1) { + log(`ONLY ONE ARRAY INDEX: ${outputFieldName}`); // Only one array index, but it's deeper in the path const prefix = parts.slice(0, firstArrayIndex).join('.'); const indexValue = parseInt(parts[firstArrayIndex]); const suffix = parts.slice(firstArrayIndex + 1).join('.'); - + // Build a nested expression with $arrayElemAt if (suffix) { projection[outputFieldName] = { $getField: { field: suffix, input: { - $arrayElemAt: [{ - $getField: { - field: parts[firstArrayIndex - 1], - input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}` - } - }, indexValue] - } - } + $arrayElemAt: [ + { + $getField: { + field: parts[firstArrayIndex - 1], + input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}`, + }, + }, + indexValue, + ], + }, + }, }; } else { + log(`ELSE: ${outputFieldName}`); projection[outputFieldName] = { - $arrayElemAt: [{ - $getField: { - field: parts[firstArrayIndex - 1], - input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}` - } - }, 
indexValue] + $arrayElemAt: [ + { + $getField: { + field: parts[firstArrayIndex - 1], + input: `$${prefix.substring(0, prefix.lastIndexOf('.'))}`, + }, + }, + indexValue, + ], }; } - + log(`$Added complex nested array access to projection: ${outputFieldName}`); } else { // Multiple array indices - very complex case // For this case, we'll fall back to the simple dot notation which works in some cases projection[outputFieldName] = `$${fieldPath}`; - log(`$Using fallback dot notation for complex array access: ${outputFieldName} = $${fieldPath}`); + log( + `$Using fallback dot notation for complex array access: ${outputFieldName} = $${fieldPath}` + ); } } - + /** * Handles array access patterns at any level of nesting * This supports patterns like: @@ -1836,67 +1752,69 @@ export class SqlCompilerImpl implements SqlCompiler { subPath: string, outputFieldName: string ): void { - log(`$Processing array access: ${arrayField}[${indexValue}]${subPath ? '.' + subPath : ''} as ${outputFieldName}`); - + log( + `$Processing array access: ${arrayField}[${indexValue}]${subPath ? '.' 
+ subPath : ''} as ${outputFieldName}` + ); + // Special handling for common complex patterns // This pattern matches addresses[0].details.street and similar patterns if (subPath && subPath.includes('.')) { const pathParts = subPath.split('.'); - + // Special handling for nested object patterns like addresses[0].details.street if (pathParts.length >= 2) { let currentExpr: any = { - $arrayElemAt: [`$${arrayField}`, indexValue] + $arrayElemAt: [`$${arrayField}`, indexValue], }; - + // Process each part of the path to build a nested expression for (let i = 0; i < pathParts.length; i++) { const part = pathParts[i]; - + if (/^\d+$/.test(part)) { // This is an array index - another level of array access currentExpr = { - $arrayElemAt: [currentExpr, parseInt(part)] + $arrayElemAt: [currentExpr, parseInt(part)], }; } else { // This is a field access currentExpr = { $getField: { field: part, - input: currentExpr - } + input: currentExpr, + }, }; } } - + // Set the fully built nested expression projection[outputFieldName] = currentExpr; log(`$Added optimized multi-level nested expression for ${outputFieldName}`); return; } } - + if (subPath) { - // Check if the subPath contains nested fields + // Check if the subPath contains nested fields if (subPath.includes('.')) { // Handle complex nested path inside array element // e.g., actors.0.details.name needs nested $getField expressions const subParts = subPath.split('.'); - + // Start with the array element access let expr: any = { - $arrayElemAt: [`$${arrayField}`, indexValue] + $arrayElemAt: [`$${arrayField}`, indexValue], }; - + // Build nested $getField expressions for each part for (const part of subParts) { if (part === '') continue; // Skip empty parts - + // Check if this part is a numeric index (another array access) if (/^\d+$/.test(part)) { // This is an array index within the nested path expr = { - $arrayElemAt: [expr, parseInt(part)] + $arrayElemAt: [expr, parseInt(part)], }; log(`$Adding nested array access at 
index ${part}`); } else { @@ -1904,13 +1822,13 @@ export class SqlCompilerImpl implements SqlCompiler { expr = { $getField: { field: part, - input: expr - } + input: expr, + }, }; log(`$Adding nested field access for ${part}`); } } - + // Store the complex expression in the projection projection[outputFieldName] = expr; log(`$Added complex nested field array access to projection: ${outputFieldName}`); @@ -1920,22 +1838,25 @@ export class SqlCompilerImpl implements SqlCompiler { $getField: { field: subPath, input: { - $arrayElemAt: [`$${arrayField}`, indexValue] - } - } + $arrayElemAt: [`$${arrayField}`, indexValue], + }, + }, }; log(`$Added array access with nested field to projection: ${outputFieldName}`); } } else { // Just need the array element itself: items.0 projection[outputFieldName] = { - $arrayElemAt: [`$${arrayField}`, indexValue] + $arrayElemAt: [`$${arrayField}`, indexValue], }; log(`$Added simple array access to projection: ${outputFieldName}`); } - + // Debug log the final expression for this field - log(`$Final projection expression for ${outputFieldName}:`, JSON.stringify(projection[outputFieldName], null, 2)); + log( + `$Final projection expression for ${outputFieldName}:`, + JSON.stringify(projection[outputFieldName], null, 2) + ); } /** * Check if a name is an actual table reference in the FROM clause @@ -2597,7 +2518,7 @@ export class SqlCompilerImpl implements SqlCompiler { private extractOutputField(field: string, as?: string): string { // Process the field to handle array access notation first (converts __ARRAY_0__ to .0.) 
const processedField = this.processFieldName(field); - + // Output field name (possibly aliased) // If there's an alias, use it // Otherwise, if the field has a table prefix or dots, use just the final part (excluding array indices) @@ -2609,7 +2530,7 @@ export class SqlCompilerImpl implements SqlCompiler { // For dot notation fields, use the last part (excluding array indices) const parts = processedField.split('.'); // Get the last non-numeric part (skipping array indices) - const lastNonNumericPart = parts.filter(part => isNaN(Number(part))).pop(); + const lastNonNumericPart = parts.filter((part) => isNaN(Number(part))).pop(); outputField = lastNonNumericPart || field; } else { // Simple field without dots @@ -2617,5 +2538,5 @@ export class SqlCompilerImpl implements SqlCompiler { } return outputField; - } + } } diff --git a/packages/lib/src/parser.ts b/packages/lib/src/parser.ts index 8d2a5f3..37a560d 100644 --- a/packages/lib/src/parser.ts +++ b/packages/lib/src/parser.ts @@ -79,7 +79,7 @@ export class SqlParserImpl implements SqlParser { database: 'PostgreSQL', }); - log('Preprocessed AST: ', JSON.stringify(ast, null, 2)) + log('Preprocessed AST: ', JSON.stringify(ast, null, 2)); // Process the AST to properly handle nested fields const processedAst = this.postProcessAst(ast); @@ -251,7 +251,7 @@ export class SqlParserImpl implements SqlParser { // It's likely a nested field, not a table reference column.expr.column = `${column.expr.table}.${column.expr.column}`; column.expr.table = null; - log(`Setting table to null for likely nested field: ${column.expr.column}`) + log(`Setting table to null for likely nested field: ${column.expr.column}`); } } }); From 4823d31a500668fcc12661185edd698afa8d3a62 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Fri, 2 May 2025 15:31:19 -0600 Subject: [PATCH 4/5] fix unit tests --- packages/lib/tests/unit/basic.test.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/packages/lib/tests/unit/basic.test.ts b/packages/lib/tests/unit/basic.test.ts index a26fce5..59eaa56 100644 --- a/packages/lib/tests/unit/basic.test.ts +++ b/packages/lib/tests/unit/basic.test.ts @@ -178,7 +178,10 @@ describe('QueryLeaf', () => { // Check if projection includes array element access if (commands[0].type === 'FIND' && commands[0].projection) { expect(commands[0].projection).toBeDefined(); - expect(commands[0].projection['items.0.id']).toBe(1); + expect(commands[0].projection['id']).toBeDefined(); + expect(commands[0].projection['id']['$getField']).toBeDefined(); + expect(commands[0].projection['id']['$getField']['field']).toBe('id'); + expect(commands[0].projection['id']['$getField']['input']).toBeDefined(); expect(commands[0].projection['items']).toBe(1); } }); @@ -390,4 +393,4 @@ describe('QueryLeaf', () => { expect(result[0]).toHaveProperty('age'); }); }); -}); \ No newline at end of file +}); From fa8353a12ea0422616a754fe1e10f9514216bbb8 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Fri, 2 May 2025 15:41:22 -0600 Subject: [PATCH 5/5] clean up tests --- .../integration/alias.integration.test.ts | 36 ++---------- .../array-access.integration.test.ts | 55 ++----------------- 2 files changed, 8 insertions(+), 83 deletions(-) diff --git a/packages/lib/tests/integration/alias.integration.test.ts b/packages/lib/tests/integration/alias.integration.test.ts index c867d6c..2b1ee6c 100644 --- a/packages/lib/tests/integration/alias.integration.test.ts +++ b/packages/lib/tests/integration/alias.integration.test.ts @@ -1,9 +1,8 @@ import { ObjectId } from 'mongodb'; -import { testSetup, createLogger, ensureArray, ensureDocument } from './test-setup'; - -const log = createLogger('alias'); +import { testSetup, ensureArray, ensureDocument } from './test-setup'; describe('SQL Aliases Integration Tests', () => { + let db; beforeAll(async () => { await testSetup.init(); }, 30000); // 30 second timeout for container startup @@ -23,7 +22,7 @@ describe('SQL 
Aliases Integration Tests', () => { beforeEach(async () => { // Clean up collections before each test - const db = testSetup.getDb(); + db = testSetup.getDb(); await db.collection('customers').deleteMany({}); await db.collection('products').deleteMany({}); await db.collection('orders').deleteMany({}); @@ -31,7 +30,6 @@ describe('SQL Aliases Integration Tests', () => { afterEach(async () => { // Clean up collections after each test - const db = testSetup.getDb(); await db.collection('customers').deleteMany({}); await db.collection('products').deleteMany({}); await db.collection('orders').deleteMany({}); @@ -40,7 +38,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for SELECT with table alias test('should return correct fields when using table alias in SELECT', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -51,7 +48,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT c.name, c.active FROM customers c"; const results = ensureArray(await queryLeaf.execute(sql)); - log('Table alias SELECT results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(2); @@ -64,7 +60,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for just selecting a single field with table alias test('should return single field when using table alias in SELECT', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -75,7 +70,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT c.active FROM customers c"; const results = ensureArray(await queryLeaf.execute(sql)); - log('Single field alias SELECT results:', JSON.stringify(results, null, 
2)); // Assert expect(results).toHaveLength(2); @@ -88,7 +82,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for table alias in WHERE clause test('should filter correctly when using table alias in WHERE clause', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -99,7 +92,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT c.name FROM customers c WHERE c.active = true"; const results = ensureArray(await queryLeaf.execute(sql)); - log('WHERE clause with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(1); @@ -109,7 +101,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for combining table alias with column alias test('should support combined table alias with column alias', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -120,7 +111,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT c.name AS customer_name, c.active AS is_active FROM customers c"; const results = ensureArray(await queryLeaf.execute(sql)); - log('Combined table and column alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(2); @@ -155,7 +145,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for using table alias in UPDATE statement test('should update correctly when using table alias in UPDATE', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -170,7 +159,6 @@ describe('SQL 
Aliases Integration Tests', () => { // Verify with a SELECT const selectSql = "SELECT name, active FROM customers"; const results = ensureArray(await queryLeaf.execute(selectSql)); - log('UPDATE with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(2); @@ -182,7 +170,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for using table alias in DELETE statement test('should delete correctly when using table alias in DELETE', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('customers').insertMany([ { name: 'John Doe', email: 'john@example.com', active: true }, { name: 'Jane Smith', email: 'jane@example.com', active: false } @@ -197,7 +184,6 @@ describe('SQL Aliases Integration Tests', () => { // Verify with a SELECT const selectSql = "SELECT name, active FROM customers"; const results = ensureArray(await queryLeaf.execute(selectSql)); - log('DELETE with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(1); @@ -208,7 +194,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for multiple table aliases in a query with JOIN test('should handle multiple table aliases in a JOIN query', async () => { // Arrange - const db = testSetup.getDb(); const johnId = new ObjectId(); const janeId = new ObjectId(); @@ -231,15 +216,8 @@ describe('SQL Aliases Integration Tests', () => { JOIN orders o ON c._id = o.customerId `; - // Add detailed logging before executing the query - console.log('EXECUTING JOIN QUERY:', sql); - const results = ensureArray(await queryLeaf.execute(sql)); - // Print the detailed results for debugging - console.log('JOIN RESULTS LENGTH:', results.length); - console.log('JOIN RESULTS STRUCTURE:', JSON.stringify(results, null, 2)); - // Assert expect(results.length).toBeGreaterThan(0); @@ -265,7 +243,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for alias in ORDER BY clause test('should sort correctly 
when using alias in ORDER BY', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('products').insertMany([ { name: 'Laptop', category: 'Electronics', price: 1200 }, { name: 'Mouse', category: 'Electronics', price: 25 }, @@ -277,7 +254,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT p.name, p.price FROM products p ORDER BY p.price DESC"; const results = ensureArray(await queryLeaf.execute(sql)); - log('ORDER BY with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(3); @@ -290,7 +266,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for using alias in GROUP BY clause test('should group correctly when using alias in GROUP BY', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('products').insertMany([ { name: 'Laptop', category: 'Electronics', price: 1200 }, { name: 'Mouse', category: 'Electronics', price: 25 }, @@ -304,7 +279,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT p.category, COUNT(*) as count FROM products p GROUP BY p.category"; const results = ensureArray(await queryLeaf.execute(sql)); - log('GROUP BY with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results.length).toBe(2); @@ -345,7 +319,6 @@ describe('SQL Aliases Integration Tests', () => { // Test case for alias with functions test('should handle alias with functions in SELECT', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('products').insertMany([ { name: 'Laptop', price: 1200 }, { name: 'Mouse', price: 25 }, @@ -357,7 +330,6 @@ describe('SQL Aliases Integration Tests', () => { const sql = "SELECT p.name, UPPER(p.name) as upper_name FROM products p"; const results = ensureArray(await queryLeaf.execute(sql)); - log('Function with alias results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(3); @@ -379,4 +351,4 @@ describe('SQL Aliases Integration Tests', 
() => { } } }); -}); \ No newline at end of file +}); diff --git a/packages/lib/tests/integration/array-access.integration.test.ts b/packages/lib/tests/integration/array-access.integration.test.ts index a6f9ae3..b4792b2 100644 --- a/packages/lib/tests/integration/array-access.integration.test.ts +++ b/packages/lib/tests/integration/array-access.integration.test.ts @@ -1,9 +1,8 @@ import { ObjectId } from 'mongodb'; -import { testSetup, createLogger, ensureArray } from './test-setup'; - -const log = createLogger('array-access'); +import { testSetup, ensureArray } from './test-setup'; describe('Array Access Integration Tests', () => { + let db; beforeAll(async () => { await testSetup.init(); }, 30000); // 30 second timeout for container startup @@ -23,7 +22,7 @@ describe('Array Access Integration Tests', () => { beforeEach(async () => { // Clean up collections before each test - const db = testSetup.getDb(); + db = testSetup.getDb(); await db.collection('order_items').deleteMany({}); await db.collection('movies').deleteMany({}); await db.collection('users').deleteMany({}); @@ -32,7 +31,6 @@ describe('Array Access Integration Tests', () => { afterEach(async () => { // Clean up collections after each test - const db = testSetup.getDb(); await db.collection('order_items').deleteMany({}); await db.collection('movies').deleteMany({}); await db.collection('users').deleteMany({}); @@ -41,7 +39,6 @@ describe('Array Access Integration Tests', () => { test('should handle array access syntax for nested field access in queries', async () => { // Arrange: Insert test data with arrays - keep it very simple - const db = testSetup.getDb(); await db.collection('order_items').insertOne({ orderId: 'ORD-1001', items: [ @@ -61,7 +58,6 @@ describe('Array Access Integration Tests', () => { `; const results = ensureArray(await queryLeaf.execute(sql)); - log('Array access filter results:', JSON.stringify(results, null, 2)); // Assert: Verify that filtering by array element works // Since the 
filtering might be handled differently by different implementations, @@ -73,7 +69,6 @@ describe('Array Access Integration Tests', () => { test('should filter by array element properties at different indices', async () => { // Arrange: Insert test data with arrays - const db = testSetup.getDb(); await db.collection('order_items').insertMany([ { orderId: 'ORD-1001', @@ -109,7 +104,6 @@ describe('Array Access Integration Tests', () => { `; const results = ensureArray(await queryLeaf.execute(sql)); - log('Array indices filtering results:', JSON.stringify(results, null, 2)); // Assert: Verify only the order with Widget as first item and inStock=true for second item // Since the filtering might be handled differently, we'll check if ORD-1003 is in the results @@ -119,7 +113,6 @@ describe('Array Access Integration Tests', () => { test('should query arrays with multiple indices', async () => { // Arrange: Insert test data with larger arrays - const db = testSetup.getDb(); await db.collection('order_items').insertMany([ { orderId: 'ORD-2001', @@ -141,7 +134,6 @@ describe('Array Access Integration Tests', () => { // First verify with a direct MongoDB query to confirm the data structure const directQueryResult = await db.collection('order_items').findOne({ orderId: 'ORD-2001' }); - log('Direct MongoDB query result:', JSON.stringify(directQueryResult, null, 2)); // Execute the query through QueryLeaf const queryLeaf = testSetup.getQueryLeaf(); @@ -152,7 +144,6 @@ describe('Array Access Integration Tests', () => { `; const results = ensureArray(await queryLeaf.execute(sql)); - log('Order items query results:', JSON.stringify(results, null, 2)); // Basic validation expect(results.length).toBe(1); @@ -203,7 +194,6 @@ describe('Array Access Integration Tests', () => { `; const indexResults = ensureArray(await queryLeaf.execute(indexAccessSql)); - log('Array index access results:', JSON.stringify(indexResults, null, 2)); // Verify we can find orders by array index properties 
expect(indexResults.length).toBeGreaterThan(0); @@ -218,7 +208,6 @@ describe('Array Access Integration Tests', () => { test('should support bracket notation for array access in SELECT', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('movies').insertMany([ { title: 'The Matrix', @@ -235,7 +224,7 @@ describe('Array Access Integration Tests', () => { actors: [ { name: 'Leonardo DiCaprio', role: 'Cobb' }, { name: 'Joseph Gordon-Levitt', role: 'Arthur' }, - { name: 'Ellen Page', role: 'Ariadne' } + { name: 'Elliot Page', role: 'Ariadne' } ] } ]); @@ -244,21 +233,12 @@ describe('Array Access Integration Tests', () => { const queryLeaf = testSetup.getQueryLeaf(); // Explicit test of the bracket notation feature we want to implement const sql = "SELECT title, actors[0].name AS lead_actor FROM movies"; - log('SQL being executed:', sql); // Debug: First check with direct MongoDB query to ensure test data is properly inserted const directMovies = await db.collection('movies').find().toArray(); - log('Direct MongoDB query results for movies:', JSON.stringify(directMovies, null, 2)); const results = ensureArray(await queryLeaf.execute(sql)); - log('Bracket notation array access results:', JSON.stringify(results, null, 2)); - // Extra debug info to help diagnose the issue - log('===== TEST DEBUG INFO ====='); - log('SQL Query:', sql); - log('Direct MongoDB data:', JSON.stringify(directMovies, null, 2)); - log('QueryLeaf result keys:', Object.keys(results[0] || {})); - log('The Matrix result:', JSON.stringify(results.find(m => m.title === 'The Matrix'), null, 2)); // Assert expect(results).toHaveLength(2); @@ -268,7 +248,6 @@ describe('Array Access Integration Tests', () => { test('should support bracket notation for array access in WHERE clause', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('movies').insertMany([ { title: 'The Matrix', @@ -287,7 +266,6 @@ describe('Array Access Integration Tests', () => { const sql = 
"SELECT title, year FROM movies WHERE ratings[0] > 9.0"; const results = ensureArray(await queryLeaf.execute(sql)); - log('Bracket notation in WHERE results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(1); @@ -296,7 +274,6 @@ describe('Array Access Integration Tests', () => { test('should support multiple levels of array and object nesting with bracket notation', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('users').insertMany([ { name: 'Alice', @@ -337,10 +314,8 @@ describe('Array Access Integration Tests', () => { // First, do a direct MongoDB query to see the exact structure const directUserResults = await db.collection('users').find().toArray(); - log('Direct MongoDB query result for users:', JSON.stringify(directUserResults, null, 2)); const results = ensureArray(await queryLeaf.execute(sql)); - log('Complex nested bracket notation results:', JSON.stringify(results, null, 2)); // Assert expect(results).toHaveLength(2); @@ -358,7 +333,6 @@ describe('Array Access Integration Tests', () => { test('should support bracket notation in UPDATE statements', async () => { // Arrange - const db = testSetup.getDb(); await db.collection('movies').insertOne({ title: 'The Matrix', year: 1999, @@ -377,10 +351,7 @@ describe('Array Access Integration Tests', () => { // Verify with a SELECT using bracket notation const selectSql = "SELECT title, actors[0].name, actors[0].role FROM movies WHERE title = 'The Matrix'"; - log('SQL for verification after UPDATE:', selectSql); const results = ensureArray(await queryLeaf.execute(selectSql)); - log('Bracket notation in UPDATE results:', JSON.stringify(results, null, 2)); - log('Result keys:', Object.keys(results[0] || {})); // Assert expect(results).toHaveLength(1); @@ -389,13 +360,11 @@ describe('Array Access Integration Tests', () => { // Double-check with a direct MongoDB query const dbResult = await db.collection('movies').findOne({ title: 'The Matrix' }); - log('Direct 
MongoDB query after UPDATE:', JSON.stringify(dbResult, null, 2)); expect(dbResult?.actors[0]?.role).toBe('The One'); }); test('should support bracket notation with JOIN operations', async () => { // Arrange - const db = testSetup.getDb(); // Insert movies with a director ID await db.collection('movies').insertMany([ @@ -440,14 +409,10 @@ describe('Array Access Integration Tests', () => { FROM movies m JOIN directors d ON m.directorId = d._id `; - log('JOIN SQL query:', sql); // Verify input data with direct MongoDB queries const moviesData = await db.collection('movies').find().toArray(); - log('Direct MongoDB query - movies:', JSON.stringify(moviesData, null, 2)); - const directorsData = await db.collection('directors').find().toArray(); - log('Direct MongoDB query - directors:', JSON.stringify(directorsData, null, 2)); // Let's first check what this specific MongoDB query would look like // without the SQL translation @@ -476,24 +441,12 @@ describe('Array Access Integration Tests', () => { // Run the manual MongoDB aggregation for comparison const manualResult = await movieCollection.aggregate(pipeline).toArray(); - log('Manual MongoDB aggregation result:', JSON.stringify(manualResult, null, 2)); const results = ensureArray(await queryLeaf.execute(sql)); - log('Bracket notation in JOIN results:', JSON.stringify(results, null, 2)); - log('JOIN result keys:', Object.keys(results[0] || {})); - - // For debugging - add more detailed output - log('Results detailed dump:'); - results.forEach((r, i) => { - log(`- Result ${i}:`, JSON.stringify(r, null, 2)); - log(` Keys: ${Object.keys(r).join(', ')}`); - }); // Try different ways to find The Matrix const matrixByTitle = results.find(r => r.title === 'The Matrix'); const matrixById = results.find(r => r._id === 'director1'); - log('Matrix lookup by title:', matrixByTitle ? 'found' : 'not found'); - log('Matrix lookup by director id:', matrixById ? 'found' : 'not found'); // Assert expect(results).toHaveLength(2);