From 7616157b149ab9b834b327caa658ef7d38af0af4 Mon Sep 17 00:00:00 2001 From: Day Matchullis Date: Fri, 2 May 2025 16:29:40 -0600 Subject: [PATCH] fix _id showing up in queries where it hasn't been selected --- packages/lib/src/compiler.ts | 64 +++++- .../projection.integration.test.ts | 188 ++++++++++++++++++ 2 files changed, 250 insertions(+), 2 deletions(-) create mode 100644 packages/lib/tests/integration/projection.integration.test.ts diff --git a/packages/lib/src/compiler.ts b/packages/lib/src/compiler.ts index 3968836..3662715 100644 --- a/packages/lib/src/compiler.ts +++ b/packages/lib/src/compiler.ts @@ -833,9 +833,24 @@ export class SqlCompilerImpl implements SqlCompiler { // This makes the output match what SQL would normally return const includeFields: Record = {}; const outputFields = Object.keys(addFieldsStage); + // For JOINs, we need to explicitly exclude _id unless it was specifically requested + // or we have a SELECT * query + const hasExplicitIdField = + ast.columns && + ast.columns.some((col: any) => { + if (typeof col === 'object' && col.expr) { + return ( + col.expr.column === '_id' || col.expr.alias === '_id' || col.as === '_id' + ); + } + return col === '_id'; + }); - // First, indicate that we want to keep everything - includeFields['_id'] = 1; + if (hasExplicitIdField) { + includeFields['_id'] = 1; + } else { + includeFields['_id'] = 0; + } for (const field of outputFields) { log(`FIELD: `, field); @@ -883,6 +898,23 @@ export class SqlCompilerImpl implements SqlCompiler { } // For non-JOIN queries with array access, we already added the projection stage earlier else if (Object.keys(projection).length > 0 && arrayAccessFields.length === 0) { + // Explicitly exclude _id field unless it was specifically requested + // OR if we have a GROUP BY (since the group key is stored in _id) + const isGroupBy = ast.groupby && ast.groupby.length > 0; + + // Check if _id is explicitly requested in the columns + const hasExplicitIdField = ast.columns.some((col: any) => { + if (typeof col === 'object' && col.expr) { + return col.expr.column === '_id' || col.expr.alias === '_id' || col.as === '_id'; + } + return col === '_id'; + }); + + // If _id is not explicitly requested AND this is not a GROUP BY, exclude it + if (!hasExplicitIdField && !isGroupBy) { + projection['_id'] = 0; + } + log('Standard projection stage:', JSON.stringify(projection, null, 2)); aggregateCommand.pipeline.push({ $project: projection }); } @@ -901,6 +933,14 @@ export class SqlCompilerImpl implements SqlCompiler { // Set up projection if (ast.columns) { const projection = this.convertColumns(ast.columns); + + // If this is a GROUP BY query, we need to keep _id in the projection + // (It's where MongoDB stores the group key) + const isGroupBy = ast.groupby && ast.groupby.length > 0; + if (isGroupBy && '_id' in projection && projection['_id'] === 0) { + delete projection['_id']; // Remove the exclusion + } + findCommand.projection = projection; } @@ -1390,6 +1430,17 @@ export class SqlCompilerImpl implements SqlCompiler { return {}; } + // Check if _id is explicitly requested in the columns + const hasExplicitIdField = columns.some((col) => { + if (typeof col === 'object' && col.expr) { + return col.expr.column === '_id' || col.expr.alias === '_id' || col.as === '_id'; + } + return col === '_id'; + }); + + // Explicitly exclude _id field from projection unless it's specified in the columns + // We'll set this at the end of the method to account for field discovery during processing + // First pass - process all fields const fieldsToProject: string[] = []; // Track array access fields for special handling @@ -1401,6 +1452,8 @@ export class SqlCompilerImpl implements SqlCompiler { } >(); + // _id will be handled at the end of this method + columns.forEach((column) => { if (typeof column === 'object') { if ('expr' in column && column.expr) { @@ -1539,6 +1592,13 @@ export class SqlCompilerImpl implements SqlCompiler { } } + // Now that we've processed all fields, decide if we should exclude _id + // If no fields are explicitly requested, exclude _id + // The calling code in compileSelect will handle GROUP BY special cases + if (!hasExplicitIdField) { + projection['_id'] = 0; + } + log('Final projection:', JSON.stringify(projection, null, 2)); return projection; diff --git a/packages/lib/tests/integration/projection.integration.test.ts b/packages/lib/tests/integration/projection.integration.test.ts new file mode 100644 index 0000000..1fe34da --- /dev/null +++ b/packages/lib/tests/integration/projection.integration.test.ts @@ -0,0 +1,188 @@ +import { ObjectId } from 'mongodb'; +import { testSetup, ensureArray } from './test-setup'; + +describe('Projection and Field Selection Tests', () => { + let db; + beforeAll(async () => { + await testSetup.init(); + }, 30000); // 30 second timeout for container startup + + afterAll(async () => { + // Make sure to close any outstanding connections + const queryLeaf = testSetup.getQueryLeaf(); + + // Clean up any resources that QueryLeaf might be using + if (typeof queryLeaf.close === 'function') { + await queryLeaf.close(); + } + + // Clean up test setup resources + await testSetup.cleanup(); + }, 10000); + + beforeEach(async () => { + // Clean up and setup test data before each test + db = testSetup.getDb(); + await db.collection('projection_test').deleteMany({}); + + // Insert test data + await db.collection('projection_test').insertMany([ + { + _id: new ObjectId(), + name: 'Product A', + price: 100, + category: 'Electronics', + inStock: true + }, + { + _id: new ObjectId(), + name: 'Product B', + price: 200, + category: 'Furniture', + inStock: false + } + ]); + }); + + afterEach(async () => { + // Clean up test data after each test + await db.collection('projection_test').deleteMany({}); + }); + + test('should NOT include _id field when not explicitly selected', async () => { + // Act + const queryLeaf = testSetup.getQueryLeaf(); + const sql = 'SELECT name, price FROM projection_test'; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results).toHaveLength(2); + + // Check the fields in the first result + expect(Object.keys(results[0])).toContain('name'); + expect(Object.keys(results[0])).toContain('price'); + expect(Object.keys(results[0])).not.toContain('_id'); // _id should NOT be included + expect(Object.keys(results[0])).not.toContain('category'); + expect(Object.keys(results[0])).not.toContain('inStock'); + }); + + test('should include _id field when using SELECT *', async () => { + // Act + const queryLeaf = testSetup.getQueryLeaf(); + const sql = 'SELECT * FROM projection_test'; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results).toHaveLength(2); + + // Check all fields are included in the first result + expect(Object.keys(results[0])).toContain('_id'); + expect(Object.keys(results[0])).toContain('name'); + expect(Object.keys(results[0])).toContain('price'); + expect(Object.keys(results[0])).toContain('category'); + expect(Object.keys(results[0])).toContain('inStock'); + }); + + test('should include _id field when explicitly requested', async () => { + // Act + const queryLeaf = testSetup.getQueryLeaf(); + const sql = 'SELECT _id, name FROM projection_test'; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results).toHaveLength(2); + + // Check the fields in the first result + expect(Object.keys(results[0])).toContain('_id'); + expect(Object.keys(results[0])).toContain('name'); + expect(Object.keys(results[0])).not.toContain('price'); + expect(Object.keys(results[0])).not.toContain('category'); + expect(Object.keys(results[0])).not.toContain('inStock'); + }); + + test('should handle _id with table alias', async () => { + // Act + const queryLeaf = testSetup.getQueryLeaf(); + const sql = 'SELECT p._id, p.name FROM projection_test p'; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results).toHaveLength(2); + + // Check the fields in the first result + expect(Object.keys(results[0])).toContain('_id'); + expect(Object.keys(results[0])).toContain('name'); + expect(Object.keys(results[0])).not.toContain('price'); + }); + + test('should include _id field in GROUP BY results', async () => { + // Act + const queryLeaf = testSetup.getQueryLeaf(); + const sql = 'SELECT category, COUNT(*) as count FROM projection_test GROUP BY category'; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results).toHaveLength(2); + + // In GROUP BY, MongoDB puts the group key in _id + // Depending on implementation, it could be in _id or directly in category + const categoryValues = results.map(r => r._id || r.category); + expect(categoryValues).toContain('Electronics'); + expect(categoryValues).toContain('Furniture'); + + // Make sure each result has a count field + results.forEach(result => { + expect(result).toHaveProperty('count'); + }); + }); + + test('should handle JOIN with proper _id field handling', async () => { + // Arrange - Create related collections for JOIN test + await db.collection('join_test_orders').deleteMany({}); + await db.collection('join_test_customers').deleteMany({}); + + const customer1Id = new ObjectId(); + const customer2Id = new ObjectId(); + + await db.collection('join_test_customers').insertMany([ + { _id: customer1Id, name: 'Alice', email: 'alice@example.com' }, + { _id: customer2Id, name: 'Bob', email: 'bob@example.com' } + ]); + + await db.collection('join_test_orders').insertMany([ + { orderId: 'ORD-001', customerId: customer1Id, amount: 100 }, + { orderId: 'ORD-002', customerId: customer1Id, amount: 200 }, + { orderId: 'ORD-003', customerId: customer2Id, amount: 150 } + ]); + + // Act - Test JOIN with specific field selection (no _id) + const queryLeaf = testSetup.getQueryLeaf(); + const sql = ` + SELECT c.name, o.orderId, o.amount + FROM join_test_customers c + JOIN join_test_orders o ON c._id = o.customerId + `; + + const results = ensureArray(await queryLeaf.execute(sql)); + + // Assert + expect(results.length).toBeGreaterThan(0); + + // Check fields in the result - _id should not be included + expect(Object.keys(results[0])).toContain('name'); + expect(Object.keys(results[0])).toContain('orderId'); + expect(Object.keys(results[0])).toContain('amount'); + expect(Object.keys(results[0])).not.toContain('_id'); + expect(Object.keys(results[0])).not.toContain('customerId'); + expect(Object.keys(results[0])).not.toContain('email'); + + // Clean up JOIN test collections + await db.collection('join_test_orders').deleteMany({}); + await db.collection('join_test_customers').deleteMany({}); + }); +}); \ No newline at end of file