-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathparser.ts
More file actions
408 lines (357 loc) · 13.6 KB
/
parser.ts
File metadata and controls
408 lines (357 loc) · 13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
import { From, Parser as NodeSqlParser } from 'node-sql-parser';
import { SqlParser, SqlStatement } from './interfaces';
import debug from 'debug';
const log = debug('queryleaf:parser');
// Custom PostgreSQL mode with extensions to support our syntax needs
const CUSTOM_DIALECT = {
name: 'QueryLeafPostgreSQL',
reserved: [
'SELECT',
'FROM',
'WHERE',
'INSERT',
'UPDATE',
'DELETE',
'CREATE',
'DROP',
'TABLE',
'DATABASE',
'VIEW',
'INDEX',
'TRIGGER',
'PROCEDURE',
'FUNCTION',
],
literalTokens: {
// Add handling for array indexing syntax
'[': { tokenType: 'BRACKET_OPEN', regex: /\[/ },
']': { tokenType: 'BRACKET_CLOSE', regex: /\]/ },
'.': { tokenType: 'DOT', regex: /\./ },
},
operators: [
// Standard operators
'+',
'-',
'*',
'/',
'%',
'=',
'!=',
'<>',
'>',
'<',
'>=',
'<=',
// Add nested field operators
'.',
],
};
/**
* SQL Parser implementation using node-sql-parser
*/
export class SqlParserImpl implements SqlParser {
private parser: NodeSqlParser;
constructor(options?: { database?: string }) {
// Create standard parser with PostgreSQL mode
this.parser = new NodeSqlParser();
}
/**
* Parse SQL string into a SqlStatement
* @param sql SQL string to parse
* @returns Parsed SQL statement object
*/
parse(sql: string): SqlStatement {
try {
// Transform explicit ObjectId cast syntax before any other preprocessing
const preprocessedObjectIdSql = this.preprocessObjectIdCasts(sql);
// First, handle nested dot notation in field access
const preprocessedNestedSql = this.preprocessNestedFields(preprocessedObjectIdSql);
// Then transform array index notation to a form the parser can handle
const preprocessedSql = this.preprocessArrayIndexes(preprocessedNestedSql);
log('Preprocessed SQL:', preprocessedSql);
// Parse with PostgreSQL mode but try to handle our custom extensions
const ast = this.parser.astify(preprocessedSql, {
database: 'PostgreSQL',
});
log('Preprocessed AST: ', JSON.stringify(ast, null, 2));
// Process the AST to properly handle nested fields
const processedAst = this.postProcessAst(ast);
return {
ast: Array.isArray(processedAst) ? processedAst[0] : processedAst,
text: sql, // Use original SQL for reference
metadata: {
nestedFieldReplacements: this._nestedFieldReplacements,
},
};
} catch (error) {
// If error happens and it's related to our extensions, try to handle it
const errorMessage = error instanceof Error ? error.message : String(error);
if (errorMessage.includes('[')) {
// Make a more aggressive transformation of the SQL for bracket syntax
const fallbackSql = this.aggressivePreprocessing(this.preprocessObjectIdCasts(sql));
log('Fallback SQL for array syntax:', fallbackSql);
try {
const ast = this.parser.astify(fallbackSql, { database: 'PostgreSQL' });
const processedAst = this.postProcessAst(ast);
return {
ast: Array.isArray(processedAst) ? processedAst[0] : processedAst,
text: sql,
metadata: {
nestedFieldReplacements: this._nestedFieldReplacements,
},
};
} catch (fallbackErr) {
const fallbackErrorMsg =
fallbackErr instanceof Error ? fallbackErr.message : String(fallbackErr);
throw new Error(`SQL parsing error (fallback): ${fallbackErrorMsg}`);
}
}
throw new Error(`SQL parsing error: ${errorMessage}`);
}
}
/**
* Preprocess explicit ObjectId cast syntax into a sentinel string literal
* that survives SQL parsing and is recognised later in convertValue().
*
* Supported forms (case-insensitive):
* CAST('507f...' AS OBJECTID) → '__QL_OBJECTID_507f...__'
* '507f...'::OBJECTID → '__QL_OBJECTID_507f...__'
*/
private preprocessObjectIdCasts(sql: string): string {
// CAST('value' AS OBJECTID)
let result = sql.replace(
/CAST\s*\(\s*'([^']*)'\s+AS\s+OBJECTID\s*\)/gi,
(_match, value) => `'__QL_OBJECTID_${value}__'`
);
// 'value'::OBJECTID
result = result.replace(
/'([^']*)'\s*::\s*OBJECTID/gi,
(_match, value) => `'__QL_OBJECTID_${value}__'`
);
return result;
}
/**
* Preprocess nested field access in SQL before parsing
*
* This helps ensure that the parser correctly handles nested fields like:
* contact.address.city => becomes a properly parsed reference
*
* For deep nested fields (with more than one dot), we need special handling
* since the SQL parser typically expects table.column format only
*/
private preprocessNestedFields(sql: string): string {
log('Processing nested fields in SQL:', sql);
// Keep track of replacements to restore them later
const replacements: [string, string][] = [];
// Check if this is an UPDATE statement
if (sql.trim().toUpperCase().startsWith('UPDATE')) {
// Handle multi-level nested fields in UPDATE statements' SET clause
// This regex looks for patterns like: SET contact.address.city = 'Boston', other.field = 'value'
const setNestedFieldRegex = /SET\s+(.*?)(?:\s+WHERE|$)/is;
const setMatch = setNestedFieldRegex.exec(sql);
if (setMatch && setMatch[1]) {
const setPart = setMatch[1];
// Split by commas to get individual assignments
const assignments = setPart.split(',');
let modifiedSetPart = setPart;
// Process each assignment
for (const assignment of assignments) {
// This regex extracts the field name before the equals sign
const fieldMatch = /^\s*([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+){2,})\s*=/i.exec(assignment);
if (fieldMatch && fieldMatch[1]) {
const nestedField = fieldMatch[1];
// Create a placeholder name
const placeholder = `__NESTED_${replacements.length}__`;
// Store the replacement
replacements.push([placeholder, nestedField]);
// Replace in the set part
modifiedSetPart = modifiedSetPart.replace(nestedField, placeholder);
}
}
// Replace the whole SET part
sql = sql.replace(setPart, modifiedSetPart);
}
}
// Process WHERE clause nested fields
// This regex matches multi-level nested fields in WHERE conditions
// It looks for patterns like: WHERE contact.address.city = 'Boston'
const whereNestedFieldRegex =
/WHERE\s+([a-zA-Z0-9_]+(\.[a-zA-Z0-9_]+){1,})\s*(=|!=|<>|>|<|>=|<=|LIKE|IN|NOT IN)/gi;
// Replace deep nested fields in WHERE clause with placeholders
let processedSql = sql.replace(whereNestedFieldRegex, (match, nestedField, _, operator) => {
// Create a placeholder name
const placeholder = `__NESTED_${replacements.length}__`;
// Store the replacement
replacements.push([placeholder, nestedField]);
// Replace with the placeholder
return `WHERE ${placeholder} ${operator}`;
});
// Add debug info about replacements
if (replacements.length > 0) {
log('Nested field replacements:', JSON.stringify(replacements, null, 2));
}
// Store the replacements in this instance for later use
this._nestedFieldReplacements = replacements;
return processedSql;
}
// Store replacements for later reference
private _nestedFieldReplacements: [string, string][] = [];
/**
* Post-process the AST to correctly handle nested fields
*
* This ensures that expressions like "contact.address.city" are correctly
* recognized as a single column reference rather than a table/column pair.
*/
private postProcessAst(ast: any): any {
// Clone the AST to avoid modifying the original
const processed = JSON.parse(JSON.stringify(ast));
// Handle SELECT clause nested fields
this.processSelectClause(processed);
// Handle WHERE clause nested fields
this.processWhereClause(processed);
log('Post-processed AST:', JSON.stringify(processed, null, 2));
return processed;
}
/**
* Process nested fields in the SELECT clause
*/
private processSelectClause(ast: any): void {
if (!ast || (!Array.isArray(ast) && typeof ast !== 'object')) return;
// Handle array of statements
if (Array.isArray(ast)) {
ast.forEach((item) => this.processSelectClause(item));
return;
}
// Only process SELECT statements
if (ast.type !== 'select' || !ast.columns) return;
// Process each column in the SELECT list
ast.columns.forEach((column: any) => {
if (column.expr && column.expr.type === 'column_ref') {
// If the column has table.field notation, check if it should be a nested field
if (
column.expr.table &&
column.expr.column &&
!this.isActualTableReference(column.expr.table, ast)
) {
// It's likely a nested field, not a table reference
column.expr.column = `${column.expr.table}.${column.expr.column}`;
column.expr.table = null;
log(`Setting table to null for likely nested field: ${column.expr.column}`);
}
}
});
}
/**
* Process nested fields in the WHERE clause
*/
private processWhereClause(ast: any): void {
if (!ast || (!Array.isArray(ast) && typeof ast !== 'object')) return;
// Handle array of statements
if (Array.isArray(ast)) {
ast.forEach((item) => this.processWhereClause(item));
return;
}
// No WHERE clause to process
if (!ast.where) return;
// Process the WHERE clause recursively
this.processWhereExpr(ast.where, ast);
}
/**
* Process WHERE expression recursively to handle nested fields
*/
private processWhereExpr(expr: any, ast: any): void {
if (!expr || typeof expr !== 'object') return;
if (expr.type === 'binary_expr') {
// Process both sides of binary expressions
this.processWhereExpr(expr.left, ast);
this.processWhereExpr(expr.right, ast);
// Check for column references in the left side of the expression
if (expr.left && expr.left.type === 'column_ref') {
// First, check if this is a placeholder that needs to be restored
if (
expr.left.column &&
expr.left.column.startsWith('__NESTED_') &&
expr.left.column.endsWith('__')
) {
// Find the corresponding replacement
const placeholderIndex = parseInt(
expr.left.column.replace('__NESTED_', '').replace('__', '')
);
if (this._nestedFieldReplacements.length > placeholderIndex) {
// Restore the original nested field name
const [_, originalField] = this._nestedFieldReplacements[placeholderIndex];
log(`Restoring nested field: ${expr.left.column} -> ${originalField}`);
expr.left.column = originalField;
expr.left.table = null;
}
}
// Then check for table.column notation that should be a nested field
else if (
expr.left.table &&
expr.left.column &&
!this.isActualTableReference(expr.left.table, ast)
) {
// Likely a nested field access, not a table reference
expr.left.column = `${expr.left.table}.${expr.left.column}`;
expr.left.table = null;
}
}
} else if (expr.type === 'unary_expr') {
// Process the expression in unary operators
this.processWhereExpr(expr.expr, ast);
}
}
/**
* Check if a name is an actual table reference in the FROM clause
*
* This helps distinguish between table.column notation and nested field access
*/
private isActualTableReference(name: string, ast: any): boolean {
if (!ast.from || !Array.isArray(ast.from)) return false;
// Check if the name appears as a table name or alias in the FROM clause
return ast.from.some((fromItem: any) => {
return (
fromItem.table === name ||
fromItem.as === name ||
// Also match table references to aliases in the FROM clause
(typeof fromItem === 'object' && fromItem.as && fromItem.as === name)
);
});
}
/**
* Preprocess SQL to transform array index notation into a form the parser can handle
*
* This transforms:
* items[0].name => items__ARRAY_0__name
*
* We'll convert it back to MongoDB's dot notation later in the compiler.
*/
private preprocessArrayIndexes(sql: string): string {
// Replace array index notation with a placeholder format
// This regex matches field references with array indexes like items[0] or items[0].name
return sql.replace(/(\w+)\[(\d+)\](\.\w+)?/g, (match, field, index, suffix) => {
if (suffix) {
// For nested access like items[0].name => items__ARRAY_0__name
return `${field}__ARRAY_${index}__${suffix.substring(1)}`;
} else {
// For simple array access like items[0] => items__ARRAY_0
return `${field}__ARRAY_${index}`;
}
});
}
/**
* More aggressive preprocessing for SQL that contains array syntax
* This completely removes the array indexing and replaces it with a special column naming pattern
*/
private aggressivePreprocessing(sql: string): string {
// Replace items[0].name with items_0_name
// This is a more aggressive approach that completely avoids bracket syntax
return sql.replace(/(\w+)\[(\d+)\](\.(\w+))?/g, (match, field, index, dotPart, subfield) => {
if (subfield) {
return `${field}_${index}_${subfield}`;
} else {
return `${field}_${index}`;
}
});
}
}