Skip to content

Commit c8529f1

Browse files
author
zhaoge
committed
feat: optimize collecting entity when match empty column in entityCollecting context
1 parent cd3626c commit c8529f1

33 files changed

Lines changed: 33393 additions & 28838 deletions

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "dt-sql-parser",
3-
"version": "4.5.0-beta.0",
3+
"version": "4.5.0-beta.1",
44
"authors": "DTStack Corporation",
55
"description": "SQL Parsers for BigData, built with antlr4",
66
"keywords": [

src/grammar/flink/FlinkSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ columnProjectItem
509509
| selectLiteralColumnName (columnAlias | KW_AS? expression)?
510510
| tableAllColumns columnAlias?
511511
| selectExpressionColumnName (columnAlias | KW_AS? columnName)?
512+
| {this.shouldMatchEmpty()}? emptyColumn
512513
;
513514

514515
selectWindowItemColumnName

src/grammar/hive/HiveSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,7 @@ selectItem
15361536
| KW_AS LPAREN alias=id_ (COMMA alias=id_)* RPAREN
15371537
)?
15381538
)
1539+
| {this.shouldMatchEmpty()}? emptyColumn
15391540
;
15401541

15411542
selectLiteralColumnName

src/grammar/impala/ImpalaSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,7 @@ selectItem
823823
: selectLiteralColumnName columnAlias?
824824
| selectExpressionColumnName columnAlias?
825825
| tableAllColumns
826+
| {this.shouldMatchEmpty()}? emptyColumn
826827
;
827828

828829
columnAlias

src/grammar/mysql/MySqlParser.g4

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,9 +1205,10 @@ selectElements
12051205
;
12061206

12071207
selectElement
1208-
: tableAllColumns
1209-
| selectLiteralColumnName (KW_AS? alias=uid)?
1210-
| selectExpressionColumnName (KW_AS? alias=uid)?
1208+
: tableAllColumns # selectElement_star
1209+
| selectLiteralColumnName (KW_AS? alias=uid)? # selectElement_label
1210+
| selectExpressionColumnName (KW_AS? alias=uid)? # selectElement_expr
1211+
| uid DOT {this.shouldMatchEmpty()}? emptyColumn # selectElement_dot_empty
12111212
;
12121213

12131214
tableAllColumns
@@ -2424,7 +2425,7 @@ emptyColumn
24242425
;
24252426

24262427
columnName
2427-
: uid (dottedIdAllowEmpty dottedIdAllowEmpty?)?
2428+
: uid (dottedId dottedId?)?
24282429
| .? dottedId dottedId?
24292430
| {this.shouldMatchEmpty()}? emptyColumn
24302431
;
@@ -2436,7 +2437,7 @@ columnNamePath
24362437

24372438
columnNamePathAllowEmpty
24382439
: {this.shouldMatchEmpty()}? emptyColumn
2439-
| uid (dottedIdAllowEmpty dottedIdAllowEmpty?)?
2440+
| uid (dottedId dottedId?)?
24402441
;
24412442

24422443
tableSpaceNameCreate
@@ -2574,12 +2575,6 @@ dottedId
25742575
| '.' uid
25752576
;
25762577

2577-
dottedIdAllowEmpty
2578-
: DOT ID
2579-
| '.' uid
2580-
| {this.shouldMatchEmpty()}? DOT emptyColumn
2581-
;
2582-
25832578
decimalLiteral
25842579
: DECIMAL_LITERAL
25852580
| ZERO_DECIMAL

src/grammar/postgresql/PostgreSqlParser.g4

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2615,7 +2615,8 @@ when_clause
26152615
;
26162616

26172617
indirectionEl
2618-
: DOT (colLabel | STAR)
2618+
: DOT indirectionLabel
2619+
| DOT STAR
26192620
| OPEN_BRACKET (expression | expression? COLON expression?) CLOSE_BRACKET
26202621
;
26212622

@@ -2634,6 +2635,8 @@ targetList
26342635
targetEl
26352636
: tableAllColumns # target_star
26362637
| (selectLiteralColumnName | selectExpressionColumnName) (KW_AS? alias=identifier |) # target_label
2638+
| colId DOT {this.entityCollecting}? emptyColumn # target_dot_empty
2639+
| {this.entityCollecting}? emptyColumn # target_empty
26372640
;
26382641

26392642
tableAllColumns
@@ -2722,18 +2725,17 @@ procedureNameCreate
27222725
| colId indirection
27232726
;
27242727

2728+
// Empty column rule for entity collection
27252729
emptyColumn
27262730
:
27272731
;
27282732

27292733
columnName
27302734
: colId optIndirection
2731-
| {this.shouldMatchEmpty()}? (colId DOT emptyColumn | emptyColumn)
27322735
;
27332736

27342737
columnNamePath
27352738
: colId optIndirection
2736-
| {this.shouldMatchEmpty()}? (colId DOT emptyColumn | emptyColumn)
27372739
;
27382740

27392741
columnNameCreate
@@ -2800,6 +2802,12 @@ colLabel
28002802
| reservedKeyword
28012803
;
28022804

2805+
indirectionLabel
2806+
: identifier
2807+
| colNameKeyword
2808+
| typeFuncNameKeyword
2809+
;
2810+
28032811
identifier
28042812
: Identifier (KW_UESCAPE anysconst)?
28052813
| stringConst

src/grammar/spark/SparkSqlParser.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,7 @@ namedExpression
850850
: (tableAllColumns | selectLiteralColumnName | selectExpressionColumnName) (
851851
KW_AS? (alias=errorCapturingIdentifier | identifierList)
852852
)?
853+
| {this.shouldMatchEmpty()}? emptyColumn
853854
;
854855

855856
namedExpressionSeq

src/lib/SQLParserBase.ts

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,50 @@ export abstract class SQLParserBase<T = antlr.ParserRuleContext> extends antlr.P
1010

1111
public entityCollecting = false;
1212

13-
public shouldMatchEmpty () {
14-
return this.entityCollecting
15-
&& (this.tokenStream.LT(-1)?.tokenIndex ?? Infinity) <= this.caretTokenIndex
16-
&& (this.tokenStream.LT(1)?.tokenIndex ?? -Infinity) >= this.caretTokenIndex
13+
/**
14+
* Semantic predicate to determine whether to match empty column.
15+
*
16+
* Key design:
17+
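* 1. Only match empty column in entityCollecting mode, or when a caret position is set (caretTokenIndex >= 0)
18+
* 2. If a caret position is set, match only when the caret lies between the previous and next token; without one, every empty column matches
19+
* 3. In pure validate mode (entityCollecting=false and no caret position), this predicate returns false
20+
* so that ANTLR itself reports the incomplete SQL as an error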
21+
*
22+
* IMPORTANT: This predicate should be used carefully to avoid affecting
23+
* prediction in non-entity-collecting contexts.
24+
*/
25+
public shouldMatchEmpty (ruleName?: string) {
26+
// Only match in entityCollecting mode or when caret position is specified (suggestion mode)
27+
if (this.entityCollecting || this.caretTokenIndex >= 0) {
28+
// If no caret position specified, match all empty columns
29+
if (this.caretTokenIndex < 0) {
30+
return true;
31+
}
32+
33+
// Check if caret is at the position where empty column would be
34+
const prevTokenIndex = this.tokenStream.LT(-1)?.tokenIndex;
35+
const nextTokenIndex = this.tokenStream.LT(1)?.tokenIndex;
36+
37+
// Match if caret is between previous and next token
38+
if (prevTokenIndex !== undefined && nextTokenIndex !== undefined) {
39+
return prevTokenIndex <= this.caretTokenIndex && nextTokenIndex >= this.caretTokenIndex;
40+
}
41+
42+
// If only previous token exists, match if caret is after it
43+
if (prevTokenIndex !== undefined) {
44+
return prevTokenIndex <= this.caretTokenIndex;
45+
}
46+
47+
// If only next token exists, match if caret is before it
48+
if (nextTokenIndex !== undefined) {
49+
return nextTokenIndex >= this.caretTokenIndex;
50+
}
51+
52+
return false;
53+
}
54+
55+
// In pure validate mode, don't match empty columns
56+
// This allows ANTLR to report errors naturally
57+
return false;
1758
}
18-
}
59+
}

src/lib/flink/FlinkSqlParser.interp

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)