-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokens.ts
More file actions
701 lines (676 loc) · 21.6 KB
/
tokens.ts
File metadata and controls
701 lines (676 loc) · 21.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
import { createToken, Lexer, TokenType } from "chevrotain"
import { keywords } from "../grammar/keywords"
import { dataTypes } from "../grammar/dataTypes"
import { constants } from "../grammar/constants"
// =============================================================================
// Token Category for Non-Reserved Keywords Used as Identifiers
// =============================================================================
// Chevrotain token categories let us match many keyword tokens with a single
// CONSUME(IdentifierKeyword) in the parser, avoiding a 160+ alternative OR
// that makes performSelfAnalysis() take ~35 seconds.
/**
 * Category token shared by every keyword that may also appear as an
 * unquoted identifier. Its pattern is `Lexer.NA`, so it is declared purely
 * as a category that other tokens list in their `CATEGORIES`; this file
 * attaches it to the tokens named in `IDENTIFIER_KEYWORD_NAMES`.
 */
export const IdentifierKeyword = createToken({ name: "IdentifierKeyword", pattern: Lexer.NA })
// =============================================================================
// Token Generation Utilities
// =============================================================================
/**
 * Derive the PascalCase token name for a keyword.
 *
 * - Without underscores, only the first character is upper-cased and the
 *   remainder is kept verbatim, so existing camelCase survives:
 *   "select" → "Select", "o3MaxLag" → "O3MaxLag", "long256" → "Long256".
 * - With underscores, the keyword is split on "_"; each piece gets an
 *   upper-cased first character and a lower-cased remainder, and the pieces
 *   are concatenated: "data_page_size" → "DataPageSize",
 *   "timestamp_ns" → "TimestampNs".
 *
 * @param str keyword text as listed in the grammar arrays
 * @returns the token name derived from `str`
 */
function toPascalCase(str: string): string {
  // Fast path: nothing to split, just capitalise the leading character.
  if (!str.includes("_")) {
    const head = str.charAt(0)
    return head.toUpperCase() + str.slice(1)
  }

  // snake_case path: normalise every segment and glue them together.
  let pascal = ""
  for (const segment of str.split("_")) {
    pascal += segment.charAt(0).toUpperCase() + segment.slice(1).toLowerCase()
  }
  return pascal
}
/**
 * Build a case-insensitive keyword token that only matches whole words.
 *
 * The word boundary is expressed as a negative lookahead rather than `\b`:
 * without the `u` flag, `\b` is defined in terms of ASCII `\w`, so "in\b"
 * would match the "in" prefix of "inção" (ç is outside `\w`). The lookahead
 * rejects a match that is immediately followed by `\w` or by any code unit
 * in the range U+0080–U+FFFF.
 *
 * Note that `pattern` is spliced into the regular expression source as-is;
 * it is not escaped.
 *
 * @param name token name passed to `createToken`
 * @param pattern regular-expression source for the keyword text
 * @returns the created token type
 */
function createKeywordToken(name: string, pattern: string): TokenType {
  // Fails the match when the next character could continue an identifier.
  const notFollowedByIdentifierChar = "(?![\\w\\u0080-\\uFFFF])"
  const matcher = new RegExp(pattern + notFollowedByIdentifierChar, "i")
  return createToken({ name, pattern: matcher })
}
/**
 * Lookup from generated token name to the upper-cased keyword text it was
 * created from, e.g. "Select" → "SELECT", "DataPageSize" → "DATA_PAGE_SIZE".
 *
 * The map starts empty and is filled as a side effect of token generation,
 * so consumers such as autocomplete can read the keyword text directly
 * instead of recovering it from a token's regular expression.
 */
export const TOKEN_NAME_TO_KEYWORD: Map<string, string> = new Map()
/**
 * Create one keyword token per distinct token name found in `list`.
 *
 * Side effect: every token created here is also recorded in
 * `TOKEN_NAME_TO_KEYWORD` with the upper-cased keyword text.
 *
 * @param list keyword strings to turn into tokens
 * @returns a new Map of tokenName → TokenType, in first-occurrence order
 */
function generateTokensFromList(
  list: readonly string[],
): Map<string, TokenType> {
  const generated = new Map<string, TokenType>()
  list.forEach((keyword) => {
    const tokenName = toPascalCase(keyword)
    // A name already produced from an earlier entry of this list is skipped.
    // The check is against the map local to this call, so it does not see
    // tokens generated from other lists.
    if (generated.has(tokenName)) {
      return
    }
    generated.set(tokenName, createKeywordToken(tokenName, keyword))
    TOKEN_NAME_TO_KEYWORD.set(tokenName, keyword.toUpperCase())
  })
  return generated
}
// =============================================================================
// Auto-generated Tokens from Grammar Arrays
// =============================================================================
// Build one token map per grammar category.
const keywordTokenMap = generateTokensFromList(keywords)
const dataTypeTokenMap = generateTokensFromList(dataTypes)
const constantTokenMap = generateTokensFromList(constants)
// Fold the three maps into one, in the order keywords → dataTypes → constants.
// Entries are written with Map.set, so when the same token name occurs in more
// than one category the name keeps the position of its first insertion but the
// token stored for it is the one from the LAST category that defines it.
// NOTE(review): each generateTokensFromList call de-duplicates only within its
// own list, so cross-category duplicates are resolved here (last one wins).
const allGeneratedTokens = new Map<string, TokenType>()
for (const categoryTokens of [
  keywordTokenMap,
  dataTypeTokenMap,
  constantTokenMap,
]) {
  for (const [tokenName, tokenType] of categoryTokens) {
    allGeneratedTokens.set(tokenName, tokenType)
  }
}
// =============================================================================
// Assign IdentifierKeyword category to non-reserved keyword tokens
// =============================================================================
// These tokens can be used as unquoted identifiers (table names, column names,
// aliases, CTE names, etc.). This matches QuestDB's Java parser behavior where
// only ~61 keywords are reserved and everything else is non-reserved.
//
// Reserved keywords NOT included (would cause parsing ambiguity):
// Structural: Select, From, Where, As, By, With, If, Exists, Into,
// Values, Set, For, Table, To, Declare, Rename
// Joins: Join, Inner, Left, Right, Full, Outer, Cross,
// Asof, Lt, Splice, On
// Operators: And, Or, Not, In, Between, Like, Ilike, Is
// Expressions: Case, When, Then, Else, End, Cast, True, False, Null, NaN,
// All, Any, Distinct, Over
// Clauses: Group, Order, Asc, Desc, Limit, Sample, Window,
// Union, Intersect, Except, Pivot, Unpivot, Lock, Truncate
/**
 * Token names (PascalCase, as used for keys of the generated token map) of
 * the keywords that are tagged with the IdentifierKeyword category by the
 * loop that follows this declaration. A name listed here that has no
 * generated token is skipped by that loop without an error.
 *
 * Constructed via `globalThis.Set` because this module also exports a token
 * constant named `Set`, which shadows the built-in inside the module.
 */
export const IDENTIFIER_KEYWORD_NAMES = new globalThis.Set([
// Data type keywords
"Symbol",
"Timestamp",
"Date",
"Time",
"Int",
"Integer",
"Long",
"Long128",
"Long256",
"Short",
"Byte",
"Float",
"Double",
"Boolean",
"String",
"Char",
"Binary",
"Uuid",
"Ipv4",
"Geohash",
"Varchar",
"Decimal",
"Interval",
"TimestampNs",
// Common identifier-like keywords
"Index",
"Key",
"Column",
"Type",
"Level",
"Offset",
"First",
"Volume",
"Start",
"Current",
"User",
"Users",
"Public",
"Default",
"View",
// Time units (from grammar/constants.ts)
"Hour",
"Day",
"Week",
"Month",
"Year",
"Days",
"Hours",
"Months",
"Weeks",
"Years",
"Minute",
"Minutes",
"Second",
"Seconds",
"Millisecond",
"Milliseconds",
"Microsecond",
"Microseconds",
"Nanosecond",
"Nanoseconds",
"Century",
"Decade",
"Millennium",
"Quarter",
"Dow",
"Doy",
"Epoch",
"Isodow",
"Isoyear",
// Function name keywords
"Replace",
"Tables",
"Format",
"Header",
"Query",
"Enable",
"Disable",
"None",
"Error",
"System",
"Http",
// Entity/config keywords
"Account",
"Accounts",
"Service",
"Token",
"Rest",
"Password",
"Partition",
"Partitions",
"PartitionBy",
"Dedup",
"Wal",
"Bypass",
"Batch",
"No",
"Groups",
"Assume",
"Database",
"Backup",
"Foreign",
"Primary",
"References",
"Cascade",
"Capacity",
"Cancel",
"Prevailing",
"Range",
"Writer",
"Materialized",
"Snapshot",
"Unlock",
"Refresh",
// ALTER TABLE sub-operations
"Add",
"Attach",
"Detach",
"Convert",
"Remove",
"Squash",
"Suspend",
"Resume",
"Release",
// COPY/export parameters
"Parquet",
"ParquetVersion",
"Abort",
"Alias",
"Base",
"Complete",
"CompressionCodec",
"CompressionLevel",
"DataPageSize",
"Delimiter",
"Exit",
"Ignore",
"Option",
"Param",
"Prepare",
"RowGroupSize",
"SkipRow",
"SkipColumn",
"Verification",
// CREATE TABLE / DDL config
"Atomic",
"CommitLag",
"Delay",
"External",
"Incremental",
"MaxUncommittedRows",
"O3MaxLag",
"Ttl",
"Deferred",
// SHOW sub-keywords
"Columns",
"Keys",
"List",
"Parameters",
"Permissions",
"Datestyle",
"DefaultTransactionReadOnly",
"Keep",
"Maps",
"MaxIdentifierLength",
"SearchPath",
"ServerVersion",
"ServerVersionNum",
"StandardConformingStrings",
"StatisticsEnabled",
"Txn",
"Within",
// Config/metadata
"Calendar",
"Checkpoint",
"Cumulative",
"Isolation",
"Jwk",
"Length",
"Manual",
"Nocache",
"Observation",
"Overridable",
"Owned",
"Period",
"Reindex",
"Tolerance",
"Transaction",
"TransactionIsolation",
"Transient",
"Zone",
// Compression codecs
"RawArrayEncoding",
"Uncompressed",
"Snappy",
"Gzip",
"Lz4",
"Zstd",
"Lz4Raw",
"Brotli",
"Lzo",
// Other non-reserved keywords
"Compile",
"Delete",
"Upsert",
"Cache",
"Exclusive",
"Immediate",
"Only",
"Align",
"Latest",
// New constants that can be used as identifiers
"Ilp",
"Native",
"Pgwire",
// Window frame keywords
"Row",
"Rows",
"Preceding",
"Following",
"Unbounded",
"Exclude",
"Others",
"Nulls",
// SAMPLE BY keywords
"Fill",
"Every",
"Prev",
"Linear",
"Step",
])
// Tag each listed token with the IdentifierKeyword category by mutating the
// token's CATEGORIES array in place (creating the array when it is absent).
IDENTIFIER_KEYWORD_NAMES.forEach((tokenName) => {
  const tokenType = allGeneratedTokens.get(tokenName)
  if (!tokenType) {
    // No generated token carries this name; nothing to tag.
    return
  }
  if (tokenType.CATEGORIES) {
    tokenType.CATEGORIES.push(IdentifierKeyword)
  } else {
    tokenType.CATEGORIES = [IdentifierKeyword]
  }
})
// =============================================================================
// Token Exports
// =============================================================================
/**
 * Every generated keyword token, keyed by token name.
 * This is the same Map instance used internally (not a copy); use it for
 * lookups by name or for iteration.
 */
export const keywordTokens = allGeneratedTokens
/**
 * Every generated keyword token as a flat array, in the map's iteration
 * order, for passing to the Lexer.
 * Relative order among keywords is not significant here: each keyword
 * pattern ends with a lookahead that rejects a following identifier
 * character (see createKeywordToken), which plays the role of a word boundary.
 */
export const keywordTokenArray: TokenType[] = [...allGeneratedTokens.values()]
// =============================================================================
// Named Exports for Parser Usage
// The parser needs direct references to tokens like `Select`, `From`, etc.
// =============================================================================
/**
 * Look up a generated token by its PascalCase name.
 *
 * @param name token name, e.g. "Select"
 * @returns the token registered under `name`
 * @throws Error when no generated token has that name
 */
function getToken(name: string): TokenType {
  const found = allGeneratedTokens.get(name)
  if (found) {
    return found
  }
  // globalThis.Error: this module exports its own `Error` token constant,
  // which shadows the built-in constructor inside the module.
  throw new globalThis.Error(`Token ${name} not found in generated tokens`)
}
// Keywords (from grammar/keywords.ts)
// Each constant below is resolved by name through getToken, which throws
// while the module is loading if no generated token has that name.
// Some export names (e.g. Error, Set) are also names of JavaScript built-ins
// and shadow them inside this module; code in this file that needs the
// built-ins reaches them through globalThis.
export const Abort = getToken("Abort")
export const Account = getToken("Account")
export const Accounts = getToken("Accounts")
export const Add = getToken("Add")
export const Alias = getToken("Alias")
export const Align = getToken("Align")
export const All = getToken("All")
export const Alter = getToken("Alter")
export const And = getToken("And")
export const Any = getToken("Any")
export const As = getToken("As")
export const Asof = getToken("Asof")
export const Assume = getToken("Assume")
export const Attach = getToken("Attach")
export const Atomic = getToken("Atomic")
export const Backup = getToken("Backup")
export const Base = getToken("Base")
export const Batch = getToken("Batch")
export const Between = getToken("Between")
export const By = getToken("By")
export const Bypass = getToken("Bypass")
export const Cache = getToken("Cache")
export const Calendar = getToken("Calendar")
export const Cancel = getToken("Cancel")
export const Capacity = getToken("Capacity")
export const Cascade = getToken("Cascade")
export const Case = getToken("Case")
export const Cast = getToken("Cast")
export const Checkpoint = getToken("Checkpoint")
export const Column = getToken("Column")
export const Columns = getToken("Columns")
export const Compile = getToken("Compile")
export const Complete = getToken("Complete")
export const CompressionCodec = getToken("CompressionCodec")
export const CompressionLevel = getToken("CompressionLevel")
export const CommitLag = getToken("CommitLag")
export const Convert = getToken("Convert")
export const Copy = getToken("Copy")
export const Create = getToken("Create")
export const Cross = getToken("Cross")
export const Cumulative = getToken("Cumulative")
export const Current = getToken("Current")
export const DataPageSize = getToken("DataPageSize")
export const Database = getToken("Database")
export const Datestyle = getToken("Datestyle")
export const Declare = getToken("Declare")
export const Dedup = getToken("Dedup")
export const Default = getToken("Default")
export const DefaultTransactionReadOnly = getToken("DefaultTransactionReadOnly")
export const Deferred = getToken("Deferred")
export const Delay = getToken("Delay")
export const Delete = getToken("Delete")
export const Delimiter = getToken("Delimiter")
export const Detach = getToken("Detach")
export const Details = getToken("Details")
export const Disable = getToken("Disable")
export const Distinct = getToken("Distinct")
export const Drop = getToken("Drop")
export const Else = getToken("Else")
export const Enable = getToken("Enable")
export const End = getToken("End")
export const Error = getToken("Error")
export const Every = getToken("Every")
export const Except = getToken("Except")
export const Exclude = getToken("Exclude")
export const Exclusive = getToken("Exclusive")
export const Exists = getToken("Exists")
export const Exit = getToken("Exit")
export const Explain = getToken("Explain")
export const External = getToken("External")
export const Fill = getToken("Fill")
export const First = getToken("First")
export const Following = getToken("Following")
export const For = getToken("For")
export const Foreign = getToken("Foreign")
export const Format = getToken("Format")
export const From = getToken("From")
export const Full = getToken("Full")
export const Grant = getToken("Grant")
export const Group = getToken("Group")
export const Groups = getToken("Groups")
export const Header = getToken("Header")
export const Horizon = getToken("Horizon")
export const Http = getToken("Http")
export const If = getToken("If")
export const Ignore = getToken("Ignore")
export const Ilike = getToken("Ilike")
export const Immediate = getToken("Immediate")
export const In = getToken("In")
export const Include = getToken("Include")
export const Index = getToken("Index")
export const Inner = getToken("Inner")
export const Insert = getToken("Insert")
export const Incremental = getToken("Incremental")
export const Intersect = getToken("Intersect")
export const Into = getToken("Into")
export const Is = getToken("Is")
export const Isolation = getToken("Isolation")
export const Jwk = getToken("Jwk")
export const Join = getToken("Join")
export const Keep = getToken("Keep")
export const Key = getToken("Key")
export const Keys = getToken("Keys")
export const Latest = getToken("Latest")
export const Left = getToken("Left")
export const Length = getToken("Length")
export const Level = getToken("Level")
export const Like = getToken("Like")
export const Limit = getToken("Limit")
export const List = getToken("List")
export const Lock = getToken("Lock")
export const Lt = getToken("Lt")
export const Maps = getToken("Maps")
export const Materialized = getToken("Materialized")
export const Manual = getToken("Manual")
export const MaxIdentifierLength = getToken("MaxIdentifierLength")
export const MaxUncommittedRows = getToken("MaxUncommittedRows")
export const No = getToken("No")
export const Nocache = getToken("Nocache")
export const Not = getToken("Not")
export const Nulls = getToken("Nulls")
export const O3MaxLag = getToken("O3MaxLag")
export const Observation = getToken("Observation")
export const Offset = getToken("Offset")
export const On = getToken("On")
export const Only = getToken("Only")
export const Option = getToken("Option")
export const Or = getToken("Or")
export const Order = getToken("Order")
export const Others = getToken("Others")
export const Outer = getToken("Outer")
export const Over = getToken("Over")
export const Overridable = getToken("Overridable")
export const Owned = getToken("Owned")
export const Param = getToken("Param")
export const Parameters = getToken("Parameters")
export const Parquet = getToken("Parquet")
export const ParquetVersion = getToken("ParquetVersion")
export const PartitionBy = getToken("PartitionBy")
export const Partition = getToken("Partition")
export const Partitions = getToken("Partitions")
export const Password = getToken("Password")
export const Period = getToken("Period")
export const Permissions = getToken("Permissions")
export const Pivot = getToken("Pivot")
export const Prepare = getToken("Prepare")
export const Preceding = getToken("Preceding")
export const Prevailing = getToken("Prevailing")
export const Primary = getToken("Primary")
export const Public = getToken("Public")
export const Query = getToken("Query")
export const Range = getToken("Range")
export const References = getToken("References")
export const Refresh = getToken("Refresh")
export const Release = getToken("Release")
export const Reindex = getToken("Reindex")
export const Remove = getToken("Remove")
export const Rename = getToken("Rename")
export const Repair = getToken("Repair")
export const Replace = getToken("Replace")
export const Rest = getToken("Rest")
export const Respect = getToken("Respect")
export const Resume = getToken("Resume")
export const Revoke = getToken("Revoke")
export const Right = getToken("Right")
export const Row = getToken("Row")
export const RowGroupSize = getToken("RowGroupSize")
export const Rows = getToken("Rows")
export const Sample = getToken("Sample")
export const SearchPath = getToken("SearchPath")
export const ServerVersion = getToken("ServerVersion")
export const ServerVersionNum = getToken("ServerVersionNum")
export const Select = getToken("Select")
export const Service = getToken("Service")
export const Set = getToken("Set")
export const Show = getToken("Show")
export const Skip = getToken("Skip")
export const SkipColumn = getToken("SkipColumn")
export const SkipRow = getToken("SkipRow")
export const Snapshot = getToken("Snapshot")
export const Splice = getToken("Splice")
export const Squash = getToken("Squash")
export const StandardConformingStrings = getToken("StandardConformingStrings")
export const Step = getToken("Step")
export const Start = getToken("Start")
export const StatisticsEnabled = getToken("StatisticsEnabled")
export const Suspend = getToken("Suspend")
export const System = getToken("System")
export const Table = getToken("Table")
export const Tables = getToken("Tables")
export const Then = getToken("Then")
export const Time = getToken("Time")
export const To = getToken("To")
export const Token = getToken("Token")
export const Tolerance = getToken("Tolerance")
export const Transaction = getToken("Transaction")
export const TransactionIsolation = getToken("TransactionIsolation")
export const Transient = getToken("Transient")
export const Truncate = getToken("Truncate")
export const Ttl = getToken("Ttl")
export const Txn = getToken("Txn")
export const Type = getToken("Type")
export const Unbounded = getToken("Unbounded")
export const Union = getToken("Union")
export const Unlock = getToken("Unlock")
export const Unpivot = getToken("Unpivot")
export const Update = getToken("Update")
export const Upsert = getToken("Upsert")
export const User = getToken("User")
export const Users = getToken("Users")
export const Vacuum = getToken("Vacuum")
export const Values = getToken("Values")
export const Verification = getToken("Verification")
export const View = getToken("View")
export const Volume = getToken("Volume")
export const Wal = getToken("Wal")
export const When = getToken("When")
export const Where = getToken("Where")
export const Window = getToken("Window")
export const With = getToken("With")
export const Within = getToken("Within")
export const Writer = getToken("Writer")
export const Zone = getToken("Zone")
// Compression codec keywords
export const RawArrayEncoding = getToken("RawArrayEncoding")
export const Uncompressed = getToken("Uncompressed")
export const Snappy = getToken("Snappy")
export const Gzip = getToken("Gzip")
export const Lz4 = getToken("Lz4")
export const Zstd = getToken("Zstd")
export const Lz4Raw = getToken("Lz4Raw")
export const Brotli = getToken("Brotli")
export const Lzo = getToken("Lzo")
// Data types (from grammar/dataTypes.ts)
// Resolved by name through getToken, which throws at module load when the
// name is missing. Several of these export names (Boolean, Date, String,
// Symbol) coincide with JavaScript built-ins and shadow them in this module.
export const Binary = getToken("Binary")
export const Boolean = getToken("Boolean")
export const Byte = getToken("Byte")
export const Char = getToken("Char")
export const Date = getToken("Date")
export const Decimal = getToken("Decimal")
export const Double = getToken("Double")
export const Float = getToken("Float")
export const Geohash = getToken("Geohash")
export const Int = getToken("Int")
export const Integer = getToken("Integer")
export const Interval = getToken("Interval")
export const Ipv4 = getToken("Ipv4")
export const Long = getToken("Long")
export const Long128 = getToken("Long128")
export const Long256 = getToken("Long256")
export const Short = getToken("Short")
export const String = getToken("String")
export const Symbol = getToken("Symbol")
export const Timestamp = getToken("Timestamp")
export const TimestampNs = getToken("TimestampNs")
export const Uuid = getToken("Uuid")
export const Varchar = getToken("Varchar")
// Constants (from grammar/constants.ts)
// Resolved by name through getToken, which throws at module load when the
// name is missing.
export const True = getToken("True")
export const False = getToken("False")
export const Null = getToken("Null")
// Exported as `NaN` (shadowing the global NaN in this module) but looked up
// under the generated name "Nan".
// NOTE(review): this presumes the constants list spells the keyword so that
// toPascalCase yields "Nan" (e.g. lowercase "nan") — confirm against
// grammar/constants.
export const NaN = getToken("Nan")
export const None = getToken("None")
export const Prev = getToken("Prev")
export const Linear = getToken("Linear")
// Sort direction (from grammar/constants.ts)
export const Asc = getToken("Asc")
export const Desc = getToken("Desc")
// Time units (from grammar/constants.ts)
export const Hour = getToken("Hour")
export const Hours = getToken("Hours")
export const Day = getToken("Day")
export const Days = getToken("Days")
export const Week = getToken("Week")
export const Weeks = getToken("Weeks")
export const Month = getToken("Month")
export const Months = getToken("Months")
export const Year = getToken("Year")
export const Years = getToken("Years")
export const Minute = getToken("Minute")
export const Minutes = getToken("Minutes")
export const Second = getToken("Second")
export const Seconds = getToken("Seconds")
export const Millisecond = getToken("Millisecond")
export const Milliseconds = getToken("Milliseconds")
export const Microsecond = getToken("Microsecond")
export const Microseconds = getToken("Microseconds")
export const Nanosecond = getToken("Nanosecond")
export const Nanoseconds = getToken("Nanoseconds")
export const Century = getToken("Century")
export const Decade = getToken("Decade")
export const Millennium = getToken("Millennium")
export const Quarter = getToken("Quarter")
export const Dow = getToken("Dow")
export const Doy = getToken("Doy")
export const Epoch = getToken("Epoch")
export const Isodow = getToken("Isodow")
export const Isoyear = getToken("Isoyear")