Skip to content

Commit afa3187

Browse files
committed
Speed up lookups in the tokenizer by using a compile-time map
1 parent 45f2276 commit afa3187

5 files changed

Lines changed: 186 additions & 126 deletions

File tree

Cargo.lock

Lines changed: 63 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ chrono = { version = "0.4.41" }
3434
regex = { version = "1.11.1" }
3535
rand = { version = "0.9.2" }
3636
indexmap = { version = "2.10.0" }
37+
phf = { version = "0.13.1" }
3738
linked-hash-map = { version = "0.5.6" }
3839
uuid = { version = "1.17.0", features = ["v4"] }
3940
gitql-core = { path = "./crates/gitql-core", version = "0.17.0" }
@@ -61,6 +62,9 @@ lineeditor = "0.4.1"
6162
[dev-dependencies]
6263
criterion = "0.7.0"
6364

65+
[build-dependencies]
66+
phf_codegen = "0.13.1"
67+
6468
# Run all benchmarks with `cargo bench`
6569
# Run individual benchmarks like `cargo bench -- <regex>` e.g. `cargo bench -- tokenizer`
6670
[[bench]]

crates/gitql-parser/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ categories = ["command-line-utilities"]
1212
[dependencies]
1313
gitql-core = { workspace = true }
1414
gitql-ast = { workspace = true }
15+
phf = { workspace = true, features = ["macros"] }

crates/gitql-parser/src/token.rs

Lines changed: 112 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ use std::fmt::Display;
22
use std::fmt::Formatter;
33
use std::fmt::Result;
44

5-
#[derive(PartialEq)]
5+
use phf::phf_map;
6+
7+
#[derive(Clone, PartialEq)]
68
pub enum TokenKind {
79
Do,
810
Set,
@@ -120,6 +122,115 @@ pub enum TokenKind {
120122
Semicolon,
121123
}
122124

125+
pub static GITQL_RESERVED_KEYWORDS: phf::Map<&'static str, TokenKind> = phf_map! {
126+
// Reserved keywords
127+
"do" => TokenKind::Do,
128+
"set" => TokenKind::Set,
129+
"select" => TokenKind::Select,
130+
"distinct" => TokenKind::Distinct,
131+
"from" => TokenKind::From,
132+
"where" => TokenKind::Where,
133+
"qualify" => TokenKind::Qualify,
134+
"limit" => TokenKind::Limit,
135+
"offset" => TokenKind::Offset,
136+
"order" => TokenKind::Order,
137+
"using" => TokenKind::Using,
138+
"case" => TokenKind::Case,
139+
"when" => TokenKind::When,
140+
"then" => TokenKind::Then,
141+
"else" => TokenKind::Else,
142+
"end" => TokenKind::End,
143+
"between" => TokenKind::Between,
144+
"in" => TokenKind::In,
145+
"is" => TokenKind::Is,
146+
"on" => TokenKind::On,
147+
"not" => TokenKind::Not,
148+
"like" => TokenKind::Like,
149+
"glob" => TokenKind::Glob,
150+
"describe" => TokenKind::Describe,
151+
"show" => TokenKind::Show,
152+
"regexp" => TokenKind::RegExp,
153+
154+
"cast" => TokenKind::Cast,
155+
"benchmark" => TokenKind::Benchmark,
156+
157+
"interval" => TokenKind::Interval,
158+
159+
// Select into
160+
"into" => TokenKind::Into,
161+
"outfile" => TokenKind::Outfile,
162+
"dumpfile" => TokenKind::Dumpfile,
163+
"lines" => TokenKind::Lines,
164+
"fields" => TokenKind::Fields,
165+
"enclosed" => TokenKind::Enclosed,
166+
"terminated" => TokenKind::Terminated,
167+
168+
// Joins
169+
"join" => TokenKind::Join,
170+
"left" => TokenKind::Left,
171+
"right" => TokenKind::Right,
172+
"cross" => TokenKind::Cross,
173+
"inner" => TokenKind::Inner,
174+
"outer" => TokenKind::Outer,
175+
176+
// Grouping
177+
"group" => TokenKind::Group,
178+
"by" => TokenKind::By,
179+
"having" => TokenKind::Having,
180+
"with" => TokenKind::With,
181+
"rollup" => TokenKind::Rollup,
182+
183+
// Between kind
184+
"symmetric" => TokenKind::Symmetric,
185+
"asymmetric" => TokenKind::Asymmetric,
186+
187+
// Integer division and Modulo operator
188+
"div" => TokenKind::Slash,
189+
"mod" => TokenKind::Percentage,
190+
191+
// Logical Operators
192+
"or" => TokenKind::OrKeyword,
193+
"and" => TokenKind::AndKeyword,
194+
"xor" => TokenKind::XorKeyword,
195+
196+
// Group Operators
197+
"all" => TokenKind::All,
198+
"some" => TokenKind::Some,
199+
"any" => TokenKind::Any,
200+
201+
"row" => TokenKind::Row,
202+
203+
// True, False and Null
204+
"true" => TokenKind::True,
205+
"false" => TokenKind::False,
206+
"null" => TokenKind::Null,
207+
"nulls" => TokenKind::Nulls,
208+
209+
// Infinity and NaN
210+
"infinity" => TokenKind::Infinity,
211+
"inf" => TokenKind::Infinity,
212+
"nan" => TokenKind::NaN,
213+
214+
// As for alias
215+
"as" => TokenKind::As,
216+
217+
// Order by DESC and ASC
218+
"asc" => TokenKind::Ascending,
219+
"desc" => TokenKind::Descending,
220+
221+
// Order by null ordering policy Null first and last
222+
"first" => TokenKind::First,
223+
"last" => TokenKind::Last,
224+
225+
// Array data type
226+
"array" => TokenKind::Array,
227+
228+
// Over
229+
"window" => TokenKind::Window,
230+
"over" => TokenKind::Over,
231+
"partition" => TokenKind::Partition,
232+
};
233+
123234
impl Display for TokenKind {
124235
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
125236
let literal = match self {
@@ -293,13 +404,6 @@ impl Token {
293404
Token { kind, location }
294405
}
295406

296-
pub fn new_symbol(symbol: String, location: SourceLocation) -> Token {
297-
Token {
298-
kind: resolve_symbol_kind(symbol),
299-
location,
300-
}
301-
}
302-
303407
pub fn has_kind(&self, kind: TokenKind) -> bool {
304408
self.kind == kind
305409
}
@@ -310,117 +414,3 @@ impl Display for Token {
310414
f.write_str(&self.kind.to_string())
311415
}
312416
}
313-
314-
fn resolve_symbol_kind(symbol: String) -> TokenKind {
315-
match symbol.to_lowercase().as_str() {
316-
// Reserved keywords
317-
"do" => TokenKind::Do,
318-
"set" => TokenKind::Set,
319-
"select" => TokenKind::Select,
320-
"distinct" => TokenKind::Distinct,
321-
"from" => TokenKind::From,
322-
"where" => TokenKind::Where,
323-
"qualify" => TokenKind::Qualify,
324-
"limit" => TokenKind::Limit,
325-
"offset" => TokenKind::Offset,
326-
"order" => TokenKind::Order,
327-
"using" => TokenKind::Using,
328-
"case" => TokenKind::Case,
329-
"when" => TokenKind::When,
330-
"then" => TokenKind::Then,
331-
"else" => TokenKind::Else,
332-
"end" => TokenKind::End,
333-
"between" => TokenKind::Between,
334-
"in" => TokenKind::In,
335-
"is" => TokenKind::Is,
336-
"on" => TokenKind::On,
337-
"not" => TokenKind::Not,
338-
"like" => TokenKind::Like,
339-
"glob" => TokenKind::Glob,
340-
"describe" => TokenKind::Describe,
341-
"show" => TokenKind::Show,
342-
"regexp" => TokenKind::RegExp,
343-
344-
"cast" => TokenKind::Cast,
345-
"benchmark" => TokenKind::Benchmark,
346-
347-
"interval" => TokenKind::Interval,
348-
349-
// Select into
350-
"into" => TokenKind::Into,
351-
"outfile" => TokenKind::Outfile,
352-
"dumpfile" => TokenKind::Dumpfile,
353-
"lines" => TokenKind::Lines,
354-
"fields" => TokenKind::Fields,
355-
"enclosed" => TokenKind::Enclosed,
356-
"terminated" => TokenKind::Terminated,
357-
358-
// Joins
359-
"join" => TokenKind::Join,
360-
"left" => TokenKind::Left,
361-
"right" => TokenKind::Right,
362-
"cross" => TokenKind::Cross,
363-
"inner" => TokenKind::Inner,
364-
"outer" => TokenKind::Outer,
365-
366-
// Grouping
367-
"group" => TokenKind::Group,
368-
"by" => TokenKind::By,
369-
"having" => TokenKind::Having,
370-
"with" => TokenKind::With,
371-
"rollup" => TokenKind::Rollup,
372-
373-
// Between kind
374-
"symmetric" => TokenKind::Symmetric,
375-
"asymmetric" => TokenKind::Asymmetric,
376-
377-
// Integer division and Modulo operator
378-
"div" => TokenKind::Slash,
379-
"mod" => TokenKind::Percentage,
380-
381-
// Logical Operators
382-
"or" => TokenKind::OrKeyword,
383-
"and" => TokenKind::AndKeyword,
384-
"xor" => TokenKind::XorKeyword,
385-
386-
// Group Operators
387-
"all" => TokenKind::All,
388-
"some" => TokenKind::Some,
389-
"any" => TokenKind::Any,
390-
391-
"row" => TokenKind::Row,
392-
393-
// True, False and Null
394-
"true" => TokenKind::True,
395-
"false" => TokenKind::False,
396-
"null" => TokenKind::Null,
397-
"nulls" => TokenKind::Nulls,
398-
399-
// Infinity and NaN
400-
"infinity" => TokenKind::Infinity,
401-
"inf" => TokenKind::Infinity,
402-
"nan" => TokenKind::NaN,
403-
404-
// As for alias
405-
"as" => TokenKind::As,
406-
407-
// Order by DES and ASC
408-
"asc" => TokenKind::Ascending,
409-
"desc" => TokenKind::Descending,
410-
411-
// Order by null ordering policy Null first and last
412-
"first" => TokenKind::First,
413-
"last" => TokenKind::Last,
414-
415-
// Array data type
416-
"array" => TokenKind::Array,
417-
418-
// Over
419-
"window" => TokenKind::Window,
420-
"over" => TokenKind::Over,
421-
"partition" => TokenKind::Partition,
422-
423-
// Identifier
424-
_ => TokenKind::Symbol(symbol),
425-
}
426-
}

0 commit comments

Comments
 (0)