Skip to content

Commit ab5b61f

Browse files
committed
Support C-style block comments.
1 parent afa3187 commit ab5b61f

1 file changed

Lines changed: 38 additions & 24 deletions

File tree

crates/gitql-parser/src/tokenizer.rs

Lines changed: 38 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ impl Tokenizer {
6363
// @> or Global Variable Symbol
6464
if char == '@' {
6565
// @>
66-
if self.index + 1 < len && self.content[self.index + 1] == '>' {
66+
if self.is_next_char('>') {
6767
self.index += 2;
6868
let location = self.current_source_location();
6969
tokens.push(Token::new(TokenKind::AtRightArrow, location));
@@ -139,8 +139,8 @@ impl Tokenizer {
139139
// Minus
140140
if char == '-' {
141141
// Ignore a single-line comment, which runs from `--` to the end of the current line
142-
if self.index + 1 < self.content_len && self.content[self.index + 1] == '-' {
143-
self.ignore_single_line_comment();
142+
if self.is_next_char('-') {
143+
self.consume_single_line_comment();
144144
continue;
145145
}
146146

@@ -161,8 +161,8 @@ impl Tokenizer {
161161
// Slash
162162
if char == '/' {
163163
// Ignore a C-style block comment of the form /* comment */
164-
if self.index + 1 < self.content_len && self.content[self.index + 1] == '*' {
165-
self.ignore_c_style_comment()?;
164+
if self.is_next_char('*') {
165+
self.consume_c_style_block_comment()?;
166166
continue;
167167
}
168168

@@ -201,7 +201,7 @@ impl Tokenizer {
201201
let location = self.current_source_location();
202202

203203
self.advance();
204-
let kind = if self.index < len && self.content[self.index] == '|' {
204+
let kind = if self.is_current_char('|') {
205205
self.advance();
206206
TokenKind::OrOr
207207
} else {
@@ -217,7 +217,7 @@ impl Tokenizer {
217217
let location = self.current_source_location();
218218

219219
self.advance();
220-
let kind = if self.index < len && self.content[self.index] == '&' {
220+
let kind = if self.is_current_char('&') {
221221
self.advance();
222222
TokenKind::AndAnd
223223
} else {
@@ -257,10 +257,10 @@ impl Tokenizer {
257257
let location = self.current_source_location();
258258

259259
self.advance();
260-
let kind = if self.index < len && self.content[self.index] == '=' {
260+
let kind = if self.is_current_char('=') {
261261
self.advance();
262262
TokenKind::GreaterEqual
263-
} else if self.index < len && self.content[self.index] == '>' {
263+
} else if self.is_current_char('>') {
264264
self.advance();
265265
TokenKind::BitwiseRightShift
266266
} else {
@@ -276,21 +276,21 @@ impl Tokenizer {
276276
let location = self.current_source_location();
277277

278278
self.advance();
279-
let kind = if self.index < len && self.content[self.index] == '=' {
279+
let kind = if self.is_current_char('=') {
280280
self.advance();
281-
if self.index < len && self.content[self.index] == '>' {
281+
if self.is_current_char('>') {
282282
self.advance();
283283
TokenKind::NullSafeEqual
284284
} else {
285285
TokenKind::LessEqual
286286
}
287-
} else if self.index < len && self.content[self.index] == '<' {
287+
} else if self.is_current_char('<') {
288288
self.advance();
289289
TokenKind::BitwiseLeftShift
290-
} else if self.index < len && self.content[self.index] == '>' {
290+
} else if self.is_current_char('>') {
291291
self.advance();
292292
TokenKind::BangEqual
293-
} else if self.index < len && self.content[self.index] == '@' {
293+
} else if self.is_current_char('@') {
294294
self.advance();
295295
TokenKind::ArrowRightAt
296296
} else {
@@ -314,15 +314,15 @@ impl Tokenizer {
314314
let location = self.current_source_location();
315315

316316
// :=
317-
if self.index + 1 < len && self.content[self.index + 1] == '=' {
317+
if self.is_next_char('=') {
318318
tokens.push(Token::new(TokenKind::ColonEqual, location));
319319
// Advance `:=`
320320
self.advance_n(2);
321321
continue;
322322
}
323323

324324
// ::
325-
if self.index + 1 < len && self.content[self.index + 1] == ':' {
325+
if self.is_next_char(':') {
326326
tokens.push(Token::new(TokenKind::ColonColon, location));
327327
// Advance `::`
328328
self.advance_n(2);
@@ -685,7 +685,7 @@ impl Tokenizer {
685685
self.advance();
686686

687687
let mut buffer = String::new();
688-
while self.has_next() && self.content[self.index] != around {
688+
while self.has_next() && !self.is_current_char(around) {
689689
if !self.is_current_char('\\') {
690690
buffer.push(self.content[self.index]);
691691
self.advance();
@@ -744,7 +744,7 @@ impl Tokenizer {
744744
Ok(buffer)
745745
}
746746

747-
fn ignore_single_line_comment(&mut self) {
747+
fn consume_single_line_comment(&mut self) {
748748
// Advance `--`
749749
self.advance_n(2);
750750

@@ -758,15 +758,25 @@ impl Tokenizer {
758758
self.column_end = 0;
759759
}
760760

761-
fn ignore_c_style_comment(&mut self) -> Result<(), Box<Diagnostic>> {
761+
fn consume_c_style_block_comment(&mut self) -> Result<(), Box<Diagnostic>> {
762762
// Advance `/*`
763763
self.advance_n(2);
764764

765-
while self.index + 1 < self.content_len
766-
&& (!self.is_current_char('*') && self.content[self.index + 1] != '/')
767-
{
765+
let mut number_nested_block_start = 0;
766+
loop {
767+
if self.is_current_char('/') && self.is_next_char('*') {
768+
number_nested_block_start += 1;
769+
}
770+
768771
// Advance char
769772
self.advance();
773+
774+
if self.is_current_char('*') && self.is_next_char('/') {
775+
number_nested_block_start -= 1;
776+
if number_nested_block_start < 0 {
777+
break;
778+
}
779+
}
770780
}
771781

772782
if self.index + 2 > self.content_len {
@@ -792,11 +802,15 @@ impl Tokenizer {
792802
}
793803

794804
fn is_current_char(&self, ch: char) -> bool {
795-
self.content[self.index] == ch
805+
self.index < self.content_len && self.content[self.index] == ch
806+
}
807+
808+
fn is_next_char(&self, ch: char) -> bool {
809+
self.index + 1 < self.content_len && self.content[self.index + 1] == ch
796810
}
797811

798812
fn is_current_char_func(&self, func: fn(char) -> bool) -> bool {
799-
func(self.content[self.index])
813+
self.index < self.content_len && func(self.content[self.index])
800814
}
801815

802816
fn has_next(&self) -> bool {

0 commit comments

Comments
 (0)