src/tokenizer.rs: 52 additions & 0 deletions
@@ -1191,6 +1191,22 @@ impl<'a> Tokenizer<'a> {
            }
            // numbers and period
            '0'..='9' | '.' => {
                // Special case: when `._` is encountered after a word, that
                // word is a table (or other qualifier) and the `_` starts the
                // column name. If the previous token is not a word, the input
                // is not a valid SQL word or number.
                if ch == '.' && chars.peekable.clone().nth(1) == Some('_') {
                    if let Some(Token::Word(_)) = prev_token {
                        chars.next();
                        return Ok(Some(Token::Period));
                    }

                    return self.tokenizer_error(
                        chars.location(),
                        "Unexpected character '_'".to_string(),
                    );
                }

                // Some dialects support `_` as a separator in number literals.
                // There can only be one at a time, and it must be followed by
                // another digit.
                let is_number_separator = |ch: char, next_char: Option<char>| {
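A note on the lookahead above: `chars.peekable.clone().nth(1)` clones the tokenizer's `Peekable<Chars>` cursor so it can inspect the character after the `.` without consuming anything (cloning `Chars` is cheap, since it is just a cursor over the source bytes). A minimal standalone sketch of the same two-character lookahead pattern, with illustrative names that are not from the crate:

```rust
use std::iter::Peekable;
use std::str::Chars;

// Returns true when the next two characters are `.` followed by `_`,
// without advancing the caller's iterator.
fn starts_with_period_underscore(chars: &Peekable<Chars<'_>>) -> bool {
    let mut lookahead = chars.clone(); // cheap clone of the cursor
    lookahead.next() == Some('.') && lookahead.next() == Some('_')
}

fn main() {
    let chars = "._col".chars().peekable();
    assert!(starts_with_period_underscore(&chars));
    // the original iterator is untouched and still yields `.` first
    assert_eq!(chars.clone().next(), Some('.'));
    println!("lookahead matched without consuming input");
}
```

Cloning the iterator avoids threading extra peek state through the tokenizer, which is why the change above can emit `Token::Period` and leave the `_...` characters for the normal word path on the next call.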
@@ -4018,4 +4034,40 @@ mod tests {
            ],
        );
    }

    #[test]
    fn tokenize_period_underscore() {
        let sql = String::from("SELECT table._col");
        // a dialect that supports underscores in number literals
        let dialect = PostgreSqlDialect {};
        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();

        let expected = vec![
            Token::make_keyword("SELECT"),
            Token::Whitespace(Whitespace::Space),
            Token::Word(Word {
                value: "table".to_string(),
                quote_style: None,
                keyword: Keyword::TABLE,
            }),
            Token::Period,
            Token::Word(Word {
                value: "_col".to_string(),
                quote_style: None,
                keyword: Keyword::NoKeyword,
            }),
        ];

        compare(expected, tokens);

        let sql = String::from("SELECT ._123");
        if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() {
            panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
        }

        let sql = String::from("SELECT ._abc");
        if let Ok(tokens) = Tokenizer::new(&dialect, &sql).tokenize() {
            panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
        }
    }
}
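For an end-to-end check outside the test module, a small driver along these lines, using only the public `Tokenizer` API already exercised by the test above, prints the resulting token stream:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = PostgreSqlDialect {};
    // With this change, `table._col` tokenizes as Word, Period, Word
    // rather than being mis-read as the start of a number literal.
    let tokens = Tokenizer::new(&dialect, "SELECT table._col")
        .tokenize()
        .expect("tokenization should succeed");
    for token in &tokens {
        println!("{token:?}");
    }
}
```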