Skip to content

Commit 60abfec

Browse files
authored
Add Tokenizer custom token mapper support (#2184)
1 parent 0924f3a commit 60abfec

File tree

1 file changed

+47
-4
lines changed

1 file changed

+47
-4
lines changed

src/tokenizer.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,16 @@ impl<'a> Tokenizer<'a> {
934934
pub fn tokenize_with_location_into_buf(
935935
&mut self,
936936
buf: &mut Vec<TokenWithSpan>,
937+
) -> Result<(), TokenizerError> {
938+
self.tokenize_with_location_into_buf_with_mapper(buf, |token| token)
939+
}
940+
941+
/// Tokenize the statement and produce a vector of tokens, mapping each token
942+
/// with provided `mapper`
943+
pub fn tokenize_with_location_into_buf_with_mapper(
944+
&mut self,
945+
buf: &mut Vec<TokenWithSpan>,
946+
mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
937947
) -> Result<(), TokenizerError> {
938948
let mut state = State {
939949
peekable: self.query.chars().peekable(),
@@ -952,10 +962,10 @@ impl<'a> Tokenizer<'a> {
952962
&& comment.starts_with('!') =>
953963
{
954964
// Re-tokenize the hints and add them to the buffer
955-
self.tokenize_comment_hints(comment, span, buf)?;
965+
self.tokenize_comment_hints(comment, span, buf, &mut mapper)?;
956966
}
957967
_ => {
958-
buf.push(TokenWithSpan { token, span });
968+
buf.push(mapper(TokenWithSpan { token, span }));
959969
}
960970
}
961971

@@ -971,6 +981,7 @@ impl<'a> Tokenizer<'a> {
971981
comment: &str,
972982
span: Span,
973983
buf: &mut Vec<TokenWithSpan>,
984+
mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan,
974985
) -> Result<(), TokenizerError> {
975986
// Strip the leading '!' and any version digits (e.g., "50110")
976987
let hint_content = comment
@@ -997,10 +1008,10 @@ impl<'a> Tokenizer<'a> {
9971008
let mut location = state.location();
9981009
while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? {
9991010
let token_span = location.span_to(state.location());
1000-
buf.push(TokenWithSpan {
1011+
buf.push(mapper(TokenWithSpan {
10011012
token,
10021013
span: token_span,
1003-
});
1014+
}));
10041015
location = state.location();
10051016
}
10061017

@@ -2644,6 +2655,38 @@ mod tests {
26442655
compare(expected, tokens);
26452656
}
26462657

2658+
#[test]
2659+
fn tokenize_with_mapper() {
2660+
let sql = String::from("SELECT ?");
2661+
let dialect = GenericDialect {};
2662+
let mut param_num = 1;
2663+
2664+
let mut tokens = vec![];
2665+
Tokenizer::new(&dialect, &sql)
2666+
.tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| {
2667+
token_span.token = match token_span.token {
2668+
Token::Placeholder(n) => Token::Placeholder(if n == "?" {
2669+
let ret = format!("${}", param_num);
2670+
param_num += 1;
2671+
ret
2672+
} else {
2673+
n
2674+
}),
2675+
token => token,
2676+
};
2677+
token_span
2678+
})
2679+
.unwrap();
2680+
let actual = tokens.into_iter().map(|t| t.token).collect();
2681+
let expected = vec![
2682+
Token::make_keyword("SELECT"),
2683+
Token::Whitespace(Whitespace::Space),
2684+
Token::Placeholder("$1".to_string()),
2685+
];
2686+
2687+
compare(expected, actual);
2688+
}
2689+
26472690
#[test]
26482691
fn tokenize_clickhouse_double_equal() {
26492692
let sql = String::from("SELECT foo=='1'");

0 commit comments

Comments (0)