Skip to content

Commit df97329

Browse files
Parser: fix exponential parse time on speculative prefix parsing
1 parent f63e42f commit df97329

3 files changed

Lines changed: 73 additions & 1 deletion

File tree

sqlparser_bench/benches/sqlparser_bench.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,28 @@ fn parse_not_chain(c: &mut Criterion) {
249249
group.finish();
250250
}
251251

252+
/// Benchmark parsing pathological `IF(<keyword-fn>(<keyword-fn>(...x` chains
253+
/// that previously caused 2^N work in `parse_prefix`. Each nested
254+
/// `current_time(` segment used to be explored twice at every level (once via
255+
/// the speculative reserved-word arm, once via the unreserved-word fallback),
256+
/// doubling work per level. Post-fix the cost is linear in chain length.
257+
fn parse_prefix_keyword_call_chain(c: &mut Criterion) {
258+
let mut group = c.benchmark_group("parse_prefix_keyword_call_chain");
259+
let dialect = PostgreSqlDialect {};
260+
261+
for &n in &[10usize, 20, 30] {
262+
let sql = String::from("if(") + &"current_time(".repeat(n) + "x";
263+
264+
group.bench_function(format!("chain_{n}"), |b| {
265+
b.iter(|| {
266+
let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
267+
});
268+
});
269+
}
270+
271+
group.finish();
272+
}
273+
252274
criterion_group!(
253275
benches,
254276
basic_queries,
@@ -257,6 +279,7 @@ criterion_group!(
257279
parse_compound_chain,
258280
parse_named_arg_chain,
259281
parse_compound_keyword_chain,
260-
parse_not_chain
282+
parse_not_chain,
283+
parse_prefix_keyword_call_chain
261284
);
262285
criterion_main!(benches);

src/parser/mod.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use alloc::collections::BTreeSet;
1717
#[cfg(not(feature = "std"))]
1818
use alloc::{
1919
boxed::Box,
20+
collections::BTreeMap,
2021
format,
2122
string::{String, ToString},
2223
vec,
@@ -26,6 +27,9 @@ use core::{
2627
fmt::{self, Display},
2728
str::FromStr,
2829
};
30+
#[cfg(feature = "std")]
31+
use std::collections::BTreeMap;
32+
2933
use helpers::attached_token::AttachedToken;
3034
#[cfg(feature = "std")]
3135
use std::collections::BTreeSet;
@@ -371,6 +375,9 @@ pub struct Parser<'a> {
371375
/// `<ident>-NOT-<ident>.` ending in a parse error) trigger 2^N exploration
372376
/// because each `-NOT-` segment otherwise re-walks the rest of the chain.
373377
failed_unary_not_positions: BTreeSet<usize>,
378+
/// Cached errors from failed `parse_prefix` calls, keyed by start
379+
/// position. See [`Parser::parse_prefix`] for the 2^N pattern this guards.
380+
failed_prefix_positions: BTreeMap<usize, ParserError>,
374381
}
375382

376383
impl<'a> Parser<'a> {
@@ -398,6 +405,7 @@ impl<'a> Parser<'a> {
398405
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
399406
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
400407
failed_unary_not_positions: BTreeSet::new(),
408+
failed_prefix_positions: BTreeMap::new(),
401409
}
402410
}
403411

@@ -460,6 +468,7 @@ impl<'a> Parser<'a> {
460468
self.tokens = tokens;
461469
self.index = 0;
462470
self.failed_unary_not_positions.clear();
471+
self.failed_prefix_positions.clear();
463472
self
464473
}
465474

@@ -1731,6 +1740,23 @@ impl<'a> Parser<'a> {
17311740
return prefix;
17321741
}
17331742

1743+
// Memoize failed attempts to break 2^N speculation: both the
1744+
// reserved-word and unreserved-word arms can recurse into the
1745+
// same downstream position, so without this short-circuit
1746+
// inputs like `IF(current_time(current_time(...x` re-walk the
1747+
// chain at every level.
1748+
let start_index = self.index;
1749+
if let Some(cached) = self.failed_prefix_positions.get(&start_index) {
1750+
return Err(cached.clone());
1751+
}
1752+
let result = self.parse_prefix_inner();
1753+
if let Err(ref e) = result {
1754+
self.failed_prefix_positions.insert(start_index, e.clone());
1755+
}
1756+
result
1757+
}
1758+
1759+
fn parse_prefix_inner(&mut self) -> Result<Expr, ParserError> {
17341760
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
17351761
// string literal represents a literal of that type. Some examples:
17361762
//

tests/sqlparser_common.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19082,3 +19082,26 @@ fn parse_not_chain_no_exponential_blowup() {
1908219082
rx.recv_timeout(Duration::from_secs(5))
1908319083
.expect("parser should reject this quickly, not loop exponentially");
1908419084
}
19085+
19086+
/// Regression test for the 2^N parse-time blowup in `parse_prefix` on inputs
19087+
/// like `IF(current_time(current_time(...x`. Each nested `current_time(` used
19088+
/// to be explored twice at every level (once via the speculative reserved-word
19089+
/// arm, once via the unreserved-word fallback), doubling work per level.
19090+
/// Post-fix the failing parse short-circuits via the position-keyed cache.
19091+
#[test]
19092+
fn parse_prefix_keyword_call_chain_no_exponential_blowup() {
19093+
use std::sync::mpsc;
19094+
use std::thread;
19095+
use std::time::Duration;
19096+
19097+
let sql = String::from("if(") + &"current_time(".repeat(30) + "x";
19098+
19099+
let (tx, rx) = mpsc::channel();
19100+
thread::spawn(move || {
19101+
let _ = Parser::parse_sql(&PostgreSqlDialect {}, &sql);
19102+
let _ = tx.send(());
19103+
});
19104+
19105+
rx.recv_timeout(Duration::from_secs(5))
19106+
.expect("parser should reject this quickly, not loop exponentially");
19107+
}

0 commit comments

Comments
 (0)