diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs
index b52683aa55..46c2015400 100644
--- a/sqlparser_bench/benches/sqlparser_bench.rs
+++ b/sqlparser_bench/benches/sqlparser_bench.rs
@@ -152,5 +152,36 @@ fn parse_many_identifiers(c: &mut Criterion) {
     group.finish();
 }
 
-criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers);
+/// Benchmark parsing pathological compound chains that previously caused 2^N
+/// work in `parse_compound_expr`. The input `IF a0.a1...aN.#` rejects at the
+/// trailing `#`, which used to force quadratic-or-worse backtracking through
+/// the chain.
+fn parse_compound_chain(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_compound_chain");
+    let dialect = GenericDialect {};
+
+    for &n in &[10usize, 20, 30] {
+        let chain = (0..n)
+            .map(|i| format!("a{i}"))
+            .collect::<Vec<_>>()
+            .join(".");
+        let sql = format!("IF {chain}.#");
+
+        group.bench_function(format!("chain_{n}"), |b| {
+            b.iter(|| {
+                let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    basic_queries,
+    word_to_ident,
+    parse_many_identifiers,
+    parse_compound_chain
+);
 criterion_main!(benches);
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 763b876dec..91ac386aef 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2028,14 +2028,16 @@ impl<'a> Parser<'a> {
                         chain.push(AccessExpr::Dot(expr));
                         self.advance_token(); // The consumed placeholder
                     }
-                    // Fallback to parsing an arbitrary expression, but restrict to expression
-                    // types that are valid after the dot operator. This ensures that e.g.
-                    // `T.interval` is parsed as a compound identifier, not as an interval
-                    // expression.
+                    // Parse a single field component, restricted to expression types valid
+                    // after `.` (so e.g. `T.interval` is a compound identifier, not an
+                    // interval expression). Using `parse_prefix` here rather than
+                    // `parse_subexpr` avoids 2^N work on inputs like `IF a.b.c...x.#`:
+                    // the outer loop already consumes successive `.field` segments, so a
+                    // recursive `parse_subexpr` would re-walk the rest of the chain at
+                    // every dot.
                     _ => {
                         let expr = self.maybe_parse(|parser| {
-                            let expr = parser
-                                .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?;
+                            let expr = parser.parse_prefix()?;
                             match &expr {
                                 Expr::CompoundFieldAccess { .. }
                                 | Expr::CompoundIdentifier(_)
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index bcf3c1d55f..f470b93ca3 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -18977,3 +18977,30 @@ fn parse_non_pg_dialects_keep_xml_names_as_regular_identifiers() {
     let dialects = all_dialects_except(|d| d.supports_xml_expressions());
     dialects.verified_only_select("SELECT xml FROM t");
 }
+
+/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
+/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the
+/// main thread asserts that it reports back within a generous timeout. Post-fix
+/// the parser returns `Err` in well under a millisecond, so the timeout is a
+/// hang guard, not a perf threshold.
+#[test]
+fn parse_compound_chain_no_exponential_blowup() {
+    use std::sync::mpsc;
+    use std::thread;
+    use std::time::Duration;
+
+    let chain: String = (0..30)
+        .map(|i| format!("a{i}"))
+        .collect::<Vec<_>>()
+        .join(".");
+    let sql = format!("IF {chain}.#");
+
+    let (tx, rx) = mpsc::channel();
+    thread::spawn(move || {
+        let _ = Parser::parse_sql(&GenericDialect {}, &sql);
+        let _ = tx.send(());
+    });
+
+    rx.recv_timeout(Duration::from_secs(5))
+        .expect("parser should reject this quickly, not loop exponentially");
+}