Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion sqlparser_bench/benches/sqlparser_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,36 @@ fn parse_many_identifiers(c: &mut Criterion) {
group.finish();
}

criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers);
/// Benchmarks the parser on long dotted chains that end in an invalid token.
///
/// Every input has the shape `IF a0.a1...aK.#`: `n` dot-separated identifiers
/// (`a0` through `a{n-1}`) followed by a `#` that the parser rejects. That
/// rejection used to make `parse_compound_expr` backtrack through the whole
/// chain, costing roughly 2^N work, so these cases exist to catch the
/// exponential behaviour if it ever returns.
fn parse_compound_chain(c: &mut Criterion) {
    let dialect = GenericDialect {};
    let mut group = c.benchmark_group("parse_compound_chain");

    for len in [10usize, 20, 30] {
        // Build the chain segments first, then splice them into the statement.
        let segments: Vec<String> = (0..len).map(|idx| format!("a{idx}")).collect();
        let sql = format!("IF {}.#", segments.join("."));
        let bench_name = format!("chain_{len}");

        group.bench_function(bench_name, |bencher| {
            bencher.iter(|| {
                // Only the time spent parsing matters here; the outcome of the
                // parse is intentionally discarded.
                let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql));
            });
        });
    }

    group.finish();
}

// Benchmark group `benches`: names each benchmark function that belongs to
// the group, one per line.
criterion_group!(
benches,
basic_queries,
word_to_ident,
parse_many_identifiers,
parse_compound_chain
);
// Passes the `benches` group to Criterion's entry-point macro.
criterion_main!(benches);
14 changes: 8 additions & 6 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2028,14 +2028,16 @@ impl<'a> Parser<'a> {
chain.push(AccessExpr::Dot(expr));
self.advance_token(); // The consumed placeholder
}
// Fallback to parsing an arbitrary expression, but restrict to expression
// types that are valid after the dot operator. This ensures that e.g.
// `T.interval` is parsed as a compound identifier, not as an interval
// expression.
// Parse a single field component, restricted to expression types valid
// after `.` (so e.g. `T.interval` is a compound identifier, not an
// interval expression). Using `parse_prefix` here rather than
// `parse_subexpr` avoids 2^N work on inputs like `IF a.b.c...x.#`:
// the outer loop already consumes successive `.field` segments, so a
// recursive `parse_subexpr` would re-walk the rest of the chain at
// every dot.
_ => {
let expr = self.maybe_parse(|parser| {
let expr = parser
.parse_subexpr(parser.dialect.prec_value(Precedence::Period))?;
let expr = parser.parse_prefix()?;
match &expr {
Expr::CompoundFieldAccess { .. }
| Expr::CompoundIdentifier(_)
Expand Down
27 changes: 27 additions & 0 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18977,3 +18977,30 @@ fn parse_non_pg_dialects_keep_xml_names_as_regular_identifiers() {
let dialects = all_dialects_except(|d| d.supports_xml_expressions());
dialects.verified_only_select("SELECT xml FROM t");
}

/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
/// inputs like `IF a0.a1...aN.#`.
///
/// The parse runs on a worker thread, and the main thread waits for a
/// completion signal with a generous timeout. Post-fix the parser returns
/// `Err` in well under a millisecond, so the timeout is a hang guard, not a
/// perf threshold.
///
/// The two ways the signal can fail to arrive are reported separately: a
/// timeout means the parser is still grinding through the chain, whereas a
/// disconnected channel means the worker panicked before it could report.
#[test]
fn parse_compound_chain_no_exponential_blowup() {
    use std::sync::mpsc::{self, RecvTimeoutError};
    use std::thread;
    use std::time::Duration;

    // 30 segments is enough for 2^N work to blow far past the timeout.
    let chain: String = (0..30)
        .map(|i| format!("a{i}"))
        .collect::<Vec<_>>()
        .join(".");
    let sql = format!("IF {chain}.#");

    let (tx, rx) = mpsc::channel();
    thread::spawn(move || {
        // Only completion is signalled; the parse result itself is ignored.
        let _ = Parser::parse_sql(&GenericDialect {}, &sql);
        // The receiver may already be gone if the main thread timed out.
        let _ = tx.send(());
    });

    match rx.recv_timeout(Duration::from_secs(5)) {
        Ok(()) => {}
        Err(RecvTimeoutError::Timeout) => {
            panic!("parser should reject this quickly, not loop exponentially")
        }
        // The sender was dropped without sending, i.e. the worker unwound
        // before reaching `tx.send`. That is a parser panic, not a hang.
        Err(RecvTimeoutError::Disconnected) => {
            panic!("parser worker thread panicked before reporting completion")
        }
    }
}
Loading