From 04a048f59bf270699d381e45ee359d95664f3e24 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Wed, 20 May 2026 20:33:21 +0200 Subject: [PATCH 1/5] Parser: fix exponential parse time on compound chains --- src/parser/mod.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 763b876de..9e63a4860 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2028,14 +2028,16 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed placeholder } - // Fallback to parsing an arbitrary expression, but restrict to expression - // types that are valid after the dot operator. This ensures that e.g. - // `T.interval` is parsed as a compound identifier, not as an interval - // expression. + // Parse a single field component, restricted to expression types valid + // after `.` (so e.g. `T.interval` is a compound identifier, not an + // interval expression). Using `parse_prefix` here rather than + // `parse_subexpr` avoids 2^N work on inputs like `IF a.b.c...x.#`: + // the outer loop already consumes successive `.field` segments, so a + // recursive `parse_subexpr` would re-walk the rest of the chain at + // every dot. _ => { let expr = self.maybe_parse(|parser| { - let expr = parser - .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?; + let expr = parser.parse_prefix()?; match &expr { Expr::CompoundFieldAccess { .. } | Expr::CompoundIdentifier(_) @@ -2050,14 +2052,9 @@ impl<'a> Parser<'a> { })?; match expr { - // If we get back a compound field access or identifier, - // we flatten the nested expression. - // For example if the current root is `foo` - // and we get back a compound identifier expression `bar.baz` - // The full expression should be `foo.bar.baz` (i.e. - // a root with an access chain with 2 entries) and not - // `foo.(bar.baz)` (i.e. a root with an access chain with - // 1 entry`). 
+ // `parse_prefix` does not itself follow compound chains, but a + // dialect override could still return a compound expression, so + // keep the flatten arms for safety. Some(Expr::CompoundFieldAccess { root, access_chain }) => { chain.push(AccessExpr::Dot(*root)); chain.extend(access_chain); From f0d0a24cea5062bb9713029f9af445f9e5bf6cc4 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Wed, 20 May 2026 20:33:21 +0200 Subject: [PATCH 2/5] PostgreSQL: regression test for compound-chain blowup --- tests/sqlparser_postgres.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 274988be0..fb0f259cd 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -9243,3 +9243,27 @@ fn parse_lock_table() { } } } + +/// `parse_compound_expr` used to do 2^N work on `IF a.b.c...x.#` because every +/// `.` re-entered `parse_subexpr` over the rest of the chain. +#[test] +fn parse_compound_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let chain: String = (0..30) + .map(|i| format!("a{i}")) + .collect::<Vec<_>>() + .join("."); + let sql = format!("IF {chain}.#"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = sqlparser::parser::Parser::parse_sql(&PostgreSqlDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should reject this quickly, not loop exponentially"); +} From e1910856e477b8b7604774412c834044d393b851 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Thu, 21 May 2026 11:16:31 +0200 Subject: [PATCH 3/5] Parser: restore flatten comment in parse_compound_expr --- src/parser/mod.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9e63a4860..91ac386ae 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2052,9 +2052,14 @@ impl<'a> Parser<'a> { })?; match 
expr { - // `parse_prefix` does not itself follow compound chains, but a - // dialect override could still return a compound expression, so - // keep the flatten arms for safety. + // If we get back a compound field access or identifier, + // we flatten the nested expression. + // For example if the current root is `foo` + // and we get back a compound identifier expression `bar.baz` + // The full expression should be `foo.bar.baz` (i.e. + // a root with an access chain with 2 entries) and not + // `foo.(bar.baz)` (i.e. a root with an access chain with + // 1 entry`). Some(Expr::CompoundFieldAccess { root, access_chain }) => { chain.push(AccessExpr::Dot(*root)); chain.extend(access_chain); From 43152a249c7f43fd6b22c76f6a5b9fb6814197b8 Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Thu, 21 May 2026 11:16:35 +0200 Subject: [PATCH 4/5] Bench: move compound-chain regression to sqlparser_bench --- sqlparser_bench/benches/sqlparser_bench.rs | 33 +++++++++++++++++++++- tests/sqlparser_postgres.rs | 24 ---------------- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index b52683aa5..46c201540 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -152,5 +152,36 @@ fn parse_many_identifiers(c: &mut Criterion) { group.finish(); } -criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers); +/// Benchmark parsing pathological compound chains that previously caused 2^N +/// work in `parse_compound_expr`. The input `IF a0.a1...aN.#` is rejected at the +/// trailing `#`, which used to force exponential backtracking through +/// the chain. 
+fn parse_compound_chain(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_compound_chain"); + let dialect = GenericDialect {}; + + for &n in &[10usize, 20, 30] { + let chain = (0..n) + .map(|i| format!("a{i}")) + .collect::<Vec<_>>() + .join("."); + let sql = format!("IF {chain}.#"); + + group.bench_function(format!("chain_{n}"), |b| { + b.iter(|| { + let _ = Parser::parse_sql(&dialect, std::hint::black_box(&sql)); + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + basic_queries, + word_to_ident, + parse_many_identifiers, + parse_compound_chain +); criterion_main!(benches); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fb0f259cd..274988be0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -9243,27 +9243,3 @@ fn parse_lock_table() { } } } - -/// `parse_compound_expr` used to do 2^N work on `IF a.b.c...x.#` because every -/// `.` re-entered `parse_subexpr` over the rest of the chain. -#[test] -fn parse_compound_chain_no_exponential_blowup() { - use std::sync::mpsc; - use std::thread; - use std::time::Duration; - - let chain: String = (0..30) - .map(|i| format!("a{i}")) - .collect::<Vec<_>>() - .join("."); - let sql = format!("IF {chain}.#"); - - let (tx, rx) = mpsc::channel(); - thread::spawn(move || { - let _ = sqlparser::parser::Parser::parse_sql(&PostgreSqlDialect {}, &sql); - let _ = tx.send(()); - }); - - rx.recv_timeout(Duration::from_secs(5)) - .expect("parser should reject this quickly, not loop exponentially"); -} From 22ab29b3710677c890877a88ece4a999bca8af6e Mon Sep 17 00:00:00 2001 From: LucaCappelletti94 Date: Thu, 21 May 2026 11:52:10 +0200 Subject: [PATCH 5/5] Parser: regression test for compound-chain blowup --- tests/sqlparser_common.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index bcf3c1d55..f470b93ca 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ 
-18977,3 +18977,30 @@ fn parse_non_pg_dialects_keep_xml_names_as_regular_identifiers() { let dialects = all_dialects_except(|d| d.supports_xml_expressions()); dialects.verified_only_select("SELECT xml FROM t"); } + +/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on +/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the +/// main thread asserts that it reports back within a generous timeout. Post-fix +/// the parser returns `Err` in well under a millisecond, so the timeout is a +/// hang guard, not a perf threshold. +#[test] +fn parse_compound_chain_no_exponential_blowup() { + use std::sync::mpsc; + use std::thread; + use std::time::Duration; + + let chain: String = (0..30) + .map(|i| format!("a{i}")) + .collect::<Vec<_>>() + .join("."); + let sql = format!("IF {chain}.#"); + + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let _ = Parser::parse_sql(&GenericDialect {}, &sql); + let _ = tx.send(()); + }); + + rx.recv_timeout(Duration::from_secs(5)) + .expect("parser should reject this quickly, not loop exponentially"); +}