From 785e5d40521e53a50a30cdcdf3c9313af69ecbbd Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Thu, 25 Jun 2026 11:55:11 +0000 Subject: [PATCH 1/2] expr: preserve operand errors in non-strict AND/OR reduce Closes: CLU-137 Fixes two user-visible bugs in how `reduce` treats an erroring operand of a non-strict AND/OR: * A materialized view or query whose `mz_now()` temporal filter sits under an OR of ANDs, e.g. WHERE (a AND ... AND mz_now() < t) OR (b AND ... AND mz_now() < t) could panic the compute worker (or fail to plan) with "Unsupported temporal predicate". The shared `mz_now() < t` conjunct was left buried inside the OR instead of being factored out, so temporal-filter extraction failed. * A query that should short-circuit, such as `WHERE col AND (1/0 = 1)`, could spuriously fail at runtime (e.g. "division by zero") even on rows where `col` is false. AND/OR are non-strict: `false AND ` is `false` and `true OR ` is `true`, so such rows must be filtered, not errored. Mechanism: the generic variadic fold in `reduce` replaced a call with any operand's literal error unconditionally, which is wrong for a function that is not strict in errors. Fold only for the strict variadics, excluding AND/OR and ErrorIfNull (which can absorb an operand's error at runtime). Keeping the erroring operand then reaches `undistribute_and_or`, which recombines operands across AND/OR's short-circuit boundary. That only preserves error semantics for operands common to every disjunct, so skip undistribution otherwise. A shared temporal predicate (`mz_now() < t`, whose cast can error) is common to every disjunct, so it is still factored out and stays extractable, unlike the reverted #37049's blunt `could_error()` skip. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/compute/src/render/context.rs | 10 ++- src/expr/src/scalar.rs | 120 +++++++++++++++++++++++++ src/expr/src/scalar/reduce/variadic.rs | 19 +++- test/sqllogictest/error_semantics.slt | 13 +++ test/sqllogictest/temporal.slt | 70 +++++++++++++++ 5 files changed, 227 insertions(+), 5 deletions(-) diff --git a/src/compute/src/render/context.rs b/src/compute/src/render/context.rs index fabd2ba318718..75a0aed50bead 100644 --- a/src/compute/src/render/context.rs +++ b/src/compute/src/render/context.rs @@ -938,7 +938,13 @@ impl<'scope, T: RenderTimestamp> CollectionBundle<'scope, T> { VecCollection<'scope, T, DataflowErrorSer, Diff>, ) { mfp.optimize(); - let mfp_plan = mfp.clone().into_plan().unwrap(); + // NOTE: only an un-extractable temporal predicate makes `into_plan` fail; + // `ExprPrepMaintained` rejects those at plan time and peeks materialize + // `mz_now()`, so none reach render. + let mfp_plan = mfp + .clone() + .into_plan() + .expect("temporal predicate extractable"); // If the MFP is trivial, we can just call `as_collection`. // In the case that we weren't going to apply the `key_val` optimization, @@ -959,7 +965,7 @@ impl<'scope, T: RenderTimestamp> CollectionBundle<'scope, T> { let max_demand = mfp.demand().last().map(|x| *x + 1).unwrap_or(0); mfp.permute_fn(|c| c, max_demand); mfp.optimize(); - let mfp_plan = mfp.into_plan().unwrap(); + let mfp_plan = mfp.into_plan().expect("temporal predicate extractable"); let mut datum_vec = DatumVec::new(); // Wrap in an `Rc` so that lifetimes work out. diff --git a/src/expr/src/scalar.rs b/src/expr/src/scalar.rs index 39d2de76d8ff9..1ef06bf5e0745 100644 --- a/src/expr/src/scalar.rs +++ b/src/expr/src/scalar.rs @@ -849,6 +849,49 @@ impl MirScalarExpr { } } + /// Whether [`Self::undistribute_and_or`] can rewrite `self` without changing + /// its error semantics. + /// + /// Undistribution recombines the operands *not* common to every disjunct into + /// a new AND/OR, changing the short-circuit context they evaluate in. Only a + /// dominating operand (`false`/`true`), not a NULL one, absorbs another + /// operand's error, so recombining a could-error operand can change whether a + /// row errors (e.g. `a OR (a AND )` raises for a NULL `a`, but + /// undistributes to `a`). Operands common to every disjunct are factored out + /// unchanged and stay sound, which is what lets a shared temporal filter like + /// `mz_now() < ` (whose cast can error) be factored out, as the renderer + /// requires. + fn undistribution_preserves_errors(&self) -> bool { + let MirScalarExpr::CallVariadic { + func: func @ (VariadicFunc::And(_) | VariadicFunc::Or(_)), + exprs, + } = self + else { + return true; + }; + // Fast path for the common case: with no erroring operand there is + // nothing to preserve, so skip the per-operand bookkeeping below. + // `could_error` recurses, so this also covers nested operands. + if !exprs.iter().any(|o| o.could_error()) { + return true; + } + let inner = func.switch_and_or(); + // The operands of a disjunct (a non-`inner` disjunct is its own singleton). + let operands = |o: &MirScalarExpr| match o { + MirScalarExpr::CallVariadic { func, exprs } if *func == inner => exprs.clone(), + _ => vec![o.clone()], + }; + let common = exprs + .iter() + .map(|o| BTreeSet::from_iter(operands(o))) + .reduce(|a, b| &a & &b) + .unwrap_or_default(); + exprs + .iter() + .flat_map(operands) + .all(|op| !op.could_error() || common.contains(&op)) + } + /// AND/OR undistribution (factoring out) to apply at each `MirScalarExpr`. /// /// This method attempts to apply one of the [distribution laws][distributivity] @@ -922,6 +965,11 @@ impl MirScalarExpr { while old_self != *self { old_self = self.clone(); self.reduce_and_canonicalize_and_or(); // We don't want to deal with 1-arg AND/OR at the top + + if !self.undistribution_preserves_errors() { + return; + } + if let MirScalarExpr::CallVariadic { exprs: outer_operands, func: outer_func @ (VariadicFunc::Or(_) | VariadicFunc::And(_)), @@ -2475,6 +2523,78 @@ mod tests { use super::*; use crate::scalar::func::variadic::Coalesce; + #[mz_ore::test] + fn test_reduce_and_or_preserves_operand_errors() { + // #37049: AND/OR are non-strict, so a `false`/`true` operand absorbs an + // erroring operand at runtime. `reduce` must not fold or absorb an + // erroring operand into an error the original never raises. + let bool_typ = ReprScalarType::Bool; + let types = vec![bool_typ.clone().nullable(true)]; + let err = || MirScalarExpr::literal(Err(EvalError::DivisionByZero), bool_typ.clone()); + let col = || MirScalarExpr::column(0); + let arena = RowArena::new(); + + // `x AND ` must not fold to the error: `false AND ` is `false`. + let mut and = col().and(err()); + and.reduce(&types); + assert!(!and.is_literal_err(), "{and:?}"); + assert_eq!(and.eval(&[Datum::False], &arena), Ok(Datum::False)); + + // `a OR (a AND )` must not absorb to `a`: a NULL `a` surfaces the + // error (`NULL OR (NULL AND )` = ``). + let mut or = col().or(col().and(err())); + or.reduce(&types); + assert_ne!(or, col(), "{or:?}"); + assert_eq!(or.eval(&[Datum::True], &arena), Ok(Datum::True)); + assert_eq!(or.eval(&[Datum::False], &arena), Ok(Datum::False)); + assert_eq!( + or.eval(&[Datum::Null], &arena), + Err(EvalError::DivisionByZero) + ); + } + + #[mz_ore::test] + fn test_reduce_and_or_undistributes_common_error() { + // CLU-137: undistribution must still factor out a could-error operand + // common to every disjunct (e.g. a temporal `mz_now() < `), so it + // becomes a standalone conjunct the renderer can extract. The reverted + // #37049 guard skipped undistribution for any could-error expression, + // burying such predicates inside the OR. + let bool_typ = ReprScalarType::Bool; + let types = vec![ + bool_typ.clone().nullable(true), + bool_typ.clone().nullable(true), + ]; + let err = || MirScalarExpr::literal(Err(EvalError::DivisionByZero), bool_typ.clone()); + let arena = RowArena::new(); + + // `(a AND ) OR (b AND )` --> ` AND (a OR b)`. + let original = MirScalarExpr::column(0) + .and(err()) + .or(MirScalarExpr::column(1).and(err())); + let mut reduced = original.clone(); + reduced.reduce(&types); + assert!( + matches!( + &reduced, + MirScalarExpr::CallVariadic { + func: VariadicFunc::And(_), + .. + } + ), + "common error operand not factored out: {reduced:?}" + ); + // ...and error semantics are preserved over every assignment. + for a in [Datum::True, Datum::False, Datum::Null] { + for b in [Datum::True, Datum::False, Datum::Null] { + assert_eq!( + reduced.eval(&[a, b], &arena), + original.eval(&[a, b], &arena) + ); + } + } + } + #[mz_ore::test] #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` fn test_reduce() { diff --git a/src/expr/src/scalar/reduce/variadic.rs b/src/expr/src/scalar/reduce/variadic.rs index e6cfbc2e4dec2..8dee7e99f6c37 100644 --- a/src/expr/src/scalar/reduce/variadic.rs +++ b/src/expr/src/scalar/reduce/variadic.rs @@ -51,9 +51,22 @@ pub(super) fn reduce_call_variadic( *e = MirScalarExpr::literal_null(e.typ(column_types).scalar_type); return; } - if let Some(err) = exprs.iter().find_map(|x| x.as_literal_err()) { - *e = MirScalarExpr::literal(Err(err.clone()), e.typ(column_types).scalar_type); - return; + // Fold the call to an operand's literal error only when the function is + // strict in errors: any operand error must make the whole call error. The + // non-strict variadics are excluded, because they can absorb an operand's + // error at runtime: AND/OR via a dominating `false`/`true` operand + // (`false AND ` is `false`), and ErrorIfNull because it evaluates its + // message argument only when the first argument is null. Every other variadic + // here evaluates all of its operands, so the fold is valid. (Coalesce, also + // non-strict, bailed out above.) + if !matches!( + func, + VariadicFunc::And(_) | VariadicFunc::Or(_) | VariadicFunc::ErrorIfNull(_) + ) { + if let Some(err) = exprs.iter().find_map(|x| x.as_literal_err()) { + *e = MirScalarExpr::literal(Err(err.clone()), e.typ(column_types).scalar_type); + return; + } } // Per-function dispatch. Arms are mutually exclusive on discriminant; the diff --git a/test/sqllogictest/error_semantics.slt b/test/sqllogictest/error_semantics.slt index 824c26a60490b..1f504c2379521 100644 --- a/test/sqllogictest/error_semantics.slt +++ b/test/sqllogictest/error_semantics.slt @@ -93,3 +93,16 @@ select coalesce(a, (select a/b from test)) from test; query error Evaluation error: division by zero select *, case when a = 5 then (select a/b from test) else 7 end from test; + +# Regression for #37049. AND/OR are non-strict: a `false`/`true` operand absorbs +# an erroring operand at runtime (`false AND ` is `false`), so `reduce` +# must not fold `x AND ` / `x OR ` to the error. Before the fix +# these spuriously errored with division by zero (test.a = 1). +query I +select a from test where (a = 2) and (1/0 = 1); +---- + +query I +select a from test where (a = 1) or (1/0 = 1); +---- +1 diff --git a/test/sqllogictest/temporal.slt b/test/sqllogictest/temporal.slt index bf67de30f4401..2d9cfdab8b1cf 100644 --- a/test/sqllogictest/temporal.slt +++ b/test/sqllogictest/temporal.slt @@ -440,3 +440,73 @@ FROM (SELECT CAST('62143-12-31' AS date) + '200000 YEARS' AS c2 FROM (SELECT FROM (SELECT FROM t) AS subq_0) AS subq_1 WHERE pg_catalog."current_timestamp"() = pg_catalog.pg_postmaster_start_time()) AS subq_2; + +# Regression test for https://linear.app/materializeinc/issue/CLU-137 +# +# A temporal predicate (`mz_now() < ...`) shared across both branches of a +# disjunction of conjunctions must be factored out of the disjunction, so that +# it becomes a standalone conjunct the renderer can extract as a temporal +# bound. The factoring is done by `undistribute_and_or` during predicate +# canonicalization. The error-propagation fix in #37049 had skipped +# undistribution for any expression that could error, and `mz_now()` always +# counts as could-error, so the temporal predicate stayed buried inside the OR +# and the renderer panicked with "Unsupported temporal predicate". + +# A constant collection (`VALUES`) is readable at any logical time, so the MV +# can be queried `AS OF` a small timestamp, like the `valid` MV above. +statement ok +CREATE VIEW clu137 (active, confirmed, enabled, label, valid_until) AS VALUES + (true, true, true, CAST(NULL AS text), 10), + (true, true, true, '', 20), + (true, true, true, 'x', 30), + (false, true, true, NULL, 40); + +statement ok +CREATE MATERIALIZED VIEW clu137_mv AS +SELECT valid_until FROM clu137 +WHERE (active AND confirmed AND enabled AND label IS NULL AND mz_now() < valid_until) + OR (active AND confirmed AND enabled AND label = '' AND mz_now() < valid_until); + +query I rowsort +SELECT * FROM clu137_mv AS OF 5; +---- +10 +20 + +query I rowsort +SELECT * FROM clu137_mv AS OF 15; +---- +20 + +query I rowsort +SELECT * FROM clu137_mv AS OF 25; +---- + +# a non-common operand that could error keeps the shared temporal predicate +# buried in the OR (factoring it out would change error semantics), so +# extraction rejects it at plan time. A clean error, not a crash. +statement ok +CREATE VIEW clu137_fallible (active, label, divisor, valid_until) AS VALUES + (true, CAST(NULL AS text), 5, 10), + (true, '', 5, 20); + +# `(100 / divisor) > 0` (can error) sits in only the second disjunct. +statement error Unsupported temporal predicate +CREATE MATERIALIZED VIEW clu137_fallible_mv AS +SELECT valid_until FROM clu137_fallible +WHERE (active AND label IS NULL AND mz_now() < valid_until) + OR (active AND label = '' AND mz_now() < valid_until AND (100 / divisor) > 0); + +# But a fallible operand common to every disjunct factors out fine: the limit is +# commonality, not fallibility. +statement ok +CREATE MATERIALIZED VIEW clu137_common_fallible_mv AS +SELECT valid_until FROM clu137_fallible +WHERE (active AND label IS NULL AND mz_now() < valid_until AND (100 / divisor) > 0) + OR (active AND label = '' AND mz_now() < valid_until AND (100 / divisor) > 0); + +query I rowsort +SELECT * FROM clu137_common_fallible_mv AS OF 5; +---- +10 +20 From 75002efceb70176a2f8cf648f01e4e73a1e94179 Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Fri, 26 Jun 2026 07:31:10 +0000 Subject: [PATCH 2/2] Prevent further crashes --- src/expr/src/scalar.rs | 63 +++++++++++++++++++++++++++++++--- test/sqllogictest/temporal.slt | 31 +++++++++-------- 2 files changed, 76 insertions(+), 18 deletions(-) diff --git a/src/expr/src/scalar.rs b/src/expr/src/scalar.rs index 1ef06bf5e0745..2762dbce4403e 100644 --- a/src/expr/src/scalar.rs +++ b/src/expr/src/scalar.rs @@ -857,10 +857,12 @@ impl MirScalarExpr { /// dominating operand (`false`/`true`), not a NULL one, absorbs another /// operand's error, so recombining a could-error operand can change whether a /// row errors (e.g. `a OR (a AND )` raises for a NULL `a`, but - /// undistributes to `a`). Operands common to every disjunct are factored out - /// unchanged and stay sound, which is what lets a shared temporal filter like - /// `mz_now() < ` (whose cast can error) be factored out, as the renderer - /// requires. + /// undistributes to `a`). + /// + /// Temporal predicates are exempt (see the `contains_temporal` check): a + /// shared `mz_now() t` must stay liftable out of the OR or the dataflow + /// can't be planned, and `mz_now()` is unmaterializable so there is no runtime + /// error to preserve. fn undistribution_preserves_errors(&self) -> bool { let MirScalarExpr::CallVariadic { func: func @ (VariadicFunc::And(_) | VariadicFunc::Or(_)), @@ -869,6 +871,14 @@ impl MirScalarExpr { else { return true; }; + // A shared temporal predicate (`mz_now() t`) must stay liftable out + // of the OR, else the renderer can't extract its time bound and the view + // fails to plan (and an existing one fails to re-optimize on upgrade, + // halting environmentd). `mz_now()` is unmaterializable, so it carries no + // runtime value and there is no error semantics to preserve here anyway. + if self.contains_temporal() { + return true; + } // Fast path for the common case: with no erroring operand there is // nothing to preserve, so skip the per-operand bookkeeping below. // `could_error` recurses, so this also covers nested operands. @@ -2595,6 +2605,51 @@ mod tests { } } + #[mz_ore::test] + fn test_reduce_and_or_undistributes_temporal_despite_noncommon_error() { + // CLU-137: a temporal predicate `T = mz_now() < t` shared by every + // disjunct must still be factored out even when a disjunct also carries a + // *non-common* operand that could error (here `(#3 / #4) > 0`). Factoring + // T out is not error-preserving in that case, but the error-preservation + // guard exempts temporal predicates: T must stay extractable or the view + // can't be planned (and an existing one fails to re-optimize on upgrade). + let types = vec![ + ReprScalarType::Bool.nullable(true), // #0 + ReprScalarType::Bool.nullable(true), // #1 + ReprScalarType::MzTimestamp.nullable(true), // #2 + ReprScalarType::Int32.nullable(true), // #3 + ReprScalarType::Int32.nullable(true), // #4 + ]; + let col = MirScalarExpr::column; + let temporal = || { + MirScalarExpr::CallUnmaterializable(UnmaterializableFunc::MzNow) + .call_binary(col(2), func::Lt) + }; + let noncommon_err = || { + col(3).call_binary(col(4), func::DivInt32).call_binary( + MirScalarExpr::literal_ok(Datum::Int32(0), ReprScalarType::Int32), + func::Gt, + ) + }; + + // `(#0 AND T) OR (#1 AND T AND E)`: T common, E non-common, both could_error. + let mut reduced = col(0) + .and(temporal()) + .or(col(1).and(temporal()).and(noncommon_err())); + reduced.reduce(&types); + + // T is lifted to a top-level AND conjunct, so the renderer can extract it. + let factored_out = matches!( + &reduced, + MirScalarExpr::CallVariadic { func: VariadicFunc::And(_), exprs } + if exprs.contains(&temporal()) + ); + assert!( + factored_out, + "temporal predicate not factored out: {reduced:?}" + ); + } + #[mz_ore::test] #[cfg_attr(miri, ignore)] // error: unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux` fn test_reduce() { diff --git a/test/sqllogictest/temporal.slt b/test/sqllogictest/temporal.slt index 2d9cfdab8b1cf..54a96b718d112 100644 --- a/test/sqllogictest/temporal.slt +++ b/test/sqllogictest/temporal.slt @@ -482,31 +482,34 @@ query I rowsort SELECT * FROM clu137_mv AS OF 25; ---- -# a non-common operand that could error keeps the shared temporal predicate -# buried in the OR (factoring it out would change error semantics), so -# extraction rejects it at plan time. A clean error, not a crash. +# A temporal predicate shared by every disjunct must stay extractable even when +# a disjunct also has a non-common operand that could error (here +# `(100 / divisor) > 0`). `undistribute_and_or` exempts temporal predicates from +# its error-preservation guard, so `mz_now() < valid_until` is still factored out +# of the OR and the view builds. Without the exemption it would fail to plan, +# which would also break re-optimization of an existing such view on upgrade. statement ok CREATE VIEW clu137_fallible (active, label, divisor, valid_until) AS VALUES (true, CAST(NULL AS text), 5, 10), (true, '', 5, 20); -# `(100 / divisor) > 0` (can error) sits in only the second disjunct. -statement error Unsupported temporal predicate +statement ok CREATE MATERIALIZED VIEW clu137_fallible_mv AS SELECT valid_until FROM clu137_fallible WHERE (active AND label IS NULL AND mz_now() < valid_until) OR (active AND label = '' AND mz_now() < valid_until AND (100 / divisor) > 0); -# But a fallible operand common to every disjunct factors out fine: the limit is -# commonality, not fallibility. -statement ok -CREATE MATERIALIZED VIEW clu137_common_fallible_mv AS -SELECT valid_until FROM clu137_fallible -WHERE (active AND label IS NULL AND mz_now() < valid_until AND (100 / divisor) > 0) - OR (active AND label = '' AND mz_now() < valid_until AND (100 / divisor) > 0); - query I rowsort -SELECT * FROM clu137_common_fallible_mv AS OF 5; +SELECT * FROM clu137_fallible_mv AS OF 5; ---- 10 20 + +query I rowsort +SELECT * FROM clu137_fallible_mv AS OF 15; +---- +20 + +query I rowsort +SELECT * FROM clu137_fallible_mv AS OF 25; +----