Skip to content

Commit 2138a72

Browse files
committed
Add stats rewrite session API
Signed-off-by: Nicholas Gates <nick@nickgates.com>
1 parent b3e1673 commit 2138a72

7 files changed

Lines changed: 324 additions & 0 deletions

File tree

STYLE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
- Maintain a clear separation between logical and physical types
6262
- Keep functions focused and reasonably sized
6363
- Separate public API from internal implementation details
64+
- Prefer one public entrypoint for each piece of functionality; keep helper APIs crate-private
65+
unless callers need them independently.
6466
- Use modules to organize related functionality
6567
- Place tests in a `tests` module or separate test files
6668

vortex-array/public-api.lock

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12778,10 +12778,14 @@ pub fn vortex_array::expr::Expression::children(&self) -> &alloc::sync::Arc<allo
1277812778

1277912779
pub fn vortex_array::expr::Expression::display_tree(&self) -> impl core::fmt::Display
1278012780

12781+
pub fn vortex_array::expr::Expression::falsify(&self, &vortex_session::VortexSession) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
12782+
1278112783
pub fn vortex_array::expr::Expression::fmt_sql(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
1278212784

1278312785
pub fn vortex_array::expr::Expression::return_dtype(&self, &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_array::dtype::DType>
1278412786

12787+
pub fn vortex_array::expr::Expression::satisfy(&self, &vortex_session::VortexSession) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::Expression>>
12788+
1278512789
pub fn vortex_array::expr::Expression::scalar_fn(&self) -> &vortex_array::scalar_fn::ScalarFnRef
1278612790

1278712791
pub fn vortex_array::expr::Expression::stat_expression(&self, vortex_array::expr::stats::Stat, &dyn vortex_array::expr::pruning::StatsCatalog) -> core::option::Option<vortex_array::expr::Expression>
@@ -19774,6 +19778,24 @@ pub fn vortex_array::stats::expr::sum(vortex_array::expr::Expression) -> vortex_
1977419778

1977519779
pub mod vortex_array::stats::flatbuffers
1977619780

19781+
pub mod vortex_array::stats::session
19782+
19783+
pub struct vortex_array::stats::session::StatsRewriteSession
19784+
19785+
impl core::default::Default for vortex_array::stats::StatsRewriteSession
19786+
19787+
pub fn vortex_array::stats::StatsRewriteSession::default() -> vortex_array::stats::StatsRewriteSession
19788+
19789+
impl core::fmt::Debug for vortex_array::stats::StatsRewriteSession
19790+
19791+
pub fn vortex_array::stats::StatsRewriteSession::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
19792+
19793+
impl vortex_session::SessionVar for vortex_array::stats::StatsRewriteSession
19794+
19795+
pub fn vortex_array::stats::StatsRewriteSession::as_any(&self) -> &dyn core::any::Any
19796+
19797+
pub fn vortex_array::stats::StatsRewriteSession::as_any_mut(&mut self) -> &mut dyn core::any::Any
19798+
1977719799
pub struct vortex_array::stats::ArrayStats
1977819800

1977919801
impl vortex_array::stats::ArrayStats
@@ -19834,6 +19856,22 @@ pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::is_empty(&self) -> bool
1983419856

1983519857
pub fn vortex_array::stats::MutTypedStatsSetRef<'_, '_>::len(&self) -> usize
1983619858

19859+
pub struct vortex_array::stats::StatsRewriteSession
19860+
19861+
impl core::default::Default for vortex_array::stats::StatsRewriteSession
19862+
19863+
pub fn vortex_array::stats::StatsRewriteSession::default() -> vortex_array::stats::StatsRewriteSession
19864+
19865+
impl core::fmt::Debug for vortex_array::stats::StatsRewriteSession
19866+
19867+
pub fn vortex_array::stats::StatsRewriteSession::fmt(&self, &mut core::fmt::Formatter<'_>) -> core::fmt::Result
19868+
19869+
impl vortex_session::SessionVar for vortex_array::stats::StatsRewriteSession
19870+
19871+
pub fn vortex_array::stats::StatsRewriteSession::as_any(&self) -> &dyn core::any::Any
19872+
19873+
pub fn vortex_array::stats::StatsRewriteSession::as_any_mut(&mut self) -> &mut dyn core::any::Any
19874+
1983719875
pub struct vortex_array::stats::StatsSet
1983819876

1983919877
impl vortex_array::stats::StatsSet

vortex-array/src/expr/expression.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use std::sync::Arc;
1212
use itertools::Itertools;
1313
use vortex_error::VortexResult;
1414
use vortex_error::vortex_ensure;
15+
use vortex_session::VortexSession;
1516

1617
use crate::dtype::DType;
1718
use crate::expr::StatsCatalog;
@@ -135,6 +136,22 @@ impl Expression {
135136
self.scalar_fn().stat_falsification(self, catalog)
136137
}
137138

139+
/// Returns an expression that proves this predicate is definitely false from stats.
140+
///
141+
/// If the returned expression evaluates to `true` for a stats scope, this expression is
142+
/// guaranteed to be false for every row in that scope. `false` and `null` are unknown.
143+
pub fn falsify(&self, session: &VortexSession) -> VortexResult<Option<Expression>> {
144+
crate::stats::rewrite::StatsRewriteCtx::new(session).falsify(self)
145+
}
146+
147+
/// Returns an expression that proves this predicate is definitely true from stats.
148+
///
149+
/// If the returned expression evaluates to `true` for a stats scope, this expression is
150+
/// guaranteed to be true for every row in that scope. `false` and `null` are unknown.
151+
pub fn satisfy(&self, session: &VortexSession) -> VortexResult<Option<Expression>> {
152+
crate::stats::rewrite::StatsRewriteCtx::new(session).satisfy(self)
153+
}
154+
138155
/// Returns an expression representing the zoned statistic for the given stat, if available.
139156
///
140157
/// The [`StatsCatalog`] returns expressions that can be evaluated using the zone map as a

vortex-array/src/stats/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ pub use stats_set::*;
1717
mod array;
1818
pub mod expr;
1919
pub mod flatbuffers;
20+
pub(crate) mod rewrite;
21+
pub mod session;
2022
mod stats_set;
2123

2224
pub use array::*;
25+
pub use session::*;
2326
use vortex_error::VortexExpect;
2427

2528
use crate::expr::stats::Stat;

vortex-array/src/stats/rewrite.rs

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Session-registered rewrite rules for aggregate-backed stats expressions.
5+
6+
use std::fmt::Debug;
7+
use std::sync::Arc;
8+
9+
use vortex_error::VortexResult;
10+
use vortex_session::VortexSession;
11+
12+
use crate::expr::Expression;
13+
use crate::expr::or_collect;
14+
use crate::scalar_fn::ScalarFnId;
15+
use crate::stats::session::StatsRewriteSessionExt;
16+
17+
/// Shared reference to a stats rewrite rule.
18+
pub(crate) type StatsRewriteRuleRef = Arc<dyn StatsRewriteRule>;
19+
20+
/// A plugin-provided rule that rewrites predicates into stats-backed proof expressions.
21+
///
22+
/// A falsifier evaluates to `true` only when the original predicate is definitely false for the
23+
/// current stats scope. A satisfier evaluates to `true` only when the original predicate is
24+
/// definitely true for the current stats scope. Returning `None` means the rule cannot prove
25+
/// anything for the expression.
26+
#[allow(dead_code)]
27+
pub(crate) trait StatsRewriteRule: Debug + Send + Sync + 'static {
28+
/// The scalar function ID this rule applies to.
29+
fn scalar_fn_id(&self) -> ScalarFnId;
30+
31+
/// Rewrite an expression into a stats-backed falsifier.
32+
fn falsify(
33+
&self,
34+
expr: &Expression,
35+
ctx: &StatsRewriteCtx<'_>,
36+
) -> VortexResult<Option<Expression>> {
37+
_ = expr;
38+
_ = ctx;
39+
Ok(None)
40+
}
41+
42+
/// Rewrite an expression into a stats-backed satisfier.
43+
fn satisfy(
44+
&self,
45+
expr: &Expression,
46+
ctx: &StatsRewriteCtx<'_>,
47+
) -> VortexResult<Option<Expression>> {
48+
_ = expr;
49+
_ = ctx;
50+
Ok(None)
51+
}
52+
}
53+
54+
/// Context passed to stats rewrite rules.
55+
pub(crate) struct StatsRewriteCtx<'a> {
56+
session: &'a VortexSession,
57+
}
58+
59+
impl<'a> StatsRewriteCtx<'a> {
60+
/// Create a rewrite context for `session`.
61+
pub(crate) fn new(session: &'a VortexSession) -> Self {
62+
Self { session }
63+
}
64+
65+
/// Returns the session that owns the rewrite registry.
66+
pub(crate) fn session(&self) -> &'a VortexSession {
67+
self.session
68+
}
69+
70+
/// Rewrite `expr` into a stats-backed falsifier.
71+
pub(crate) fn falsify(&self, expr: &Expression) -> VortexResult<Option<Expression>> {
72+
rewrite(expr, self, StatsRewriteRule::falsify)
73+
}
74+
75+
/// Rewrite `expr` into a stats-backed satisfier.
76+
pub(crate) fn satisfy(&self, expr: &Expression) -> VortexResult<Option<Expression>> {
77+
rewrite(expr, self, StatsRewriteRule::satisfy)
78+
}
79+
}
80+
81+
fn rewrite(
82+
expr: &Expression,
83+
ctx: &StatsRewriteCtx<'_>,
84+
apply: fn(
85+
&dyn StatsRewriteRule,
86+
&Expression,
87+
&StatsRewriteCtx<'_>,
88+
) -> VortexResult<Option<Expression>>,
89+
) -> VortexResult<Option<Expression>> {
90+
let rules = ctx
91+
.session()
92+
.stats_rewrites()
93+
.rules_for(expr.scalar_fn().id());
94+
let Some(rules) = rules else {
95+
return Ok(None);
96+
};
97+
98+
let mut rewrites = Vec::new();
99+
for rule in rules.iter() {
100+
if let Some(rewrite) = apply(rule.as_ref(), expr, ctx)? {
101+
rewrites.push(rewrite);
102+
}
103+
}
104+
105+
Ok(or_collect(rewrites))
106+
}
107+
108+
#[cfg(test)]
mod tests {
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::StatsRewriteCtx;
    use super::StatsRewriteRule;
    use crate::expr::Expression;
    use crate::expr::lit;
    use crate::expr::or;
    use crate::scalar_fn::ScalarFnId;
    use crate::scalar_fn::ScalarFnVTable;
    use crate::scalar_fn::fns::literal::Literal;
    use crate::stats::session::StatsRewriteSession;
    use crate::stats::session::StatsRewriteSessionExt;

    /// Test rule for literal expressions that hands back pre-built rewrites.
    #[derive(Debug)]
    struct StaticLiteralRule {
        falsifier: Option<Expression>,
        satisfier: Option<Expression>,
    }

    impl StatsRewriteRule for StaticLiteralRule {
        fn scalar_fn_id(&self) -> ScalarFnId {
            Literal.id()
        }

        fn falsify(
            &self,
            _expr: &Expression,
            _ctx: &StatsRewriteCtx<'_>,
        ) -> VortexResult<Option<Expression>> {
            Ok(self.falsifier.clone())
        }

        fn satisfy(
            &self,
            _expr: &Expression,
            _ctx: &StatsRewriteCtx<'_>,
        ) -> VortexResult<Option<Expression>> {
            Ok(self.satisfier.clone())
        }
    }

    /// Creates a session and registers one [`StaticLiteralRule`] per `(falsifier, satisfier)`
    /// pair, in the order given.
    fn session_with_rules(
        rules: impl IntoIterator<Item = (Option<Expression>, Option<Expression>)>,
    ) -> VortexSession {
        let session = VortexSession::empty().with::<StatsRewriteSession>();
        for (falsifier, satisfier) in rules {
            session.stats_rewrites().register(StaticLiteralRule {
                falsifier,
                satisfier,
            });
        }
        session
    }

    #[test]
    fn combines_multiple_falsifiers_with_or() -> VortexResult<()> {
        let session = session_with_rules([(Some(lit(false)), None), (Some(lit(true)), None)]);

        let falsifier = lit(7).falsify(&session)?;

        assert_eq!(falsifier, Some(or(lit(false), lit(true))));
        Ok(())
    }

    #[test]
    fn combines_multiple_satisfiers_with_or() -> VortexResult<()> {
        let session = session_with_rules([(None, Some(lit(false))), (None, Some(lit(true)))]);

        let satisfier = lit(7).satisfy(&session)?;

        assert_eq!(satisfier, Some(or(lit(false), lit(true))));
        Ok(())
    }

    #[test]
    fn unregistered_expression_has_no_rewrite() -> VortexResult<()> {
        // No rules are registered, so neither proof direction yields a rewrite.
        let session = VortexSession::empty().with::<StatsRewriteSession>();
        let literal = lit(7);

        assert_eq!(literal.falsify(&session)?, None);
        assert_eq!(literal.satisfy(&session)?, None);
        Ok(())
    }
}

vortex-array/src/stats/session.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Session state for stats rewrite rules.
5+
6+
use std::any::Any;
7+
use std::sync::Arc;
8+
9+
use parking_lot::RwLock;
10+
use vortex_session::Ref;
11+
use vortex_session::SessionExt;
12+
use vortex_session::SessionVar;
13+
use vortex_utils::aliases::hash_map::HashMap;
14+
15+
use crate::scalar_fn::ScalarFnId;
16+
use crate::stats::rewrite::StatsRewriteRule;
17+
use crate::stats::rewrite::StatsRewriteRuleRef;
18+
19+
/// Shared, immutable slice of the rules registered for one scalar function.
type StatsRewriteRuleSet = Arc<[StatsRewriteRuleRef]>;
20+
21+
/// Session state for stats rewrite rules.
22+
#[derive(Debug, Default)]
23+
pub struct StatsRewriteSession {
24+
rules: RwLock<HashMap<ScalarFnId, StatsRewriteRuleSet>>,
25+
}
26+
27+
impl StatsRewriteSession {
28+
/// Register a stats rewrite rule.
29+
#[allow(dead_code)]
30+
pub(crate) fn register<R: StatsRewriteRule>(&self, rule: R) {
31+
self.register_ref(Arc::new(rule));
32+
}
33+
34+
/// Register a shared stats rewrite rule.
35+
#[allow(dead_code)]
36+
pub(crate) fn register_ref(&self, rule: StatsRewriteRuleRef) {
37+
let mut rules = self.rules.write();
38+
let rule_id = rule.scalar_fn_id();
39+
let mut updated_rules = rules
40+
.get(&rule_id)
41+
.map(|rules| rules.iter().cloned().collect::<Vec<_>>())
42+
.unwrap_or_default();
43+
updated_rules.push(rule);
44+
rules.insert(rule_id, updated_rules.into());
45+
}
46+
47+
/// Return the rewrite rules registered for `scalar_fn_id`.
48+
pub(crate) fn rules_for(&self, scalar_fn_id: ScalarFnId) -> Option<StatsRewriteRuleSet> {
49+
self.rules.read().get(&scalar_fn_id).cloned()
50+
}
51+
}
52+
53+
impl SessionVar for StatsRewriteSession {
54+
fn as_any(&self) -> &dyn Any {
55+
self
56+
}
57+
58+
fn as_any_mut(&mut self) -> &mut dyn Any {
59+
self
60+
}
61+
}
62+
63+
/// Extension trait for accessing stats rewrite session data.
64+
pub(crate) trait StatsRewriteSessionExt: SessionExt {
65+
/// Returns the stats rewrite rule registry.
66+
fn stats_rewrites(&self) -> Ref<'_, StatsRewriteSession> {
67+
self.get::<StatsRewriteSession>()
68+
}
69+
}
70+
impl<S: SessionExt> StatsRewriteSessionExt for S {}

vortex/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use vortex_array::optimizer::kernels::ArrayKernels;
1616
pub use vortex_array::scalar_fn;
1717
use vortex_array::scalar_fn::session::ScalarFnSession;
1818
use vortex_array::session::ArraySession;
19+
use vortex_array::stats::session::StatsRewriteSession;
1920
use vortex_io::session::RuntimeSession;
2021
use vortex_layout::session::LayoutSession;
2122
use vortex_session::VortexSession;
@@ -167,6 +168,7 @@ impl VortexSessionDefault for VortexSession {
167168
.with::<ArraySession>()
168169
.with::<LayoutSession>()
169170
.with::<ScalarFnSession>()
171+
.with::<StatsRewriteSession>()
170172
.with::<ArrayKernels>()
171173
.with::<AggregateFnSession>()
172174
.with::<RuntimeSession>();

0 commit comments

Comments
 (0)