Skip to content

Commit cf9a4f2

Browse files
committed
fix(tesseract): preserve FILTER_PARAMS pushdown with segments
Tesseract was treating any segment inside a filter tree as a fatal subtree extraction failure, which made FILTER_PARAMS fall back to always_true(). Keep AND-group pruning for matching members, require full matches for OR groups, and add regressions for segment siblings and partial OR branches. Made-with: Cursor
1 parent 94a2175 commit cf9a4f2

File tree

2 files changed

+189
-63
lines changed

2 files changed

+189
-63
lines changed

rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs

Lines changed: 54 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -128,88 +128,79 @@ impl FilterItem {
128128
}
129129
}
130130

131-
/// Extract all member symbols from this filter tree
132-
/// Returns None if filter tree is invalid (e.g., empty group)
133-
/// Returns Some(set) with all member symbols found in the tree
134-
fn extract_filter_members(&self) -> Option<Vec<Rc<MemberSymbol>>> {
135-
match self {
136-
FilterItem::Group(group) => {
137-
// Empty groups are considered invalid
138-
if group.items.is_empty() {
139-
return None;
140-
}
141-
142-
let mut all_members = Vec::new();
143-
144-
// Recursively extract from all children
145-
for child in &group.items {
146-
match child.extract_filter_members() {
147-
None => return None, // If any child is invalid, entire tree is invalid
148-
Some(mut members) => all_members.append(&mut members),
149-
}
150-
}
151-
152-
Some(all_members)
153-
}
154-
FilterItem::Item(item) => Some(vec![item.member_evaluator().clone()]),
155-
FilterItem::Segment(_) => None,
156-
}
157-
}
158-
159131
/// Find subtree of filters that only contains filters for the specified members
160132
/// Returns None if no matching filters found
161133
/// Returns Some(FilterItem) with the subtree containing only filters for target members
162134
///
163-
/// This only processes AND groups - OR groups are not supported and will return None
135+
/// Partial matching is only supported for AND groups. OR groups only match
136+
/// if all of their children match the target members.
164137
pub fn find_subtree_for_members(&self, target_members: &[&String]) -> Option<FilterItem> {
138+
self.find_subtree_for_members_with_match_state(target_members)
139+
.map(|(filter_item, _)| filter_item)
140+
}
141+
142+
fn find_subtree_for_members_with_match_state(
143+
&self,
144+
target_members: &[&String],
145+
) -> Option<(FilterItem, bool)> {
165146
match self {
166147
FilterItem::Group(group) => {
167148
// Empty groups return None
168149
if group.items.is_empty() {
169150
return None;
170151
}
171152

172-
// Extract all members from this filter subtree
173-
let filter_members = self.extract_filter_members()?;
153+
match group.operator {
154+
FilterGroupOperator::And => {
155+
let mut matching_children = Vec::new();
156+
let mut all_children_fully_match = true;
157+
158+
for child in &group.items {
159+
match child.find_subtree_for_members_with_match_state(target_members) {
160+
Some((matching_child, child_fully_matched)) => {
161+
matching_children.push(matching_child);
162+
all_children_fully_match &= child_fully_matched;
163+
}
164+
None => all_children_fully_match = false,
165+
}
166+
}
174167

175-
// Check if all members in this filter are in the target set
176-
let all_members_match = filter_members.iter().all(|member| {
177-
target_members.iter().any(|target| {
178-
&&member.clone().resolve_reference_chain().full_name() == target
179-
})
180-
});
168+
if matching_children.is_empty() {
169+
return None;
170+
}
181171

182-
if all_members_match {
183-
// All members match - return this entire filter subtree
184-
return Some(self.clone());
185-
}
172+
if all_children_fully_match {
173+
// Every child matches, so preserve the original group shape.
174+
return Some((self.clone(), true));
175+
}
186176

187-
// Only process AND groups for partial matching
188-
if group.operator == FilterGroupOperator::And {
189-
let matching_children: Vec<FilterItem> = group
190-
.items
191-
.iter()
192-
.filter_map(|child| child.find_subtree_for_members(target_members))
193-
.collect();
177+
if matching_children.len() == 1 {
178+
// Single match - return it directly without wrapping.
179+
return Some((matching_children.into_iter().next().unwrap(), false));
180+
}
194181

195-
if matching_children.is_empty() {
196-
return None;
182+
// Multiple matches - wrap in a new AND group.
183+
Some((
184+
FilterItem::Group(Rc::new(FilterGroup::new(
185+
FilterGroupOperator::And,
186+
matching_children,
187+
))),
188+
false,
189+
))
197190
}
191+
FilterGroupOperator::Or => {
192+
// OR groups can only be preserved if every child matches.
193+
for child in &group.items {
194+
let (_, child_fully_matched) =
195+
child.find_subtree_for_members_with_match_state(target_members)?;
196+
if !child_fully_matched {
197+
return None;
198+
}
199+
}
198200

199-
if matching_children.len() == 1 {
200-
// Single match - return it directly without wrapping
201-
return Some(matching_children.into_iter().next().unwrap());
201+
Some((self.clone(), true))
202202
}
203-
204-
// Multiple matches - wrap in new AND group
205-
return Some(FilterItem::Group(Rc::new(FilterGroup::new(
206-
FilterGroupOperator::And,
207-
matching_children,
208-
))));
209203
}
210-
211-
// OR groups are not supported
212-
None
213204
}
214205
FilterItem::Item(item) => {
215206
let member = item.member_evaluator();
@@ -219,7 +210,7 @@ impl FilterItem {
219210
.iter()
220211
.any(|target| &&member.clone().resolve_reference_chain().full_name() == target)
221212
{
222-
Some(self.clone())
213+
Some((self.clone(), true))
223214
} else {
224215
None
225216
}

rust/cubesqlplanner/cubesqlplanner/src/tests/utils/debug.rs

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::planner::filter::{BaseFilter, FilterOperator};
44
use crate::planner::sql_evaluator::DebugSql;
55
use crate::test_fixtures::cube_bridge::MockSchema;
66
use crate::test_fixtures::test_utils::TestContext;
7+
use std::rc::Rc;
78

89
#[test]
910
fn test_dimension_basic() {
@@ -201,3 +202,137 @@ fn test_filter_group_and_collapsed() {
201202
"AND: [\n {visitors.source} equals: ['google'],\n {visitors.id} gt: ['100']\n]";
202203
assert_eq!(sql, expected);
203204
}
205+
206+
#[test]
207+
fn test_find_subtree_for_members_excludes_segments_from_and_group() {
208+
let schema = MockSchema::from_yaml_file("common/integration_basic.yaml");
209+
let ctx = TestContext::new(schema).unwrap();
210+
let city_symbol = ctx.create_symbol("customers.city").unwrap();
211+
let amount_symbol = ctx.create_symbol("orders.amount").unwrap();
212+
213+
let city_filter = BaseFilter::try_new(
214+
ctx.query_tools().clone(),
215+
city_symbol.clone(),
216+
crate::planner::filter::base_filter::FilterType::Dimension,
217+
FilterOperator::Equal,
218+
Some(vec![Some("New York".to_string())]),
219+
)
220+
.unwrap();
221+
222+
let amount_filter = BaseFilter::try_new(
223+
ctx.query_tools().clone(),
224+
amount_symbol.clone(),
225+
crate::planner::filter::base_filter::FilterType::Dimension,
226+
FilterOperator::Gte,
227+
Some(vec![Some("100".to_string())]),
228+
)
229+
.unwrap();
230+
231+
let completed_segment = ctx.create_segment("orders.completed_orders").unwrap();
232+
233+
let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new(
234+
FilterGroupOperator::And,
235+
vec![
236+
FilterItem::Item(city_filter),
237+
FilterItem::Item(amount_filter),
238+
FilterItem::Segment(completed_segment),
239+
],
240+
)));
241+
242+
let city_member = city_symbol.resolve_reference_chain().full_name();
243+
let amount_member = amount_symbol.resolve_reference_chain().full_name();
244+
let targets = vec![&city_member, &amount_member];
245+
246+
let subtree = filter_tree
247+
.find_subtree_for_members(&targets)
248+
.expect("matching filters should still be extracted");
249+
250+
assert_eq!(
251+
subtree.debug_sql(false),
252+
"AND: [\n {customers.city} equals: ['New York'],\n {orders.amount} gte: ['100']\n]"
253+
);
254+
}
255+
256+
#[test]
257+
fn test_find_subtree_for_members_keeps_or_groups_all_or_nothing() {
258+
let schema = MockSchema::from_yaml_file("common/integration_basic.yaml");
259+
let ctx = TestContext::new(schema).unwrap();
260+
let city_symbol = ctx.create_symbol("customers.city").unwrap();
261+
262+
let city_filter = BaseFilter::try_new(
263+
ctx.query_tools().clone(),
264+
city_symbol.clone(),
265+
crate::planner::filter::base_filter::FilterType::Dimension,
266+
FilterOperator::Equal,
267+
Some(vec![Some("New York".to_string())]),
268+
)
269+
.unwrap();
270+
271+
let completed_segment = ctx.create_segment("orders.completed_orders").unwrap();
272+
273+
let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new(
274+
FilterGroupOperator::Or,
275+
vec![
276+
FilterItem::Item(city_filter),
277+
FilterItem::Segment(completed_segment),
278+
],
279+
)));
280+
281+
let city_member = city_symbol.resolve_reference_chain().full_name();
282+
let targets = vec![&city_member];
283+
284+
assert!(
285+
filter_tree.find_subtree_for_members(&targets).is_none(),
286+
"partial matches should not be extracted from OR groups"
287+
);
288+
}
289+
290+
#[test]
291+
fn test_find_subtree_for_members_rejects_or_groups_with_partially_matching_children() {
292+
let schema = MockSchema::from_yaml_file("common/integration_basic.yaml");
293+
let ctx = TestContext::new(schema).unwrap();
294+
let city_symbol = ctx.create_symbol("customers.city").unwrap();
295+
let amount_symbol = ctx.create_symbol("orders.amount").unwrap();
296+
297+
let city_filter = BaseFilter::try_new(
298+
ctx.query_tools().clone(),
299+
city_symbol.clone(),
300+
crate::planner::filter::base_filter::FilterType::Dimension,
301+
FilterOperator::Equal,
302+
Some(vec![Some("New York".to_string())]),
303+
)
304+
.unwrap();
305+
306+
let amount_filter = BaseFilter::try_new(
307+
ctx.query_tools().clone(),
308+
amount_symbol.clone(),
309+
crate::planner::filter::base_filter::FilterType::Dimension,
310+
FilterOperator::Gte,
311+
Some(vec![Some("100".to_string())]),
312+
)
313+
.unwrap();
314+
315+
let completed_segment = ctx.create_segment("orders.completed_orders").unwrap();
316+
317+
let partially_matching_branch = FilterItem::Group(Rc::new(FilterGroup::new(
318+
FilterGroupOperator::And,
319+
vec![
320+
FilterItem::Item(city_filter),
321+
FilterItem::Segment(completed_segment),
322+
],
323+
)));
324+
325+
let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new(
326+
FilterGroupOperator::Or,
327+
vec![partially_matching_branch, FilterItem::Item(amount_filter)],
328+
)));
329+
330+
let city_member = city_symbol.resolve_reference_chain().full_name();
331+
let amount_member = amount_symbol.resolve_reference_chain().full_name();
332+
let targets = vec![&city_member, &amount_member];
333+
334+
assert!(
335+
filter_tree.find_subtree_for_members(&targets).is_none(),
336+
"OR groups should only match when every branch matches without pruning"
337+
);
338+
}

0 commit comments

Comments
 (0)