From 5bb6b6ecc9781d21f92ad540e48c1b9eeca1136c Mon Sep 17 00:00:00 2001 From: stoicAI1776 Date: Fri, 10 Apr 2026 03:43:35 +0530 Subject: [PATCH 1/2] feat(hql): enable SearchV PREFILTER and wire prefilter codegen --- helix-db/src/grammar.pest | 2 +- .../analyzer/methods/graph_step_validation.rs | 9 + .../analyzer/methods/infer_expr_type.rs | 190 ++++++++++++------ .../analyzer/methods/traversal_validation.rs | 63 ++---- helix-db/src/helixc/generator/source_steps.rs | 2 +- .../helixc/parser/expression_parse_methods.rs | 15 ++ 6 files changed, 171 insertions(+), 110 deletions(-) diff --git a/helix-db/src/grammar.pest b/helix-db/src/grammar.pest index 875074d26..803b9fe28 100644 --- a/helix-db/src/grammar.pest +++ b/helix-db/src/grammar.pest @@ -224,7 +224,7 @@ rerank_mmr = { "RerankMMR" ~ "(" ~ "lambda" ~ ":" ~ evaluates_to_number ~ ("," ~ // --------------------------------------------------------------------- // Vector steps // --------------------------------------------------------------------- -search_vector = { "SearchV" ~ "<" ~ identifier_upper ~ ">" ~ "(" ~ vector_data ~ "," ~ (integer | identifier) ~ ")" }// ~ ("::" ~ pre_filter)? } +search_vector = { "SearchV" ~ "<" ~ identifier_upper ~ ">" ~ "(" ~ vector_data ~ "," ~ (integer | identifier) ~ ")" ~ ("::" ~ pre_filter)? } bm25_search = { "SearchBM25" ~ "<" ~ identifier_upper ~ ">" ~ "(" ~ (string_literal | identifier) ~ "," ~ (integer | identifier) ~ ")" } pre_filter = { "PREFILTER" ~ "(" ~ (evaluates_to_bool | anonymous_traversal) ~ ")" } BatchAddV = { "BatchAddV" ~ "<" ~ identifier_upper ~ ">" ~ "(" ~ identifier ~ ")" } diff --git a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs index cb7bb7054..4ac1b09d4 100644 --- a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs @@ -656,6 +656,15 @@ pub(crate) fn apply_graph_step<'a>( { generate_error!(ctx, original_query, sv.loc.clone(), E103, ty.as_str()); } + if sv.pre_filter.is_some() { + generate_error!( + ctx, + original_query, + sv.loc.clone(), + E601, + "PREFILTER is only supported on root SearchV calls, not graph-step SearchV" + ); + } let vec = match &sv.data { Some(VectorData::Vector(v)) => { VecData::Standard(GeneratedValue::Literal(GenRef::Ref(format!( diff --git a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs index 3bf134934..6764b58c5 100644 --- a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs +++ b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs @@ -26,7 +26,7 @@ use crate::{ statements::Statement as GeneratedStatement, traversal_steps::{ ShouldCollect, Step as GeneratedStep, Traversal as GeneratedTraversal, - TraversalType, Where, WhereRef, + TraversalType, }, utils::{GenRef, GeneratedValue, Separator, VecData}, }, @@ -37,6 +37,90 @@ use crate::{ use paste::paste; use std::collections::HashMap; +fn is_supported_search_vector_prefilter_traversal(traversal: &GeneratedTraversal) -> bool { + let is_val_traversal = match &traversal.traversal_type { + TraversalType::FromIter(var) | TraversalType::FromSingle(var) => match var { + GenRef::Std(s) | GenRef::Literal(s) => s == DEFAULT_VAR_NAME, + _ => false, + }, + _ => false, + }; + + if !is_val_traversal { + return false; + } + + // Prefilter closures currently only support direct property predicates on the vector. + if !matches!(traversal.source_step.inner(), SourceStep::Anonymous) { + return false; + } + + if traversal.steps.len() != 2 { + return false; + } + + matches!(traversal.steps[0].inner(), GeneratedStep::PropertyFetch(_)) + && matches!(traversal.steps[1].inner(), GeneratedStep::BoolOp(_)) +} + +fn is_supported_search_vector_prefilter_expr(expr: &BoExp) -> bool { + match expr { + BoExp::Not(inner) => is_supported_search_vector_prefilter_expr(inner), + BoExp::And(exprs) | BoExp::Or(exprs) => exprs + .iter() + .all(is_supported_search_vector_prefilter_expr), + BoExp::Expr(traversal) => is_supported_search_vector_prefilter_traversal(traversal), + BoExp::Exists(_) | BoExp::Empty => false, + } +} + +pub(crate) fn build_search_vector_pre_filter<'a>( + ctx: &mut Ctx<'a>, + pre_filter_expr: &'a Expression, + scope: &mut HashMap<&'a str, VariableInfo>, + original_query: &'a Query, + vector_type: Option, + gen_query: &mut GeneratedQuery, +) -> Option> { + let (_, stmt) = infer_expr_type( + ctx, + pre_filter_expr, + scope, + original_query, + Some(Type::Vector(vector_type)), + gen_query, + ); + + let pre_filter = match stmt { + Some(GeneratedStatement::Traversal(tr)) => BoExp::Expr(tr), + Some(GeneratedStatement::BoExp(expr)) => expr, + Some(_) => { + generate_error!( + ctx, + original_query, + pre_filter_expr.loc.clone(), + E306, + "PREFILTER" + ); + return None; + } + None => return None, + }; + + if !is_supported_search_vector_prefilter_expr(&pre_filter) { + generate_error!( + ctx, + original_query, + pre_filter_expr.loc.clone(), + E601, + "PREFILTER only supports simple vector property predicates like _::{field}::EQ(value)" + ); + return None; + } + + Some(vec![pre_filter]) +} + /// Infer the end type of an expression and returns the statement to generate from the expression /// /// This function is used to infer the end type of an expression and returns the statement to generate from the expression @@ -1278,62 +1362,16 @@ pub(crate) fn infer_expr_type<'a>( } }; - let pre_filter: Option> = match &sv.pre_filter { - Some(expr) => { - let (_, stmt) = infer_expr_type( - ctx, - expr, - scope, - original_query, - Some(Type::Vector(sv.vector_type.clone())), - gen_query, - ); - // Where/boolean ops don't change the element type, - // so `cur_ty` stays the same. - if stmt.is_none() { - return (Type::Vector(sv.vector_type.clone()), None); - } - let stmt = stmt.unwrap(); - let mut gen_traversal = GeneratedTraversal { - traversal_type: TraversalType::FromIter(GenRef::Std("v".to_string())), - steps: vec![], - should_collect: ShouldCollect::ToVec, - source_step: Separator::Empty(SourceStep::Anonymous), - ..Default::default() - }; - match stmt { - GeneratedStatement::Traversal(tr) => { - gen_traversal - .steps - .push(Separator::Period(GeneratedStep::Where(Where::Ref( - WhereRef { - expr: BoExp::Expr(tr), - }, - )))); - } - GeneratedStatement::BoExp(expr) => { - gen_traversal - .steps - .push(Separator::Period(GeneratedStep::Where(match expr { - BoExp::Exists(mut traversal) => { - traversal.should_collect = ShouldCollect::No; - Where::Ref(WhereRef { - expr: BoExp::Exists(traversal), - }) - } - _ => Where::Ref(WhereRef { expr }), - }))); - } - // Pre-filter should produce Traversal or BoExp - _ => { - // Fall through - pre-filter will be None - return (Type::Vector(sv.vector_type.clone()), None); - } - } - Some(vec![BoExp::Expr(gen_traversal)]) - } - None => None, - }; + let pre_filter = sv.pre_filter.as_ref().and_then(|expr| { + build_search_vector_pre_filter( + ctx, + expr, + scope, + original_query, + sv.vector_type.clone(), + gen_query, + ) + }); // Search returns nodes that contain the vectors ( @@ -2116,4 +2154,42 @@ mod tests { let (diagnostics, _) = result.unwrap(); assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E660)); } + + #[test] + fn test_search_vector_prefilter_simple_property_predicate_valid() { + let source = r#" + V::Document { content: String, category: String, embedding: [F32] } + + QUERY test(query_vec: [F64]) => + docs <- SearchV(query_vec, 10)::PREFILTER(_::{category}::EQ("tech")) + RETURN docs + "#; + + let content = write_to_temp_file(vec![source]); + let parsed = HelixParser::parse_source(&content).unwrap(); + let result = crate::helixc::analyzer::analyze(&parsed); + + assert!(result.is_ok()); + let (diagnostics, _) = result.unwrap(); + assert!(!diagnostics.iter().any(|d| d.error_code == ErrorCode::E601)); + } + + #[test] + fn test_search_vector_prefilter_non_boolean_traversal_emits_e601() { + let source = r#" + V::Document { content: String, category: String, embedding: [F32] } + + QUERY test(query_vec: [F64]) => + docs <- SearchV(query_vec, 10)::PREFILTER(_::{category}) + RETURN docs + "#; + + let content = write_to_temp_file(vec![source]); + let parsed = HelixParser::parse_source(&content).unwrap(); + let result = crate::helixc::analyzer::analyze(&parsed); + + assert!(result.is_ok()); + let (diagnostics, _) = result.unwrap(); + assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E601)); + } } diff --git a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs index 000ec9bad..1a7db1f69 100644 --- a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs @@ -14,7 +14,8 @@ use crate::{ errors::push_query_err, methods::{ exclude_validation::validate_exclude, graph_step_validation::apply_graph_step, - infer_expr_type::infer_expr_type, object_validation::validate_object, + infer_expr_type::{build_search_vector_pre_filter, infer_expr_type}, + object_validation::validate_object, }, types::{AggregateInfo, Type}, utils::{ @@ -639,56 +640,16 @@ pub(crate) fn validate_traversal<'a>( } }; - // let pre_filter: Option> = match &sv.pre_filter { - // Some(expr) => { - // let (_, stmt) = infer_expr_type( - // ctx, - // expr, - // scope, - // original_query, - // Some(Type::Vector(sv.vector_type.clone())), - // gen_query, - // ); - // // Where/boolean ops don't change the element type, - // // so `cur_ty` stays the same. - // assert!(stmt.is_some()); - // let stmt = stmt.unwrap(); - // let mut gen_traversal = GeneratedTraversal { - // traversal_type: TraversalType::NestedFrom(GenRef::Std("v".to_string())), - // steps: vec![], - // should_collect: ShouldCollect::ToVec, - // source_step: Separator::Empty(SourceStep::Anonymous), - // }; - // match stmt { - // GeneratedStatement::Traversal(tr) => { - // gen_traversal - // .steps - // .push(Separator::Period(GeneratedStep::Where(Where::Ref( - // WhereRef { - // expr: BoExp::Expr(tr), - // }, - // )))); - // } - // GeneratedStatement::BoExp(expr) => { - // gen_traversal - // .steps - // .push(Separator::Period(GeneratedStep::Where(match expr { - // BoExp::Exists(mut traversal) => { - // traversal.should_collect = ShouldCollect::No; - // Where::Ref(WhereRef { - // expr: BoExp::Exists(traversal), - // }) - // } - // _ => Where::Ref(WhereRef { expr }), - // }))); - // } - // _ => unreachable!(), - // } - // Some(vec![BoExp::Expr(gen_traversal)]) - // } - // None => None, - // }; - let pre_filter = None; + let pre_filter = sv.pre_filter.as_ref().and_then(|expr| { + build_search_vector_pre_filter( + ctx, + expr, + scope, + original_query, + sv.vector_type.clone(), + gen_query, + ) + }); gen_traversal.traversal_type = TraversalType::Ref; gen_traversal.should_collect = ShouldCollect::ToVec; diff --git a/helix-db/src/helixc/generator/source_steps.rs b/helix-db/src/helixc/generator/source_steps.rs index b491125f3..ba6cf37c5 100644 --- a/helix-db/src/helixc/generator/source_steps.rs +++ b/helix-db/src/helixc/generator/source_steps.rs @@ -443,7 +443,7 @@ impl Display for SearchVector { self.label, pre_filter .iter() - .map(|f| format!("|v: &HVector, txn: &RoTxn| {f}")) + .map(|f| format!("|val: &HVector, txn: &RoTxn| {f}")) .collect::>() .join(", ") ), diff --git a/helix-db/src/helixc/parser/expression_parse_methods.rs b/helix-db/src/helixc/parser/expression_parse_methods.rs index c754e5504..fce8d53e5 100644 --- a/helix-db/src/helixc/parser/expression_parse_methods.rs +++ b/helix-db/src/helixc/parser/expression_parse_methods.rs @@ -1044,6 +1044,21 @@ mod tests { assert!(result.is_ok()); } + #[test] + fn test_parse_vector_search_with_prefilter() { + let source = r#" + V::Document { content: String, category: String, embedding: [F32] } + + QUERY searchSimilar(queryVec: [F32]) => + docs <- SearchV(queryVec, 10)::PREFILTER(_::{category}::EQ("tech")) + RETURN docs + "#; + + let content = write_to_temp_file(vec![source]); + let result = HelixParser::parse_source(&content); + assert!(result.is_ok()); + } + // ============================================================================ // Assignment Tests // ============================================================================ From 6d56b81c422c7ee0fae5e97dd4691a2e6861e3df Mon Sep 17 00:00:00 2001 From: stoicAI1776 Date: Fri, 10 Apr 2026 04:04:57 +0530 Subject: [PATCH 2/2] fix(hql): silence unused txn in generated prefilter closures --- helix-db/src/helixc/generator/source_steps.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helix-db/src/helixc/generator/source_steps.rs b/helix-db/src/helixc/generator/source_steps.rs index ba6cf37c5..e6866d328 100644 --- a/helix-db/src/helixc/generator/source_steps.rs +++ b/helix-db/src/helixc/generator/source_steps.rs @@ -443,7 +443,7 @@ impl Display for SearchVector { self.label, pre_filter .iter() - .map(|f| format!("|val: &HVector, txn: &RoTxn| {f}")) + .map(|f| format!("|val: &HVector, _txn: &RoTxn| {f}")) .collect::>() .join(", ") ),