@@ -447,4 +447,131 @@ public void testEfficientFilterWithOrderByScoreDescSucceeds() throws IOException
447447 "Efficient mode knn JSON should contain the WHERE predicate field:\n " + knnJson ,
448448 knnJson .contains ("state" ));
449449 }
450+
451+ // ── BETWEEN / NOT IN pushdown regression guards ─────────────────────
452+ // These tests lock in the DSL shape currently produced for BETWEEN and NOT IN predicates
453+ // when pushed down through vectorSearch(). They exist to catch silent regressions where a
454+ // change in the v2 FilterQueryBuilder pipeline would fall back to a serialized script query
455+ // instead of the native range/bool shape the cluster can index-accelerate.
456+
457+ @ Test
458+ public void testBetweenPushesAsRange () throws IOException {
459+ String explain =
460+ explainQuery (
461+ "SELECT v._id, v._score "
462+ + "FROM vectorSearch(table='"
463+ + TEST_INDEX
464+ + "', field='embedding', "
465+ + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v "
466+ + "WHERE v.balance BETWEEN 50 AND 200 "
467+ + "LIMIT 10" );
468+
469+ // BETWEEN is desugared by the analyzer into AND(>=, <=), which FilterQueryBuilder renders as
470+ // two range clauses combined under a bool. The goal here is regression lock-in: ensure the
471+ // pushed filter is native range DSL, not a serialized script query.
472+ String sourceBuilderJson = extractSourceBuilderJson (explain );
473+ assertTrue (
474+ "Explain should contain bool query:\n " + sourceBuilderJson ,
475+ sourceBuilderJson .contains ("\" bool\" " ));
476+ assertTrue (
477+ "Explain should contain must clause (knn in scoring context):\n " + sourceBuilderJson ,
478+ sourceBuilderJson .contains ("\" must\" " ));
479+ assertTrue (
480+ "Explain should contain filter clause (WHERE in non-scoring context):\n "
481+ + sourceBuilderJson ,
482+ sourceBuilderJson .contains ("\" filter\" " ));
483+ assertTrue (
484+ "BETWEEN should push as native range DSL:\n " + sourceBuilderJson ,
485+ sourceBuilderJson .contains ("\" range\" " ));
486+ assertTrue (
487+ "Range should target balance field:\n " + sourceBuilderJson ,
488+ sourceBuilderJson .contains ("\" balance\" " ));
489+ // RangeQueryBuilder serializes inclusive bounds as from/to + include_lower/include_upper. Lock
490+ // both the lower bound (50) and upper bound (200) are present in the pushed DSL.
491+ assertTrue (
492+ "Range should contain lower bound 50:\n " + sourceBuilderJson ,
493+ sourceBuilderJson .contains ("\" from\" : 50" ) || sourceBuilderJson .contains ("\" from\" :50" ));
494+ assertTrue (
495+ "Range should contain upper bound 200:\n " + sourceBuilderJson ,
496+ sourceBuilderJson .contains ("\" to\" : 200" ) || sourceBuilderJson .contains ("\" to\" :200" ));
497+ // Script-query fallback sentinel: the CompoundedScriptEngine lang marker must NOT appear when
498+ // BETWEEN is pushed down natively.
499+ assertFalse (
500+ "BETWEEN must not fall back to a serialized script query:\n " + sourceBuilderJson ,
501+ sourceBuilderJson .contains ("\" script\" " ));
502+
503+ // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload.
504+ String knnJson = decodeSoleKnnJson (explain );
505+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
506+ assertFalse (
507+ "Post-filter mode must not embed the balance predicate inside knn:\n " + knnJson ,
508+ knnJson .contains ("balance" ));
509+ assertFalse (
510+ "Post-filter mode must not embed a range inside knn:\n " + knnJson ,
511+ knnJson .contains ("range" ));
512+ }
513+
514+ @ Test
515+ public void testNotInPushesAsMustNotTerms () throws IOException {
516+ String explain =
517+ explainQuery (
518+ "SELECT v._id, v._score "
519+ + "FROM vectorSearch(table='"
520+ + TEST_INDEX
521+ + "', field='embedding', "
522+ + "vector='[1.0, 2.0, 3.0]', option='k=10') AS v "
523+ + "WHERE v.gender NOT IN ('M', 'F') "
524+ + "LIMIT 10" );
525+
526+ // v2 analyzer desugars `x NOT IN (a, b)` into `NOT(x = a OR x = b)`. FilterQueryBuilder maps
527+ // NOT to bool.must_not and OR to bool.should, so the pushed DSL is must_not[should[term,term]]
528+ // rather than a single terms clause. The shape we're locking in is: native bool with must_not
529+ // on the keyword subfield, *not* a serialized script query.
530+ String sourceBuilderJson = extractSourceBuilderJson (explain );
531+ assertTrue (
532+ "Explain should contain bool query:\n " + sourceBuilderJson ,
533+ sourceBuilderJson .contains ("\" bool\" " ));
534+ assertTrue (
535+ "Explain should contain must clause (knn in scoring context):\n " + sourceBuilderJson ,
536+ sourceBuilderJson .contains ("\" must\" " ));
537+ assertTrue (
538+ "Explain should contain filter clause (WHERE in non-scoring context):\n "
539+ + sourceBuilderJson ,
540+ sourceBuilderJson .contains ("\" filter\" " ));
541+ assertTrue (
542+ "NOT IN should push as bool.must_not:\n " + sourceBuilderJson ,
543+ sourceBuilderJson .contains ("\" must_not\" " ));
544+ // OR-of-equals desugaring means the two literals land in a bool.should of term clauses.
545+ assertTrue (
546+ "NOT IN should contain should clause for OR-of-equals desugaring:\n " + sourceBuilderJson ,
547+ sourceBuilderJson .contains ("\" should\" " ));
548+ assertTrue (
549+ "NOT IN should produce term clauses for each literal:\n " + sourceBuilderJson ,
550+ sourceBuilderJson .contains ("\" term\" " ));
551+ // Terms target the keyword subfield of gender (text field with .keyword multi-field).
552+ assertTrue (
553+ "NOT IN term clauses should target gender.keyword:\n " + sourceBuilderJson ,
554+ sourceBuilderJson .contains ("\" gender.keyword\" " ));
555+ // Both literals must be present in the pushed DSL.
556+ assertTrue (
557+ "NOT IN should contain the 'M' literal:\n " + sourceBuilderJson ,
558+ sourceBuilderJson .contains ("\" M\" " ));
559+ assertTrue (
560+ "NOT IN should contain the 'F' literal:\n " + sourceBuilderJson ,
561+ sourceBuilderJson .contains ("\" F\" " ));
562+ // Script-query fallback sentinel: native pushdown must not degrade to a serialized script.
563+ assertFalse (
564+ "NOT IN must not fall back to a serialized script query:\n " + sourceBuilderJson ,
565+ sourceBuilderJson .contains ("\" script\" " ));
566+
567+ // POST-filter mode (default): the WHERE predicate must live OUTSIDE the knn payload.
568+ String knnJson = decodeSoleKnnJson (explain );
569+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
570+ assertFalse (
571+ "Post-filter mode must not embed the gender predicate inside knn:\n " + knnJson ,
572+ knnJson .contains ("gender" ));
573+ assertFalse (
574+ "Post-filter mode must not embed must_not inside knn:\n " + knnJson ,
575+ knnJson .contains ("must_not" ));
576+ }
450577}
0 commit comments