66package org .opensearch .sql .sql ;
77
88import java .io .IOException ;
9+ import java .nio .charset .StandardCharsets ;
10+ import java .util .ArrayList ;
11+ import java .util .Base64 ;
12+ import java .util .List ;
13+ import java .util .regex .Matcher ;
14+ import java .util .regex .Pattern ;
915import org .junit .Test ;
1016import org .opensearch .sql .legacy .SQLIntegTestCase ;
1117import org .opensearch .sql .legacy .TestsConstants ;
1723 */
1824public class VectorSearchExplainIT extends SQLIntegTestCase {
1925
26+ // Matches WrapperQueryBuilder's base64 payload in explain JSON. The explain output escapes
27+ // quotes as \", so the regex tolerates both \" and " forms around the query key/value.
28+ private static final Pattern WRAPPER_PAYLOAD =
29+ Pattern .compile ("\\ \\ ?\" query\\ \\ ?\" :\\ \\ ?\" ([A-Za-z0-9+/=]+)\\ \\ ?\" " );
30+ private static final Pattern SOURCE_BUILDER_JSON =
31+ Pattern .compile ("sourceBuilder=(\\ {.*?\\ }), pitId=" , Pattern .DOTALL );
32+
33+ /** Decodes every base64-encoded wrapper payload in the explain output into its knn JSON. */
34+ private static List <String > decodeWrapperKnnJsons (String explain ) {
35+ List <String > payloads = new ArrayList <>();
36+ Matcher m = WRAPPER_PAYLOAD .matcher (explain );
37+ while (m .find ()) {
38+ payloads .add (new String (Base64 .getDecoder ().decode (m .group (1 )), StandardCharsets .UTF_8 ));
39+ }
40+ return payloads ;
41+ }
42+
43+ /** Returns the single wrapper knn JSON, asserting exactly one is present. */
44+ private static String decodeSoleKnnJson (String explain ) {
45+ List <String > payloads = decodeWrapperKnnJsons (explain );
46+ assertEquals (
47+ "Expected exactly one wrapper query payload in explain:\n " + explain , 1 , payloads .size ());
48+ return payloads .get (0 );
49+ }
50+
51+ /** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. */
52+ private static String extractSourceBuilderJson (String explain ) {
53+ Matcher m = SOURCE_BUILDER_JSON .matcher (explain );
54+ assertTrue ("Explain should contain sourceBuilder JSON:\n " + explain , m .find ());
55+ return m .group (1 ).replace ("\\ \" " , "\" " );
56+ }
57+
2058 @ Override
2159 protected void init () throws Exception {
2260 // _explain needs the index to exist for field resolution.
@@ -38,11 +76,20 @@ public void testExplainTopKProducesKnnQuery() throws IOException {
3876 + "vector='[1.0, 2.0, 3.0]', option='k=5') AS v "
3977 + "LIMIT 5" );
4078
41- // WrapperQueryBuilder wraps the knn JSON — verify the wrapper is present
42- // and track_scores is enabled for score preservation.
43- assertTrue ("Explain should contain wrapper query:\n " + explain , explain .contains ("wrapper" ));
4479 assertTrue (
4580 "Explain should contain track_scores:\n " + explain , explain .contains ("track_scores" ));
81+
82+ String knnJson = decodeSoleKnnJson (explain );
83+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
84+ assertTrue (
85+ "knn JSON should target the embedding field:\n " + knnJson ,
86+ knnJson .contains ("\" embedding\" " ));
87+ assertTrue (
88+ "knn JSON should contain the vector values:\n " + knnJson ,
89+ knnJson .contains ("[1.0,2.0,3.0]" ));
90+ assertTrue ("knn JSON should contain k=5:\n " + knnJson , knnJson .contains ("\" k\" :5" ));
91+ assertFalse (
92+ "Top-k without WHERE should not embed a filter:\n " + knnJson , knnJson .contains ("filter" ));
4693 }
4794
4895 @ Test
@@ -56,7 +103,18 @@ public void testExplainRadialMaxDistanceProducesKnnQuery() throws IOException {
56103 + "vector='[1.0, 2.0]', option='max_distance=10.5') AS v "
57104 + "LIMIT 100" );
58105
59- assertTrue ("Explain should contain wrapper query:\n " + explain , explain .contains ("wrapper" ));
106+ String knnJson = decodeSoleKnnJson (explain );
107+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
108+ assertTrue (
109+ "knn JSON should target the embedding field:\n " + knnJson ,
110+ knnJson .contains ("\" embedding\" " ));
111+ assertTrue (
112+ "knn JSON should contain the vector values:\n " + knnJson , knnJson .contains ("[1.0,2.0]" ));
113+ assertTrue (
114+ "knn JSON should contain max_distance=10.5:\n " + knnJson ,
115+ knnJson .contains ("\" max_distance\" :10.5" ));
116+ assertFalse (
117+ "Radial without WHERE should not embed a filter:\n " + knnJson , knnJson .contains ("filter" ));
60118 }
61119
62120 @ Test
@@ -70,7 +128,18 @@ public void testExplainRadialMinScoreProducesKnnQuery() throws IOException {
70128 + "vector='[1.0, 2.0]', option='min_score=0.8') AS v "
71129 + "LIMIT 100" );
72130
73- assertTrue ("Explain should contain wrapper query:\n " + explain , explain .contains ("wrapper" ));
131+ String knnJson = decodeSoleKnnJson (explain );
132+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
133+ assertTrue (
134+ "knn JSON should target the embedding field:\n " + knnJson ,
135+ knnJson .contains ("\" embedding\" " ));
136+ assertTrue (
137+ "knn JSON should contain the vector values:\n " + knnJson , knnJson .contains ("[1.0,2.0]" ));
138+ assertTrue (
139+ "knn JSON should contain min_score=0.8:\n " + knnJson ,
140+ knnJson .contains ("\" min_score\" :0.8" ));
141+ assertFalse (
142+ "Radial without WHERE should not embed a filter:\n " + knnJson , knnJson .contains ("filter" ));
74143 }
75144
76145 // ── Post-filter DSL shape ────────────────────────────────────────────
@@ -87,13 +156,35 @@ public void testExplainPostFilterProducesBoolQuery() throws IOException {
87156 + "WHERE v.state = 'TX' "
88157 + "LIMIT 10" );
89158
90- assertTrue ("Explain should contain bool query:\n " + explain , explain .contains ("bool" ));
159+ // Post-filter shape: outer bool.must=[knn], bool.filter=[term] — WHERE lives OUTSIDE the knn
160+ // payload. Verify by decoding the wrapper and asserting the predicate field is NOT embedded.
161+ String sourceBuilderJson = extractSourceBuilderJson (explain );
162+ assertTrue (
163+ "Explain should contain bool query:\n " + sourceBuilderJson ,
164+ sourceBuilderJson .contains ("\" bool\" " ));
165+ assertTrue (
166+ "Explain should contain must clause (knn in scoring context):\n " + sourceBuilderJson ,
167+ sourceBuilderJson .contains ("\" must\" " ));
91168 assertTrue (
92- "Explain should contain must clause (knn in scoring context):\n " + explain ,
93- explain .contains ("must" ));
169+ "Explain should contain filter clause (WHERE in non-scoring context):\n "
170+ + sourceBuilderJson ,
171+ sourceBuilderJson .contains ("\" filter\" " ));
94172 assertTrue (
95- "Explain should contain filter clause (WHERE in non-scoring context):\n " + explain ,
96- explain .contains ("filter" ));
173+ "Explain should contain the outer state predicate:\n " + sourceBuilderJson ,
174+ sourceBuilderJson .contains ("\" state.keyword\" " ));
175+
176+ String knnJson = decodeSoleKnnJson (explain );
177+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
178+ assertTrue (
179+ "knn JSON should target the embedding field:\n " + knnJson ,
180+ knnJson .contains ("\" embedding\" " ));
181+ assertTrue ("knn JSON should contain k=10:\n " + knnJson , knnJson .contains ("\" k\" :10" ));
182+ assertFalse (
183+ "Post-filter mode must not embed the WHERE predicate inside knn:\n " + knnJson ,
184+ knnJson .contains ("state" ));
185+ assertFalse (
186+ "Post-filter mode must not embed a filter inside knn:\n " + knnJson ,
187+ knnJson .contains ("filter" ));
97188 }
98189
99190 @ Test
@@ -108,13 +199,41 @@ public void testExplainCompoundPredicateProducesBoolQuery() throws IOException {
108199 + "WHERE v.state = 'TX' AND v.age > 30 "
109200 + "LIMIT 10" );
110201
111- assertTrue ("Explain should contain bool query:\n " + explain , explain .contains ("bool" ));
202+ // Compound post-filter still uses outer bool.must=[knn]/bool.filter=[predicates]. Both WHERE
203+ // predicates must stay outside the knn payload; otherwise efficient mode could false-positive.
204+ String sourceBuilderJson = extractSourceBuilderJson (explain );
205+ assertTrue (
206+ "Explain should contain bool query:\n " + sourceBuilderJson ,
207+ sourceBuilderJson .contains ("\" bool\" " ));
112208 assertTrue (
113- "Explain should contain must clause (knn in scoring context):\n " + explain ,
114- explain .contains ("must" ));
209+ "Explain should contain must clause (knn in scoring context):\n " + sourceBuilderJson ,
210+ sourceBuilderJson .contains ("\" must\" " ));
115211 assertTrue (
116- "Explain should contain filter clause (compound WHERE in non-scoring context):\n " + explain ,
117- explain .contains ("filter" ));
212+ "Explain should contain filter clause (compound WHERE in non-scoring context):\n "
213+ + sourceBuilderJson ,
214+ sourceBuilderJson .contains ("\" filter\" " ));
215+ assertTrue (
216+ "Explain should contain the outer state predicate:\n " + sourceBuilderJson ,
217+ sourceBuilderJson .contains ("\" state.keyword\" " ));
218+ assertTrue (
219+ "Explain should contain the outer age predicate:\n " + sourceBuilderJson ,
220+ sourceBuilderJson .contains ("\" age\" " ));
221+
222+ String knnJson = decodeSoleKnnJson (explain );
223+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
224+ assertTrue (
225+ "knn JSON should target the embedding field:\n " + knnJson ,
226+ knnJson .contains ("\" embedding\" " ));
227+ assertTrue ("knn JSON should contain k=10:\n " + knnJson , knnJson .contains ("\" k\" :10" ));
228+ assertFalse (
229+ "Compound post-filter must not embed the state predicate inside knn:\n " + knnJson ,
230+ knnJson .contains ("state" ));
231+ assertFalse (
232+ "Compound post-filter must not embed the age predicate inside knn:\n " + knnJson ,
233+ knnJson .contains ("age" ));
234+ assertFalse (
235+ "Compound post-filter must not embed a filter inside knn:\n " + knnJson ,
236+ knnJson .contains ("filter" ));
118237 }
119238
120239 @ Test
@@ -129,13 +248,37 @@ public void testExplainRadialWithWhereProducesBoolQuery() throws IOException {
129248 + "WHERE v.state = 'TX' "
130249 + "LIMIT 100" );
131250
132- assertTrue ("Explain should contain bool query:\n " + explain , explain .contains ("bool" ));
251+ // Radial + WHERE should also keep the WHERE predicate in the outer bool.filter rather than
252+ // embedding it into the radial knn payload.
253+ String sourceBuilderJson = extractSourceBuilderJson (explain );
254+ assertTrue (
255+ "Explain should contain bool query:\n " + sourceBuilderJson ,
256+ sourceBuilderJson .contains ("\" bool\" " ));
257+ assertTrue (
258+ "Explain should contain must clause (knn in scoring context):\n " + sourceBuilderJson ,
259+ sourceBuilderJson .contains ("\" must\" " ));
260+ assertTrue (
261+ "Explain should contain filter clause (WHERE in non-scoring context):\n "
262+ + sourceBuilderJson ,
263+ sourceBuilderJson .contains ("\" filter\" " ));
264+ assertTrue (
265+ "Explain should contain the outer state predicate:\n " + sourceBuilderJson ,
266+ sourceBuilderJson .contains ("\" state.keyword\" " ));
267+
268+ String knnJson = decodeSoleKnnJson (explain );
269+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
133270 assertTrue (
134- "Explain should contain must clause (knn in scoring context) :\n " + explain ,
135- explain .contains ("must " ));
271+ "knn JSON should target the embedding field :\n " + knnJson ,
272+ knnJson .contains ("\" embedding \" " ));
136273 assertTrue (
137- "Explain should contain filter clause (WHERE in non-scoring context):\n " + explain ,
138- explain .contains ("filter" ));
274+ "knn JSON should contain max_distance=10.5:\n " + knnJson ,
275+ knnJson .contains ("\" max_distance\" :10.5" ));
276+ assertFalse (
277+ "Radial post-filter must not embed the WHERE predicate inside knn:\n " + knnJson ,
278+ knnJson .contains ("state" ));
279+ assertFalse (
280+ "Radial post-filter must not embed a filter inside knn:\n " + knnJson ,
281+ knnJson .contains ("filter" ));
139282 }
140283
141284 // ── Sort + LIMIT explain ─────────────────────────────────────────────
@@ -185,9 +328,35 @@ public void testExplainFilterTypePostProducesBoolQuery() throws IOException {
185328 + "WHERE v.state = 'TX' "
186329 + "LIMIT 10" );
187330
188- assertTrue ("Explain should contain bool query:\n " + explain , explain .contains ("bool" ));
189- assertTrue ("Explain should contain must:\n " + explain , explain .contains ("must" ));
190- assertTrue ("Explain should contain filter:\n " + explain , explain .contains ("filter" ));
331+ // Explicit filter_type=post must produce the same bool.must=[knn]/bool.filter=[term] shape as
332+ // the default, and the WHERE predicate must NOT leak into the knn payload (that would be
333+ // efficient mode). This is the key false-positive guard: substring-only checks would pass for
334+ // efficient mode too.
335+ String sourceBuilderJson = extractSourceBuilderJson (explain );
336+ assertTrue (
337+ "Explain should contain bool query:\n " + sourceBuilderJson ,
338+ sourceBuilderJson .contains ("\" bool\" " ));
339+ assertTrue (
340+ "Explain should contain must:\n " + sourceBuilderJson ,
341+ sourceBuilderJson .contains ("\" must\" " ));
342+ assertTrue (
343+ "Explain should contain filter:\n " + sourceBuilderJson ,
344+ sourceBuilderJson .contains ("\" filter\" " ));
345+ assertTrue (
346+ "Explain should contain the outer state predicate:\n " + sourceBuilderJson ,
347+ sourceBuilderJson .contains ("\" state.keyword\" " ));
348+
349+ String knnJson = decodeSoleKnnJson (explain );
350+ assertTrue ("knn JSON should contain knn key:\n " + knnJson , knnJson .contains ("\" knn\" " ));
351+ assertTrue (
352+ "knn JSON should target the embedding field:\n " + knnJson ,
353+ knnJson .contains ("\" embedding\" " ));
354+ assertFalse (
355+ "filter_type=post must not embed the WHERE predicate inside knn:\n " + knnJson ,
356+ knnJson .contains ("state" ));
357+ assertFalse (
358+ "filter_type=post must not embed a filter inside knn:\n " + knnJson ,
359+ knnJson .contains ("filter" ));
191360 }
192361
193362 @ Test
@@ -204,26 +373,19 @@ public void testExplainFilterTypeEfficientProducesKnnWithFilter() throws IOExcep
204373
205374 // Efficient mode: knn rebuilt with filter inside, wrapped in WrapperQueryBuilder.
206375 // The knn JSON (including the embedded filter) is base64-encoded inside the wrapper,
207- // so we verify structure by: (1) wrapper present, (2) no bool/must in plaintext
208- // (that would be post-filter shape), (3) decode the base64 payload to confirm
209- // the filter and predicate field are embedded inside the knn query.
210- assertTrue ( "Explain should contain wrapper query: \n " + explain , explain . contains ( "wrapper" ) );
376+ // so we verify structure by: (1) no bool/must in plaintext (that would be post-filter shape),
377+ // (2) decode the base64 payload to confirm the filter and predicate field are embedded inside
378+ // the knn query.
379+ String sourceBuilderJson = extractSourceBuilderJson ( explain );
211380 assertFalse (
212- "Efficient mode should not produce bool query (that is post-filter shape):\n " + explain ,
213- explain .contains ("\" bool\" " ));
381+ "Efficient mode should not produce bool query (that is post-filter shape):\n "
382+ + sourceBuilderJson ,
383+ sourceBuilderJson .contains ("\" bool\" " ));
214384 assertFalse (
215- "Efficient mode should not contain must clause:\n " + explain , explain .contains ("\" must\" " ));
216-
217- // Extract and decode the base64 knn payload to verify filter embedding.
218- // The explain output escapes quotes as \", so match both \" and " forms.
219- java .util .regex .Matcher m =
220- java .util .regex .Pattern .compile ("\\ \\ ?\" query\\ \\ ?\" :\\ \\ ?\" ([A-Za-z0-9+/=]+)\\ \\ ?\" " )
221- .matcher (explain );
222- assertTrue ("Explain should contain base64-encoded knn query:\n " + explain , m .find ());
223- String knnJson =
224- new String (
225- java .util .Base64 .getDecoder ().decode (m .group (1 )),
226- java .nio .charset .StandardCharsets .UTF_8 );
385+ "Efficient mode should not contain must clause:\n " + sourceBuilderJson ,
386+ sourceBuilderJson .contains ("\" must\" " ));
387+
388+ String knnJson = decodeSoleKnnJson (explain );
227389 assertTrue (
228390 "Efficient mode knn JSON should contain filter:\n " + knnJson , knnJson .contains ("filter" ));
229391 assertTrue (
0 commit comments