102102import org .opensearch .sql .ast .expression .PatternMethod ;
103103import org .opensearch .sql .ast .expression .PatternMode ;
104104import org .opensearch .sql .ast .expression .QualifiedName ;
105+ import org .opensearch .sql .ast .expression .SearchAnd ;
106+ import org .opensearch .sql .ast .expression .SearchComparison ;
107+ import org .opensearch .sql .ast .expression .SearchExpression ;
108+ import org .opensearch .sql .ast .expression .SearchGroup ;
109+ import org .opensearch .sql .ast .expression .SearchIn ;
110+ import org .opensearch .sql .ast .expression .SearchLiteral ;
111+ import org .opensearch .sql .ast .expression .SearchNot ;
112+ import org .opensearch .sql .ast .expression .SearchOr ;
105113import org .opensearch .sql .ast .expression .Span ;
106114import org .opensearch .sql .ast .expression .SpanUnit ;
107115import org .opensearch .sql .ast .expression .UnresolvedExpression ;
@@ -280,17 +288,207 @@ private RelBuilder scan(RelOptTable tableSchema, CalcitePlanContext context) {
280288 public RelNode visitSearch (Search node , CalcitePlanContext context ) {
281289 // Visit the Relation child to get the scan
282290 node .getChild ().get (0 ).accept (this , context );
283- // Create query_string function
284- Function queryStringFunc =
285- AstDSL .function (
286- "query_string" ,
287- AstDSL .unresolvedArg ("query" , AstDSL .stringLiteral (node .getQueryString ())));
288- RexNode queryStringRex = rexVisitor .analyze (queryStringFunc , context );
289-
290- context .relBuilder .filter (queryStringRex );
291+
292+ // Lower the typed SearchExpression AST to a native filter where possible. The
293+ // structured fragments (field comparisons, AND / OR / NOT, IN, time-modifier-resolved
294+ // comparisons) become a plain Compare / And / Or / Not / In AST that
295+ // ExpressionAnalyzer/rexVisitor handles like any `where`-clause condition, so the
296+ // analytics-engine route can mark them as DataFusion-native predicates instead of
297+ // delegating the whole thing to Lucene as a `query_string(...)`. Free-text / phrase
298+ // literals — and any comparison whose value looks like an OpenSearch date-math
299+ // expression or references a field absent from the relation's row type — stay in
300+ // query_string form because they have no faithful native equivalent.
301+ Set <String > knownFields = new HashSet <>(context .relBuilder .peek ().getRowType ().getFieldNames ());
302+ SearchExpression original = node .getOriginalExpression ();
303+ UnresolvedExpression filter =
304+ original != null
305+ ? buildSearchFilter (original , node .getQueryString (), knownFields )
306+ : buildQueryStringFunction (node .getQueryString ());
307+ RexNode filterRex = rexVisitor .analyze (filter , context );
308+
309+ context .relBuilder .filter (filterRex );
291310 return context .relBuilder .peek ();
292311 }
293312
313+ /**
314+ * Build the filter AST for a search predicate. Prefers a fully-native lowering of {@code e};
315+ * falls back to {@code AND}-splitting a top-level conjunction with one native and one free-text
316+ * side; finally falls back to wrapping the original query string in {@code query_string(...)}.
317+ */
318+ private UnresolvedExpression buildSearchFilter (
319+ SearchExpression e , String fallbackQueryString , Set <String > knownFields ) {
320+ Optional <UnresolvedExpression > structured = lowerSearchExpression (e , knownFields );
321+ if (structured .isPresent ()) {
322+ return structured .get ();
323+ }
324+ // Top-level AND-split: native(L) AND queryString(toQueryString(R)) (or symmetric).
325+ // Only applied at the outermost conjunction — deeper splits would change OR semantics.
326+ if (e instanceof SearchAnd ) {
327+ SearchAnd and = (SearchAnd ) e ;
328+ Optional <UnresolvedExpression > leftNative = lowerSearchExpression (and .getLeft (), knownFields );
329+ Optional <UnresolvedExpression > rightNative =
330+ lowerSearchExpression (and .getRight (), knownFields );
331+ if (leftNative .isPresent () && rightNative .isEmpty ()) {
332+ return AstDSL .and (
333+ leftNative .get (), buildQueryStringFunction (and .getRight ().toQueryString ()));
334+ }
335+ if (rightNative .isPresent () && leftNative .isEmpty ()) {
336+ return AstDSL .and (
337+ buildQueryStringFunction (and .getLeft ().toQueryString ()), rightNative .get ());
338+ }
339+ }
340+ // Fully relevance / mixed-OR / unsupported — keep the original query_string fallback.
341+ return buildQueryStringFunction (fallbackQueryString );
342+ }
343+
344+ /**
345+ * Recursively translate a {@link SearchExpression} to the equivalent generic-PPL filter AST.
346+ * Returns empty whenever any subtree contains a {@link SearchLiteral} (free text or phrase), a
347+ * {@link SearchComparison} whose value looks like an OpenSearch date-math expression (handled
348+ * correctly only by Lucene), or a comparison/IN against a field that isn't in the relation's row
349+ * type (which the analyzer would otherwise reject; PPL search semantics on unknown fields is
350+ * "match nothing", matched by the query_string fallback).
351+ */
352+ private Optional <UnresolvedExpression > lowerSearchExpression (
353+ SearchExpression e , Set <String > knownFields ) {
354+ if (e instanceof SearchGroup ) {
355+ return lowerSearchExpression (((SearchGroup ) e ).getExpression (), knownFields );
356+ }
357+ if (e instanceof SearchAnd ) {
358+ SearchAnd and = (SearchAnd ) e ;
359+ Optional <UnresolvedExpression > left = lowerSearchExpression (and .getLeft (), knownFields );
360+ Optional <UnresolvedExpression > right = lowerSearchExpression (and .getRight (), knownFields );
361+ if (left .isPresent () && right .isPresent ()) {
362+ return Optional .of (AstDSL .and (left .get (), right .get ()));
363+ }
364+ return Optional .empty ();
365+ }
366+ if (e instanceof SearchOr ) {
367+ SearchOr or = (SearchOr ) e ;
368+ Optional <UnresolvedExpression > left = lowerSearchExpression (or .getLeft (), knownFields );
369+ Optional <UnresolvedExpression > right = lowerSearchExpression (or .getRight (), knownFields );
370+ if (left .isPresent () && right .isPresent ()) {
371+ return Optional .of (AstDSL .or (left .get (), right .get ()));
372+ }
373+ return Optional .empty ();
374+ }
375+ if (e instanceof SearchNot ) {
376+ // Lucene `NOT field=value` matches docs where the field is missing OR field != value.
377+ // SQL `NOT (field = value)` (and Calcite's simplification to `field <> value`) drops
378+ // missing-field rows because NULL <> value evaluates to NULL → filtered. Routing NOT
379+ // through the query_string fallback preserves the documented PPL search semantics.
380+ return Optional .empty ();
381+ }
382+ if (e instanceof SearchComparison ) {
383+ SearchComparison comp = (SearchComparison ) e ;
384+ if (!isLowerableField (comp .getField (), knownFields )) {
385+ return Optional .empty ();
386+ }
387+ if (isOpenSearchDateMath (comp .getValue ().getLiteral ())) {
388+ return Optional .empty ();
389+ }
390+ if (containsLuceneWildcard (comp .getValue ().getLiteral ())) {
391+ // `severityText=ERR*` / `field=foo?` / `name=*-service` — Lucene-style wildcards in
392+ // the right-hand value. A native `=` lowering would compare literally and drop every
393+ // matching document. Keep the query in query_string form so Lucene evaluates the
394+ // wildcard.
395+ return Optional .empty ();
396+ }
397+ return Optional .of (
398+ AstDSL .compare (
399+ comp .getOperator ().getSymbol (), comp .getField (), comp .getValue ().getLiteral ()));
400+ }
401+ if (e instanceof SearchIn ) {
402+ SearchIn in = (SearchIn ) e ;
403+ if (!isLowerableField (in .getField (), knownFields )) {
404+ return Optional .empty ();
405+ }
406+ List <UnresolvedExpression > values =
407+ in .getValues ().stream ().map (SearchLiteral ::getLiteral ).collect (Collectors .toList ());
408+ return Optional .of (AstDSL .in (in .getField (), values ));
409+ }
410+ // SearchLiteral (standalone free-text / phrase) — no native equivalent.
411+ return Optional .empty ();
412+ }
413+
414+ private static boolean isLowerableField (Field field , Set <String > knownFields ) {
415+ QualifiedName qn = (QualifiedName ) field .getField ();
416+ return knownFields .contains (qn .toString ());
417+ }
418+
419+ /**
420+ * Recognise OpenSearch date-math values produced by {@code visitTimeModifierExpression} — {@code
421+ * "now"}, {@code "now-1h"}, {@code "now+1y/M-1M"}, anchored expressions like {@code
422+ * "2024-01-15||+1d"}, and epoch-millis strings such as {@code "1754020060000"}. These strings
423+ * have to be evaluated as Lucene-style date math; Calcite would compare them lexically. Detecting
424+ * them lets the visitor keep the comparison in {@code query_string} form so the existing
425+ * semantics survive.
426+ */
427+ private static boolean isOpenSearchDateMath (UnresolvedExpression value ) {
428+ if (!(value instanceof Literal )) {
429+ return false ;
430+ }
431+ Object raw = ((Literal ) value ).getValue ();
432+ if (!(raw instanceof String )) {
433+ return false ;
434+ }
435+ String s = ((String ) raw ).trim ();
436+ if (s .isEmpty ()) {
437+ return false ;
438+ }
439+ if (s .regionMatches (true , 0 , "now" , 0 , 3 )) {
440+ return true ;
441+ }
442+ if (s .contains ("||" )) {
443+ return true ;
444+ }
445+ // Epoch-millis from `earliest=1754020060` / `latest=1754020060.123` — 12+ digits only.
446+ if (s .length () >= 12 ) {
447+ for (int i = 0 ; i < s .length (); i ++) {
448+ char c = s .charAt (i );
449+ if (c < '0' || c > '9' ) {
450+ return false ;
451+ }
452+ }
453+ return true ;
454+ }
455+ return false ;
456+ }
457+
458+ /**
459+ * Detects an unescaped Lucene wildcard ({@code *} or {@code ?}) in a string literal. A preceding
460+ * odd run of backslashes means the wildcard is already escaped (literal star / question mark in
461+ * the value), so it's safe to lower as a regular equality.
462+ */
463+ private static boolean containsLuceneWildcard (UnresolvedExpression value ) {
464+ if (!(value instanceof Literal )) {
465+ return false ;
466+ }
467+ Object raw = ((Literal ) value ).getValue ();
468+ if (!(raw instanceof String )) {
469+ return false ;
470+ }
471+ String s = (String ) raw ;
472+ for (int i = 0 ; i < s .length (); i ++) {
473+ char c = s .charAt (i );
474+ if (c == '*' || c == '?' ) {
475+ int backslashes = 0 ;
476+ for (int j = i - 1 ; j >= 0 && s .charAt (j ) == '\\' ; j --) {
477+ backslashes ++;
478+ }
479+ if ((backslashes & 1 ) == 0 ) {
480+ return true ;
481+ }
482+ }
483+ }
484+ return false ;
485+ }
486+
487+ private Function buildQueryStringFunction (String queryString ) {
488+ return AstDSL .function (
489+ "query_string" , AstDSL .unresolvedArg ("query" , AstDSL .stringLiteral (queryString )));
490+ }
491+
294492 @ Override
295493 public RelNode visitFilter (Filter node , CalcitePlanContext context ) {
296494 visitChildren (node , context );
@@ -3038,14 +3236,28 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
30383236 countField = context .relBuilder .field (countFieldName );
30393237 }
30403238
3239+ // Append the rare/top field columns as secondary order keys so ties in the count column
3240+ // resolve deterministically. Without this, ROW_NUMBER's tie-break is insertion-order
3241+ // dependent and varies between backends (e.g. analytics-engine vs in-process Calcite).
3242+ // Skip array / struct / map columns — Calcite's executor can't compare those at runtime
3243+ // (`ArrayList cannot be cast to Comparable`), and pushdown to the OpenSearch terms
3244+ // aggregation only supports scalar keys.
3245+ List <RexNode > tieBreakKeys =
3246+ rexVisitor .analyze (fieldList , context ).stream ()
3247+ .filter (CalciteRelNodeVisitor ::isComparableOrderKey )
3248+ .toList ();
3249+ List <RexNode > orderKeys = new ArrayList <>(tieBreakKeys .size () + 1 );
3250+ orderKeys .add (countField );
3251+ orderKeys .addAll (tieBreakKeys );
3252+
30413253 RexNode rowNumberWindowOver =
30423254 PlanUtils .makeOver (
30433255 context ,
30443256 BuiltinFunctionName .ROW_NUMBER ,
30453257 null ,
30463258 List .of (),
30473259 partitionKeys ,
3048- List . of ( countField ) ,
3260+ orderKeys ,
30493261 WindowFrame .toCurrentRow ());
30503262 context .relBuilder .projectPlus (
30513263 context .relBuilder .alias (rowNumberWindowOver , ROW_NUMBER_COLUMN_FOR_RARE_TOP ));
@@ -3069,6 +3281,15 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
30693281 return context .relBuilder .peek ();
30703282 }
30713283
3284+ private static boolean isComparableOrderKey (RexNode key ) {
3285+ SqlTypeName typeName = key .getType ().getSqlTypeName ();
3286+ return typeName != SqlTypeName .ARRAY
3287+ && typeName != SqlTypeName .MULTISET
3288+ && typeName != SqlTypeName .MAP
3289+ && typeName != SqlTypeName .ROW
3290+ && typeName != SqlTypeName .STRUCTURED ;
3291+ }
3292+
30723293 @ Override
30733294 public RelNode visitTableFunction (TableFunction node , CalcitePlanContext context ) {
30743295 throw new CalciteUnsupportedException ("Table function is unsupported in Calcite" );
0 commit comments