Skip to content

Commit a224f19

Browse files
committed
Resolve simple OpenSearch date-math values at plan time in visitSearch
The Lucene-secondary engine on a composite parquet/lucene shard does not evaluate date-math tokens like {@code now} or {@code now-1h} inside a {@code query_string} filter, so the previous fallback path returned zero rows for every {@code earliest=}/{@code latest=} predicate the visitor left in query_string form.

Resolve the subset we can handle without snap-to-unit math at plan time:

- {@code "now"} / {@code "now()"} → current UTC instant
- {@code "now+/-Nunit"} (units {@code s m h d w M y q}) → simple arithmetic
- 12+-digit numeric strings → epoch-millis (PPL pre-converts {@code earliest=1754020060.123} / {@code latest=1754020061} to ms strings in {@code visitTimeModifierValue})

Emit the result as a {@code TIMESTAMP} literal so the comparison lowers natively via DataFusion against the parquet primary, bypassing Lucene entirely.

Anchored expressions ({@code "2024-01-15||+1d"}), snap-to-unit rounding ({@code "now/h"}, {@code "now+1mon/q"}), week alignment ({@code "now/w-1d"}), and other shapes we don't model still fall through to the existing {@code query_string} fallback.

Surfaced by `CalciteSearchCommandIT.testSearchWithAbsoluteEarliestAndNow` and similar time-modifier tests in the analytics-engine route.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 53c33ab commit a224f19

1 file changed

Lines changed: 149 additions & 8 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 149 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -384,19 +384,29 @@ private Optional<UnresolvedExpression> lowerSearchExpression(
384384
if (!isLowerableField(comp.getField(), knownFields)) {
385385
return Optional.empty();
386386
}
387-
if (isOpenSearchDateMath(comp.getValue().getLiteral())) {
388-
return Optional.empty();
387+
UnresolvedExpression value = comp.getValue().getLiteral();
388+
if (isOpenSearchDateMath(value)) {
389+
// Lucene's composite-secondary engine does NOT evaluate date-math tokens like
390+
// {@code now} inside a {@code query_string}, so a fallback would return zero rows on
391+
// parquet-backed shards. Resolve the simple cases ({@code now}, {@code now+/-Nunit},
392+
// epoch-millis) at plan time and emit a {@code TIMESTAMP} literal instead. Complex
393+
// forms (snap {@code /unit}, anchored {@code ||}, etc.) still fall through to
394+
// {@code query_string}.
395+
Optional<UnresolvedExpression> resolved = tryResolveOpenSearchDateMath(value);
396+
if (resolved.isEmpty()) {
397+
return Optional.empty();
398+
}
399+
value = resolved.get();
389400
}
390-
if (containsLuceneWildcard(comp.getValue().getLiteral())) {
401+
if (containsLuceneWildcard(value)) {
391402
// `severityText=ERR*` / `field=foo?` / `name=*-service` — Lucene-style wildcards in
392403
// the right-hand value. A native `=` lowering would compare literally and drop every
393404
// matching document. Keep the query in query_string form so Lucene evaluates the
394405
// wildcard.
395406
return Optional.empty();
396407
}
397408
return Optional.of(
398-
AstDSL.compare(
399-
comp.getOperator().getSymbol(), comp.getField(), comp.getValue().getLiteral()));
409+
AstDSL.compare(comp.getOperator().getSymbol(), comp.getField(), value));
400410
}
401411
if (e instanceof SearchIn) {
402412
SearchIn in = (SearchIn) e;
@@ -455,6 +465,140 @@ private static boolean isOpenSearchDateMath(UnresolvedExpression value) {
455465
return false;
456466
}
457467

468+
/**
469+
* Resolves the subset of OpenSearch date-math values we can evaluate at plan time:
470+
*
471+
* <ul>
472+
* <li>{@code "now"} / {@code "now()"} — current UTC instant.
473+
* <li>{@code "now+/-Nunit"} (units {@code s m h d w M y q}) — current instant with simple
474+
* arithmetic applied.
475+
* <li>Bare 12+-digit numeric strings — epoch-millis (PPL's
476+
* {@code earliest=<seconds>} / {@code latest=<decimal>} forms are pre-converted to
477+
* milliseconds by {@code visitTimeModifierValue}).
478+
* </ul>
479+
*
480+
* <p>Returns empty for anchored expressions ({@code "2024-01-15||+1d"}), snap-to-unit
481+
* rounding ({@code "now/h"}, {@code "now+1mon/q"}), week alignment ({@code "now/w-1d"}),
482+
* and any other shape we don't model. Those continue to fall through to the
483+
* {@code query_string} fallback in {@link #lowerSearchExpression}.
484+
*/
485+
private static Optional<UnresolvedExpression> tryResolveOpenSearchDateMath(
486+
UnresolvedExpression value) {
487+
if (!(value instanceof Literal)) {
488+
return Optional.empty();
489+
}
490+
Object raw = ((Literal) value).getValue();
491+
if (!(raw instanceof String s)) {
492+
return Optional.empty();
493+
}
494+
s = s.trim();
495+
if (s.isEmpty()) {
496+
return Optional.empty();
497+
}
498+
java.time.Instant result;
499+
String lower = s.toLowerCase(java.util.Locale.ROOT);
500+
if (lower.startsWith("now")) {
501+
String rest = lower.substring(3);
502+
if (rest.startsWith("()")) {
503+
rest = rest.substring(2);
504+
}
505+
result = applyDateMath(java.time.Instant.now(), rest);
506+
if (result == null) {
507+
return Optional.empty();
508+
}
509+
} else if (isAllDigits(s)) {
510+
try {
511+
result = java.time.Instant.ofEpochMilli(Long.parseLong(s));
512+
} catch (NumberFormatException e) {
513+
return Optional.empty();
514+
}
515+
} else {
516+
return Optional.empty();
517+
}
518+
// PPL TIMESTAMP literals downstream are parsed by Calcite, which accepts
519+
// {@code yyyy-MM-dd HH:mm:ss} or {@code yyyy-MM-dd HH:mm:ss.SSS}. Round at
520+
// millisecond precision; emit a fractional component only when non-zero so a
521+
// whole-second value matches the canonical PPL form (the analyzer rejects
522+
// {@code yyyy-MM-dd HH:mm:ss.000}-shape literals with a trailing-zero parse
523+
// error).
524+
long millis = result.getEpochSecond() * 1000 + result.getNano() / 1_000_000;
525+
String pattern = (millis % 1000 == 0) ? "yyyy-MM-dd HH:mm:ss" : "yyyy-MM-dd HH:mm:ss.SSS";
526+
String formatted =
527+
java.time.format.DateTimeFormatter.ofPattern(pattern)
528+
.withZone(java.time.ZoneOffset.UTC)
529+
.format(result);
530+
return Optional.of(AstDSL.timestampLiteral(formatted));
531+
}
532+
533+
private static boolean isAllDigits(String s) {
534+
if (s.length() < 12) {
535+
return false;
536+
}
537+
for (int i = 0; i < s.length(); i++) {
538+
char c = s.charAt(i);
539+
if (c < '0' || c > '9') {
540+
return false;
541+
}
542+
}
543+
return true;
544+
}
545+
546+
/**
547+
* Applies a sequence of {@code +/-Nunit} adjustments to a base instant. Returns {@code null}
548+
* on any character we don't model (unknown unit, snap-to-unit {@code /}, gap between
549+
* matches), letting the caller fall through to {@code query_string}.
550+
*/
551+
private static java.time.Instant applyDateMath(java.time.Instant base, String math) {
552+
if (math.isEmpty()) {
553+
return base;
554+
}
555+
java.util.regex.Matcher m =
556+
java.util.regex.Pattern.compile("([+-])(\\d+)([smhdwMyq])").matcher(math);
557+
java.time.Instant result = base;
558+
int lastEnd = 0;
559+
while (m.find()) {
560+
if (m.start() != lastEnd) {
561+
return null;
562+
}
563+
long sign = m.group(1).equals("+") ? 1 : -1;
564+
long n = Long.parseLong(m.group(2));
565+
java.time.ZonedDateTime zdt = result.atZone(java.time.ZoneOffset.UTC);
566+
switch (m.group(3)) {
567+
case "s":
568+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.SECONDS);
569+
break;
570+
case "m":
571+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.MINUTES);
572+
break;
573+
case "h":
574+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.HOURS);
575+
break;
576+
case "d":
577+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.DAYS);
578+
break;
579+
case "w":
580+
result = result.plus(sign * n * 7, java.time.temporal.ChronoUnit.DAYS);
581+
break;
582+
case "M":
583+
result = zdt.plusMonths(sign * n).toInstant();
584+
break;
585+
case "y":
586+
result = zdt.plusYears(sign * n).toInstant();
587+
break;
588+
case "q":
589+
result = zdt.plusMonths(sign * n * 3).toInstant();
590+
break;
591+
default:
592+
return null;
593+
}
594+
lastEnd = m.end();
595+
}
596+
if (lastEnd != math.length()) {
597+
return null;
598+
}
599+
return result;
600+
}
601+
458602
/**
459603
* Detects an unescaped Lucene wildcard ({@code *} or {@code ?}) in a string literal. A preceding
460604
* odd run of backslashes means the wildcard is already escaped (literal star / question mark in
@@ -739,9 +883,6 @@ private List<RexNode> expandProjectFields(
739883
.filter(addedFields::add)
740884
.forEach(field -> expandedFields.add(context.relBuilder.field(field)));
741885
}
742-
case Alias alias -> {
743-
expandedFields.add(rexVisitor.analyze(alias, context));
744-
}
745886
default ->
746887
throw new IllegalStateException(
747888
"Unexpected expression type in project list: " + expr.getClass().getSimpleName());

0 commit comments

Comments
 (0)