Skip to content

Commit 52ffeb8

Browse files
committed
Resolve simple OpenSearch date-math values at plan time in visitSearch
The Lucene-secondary engine on a composite parquet/lucene shard does not evaluate date-math tokens like {@code now} or {@code now-1h} inside a {@code query_string} filter, so the previous fallback path returned zero rows for every {@code earliest=}/{@code latest=} predicate the visitor left in query_string form.

Resolve the subset we can handle without snap-to-unit math at plan time:

- {@code "now"} / {@code "now()"} → current UTC instant
- {@code "now+/-Nunit"} (units {@code s m h d w M y q}) → simple arithmetic
- 12+-digit numeric strings → epoch-millis (PPL pre-converts {@code earliest=1754020060.123} / {@code latest=1754020061} to ms strings in {@code visitTimeModifierValue})

Emit the result as a {@code TIMESTAMP} literal so the comparison lowers natively via DataFusion against the parquet primary, bypassing Lucene entirely.

Anchored expressions ({@code "2024-01-15||+1d"}), snap-to-unit rounding ({@code "now/h"}, {@code "now+1mon/q"}), week alignment ({@code "now/w-1d"}), and other shapes we don't model still fall through to the existing {@code query_string} fallback.

Surfaced by `CalciteSearchCommandIT.testSearchWithAbsoluteEarliestAndNow` and similar time-modifier tests in the analytics-engine route.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 828b80d commit 52ffeb8

1 file changed

Lines changed: 141 additions & 5 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 141 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -384,19 +384,29 @@ private Optional<UnresolvedExpression> lowerSearchExpression(
384384
if (!isLowerableField(comp.getField(), knownFields)) {
385385
return Optional.empty();
386386
}
387-
if (isOpenSearchDateMath(comp.getValue().getLiteral())) {
388-
return Optional.empty();
387+
UnresolvedExpression value = comp.getValue().getLiteral();
388+
if (isOpenSearchDateMath(value)) {
389+
// Lucene's composite-secondary engine does NOT evaluate date-math tokens like
390+
// {@code now} inside a {@code query_string}, so a fallback would return zero rows on
391+
// parquet-backed shards. Resolve the simple cases ({@code now}, {@code now+/-Nunit},
392+
// epoch-millis) at plan time and emit a {@code TIMESTAMP} literal instead. Complex
393+
// forms (snap {@code /unit}, anchored {@code ||}, etc.) still fall through to
394+
// {@code query_string}.
395+
Optional<UnresolvedExpression> resolved = tryResolveOpenSearchDateMath(value);
396+
if (resolved.isEmpty()) {
397+
return Optional.empty();
398+
}
399+
value = resolved.get();
389400
}
390-
if (containsLuceneWildcard(comp.getValue().getLiteral())) {
401+
if (containsLuceneWildcard(value)) {
391402
// `severityText=ERR*` / `field=foo?` / `name=*-service` — Lucene-style wildcards in
392403
// the right-hand value. A native `=` lowering would compare literally and drop every
393404
// matching document. Keep the query in query_string form so Lucene evaluates the
394405
// wildcard.
395406
return Optional.empty();
396407
}
397408
return Optional.of(
398-
AstDSL.compare(
399-
comp.getOperator().getSymbol(), comp.getField(), comp.getValue().getLiteral()));
409+
AstDSL.compare(comp.getOperator().getSymbol(), comp.getField(), value));
400410
}
401411
if (e instanceof SearchIn) {
402412
SearchIn in = (SearchIn) e;
@@ -455,6 +465,132 @@ private static boolean isOpenSearchDateMath(UnresolvedExpression value) {
455465
return false;
456466
}
457467

468+
/**
469+
* Resolves the subset of OpenSearch date-math values we can evaluate at plan time:
470+
*
471+
* <ul>
472+
* <li>{@code "now"} / {@code "now()"} — current UTC instant.
473+
* <li>{@code "now+/-Nunit"} (units {@code s m h d w M y q}) — current instant with simple
474+
* arithmetic applied.
475+
* <li>Bare 12+-digit numeric strings — epoch-millis (PPL's
476+
* {@code earliest=<seconds>} / {@code latest=<decimal>} forms are pre-converted to
477+
* milliseconds by {@code visitTimeModifierValue}).
478+
* </ul>
479+
*
480+
* <p>Returns empty for anchored expressions ({@code "2024-01-15||+1d"}), snap-to-unit
481+
* rounding ({@code "now/h"}, {@code "now+1mon/q"}), week alignment ({@code "now/w-1d"}),
482+
* and any other shape we don't model. Those continue to fall through to the
483+
* {@code query_string} fallback in {@link #lowerSearchExpression}.
484+
*/
485+
private static Optional<UnresolvedExpression> tryResolveOpenSearchDateMath(
486+
UnresolvedExpression value) {
487+
if (!(value instanceof Literal)) {
488+
return Optional.empty();
489+
}
490+
Object raw = ((Literal) value).getValue();
491+
if (!(raw instanceof String s)) {
492+
return Optional.empty();
493+
}
494+
s = s.trim();
495+
if (s.isEmpty()) {
496+
return Optional.empty();
497+
}
498+
java.time.Instant result;
499+
String lower = s.toLowerCase(java.util.Locale.ROOT);
500+
if (lower.startsWith("now")) {
501+
String rest = lower.substring(3);
502+
if (rest.startsWith("()")) {
503+
rest = rest.substring(2);
504+
}
505+
result = applyDateMath(java.time.Instant.now(), rest);
506+
if (result == null) {
507+
return Optional.empty();
508+
}
509+
} else if (isAllDigits(s)) {
510+
try {
511+
result = java.time.Instant.ofEpochMilli(Long.parseLong(s));
512+
} catch (NumberFormatException e) {
513+
return Optional.empty();
514+
}
515+
} else {
516+
return Optional.empty();
517+
}
518+
String formatted =
519+
java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSSSSS")
520+
.withZone(java.time.ZoneOffset.UTC)
521+
.format(result);
522+
return Optional.of(AstDSL.timestampLiteral(formatted));
523+
}
524+
525+
private static boolean isAllDigits(String s) {
526+
if (s.length() < 12) {
527+
return false;
528+
}
529+
for (int i = 0; i < s.length(); i++) {
530+
char c = s.charAt(i);
531+
if (c < '0' || c > '9') {
532+
return false;
533+
}
534+
}
535+
return true;
536+
}
537+
538+
/**
539+
* Applies a sequence of {@code +/-Nunit} adjustments to a base instant. Returns {@code null}
540+
* on any character we don't model (unknown unit, snap-to-unit {@code /}, gap between
541+
* matches), letting the caller fall through to {@code query_string}.
542+
*/
543+
private static java.time.Instant applyDateMath(java.time.Instant base, String math) {
544+
if (math.isEmpty()) {
545+
return base;
546+
}
547+
java.util.regex.Matcher m =
548+
java.util.regex.Pattern.compile("([+-])(\\d+)([smhdwMyq])").matcher(math);
549+
java.time.Instant result = base;
550+
int lastEnd = 0;
551+
while (m.find()) {
552+
if (m.start() != lastEnd) {
553+
return null;
554+
}
555+
long sign = m.group(1).equals("+") ? 1 : -1;
556+
long n = Long.parseLong(m.group(2));
557+
java.time.ZonedDateTime zdt = result.atZone(java.time.ZoneOffset.UTC);
558+
switch (m.group(3)) {
559+
case "s":
560+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.SECONDS);
561+
break;
562+
case "m":
563+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.MINUTES);
564+
break;
565+
case "h":
566+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.HOURS);
567+
break;
568+
case "d":
569+
result = result.plus(sign * n, java.time.temporal.ChronoUnit.DAYS);
570+
break;
571+
case "w":
572+
result = result.plus(sign * n * 7, java.time.temporal.ChronoUnit.DAYS);
573+
break;
574+
case "M":
575+
result = zdt.plusMonths(sign * n).toInstant();
576+
break;
577+
case "y":
578+
result = zdt.plusYears(sign * n).toInstant();
579+
break;
580+
case "q":
581+
result = zdt.plusMonths(sign * n * 3).toInstant();
582+
break;
583+
default:
584+
return null;
585+
}
586+
lastEnd = m.end();
587+
}
588+
if (lastEnd != math.length()) {
589+
return null;
590+
}
591+
return result;
592+
}
593+
458594
/**
459595
* Detects an unescaped Lucene wildcard ({@code *} or {@code ?}) in a string literal. A preceding
460596
* odd run of backslashes means the wildcard is already escaped (literal star / question mark in

0 commit comments

Comments
 (0)