Skip to content

Commit 3b29ca5

Browse files
committed
Time Unit Unification for bin/stats (opensearch-project#4450)
(cherry picked from commit 5bb2747)
Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent d994ca9 commit 3b29ca5

5 files changed

Lines changed: 599 additions & 60 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ public class SpanParser {
5252
Map.entry("months", "months"),
5353
Map.entry("month", "months"),
5454
Map.entry("mon", "months"),
55+
Map.entry("M", "months"), // Uppercase M for months (case-sensitive)
5556
// Milliseconds
5657
Map.entry("ms", "ms"),
5758
// Microseconds
@@ -63,7 +64,16 @@ public class SpanParser {
6364

6465
// Build direct lookup map for efficient unit detection
6566
for (String unit : NORMALIZED_UNITS.keySet()) {
66-
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
67+
// Preserve case for case-sensitive units: M (month), m (minute), us, cs, ds
68+
if (unit.equals("M")
69+
|| unit.equals("m")
70+
|| unit.equals("us")
71+
|| unit.equals("cs")
72+
|| unit.equals("ds")) {
73+
UNIT_LOOKUP.put(unit, unit);
74+
} else {
75+
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
76+
}
6777
}
6878
}
6979

@@ -135,15 +145,27 @@ private static SpanInfo parseNumericSpan(String spanStr) {
135145

136146
/** Extracts time unit from span string (returns original matched unit, not normalized). */
137147
public static String extractTimeUnit(String spanStr) {
138-
String lowerSpanStr = spanStr.toLowerCase(Locale.ROOT);
139148
String longestMatch = null;
140149

141150
// Find the longest unit that matches as a suffix
142151
for (String unit : UNIT_LOOKUP.keySet()) {
143-
if (lowerSpanStr.endsWith(unit)) {
152+
// For case-sensitive units (M, m, us, cs, ds), match case-sensitively
153+
boolean matches;
154+
if (unit.equals("M")
155+
|| unit.equals("m")
156+
|| unit.equals("us")
157+
|| unit.equals("cs")
158+
|| unit.equals("ds")) {
159+
matches = spanStr.endsWith(unit);
160+
} else {
161+
// For other units, match case-insensitively
162+
matches = spanStr.toLowerCase(Locale.ROOT).endsWith(unit.toLowerCase(Locale.ROOT));
163+
}
164+
165+
if (matches) {
144166
// Ensure this is a word boundary (not part of a larger word)
145-
int unitStartPos = lowerSpanStr.length() - unit.length();
146-
if (unitStartPos == 0 || !Character.isLetter(lowerSpanStr.charAt(unitStartPos - 1))) {
167+
int unitStartPos = spanStr.length() - unit.length();
168+
if (unitStartPos == 0 || !Character.isLetter(spanStr.charAt(unitStartPos - 1))) {
147169
// Keep the longest match
148170
if (longestMatch == null || unit.length() > longestMatch.length()) {
149171
longestMatch = unit;

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java

Lines changed: 5 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,14 @@ public RexNode createTimeSpanExpression(
3131
private boolean shouldApplyAligntime(String spanStr) {
3232
if (spanStr == null) return false;
3333

34-
spanStr = spanStr.replace("'", "").replace("\"", "").trim().toLowerCase();
34+
spanStr = spanStr.replace("'", "").replace("\"", "").trim();
3535
String timeUnit = SpanParser.extractTimeUnit(spanStr);
3636

3737
if (timeUnit == null) return true; // Pure number, assume hours
3838

3939
// Aligntime ignored for days, months, years
40-
String normalizedUnit = normalizeTimeUnit(timeUnit);
41-
return !normalizedUnit.equals("d") && !normalizedUnit.equals("M");
40+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
41+
return !normalizedUnit.equals("d") && !normalizedUnit.equals("months");
4242
}
4343

4444
private RexNode createAlignedTimeSpan(
@@ -64,7 +64,7 @@ private RexNode createAlignedTimeSpan(
6464
if (timeUnit != null) {
6565
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
6666
intervalValue = Integer.parseInt(valueStr);
67-
normalizedUnit = normalizeTimeUnit(timeUnit);
67+
normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
6868
} else {
6969
intervalValue = Integer.parseInt(spanStr);
7070
normalizedUnit = "h";
@@ -86,7 +86,7 @@ private RexNode createStandardTimeSpan(
8686
if (timeUnit != null) {
8787
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
8888
int value = Integer.parseInt(valueStr);
89-
String normalizedUnit = normalizeTimeUnit(timeUnit);
89+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
9090
return BinTimeSpanUtils.createBinTimeSpanExpression(
9191
fieldExpr, value, normalizedUnit, 0, context);
9292
} else {
@@ -120,45 +120,4 @@ private String extractModifier(String aligntimeStr) {
120120

121121
return null;
122122
}
123-
124-
private String normalizeTimeUnit(String unit) {
125-
switch (unit.toLowerCase()) {
126-
case "s":
127-
case "sec":
128-
case "secs":
129-
case "second":
130-
case "seconds":
131-
return "s";
132-
case "m":
133-
case "min":
134-
case "mins":
135-
case "minute":
136-
case "minutes":
137-
return "m";
138-
case "h":
139-
case "hr":
140-
case "hrs":
141-
case "hour":
142-
case "hours":
143-
return "h";
144-
case "d":
145-
case "day":
146-
case "days":
147-
return "d";
148-
case "mon":
149-
case "month":
150-
case "months":
151-
return "months";
152-
case "us":
153-
return "us";
154-
case "ms":
155-
return "ms";
156-
case "cs":
157-
return "cs";
158-
case "ds":
159-
return "ds";
160-
default:
161-
return unit;
162-
}
163-
}
164123
}

core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,33 +14,36 @@ public class TimeUnitRegistry {
1414
private static final Map<String, TimeUnitConfig> UNIT_MAPPING = new HashMap<>();
1515

1616
static {
17-
// Microseconds
17+
// Microseconds (case-sensitive, lowercase only)
1818
UNIT_MAPPING.put("us", TimeUnitConfig.MICROSECONDS);
1919

2020
// Milliseconds
2121
UNIT_MAPPING.put("ms", TimeUnitConfig.MILLISECONDS);
2222

23-
// Centiseconds
23+
// Centiseconds (case-sensitive, lowercase only)
2424
UNIT_MAPPING.put("cs", TimeUnitConfig.CENTISECONDS);
2525

26-
// Deciseconds
26+
// Deciseconds (case-sensitive, lowercase only)
2727
UNIT_MAPPING.put("ds", TimeUnitConfig.DECISECONDS);
2828

2929
// Seconds
3030
UNIT_MAPPING.put("s", TimeUnitConfig.SECONDS);
3131
UNIT_MAPPING.put("sec", TimeUnitConfig.SECONDS);
32+
UNIT_MAPPING.put("secs", TimeUnitConfig.SECONDS);
3233
UNIT_MAPPING.put("second", TimeUnitConfig.SECONDS);
3334
UNIT_MAPPING.put("seconds", TimeUnitConfig.SECONDS);
3435

35-
// Minutes
36+
// Minutes (case-sensitive lowercase 'm')
3637
UNIT_MAPPING.put("m", TimeUnitConfig.MINUTES);
3738
UNIT_MAPPING.put("min", TimeUnitConfig.MINUTES);
39+
UNIT_MAPPING.put("mins", TimeUnitConfig.MINUTES);
3840
UNIT_MAPPING.put("minute", TimeUnitConfig.MINUTES);
3941
UNIT_MAPPING.put("minutes", TimeUnitConfig.MINUTES);
4042

4143
// Hours
4244
UNIT_MAPPING.put("h", TimeUnitConfig.HOURS);
4345
UNIT_MAPPING.put("hr", TimeUnitConfig.HOURS);
46+
UNIT_MAPPING.put("hrs", TimeUnitConfig.HOURS);
4447
UNIT_MAPPING.put("hour", TimeUnitConfig.HOURS);
4548
UNIT_MAPPING.put("hours", TimeUnitConfig.HOURS);
4649

@@ -49,7 +52,7 @@ public class TimeUnitRegistry {
4952
UNIT_MAPPING.put("day", TimeUnitConfig.DAYS);
5053
UNIT_MAPPING.put("days", TimeUnitConfig.DAYS);
5154

52-
// Months (case-sensitive M)
55+
// Months (case-sensitive uppercase 'M')
5356
UNIT_MAPPING.put("M", TimeUnitConfig.MONTHS);
5457
UNIT_MAPPING.put("mon", TimeUnitConfig.MONTHS);
5558
UNIT_MAPPING.put("month", TimeUnitConfig.MONTHS);
@@ -59,15 +62,19 @@ public class TimeUnitRegistry {
5962
/**
6063
* Gets the time unit configuration for the given unit string.
6164
*
62-
* @param unit The unit string (e.g., "h", "hours", "M")
65+
* @param unit The unit string (e.g., "h", "hours", "M", "m")
6366
* @return The time unit configuration, or null if not found
6467
*/
6568
public static TimeUnitConfig getConfig(String unit) {
66-
if (unit.equals("M")) {
67-
// M is case-sensitive for months
69+
// Handle case-sensitive units: M (month), m (minute), and subsecond units (us, cs, ds)
70+
if (unit.equals("M")
71+
|| unit.equals("m")
72+
|| unit.equals("us")
73+
|| unit.equals("cs")
74+
|| unit.equals("ds")) {
6875
return UNIT_MAPPING.get(unit);
6976
} else {
70-
// For all other units, use lowercase lookup
77+
// For all other units, use lowercase lookup for case-insensitive matching
7178
return UNIT_MAPPING.get(unit.toLowerCase());
7279
}
7380
}

0 commit comments

Comments
 (0)