Skip to content

Commit 5bb2747

Browse files
authored
Time Unit Unification for bin/stats (#4450)
1 parent 71813bf commit 5bb2747

5 files changed

Lines changed: 131 additions & 55 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/utils/binning/SpanParser.java

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ public class SpanParser {
5252
Map.entry("months", "months"),
5353
Map.entry("month", "months"),
5454
Map.entry("mon", "months"),
55+
Map.entry("M", "months"), // Uppercase M for months (case-sensitive)
5556
// Milliseconds
5657
Map.entry("ms", "ms"),
5758
// Microseconds
@@ -63,7 +64,16 @@ public class SpanParser {
6364

6465
// Build direct lookup map for efficient unit detection
6566
for (String unit : NORMALIZED_UNITS.keySet()) {
66-
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
67+
// Preserve case for case-sensitive units: M (month), m (minute), us, cs, ds
68+
if (unit.equals("M")
69+
|| unit.equals("m")
70+
|| unit.equals("us")
71+
|| unit.equals("cs")
72+
|| unit.equals("ds")) {
73+
UNIT_LOOKUP.put(unit, unit);
74+
} else {
75+
UNIT_LOOKUP.put(unit.toLowerCase(Locale.ROOT), unit);
76+
}
6777
}
6878
}
6979

@@ -135,15 +145,27 @@ private static SpanInfo parseNumericSpan(String spanStr) {
135145

136146
/** Extracts time unit from span string (returns original matched unit, not normalized). */
137147
public static String extractTimeUnit(String spanStr) {
138-
String lowerSpanStr = spanStr.toLowerCase(Locale.ROOT);
139148
String longestMatch = null;
140149

141150
// Find the longest unit that matches as a suffix
142151
for (String unit : UNIT_LOOKUP.keySet()) {
143-
if (lowerSpanStr.endsWith(unit)) {
152+
// For case-sensitive units (M, m, us, cs, ds), match case-sensitively
153+
boolean matches;
154+
if (unit.equals("M")
155+
|| unit.equals("m")
156+
|| unit.equals("us")
157+
|| unit.equals("cs")
158+
|| unit.equals("ds")) {
159+
matches = spanStr.endsWith(unit);
160+
} else {
161+
// For other units, match case-insensitively
162+
matches = spanStr.toLowerCase(Locale.ROOT).endsWith(unit.toLowerCase(Locale.ROOT));
163+
}
164+
165+
if (matches) {
144166
// Ensure this is a word boundary (not part of a larger word)
145-
int unitStartPos = lowerSpanStr.length() - unit.length();
146-
if (unitStartPos == 0 || !Character.isLetter(lowerSpanStr.charAt(unitStartPos - 1))) {
167+
int unitStartPos = spanStr.length() - unit.length();
168+
if (unitStartPos == 0 || !Character.isLetter(spanStr.charAt(unitStartPos - 1))) {
147169
// Keep the longest match
148170
if (longestMatch == null || unit.length() > longestMatch.length()) {
149171
longestMatch = unit;

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/TimeSpanHelper.java

Lines changed: 5 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,14 @@ public RexNode createTimeSpanExpression(
3131
private boolean shouldApplyAligntime(String spanStr) {
3232
if (spanStr == null) return false;
3333

34-
spanStr = spanStr.replace("'", "").replace("\"", "").trim().toLowerCase();
34+
spanStr = spanStr.replace("'", "").replace("\"", "").trim();
3535
String timeUnit = SpanParser.extractTimeUnit(spanStr);
3636

3737
if (timeUnit == null) return true; // Pure number, assume hours
3838

3939
// Aligntime ignored for days, months, years
40-
String normalizedUnit = normalizeTimeUnit(timeUnit);
41-
return !normalizedUnit.equals("d") && !normalizedUnit.equals("M");
40+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
41+
return !normalizedUnit.equals("d") && !normalizedUnit.equals("months");
4242
}
4343

4444
private RexNode createAlignedTimeSpan(
@@ -64,7 +64,7 @@ private RexNode createAlignedTimeSpan(
6464
if (timeUnit != null) {
6565
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
6666
intervalValue = Integer.parseInt(valueStr);
67-
normalizedUnit = normalizeTimeUnit(timeUnit);
67+
normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
6868
} else {
6969
intervalValue = Integer.parseInt(spanStr);
7070
normalizedUnit = "h";
@@ -86,7 +86,7 @@ private RexNode createStandardTimeSpan(
8686
if (timeUnit != null) {
8787
String valueStr = spanStr.substring(0, spanStr.length() - timeUnit.length());
8888
int value = Integer.parseInt(valueStr);
89-
String normalizedUnit = normalizeTimeUnit(timeUnit);
89+
String normalizedUnit = SpanParser.getNormalizedUnit(timeUnit);
9090
return BinTimeSpanUtils.createBinTimeSpanExpression(
9191
fieldExpr, value, normalizedUnit, 0, context);
9292
} else {
@@ -120,39 +120,4 @@ private String extractModifier(String aligntimeStr) {
120120

121121
return null;
122122
}
123-
124-
private String normalizeTimeUnit(String unit) {
125-
switch (unit.toLowerCase()) {
126-
case "s", "sec", "secs", "second", "seconds" -> {
127-
return "s";
128-
}
129-
case "m", "min", "mins", "minute", "minutes" -> {
130-
return "m";
131-
}
132-
case "h", "hr", "hrs", "hour", "hours" -> {
133-
return "h";
134-
}
135-
case "d", "day", "days" -> {
136-
return "d";
137-
}
138-
case "mon", "month", "months" -> {
139-
return "months";
140-
}
141-
case "us" -> {
142-
return "us";
143-
}
144-
case "ms" -> {
145-
return "ms";
146-
}
147-
case "cs" -> {
148-
return "cs";
149-
}
150-
case "ds" -> {
151-
return "ds";
152-
}
153-
default -> {
154-
return unit;
155-
}
156-
}
157-
}
158123
}

core/src/main/java/org/opensearch/sql/calcite/utils/binning/time/TimeUnitRegistry.java

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -14,33 +14,36 @@ public class TimeUnitRegistry {
1414
private static final Map<String, TimeUnitConfig> UNIT_MAPPING = new HashMap<>();
1515

1616
static {
17-
// Microseconds
17+
// Microseconds (case-sensitive, lowercase only)
1818
UNIT_MAPPING.put("us", TimeUnitConfig.MICROSECONDS);
1919

2020
// Milliseconds
2121
UNIT_MAPPING.put("ms", TimeUnitConfig.MILLISECONDS);
2222

23-
// Centiseconds
23+
// Centiseconds (case-sensitive, lowercase only)
2424
UNIT_MAPPING.put("cs", TimeUnitConfig.CENTISECONDS);
2525

26-
// Deciseconds
26+
// Deciseconds (case-sensitive, lowercase only)
2727
UNIT_MAPPING.put("ds", TimeUnitConfig.DECISECONDS);
2828

2929
// Seconds
3030
UNIT_MAPPING.put("s", TimeUnitConfig.SECONDS);
3131
UNIT_MAPPING.put("sec", TimeUnitConfig.SECONDS);
32+
UNIT_MAPPING.put("secs", TimeUnitConfig.SECONDS);
3233
UNIT_MAPPING.put("second", TimeUnitConfig.SECONDS);
3334
UNIT_MAPPING.put("seconds", TimeUnitConfig.SECONDS);
3435

35-
// Minutes
36+
// Minutes (case-sensitive lowercase 'm')
3637
UNIT_MAPPING.put("m", TimeUnitConfig.MINUTES);
3738
UNIT_MAPPING.put("min", TimeUnitConfig.MINUTES);
39+
UNIT_MAPPING.put("mins", TimeUnitConfig.MINUTES);
3840
UNIT_MAPPING.put("minute", TimeUnitConfig.MINUTES);
3941
UNIT_MAPPING.put("minutes", TimeUnitConfig.MINUTES);
4042

4143
// Hours
4244
UNIT_MAPPING.put("h", TimeUnitConfig.HOURS);
4345
UNIT_MAPPING.put("hr", TimeUnitConfig.HOURS);
46+
UNIT_MAPPING.put("hrs", TimeUnitConfig.HOURS);
4447
UNIT_MAPPING.put("hour", TimeUnitConfig.HOURS);
4548
UNIT_MAPPING.put("hours", TimeUnitConfig.HOURS);
4649

@@ -49,7 +52,7 @@ public class TimeUnitRegistry {
4952
UNIT_MAPPING.put("day", TimeUnitConfig.DAYS);
5053
UNIT_MAPPING.put("days", TimeUnitConfig.DAYS);
5154

52-
// Months (case-sensitive M)
55+
// Months (case-sensitive uppercase 'M')
5356
UNIT_MAPPING.put("M", TimeUnitConfig.MONTHS);
5457
UNIT_MAPPING.put("mon", TimeUnitConfig.MONTHS);
5558
UNIT_MAPPING.put("month", TimeUnitConfig.MONTHS);
@@ -59,15 +62,19 @@ public class TimeUnitRegistry {
5962
/**
6063
* Gets the time unit configuration for the given unit string.
6164
*
62-
* @param unit The unit string (e.g., "h", "hours", "M")
65+
* @param unit The unit string (e.g., "h", "hours", "M", "m")
6366
* @return The time unit configuration, or null if not found
6467
*/
6568
public static TimeUnitConfig getConfig(String unit) {
66-
if (unit.equals("M")) {
67-
// M is case-sensitive for months
69+
// Handle case-sensitive units: M (month), m (minute), and subsecond units (us, cs, ds)
70+
if (unit.equals("M")
71+
|| unit.equals("m")
72+
|| unit.equals("us")
73+
|| unit.equals("cs")
74+
|| unit.equals("ds")) {
6875
return UNIT_MAPPING.get(unit);
6976
} else {
70-
// For all other units, use lowercase lookup
77+
// For all other units, use lowercase lookup for case-insensitive matching
7178
return UNIT_MAPPING.get(unit.toLowerCase());
7279
}
7380
}

docs/user/ppl/cmd/bin.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ bin \<field\> [span=\<interval\>] [minspan=\<interval\>] [bins=\<count\>] [align
1818
* minute (m, min, mins, minute, minutes)
1919
* hour (h, hr, hrs, hour, hours)
2020
* day (d, day, days)
21-
* month (mon, month, months)
21+
* month (M, mon, month, months); the single-letter `M` is case-sensitive (lowercase `m` means minute)
2222
* minspan: optional. The minimum interval size for automatic span calculation. Cannot be used with span or bins parameters.
2323
* bins: optional. The maximum number of equal-width bins to create. Cannot be used with span or minspan parameters. The bins parameter must be between 2 and 50000 (inclusive).
2424
* aligntime: optional. Align the bin times for time-based fields. Valid only for time-based discretization. Options:

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java

Lines changed: 82 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1076,4 +1076,86 @@ public void testBinWithDecimalSpan() throws IOException {
10761076
verifySchema(result, schema("count()", "bigint"), schema("cpu_usage", "string"));
10771077
verifyDataRows(result, rows(3, "37.5-45.0"), rows(2, "45.0-52.5"), rows(1, "52.5-60.0"));
10781078
}
1079+
1080+
@Test
1081+
public void testBinCaseSensitivity_mon_vs_M() throws IOException {
1082+
// Test uppercase 'M' for months - bin by 1 month
1083+
JSONObject monthResultM =
1084+
executeQuery(
1085+
String.format(
1086+
"source=%s | bin @timestamp span=1M | fields `@timestamp` | sort `@timestamp` |"
1087+
+ " head 1",
1088+
TEST_INDEX_TIME_DATA));
1089+
verifySchema(monthResultM, schema("@timestamp", null, "string"));
1090+
verifyDataRows(monthResultM, rows("2025-07"));
1091+
1092+
// Test abbreviation 'mon' for months - should produce same result as 'M'
1093+
JSONObject monthResultMon =
1094+
executeQuery(
1095+
String.format(
1096+
"source=%s | bin @timestamp span=1mon | fields `@timestamp` | sort `@timestamp` |"
1097+
+ " head 1",
1098+
TEST_INDEX_TIME_DATA));
1099+
verifySchema(monthResultMon, schema("@timestamp", null, "string"));
1100+
verifyDataRows(monthResultMon, rows("2025-07"));
1101+
}
1102+
1103+
@Test
1104+
public void testBinWithSubsecondUnits() throws IOException {
1105+
// Test milliseconds (ms) - bin by 100 milliseconds
1106+
JSONObject msResult =
1107+
executeQuery(
1108+
String.format(
1109+
"source=%s | bin @timestamp span=100ms | fields `@timestamp` | sort `@timestamp` |"
1110+
+ " head 3",
1111+
TEST_INDEX_TIME_DATA));
1112+
verifySchema(msResult, schema("@timestamp", null, "timestamp"));
1113+
verifyDataRows(
1114+
msResult,
1115+
rows("2025-07-28 00:15:23"),
1116+
rows("2025-07-28 01:42:15"),
1117+
rows("2025-07-28 02:28:45"));
1118+
1119+
// Test microseconds (us) - bin by 500 microseconds
1120+
JSONObject usResult =
1121+
executeQuery(
1122+
String.format(
1123+
"source=%s | bin @timestamp span=500us | fields `@timestamp` | sort `@timestamp` |"
1124+
+ " head 3",
1125+
TEST_INDEX_TIME_DATA));
1126+
verifySchema(usResult, schema("@timestamp", null, "timestamp"));
1127+
verifyDataRows(
1128+
usResult,
1129+
rows("2025-07-28 00:15:23"),
1130+
rows("2025-07-28 01:42:15"),
1131+
rows("2025-07-28 02:28:45"));
1132+
1133+
// Test centiseconds (cs) - bin by 10 centiseconds (100ms)
1134+
JSONObject csResult =
1135+
executeQuery(
1136+
String.format(
1137+
"source=%s | bin @timestamp span=10cs | fields `@timestamp` | sort `@timestamp` |"
1138+
+ " head 3",
1139+
TEST_INDEX_TIME_DATA));
1140+
verifySchema(csResult, schema("@timestamp", null, "timestamp"));
1141+
verifyDataRows(
1142+
csResult,
1143+
rows("2025-07-28 00:15:23"),
1144+
rows("2025-07-28 01:42:15"),
1145+
rows("2025-07-28 02:28:45"));
1146+
1147+
// Test deciseconds (ds) - bin by 5 deciseconds (500ms)
1148+
JSONObject dsResult =
1149+
executeQuery(
1150+
String.format(
1151+
"source=%s | bin @timestamp span=5ds | fields `@timestamp` | sort `@timestamp` |"
1152+
+ " head 3",
1153+
TEST_INDEX_TIME_DATA));
1154+
verifySchema(dsResult, schema("@timestamp", null, "timestamp"));
1155+
verifyDataRows(
1156+
dsResult,
1157+
rows("2025-07-28 00:15:23"),
1158+
rows("2025-07-28 01:42:15"),
1159+
rows("2025-07-28 02:28:45"));
1160+
}
10791161
}

0 commit comments

Comments
 (0)