Skip to content

Commit 15e883e

Browse files
committed
Add CalciteAnalyticsDatetimeWireFormatIT regression net for #5420
Wire-format regression coverage for sql#5420. With DatetimeOutputCastRule deleted (sql#5454) and DatetimeOutputCastRewriter deleted (opensearch#21748), datetime root columns must reach the user as PPL's documented `yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]` format with typed schema labels (`timestamp` / `date` / `time`, never `string`) on the analytics-engine route. The IT skips cleanly when `-Dtests.analytics.parquet_indices=true` is not set — Calcite-legacy was never affected by sql#5420 and asserting the same contract on it is duplicative noise. Coverage: - Wire-format round trip (typed schema + space-separator value) on TIMESTAMP / DATE / TIME root columns, plus eval-derived TIMESTAMP and `min(ts)` aggregation. - Datetime processing inside AE (parsing for WHERE comparison, scalar extract functions year/month/day/hour, ORDER BY). - Nanosecond precision preservation via `date_nanos`. - Aggregation beyond min(): max(ts), dc(ts). Each test asserts the query routes to AE (LogicalTableScan with lowercase `opensearch`) before checking wire format, so a future regression that silently routes to Calcite-legacy can't leave the contract green by accident. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent c7b171d commit 15e883e

1 file changed

Lines changed: 227 additions & 0 deletions

File tree

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.remote;
7+
8+
import static org.junit.Assume.assumeTrue;
9+
import static org.opensearch.sql.util.MatcherUtils.rows;
10+
import static org.opensearch.sql.util.MatcherUtils.schema;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
12+
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
13+
14+
import java.io.IOException;
15+
import org.json.JSONObject;
16+
import org.junit.Assert;
17+
import org.junit.jupiter.api.Test;
18+
import org.opensearch.client.Request;
19+
import org.opensearch.sql.legacy.TestUtils;
20+
import org.opensearch.sql.ppl.PPLIntegTestCase;
21+
22+
/**
23+
* Regression net for sql#5420 on the analytics-engine route. Pins datetime wire format ({@code
24+
* yyyy-MM-dd HH:mm:ss[.SSSSSSSSS]}, typed schema labels) and asserts every query was served by AE —
25+
* without the routing pin, a silent fallback to Calcite would leave the assertions green (Calcite
26+
* already emits the documented format). Skipped on the legacy path.
27+
*/
28+
public class CalciteAnalyticsDatetimeWireFormatIT extends PPLIntegTestCase {
29+
30+
private static final String INDEX = "wire_format_dt";
31+
32+
@Override
33+
public void init() throws Exception {
34+
super.init();
35+
assumeTrue(
36+
"CalciteAnalyticsDatetimeWireFormatIT only meaningful with"
37+
+ " -Dtests.analytics.parquet_indices=true",
38+
isAnalyticsParquetIndicesEnabled());
39+
enableCalcite();
40+
41+
if (!TestUtils.isIndexExist(client(), INDEX)) {
42+
String mapping =
43+
"{\"mappings\":{\"properties\":{"
44+
+ "\"ts\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd HH:mm:ss\"},"
45+
+ "\"ts_nanos\":{\"type\":\"date_nanos\"},"
46+
+ "\"d\":{\"type\":\"date\",\"format\":\"yyyy-MM-dd\"},"
47+
+ "\"t\":{\"type\":\"date\",\"format\":\"HH:mm:ss\"}}}}";
48+
TestUtils.createIndexByRestClient(client(), INDEX, mapping);
49+
50+
Request doc = new Request("PUT", "/" + INDEX + "/_doc/1?refresh=true");
51+
doc.setJsonEntity(
52+
"{\"ts\":\"2024-03-15 10:30:00\","
53+
+ "\"ts_nanos\":\"2024-03-15T10:30:00.123456789Z\","
54+
+ "\"d\":\"2024-03-15\","
55+
+ "\"t\":\"10:30:00\"}");
56+
client().performRequest(doc);
57+
58+
Request doc2 = new Request("PUT", "/" + INDEX + "/_doc/2?refresh=true");
59+
doc2.setJsonEntity(
60+
"{\"ts\":\"2024-03-16 23:59:59\","
61+
+ "\"ts_nanos\":\"2024-03-16T23:59:59.999999999Z\","
62+
+ "\"d\":\"2024-03-16\","
63+
+ "\"t\":\"23:59:59\"}");
64+
client().performRequest(doc2);
65+
}
66+
}
67+
68+
/**
69+
* AE route: {@code LogicalTableScan} + lowercase {@code opensearch}. Calcite legacy uses {@code
70+
* CalciteLogicalIndexScan}.
71+
*/
72+
private void assertRoutedToAnalyticsEngine(String query) throws IOException {
73+
String explained = explainQueryToString(query);
74+
Assert.assertTrue(
75+
"Expected analytics-engine route (LogicalTableScan + lowercase 'opensearch'), got: "
76+
+ explained,
77+
explained.contains("LogicalTableScan(table=[[opensearch,"));
78+
Assert.assertFalse(
79+
"Expected analytics-engine route, but query routed to Calcite legacy"
80+
+ " (CalciteLogicalIndexScan): "
81+
+ explained,
82+
explained.contains("CalciteLogicalIndexScan"));
83+
}
84+
85+
/** TIMESTAMP root col: typed schema + space-separator value. */
86+
@Test
87+
public void testTimestampRootColumnSpaceFormat() throws IOException {
88+
String query = "source=" + INDEX + " | where ts = '2024-03-15 10:30:00' | fields ts";
89+
assertRoutedToAnalyticsEngine(query);
90+
JSONObject result = executeQuery(query);
91+
verifySchema(result, schema("ts", "timestamp"));
92+
verifyDataRows(result, rows("2024-03-15 10:30:00"));
93+
}
94+
95+
/**
96+
* DATE-mapped col: AE widens to TIMESTAMP at scan time; value must use space separator, not ISO
97+
* {@code T}.
98+
*/
99+
@Test
100+
public void testDateRootColumnYmdFormat() throws IOException {
101+
String query = "source=" + INDEX + " | where d = '2024-03-15' | fields d";
102+
assertRoutedToAnalyticsEngine(query);
103+
JSONObject result = executeQuery(query);
104+
verifySchema(result, schema("d", "timestamp"));
105+
verifyDataRows(result, rows("2024-03-15 00:00:00"));
106+
}
107+
108+
/** TIME-mapped col: AE widens to TIMESTAMP; value must use space separator, not ISO {@code T}. */
109+
@Test
110+
public void testTimeRootColumnHmsFormat() throws IOException {
111+
String query = "source=" + INDEX + " | sort t | head 1 | fields t";
112+
assertRoutedToAnalyticsEngine(query);
113+
JSONObject result = executeQuery(query);
114+
verifySchema(result, schema("t", "timestamp"));
115+
Assert.assertFalse(
116+
"Time-mapped column must not surface as ISO T-separator literal",
117+
result.getJSONArray("datarows").getJSONArray(0).getString(0).contains("T"));
118+
}
119+
120+
/** Eval-derived TIMESTAMP follows the same wire-format contract as a root column. */
121+
@Test
122+
public void testEvalDerivedTimestampSpaceFormat() throws IOException {
123+
String query =
124+
"source=" + INDEX + " | where ts = '2024-03-15 10:30:00' | eval x = ts | fields x";
125+
assertRoutedToAnalyticsEngine(query);
126+
JSONObject result = executeQuery(query);
127+
verifySchema(result, schema("x", "timestamp"));
128+
verifyDataRows(result, rows("2024-03-15 10:30:00"));
129+
}
130+
131+
/** {@code min(ts)} returns a typed timestamp cell, not a stringified ISO-T literal. */
132+
@Test
133+
public void testStatsMinTimestampSpaceFormat() throws IOException {
134+
String query = "source=" + INDEX + " | stats min(ts) as min_ts";
135+
assertRoutedToAnalyticsEngine(query);
136+
JSONObject result = executeQuery(query);
137+
verifySchema(result, schema("min_ts", "timestamp"));
138+
verifyDataRows(result, rows("2024-03-15 10:30:00"));
139+
}
140+
141+
/**
142+
* AE parses indexed TIMESTAMP as a real timestamp for WHERE comparison (not lex string compare).
143+
*/
144+
@Test
145+
public void testTimestampWhereComparisonFiltersCorrectly() throws IOException {
146+
String matchQuery = "source=" + INDEX + " | where ts > '2024-03-16 00:00:00' | fields ts";
147+
assertRoutedToAnalyticsEngine(matchQuery);
148+
JSONObject match = executeQuery(matchQuery);
149+
verifySchema(match, schema("ts", "timestamp"));
150+
verifyDataRows(match, rows("2024-03-16 23:59:59"));
151+
152+
JSONObject miss =
153+
executeQuery("source=" + INDEX + " | where ts < '2024-03-15 00:00:00' | fields ts");
154+
Assert.assertEquals(
155+
"Strict comparison should exclude both rows when bound is before any seeded timestamp",
156+
0,
157+
miss.getJSONArray("datarows").length());
158+
}
159+
160+
/**
161+
* {@code year/month/day_of_month/hour} extract calendar fields from the parsed TIMESTAMP, not a
162+
* stringified form.
163+
*/
164+
@Test
165+
public void testTimestampScalarExtractFunctions() throws IOException {
166+
String query =
167+
"source="
168+
+ INDEX
169+
+ " | where ts = '2024-03-15 10:30:00'"
170+
+ " | eval y = year(ts), m = month(ts), dm = day_of_month(ts), h = hour(ts) "
171+
+ "| fields y, m, dm, h";
172+
assertRoutedToAnalyticsEngine(query);
173+
JSONObject result = executeQuery(query);
174+
verifySchema(
175+
result, schema("y", "int"), schema("m", "int"), schema("dm", "int"), schema("h", "int"));
176+
verifyDataRows(result, rows(2024, 3, 15, 10));
177+
}
178+
179+
/**
180+
* ORDER BY on TIMESTAMP returns rows ascending; schema stays {@code timestamp}, values use space
181+
* separator.
182+
*/
183+
@Test
184+
public void testTimestampOrderByTemporalSemantics() throws IOException {
185+
String query = "source=" + INDEX + " | sort ts | fields ts";
186+
assertRoutedToAnalyticsEngine(query);
187+
JSONObject result = executeQuery(query);
188+
verifySchema(result, schema("ts", "timestamp"));
189+
verifyDataRows(result, rows("2024-03-15 10:30:00"), rows("2024-03-16 23:59:59"));
190+
}
191+
192+
/**
193+
* {@code date_nanos} preserves 9-digit sub-second precision end-to-end (catches micro-truncation
194+
* regressions).
195+
*/
196+
@Test
197+
public void testTimestampNanoPrecisionTrailingNines() throws IOException {
198+
String query = "source=" + INDEX + " | sort ts_nanos | fields ts_nanos";
199+
assertRoutedToAnalyticsEngine(query);
200+
JSONObject result = executeQuery(query);
201+
verifySchema(result, schema("ts_nanos", "timestamp"));
202+
verifyDataRows(
203+
result, rows("2024-03-15 10:30:00.123456789"), rows("2024-03-16 23:59:59.999999999"));
204+
}
205+
206+
/** {@code max(ts)} returns a typed timestamp cell with the documented wire format. */
207+
@Test
208+
public void testStatsMaxTimestampSpaceFormat() throws IOException {
209+
String query = "source=" + INDEX + " | stats max(ts) as max_ts";
210+
assertRoutedToAnalyticsEngine(query);
211+
JSONObject result = executeQuery(query);
212+
verifySchema(result, schema("max_ts", "timestamp"));
213+
verifyDataRows(result, rows("2024-03-16 23:59:59"));
214+
}
215+
216+
/**
217+
* {@code dc(ts)} on two distinct timestamps returns 2 (AE dedups by temporal identity, not string
218+
* equality).
219+
*/
220+
@Test
221+
public void testStatsCountDistinctTimestamp() throws IOException {
222+
String query = "source=" + INDEX + " | stats dc(ts) as n";
223+
assertRoutedToAnalyticsEngine(query);
224+
JSONObject result = executeQuery(query);
225+
verifyDataRows(result, rows(2));
226+
}
227+
}

0 commit comments

Comments
 (0)