Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions integ-test/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,23 @@ task integTestRemote(type: RestIntegTestTask) {
// - subsearch.maxout is lowered as a LIMIT on the in-subquery semi-join's right side,
// which the AE route does not honor, so the subsearch returns all rows.
excludeTestsMatching '*CalcitePPLInSubqueryIT.testSubsearchMaxOut'

// === Excludes: CalcitePPLConditionBuiltinFunctionIT route divergences ===
// Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see
// AnalyticsRouteLimitation); listed here so the AE-route skip set stays countable.
// - isnull/isnotnull on the object/struct parent field big5.aws: objects are flattened
// to dotted leaf columns and the struct parent is not a queryable column.
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithStruct'
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithStruct'
// - isnull/isnotnull on the nested field nested_simple.address: nested fields are
// stripped at index creation (the route can't store them).
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithNested'
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithNested'
// - concat('H', null): DataFusion treats NULL as empty string; v2/Calcite propagates NULL.
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testNullIfWithExpression'
// - earliest('now', utc_timestamp()): 'now' and utc_timestamp() resolve to the same
// instant on the route (true) but differ on v2 (false).
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testEarliestWithEval'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

package org.opensearch.sql.calcite.remote;

import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
import static org.opensearch.sql.legacy.TestsConstants.*;
import static org.opensearch.sql.util.AnalyticsRouteLimitation.CONCAT_NULL_AS_EMPTY;
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EARLIEST_LATEST_NOW_CLOCK;
import static org.opensearch.sql.util.AnalyticsRouteLimitation.NESTED_FIELDS;
import static org.opensearch.sql.util.AnalyticsRouteLimitation.STRUCT_PARENT_FIELD;
import static org.opensearch.sql.util.MatcherUtils.*;
import static org.opensearch.sql.util.MatcherUtils.rows;

Expand All @@ -22,22 +27,29 @@ public void init() throws Exception {
super.init();
enableCalcite();

// init() runs as @Before, before every test method. On the analytics route the parquet-backed
// store is append-only on same-_id PUT, so seed the extra docs only when the index is first
// created — otherwise they accumulate a duplicate per test method and inflate row counts.
boolean stateCountryWithNullExisted =
isIndexExist(client(), TEST_INDEX_STATE_COUNTRY_WITH_NULL);
loadIndex(Index.STATE_COUNTRY);
loadIndex(Index.STATE_COUNTRY_WITH_NULL);
loadIndex(Index.CALCS);
loadIndex(Index.NESTED_SIMPLE);
loadIndex(Index.BIG5);
Request request1 =
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
request1.setJsonEntity(
"{\"name\":\" "
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
client().performRequest(request1);
Request request2 =
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
request2.setJsonEntity(
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
client().performRequest(request2);
if (!stateCountryWithNullExisted) {
Request request1 =
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
request1.setJsonEntity(
"{\"name\":\" "
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
client().performRequest(request1);
Request request2 =
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
request2.setJsonEntity(
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
client().performRequest(request2);
}
}

@Test
Expand All @@ -54,13 +66,17 @@ public void testIsNull() throws IOException {

/**
 * Runs {@code isnull(aws)} against the big5 index and checks that the result exposes {@code aws}
 * as a struct column with no matching rows. Guarded by {@code
 * assumeNotAnalytics(STRUCT_PARENT_FIELD)}.
 */
@Test
public void testIsNullWithStruct() throws IOException {
  // The filter targets the object/struct parent field 'aws' itself, not one of its leaf fields.
  assumeNotAnalytics(STRUCT_PARENT_FIELD);

  final String ppl = "source=big5 | where isnull(aws) | fields aws";
  final JSONObject response = executeQuery(ppl);

  verifySchema(response, schema("aws", "struct"));
  verifyNumOfRows(response, 0);
}

@Test
public void testIsNullWithNested() throws IOException {
// Queries a nested field; the route strips nested fields at index creation.
assumeNotAnalytics(NESTED_FIELDS);
JSONObject actual =
executeQuery(
String.format(
Expand Down Expand Up @@ -124,13 +140,17 @@ public void testIsNotNullWithSingleNotEquals() throws IOException {

/**
 * Runs {@code isnotnull(aws)} against the big5 index and checks that the result exposes {@code
 * aws} as a struct column with three matching rows. Guarded by {@code
 * assumeNotAnalytics(STRUCT_PARENT_FIELD)}.
 */
@Test
public void testIsNotNullWithStruct() throws IOException {
  // The filter targets the object/struct parent field 'aws' itself, not one of its leaf fields.
  assumeNotAnalytics(STRUCT_PARENT_FIELD);

  final String ppl = "source=big5 | where isnotnull(aws) | fields aws";
  final JSONObject response = executeQuery(ppl);

  verifySchema(response, schema("aws", "struct"));
  verifyNumOfRows(response, 3);
}

@Test
public void testIsNotNullWithNested() throws IOException {
// Queries a nested field; the route strips nested fields at index creation.
assumeNotAnalytics(NESTED_FIELDS);
JSONObject actual =
executeQuery(
String.format(
Expand Down Expand Up @@ -165,6 +185,8 @@ public void testNullIf() throws IOException {

@Test
public void testNullIfWithExpression() throws IOException {
// concat('H', name) over the null-name row diverges (NULL-as-empty vs NULL-propagating).
assumeNotAnalytics(CONCAT_NULL_AS_EMPTY);
JSONObject actual =
executeQuery(
String.format(
Expand Down Expand Up @@ -354,6 +376,8 @@ public void testLatest() throws IOException {

@Test
public void testEarliestWithEval() throws IOException {
// earliest('now', utc_timestamp()) resolves true on the route but false on v2 (clock source).
assumeNotAnalytics(EARLIEST_LATEST_NOW_CLOCK);
JSONObject actual =
executeQuery(
String.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,42 @@ public enum AnalyticsRouteLimitation {
SUBSEARCH_MAXOUT_IN_SUBQUERY(
"subsearch.maxout is not honored on the analytics-engine route: the LIMIT lowered onto the"
+ " in-subquery semi-join's right side is dropped, so the subsearch returns all rows"
+ " regardless of the cap.");
+ " regardless of the cap."),

/**
* Querying an {@code object}/struct parent field directly (e.g. {@code isnull(aws)} where {@code
* aws} is an {@code object}) fails on the analytics-engine route with {@code FIELD_NOT_FOUND}.
* The route flattens objects into dotted leaf columns — {@code aws.cloudwatch.log_group} scans
* fine — but the struct parent is not exposed as a queryable column. Distinct from {@link
* #NESTED_FIELDS}: {@code object} parents survive in the OpenSearch mapping (they aren't stripped
* at load) yet still can't be referenced as a whole.
*/
STRUCT_PARENT_FIELD(
"Querying an object/struct parent field directly is unsupported on the analytics-engine"
+ " route: objects are flattened to dotted leaf columns and the parent resolves to"
+ " FIELD_NOT_FOUND."),

/**
* {@code concat()} over a NULL argument diverges: the analytics-engine route (DataFusion) treats
* NULL as an empty string (e.g. {@code concat('H', null)} = {@code 'H'}), whereas the v2/Calcite
* engine propagates NULL ({@code concat('H', null)} = {@code null}). Any expression that depends
* on the NULL-propagating behavior over a possibly-null operand diverges.
*/
CONCAT_NULL_AS_EMPTY(
"concat() treats a NULL argument as an empty string on the analytics-engine route (DataFusion"
+ " semantics), whereas the v2/Calcite engine propagates NULL."),

/**
* {@code earliest('now', <ts>)} / {@code latest('now', <ts>)} where {@code <ts>} is {@code
* utc_timestamp()} diverge: on the analytics-engine route the relative-time {@code 'now'} and
* {@code utc_timestamp()} resolve to the same instant (so {@code earliest('now',
* utc_timestamp())} is {@code true}), whereas on the v2/Calcite path they differ (the same
* expression is {@code false}) — a clock-source
* divergence between the relative-time evaluation and {@code utc_timestamp()}.
*/
EARLIEST_LATEST_NOW_CLOCK(
"earliest/latest with relative-time 'now' against utc_timestamp() diverges on the"
+ " analytics-engine route: 'now' and utc_timestamp() resolve to the same instant"
+ " (earliest('now', now) is true), but differ on the v2/Calcite path (false).");

private final String reason;

Expand Down
Loading