Skip to content

Commit 35ac017

Browse files
committed
Allow more than 2 alternative plans for dedup
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 907b292 commit 35ac017

4 files changed

Lines changed: 71 additions & 12 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains;
2323

2424
import java.io.IOException;
25+
import java.util.List;
2526
import java.util.Locale;
2627
import org.junit.Ignore;
2728
import org.junit.Test;
@@ -2061,11 +2062,11 @@ public void testDedupExpr() throws IOException {
20612062
explainQueryYaml(
20622063
"source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval"
20632064
+ " new_state = lower(state) | dedup 2 new_gender, new_state"));
2064-
expected = loadExpectedPlan("explain_dedup_expr4.yaml");
2065-
alternative = loadExpectedPlan("explain_dedup_expr4_alternative.yaml");
20662065
assertYamlEqualsIgnoreId(
2067-
expected,
2068-
alternative,
2066+
List.of(
2067+
loadExpectedPlan("explain_dedup_expr4.yaml"),
2068+
loadExpectedPlan("explain_dedup_expr4_alternative.yaml"),
2069+
loadExpectedPlan("explain_dedup_expr4_alternative1.yaml")),
20692070
explainQueryYaml(
20702071
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
20712072
+ " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender,"
@@ -2101,11 +2102,11 @@ public void testDedupRename() throws IOException {
21012102
"source=opensearch-sql_test_index_account | eval tmp_gender = lower(gender) | eval"
21022103
+ " tmp_state = lower(state) | rename tmp_gender as new_gender | rename tmp_state"
21032104
+ " as new_state | dedup 2 new_gender, new_state"));
2104-
expected = loadExpectedPlan("explain_dedup_expr4.yaml");
2105-
alternative = loadExpectedPlan("explain_dedup_expr4_alternative.yaml");
21062105
assertYamlEqualsIgnoreId(
2107-
expected,
2108-
alternative,
2106+
List.of(
2107+
loadExpectedPlan("explain_dedup_expr4.yaml"),
2108+
loadExpectedPlan("explain_dedup_expr4_alternative.yaml"),
2109+
loadExpectedPlan("explain_dedup_expr4_alternative1.yaml")),
21092110
explainQueryYaml(
21102111
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
21112112
+ " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename"
@@ -2164,11 +2165,12 @@ public void testDedupWithExpr() throws IOException {
21642165
explainQueryYaml(
21652166
"source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval"
21662167
+ " new_state = lower(state) | dedup 2 age, account_number"));
2167-
expected = loadExpectedPlan("explain_dedup_with_expr4.yaml");
2168-
alternative = loadExpectedPlan("explain_dedup_with_expr4_alternative.yaml");
2168+
21692169
assertYamlEqualsIgnoreId(
2170-
expected,
2171-
alternative,
2170+
List.of(
2171+
loadExpectedPlan("explain_dedup_with_expr4.yaml"),
2172+
loadExpectedPlan("explain_dedup_with_expr4_alternative.yaml"),
2173+
loadExpectedPlan("explain_dedup_with_expr4_alternative2.yaml")),
21722174
explainQueryYaml(
21732175
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
21742176
+ " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender,"

integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,39 @@ public static void assertYamlEqualsIgnoreId(
466466
}
467467
}
468468

469+
/**
470+
* Compares actual YAML with a list of expected YAML strings, trying each in order until one
471+
* matches. This is useful when the DSL implementation can produce multiple valid plan variants.
472+
*
473+
* <p>The method iterates through each expected YAML string and attempts a comparison with the
474+
* actual YAML. If any expected YAML matches, the assertion succeeds. If none match, the method
475+
* throws an {@link AssertionError} from the last failed comparison.
476+
*
477+
* <p>Both expected and actual YAML strings are cleaned up (ignoring RelNode IDs, timestamps,
478+
* PIDs, etc.) before comparison.
479+
*
480+
* @param expectedYamls list of expected YAML strings to try (must not be empty)
481+
* @param actualYaml the actual YAML string to compare
482+
* @throws AssertionError if none of the expected YAMLs match the actual YAML (reports the last
483+
* failure)
484+
* @throws IllegalArgumentException if expectedYamls is empty
485+
*/
486+
public static void assertYamlEqualsIgnoreId(List<String> expectedYamls, String actualYaml) {
487+
if (expectedYamls.isEmpty()) {
488+
throw new IllegalArgumentException("expectedYamls list must not be empty");
489+
}
490+
AssertionError lastError = null;
491+
for (String expectedYaml : expectedYamls) {
492+
try {
493+
assertYamlEquals(cleanUpYaml(expectedYaml), cleanUpYaml(actualYaml));
494+
return;
495+
} catch (AssertionError e) {
496+
lastError = e;
497+
}
498+
}
499+
throw lastError;
500+
}
501+
469502
public static void assertYamlEquals(String expected, String actual) {
470503
String normalizedExpected = normalizeLineBreaks(expected).trim();
471504
String normalizedActual = normalizeLineBreaks(actual).trim();
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5])
5+
LogicalFilter(condition=[<=($6, 2)])
6+
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $4, $5)])
7+
LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($5))])
8+
LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])
9+
LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)])
10+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
11+
physical: |
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"new_gender":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"missing_bucket":false,"order":"asc"}}},{"new_state":{"terms":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"script_fields":{"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false},"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last], fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5])
5+
LogicalFilter(condition=[<=($6, 2)])
6+
LogicalProject(account_number=[$0], gender=[$1], age=[$2], state=[$3], new_gender=[$4], new_state=[$5], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $1, $3)])
7+
LogicalFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($3))])
8+
LogicalSort(sort0=[$1], sort1=[$3], dir0=[ASC-nulls-first], dir1=[DESC-nulls-last])
9+
LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7], new_gender=[LOWER($4)], new_state=[LOWER($7)])
10+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
11+
physical: |
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{"new_state":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["state.keyword"]}},"ignore_failure":false},"new_gender":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQA/HsKICAib3AiOiB7CiAgICAibmFtZSI6ICJMT1dFUiIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0],"DIGESTS":["gender.keyword"]}},"ignore_failure":false}}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)