Skip to content

Commit 49e9657

Browse files
committed
Merge remote-tracking branch 'origin/main' into addEarliest
2 parents 4b9ad84 + df9f5dd commit 49e9657

34 files changed

Lines changed: 538 additions & 35 deletions

core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,15 +292,38 @@ public Expression visitIn(In node, AnalysisContext context) {
292292

293293
private Expression visitIn(
294294
UnresolvedExpression field, List<UnresolvedExpression> valueList, AnalysisContext context) {
295-
if (valueList.size() == 1) {
296-
return visitCompare(new Compare("=", field, valueList.get(0)), context);
297-
} else if (valueList.size() > 1) {
298-
return DSL.or(
299-
visitCompare(new Compare("=", field, valueList.get(0)), context),
300-
visitIn(field, valueList.subList(1, valueList.size()), context));
301-
} else {
295+
if (valueList.isEmpty()) {
302296
throw new SemanticCheckException("Values in In clause should not be empty");
303297
}
298+
299+
Expression[] expressions = new Expression[valueList.size()];
300+
301+
for (int i = 0; i < expressions.length; i++) {
302+
expressions[i] = visitCompare(new Compare("=", field, valueList.get(i)), context);
303+
}
304+
305+
return buildOrTree(expressions, 0, expressions.length);
306+
}
307+
308+
/**
309+
* `DSL.or` can only take two arguments. To represent large lists without massive recursion, we
310+
* want to represent the expression as a balanced tree. This builds that tree from a node list.
311+
*
312+
* @param children The list of expressions to merge.
313+
* @param start The starting position (inclusive) for the current combination step.
314+
* @param end The ending position (exclusive) for the current combination step. If <= start,
315+
* children[start] is returned.
316+
* @return The final `DSL.or` expression.
317+
*/
318+
private Expression buildOrTree(Expression[] children, int start, int end) {
319+
if (end - start <= 1) {
320+
return children[start];
321+
}
322+
if (end - start == 2) {
323+
return DSL.or(children[start], children[end - 1]);
324+
}
325+
int split = start + (end - start) / 2;
326+
return DSL.or(buildOrTree(children, start, split), buildOrTree(children, split, end));
304327
}
305328

306329
@Override

core/src/main/java/org/opensearch/sql/calcite/udf/udaf/PercentileApproxFunction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
package org.opensearch.sql.calcite.udf.udaf;
77

8-
import com.tdunning.math.stats.AVLTreeDigest;
8+
import com.tdunning.math.stats.MergingDigest;
99
import java.util.ArrayList;
1010
import java.util.List;
1111
import java.util.Objects;
@@ -85,7 +85,7 @@ public void evaluate(double value) {
8585
public Object value(Object... argList) {
8686
double percent = (double) argList[1];
8787
double compression = (double) argList[0];
88-
AVLTreeDigest tree = new AVLTreeDigest(compression);
88+
MergingDigest tree = new MergingDigest(compression);
8989
for (Number num : candidate) {
9090
tree.add(num.doubleValue());
9191
}

core/src/main/java/org/opensearch/sql/expression/aggregation/PercentileApproximateAggregator.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import static org.opensearch.sql.data.model.ExprValueUtils.doubleValue;
99
import static org.opensearch.sql.utils.ExpressionUtils.format;
1010

11-
import com.tdunning.math.stats.AVLTreeDigest;
11+
import com.tdunning.math.stats.MergingDigest;
1212
import java.util.List;
1313
import org.opensearch.sql.common.utils.StringUtils;
1414
import org.opensearch.sql.data.model.ExprNullValue;
@@ -56,11 +56,11 @@ public String toString() {
5656
}
5757

5858
/**
59-
* PercentileApproximateState is used to store the AVLTreeDigest state for percentile estimation.
59+
* PercentileApproximateState is used to store the MergingDigest state for percentile estimation.
6060
*/
61-
protected static class PercentileApproximateState extends AVLTreeDigest
61+
protected static class PercentileApproximateState extends MergingDigest
6262
implements AggregationState {
63-
// The compression level for the AVLTreeDigest, keep the same default value as OpenSearch core.
63+
// The compression level for the MergingDigest, keep the same default value as OpenSearch core.
6464
public static final double DEFAULT_COMPRESSION = 100.0;
6565
private final double percent;
6666

core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import static org.opensearch.sql.expression.DSL.ref;
2828

2929
import com.google.common.collect.ImmutableMap;
30+
import java.util.ArrayList;
3031
import java.util.Collections;
3132
import java.util.LinkedHashMap;
3233
import java.util.List;
@@ -401,6 +402,17 @@ void visit_in() {
401402
() -> analyze(AstDSL.in(field("integer_value"), Collections.emptyList())));
402403
}
403404

405+
@Test
406+
void visit_in_large_list() {
407+
List<UnresolvedExpression> ints = new ArrayList<>();
408+
for (int i = 0; i < 10000; i++) {
409+
ints.add(intLiteral(i));
410+
}
411+
412+
// Shouldn't crash
413+
analyze(AstDSL.in(field("integer_value"), ints));
414+
}
415+
404416
@Test
405417
void multi_match_expression() {
406418
assertAnalyzeEqual(

core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,21 +68,21 @@ public void test_percentile_field_expression_with_user_defined_compression() {
6868
aggregation(
6969
DSL.percentile(DSL.ref("integer_value", INTEGER), DSL.literal(50), DSL.literal(0.1)),
7070
tuples);
71-
assertEquals(2.5, result.value());
71+
assertEquals(3.0, result.value());
7272
result =
7373
aggregation(
7474
DSL.percentile(DSL.ref("long_value", LONG), DSL.literal(50), DSL.literal(0.1)), tuples);
75-
assertEquals(2.5, result.value());
75+
assertEquals(3.0, result.value());
7676
result =
7777
aggregation(
7878
DSL.percentile(DSL.ref("double_value", DOUBLE), DSL.literal(50), DSL.literal(0.1)),
7979
tuples);
80-
assertEquals(2.5, result.value());
80+
assertEquals(3.0, result.value());
8181
result =
8282
aggregation(
8383
DSL.percentile(DSL.ref("float_value", FLOAT), DSL.literal(50), DSL.literal(0.1)),
8484
tuples);
85-
assertEquals(2.5, result.value());
85+
assertEquals(3.0, result.value());
8686
}
8787

8888
@Test

docs/user/dql/aggregations.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ Description
378378

379379
Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Returns the approximate percentile value of `expr` at the specified percentage. `percent` must be a constant between 0 and 100.
380380

381+
Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link <https://github.com/opensearch-project/OpenSearch/issues/18122>`_.
382+
381383
Example::
382384

383385
os> SELECT gender, percentile(age, 90) as p90 FROM accounts GROUP BY gender;

docs/user/ppl/cmd/stats.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Return the
309309

310310
* percent: The number must be a constant between 0 and 100.
311311

312+
Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link <https://github.com/opensearch-project/OpenSearch/issues/18122>`_.
313+
312314
Example::
313315

314316
os> source=accounts | stats percentile(age, 90) by gender;

integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ public void testSortPushDownExplain() throws Exception {
9595

9696
@Test
9797
public void testLimitPushDownExplain() throws Exception {
98-
// TODO fix after https://github.com/opensearch-project/sql/issues/3381
9998
String expected =
10099
isCalciteEnabled()
101100
? loadFromFile("expectedOutput/calcite/explain_limit_push.json")
@@ -110,6 +109,104 @@ public void testLimitPushDownExplain() throws Exception {
110109
+ "| fields ageMinus"));
111110
}
112111

112+
@Test
113+
public void testLimitWithFilterPushdownExplain() throws Exception {
114+
String expectedFilterThenLimit =
115+
isCalciteEnabled()
116+
? loadFromFile("expectedOutput/calcite/explain_filter_then_limit_push.json")
117+
: loadFromFile("expectedOutput/ppl/explain_filter_then_limit_push.json");
118+
assertJsonEqualsIgnoreId(
119+
expectedFilterThenLimit,
120+
explainQueryToString(
121+
"source=opensearch-sql_test_index_account"
122+
+ "| where age > 30 "
123+
+ "| head 5 "
124+
+ "| fields age"));
125+
126+
// The filter in limit-then-filter queries should not be pushed since the current DSL will
127+
// execute it as filter-then-limit
128+
String expectedLimitThenFilter =
129+
isCalciteEnabled()
130+
? loadFromFile("expectedOutput/calcite/explain_limit_then_filter_push.json")
131+
: loadFromFile("expectedOutput/ppl/explain_limit_then_filter_push.json");
132+
assertJsonEqualsIgnoreId(
133+
expectedLimitThenFilter,
134+
explainQueryToString(
135+
"source=opensearch-sql_test_index_account"
136+
+ "| head 5 "
137+
+ "| where age > 30 "
138+
+ "| fields age"));
139+
}
140+
141+
@Test
142+
public void testMultipleLimitExplain() throws Exception {
143+
String expected5Then10 =
144+
isCalciteEnabled()
145+
? loadFromFile("expectedOutput/calcite/explain_limit_5_10_push.json")
146+
: loadFromFile("expectedOutput/ppl/explain_limit_5_10_push.json");
147+
assertJsonEqualsIgnoreId(
148+
expected5Then10,
149+
explainQueryToString(
150+
"source=opensearch-sql_test_index_account"
151+
+ "| head 5 "
152+
+ "| head 10 "
153+
+ "| fields age"));
154+
155+
String expected10Then5 =
156+
isCalciteEnabled()
157+
? loadFromFile("expectedOutput/calcite/explain_limit_10_5_push.json")
158+
: loadFromFile("expectedOutput/ppl/explain_limit_10_5_push.json");
159+
assertJsonEqualsIgnoreId(
160+
expected10Then5,
161+
explainQueryToString(
162+
"source=opensearch-sql_test_index_account"
163+
+ "| head 10 "
164+
+ "| head 5 "
165+
+ "| fields age"));
166+
167+
String expected10from1then10from2 =
168+
isCalciteEnabled()
169+
? loadFromFile("expectedOutput/calcite/explain_limit_10from1_10from2_push.json")
170+
: loadFromFile("expectedOutput/ppl/explain_limit_10from1_10from2_push.json");
171+
assertJsonEqualsIgnoreId(
172+
expected10from1then10from2,
173+
explainQueryToString(
174+
"source=opensearch-sql_test_index_account"
175+
+ "| head 10 from 1 "
176+
+ "| head 10 from 2 "
177+
+ "| fields age"));
178+
179+
// The second limit should not be pushed down for limit-filter-limit queries
180+
String expected10ThenFilterThen5 =
181+
isCalciteEnabled()
182+
? loadFromFile("expectedOutput/calcite/explain_limit_10_filter_5_push.json")
183+
: loadFromFile("expectedOutput/ppl/explain_limit_10_filter_5_push.json");
184+
assertJsonEqualsIgnoreId(
185+
expected10ThenFilterThen5,
186+
explainQueryToString(
187+
"source=opensearch-sql_test_index_account"
188+
+ "| head 10 "
189+
+ "| where age > 30 "
190+
+ "| head 5 "
191+
+ "| fields age"));
192+
}
193+
194+
@Test
195+
public void testLimitWithMultipleOffsetPushdownExplain() throws Exception {
196+
String expected =
197+
isCalciteEnabled()
198+
? loadFromFile("expectedOutput/calcite/explain_limit_offsets_push.json")
199+
: loadFromFile("expectedOutput/ppl/explain_limit_offsets_push.json");
200+
201+
assertJsonEqualsIgnoreId(
202+
expected,
203+
explainQueryToString(
204+
"source=opensearch-sql_test_index_account"
205+
+ "| head 10 from 1 "
206+
+ "| head 5 from 2 "
207+
+ "| fields age"));
208+
}
209+
113210
@Test
114211
public void testFillNullPushDownExplain() throws Exception {
115212
String expected =

integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,9 +436,17 @@ public void testStatsPercentileWithNull() throws IOException {
436436
public void testStatsPercentileWithCompression() throws IOException {
437437
JSONObject response =
438438
executeQuery(
439-
String.format("source=%s | stats percentile(balance, 50, 1)", TEST_INDEX_BANK));
440-
verifySchema(response, schema("percentile(balance, 50, 1)", null, "bigint"));
439+
String.format("source=%s | stats percentile(balance, 50, 20)", TEST_INDEX_BANK));
440+
verifySchema(response, schema("percentile(balance, 50, 20)", null, "bigint"));
441441
verifyDataRows(response, rows(32838));
442+
443+
// disable pushdown by adding a eval command
444+
JSONObject responsePushdownDisabled =
445+
executeQuery(
446+
String.format(
447+
"source=%s | eval a = 1 | stats percentile(balance, 50, 20)", TEST_INDEX_BANK));
448+
verifySchema(responsePushdownDisabled, schema("percentile(balance, 50, 20)", null, "bigint"));
449+
verifyDataRows(responsePushdownDisabled, rows(32838));
442450
}
443451

444452
@Test
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalProject(age=[$8])\n LogicalSort(fetch=[5])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($8, 30), LIMIT->5, PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n"
5+
}
6+
}

0 commit comments

Comments
 (0)