Skip to content

Commit 154cbc4

Browse files
committed
Merge remote-tracking branch 'origin/main' into issues/399
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
2 parents d7949ef + c6a5fb9 commit 154cbc4

28 files changed

Lines changed: 773 additions & 156 deletions

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,7 @@ public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) {
381381
fields.forEach(
382382
field -> newEnv.define(new Symbol(Namespace.FIELD_NAME, field.toString()), field.type()));
383383

384-
List<Argument> options = node.getArguments();
385-
Integer noOfResults = (Integer) options.get(0).getValue().getValue();
384+
Integer noOfResults = node.getNoOfResults();
386385

387386
return new LogicalRareTopN(child, node.getCommandType(), noOfResults, fields, groupBys);
388387
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,16 @@ public static RareTopN rareTopN(
540540
List<Argument> noOfResults,
541541
List<UnresolvedExpression> groupList,
542542
Field... fields) {
543-
return new RareTopN(input, commandType, noOfResults, Arrays.asList(fields), groupList)
544-
.attach(input);
543+
Integer N =
544+
(Integer)
545+
Argument.ArgumentMap.of(noOfResults)
546+
.getOrDefault("noOfResults", new Literal(10, DataType.INTEGER))
547+
.getValue();
548+
List<Argument> removed =
549+
noOfResults.stream()
550+
.filter(argument -> !argument.getArgName().equals("noOfResults"))
551+
.toList();
552+
return new RareTopN(commandType, N, removed, Arrays.asList(fields), groupList).attach(input);
545553
}
546554

547555
public static Limit limit(UnresolvedPlan input, Integer limit, Integer offset) {

core/src/main/java/org/opensearch/sql/ast/expression/Argument.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
3737
}
3838

3939
/** ArgumentMap is a helper class to get argument value by name. */
40+
@EqualsAndHashCode
41+
@ToString
4042
public static class ArgumentMap {
4143
private final Map<String, Literal> map;
4244

core/src/main/java/org/opensearch/sql/ast/tree/RareTopN.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import com.google.common.collect.ImmutableList;
99
import java.util.List;
10-
import lombok.AllArgsConstructor;
1110
import lombok.EqualsAndHashCode;
1211
import lombok.Getter;
1312
import lombok.RequiredArgsConstructor;
@@ -24,12 +23,11 @@
2423
@ToString
2524
@EqualsAndHashCode(callSuper = false)
2625
@RequiredArgsConstructor
27-
@AllArgsConstructor
2826
public class RareTopN extends UnresolvedPlan {
2927

3028
private UnresolvedPlan child;
3129
private final CommandType commandType;
32-
// arguments: noOfResults: Integer, countField: String, showCount: Boolean
30+
private final Integer noOfResults;
3331
private final List<Argument> arguments;
3432
private final List<Field> fields;
3533
private final List<UnresolvedExpression> groupExprList;
@@ -54,4 +52,10 @@ public enum CommandType {
5452
TOP,
5553
RARE
5654
}
55+
56+
public enum Option {
57+
countField,
58+
showCount,
59+
useNull,
60+
}
5761
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC;
1616
import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC;
1717
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP;
18-
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME;
1918
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_MAIN;
2019
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_SUBSEARCH;
20+
import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_NAME_TOP_RARE;
2121
import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation;
2222
import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall;
2323
import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild;
@@ -910,14 +910,12 @@ private boolean isCountField(RexCall call) {
910910
* @param groupExprList group by expression list
911911
* @param aggExprList aggregate expression list
912912
* @param context CalcitePlanContext
913-
* @param hintBucketNonNull adda bucket nullable hint on LogicalAggregate if set
914913
* @return Pair of (group-by list, aggregate list)
915914
*/
916915
private Pair<List<RexNode>, List<AggCall>> aggregateWithTrimming(
917916
List<UnresolvedExpression> groupExprList,
918917
List<UnresolvedExpression> aggExprList,
919-
CalcitePlanContext context,
920-
boolean hintBucketNonNull) {
918+
CalcitePlanContext context) {
921919
Pair<List<RexNode>, List<AggCall>> resolved =
922920
resolveAttributesForAggregation(groupExprList, aggExprList, context);
923921
List<RexNode> resolvedGroupByList = resolved.getLeft();
@@ -1021,7 +1019,6 @@ private Pair<List<RexNode>, List<AggCall>> aggregateWithTrimming(
10211019
List<String> intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft());
10221020
context.relBuilder.aggregate(
10231021
context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight());
1024-
if (hintBucketNonNull) hintBucketNonNullOnAggregate(context.relBuilder);
10251022
// During aggregation, Calcite projects both input dependencies and output group-by fields.
10261023
// When names conflict, Calcite adds numeric suffixes (e.g., "value0").
10271024
// Apply explicit renaming to restore the intended aliases.
@@ -1030,24 +1027,6 @@ private Pair<List<RexNode>, List<AggCall>> aggregateWithTrimming(
10301027
return Pair.of(reResolved.getLeft(), reResolved.getRight());
10311028
}
10321029

1033-
private void hintBucketNonNullOnAggregate(RelBuilder relBuilder) {
1034-
final RelHint statHits =
1035-
RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
1036-
assert relBuilder.peek() instanceof LogicalAggregate
1037-
: "Stats hits should be added to LogicalAggregate";
1038-
relBuilder.hints(statHits);
1039-
relBuilder
1040-
.getCluster()
1041-
.setHintStrategies(
1042-
HintStrategyTable.builder()
1043-
.hintStrategy(
1044-
"stats_args",
1045-
(hint, rel) -> {
1046-
return rel instanceof LogicalAggregate;
1047-
})
1048-
.build());
1049-
}
1050-
10511030
/**
10521031
* Imitates {@code Registrar.registerExpression} of {@link RelBuilder} to derive the output order
10531032
* of group-by keys after aggregation.
@@ -1162,7 +1141,10 @@ private void visitAggregation(Aggregation node, CalcitePlanContext context, bool
11621141
}
11631142

11641143
Pair<List<RexNode>, List<AggCall>> aggregationAttributes =
1165-
aggregateWithTrimming(groupExprList, aggExprList, context, toAddHintsOnAggregate);
1144+
aggregateWithTrimming(groupExprList, aggExprList, context);
1145+
if (toAddHintsOnAggregate) {
1146+
addIgnoreNullBucketHintToAggregate(context);
1147+
}
11661148

11671149
// schema reordering
11681150
List<RexNode> outputFields = context.relBuilder.fields();
@@ -1883,9 +1865,8 @@ public RelNode visitKmeans(Kmeans node, CalcitePlanContext context) {
18831865
@Override
18841866
public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
18851867
visitChildren(node, context);
1886-
1887-
ArgumentMap arguments = ArgumentMap.of(node.getArguments());
1888-
String countFieldName = (String) arguments.get("countField").getValue();
1868+
ArgumentMap argumentMap = ArgumentMap.of(node.getArguments());
1869+
String countFieldName = (String) argumentMap.get(RareTopN.Option.countField.name()).getValue();
18891870
if (context.relBuilder.peek().getRowType().getFieldNames().contains(countFieldName)) {
18901871
throw new IllegalArgumentException(
18911872
"Field `"
@@ -1900,7 +1881,26 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
19001881
groupExprList.addAll(fieldList);
19011882
List<UnresolvedExpression> aggExprList =
19021883
List.of(AstDSL.alias(countFieldName, AstDSL.aggregate("count", null)));
1903-
aggregateWithTrimming(groupExprList, aggExprList, context, false);
1884+
1885+
// if usenull=false, add an isNotNull filter before the Aggregate and a hint on that Aggregate
1886+
Boolean bucketNullable = (Boolean) argumentMap.get(RareTopN.Option.useNull.name()).getValue();
1887+
boolean toAddHintsOnAggregate = false;
1888+
if (!bucketNullable && !groupExprList.isEmpty()) {
1889+
toAddHintsOnAggregate = true;
1890+
// add isNotNull filter before aggregation to filter out null bucket
1891+
List<RexNode> groupByList =
1892+
groupExprList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList();
1893+
context.relBuilder.filter(
1894+
PlanUtils.getSelectColumns(groupByList).stream()
1895+
.map(context.relBuilder::field)
1896+
.map(context.relBuilder::isNotNull)
1897+
.toList());
1898+
}
1899+
aggregateWithTrimming(groupExprList, aggExprList, context);
1900+
1901+
if (toAddHintsOnAggregate) {
1902+
addIgnoreNullBucketHintToAggregate(context);
1903+
}
19041904

19051905
// 2. add a window column
19061906
List<RexNode> partitionKeys = rexVisitor.analyze(node.getGroupExprList(), context);
@@ -1920,26 +1920,46 @@ public RelNode visitRareTopN(RareTopN node, CalcitePlanContext context) {
19201920
List.of(countField),
19211921
WindowFrame.toCurrentRow());
19221922
context.relBuilder.projectPlus(
1923-
context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME));
1923+
context.relBuilder.alias(rowNumberWindowOver, ROW_NUMBER_COLUMN_NAME_TOP_RARE));
19241924

19251925
// 3. filter row_number() <= k in each partition
1926-
Integer N = (Integer) arguments.get("noOfResults").getValue();
1926+
int k = node.getNoOfResults();
19271927
context.relBuilder.filter(
19281928
context.relBuilder.lessThanOrEqual(
1929-
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME), context.relBuilder.literal(N)));
1929+
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
1930+
context.relBuilder.literal(k)));
19301931

19311932
// 4. project final output. the default output is group by list + field list
1932-
Boolean showCount = (Boolean) arguments.get("showCount").getValue();
1933+
Boolean showCount = (Boolean) argumentMap.get(RareTopN.Option.showCount.name()).getValue();
19331934
if (showCount) {
1934-
context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME));
1935+
context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE));
19351936
} else {
19361937
context.relBuilder.projectExcept(
1937-
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME),
1938+
context.relBuilder.field(ROW_NUMBER_COLUMN_NAME_TOP_RARE),
19381939
context.relBuilder.field(countFieldName));
19391940
}
19401941
return context.relBuilder.peek();
19411942
}
19421943

1944+
private static void addIgnoreNullBucketHintToAggregate(CalcitePlanContext context) {
1945+
final RelHint statHits =
1946+
RelHint.builder("stats_args").hintOption(Argument.BUCKET_NULLABLE, "false").build();
1947+
assert context.relBuilder.peek() instanceof LogicalAggregate
1948+
: "Stats hits should be added to LogicalAggregate";
1949+
context.relBuilder.hints(statHits);
1950+
context
1951+
.relBuilder
1952+
.getCluster()
1953+
.setHintStrategies(
1954+
HintStrategyTable.builder()
1955+
.hintStrategy(
1956+
"stats_args",
1957+
(hint, rel) -> {
1958+
return rel instanceof LogicalAggregate;
1959+
})
1960+
.build());
1961+
}
1962+
19431963
@Override
19441964
public RelNode visitTableFunction(TableFunction node, CalcitePlanContext context) {
19451965
throw new CalciteUnsupportedException("Table function is unsupported in Calcite");
@@ -2242,7 +2262,7 @@ public RelNode visitTimechart(
22422262
try {
22432263
// Step 1: Initial aggregation - IMPORTANT: order is [spanExpr, byField]
22442264
groupExprList = Arrays.asList(spanExpr, byField);
2245-
aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context, false);
2265+
aggregateWithTrimming(groupExprList, List.of(node.getAggregateFunction()), context);
22462266

22472267
// First rename the timestamp field (2nd to last) to @timestamp
22482268
List<String> fieldNames = context.relBuilder.peek().getRowType().getFieldNames();

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public interface PlanUtils {
6262
/** This is only for the dedup command; do not reuse it in other commands. */
6363
String ROW_NUMBER_COLUMN_FOR_DEDUP = "_row_number_dedup_";
6464

65-
String ROW_NUMBER_COLUMN_NAME = "_row_number_";
65+
String ROW_NUMBER_COLUMN_NAME_TOP_RARE = "_row_number_top_rare_";
6666
String ROW_NUMBER_COLUMN_NAME_MAIN = "_row_number_main_";
6767
String ROW_NUMBER_COLUMN_NAME_SUBSEARCH = "_row_number_subsearch_";
6868

docs/user/ppl/admin/settings.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ The behaviours it controlled includes:
211211

212212
- The default value of argument ``bucket_nullable`` in ``stats`` command. Check `stats command <../cmd/stats.rst>`_ for details.
213213
- The return value of ``divide`` and ``/`` operator. Check `expressions <../functions/expressions.rst>`_ for details.
214+
- The default value of argument ``usenull`` in ``top`` and ``rare`` commands. Check `top command <../cmd/top.rst>`_ and `rare command <../cmd/rare.rst>`_ for details.
214215

215216
Example 1
216217
---------

docs/user/ppl/cmd/rare.rst

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
=============
1+
====
22
rare
3-
=============
3+
====
44

55
.. rubric:: Table of contents
66

@@ -10,13 +10,13 @@ rare
1010

1111

1212
Description
13-
============
13+
===========
1414
| Use the ``rare`` command to find the least common tuple of values of all fields in the field list.
1515
1616
**Note**: A maximum of 10 results is returned for each distinct tuple of values of the group-by fields.
1717

1818
Syntax
19-
============
19+
======
2020
rare <field-list> [by-clause]
2121

2222
rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
@@ -26,10 +26,13 @@ rare [rare-options] <field-list> [by-clause] ``(available from 3.1.0+)``
2626
* rare-options: optional. options for the rare command. Supported syntax is [countfield=<string>] [showcount=<bool>] [usenull=<bool>].
2727
* showcount=<bool>: optional. whether to create a field in the output that represents the count of the tuple of values. Default value is ``true``.
2828
* countfield=<string>: optional. the name of the field that contains count. Default value is ``'count'``.
29+
* usenull=<bool>: optional (since 3.4.0). whether to include null values in the output. The default value of ``usenull`` is determined by ``plugins.ppl.syntax.legacy.preferred``:
2930

31+
* When ``plugins.ppl.syntax.legacy.preferred=true``, ``usenull`` defaults to ``true``
32+
* When ``plugins.ppl.syntax.legacy.preferred=false``, ``usenull`` defaults to ``false``
3033

3134
Example 1: Find the least common values in a field
32-
===========================================
35+
==================================================
3336

3437
The example finds the least common gender of all the accounts.
3538

@@ -46,7 +49,7 @@ PPL query::
4649

4750

4851
Example 2: Find the least common values organized by gender
49-
====================================================
52+
===========================================================
5053

5154
The example finds the least common age of all the accounts, grouped by gender.
5255

@@ -66,12 +69,10 @@ PPL query::
6669
Example 3: Rare command with Calcite enabled
6770
============================================
6871

69-
The example finds least common gender of all the accounts when ``plugins.calcite.enabled`` is true.
70-
7172
PPL query::
7273

73-
PPL> source=accounts | rare gender;
74-
fetched row
74+
os> source=accounts | rare gender;
75+
fetched rows / total rows = 2/2
7576
+--------+-------+
7677
| gender | count |
7778
|--------+-------|
@@ -83,19 +84,47 @@ PPL query::
8384
Example 4: Specify the count field option
8485
=========================================
8586

86-
The example specifies the count field when ``plugins.calcite.enabled`` is true.
87-
8887
PPL query::
8988

90-
PPL> source=accounts | rare countfield='cnt' gender;
91-
fetched row
89+
os> source=accounts | rare countfield='cnt' gender;
90+
fetched rows / total rows = 2/2
9291
+--------+-----+
9392
| gender | cnt |
9493
|--------+-----|
9594
| F | 1 |
9695
| M | 3 |
9796
+--------+-----+
9897

98+
99+
Example 5: Specify the usenull field option
100+
===========================================
101+
102+
PPL query::
103+
104+
os> source=accounts | rare usenull=false email;
105+
fetched rows / total rows = 3/3
106+
+-----------------------+-------+
107+
| email | count |
108+
|-----------------------+-------|
109+
| amberduke@pyrami.com | 1 |
110+
| daleadams@boink.com | 1 |
111+
| hattiebond@netagy.com | 1 |
112+
+-----------------------+-------+
113+
114+
PPL query::
115+
116+
os> source=accounts | rare usenull=true email;
117+
fetched rows / total rows = 4/4
118+
+-----------------------+-------+
119+
| email | count |
120+
|-----------------------+-------|
121+
| null | 1 |
122+
| amberduke@pyrami.com | 1 |
123+
| daleadams@boink.com | 1 |
124+
| hattiebond@netagy.com | 1 |
125+
+-----------------------+-------+
126+
127+
99128
Limitations
100129
===========
101130
The ``rare`` command is not rewritten to OpenSearch DSL; it is only executed on the coordinating node.

0 commit comments

Comments
 (0)