Skip to content

Commit 3847fbd

Browse files
authored
[Backport 2.19-dev] Support format=yaml in Explain API (opensearch-project#4446) (opensearch-project#4609)
* Support format=yaml in Explain API (opensearch-project#4446) --------- Signed-off-by: Peng Huo <penghuo@gmail.com> * Fix compile issue in main (opensearch-project#4608) Signed-off-by: Peng Huo <penghuo@gmail.com> * Update Signed-off-by: Peng Huo <penghuo@gmail.com> * update doc Signed-off-by: Peng Huo <penghuo@gmail.com> * Update Signed-off-by: Peng Huo <penghuo@gmail.com> --------- Signed-off-by: Peng Huo <penghuo@gmail.com>
1 parent 2c41580 commit 3847fbd

17 files changed

Lines changed: 461 additions & 242 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/statement/Explain.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ public enum ExplainFormat {
4242
SIMPLE,
4343
STANDARD,
4444
EXTENDED,
45-
COST
45+
COST,
46+
/** Formats explain output in yaml format. */
47+
YAML
4648
}
4749

4850
public static ExplainFormat format(String format) {

core/src/main/java/org/opensearch/sql/executor/ExecutionEngine.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,22 @@ public boolean equals(Object o) {
109109
public int hashCode() {
110110
return Objects.hash(root, calcite);
111111
}
112+
113+
public static ExplainResponse normalizeLf(ExplainResponse response) {
114+
ExecutionEngine.ExplainResponseNodeV2 calcite = response.getCalcite();
115+
if (calcite != null) {
116+
return new ExplainResponse(
117+
new ExecutionEngine.ExplainResponseNodeV2(
118+
normalizeLf(calcite.getLogical()),
119+
normalizeLf(calcite.getPhysical()),
120+
normalizeLf(calcite.getExtended())));
121+
}
122+
return response;
123+
}
124+
125+
private static String normalizeLf(String value) {
126+
return value == null ? null : value.replace("\r\n", "\n");
127+
}
112128
}
113129

114130
@AllArgsConstructor

core/src/main/java/org/opensearch/sql/executor/QueryService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ public void explainWithLegacy(
201201
Explain.ExplainFormat format,
202202
Optional<Throwable> calciteFailure) {
203203
try {
204-
if (format != null && format != Explain.ExplainFormat.STANDARD) {
204+
if (format != null
205+
&& (format != Explain.ExplainFormat.STANDARD && format != Explain.ExplainFormat.YAML)) {
205206
throw new UnsupportedOperationException(
206207
"Explain mode " + format.name() + " is not supported in v2 engine");
207208
}

core/src/main/java/org/opensearch/sql/utils/YamlFormatter.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55

66
package org.opensearch.sql.utils;
77

8+
import com.fasterxml.jackson.annotation.JsonInclude;
89
import com.fasterxml.jackson.core.JsonProcessingException;
910
import com.fasterxml.jackson.databind.ObjectMapper;
11+
import com.fasterxml.jackson.databind.SerializationFeature;
1012
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
1113
import com.fasterxml.jackson.dataformat.yaml.YAMLGenerator;
1214

@@ -21,11 +23,16 @@ public class YamlFormatter {
2123
static {
2224
YAMLFactory yamlFactory = new YAMLFactory();
2325
yamlFactory.disable(YAMLGenerator.Feature.WRITE_DOC_START_MARKER);
26+
yamlFactory.enable(YAMLGenerator.Feature.USE_PLATFORM_LINE_BREAKS);
27+
yamlFactory.enable(YAMLGenerator.Feature.LITERAL_BLOCK_STYLE);
2428
yamlFactory.enable(YAMLGenerator.Feature.MINIMIZE_QUOTES); // Enable smart quoting
2529
yamlFactory.enable(
2630
YAMLGenerator.Feature.ALWAYS_QUOTE_NUMBERS_AS_STRINGS); // Quote numeric strings
2731
yamlFactory.enable(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR);
2832
YAML_MAPPER = new ObjectMapper(yamlFactory);
33+
34+
YAML_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL);
35+
YAML_MAPPER.configure(SerializationFeature.WRITE_NULL_MAP_VALUES, false);
2936
}
3037

3138
/**

docs/category.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
"bash": [
3-
"user/ppl/interfaces/endpoint.rst",
4-
"user/ppl/interfaces/protocol.rst",
53
"user/optimization/optimization.rst",
64
"user/admin/settings.rst"
75
],
8-
"ppl_cli": [
6+
"bash_calcite": [
7+
"user/ppl/interfaces/endpoint.rst",
8+
"user/ppl/interfaces/protocol.rst"
99
],
1010
"sql_cli": [
1111
"user/dql/expressions.rst",

docs/user/ppl/interfaces/endpoint.rst

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -73,28 +73,79 @@ Description
7373

7474
You can send an HTTP explain request to the endpoint **/_plugins/_ppl/_explain** with your query in the request body to understand the execution plan for the PPL query. The explain endpoint is useful when you want insight into how the query is executed in the engine.
7575

76-
Example
77-
-------
76+
Description
77+
-----------
78+
79+
To translate your query, send it to the explain endpoint. The explain output is OpenSearch domain-specific language (DSL) in JSON format. You can copy and paste it into your console to run it against OpenSearch directly.
80+
81+
The explain output can be returned in different formats: ``standard`` (the default format), ``simple``, ``extended``, ``dsl``, and ``yaml`` (experimental).
82+
83+
84+
Example 1 default (standard) format
85+
-----------------------------------
7886

79-
The following PPL query demonstrated that where and stats command were pushed down to OpenSearch DSL aggregation query::
87+
Explain query::
8088

8189
sh$ curl -sS -H 'Content-Type: application/json' \
8290
... -X POST localhost:9200/_plugins/_ppl/_explain \
83-
... -d '{"query" : "source=accounts | where age > 10 | stats avg(age)"}'
91+
... -d '{"query" : "source=state_country | where age>30 | fields age"}'
8492
{
85-
"root": {
86-
"name": "ProjectOperator",
87-
"description": {
88-
"fields": "[avg(age)]"
89-
},
90-
"children": [
91-
{
92-
"name": "OpenSearchIndexScan",
93-
"description": {
94-
"request": "OpenSearchQueryRequest(indexName=accounts, sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":10,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"aggregations\":{\"avg(age)\":{\"avg\":{\"field\":\"age\"}}}}, searchDone=false)"
95-
},
96-
"children": []
97-
}
98-
]
93+
"calcite": {
94+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n",
95+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[age], FILTER->>($0, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
9996
}
10097
}
98+
99+
Example 2 simple format
100+
-----------------------
101+
102+
Explain query::
103+
104+
sh$ curl -sS -H 'Content-Type: application/json' \
105+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=simple \
106+
... -d '{"query" : "source=state_country | where age>30 | fields age"}'
107+
{
108+
"calcite": {
109+
"logical": "LogicalSystemLimit\n LogicalProject\n LogicalFilter\n CalciteLogicalIndexScan\n"
110+
}
111+
}
112+
113+
Example 3 extended format
114+
-------------------------
115+
116+
Explain query::
117+
118+
sh$ curl -sS -H 'Content-Type: application/json' \
119+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=extended \
120+
... -d '{"query" : "source=state_country | where age>30 | fields age | dedup age"}'
121+
{
122+
"calcite": {
123+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(age=[$0])\n LogicalFilter(condition=[<=($1, 1)])\n LogicalProject(age=[$0], _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0 ORDER BY $0)])\n LogicalFilter(condition=[IS NOT NULL($0)])\n LogicalProject(age=[$5])\n LogicalFilter(condition=[>($5, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])\n",
124+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..1=[{inputs}], expr#2=[1], expr#3=[<=($t1, $t2)], age=[$t0], $condition=[$t3])\n EnumerableWindow(window#0=[window(partition {0} order by [0] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[age], FILTER->>($0, 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n",
125+
"extended": "public org.apache.calcite.linq4j.Enumerable bind(final org.apache.calcite.DataContext root) {\n final org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan v1stashed = (org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableIndexScan) root.get(\"v1stashed\");\n int prevStart;\n int prevEnd;\n final java.util.Comparator comparator = new java.util.Comparator(){\n public int compare(Long v0, Long v1) {\n final int c;\n c = org.apache.calcite.runtime.Utilities.compareNullsLast(v0, v1);\n if (c != 0) {\n return c;\n }\n return 0;\n }\n\n public int compare(Object o0, Object o1) {\n return this.compare((Long) o0, (Long) o1);\n }\n\n };\n final org.apache.calcite.runtime.SortedMultiMap multiMap = new org.apache.calcite.runtime.SortedMultiMap();\n v1stashed.scan().foreach(new org.apache.calcite.linq4j.function.Function1() {\n public Object apply(Long v) {\n Long key = v;\n multiMap.putMulti(key, v);\n return null;\n }\n public Object apply(Object v) {\n return apply(\n (Long) v);\n }\n }\n );\n final java.util.Iterator iterator = multiMap.arrays(comparator);\n final java.util.ArrayList _list = new java.util.ArrayList(\n multiMap.size());\n Long a0w0 = (Long) null;\n while (iterator.hasNext()) {\n final Object[] _rows = (Object[]) iterator.next();\n prevStart = -1;\n prevEnd = 2147483647;\n for (int i = 0; i < _rows.length; ++i) {\n if (i != prevEnd) {\n int actualStart = i < prevEnd ? 
0 : prevEnd + 1;\n prevEnd = i;\n a0w0 = Long.valueOf(((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown((i - 0 + 1))).longValue());\n }\n _list.add(new Object[] {\n (Long) _rows[i],\n a0w0});\n }\n }\n multiMap.clear();\n final org.apache.calcite.linq4j.Enumerable _inputEnumerable = org.apache.calcite.linq4j.Linq4j.asEnumerable(_list);\n final org.apache.calcite.linq4j.AbstractEnumerable child = new org.apache.calcite.linq4j.AbstractEnumerable(){\n public org.apache.calcite.linq4j.Enumerator enumerator() {\n return new org.apache.calcite.linq4j.Enumerator(){\n public final org.apache.calcite.linq4j.Enumerator inputEnumerator = _inputEnumerable.enumerator();\n public void reset() {\n inputEnumerator.reset();\n }\n\n public boolean moveNext() {\n while (inputEnumerator.moveNext()) {\n if (org.apache.calcite.runtime.SqlFunctions.toLong(((Object[]) inputEnumerator.current())[1]) <= $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b) {\n return true;\n }\n }\n return false;\n }\n\n public void close() {\n inputEnumerator.close();\n }\n\n public Object current() {\n return (Long) ((Object[]) inputEnumerator.current())[0];\n }\n\n static final long $L4J$C$_Number_org_apache_calcite_linq4j_tree_Primitive_of_long_class_358aa52b = ((Number)org.apache.calcite.linq4j.tree.Primitive.of(long.class).numberValueRoundDown(1)).longValue();\n };\n }\n\n };\n return child.take(10000);\n}\n\n\npublic Class getElementType() {\n return java.lang.Long.class;\n}\n\n\n"
126+
}
127+
}
128+
129+
Example 4 YAML format (experimental)
130+
------------------------------------
131+
132+
.. note::
133+
YAML explain output is an experimental feature and not intended for
134+
production use. The interface and output may change without notice.
135+
136+
Return the explain response in ``yaml`` format.
137+
138+
Explain query::
139+
140+
sh$ curl -sS -H 'Content-Type: application/json' \
141+
... -X POST localhost:9200/_plugins/_ppl/_explain?format=yaml \
142+
... -d '{"query" : "source=state_country | where age>30 | fields age"}'
143+
calcite:
144+
logical: |
145+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
146+
LogicalProject(age=[$5])
147+
LogicalFilter(condition=[>($5, 30)])
148+
CalciteLogicalIndexScan(table=[[OpenSearch, state_country]])
149+
physical: |
150+
CalciteEnumerableIndexScan(table=[[OpenSearch, state_country]], PushDownContext=[[PROJECT->[age], FILTER->>($0, 30), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"age":{"from":30,"to":null,"include_lower":false,"include_upper":true,"boost":1.0}}},"_source":{"includes":["age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
151+

0 commit comments

Comments
 (0)