Skip to content

Commit ff97db0

Browse files
github-actions[bot]noChargeraaarone90coderabbitai[bot]
committed
Add Frequently Used Big5 PPL Queries (#4976)
* Add frequent used queries Signed-off-by: Louis Chu <lingzhichu.clz@gmail.com> * Add new queries to CalcitePPLBig5IT Signed-off-by: Louis Chu <lingzhichu.clz@gmail.com> * Add frequently used Big5 PPL queries Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Update integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Aaron Alvarez <900908alvarezaaron@gmail.com> * Update integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Aaron Alvarez <900908alvarezaaron@gmail.com> * Update integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Aaron Alvarez <900908alvarezaaron@gmail.com> * Update integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Aaron Alvarez <900908alvarezaaron@gmail.com> * Addressing naming difference issues Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Fixing formatting issues Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Fixing integration tests Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Removing dedup Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Adding comment to rex ppl file to explain what the query does Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Adding comments to ppl queries to explain what they do Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Fixing comments Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Empty commit to trigger CI Signed-off-by: Aaron Alvarez <aaarone@amazon.com> * Removing correctness check for rex command Signed-off-by: Aaron Alvarez <aaarone@amazon.com> --------- 
Signed-off-by: Louis Chu <lingzhichu.clz@gmail.com> Signed-off-by: Aaron Alvarez <aaarone@amazon.com> Signed-off-by: Aaron Alvarez <900908alvarezaaron@gmail.com> Co-authored-by: Louis Chu <lingzhichu.clz@gmail.com> Co-authored-by: Aaron Alvarez <aaarone@amazon.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> (cherry picked from commit b66dc12) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 7b9f657 commit ff97db0

7 files changed

Lines changed: 157 additions & 2 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/big5/CalcitePPLBig5IT.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,36 @@ public void coalesce_nonexistent_field_fallback() throws IOException {
4545
timing(summary, "coalesce_nonexistent_field_fallback", ppl);
4646
}
4747

48+
/**
49+
* Tests regex-based field extraction and transformation using rex command. Validates that the
50+
* Calcite plan correctly handles regex patterns.
51+
*/
52+
@Test
53+
public void rex_regex_transformation() throws IOException {
54+
String ppl = sanitize(loadExpectedQuery("rex_regex_transformation.ppl"));
55+
timing(summary, "rex_regex_transformation", ppl);
56+
}
57+
58+
/**
59+
* Tests LIKE pattern matching with aggregation using script engine. Validates filtering by
60+
* message content and grouping results.
61+
*/
62+
@Test
63+
public void script_engine_like_pattern_with_aggregation() throws IOException {
64+
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_aggregation.ppl"));
65+
timing(summary, "script_engine_like_pattern_with_aggregation", ppl);
66+
}
67+
68+
/**
69+
* Tests LIKE pattern matching with sorting and result limiting. Validates filtering by message
70+
* content with timestamp ordering.
71+
*/
72+
@Test
73+
public void script_engine_like_pattern_with_sort() throws IOException {
74+
String ppl = sanitize(loadExpectedQuery("script_engine_like_pattern_with_sort.ppl"));
75+
timing(summary, "script_engine_like_pattern_with_sort", ppl);
76+
}
77+
4878
/** Tests deduplication by metrics.size field with sorting by timestamp. */
4979
@Test
5080
public void dedup_metrics_size_field() throws IOException {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,11 @@ public void testCountBySpanForCustomFormats() throws IOException {
738738
public void testSpanByImplicitTimestamp() throws IOException {
739739
JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span");
740740
verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp"));
741-
verifyDataRows(result, rows(1, "2023-01-02 00:00:00"));
741+
verifyDataRows(
742+
result,
743+
rows(1, "2023-01-02 00:00:00"),
744+
rows(1, "2023-03-01 00:00:00"),
745+
rows(1, "2023-05-01 00:00:00"));
742746

743747
Throwable t =
744748
assertThrowsWithReplace(

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public void testIsNotNull() throws IOException {
9696
public void testIsNotNullWithStruct() throws IOException {
9797
JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
9898
verifySchema(actual, schema("aws", "struct"));
99-
verifyNumOfRows(actual, 1);
99+
verifyNumOfRows(actual, 3);
100100
}
101101

102102
@Test
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
{"index":{}}
22
{"message":"2023-04-30T21:48:56.160Z Apr 30 21:48:56 ip-66-221-134-40 journal: donkey glazer fly shark whip servant thornfalcon","process":{"name":"journal"},"aws.cloudwatch":{"ingestion_time":"2023-04-30T21:48:56.160Z","log_group":"/var/log/messages","log_stream":"luckcrafter"},"tags":["preserve_original_event"],"meta":{"file":"2023-01-02/1682891301-gotext.ndjson.gz"},"cloud":{"region":"eu-central-1"},"@timestamp":"2023-01-02T22:02:34.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"tmin":849,"size":1981},"log.file.path":"/var/log/messages/luckcrafter","event":{"id":"sunsetmark","dataset":"generic","ingested":"2023-07-20T03:36:30.223806Z"},"agent":{"id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","name":"fancydancer","ephemeral_id":"c315dc22-3ea6-44dc-8d56-fd02f675367b","type":"filebeat","version":"8.8.0"}}
3+
{"index":{}}
4+
{"message":"2024-04-11T18:00:10.965Z Apr 11 18:00:10 ip-32-11-43-93 sshd: cloak bolt thorn hugger rib jackal wolverine shaker boar fighter taker boulderfox","process":{"name":"sshd"},"aws.cloudwatch":{"log_stream":"mirrorlighter","ingestion_time":"2024-04-11T18:00:10.965Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712851210-sshd.ndjson.gz"},"cloud":{"region":"ap-southeast-3"},"@timestamp":"2023-05-01T21:59:58.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3166,"tmin":1},"log.file.path":"/var/log/messages/mirrorlighter","event":{"id":"patternantler","ingested":"2024-04-11T17:39:10.965818973Z","dataset":"generic"},"agent":{"id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5","name":"brindlehugger","type":"filebeat","version":"8.8.0","ephemeral_id":"c79a289f-6c16-4de2-a6c8-8ee5c84473d5"}}
5+
{"index":{}}
6+
{"message":"2024-04-11T10:15:01.628Z Apr 11 10:15:01 ip-95-21-51-112 kernel: kicker stinger slave dolphin sparkox","process":{"name":"kernel"},"aws.cloudwatch":{"log_stream":"plumebard","ingestion_time":"2024-04-11T10:15:01.628Z","log_group":"/var/log/messages"},"tags":["preserve_original_event"],"meta":{"file":"2024-04-11/1712826901-kernel.ndjson.gz"},"cloud":{"region":"ap-south-1"},"@timestamp":"2023-03-01T22:31:11.000Z","input":{"type":"aws-cloudwatch"},"metrics":{"size":3993,"tmin":1},"log.file.path":"/var/log/messages/plumebard","event":{"id":"chipgambler","ingested":"2024-04-11T10:09:29.628941177Z","dataset":"generic"},"agent":{"id":"5f25fa16-6a99-489f-b1c5-f27c0627a459","name":"lemongrabber","type":"filebeat","version":"8.8.0","ephemeral_id":"5f25fa16-6a99-489f-b1c5-f27c0627a459"}}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/* Extract log type and filename from log.file.path using rex, calculate the filename length, and sort by @timestamp in descending order */
2+
/*
3+
{
4+
"name": "rex_regex_transformation",
5+
"operation-type": "search",
6+
"index": "{{index_name | default('big5')}}",
7+
"body": {
8+
"query": {
9+
"match_all": {}
10+
},
11+
"_source": {
12+
"includes": ["log.file.path", "@timestamp"],
13+
"excludes": []
14+
},
15+
"sort": [
16+
{
17+
"@timestamp": {
18+
"order": "desc",
19+
"missing": "_last"
20+
}
21+
}
22+
]
23+
}
24+
}
25+
*/
26+
source = big5
27+
| rex field=log.file.path '/var/log/(?<logType>\\w+)/(?<filename>\\w+)'
28+
| eval filename_len = length(filename)
29+
| fields log.file.path, logType, filename, filename_len, @timestamp
30+
| sort - @timestamp
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/* Filter messages containing 'sshd' and aggregate count by metrics.size */
2+
/*
3+
{
4+
"name": "script_engine_like_pattern_with_aggregation",
5+
"operation-type": "search",
6+
"index": "{{index_name | default('big5')}}",
7+
"body": {
8+
"query": {
9+
"script": {
10+
"script": {
11+
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
12+
"lang": "opensearch_compounded_script",
13+
"params": {
14+
"utcTimestamp": "{{current_timestamp}}"
15+
}
16+
},
17+
"boost": 1.0
18+
}
19+
},
20+
"_source": {
21+
"includes": ["message", "metrics.size"],
22+
"excludes": []
23+
},
24+
"aggregations": {
25+
"composite_buckets": {
26+
"composite": {
27+
"size": 10000,
28+
"sources": [
29+
{
30+
"metrics.size": {
31+
"terms": {
32+
"field": "metrics.size",
33+
"missing_bucket": true,
34+
"missing_order": "first",
35+
"order": "asc"
36+
}
37+
}
38+
}
39+
]
40+
}
41+
}
42+
}
43+
}
44+
}
45+
*/
46+
source = big5
47+
| where like(`message`, '%sshd%')
48+
| stats count() by metrics.size
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* Filter messages containing 'sshd', sort by @timestamp in descending order, and return the first 10 results */
2+
/*
3+
{
4+
"name": "script_engine_like_pattern_with_sort",
5+
"operation-type": "search",
6+
"index": "{{index_name | default('big5')}}",
7+
"body": {
8+
"query": {
9+
"script": {
10+
"script": {
11+
"source": "{\"langType\":\"calcite\",\"script\":\"...\"}",
12+
"lang": "opensearch_compounded_script",
13+
"params": {
14+
"utcTimestamp": "{{current_timestamp}}"
15+
}
16+
},
17+
"boost": 1.0
18+
}
19+
},
20+
"size": 10,
21+
"_source": {
22+
"includes": ["agent", "process", "log", "message", "tags", "cloud", "input", "@timestamp", "ecs", "data_stream", "meta", "host", "metrics", "aws", "event"],
23+
"excludes": []
24+
},
25+
"sort": [
26+
{
27+
"@timestamp": {
28+
"order": "desc",
29+
"missing": "_last"
30+
}
31+
}
32+
]
33+
}
34+
}
35+
*/
36+
source = big5
37+
| where like(`message`, '%sshd%')
38+
| sort - @timestamp
39+
| head 10

0 commit comments

Comments
 (0)