Skip to content

Commit aee298d

Browse files
authored
feat: add sort_array benchmark (apache#3758)
1 parent a1922ee commit aee298d

1 file changed

Lines changed: 108 additions & 0 deletions

File tree

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Benchmark to measure performance of Comet array expressions. To run this benchmark:
 * {{{
 *   SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometArrayExpressionBenchmark
 * }}}
 * Results will be written to "spark/benchmarks/CometArrayExpressionBenchmark-**results.txt".
 */
object CometArrayExpressionBenchmark extends CometBenchmarkBase {

  /**
   * Name of the temporary table that holds the generated `int_arr` column. It is used both when
   * the table is registered and inside every benchmark query, so the two cannot drift apart.
   */
  private val SortArrayTable = "parquetV1Table"

  /**
   * Builds the SQL text of an `array(...)` constructor with `width` integer elements.
   *
   * Element `i` is `CAST((value * seed + i) % modulus AS INT)` with `seed = 13 + i * 17`, so
   * each position is derived differently from the `value` column (presumably supplied by the
   * base table the expression is later selected from; that table is not defined in this file).
   * Every 11th position (i = 0, 11, 22, ...) is additionally wrapped in a `CASE` that yields
   * NULL whenever `value % 32 = 0`, so the generated arrays contain some null elements.
   *
   * @param width
   *   number of elements in the generated array; must be positive
   * @param modulus
   *   divisor applied to every element; must be positive
   * @return
   *   the SQL expression text, one element per line
   */
  private def buildWideIntArrayExpr(width: Int, modulus: Int): String = {
    require(width > 0, "width must be positive")
    // A zero or negative modulus (e.g. from Int overflow in the caller's `width * 32`) would
    // otherwise silently end up inside the generated SQL.
    require(modulus > 0, "modulus must be positive")

    (0 until width)
      .map { i =>
        val seed = 13 + i * 17
        val element = s"CAST((value * $seed + $i) % $modulus AS INT)"
        if (i % 11 == 0) s"CASE WHEN value % 32 = 0 THEN NULL ELSE $element END" else element
      }
      .mkString("array(", ",\n ", ")")
  }

  /**
   * Materializes a table with a single `int_arr` column of `width`-element arrays, registers it
   * under [[SortArrayTable]], and evaluates `f` while the table and its backing path exist.
   *
   * @param width
   *   number of elements per generated array; the element modulus is `width * 32`
   * @param f
   *   benchmark body to run against the prepared table
   */
  private def prepareSortArrayTable(width: Int)(f: => Unit): Unit = {
    val intArrayExpr = buildWideIntArrayExpr(width, modulus = width * 32)
    withTempPath { dir =>
      withTempTable(SortArrayTable) {
        prepareTable(
          dir,
          spark.sql(s"""
            SELECT
              $intArrayExpr AS int_arr
            FROM $tbl
          """))
        f
      }
    }
  }

  /**
   * Shared driver for the `sort_array` benchmarks: prepares the array table for the given
   * `width` and runs `query` against it under the benchmark label `name`.
   */
  private def runSortArrayBenchmark(
      name: String,
      values: Int,
      width: Int,
      query: String): Unit =
    prepareSortArrayTable(width) {
      runExpressionBenchmark(name, values, query)
    }

  /** Benchmarks `sort_array(int_arr)` (default sort order) on `width`-element arrays. */
  def sortArrayIntAscBenchmark(values: Int, width: Int): Unit =
    runSortArrayBenchmark(
      s"sort_array int ascending (width=$width)",
      values,
      width,
      s"SELECT sort_array(int_arr) FROM $SortArrayTable")

  /** Benchmarks `sort_array(int_arr, false)` (descending) on `width`-element arrays. */
  def sortArrayIntDescBenchmark(values: Int, width: Int): Unit =
    runSortArrayBenchmark(
      s"sort_array int descending (width=$width)",
      values,
      width,
      s"SELECT sort_array(int_arr, false) FROM $SortArrayTable")

  /** Benchmarks taking the first element of an ascending `sort_array` result. */
  def sortArrayIntAscFirstElementBenchmark(values: Int, width: Int): Unit =
    runSortArrayBenchmark(
      s"element_at(sort_array(int_arr), 1) (width=$width)",
      values,
      width,
      s"SELECT element_at(sort_array(int_arr), 1) FROM $SortArrayTable")

  /**
   * Entry point: runs every `sort_array` benchmark case with 4 * 1024 * 1024 as the `values`
   * argument, at array widths 16 and 32.
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val values = 4 * 1024 * 1024

    runBenchmarkWithTable("sortArrayIntAsc", values) { v =>
      sortArrayIntAscBenchmark(v, width = 16)
    }

    runBenchmarkWithTable("sortArrayIntDesc", values) { v =>
      sortArrayIntDescBenchmark(v, width = 16)
    }

    runBenchmarkWithTable("sortArrayIntAscWide", values) { v =>
      sortArrayIntAscBenchmark(v, width = 32)
    }

    runBenchmarkWithTable("sortArrayIntAscFirstElement", values) { v =>
      sortArrayIntAscFirstElementBenchmark(v, width = 32)
    }
  }
}

0 commit comments

Comments
 (0)