Skip to content

Commit 90ec8ad

Browse files
authored
[opt](function) speed up md5 with AVX2 batch path (#63484)
Root cause: md5/md5sum evaluated every row through Md5Digest and OpenSSL, so the vectorized string-function path was dominated by per-row scalar digest setup and hex materialization.

Fix: add an AVX2 multi-buffer MD5 helper with a scalar fallback, expose a batch hex API, and route single-argument md5/md5sum over ColumnString/ColumnVarbinary through the batch path, while keeping multi-argument md5sum and sm3 on the existing digest implementation.

Tested with the following SQL:

```sql
SET parallel_pipeline_task_num=1;
SET enable_query_cache=false;
SELECT SUM(ASCII(SUBSTRING(MD5(CAST(number AS STRING)), 1, 1))) FROM numbers("number" = "50000000");
```

Result:

| version | times | avg | median |
|---|---:|---:|---:|
| upstream/master baseline | 8.59, 10.21, 9.52, 9.93, 8.85s | 9.42s | 9.52s |
| after AVX2 batch | 2.83, 2.84, 2.82, 2.79, 2.82s | 2.82s | 2.82s |
1 parent d7d516f commit 90ec8ad

5 files changed

Lines changed: 677 additions & 7 deletions

File tree

be/src/exprs/function/function_string_digest.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
// under the License.
1717

1818
#include <cstddef>
19+
#include <cstring>
1920
#include <string_view>
21+
#include <type_traits>
22+
#include <vector>
2023

2124
#include "common/status.h"
2225
#include "core/assert_cast.h"
@@ -98,6 +101,14 @@ class FunctionStringDigestMulti : public IFunction {
98101
const std::vector<ColumnPtr>& argument_columns,
99102
const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data,
100103
ColumnString::Offsets& res_offset) const {
104+
if constexpr (std::is_same_v<Impl, MD5Sum>) {
105+
if (argument_columns.size() == 1) {
106+
const auto* col = assert_cast<const ColumnType*>(argument_columns[0].get());
107+
vector_execute_single_md5(col, input_rows_count, is_const[0], res_data, res_offset);
108+
return;
109+
}
110+
}
111+
101112
using ObjectData = typename Impl::ObjectData;
102113
for (size_t i = 0; i < input_rows_count; ++i) {
103114
ObjectData digest;
@@ -114,6 +125,42 @@ class FunctionStringDigestMulti : public IFunction {
114125
i, res_data, res_offset);
115126
}
116127
}
128+
129+
template <typename ColumnType>
130+
void vector_execute_single_md5(const ColumnType* col, size_t input_rows_count, bool is_const,
131+
ColumnString::Chars& res_data,
132+
ColumnString::Offsets& res_offset) const {
133+
ColumnString::check_chars_length(input_rows_count * MD5_HEX_LENGTH, input_rows_count);
134+
res_data.resize(input_rows_count * MD5_HEX_LENGTH);
135+
for (size_t i = 0; i < input_rows_count; ++i) {
136+
res_offset[i] = (i + 1) * MD5_HEX_LENGTH;
137+
}
138+
if (input_rows_count == 0) {
139+
return;
140+
}
141+
142+
if (is_const) {
143+
StringRef data_ref = col->get_data_at(0);
144+
const unsigned char* input = reinterpret_cast<const unsigned char*>(data_ref.data);
145+
size_t length = data_ref.size;
146+
char digest[MD5_HEX_LENGTH];
147+
md5_hex_batch(&input, &length, digest, 1);
148+
for (size_t i = 0; i < input_rows_count; ++i) {
149+
std::memcpy(res_data.data() + i * MD5_HEX_LENGTH, digest, MD5_HEX_LENGTH);
150+
}
151+
return;
152+
}
153+
154+
std::vector<const unsigned char*> inputs(input_rows_count);
155+
std::vector<size_t> lengths(input_rows_count);
156+
for (size_t i = 0; i < input_rows_count; ++i) {
157+
StringRef data_ref = col->get_data_at(i);
158+
inputs[i] = reinterpret_cast<const unsigned char*>(data_ref.data);
159+
lengths[i] = data_ref.size;
160+
}
161+
md5_hex_batch(inputs.data(), lengths.data(), reinterpret_cast<char*>(res_data.data()),
162+
input_rows_count);
163+
}
117164
};
118165

119166
class FunctionStringDigestSHA1 : public IFunction {

0 commit comments

Comments
 (0)