Skip to content

Commit 787c700

Browse files
committed
upd
1 parent c7510d4 commit 787c700

File tree

3 files changed

+679
-0
lines changed

3 files changed

+679
-0
lines changed
Lines changed: 397 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,397 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// ============================================================
19+
// Benchmark: ColumnArrayView vs hand-written array column access
20+
//
21+
// ColumnArrayView (see column_array_view.h) provides a unified interface
22+
// to read array column elements regardless of whether the underlying
23+
// column is Plain, ColumnConst, or ColumnNullable.
24+
//
25+
// This benchmark measures whether ColumnArrayView introduces measurable
26+
// overhead compared to hand-written (direct) array column access code.
27+
//
28+
// Test scenarios:
// 1. Int64 array: sum all elements across all rows
// 2. Int64 array with null elements: sum only the non-null elements
// 3. String array: sum lengths of all elements across all rows
// 4. Const array: Int64 sum with a ColumnConst wrapper
// 5. Nullable array: Int64 sum with an outer ColumnNullable wrapper
33+
// ============================================================
34+
35+
#include <benchmark/benchmark.h>
36+
37+
#include <cstdint>
38+
#include <string>
39+
40+
#include "core/assert_cast.h"
41+
#include "core/column/column_array.h"
42+
#include "core/column/column_array_view.h"
43+
#include "core/column/column_const.h"
44+
#include "core/column/column_nullable.h"
45+
#include "core/column/column_string.h"
46+
#include "core/column/column_vector.h"
47+
#include "core/data_type/primitive_type.h"
48+
49+
namespace doris {
50+
51+
// Number of array rows built by the factory helpers and iterated by every benchmark loop.
static constexpr size_t ARR_NUM_ROWS = 4096;
52+
// Number of elements the factory helpers place in each array row.
static constexpr size_t ARR_ELEM_PER_ROW = 8;
53+
54+
// ============================================================
55+
// Array column factory helpers
56+
// ============================================================
57+
58+
// Build Array<Nullable(Int64)> with ARR_NUM_ROWS rows, each having ARR_ELEM_PER_ROW elements.
59+
static ColumnPtr make_int64_array_column() {
60+
auto data_col = ColumnInt64::create();
61+
auto null_col = ColumnUInt8::create();
62+
auto offsets = ColumnArray::ColumnOffsets::create();
63+
64+
data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW);
65+
null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW);
66+
67+
size_t offset = 0;
68+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
69+
for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) {
70+
data_col->insert_value(static_cast<int64_t>(i * ARR_ELEM_PER_ROW + j + 1));
71+
null_col->insert_value(0);
72+
}
73+
offset += ARR_ELEM_PER_ROW;
74+
offsets->insert_value(offset);
75+
}
76+
77+
auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col));
78+
return ColumnArray::create(std::move(nullable_data), std::move(offsets));
79+
}
80+
81+
// Build Array<Nullable(Int64)> with some null elements (every 5th element is null).
82+
static ColumnPtr make_int64_array_column_with_nulls() {
83+
auto data_col = ColumnInt64::create();
84+
auto null_col = ColumnUInt8::create();
85+
auto offsets = ColumnArray::ColumnOffsets::create();
86+
87+
data_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW);
88+
null_col->reserve(ARR_NUM_ROWS * ARR_ELEM_PER_ROW);
89+
90+
size_t offset = 0;
91+
size_t flat_idx = 0;
92+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
93+
for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) {
94+
data_col->insert_value(static_cast<int64_t>(flat_idx + 1));
95+
null_col->insert_value(flat_idx % 5 == 0 ? 1 : 0);
96+
flat_idx++;
97+
}
98+
offset += ARR_ELEM_PER_ROW;
99+
offsets->insert_value(offset);
100+
}
101+
102+
auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col));
103+
return ColumnArray::create(std::move(nullable_data), std::move(offsets));
104+
}
105+
106+
// Build Array<Nullable(String)> with ARR_NUM_ROWS rows.
107+
static ColumnPtr make_string_array_column() {
108+
auto data_col = ColumnString::create();
109+
auto null_col = ColumnUInt8::create();
110+
auto offsets = ColumnArray::ColumnOffsets::create();
111+
112+
size_t offset = 0;
113+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
114+
for (size_t j = 0; j < ARR_ELEM_PER_ROW; ++j) {
115+
std::string val = "str_" + std::to_string(i * ARR_ELEM_PER_ROW + j);
116+
data_col->insert_data(val.data(), val.size());
117+
null_col->insert_value(0);
118+
}
119+
offset += ARR_ELEM_PER_ROW;
120+
offsets->insert_value(offset);
121+
}
122+
123+
auto nullable_data = ColumnNullable::create(std::move(data_col), std::move(null_col));
124+
return ColumnArray::create(std::move(nullable_data), std::move(offsets));
125+
}
126+
127+
// Wrap with outer Nullable (no rows are actually null, just the wrapper overhead).
128+
static ColumnPtr wrap_nullable(const ColumnPtr& col) {
129+
return ColumnNullable::create(col->assume_mutable(),
130+
ColumnUInt8::create(col->size(), 0));
131+
}
132+
133+
// Wrap as Const.
134+
static ColumnPtr wrap_const(const ColumnPtr& col) {
135+
// Take the first row of the array column, make a 1-row column, then const-expand.
136+
auto single = col->clone_empty();
137+
single->insert_from(*col, 0);
138+
return ColumnConst::create(std::move(single), ARR_NUM_ROWS);
139+
}
140+
141+
// ============================================================
142+
// Hand-written accessor for Array<Nullable(Int64)>
143+
// ============================================================
144+
145+
struct HandwrittenArrayAccessor {
146+
const ColumnArray::Offsets64& offsets;
147+
const ColumnInt64::Container& data;
148+
const NullMap& nested_null_map;
149+
150+
explicit HandwrittenArrayAccessor(const ColumnPtr& col)
151+
: offsets(assert_cast<const ColumnArray&>(*col).get_offsets()),
152+
data(assert_cast<const ColumnInt64&>(
153+
assert_cast<const ColumnNullable&>(
154+
assert_cast<const ColumnArray&>(*col).get_data())
155+
.get_nested_column())
156+
.get_data()),
157+
nested_null_map(assert_cast<const ColumnNullable&>(
158+
assert_cast<const ColumnArray&>(*col).get_data())
159+
.get_null_map_data()) {}
160+
161+
size_t row_begin(size_t row) const { return offsets[row - 1]; }
162+
size_t row_end(size_t row) const { return offsets[row]; }
163+
int64_t value_at(size_t flat_idx) const { return data[flat_idx]; }
164+
bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; }
165+
};
166+
167+
// ============================================================
168+
// 1. Int64 Plain Array: sum all elements
169+
// ============================================================
170+
171+
static void Handwritten_ArrayInt64_Plain(benchmark::State& state) {
172+
const auto col = make_int64_array_column();
173+
HandwrittenArrayAccessor acc(col);
174+
for (auto _ : state) {
175+
int64_t sum = 0;
176+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
177+
size_t begin = acc.row_begin(i);
178+
size_t end = acc.row_end(i);
179+
for (size_t j = begin; j < end; ++j) {
180+
sum += acc.value_at(j);
181+
}
182+
}
183+
benchmark::DoNotOptimize(sum);
184+
}
185+
}
186+
BENCHMARK(Handwritten_ArrayInt64_Plain)->Unit(benchmark::kNanosecond);
187+
188+
static void ArrayView_ArrayInt64_Plain(benchmark::State& state) {
189+
const auto col = make_int64_array_column();
190+
const auto view = ColumnArrayView<TYPE_BIGINT>::create(col);
191+
for (auto _ : state) {
192+
int64_t sum = 0;
193+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
194+
auto arr = view[i];
195+
for (size_t j = 0; j < arr.size(); ++j) {
196+
sum += arr.value_at(j);
197+
}
198+
}
199+
benchmark::DoNotOptimize(sum);
200+
}
201+
}
202+
BENCHMARK(ArrayView_ArrayInt64_Plain)->Unit(benchmark::kNanosecond);
203+
204+
// ============================================================
205+
// 2. Int64 Array with null elements: sum non-null elements
206+
// ============================================================
207+
208+
static void Handwritten_ArrayInt64_WithNulls(benchmark::State& state) {
209+
const auto col = make_int64_array_column_with_nulls();
210+
HandwrittenArrayAccessor acc(col);
211+
for (auto _ : state) {
212+
int64_t sum = 0;
213+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
214+
size_t begin = acc.row_begin(i);
215+
size_t end = acc.row_end(i);
216+
for (size_t j = begin; j < end; ++j) {
217+
if (!acc.is_null_at(j)) {
218+
sum += acc.value_at(j);
219+
}
220+
}
221+
}
222+
benchmark::DoNotOptimize(sum);
223+
}
224+
}
225+
BENCHMARK(Handwritten_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond);
226+
227+
static void ArrayView_ArrayInt64_WithNulls(benchmark::State& state) {
228+
const auto col = make_int64_array_column_with_nulls();
229+
const auto view = ColumnArrayView<TYPE_BIGINT>::create(col);
230+
for (auto _ : state) {
231+
int64_t sum = 0;
232+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
233+
auto arr = view[i];
234+
for (size_t j = 0; j < arr.size(); ++j) {
235+
if (!arr.is_null_at(j)) {
236+
sum += arr.value_at(j);
237+
}
238+
}
239+
}
240+
benchmark::DoNotOptimize(sum);
241+
}
242+
}
243+
BENCHMARK(ArrayView_ArrayInt64_WithNulls)->Unit(benchmark::kNanosecond);
244+
245+
// ============================================================
246+
// 3. String Array: sum string lengths
247+
// ============================================================
248+
249+
struct HandwrittenStringArrayAccessor {
250+
const ColumnArray::Offsets64& offsets;
251+
const ColumnString& str_col;
252+
const NullMap& nested_null_map;
253+
254+
explicit HandwrittenStringArrayAccessor(const ColumnPtr& col)
255+
: offsets(assert_cast<const ColumnArray&>(*col).get_offsets()),
256+
str_col(assert_cast<const ColumnString&>(
257+
assert_cast<const ColumnNullable&>(
258+
assert_cast<const ColumnArray&>(*col).get_data())
259+
.get_nested_column())),
260+
nested_null_map(assert_cast<const ColumnNullable&>(
261+
assert_cast<const ColumnArray&>(*col).get_data())
262+
.get_null_map_data()) {}
263+
264+
size_t row_begin(size_t row) const { return offsets[row - 1]; }
265+
size_t row_end(size_t row) const { return offsets[row]; }
266+
StringRef value_at(size_t flat_idx) const { return str_col.get_data_at(flat_idx); }
267+
bool is_null_at(size_t flat_idx) const { return nested_null_map[flat_idx]; }
268+
};
269+
270+
static void Handwritten_ArrayString_Plain(benchmark::State& state) {
271+
const auto col = make_string_array_column();
272+
HandwrittenStringArrayAccessor acc(col);
273+
for (auto _ : state) {
274+
int64_t sum = 0;
275+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
276+
size_t begin = acc.row_begin(i);
277+
size_t end = acc.row_end(i);
278+
for (size_t j = begin; j < end; ++j) {
279+
sum += acc.value_at(j).size;
280+
}
281+
}
282+
benchmark::DoNotOptimize(sum);
283+
}
284+
}
285+
BENCHMARK(Handwritten_ArrayString_Plain)->Unit(benchmark::kNanosecond);
286+
287+
static void ArrayView_ArrayString_Plain(benchmark::State& state) {
288+
const auto col = make_string_array_column();
289+
const auto view = ColumnArrayView<TYPE_STRING>::create(col);
290+
for (auto _ : state) {
291+
int64_t sum = 0;
292+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
293+
auto arr = view[i];
294+
for (size_t j = 0; j < arr.size(); ++j) {
295+
sum += arr.value_at(j).size;
296+
}
297+
}
298+
benchmark::DoNotOptimize(sum);
299+
}
300+
}
301+
BENCHMARK(ArrayView_ArrayString_Plain)->Unit(benchmark::kNanosecond);
302+
303+
// ============================================================
304+
// 4. Const Array: Const(Array<Int64>)
305+
// ============================================================
306+
307+
static void Handwritten_ArrayInt64_Const(benchmark::State& state) {
308+
const auto base = make_int64_array_column();
309+
const auto const_col = wrap_const(base);
310+
// Hand-written: unpack const, then access the single row repeatedly
311+
const auto& inner = assert_cast<const ColumnConst&>(*const_col).get_data_column();
312+
const auto& array_col = assert_cast<const ColumnArray&>(inner);
313+
const auto& arr_offsets = array_col.get_offsets();
314+
const auto& nested_nullable = assert_cast<const ColumnNullable&>(array_col.get_data());
315+
const auto& int_data = assert_cast<const ColumnInt64&>(nested_nullable.get_nested_column()).get_data();
316+
317+
size_t begin = arr_offsets[-1]; // sentinel = 0
318+
size_t end = arr_offsets[0];
319+
320+
for (auto _ : state) {
321+
int64_t sum = 0;
322+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
323+
for (size_t j = begin; j < end; ++j) {
324+
sum += int_data[j];
325+
}
326+
}
327+
benchmark::DoNotOptimize(sum);
328+
}
329+
}
330+
BENCHMARK(Handwritten_ArrayInt64_Const)->Unit(benchmark::kNanosecond);
331+
332+
static void ArrayView_ArrayInt64_Const(benchmark::State& state) {
333+
const auto base = make_int64_array_column();
334+
const auto const_col = wrap_const(base);
335+
const auto view = ColumnArrayView<TYPE_BIGINT>::create(const_col);
336+
for (auto _ : state) {
337+
int64_t sum = 0;
338+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
339+
auto arr = view[i];
340+
for (size_t j = 0; j < arr.size(); ++j) {
341+
sum += arr.value_at(j);
342+
}
343+
}
344+
benchmark::DoNotOptimize(sum);
345+
}
346+
}
347+
BENCHMARK(ArrayView_ArrayInt64_Const)->Unit(benchmark::kNanosecond);
348+
349+
// ============================================================
350+
// 5. Nullable Array: Nullable(Array<Int64>)
351+
// ============================================================
352+
353+
static void Handwritten_ArrayInt64_Nullable(benchmark::State& state) {
354+
const auto base = make_int64_array_column();
355+
const auto nullable_col = wrap_nullable(base);
356+
// Hand-written: unpack nullable
357+
const auto& nullable = assert_cast<const ColumnNullable&>(*nullable_col);
358+
const auto& outer_null_map = nullable.get_null_map_data();
359+
const auto& array_col = assert_cast<const ColumnArray&>(nullable.get_nested_column());
360+
const auto& arr_offsets = array_col.get_offsets();
361+
const auto& nested_nullable = assert_cast<const ColumnNullable&>(array_col.get_data());
362+
const auto& int_data = assert_cast<const ColumnInt64&>(nested_nullable.get_nested_column()).get_data();
363+
364+
for (auto _ : state) {
365+
int64_t sum = 0;
366+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
367+
if (outer_null_map[i]) continue;
368+
size_t begin = arr_offsets[i - 1];
369+
size_t end = arr_offsets[i];
370+
for (size_t j = begin; j < end; ++j) {
371+
sum += int_data[j];
372+
}
373+
}
374+
benchmark::DoNotOptimize(sum);
375+
}
376+
}
377+
BENCHMARK(Handwritten_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond);
378+
379+
static void ArrayView_ArrayInt64_Nullable(benchmark::State& state) {
380+
const auto base = make_int64_array_column();
381+
const auto nullable_col = wrap_nullable(base);
382+
const auto view = ColumnArrayView<TYPE_BIGINT>::create(nullable_col);
383+
for (auto _ : state) {
384+
int64_t sum = 0;
385+
for (size_t i = 0; i < ARR_NUM_ROWS; ++i) {
386+
if (view.is_null_at(i)) continue;
387+
auto arr = view[i];
388+
for (size_t j = 0; j < arr.size(); ++j) {
389+
sum += arr.value_at(j);
390+
}
391+
}
392+
benchmark::DoNotOptimize(sum);
393+
}
394+
}
395+
BENCHMARK(ArrayView_ArrayInt64_Nullable)->Unit(benchmark::kNanosecond);
396+
397+
} // namespace doris

0 commit comments

Comments
 (0)