-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Expand file tree
/
Copy pathpredicate_column.h
More file actions
436 lines (378 loc) · 17.8 KB
/
predicate_column.h
File metadata and controls
436 lines (378 loc) · 17.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <optional>
#include "core/arena.h"
#include "core/column/column.h"
#include "core/column/column_decimal.h"
#include "core/column/column_string.h"
#include "core/column/column_vector.h"
#include "core/data_type/define_primitive_type.h"
#include "core/data_type/primitive_type.h"
#include "core/decimal12.h"
#include "core/string_ref.h"
#include "core/types.h"
#include "core/uint24.h"
namespace doris {
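/**
* Column used by the storage layer to hold the values of a predicate column.
*
* Type = primitive type of the predicate column. The in-memory element type T is
* StringRef for string types and PrimitiveTypeTraits<Type>::CppType otherwise.
*/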
template <PrimitiveType Type>
class PredicateColumnType final : public COWHelper<IColumn, PredicateColumnType<Type>> {
private:
// Constructors are private; COWHelper is declared a friend of this class.
// Default: empty column.
PredicateColumnType() = default;
// Constructs the `data` container with `n`.
PredicateColumnType(const size_t n) : data(n) {}
// Copies only the elements of src.data; `_arena` and `_refs` are not copied.
// NOTE(review): for string types the copied StringRef entries would still refer to
// memory that the new column does not own - confirm callers never depend on this.
PredicateColumnType(const PredicateColumnType& src) : data(src.data.begin(), src.data.end()) {}
friend class COWHelper<IColumn, PredicateColumnType<Type>>;
using T = std::conditional_t<is_string_type(Type), StringRef,
typename PrimitiveTypeTraits<Type>::CppType>;
using ColumnType = typename PrimitiveTypeTraits<Type>::ColumnType;
// Appends the strings of the rows listed in `sel` to the result string column.
//
// sel      : row indices into `data`
// sel_size : number of entries in `sel`
// res_ptr  : destination column; its offsets and chars buffers are reserved here
//            before the non-reserving bulk insert is called
void insert_string_to_res_column(const uint16_t* sel, size_t sel_size, ColumnString* res_ptr) {
    _refs.resize(sel_size);

    // Gather the selected references into the scratch vector and sum their byte length.
    size_t total_bytes = 0;
    for (size_t pos = 0; pos != sel_size; ++pos) {
        auto& picked = reinterpret_cast<StringRef&>(data[sel[pos]]);
        _refs[pos].data = picked.data;
        _refs[pos].size = picked.size;
        total_bytes += picked.size;
    }

    auto& res_offsets = res_ptr->get_offsets();
    auto& res_chars = res_ptr->get_chars();
    res_offsets.reserve(sel_size + res_offsets.size());
    res_chars.reserve(total_bytes + res_chars.size());
    res_ptr->insert_many_strings_without_reserve(_refs.data(), sel_size);
}
// Copies the values of the rows listed in `sel` into the typed result column.
//
// Y / ColumnContainer : ColumnContainer<Y> must be exactly ColumnType (static_assert).
// sel      : row indices into `data`
// sel_size : number of entries in `sel`
// res_ptr  : destination column; expected to be empty (checked with DCHECK only)
template <PrimitiveType Y, template <PrimitiveType> typename ColumnContainer>
void insert_default_value_res_column(const uint16_t* sel, size_t sel_size,
                                     ColumnContainer<Y>* res_ptr) {
    static_assert(std::is_same_v<ColumnContainer<Y>, ColumnType>);
    auto& res_data = res_ptr->get_data();
    DCHECK(res_data.empty());
    // Has to reserve first, could not call resize or reserve after get_end_ptr
    // because reserve or resize may change memory block.
    const size_t org_num = res_data.size();
    // Reserve for the final size, not just for the new rows: if the result is ever
    // non-empty (DCHECK is compiled out in release builds) the writes below must
    // still stay inside the reserved capacity.
    res_data.reserve(org_num + sel_size);
    auto* y = (typename PrimitiveTypeTraits<Y>::CppType*)res_data.get_end_ptr();
    for (size_t i = 0; i < sel_size; i++) {
        y[i] = data[sel[i]];
    }
    res_data.resize(org_num + sel_size);
}
// Appends the rows listed in `sel` to `res_ptr` one at a time through
// IColumn::insert_data, passing the address of each element and a length of 0.
void insert_byte_to_res_column(const uint16_t* sel, size_t sel_size, IColumn* res_ptr) {
    const uint16_t* const sel_end = sel + sel_size;
    for (const uint16_t* cur = sel; cur != sel_end; ++cur) {
        res_ptr->insert_data(reinterpret_cast<char*>(&data[*cur]), 0);
    }
}
// Appends `num` elements by copying num * sizeof(T) raw bytes from `data_ptr`
// to the end of `data`.
void insert_many_default_type(const char* data_ptr, size_t num) {
    const auto first_new = data.size();
    data.resize(first_new + num);
    void* dst = reinterpret_cast<void*>(data.data() + first_new);
    memcpy(dst, data_ptr, num * sizeof(T));
}
public:
using Self = PredicateColumnType;
using value_type = T;
using Container = PaddedPODArray<value_type>;
// Number of elements currently stored in `data`.
size_t size() const override { return data.size(); }
// Returns the string stored at row `n`.
// Only available when the element type is StringRef; every other instantiation
// throws INTERNAL_ERROR. For TYPE_CHAR the returned size stops at the first NUL
// byte found within the stored length.
StringRef get_data_at(size_t n) const override {
    if constexpr (!std::is_same_v<T, StringRef>) {
        throw doris::Exception(
                ErrorCode::INTERNAL_ERROR,
                "should not call get_data_at in predicate column except for string type");
    } else {
        StringRef value = reinterpret_cast<const StringRef&>(data[n]);
        if constexpr (Type == TYPE_CHAR) {
            value.size = strnlen(value.data, value.size);
        }
        return value;
    }
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void insert_from(const IColumn& src, size_t n) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call insert_from in predicate column");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void insert_range_from(const IColumn& src, size_t start, size_t length) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call insert_range_from in predicate column");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
const uint32_t* indices_end) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call insert_indices_from in predicate column");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void pop_back(size_t n) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call pop_back in predicate column");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void update_hash_with_value(size_t n, SipHash& hash) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call update_hash_with_value in predicate column");
}
// Appends a StringRef that points at the caller's bytes; the bytes themselves are
// not copied here. Uses push_back_without_reserve, so capacity must already exist.
void insert_string_value(const char* data_ptr, size_t length) {
    data.push_back_without_reserve(StringRef(const_cast<char*>(data_ptr), length));
}
// used for int128
void insert_in_copy_way(const char* data_ptr, size_t length) {
T val {};
memcpy(&val, data_ptr, sizeof(val));
data.push_back_without_reserve(val);
}
void insert_default_type(const char* data_ptr, size_t length) {
T* val = (T*)data_ptr;
data.push_back_without_reserve(*val);
}
// Appends a single value, choosing the insertion helper from the element type T:
// Int128 goes through the memcpy-based path, StringRef stores a reference to the
// given bytes, and every other type is read directly from `data_ptr`.
void insert_data(const char* data_ptr, size_t length) override {
    if constexpr (std::is_same_v<T, Int128>) {
        insert_in_copy_way(data_ptr, length);
    } else if constexpr (!std::is_same_v<T, StringRef>) {
        insert_default_type(data_ptr, length);
    } else {
        insert_string_value(data_ptr, length);
    }
}
// Appends `num` DATE values given in their uint24 storage layout.
//
// data_ptr : points at `num` consecutive uint24_t storage values
// num      : number of values to append
void insert_many_date(const char* data_ptr, size_t num) {
    constexpr size_t input_type_size = sizeof(PrimitiveTypeTraits<TYPE_DATE>::StorageFieldType);
    static_assert(input_type_size == sizeof(uint24_t));
    const auto* input_data_ptr = reinterpret_cast<const uint24_t*>(data_ptr);

    // Grow the container first and only then take the destination pointer. Writing
    // through the old end pointer before growing is only valid if the capacity
    // happened to be large enough, and resize may change the memory block.
    const size_t old_size = data.size();
    data.resize(old_size + num);
    auto* res_ptr = reinterpret_cast<VecDateTimeValue*>(data.data() + old_size);
    for (size_t i = 0; i < num; i++) {
        res_ptr[i].set_olap_date(unaligned_load<uint24_t>(&input_data_ptr[i]));
    }
}
// Appends `num` DATETIME values given in their uint64 storage layout.
//
// data_ptr : points at `num` consecutive uint64_t storage values
// num      : number of values to append
void insert_many_datetime(const char* data_ptr, size_t num) {
    constexpr size_t input_type_size =
            sizeof(PrimitiveTypeTraits<TYPE_DATETIME>::StorageFieldType);
    static_assert(input_type_size == sizeof(uint64_t));
    const auto* input_data_ptr = reinterpret_cast<const uint64_t*>(data_ptr);

    // Grow the container first and only then take the destination pointer. Writing
    // through the old end pointer before growing is only valid if the capacity
    // happened to be large enough, and resize may change the memory block.
    const size_t old_size = data.size();
    data.resize(old_size + num);
    auto* res_ptr = reinterpret_cast<VecDateTimeValue*>(data.data() + old_size);
    for (size_t i = 0; i < num; i++) {
        res_ptr[i].from_olap_datetime(input_data_ptr[i]);
    }
}
// The logic is same to ColumnDecimal::insert_many_fix_len_data
// Appends `num` DecimalV2 values. Each input record is sizeof(decimal12_t) bytes:
// an int64 integer part followed by an int32 fraction part.
//
// data_ptr : points at `num` consecutive decimal12_t records
// num      : number of values to append
void insert_many_decimalv2(const char* data_ptr, size_t num) {
    const size_t old_size = data.size();
    data.resize(old_size + num);
    auto* target = (DecimalV2Value*)(data.data() + old_size);
    for (size_t i = 0; i < num; i++) {
        const char* cur_ptr = data_ptr + sizeof(decimal12_t) * i;
        // Both fields are read with unaligned_load: nothing here guarantees that
        // `data_ptr` is suitably aligned for int64/int32 access.
        auto int_value = unaligned_load<int64_t>(cur_ptr);
        auto frac_value = unaligned_load<int32_t>(cur_ptr + sizeof(int64_t));
        target[i].from_olap_decimal(int_value, frac_value);
    }
}
// Appends `num` fixed-length values from `data_ptr`, converting from the storage
// layout where the column type needs it. The branches are mutually exclusive.
void insert_many_fix_len_data(const char* data_ptr, size_t num) override {
    if constexpr (std::is_same_v<T, StringRef>) {
        // here is unreachable, just for compilation to be able to pass
    } else if constexpr (Type == TYPE_DATE) {
        // Datev1 is special, its storage is uint24, but its compute type is actual int64.
        insert_many_date(data_ptr, num);
    } else if constexpr (Type == TYPE_DATETIME) {
        insert_many_datetime(data_ptr, num);
    } else if constexpr (Type == TYPE_DECIMALV2) {
        // DecimalV2 is special, its storage is <int64, int32>, but its compute type is <int64,int64>
        // should convert here, but it may have some performance lost
        insert_many_decimalv2(data_ptr, num);
    } else {
        insert_many_default_type(data_ptr, num);
    }
}
void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict,
size_t num, uint32_t /*dict_num*/) override {
if constexpr (std::is_same_v<T, StringRef>) {
for (size_t end_index = start_index + num; start_index < end_index; ++start_index) {
int32_t codeword = data_array[start_index];
insert_string_value(dict[codeword].data, dict[codeword].size);
}
}
}
// Appends `num` strings laid out back to back in `data_`. String i occupies
// [offsets[i], offsets[i + 1]). The whole byte range is copied into `_arena` with a
// single memcpy and the new elements point into that copy.
// Does nothing when `num` is 0 or when the element type is not StringRef.
void insert_many_continuous_binary_data(const char* data_, const uint32_t* offsets,
                                        const size_t num) override {
    if (UNLIKELY(num == 0)) {
        return;
    }
    if constexpr (std::is_same_v<T, StringRef>) {
        const uint32_t base = offsets[0];
        const auto total_mem_size = offsets[num] - base;
        char* const destination = _arena.alloc(total_mem_size);
        memcpy(destination, data_ + base, total_mem_size);

        const size_t first_new = data.size();
        data.resize(first_new + num);
        auto* slots = data.data() + first_new;
        for (size_t i = 0; i < num; ++i) {
            slots[i].data = destination + (offsets[i] - base);
            slots[i].size = offsets[i + 1] - offsets[i];
        }
        DCHECK(slots[num - 1].data + slots[num - 1].size == destination + total_mem_size);
    }
}
// Appends `num` strings, copying their bytes into `_arena`.
// Inputs that are adjacent in memory are merged into one memcpy per contiguous run.
// Returns immediately when `num` is 0; throws INTERNAL_ERROR when the element type
// is not StringRef.
void insert_many_strings(const StringRef* strings, size_t num) override {
    if (num == 0) {
        return;
    }
    if constexpr (std::is_same_v<T, StringRef>) {
        size_t total_mem_size = 0;
        for (size_t i = 0; i < num; i++) {
            total_mem_size += strings[i].size;
        }
        char* destination = _arena.alloc(total_mem_size);
        char* org_dst = destination;
        size_t org_elem_num = data.size();
        data.resize(org_elem_num + num);
        // Index of the first string of the run currently being accumulated.
        size_t fragment_start_offset = 0;
        // Bytes accumulated in the current run that are not copied yet.
        size_t fragment_len = 0;
        for (size_t i = 0; i < num; i++) {
            data[org_elem_num + i].data = destination + fragment_len;
            data[org_elem_num + i].size = strings[i].size;
            fragment_len += strings[i].size;
            // Compute the largest continuous memcpy block and copy them.
            // If this is the last element in data array, then should copy the current memory block.
            if (i == num - 1 || strings[i + 1].data != strings[i].data + strings[i].size) {
                memcpy(destination, strings[fragment_start_offset].data, fragment_len);
                destination += fragment_len;
                fragment_start_offset = i == num - 1 ? 0 : i + 1;
                fragment_len = 0;
            }
        }
        CHECK(static_cast<size_t>(destination - org_dst) == total_mem_size)
                << "Copied size not equal to expected size";
    } else {
        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                               "Method insert_many_strings is not supported");
    }
}
// Appends one value-initialized element, T().
void insert_default() override { data.push_back(T()); }
// Removes all elements and clears the arena. `_refs` is left untouched.
void clear() override {
data.clear();
_arena.clear();
}
// Size in bytes of the element array only: data.size() * sizeof(T).
size_t byte_size() const override { return data.size() * sizeof(T); }
// Reported as byte_size(); memory held by `_arena` and `_refs` is not included.
size_t allocated_bytes() const override { return byte_size(); }
// Not supported by predicate columns: always throws INTERNAL_ERROR.
bool has_enough_capacity(const IColumn& src) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"has_enough_capacity not supported in PredicateColumnType");
}
// Forwards to data.reserve(n). The arena is not affected.
void reserve(size_t n) override { data.reserve(n); }
// Column name is the string form of the primitive type, type_to_string(Type).
std::string get_name() const override { return type_to_string(Type); }
// Returns a new column obtained from create(); no elements are copied.
// Only size == 0 is expected (checked with DCHECK).
MutableColumnPtr clone_resized(size_t size) const override {
DCHECK(size == 0);
return this->create();
}
void insert_duplicate_fields(const Field& x, const size_t n) override {
if constexpr (is_string_type(Type)) {
const auto& str = x.get<TYPE_STRING>();
auto* dst = _arena.alloc(str.size() * n);
for (size_t i = 0; i < n; i++) {
memcpy(dst, str.data(), str.size());
insert_string_value(dst, str.size());
dst += str.size();
}
} else if constexpr (Type == TYPE_LARGEINT) {
const auto& v = x.get<TYPE_LARGEINT>();
for (size_t i = 0; i < n; i++) {
insert_in_copy_way(reinterpret_cast<const char*>(&v), sizeof(v));
}
} else {
const auto& v = x.get<Type>();
for (size_t i = 0; i < n; i++) {
insert_default_type(reinterpret_cast<const char*>(&v), sizeof(v));
}
}
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void insert(const Field& x) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"insert not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
[[noreturn]] Field operator[](size_t n) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"operator[] not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void get(size_t n, Field& res) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get field not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
// The message names this method so the failing call can be identified.
[[noreturn]] bool get_bool(size_t n) const override {
    throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                           "get_bool not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
// The message names this method so the failing call can be identified.
[[noreturn]] Int64 get_int(size_t n) const override {
    throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                           "get_int not supported in PredicateColumnType");
}
// It is impossible to use a ComplexType as a key, so the serialization methods
// are not implemented. Always throws INTERNAL_ERROR.
[[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"serialize_value_into_arena not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
[[noreturn]] const char* deserialize_and_insert_from_arena(const char* pos) override {
throw doris::Exception(
ErrorCode::INTERNAL_ERROR,
"deserialize_and_insert_from_arena not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
[[noreturn]] StringRef get_raw_data() const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get_raw_data not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
[[noreturn]] bool structure_equals(const IColumn& rhs) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"structure_equals not supported in PredicateColumnType");
}
// Const overload returning a new column. Not supported by predicate columns:
// always throws INTERNAL_ERROR.
[[noreturn]] ColumnPtr filter(const IColumn::Filter& filt,
ssize_t result_size_hint) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"filter not supported in PredicateColumnType");
}
// Non-const overload returning size_t. Not supported by predicate columns:
// always throws INTERNAL_ERROR.
[[noreturn]] size_t filter(const IColumn::Filter&) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"filter not supported in PredicateColumnType");
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
[[noreturn]] MutableColumnPtr permute(const IColumn::Permutation& perm,
size_t limit) const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"permute not supported in PredicateColumnType");
}
// Direct access to the underlying element container (mutable and const overloads).
Container& get_data() { return data; }
const Container& get_data() const { return data; }
// Copies the rows listed in `sel` into the result column `col_ptr`.
//
// sel      : row indices into this column
// sel_size : number of entries in `sel`
// col_ptr  : destination column; must be a ColumnType (verified through assert_cast)
// Returns Status::OK().
Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override {
    auto* typed_res = assert_cast<ColumnType*>(col_ptr);
    if constexpr (Type == TYPE_BOOLEAN) {
        // Booleans are inserted one by one through the generic IColumn interface.
        insert_byte_to_res_column(sel, sel_size, col_ptr);
    } else if constexpr (is_string_type(Type)) {
        insert_string_to_res_column(sel, sel_size, typed_res);
    } else {
        insert_default_value_res_column(sel, sel_size, typed_res);
    }
    return Status::OK();
}
// Not supported by predicate columns: always throws INTERNAL_ERROR.
void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"should not call replace_column_data in predicate column");
}
private:
Container data;
// manages the memory for slice's data(For string type)
Arena _arena;
std::vector<StringRef> _refs;
};
} // namespace doris