Skip to content

Commit 632f791

Browse files
HappenLee and englefly authored
[Exec](be) Support offset prune column and null column in BE (#61888)
### What problem does this PR solve?

Problem Summary: This PR includes two main changes:

- Add offset-only read optimization support for string, array, and map types in the column reader

### Release note

- [Storage] Add offset-only read optimization for complex types (string, array, map) to improve read performance

### Check List

- Test: BE unit tests passed
- Behavior changed: No (materialization fix prevents silent failures, now returns error explicitly)
- Does this need documentation: No

---------

Co-authored-by: englefly <englefly@gmail.com>
1 parent d5e9518 commit 632f791

9 files changed

Lines changed: 628 additions & 38 deletions

File tree

be/src/core/column/column.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,15 @@ class IColumn : public COW<IColumn> {
266266
"Method insert_many_continuous_binary_data is not supported for " + get_name());
267267
}
268268

269+
/// Insert `num` string entries with real length information but no actual
270+
/// character data. Used by OFFSET_ONLY reading mode where actual string
271+
/// content is not needed but length information must be preserved.
272+
virtual void insert_offsets_from_lengths(const uint32_t* lengths, size_t num) {
273+
throw doris::Exception(
274+
ErrorCode::NOT_IMPLEMENTED_ERROR,
275+
"Method insert_offsets_from_lengths is not supported for " + get_name());
276+
}
277+
269278
virtual void insert_many_strings(const StringRef* strings, size_t num) {
270279
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
271280
"Method insert_many_strings is not supported for " + get_name());

be/src/core/column/column_nullable.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,14 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
185185
get_nested_column().insert_many_continuous_binary_data(data, offsets, num);
186186
}
187187

188+
void insert_offsets_from_lengths(const uint32_t* lengths, size_t num) override {
189+
if (UNLIKELY(num == 0)) {
190+
return;
191+
}
192+
push_false_to_nullmap(num);
193+
get_nested_column().insert_offsets_from_lengths(lengths, num);
194+
}
195+
188196
// Default value in `ColumnNullable` is null
189197
void insert_default() override {
190198
get_nested_column().insert_default();

be/src/core/column/column_string.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,29 @@ class ColumnStr final : public COWHelper<IColumn, ColumnStr<T>> {
276276
sanity_check_simple();
277277
}
278278

279+
// Insert `num` string entries with real length information but no actual
280+
// character data. The `lengths` array provides the byte length of each
281+
// string. Offsets are built with correct cumulative sizes so that
282+
// size_at(i) returns the true string length. The chars buffer is extended
283+
// with zero-filled padding to maintain the invariant chars.size() == offsets.back().
284+
// Used by OFFSET_ONLY reading mode where actual string content is not needed
285+
// but length information must be preserved (e.g., for length() function).
286+
void insert_offsets_from_lengths(const uint32_t* lengths, size_t num) override {
287+
if (UNLIKELY(num == 0)) {
288+
return;
289+
}
290+
const auto old_rows = offsets.size();
291+
// Build cumulative offsets from lengths
292+
offsets.resize(old_rows + num);
293+
auto* offsets_ptr = &offsets[old_rows];
294+
size_t running_offset = offsets[old_rows - 1];
295+
for (size_t i = 0; i < num; ++i) {
296+
running_offset += lengths[i];
297+
offsets_ptr[i] = static_cast<T>(running_offset);
298+
}
299+
chars.resize(offsets[old_rows + num - 1]);
300+
}
301+
279302
void insert_many_strings(const StringRef* strings, size_t num) override {
280303
size_t new_size = 0;
281304
for (size_t i = 0; i < num; i++) {

be/src/storage/segment/binary_dict_page.cpp

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "common/logging.h"
3030
#include "common/status.h"
3131
#include "core/column/column.h"
32+
#include "core/column/column_string.h"
3233
#include "storage/segment/binary_plain_page_v2.h"
3334
#include "storage/segment/bitshuffle_page.h"
3435
#include "storage/segment/encoding_info.h"
@@ -318,11 +319,28 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, MutableColumnPtr& dst) {
318319
_bit_shuffle_ptr->_cur_index));
319320
*n = max_fetch;
320321

321-
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
322-
size_t start_index = _bit_shuffle_ptr->_cur_index;
322+
if (_options.only_read_offsets) {
323+
// OFFSET_ONLY mode: resolve dict codes to get real string lengths
324+
// without copying actual char data. This allows length() to work.
325+
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
326+
size_t start_index = _bit_shuffle_ptr->_cur_index;
327+
// Reuse _buffer (int32_t vector) to store uint32_t lengths.
328+
// int32_t and uint32_t have the same size/alignment, and string
329+
// lengths are always non-negative, so the bit patterns are identical.
330+
_buffer.resize(max_fetch);
331+
for (size_t i = 0; i < max_fetch; ++i) {
332+
int32_t codeword = data_array[start_index + i];
333+
_buffer[i] = static_cast<int32_t>(_dict_word_info[codeword].size);
334+
}
335+
dst->insert_offsets_from_lengths(reinterpret_cast<const uint32_t*>(_buffer.data()),
336+
max_fetch);
337+
} else {
338+
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
339+
size_t start_index = _bit_shuffle_ptr->_cur_index;
323340

324-
dst->insert_many_dict_data(data_array, start_index, _dict_word_info, max_fetch,
325-
_num_dict_items);
341+
dst->insert_many_dict_data(data_array, start_index, _dict_word_info, max_fetch,
342+
_num_dict_items);
343+
}
326344

327345
_bit_shuffle_ptr->_cur_index += max_fetch;
328346

@@ -343,8 +361,32 @@ Status BinaryDictPageDecoder::read_by_rowids(const rowid_t* rowids, ordinal_t pa
343361
return Status::OK();
344362
}
345363

346-
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
347364
auto total = *n;
365+
366+
if (_options.only_read_offsets) {
367+
// OFFSET_ONLY mode: resolve dict codes to get real string lengths
368+
// without copying actual char data. This allows length() to work correctly.
369+
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
370+
size_t read_count = 0;
371+
_buffer.resize(total);
372+
for (size_t i = 0; i < total; ++i) {
373+
ordinal_t ord = rowids[i] - page_first_ordinal;
374+
if (ord >= _bit_shuffle_ptr->_num_elements) [[unlikely]] {
375+
break;
376+
}
377+
int32_t codeword = data_array[ord];
378+
_buffer[read_count] = static_cast<int32_t>(_dict_word_info[codeword].size);
379+
read_count++;
380+
}
381+
if (read_count > 0) {
382+
dst->insert_offsets_from_lengths(reinterpret_cast<const uint32_t*>(_buffer.data()),
383+
read_count);
384+
}
385+
*n = read_count;
386+
return Status::OK();
387+
}
388+
389+
const auto* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->get_data(0));
348390
size_t read_count = 0;
349391
_buffer.resize(total);
350392
for (size_t i = 0; i < total; ++i) {

be/src/storage/segment/binary_plain_page.h

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030

3131
#include "common/logging.h"
3232
#include "core/column/column_complex.h"
33-
#include "core/column/column_nullable.h"
3433
#include "storage/olap_common.h"
3534
#include "storage/segment/options.h"
3635
#include "storage/segment/page_builder.h"
@@ -244,6 +243,21 @@ class BinaryPlainPageDecoder : public PageDecoder {
244243
}
245244
const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx));
246245

246+
if (_options.only_read_offsets) {
247+
// OFFSET_ONLY mode: read string lengths from page offset trailer
248+
// without copying actual char data. This allows length() to work.
249+
_offsets.resize(max_fetch);
250+
for (size_t i = 0; i < max_fetch; ++i) {
251+
uint32_t str_start = offset(_cur_idx + i);
252+
uint32_t str_end = offset(_cur_idx + i + 1);
253+
_offsets[i] = str_end - str_start;
254+
}
255+
dst->insert_offsets_from_lengths(_offsets.data(), max_fetch);
256+
_cur_idx += max_fetch;
257+
*n = max_fetch;
258+
return Status::OK();
259+
}
260+
247261
uint32_t last_offset = guarded_offset(_cur_idx);
248262
_offsets.resize(max_fetch + 1);
249263
_offsets[0] = last_offset;
@@ -279,6 +293,29 @@ class BinaryPlainPageDecoder : public PageDecoder {
279293
}
280294

281295
auto total = *n;
296+
297+
if (_options.only_read_offsets) {
298+
// OFFSET_ONLY mode: read string lengths from page offset trailer
299+
// without copying actual char data. This allows length() to work.
300+
size_t read_count = 0;
301+
_offsets.resize(total);
302+
for (size_t i = 0; i < total; ++i) {
303+
ordinal_t ord = rowids[i] - page_first_ordinal;
304+
if (UNLIKELY(ord >= _num_elems)) {
305+
break;
306+
}
307+
uint32_t str_start = offset(ord);
308+
uint32_t str_end = offset(ord + 1);
309+
_offsets[read_count] = str_end - str_start;
310+
read_count++;
311+
}
312+
if (read_count > 0) {
313+
dst->insert_offsets_from_lengths(_offsets.data(), read_count);
314+
}
315+
*n = read_count;
316+
return Status::OK();
317+
}
318+
282319
size_t read_count = 0;
283320
_binary_data.resize(total);
284321
for (size_t i = 0; i < total; ++i) {

0 commit comments

Comments
 (0)