Skip to content

Commit 7d63a11

Browse files
committed
remove config::load_reader_max_block_bytes
1 parent 1c426b7 commit 7d63a11

File tree

6 files changed

+2
-53
lines changed

6 files changed

+2
-53
lines changed

be/src/common/config.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,10 +1729,6 @@ DEFINE_mBool(enable_auto_clone_on_mow_publish_missing_version, "false");
17291729
// The maximum csv line reader output buffer size
17301730
DEFINE_mInt64(max_csv_line_reader_output_buffer_size, "4294967296");
17311731

1732-
// The maximum bytes of a single block returned by load file readers (CsvReader, NewJsonReader,
1733-
// ParquetReader, OrcReader). Default is 64MB. Set to 0 to disable the limit.
1734-
DEFINE_mInt64(load_reader_max_block_bytes, "67108864");
1735-
17361732
// Maximum number of OpenMP threads allowed for concurrent vector index builds.
17371733
// -1 means auto: use 80% of the available CPU cores.
17381734
DEFINE_Int32(omp_threads_limit, "-1");

be/src/common/config.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1790,10 +1790,6 @@ DECLARE_String(fuzzy_test_type);
17901790
// The maximum csv line reader output buffer size
17911791
DECLARE_mInt64(max_csv_line_reader_output_buffer_size);
17921792

1793-
// The maximum bytes of a single block returned by load file readers (CsvReader, NewJsonReader,
1794-
// ParquetReader, OrcReader). Default is 64MB. Set to 0 to disable the limit.
1795-
DECLARE_mInt64(load_reader_max_block_bytes);
1796-
17971793
// Maximum number of OpenMP threads available for concurrent index builds.
17981794
// -1 means auto: use 80% of detected CPU cores.
17991795
DECLARE_Int32(omp_threads_limit);

be/src/format/orc/vorc_reader.cpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,14 +2451,6 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
24512451
return Status::OK();
24522452
}
24532453

2454-
// Limit memory per batch for load paths: pre-shrink _batch_size using the bytes-per-row
2455-
// estimate from the previous batch so the current batch stays within load_reader_max_block_bytes
2456-
// (effective from call 2 onward; first batch is capped on the next call).
2457-
const int64_t max_block_bytes =
2458-
(_state != nullptr && _state->query_type() == TQueryType::LOAD &&
2459-
config::load_reader_max_block_bytes > 0)
2460-
? config::load_reader_max_block_bytes
2461-
: 0;
24622454
if (_push_down_agg_type == TPushAggOp::type::COUNT) {
24632455
auto rows = std::min(get_remaining_rows(), (int64_t)_batch_size);
24642456

@@ -2475,15 +2467,6 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
24752467
return Status::OK();
24762468
}
24772469

2478-
if (max_block_bytes > 0 && _load_bytes_per_row > 0 && _row_reader) {
2479-
size_t new_batch_size = std::max(
2480-
(size_t)1, (size_t)((int64_t)max_block_bytes / (int64_t)_load_bytes_per_row));
2481-
if (new_batch_size != _batch_size) {
2482-
_batch_size = new_batch_size;
2483-
_batch = _row_reader->createRowBatch(_batch_size);
2484-
}
2485-
}
2486-
24872470
if (!_seek_to_read_one_line()) {
24882471
*eof = true;
24892472
return Status::OK();
@@ -2826,9 +2809,6 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo
28262809
*read_rows = block->rows();
28272810
}
28282811

2829-
if (max_block_bytes > 0 && *read_rows > 0) {
2830-
_load_bytes_per_row = block->bytes() / *read_rows;
2831-
}
28322812
return Status::OK();
28332813
}
28342814

be/src/format/orc/vorc_reader.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -694,10 +694,7 @@ class OrcReader : public GenericReader {
694694
io::FileSystemProperties _system_properties;
695695
io::FileDescription _file_description;
696696
size_t _batch_size;
697-
// Bytes-per-row estimate from the previous batch, used to pre-shrink _batch_size
698-
// before reading so that oversized blocks are prevented from the current call onward.
699-
// Zero means no prior data (first batch).
700-
size_t _load_bytes_per_row = 0;
697+
701698
int64_t _range_start_offset;
702699
int64_t _range_size;
703700
std::string _ctz;

be/src/format/parquet/vparquet_reader.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -727,19 +727,6 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
727727
return Status::OK();
728728
}
729729

730-
// Limit memory per batch for load paths.
731-
// _load_bytes_per_row is updated after each batch so the *next* call pre-shrinks _batch_size
732-
// before reading, ensuring the current batch is already within the limit (from call 2 onward).
733-
const int64_t max_block_bytes =
734-
(_state != nullptr && _state->query_type() == TQueryType::LOAD &&
735-
config::load_reader_max_block_bytes > 0)
736-
? config::load_reader_max_block_bytes
737-
: 0;
738-
if (max_block_bytes > 0 && _load_bytes_per_row > 0) {
739-
_batch_size = std::max((size_t)1,
740-
(size_t)((int64_t)max_block_bytes / (int64_t)_load_bytes_per_row));
741-
}
742-
743730
SCOPED_RAW_TIMER(&_reader_statistics.column_read_time);
744731
Status batch_st =
745732
_current_group_reader->next_batch(block, _batch_size, read_rows, &_row_group_eof);
@@ -756,10 +743,6 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
756743
batch_st.to_string());
757744
}
758745

759-
if (max_block_bytes > 0 && *read_rows > 0) {
760-
_load_bytes_per_row = block->bytes() / *read_rows;
761-
}
762-
763746
if (_row_group_eof) {
764747
auto column_st = _current_group_reader->merged_column_statistics();
765748
_column_statistics.merge(column_st);

be/src/format/parquet/vparquet_reader.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -343,10 +343,7 @@ class ParquetReader : public GenericReader {
343343

344344
// parquet file reader object
345345
size_t _batch_size;
346-
// Bytes-per-row estimate from the previous batch, used to pre-shrink _batch_size
347-
// before reading so that oversized blocks are prevented from the current call onward.
348-
// Zero means no prior data (first batch).
349-
size_t _load_bytes_per_row = 0;
346+
350347
int64_t _range_start_offset;
351348
int64_t _range_size;
352349
const cctz::time_zone* _ctz = nullptr;

0 commit comments

Comments (0)