Skip to content

Commit 3c595ba

Browse files
committed
perf: use unique_ptr instead of shared_ptr
Replace unnecessary shared pointers with unique pointers to improve insertion performance
1 parent 9d86458 commit 3c595ba

7 files changed

Lines changed: 53 additions & 51 deletions

File tree

contrib/pax_storage/src/cpp/storage/columns/pax_column.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ std::string PaxColumn::DebugString() {
208208

209209
template <typename T>
210210
PaxCommColumn<T>::PaxCommColumn(uint32 capacity) {
211-
data_ = std::make_shared<DataBuffer<T>>(capacity * sizeof(T));
211+
data_ = std::make_unique<DataBuffer<T>>(capacity * sizeof(T));
212212
}
213213

214214
template <typename T>
@@ -218,7 +218,7 @@ template <typename T> // NOLINT: redirect constructor
218218
PaxCommColumn<T>::PaxCommColumn() : PaxCommColumn(DEFAULT_CAPACITY) {}
219219

220220
template <typename T>
221-
void PaxCommColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
221+
void PaxCommColumn<T>::Set(std::unique_ptr<DataBuffer<T>> data) {
222222
data_ = std::move(data);
223223
}
224224

@@ -318,17 +318,17 @@ template class PaxCommColumn<double>;
318318
PaxNonFixedColumn::PaxNonFixedColumn(uint32 data_capacity,
319319
uint32 offsets_capacity)
320320
: estimated_size_(0),
321-
data_(std::make_shared<DataBuffer<char>>(data_capacity)),
322-
offsets_(std::make_shared<DataBuffer<int32>>(offsets_capacity)),
321+
data_(std::make_unique<DataBuffer<char>>(data_capacity)),
322+
offsets_(std::make_unique<DataBuffer<int32>>(offsets_capacity)),
323323
next_offsets_(0) {}
324324

325325
PaxNonFixedColumn::PaxNonFixedColumn()
326326
: PaxNonFixedColumn(DEFAULT_CAPACITY, DEFAULT_CAPACITY) {}
327327

328328
PaxNonFixedColumn::~PaxNonFixedColumn() {}
329329

330-
void PaxNonFixedColumn::Set(std::shared_ptr<DataBuffer<char>> data,
331-
std::shared_ptr<DataBuffer<int32>> offsets,
330+
void PaxNonFixedColumn::Set(std::unique_ptr<DataBuffer<char>> data,
331+
std::unique_ptr<DataBuffer<int32>> offsets,
332332
size_t total_size) {
333333
estimated_size_ = total_size;
334334
data_ = std::move(data);

contrib/pax_storage/src/cpp/storage/columns/pax_column.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,9 @@ class PaxColumn {
239239
}
240240

241241
// Get the null bitmap
242-
inline const std::unique_ptr<Bitmap8>& GetBitmap() const { return null_bitmap_; }
242+
inline const std::unique_ptr<Bitmap8> &GetBitmap() const {
243+
return null_bitmap_;
244+
}
243245

244246
// Set the column kv attributes
245247
void SetAttributes(const std::map<std::string, std::string> &attrs);
@@ -425,7 +427,7 @@ class PaxCommColumn : public PaxColumn {
425427

426428
PaxCommColumn();
427429

428-
virtual void Set(std::shared_ptr<DataBuffer<T>> data);
430+
virtual void Set(std::unique_ptr<DataBuffer<T>> data);
429431

430432
PaxColumnTypeInMem GetPaxColumnTypeInMem() const override;
431433

@@ -455,7 +457,7 @@ class PaxCommColumn : public PaxColumn {
455457
int32 GetTypeLength() const override;
456458

457459
protected:
458-
std::shared_ptr<DataBuffer<T>> data_;
460+
std::unique_ptr<DataBuffer<T>> data_;
459461
};
460462

461463
extern template class PaxCommColumn<char>;
@@ -474,8 +476,8 @@ class PaxNonFixedColumn : public PaxColumn {
474476

475477
~PaxNonFixedColumn() override;
476478

477-
virtual void Set(std::shared_ptr<DataBuffer<char>> data,
478-
std::shared_ptr<DataBuffer<int32>> offsets,
479+
virtual void Set(std::unique_ptr<DataBuffer<char>> data,
480+
std::unique_ptr<DataBuffer<int32>> offsets,
479481
size_t total_size);
480482

481483
void Append(char *buffer, size_t size) override;
@@ -514,13 +516,13 @@ class PaxNonFixedColumn : public PaxColumn {
514516

515517
protected:
516518
size_t estimated_size_;
517-
std::shared_ptr<DataBuffer<char>> data_;
519+
std::unique_ptr<DataBuffer<char>> data_;
518520

519521
// orc needs to serialize int32 array
520522
// the length of a single tuple field will not exceed 2GB,
521523
// so a variable-length element of the offsets stream can use int32
522524
// to represent the length
523-
std::shared_ptr<DataBuffer<int32>> offsets_;
525+
std::unique_ptr<DataBuffer<int32>> offsets_;
524526

525527
// used to record next offset in write path
526528
// in read path, next_offsets_ is always -1

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ void PaxEncodingColumn<T>::InitDecoder() {
124124
}
125125

126126
template <typename T>
127-
void PaxEncodingColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
127+
void PaxEncodingColumn<T>::Set(std::unique_ptr<DataBuffer<T>> data) {
128128
if (decoder_) {
129129
// should not decode null data
130130
if (data->Used() != 0) {
@@ -155,7 +155,7 @@ void PaxEncodingColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
155155

156156
Assert(!data->IsMemTakeOver());
157157
} else {
158-
PaxCommColumn<T>::Set(data);
158+
PaxCommColumn<T>::Set(std::move(data));
159159
}
160160
}
161161

@@ -175,7 +175,7 @@ std::pair<char *, size_t> PaxEncodingColumn<T>::GetBuffer() {
175175
if (encoder_) {
176176
// changed streaming encode to blocking encode
177177
// because we still need to store the original data in `PaxCommColumn<T>`
178-
auto origin_data_buffer = PaxCommColumn<T>::data_;
178+
auto origin_data_buffer = PaxCommColumn<T>::data_.get();
179179

180180
shared_data_ = std::make_shared<DataBuffer<char>>(origin_data_buffer->Used());
181181
encoder_->SetDataBuffer(shared_data_);

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class PaxEncodingColumn : public PaxCommColumn<T> {
4444

4545
~PaxEncodingColumn() override;
4646

47-
void Set(std::shared_ptr<DataBuffer<T>> data) override;
47+
void Set(std::unique_ptr<DataBuffer<T>> data) override;
4848

4949
std::pair<char *, size_t> GetBuffer() override;
5050

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ PaxNonFixedEncodingColumn::PaxNonFixedEncodingColumn(
145145

146146
PaxNonFixedEncodingColumn::~PaxNonFixedEncodingColumn() {}
147147

148-
void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
149-
std::shared_ptr<DataBuffer<int32>> offsets,
148+
void PaxNonFixedEncodingColumn::Set(std::unique_ptr<DataBuffer<char>> data,
149+
std::unique_ptr<DataBuffer<int32>> offsets,
150150
size_t total_size) {
151151
bool exist_decoder;
152152
Assert(data && offsets);
@@ -179,7 +179,7 @@ void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
179179
// `data_` have the same buffer with `shared_data_`
180180
PaxNonFixedColumn::data_->Brush(shared_data_->Used());
181181
// do not delete the original data
182-
shared_data_ = data;
182+
shared_data_ = std::move(data);
183183
}
184184
};
185185

@@ -228,16 +228,16 @@ void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
228228
PaxNonFixedColumn::next_offsets_ = -1;
229229
} else if (exist_decoder && !has_offsets_processor) {
230230
data_decompress();
231-
PaxNonFixedColumn::offsets_ = offsets;
231+
PaxNonFixedColumn::offsets_ = std::move(offsets);
232232
PaxNonFixedColumn::estimated_size_ = total_size;
233233
PaxNonFixedColumn::next_offsets_ = -1;
234234
} else if (!exist_decoder && has_offsets_processor) {
235-
PaxNonFixedColumn::data_ = data;
235+
PaxNonFixedColumn::data_ = std::move(data);
236236
offsets_decompress();
237237
PaxNonFixedColumn::estimated_size_ = total_size;
238238
PaxNonFixedColumn::next_offsets_ = -1;
239239
} else { // (!compressor_ && !offsets_compressor_)
240-
PaxNonFixedColumn::Set(data, offsets, total_size);
240+
PaxNonFixedColumn::Set(std::move(data), std::move(offsets), total_size);
241241
}
242242
}
243243

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ class PaxNonFixedEncodingColumn : public PaxNonFixedColumn {
4242

4343
~PaxNonFixedEncodingColumn() override;
4444

45-
void Set(std::shared_ptr<DataBuffer<char>> data,
46-
std::shared_ptr<DataBuffer<int32>> offsets,
45+
void Set(std::unique_ptr<DataBuffer<char>> data,
46+
std::unique_ptr<DataBuffer<int32>> offsets,
4747
size_t total_size) override;
4848

4949
std::pair<char *, size_t> GetBuffer() override;

contrib/pax_storage/src/cpp/storage/orc/orc_format_reader.cc

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -413,14 +413,14 @@ static std::unique_ptr<PaxColumn> BuildEncodingColumn(
413413
const ColumnEncoding &data_encoding, bool is_vec) {
414414
uint32 not_null_rows = 0;
415415
uint64 data_stream_len = 0;
416-
std::shared_ptr<DataBuffer<T>> data_stream_buffer;
416+
std::unique_ptr<DataBuffer<T>> data_stream_buffer;
417417

418418
Assert(data_stream.kind() == pax::porc::proto::Stream_Kind_DATA);
419419

420420
not_null_rows = static_cast<uint32>(data_stream.column());
421421
data_stream_len = static_cast<uint64>(data_stream.length());
422422

423-
data_stream_buffer = std::make_shared<DataBuffer<T>>(
423+
data_stream_buffer = std::make_unique<DataBuffer<T>>(
424424
reinterpret_cast<T *>(data_buffer->GetAvailableBuffer()), data_stream_len,
425425
false, false);
426426

@@ -443,7 +443,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingColumn(
443443
auto pax_column =
444444
traits::ColumnOptCreateTraits<PaxVecEncodingColumn, T>::create_decoding(
445445
alloc_size, decoding_option);
446-
pax_column->Set(data_stream_buffer, (size_t)not_null_rows);
446+
pax_column->Set(std::move(data_stream_buffer), (size_t)not_null_rows);
447447
return pax_column;
448448
} else {
449449
AssertImply(data_encoding.kind() ==
@@ -455,7 +455,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingColumn(
455455
auto pax_column =
456456
traits::ColumnOptCreateTraits<PaxEncodingColumn, T>::create_decoding(
457457
alloc_size, decoding_option);
458-
pax_column->Set(data_stream_buffer);
458+
pax_column->Set(std::move(data_stream_buffer));
459459
return pax_column;
460460
}
461461
}
@@ -466,14 +466,14 @@ static std::unique_ptr<PaxColumn> BuildEncodingBitPackedColumn(
466466
bool is_vec) {
467467
uint32 not_null_rows = 0;
468468
uint64 column_data_len = 0;
469-
std::shared_ptr<DataBuffer<int8>> column_data_buffer;
469+
std::unique_ptr<DataBuffer<int8>> column_data_buffer;
470470

471471
Assert(data_stream.kind() == pax::porc::proto::Stream_Kind_DATA);
472472

473473
not_null_rows = static_cast<uint32>(data_stream.column());
474474
column_data_len = static_cast<uint64>(data_stream.length());
475475

476-
column_data_buffer = std::make_shared<DataBuffer<int8>>(
476+
column_data_buffer = std::make_unique<DataBuffer<int8>>(
477477
reinterpret_cast<int8 *>(data_buffer->GetAvailableBuffer()),
478478
column_data_len, false, false);
479479

@@ -496,7 +496,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingBitPackedColumn(
496496
auto pax_column =
497497
traits::ColumnOptCreateTraits2<PaxVecBitPackedColumn>::create_decoding(
498498
alloc_size, decoding_option);
499-
pax_column->Set(column_data_buffer, (size_t)not_null_rows);
499+
pax_column->Set(std::move(column_data_buffer), (size_t)not_null_rows);
500500
return pax_column;
501501
} else {
502502
AssertImply(data_encoding.kind() ==
@@ -508,7 +508,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingBitPackedColumn(
508508
auto pax_column =
509509
traits::ColumnOptCreateTraits2<PaxBitPackedColumn>::create_decoding(
510510
alloc_size, decoding_option);
511-
pax_column->Set(column_data_buffer);
511+
pax_column->Set(std::move(column_data_buffer));
512512
return pax_column;
513513
}
514514
}
@@ -521,16 +521,16 @@ static std::unique_ptr<PaxColumn> BuildEncodingDecimalColumn(
521521
uint32 not_null_rows = 0;
522522
uint64 offset_stream_len = 0;
523523
uint64 data_stream_len = 0;
524-
std::shared_ptr<DataBuffer<int32>> offset_stream_buffer;
525-
std::shared_ptr<DataBuffer<char>> data_stream_buffer;
524+
std::unique_ptr<DataBuffer<int32>> offset_stream_buffer;
525+
std::unique_ptr<DataBuffer<char>> data_stream_buffer;
526526
std::unique_ptr<PaxNonFixedColumn> pax_column;
527527
uint64 padding = 0;
528528

529529
not_null_rows = static_cast<uint32>(len_stream.column());
530530
offset_stream_len = static_cast<uint64>(len_stream.length());
531531
padding = len_stream.padding();
532532

533-
offset_stream_buffer = std::make_shared<DataBuffer<int32>>(
533+
offset_stream_buffer = std::make_unique<DataBuffer<int32>>(
534534
reinterpret_cast<int32 *>(data_buffer->GetAvailableBuffer()),
535535
offset_stream_len, false, false);
536536

@@ -560,7 +560,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingDecimalColumn(
560560
}
561561
#endif
562562

563-
data_stream_buffer = std::make_shared<DataBuffer<char>>(
563+
data_stream_buffer = std::make_unique<DataBuffer<char>>(
564564
data_buffer->GetAvailableBuffer(), data_stream_len, false, false);
565565
data_stream_buffer->BrushAll();
566566
data_buffer->Brush(data_stream_len);
@@ -591,7 +591,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingDecimalColumn(
591591
data_cap, offsets_cap, std::move(decoding_option));
592592

593593
// current memory will be freed in pax_columns->data_
594-
pax_column->Set(data_stream_buffer, offset_stream_buffer, data_stream_len);
594+
pax_column->Set(std::move(data_stream_buffer), std::move(offset_stream_buffer), data_stream_len);
595595
return pax_column;
596596
}
597597

@@ -601,7 +601,7 @@ static std::unique_ptr<PaxColumn> BuildVecEncodingDecimalColumn(
601601
const ColumnEncoding &data_encoding, bool is_vec) {
602602
uint32 not_null_rows = 0;
603603
uint64 data_stream_len = 0;
604-
std::shared_ptr<DataBuffer<int8>> data_stream_buffer;
604+
std::unique_ptr<DataBuffer<int8>> data_stream_buffer;
605605

606606
CBDB_CHECK(is_vec, cbdb::CException::ExType::kExTypeLogicError);
607607

@@ -610,7 +610,7 @@ static std::unique_ptr<PaxColumn> BuildVecEncodingDecimalColumn(
610610
not_null_rows = static_cast<uint32>(data_stream.column());
611611
data_stream_len = static_cast<uint64>(data_stream.length());
612612

613-
data_stream_buffer = std::make_shared<DataBuffer<int8>>(
613+
data_stream_buffer = std::make_unique<DataBuffer<int8>>(
614614
reinterpret_cast<int8 *>(data_buffer->GetAvailableBuffer()),
615615
data_stream_len, false, false);
616616

@@ -630,7 +630,7 @@ static std::unique_ptr<PaxColumn> BuildVecEncodingDecimalColumn(
630630

631631
auto pax_column = traits::ColumnOptCreateTraits2<PaxShortNumericColumn>:: //
632632
create_decoding(alloc_size, decoding_option);
633-
pax_column->Set(data_stream_buffer, (size_t)not_null_rows);
633+
pax_column->Set(std::move(data_stream_buffer), (size_t)not_null_rows);
634634

635635
return pax_column;
636636
}
@@ -644,8 +644,8 @@ static std::unique_ptr<PaxColumn> BuildEncodingVecNonFixedColumn(
644644
uint64 offset_stream_len = 0;
645645
uint64 padding = 0;
646646
uint64 data_stream_len = 0;
647-
std::shared_ptr<DataBuffer<int32>> offset_stream_buffer;
648-
std::shared_ptr<DataBuffer<char>> data_stream_buffer;
647+
std::unique_ptr<DataBuffer<int32>> offset_stream_buffer;
648+
std::unique_ptr<DataBuffer<char>> data_stream_buffer;
649649
std::unique_ptr<PaxVecNonFixedColumn> pax_column;
650650
PaxDecoder::DecodingOption decoding_option;
651651
size_t data_cap, offsets_cap;
@@ -658,7 +658,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingVecNonFixedColumn(
658658
offset_stream_len = static_cast<uint64>(len_stream.length());
659659
padding = len_stream.padding();
660660

661-
offset_stream_buffer = std::make_shared<DataBuffer<int32>>(
661+
offset_stream_buffer = std::make_unique<DataBuffer<int32>>(
662662
reinterpret_cast<int32 *>(data_buffer->GetAvailableBuffer()),
663663
offset_stream_len, false, false);
664664

@@ -676,7 +676,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingVecNonFixedColumn(
676676
}
677677

678678
data_buffer->Brush(offset_stream_len);
679-
data_stream_buffer = std::make_shared<DataBuffer<char>>(
679+
data_stream_buffer = std::make_unique<DataBuffer<char>>(
680680
data_buffer->GetAvailableBuffer(), data_stream_len, false, false);
681681

682682
decoding_option.column_encode_type = data_encoding.kind();
@@ -730,7 +730,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingVecNonFixedColumn(
730730
create_decoding(data_cap, offsets_cap, std::move(decoding_option));
731731
}
732732
}
733-
pax_column->Set(data_stream_buffer, offset_stream_buffer, data_stream_len,
733+
pax_column->Set(std::move(data_stream_buffer), std::move(offset_stream_buffer), data_stream_len,
734734
not_null_rows);
735735
return pax_column;
736736
}
@@ -743,8 +743,8 @@ static std::unique_ptr<PaxColumn> BuildEncodingNonFixedColumn(
743743
[[maybe_unused]] uint32 not_null_rows = 0;
744744
uint64 offset_stream_len = 0;
745745
uint64 data_stream_len = 0;
746-
std::shared_ptr<DataBuffer<int32>> offset_stream_buffer;
747-
std::shared_ptr<DataBuffer<char>> data_stream_buffer;
746+
std::unique_ptr<DataBuffer<int32>> offset_stream_buffer;
747+
std::unique_ptr<DataBuffer<char>> data_stream_buffer;
748748
std::unique_ptr<PaxNonFixedColumn> pax_column;
749749
uint64 padding = 0;
750750
PaxDecoder::DecodingOption decoding_option;
@@ -754,7 +754,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingNonFixedColumn(
754754
offset_stream_len = static_cast<uint64>(len_stream.length());
755755
padding = len_stream.padding();
756756

757-
offset_stream_buffer = std::make_shared<DataBuffer<int32>>(
757+
offset_stream_buffer = std::make_unique<DataBuffer<int32>>(
758758
reinterpret_cast<int32 *>(data_buffer->GetAvailableBuffer()),
759759
offset_stream_len, false, false);
760760

@@ -776,7 +776,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingNonFixedColumn(
776776
}
777777
#endif
778778

779-
data_stream_buffer = std::make_shared<DataBuffer<char>>(
779+
data_stream_buffer = std::make_unique<DataBuffer<char>>(
780780
data_buffer->GetAvailableBuffer(), data_stream_len, false, false);
781781
data_stream_buffer->BrushAll();
782782
data_buffer->Brush(data_stream_len);
@@ -819,7 +819,7 @@ static std::unique_ptr<PaxColumn> BuildEncodingNonFixedColumn(
819819
}
820820

821821
// current memory will be freed in pax_columns->data_
822-
pax_column->Set(data_stream_buffer, offset_stream_buffer, data_stream_len);
822+
pax_column->Set(std::move(data_stream_buffer), std::move(offset_stream_buffer), data_stream_len);
823823
return pax_column;
824824
}
825825

0 commit comments

Comments
 (0)