Skip to content

Commit 1ca7a64

Browse files
authored
perf: use unique_ptr instead of shared_ptr (#1374)
performance: replace unnecessary shared_ptr with unique_ptr to eliminate atomic refcounting and reduce contention on hot insert paths.
1 parent 6e0c40b commit 1ca7a64

8 files changed

Lines changed: 66 additions & 64 deletions

File tree

contrib/pax_storage/src/cpp/storage/columns/pax_column.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ std::string PaxColumn::DebugString() {
208208

209209
template <typename T>
210210
PaxCommColumn<T>::PaxCommColumn(uint32 capacity) {
211-
data_ = std::make_shared<DataBuffer<T>>(capacity * sizeof(T));
211+
data_ = std::make_unique<DataBuffer<T>>(capacity * sizeof(T));
212212
}
213213

214214
template <typename T>
@@ -218,7 +218,7 @@ template <typename T> // NOLINT: redirect constructor
218218
PaxCommColumn<T>::PaxCommColumn() : PaxCommColumn(DEFAULT_CAPACITY) {}
219219

220220
template <typename T>
221-
void PaxCommColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
221+
void PaxCommColumn<T>::Set(std::unique_ptr<DataBuffer<T>> data) {
222222
data_ = std::move(data);
223223
}
224224

@@ -318,17 +318,17 @@ template class PaxCommColumn<double>;
318318
PaxNonFixedColumn::PaxNonFixedColumn(uint32 data_capacity,
319319
uint32 offsets_capacity)
320320
: estimated_size_(0),
321-
data_(std::make_shared<DataBuffer<char>>(data_capacity)),
322-
offsets_(std::make_shared<DataBuffer<int32>>(offsets_capacity)),
321+
data_(std::make_unique<DataBuffer<char>>(data_capacity)),
322+
offsets_(std::make_unique<DataBuffer<int32>>(offsets_capacity)),
323323
next_offsets_(0) {}
324324

325325
PaxNonFixedColumn::PaxNonFixedColumn()
326326
: PaxNonFixedColumn(DEFAULT_CAPACITY, DEFAULT_CAPACITY) {}
327327

328328
PaxNonFixedColumn::~PaxNonFixedColumn() {}
329329

330-
void PaxNonFixedColumn::Set(std::shared_ptr<DataBuffer<char>> data,
331-
std::shared_ptr<DataBuffer<int32>> offsets,
330+
void PaxNonFixedColumn::Set(std::unique_ptr<DataBuffer<char>> data,
331+
std::unique_ptr<DataBuffer<int32>> offsets,
332332
size_t total_size) {
333333
estimated_size_ = total_size;
334334
data_ = std::move(data);

contrib/pax_storage/src/cpp/storage/columns/pax_column.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,9 @@ class PaxColumn {
239239
}
240240

241241
// Get the null bitmap
242-
inline const std::unique_ptr<Bitmap8>& GetBitmap() const { return null_bitmap_; }
242+
inline const std::unique_ptr<Bitmap8> &GetBitmap() const {
243+
return null_bitmap_;
244+
}
243245

244246
// Set the column kv attributes
245247
void SetAttributes(const std::map<std::string, std::string> &attrs);
@@ -425,7 +427,7 @@ class PaxCommColumn : public PaxColumn {
425427

426428
PaxCommColumn();
427429

428-
virtual void Set(std::shared_ptr<DataBuffer<T>> data);
430+
virtual void Set(std::unique_ptr<DataBuffer<T>> data);
429431

430432
PaxColumnTypeInMem GetPaxColumnTypeInMem() const override;
431433

@@ -455,7 +457,7 @@ class PaxCommColumn : public PaxColumn {
455457
int32 GetTypeLength() const override;
456458

457459
protected:
458-
std::shared_ptr<DataBuffer<T>> data_;
460+
std::unique_ptr<DataBuffer<T>> data_;
459461
};
460462

461463
extern template class PaxCommColumn<char>;
@@ -474,8 +476,8 @@ class PaxNonFixedColumn : public PaxColumn {
474476

475477
~PaxNonFixedColumn() override;
476478

477-
virtual void Set(std::shared_ptr<DataBuffer<char>> data,
478-
std::shared_ptr<DataBuffer<int32>> offsets,
479+
virtual void Set(std::unique_ptr<DataBuffer<char>> data,
480+
std::unique_ptr<DataBuffer<int32>> offsets,
479481
size_t total_size);
480482

481483
void Append(char *buffer, size_t size) override;
@@ -514,13 +516,13 @@ class PaxNonFixedColumn : public PaxColumn {
514516

515517
protected:
516518
size_t estimated_size_;
517-
std::shared_ptr<DataBuffer<char>> data_;
519+
std::unique_ptr<DataBuffer<char>> data_;
518520

519521
// orc needs to serialize int32 array
520522
// the length of a single tuple field will not exceed 2GB,
521523
// so a variable-length element of the offsets stream can use int32
522524
// to represent the length
523-
std::shared_ptr<DataBuffer<int32>> offsets_;
525+
std::unique_ptr<DataBuffer<int32>> offsets_;
524526

525527
// used to record next offset in write path
526528
// in read path, next_offsets_ is always -1

contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -137,61 +137,61 @@ static std::unique_ptr<PaxColumn> CreateDecodeColumn(
137137
std::unique_ptr<PaxColumn> column_rc;
138138
switch (bits) {
139139
case 16: {
140-
auto buffer_for_read = std::make_shared<DataBuffer<int16>>(
140+
auto buffer_for_read = std::make_unique<DataBuffer<int16>>(
141141
reinterpret_cast<int16 *>(encoded_buff), encoded_len, false, false);
142142
buffer_for_read->Brush(encoded_len);
143143

144144
if (storage_type == PaxStorageFormat::kTypeStoragePorcNonVec) {
145145
auto int_column =
146146
ColumnOptCreateTraits<PaxEncodingColumn, int16>::create_decoding(
147147
origin_len / sizeof(int16), std::move(decoding_option));
148-
int_column->Set(buffer_for_read);
148+
int_column->Set(std::move(buffer_for_read));
149149
column_rc = std::move(int_column);
150150
} else {
151151
auto int_column =
152152
ColumnOptCreateTraits<PaxVecEncodingColumn, int16>::create_decoding(
153153
origin_len / sizeof(int16), std::move(decoding_option));
154-
int_column->Set(buffer_for_read, column_not_nulls);
154+
int_column->Set(std::move(buffer_for_read), column_not_nulls);
155155
column_rc = std::move(int_column);
156156
}
157157
break;
158158
}
159159
case 32: {
160-
auto buffer_for_read = std::make_shared<DataBuffer<int32>>(
160+
auto buffer_for_read = std::make_unique<DataBuffer<int32>>(
161161
reinterpret_cast<int32 *>(encoded_buff), encoded_len, false, false);
162162
buffer_for_read->Brush(encoded_len);
163163

164164
if (storage_type == PaxStorageFormat::kTypeStoragePorcNonVec) {
165165
auto int_column =
166166
ColumnOptCreateTraits<PaxEncodingColumn, int32>::create_decoding(
167167
origin_len / sizeof(int32), std::move(decoding_option));
168-
int_column->Set(buffer_for_read);
168+
int_column->Set(std::move(buffer_for_read));
169169
column_rc = std::move(int_column);
170170
} else {
171171
auto int_column =
172172
ColumnOptCreateTraits<PaxVecEncodingColumn, int32>::create_decoding(
173173
origin_len / sizeof(int32), std::move(decoding_option));
174-
int_column->Set(buffer_for_read, column_not_nulls);
174+
int_column->Set(std::move(buffer_for_read), column_not_nulls);
175175
column_rc = std::move(int_column);
176176
}
177177
break;
178178
}
179179
case 64: {
180-
auto buffer_for_read = std::make_shared<DataBuffer<int64>>(
180+
auto buffer_for_read = std::make_unique<DataBuffer<int64>>(
181181
reinterpret_cast<int64 *>(encoded_buff), encoded_len, false, false);
182182
buffer_for_read->Brush(encoded_len);
183183

184184
if (storage_type == PaxStorageFormat::kTypeStoragePorcNonVec) {
185185
auto int_column =
186186
ColumnOptCreateTraits<PaxEncodingColumn, int64>::create_decoding(
187187
origin_len / sizeof(int64), std::move(decoding_option));
188-
int_column->Set(buffer_for_read);
188+
int_column->Set(std::move(buffer_for_read));
189189
column_rc = std::move(int_column);
190190
} else {
191191
auto int_column =
192192
ColumnOptCreateTraits<PaxVecEncodingColumn, int64>::create_decoding(
193193
origin_len / sizeof(int64), std::move(decoding_option));
194-
int_column->Set(buffer_for_read, column_not_nulls);
194+
int_column->Set(std::move(buffer_for_read), column_not_nulls);
195195
column_rc = std::move(int_column);
196196
}
197197
break;
@@ -749,14 +749,14 @@ TEST_P(PaxNonFixedColumnCompressTest,
749749
auto non_fixed_column_for_read = new PaxNonFixedEncodingColumn(
750750
number_of_rows * number, sizeof(int32) * number_of_rows,
751751
std::move(decoding_option));
752-
auto data_buffer_for_read = std::make_shared<DataBuffer<char>>(
752+
auto data_buffer_for_read = std::make_unique<DataBuffer<char>>(
753753
encoded_buff, encoded_len, false, false);
754754
data_buffer_for_read->Brush(encoded_len);
755-
auto length_buffer_cpy = std::make_shared<DataBuffer<int32>>(
755+
auto length_buffer_cpy = std::make_unique<DataBuffer<int32>>(
756756
(int32 *)offset_stream_buff, offset_stream_len, false, false);
757757
length_buffer_cpy->BrushAll();
758-
non_fixed_column_for_read->Set(data_buffer_for_read, length_buffer_cpy,
759-
origin_len);
758+
non_fixed_column_for_read->Set(std::move(data_buffer_for_read),
759+
std::move(length_buffer_cpy), origin_len);
760760
ASSERT_EQ(non_fixed_column_for_read->GetCompressLevel(), 5);
761761
char *verify_buff;
762762
size_t verify_len;

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ void PaxEncodingColumn<T>::InitDecoder() {
124124
}
125125

126126
template <typename T>
127-
void PaxEncodingColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
127+
void PaxEncodingColumn<T>::Set(std::unique_ptr<DataBuffer<T>> data) {
128128
if (decoder_) {
129129
// should not decode null
130130
if (data->Used() != 0) {
@@ -155,7 +155,7 @@ void PaxEncodingColumn<T>::Set(std::shared_ptr<DataBuffer<T>> data) {
155155

156156
Assert(!data->IsMemTakeOver());
157157
} else {
158-
PaxCommColumn<T>::Set(data);
158+
PaxCommColumn<T>::Set(std::move(data));
159159
}
160160
}
161161

@@ -175,7 +175,7 @@ std::pair<char *, size_t> PaxEncodingColumn<T>::GetBuffer() {
175175
if (encoder_) {
176176
// changed streaming encode to blocking encode
177177
// because we still need to store the original data in `PaxCommColumn<T>`
178-
auto origin_data_buffer = PaxCommColumn<T>::data_;
178+
auto origin_data_buffer = PaxCommColumn<T>::data_.get();
179179

180180
shared_data_ = std::make_shared<DataBuffer<char>>(origin_data_buffer->Used());
181181
encoder_->SetDataBuffer(shared_data_);

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_column.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class PaxEncodingColumn : public PaxCommColumn<T> {
4444

4545
~PaxEncodingColumn() override;
4646

47-
void Set(std::shared_ptr<DataBuffer<T>> data) override;
47+
void Set(std::unique_ptr<DataBuffer<T>> data) override;
4848

4949
std::pair<char *, size_t> GetBuffer() override;
5050

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ PaxNonFixedEncodingColumn::PaxNonFixedEncodingColumn(
145145

146146
PaxNonFixedEncodingColumn::~PaxNonFixedEncodingColumn() {}
147147

148-
void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
149-
std::shared_ptr<DataBuffer<int32>> offsets,
148+
void PaxNonFixedEncodingColumn::Set(std::unique_ptr<DataBuffer<char>> data,
149+
std::unique_ptr<DataBuffer<int32>> offsets,
150150
size_t total_size) {
151151
bool exist_decoder;
152152
Assert(data && offsets);
@@ -179,7 +179,7 @@ void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
179179
// `data_` shares the same buffer as `shared_data_`
180180
PaxNonFixedColumn::data_->Brush(shared_data_->Used());
181181
// do not delete the original data
182-
shared_data_ = data;
182+
shared_data_ = std::move(data);
183183
}
184184
};
185185

@@ -228,16 +228,16 @@ void PaxNonFixedEncodingColumn::Set(std::shared_ptr<DataBuffer<char>> data,
228228
PaxNonFixedColumn::next_offsets_ = -1;
229229
} else if (exist_decoder && !has_offsets_processor) {
230230
data_decompress();
231-
PaxNonFixedColumn::offsets_ = offsets;
231+
PaxNonFixedColumn::offsets_ = std::move(offsets);
232232
PaxNonFixedColumn::estimated_size_ = total_size;
233233
PaxNonFixedColumn::next_offsets_ = -1;
234234
} else if (!exist_decoder && has_offsets_processor) {
235-
PaxNonFixedColumn::data_ = data;
235+
PaxNonFixedColumn::data_ = std::move(data);
236236
offsets_decompress();
237237
PaxNonFixedColumn::estimated_size_ = total_size;
238238
PaxNonFixedColumn::next_offsets_ = -1;
239239
} else { // (!compressor_ && !offsets_compressor_)
240-
PaxNonFixedColumn::Set(data, offsets, total_size);
240+
PaxNonFixedColumn::Set(std::move(data), std::move(offsets), total_size);
241241
}
242242
}
243243

contrib/pax_storage/src/cpp/storage/columns/pax_encoding_non_fixed_column.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ class PaxNonFixedEncodingColumn : public PaxNonFixedColumn {
4242

4343
~PaxNonFixedEncodingColumn() override;
4444

45-
void Set(std::shared_ptr<DataBuffer<char>> data,
46-
std::shared_ptr<DataBuffer<int32>> offsets,
45+
void Set(std::unique_ptr<DataBuffer<char>> data,
46+
std::unique_ptr<DataBuffer<int32>> offsets,
4747
size_t total_size) override;
4848

4949
std::pair<char *, size_t> GetBuffer() override;

0 commit comments

Comments
 (0)