Skip to content

Commit 688b7cf

Browse files
committed
Enhancement: simple code optimization
1. Explicit inline functions. 2. Instead of checking the physical size of the file every time a tuple is inserted, it is checked every 16 tuples. performance result ``` create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 6124.535 ms (00:06.125) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5993.682 ms (00:05.994) -- optimized with this commit create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5713.184 ms (00:05.713) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5430.221 ms (00:05.430) ```
1 parent 1d9a1f2 commit 688b7cf

7 files changed

Lines changed: 31 additions & 18 deletions

File tree

contrib/pax_storage/src/cpp/access/pax_dml_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ void CPaxDmlStateLocal::Reset() { cbdb::pax_memory_context = nullptr; }
104104
CPaxDmlStateLocal::CPaxDmlStateLocal()
105105
: last_oid_(InvalidOid), cb_{.func = DmlStateResetCallback, .arg = NULL} {}
106106

107-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
107+
inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
108108
CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
109109
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue> value;
110110

@@ -121,7 +121,7 @@ CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
121121
return value;
122122
}
123123

124-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
124+
inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
125125
CPaxDmlStateLocal::FindDmlState(const Oid &oid) {
126126
Assert(OidIsValid(oid));
127127

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,6 @@ void cbdb::MemoryCtxRegisterResetCallback(MemoryContext context,
124124
CBDB_WRAP_END;
125125
}
126126

127-
Oid cbdb::RelationGetRelationId(Relation rel) {
128-
CBDB_WRAP_START;
129-
{ return RelationGetRelid(rel); }
130-
CBDB_WRAP_END;
131-
}
132-
133127
#ifdef RUN_GTEST
134128
Datum cbdb::DatumFromCString(const char *src, size_t length) {
135129
CBDB_WRAP_START;

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ void MemoryCtxDelete(MemoryContext memory_context);
114114
void MemoryCtxRegisterResetCallback(MemoryContext context,
115115
MemoryContextCallback *cb);
116116

117-
Oid RelationGetRelationId(Relation rel);
118-
119117
static inline void *DatumToPointer(Datum d) noexcept {
120118
return DatumGetPointer(d);
121119
}
@@ -164,6 +162,10 @@ static inline float8 DatumToFloat8(Datum d) noexcept {
164162
return DatumGetFloat8(d);
165163
}
166164

165+
static inline Oid RelationGetRelationId(Relation rel) noexcept {
166+
return RelationGetRelid(rel);
167+
}
168+
167169
BpChar *BpcharInput(const char *s, size_t len, int32 atttypmod);
168170
VarChar *VarcharInput(const char *s, size_t len, int32 atttypmod);
169171
text *CstringToText(const char *s, size_t len);

contrib/pax_storage/src/cpp/storage/orc/orc_writer.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,11 @@ std::vector<std::pair<int, Datum>> OrcWriter::PrepareWriteTuple(
361361
// Numeric always need ensure that with the 4B header, otherwise it will
362362
// be converted twice in the vectorization path.
363363
if (required_stats_cols[i] || VARATT_IS_COMPRESSED(tts_value_vl) ||
364-
VARATT_IS_EXTERNAL(tts_value_vl) || attrs->atttypid == NUMERICOID) {
364+
VARATT_IS_EXTERNAL(tts_value_vl)
365+
#ifdef VEC_BUILD
366+
|| attrs->atttypid == NUMERICOID
367+
#endif
368+
) {
365369
// still detoast the origin toast
366370
detoast_vl = cbdb::PgDeToastDatum(tts_value_vl);
367371
Assert(detoast_vl != nullptr);

contrib/pax_storage/src/cpp/storage/pax.cc

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
#include "storage/vec/pax_vec_reader.h"
5050
#endif
5151

52+
#define PAX_SPLIT_CHECK_INTERVAL (16)
53+
5254
namespace paxc {
5355
class IndexUpdaterInternal {
5456
public:
@@ -280,17 +282,23 @@ void TableWriter::Open() {
280282
// insert tuple into the aux table before inserting any tuples.
281283
cbdb::InsertMicroPartitionPlaceHolder(RelationGetRelid(relation_),
282284
current_blockno_);
285+
cur_physical_size_ = 0;
283286
}
284287

285288
void TableWriter::WriteTuple(TupleTableSlot *slot) {
286289
Assert(writer_);
287290
Assert(strategy_);
288-
// should check split strategy before write tuple
289-
// otherwise, may got a empty file in the disk
290-
if (strategy_->ShouldSplit(writer_->PhysicalSize(), num_tuples_)) {
291+
// Sampled split check to reduce PhysicalSize() overhead
292+
// We first perform a sampled pre-write check to avoid empty files.
293+
if ((num_tuples_ % PAX_SPLIT_CHECK_INTERVAL) == 0) {
294+
cur_physical_size_ = writer_->PhysicalSize();
295+
}
296+
297+
if (strategy_->ShouldSplit(cur_physical_size_, num_tuples_)) {
291298
writer_->Close();
292299
writer_ = nullptr;
293300
Open();
301+
cur_physical_size_ = 0;
294302
}
295303

296304
writer_->WriteTuple(slot);

contrib/pax_storage/src/cpp/storage/pax.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class TableWriter {
131131
std::vector<std::tuple<ColumnEncoding_Kind, int>> encoding_opts_;
132132

133133
bool is_dfs_table_space_;
134+
size_t cur_physical_size_ = 0;
134135
};
135136

136137
class TableReader final {

contrib/pax_storage/src/cpp/storage/vec/pax_porc_adpater.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,15 @@ static void CopyDecimalBuffer(PaxColumn *column,
240240
column->GetBuffer(data_index_begin + non_null_offset);
241241

242242
auto vl = (struct varlena *)DatumGetPointer(buffer);
243-
Assert(!(VARATT_IS_EXTERNAL(vl) || VARATT_IS_COMPRESSED(vl) ||
244-
VARATT_IS_SHORT(vl)));
245243
num_len = VARSIZE_ANY_EXHDR(vl);
246-
// direct cast
247-
numeric = (Numeric)(buffer);
244+
// if it has been detoasted in OrcWriter::PrepareWriteTuple, it must be a
245+
// 4 byte header varlena, so we can direct cast
246+
if (unlikely(VARATT_IS_EXTERNAL(vl) || VARATT_IS_COMPRESSED(vl) ||
247+
VARATT_IS_SHORT(vl))) {
248+
numeric = cbdb::DatumToNumeric(PointerGetDatum(buffer));
249+
} else { // direct cast
250+
numeric = (Numeric)(buffer);
251+
}
248252

249253
char *dest_buff = out_data_buffer->GetAvailableBuffer();
250254
Assert(out_data_buffer->Available() >= (size_t)type_len);

0 commit comments

Comments
 (0)