Skip to content

Commit 7f0085c

Browse files
committed
Enhancement: simple code optimization
1. Mark small helper functions explicitly as inline. 2. Instead of checking the physical size of the file every time a tuple is inserted, check it only once every 16 tuples. Performance results: ``` -- before this commit create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 6124.535 ms (00:06.125) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5993.682 ms (00:05.994) -- optimized with this commit create table t1(a int, b int, c int, d int, e text, f text,g text, h text) using pax with(compresstype =zstd,compresslevel=5); gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5713.184 ms (00:05.713) gpadmin=# insert into t1 select i, i+1,i+2,i+3, i::text, i::text, i::text, i::text from generate_series(1,5000000) i; INSERT 0 5000000 Time: 5430.221 ms (00:05.430) ```
1 parent 1d9a1f2 commit 7f0085c

5 files changed

Lines changed: 22 additions & 16 deletions

File tree

contrib/pax_storage/src/cpp/access/pax_dml_state.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ void CPaxDmlStateLocal::Reset() { cbdb::pax_memory_context = nullptr; }
104104
CPaxDmlStateLocal::CPaxDmlStateLocal()
105105
: last_oid_(InvalidOid), cb_{.func = DmlStateResetCallback, .arg = NULL} {}
106106

107-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
107+
inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
108108
CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
109109
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue> value;
110110

@@ -121,7 +121,7 @@ CPaxDmlStateLocal::RemoveDmlState(const Oid &oid) {
121121
return value;
122122
}
123123

124-
std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
124+
inline std::shared_ptr<CPaxDmlStateLocal::DmlStateValue>
125125
CPaxDmlStateLocal::FindDmlState(const Oid &oid) {
126126
Assert(OidIsValid(oid));
127127

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,6 @@ void cbdb::MemoryCtxRegisterResetCallback(MemoryContext context,
124124
CBDB_WRAP_END;
125125
}
126126

127-
Oid cbdb::RelationGetRelationId(Relation rel) {
128-
CBDB_WRAP_START;
129-
{ return RelationGetRelid(rel); }
130-
CBDB_WRAP_END;
131-
}
132-
133127
#ifdef RUN_GTEST
134128
Datum cbdb::DatumFromCString(const char *src, size_t length) {
135129
CBDB_WRAP_START;

contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ void MemoryCtxDelete(MemoryContext memory_context);
114114
void MemoryCtxRegisterResetCallback(MemoryContext context,
115115
MemoryContextCallback *cb);
116116

117-
Oid RelationGetRelationId(Relation rel);
118-
119117
static inline void *DatumToPointer(Datum d) noexcept {
120118
return DatumGetPointer(d);
121119
}
@@ -164,6 +162,10 @@ static inline float8 DatumToFloat8(Datum d) noexcept {
164162
return DatumGetFloat8(d);
165163
}
166164

165+
static inline Oid RelationGetRelationId(Relation rel) noexcept {
166+
return RelationGetRelid(rel);
167+
}
168+
167169
BpChar *BpcharInput(const char *s, size_t len, int32 atttypmod);
168170
VarChar *VarcharInput(const char *s, size_t len, int32 atttypmod);
169171
text *CstringToText(const char *s, size_t len);

contrib/pax_storage/src/cpp/storage/pax.cc

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include "storage/pax.h"
2929

30+
#include <algorithm>
3031
#include <map>
3132
#include <utility>
3233

@@ -280,17 +281,23 @@ void TableWriter::Open() {
280281
// insert tuple into the aux table before inserting any tuples.
281282
cbdb::InsertMicroPartitionPlaceHolder(RelationGetRelid(relation_),
282283
current_blockno_);
284+
split_check_interval_ = 16;
285+
cur_physical_size_ = 0;
283286
}
284287

285288
void TableWriter::WriteTuple(TupleTableSlot *slot) {
286289
Assert(writer_);
287290
Assert(strategy_);
288-
// should check split strategy before write tuple
289-
// otherwise, may got a empty file in the disk
290-
if (strategy_->ShouldSplit(writer_->PhysicalSize(), num_tuples_)) {
291-
writer_->Close();
292-
writer_ = nullptr;
293-
Open();
291+
// Sampled split check to reduce PhysicalSize() overhead
292+
// We first perform a sampled pre-write check to avoid empty files.
293+
if ((num_tuples_ % split_check_interval_) == 0) {
294+
cur_physical_size_ = writer_->PhysicalSize();
295+
if (strategy_->ShouldSplit(cur_physical_size_, num_tuples_)) {
296+
writer_->Close();
297+
writer_ = nullptr;
298+
Open();
299+
cur_physical_size_ = 0;
300+
}
294301
}
295302

296303
writer_->WriteTuple(slot);

contrib/pax_storage/src/cpp/storage/pax.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ class TableWriter {
131131
std::vector<std::tuple<ColumnEncoding_Kind, int>> encoding_opts_;
132132

133133
bool is_dfs_table_space_;
134+
135+
uint32_t split_check_interval_ = 16;
136+
size_t cur_physical_size_ = 0;
134137
};
135138

136139
class TableReader final {

0 commit comments

Comments
 (0)