Skip to content

Commit 42650f4

Browse files
committed
Add punch hole GC
Signed-off-by: v01dstar <yang.zhang@pingcap.com>
1 parent fe7cfbb commit 42650f4

29 files changed

Lines changed: 940 additions & 49 deletions

include/titan/db.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ class TitanDB : public StackableDB {
201201
// "rocksdb.titandb.discardable_ratio_le100_file_num" - returns count of
202202
// file whose discardable ratio is less or equal to 100%.
203203
static const std::string kNumDiscardableRatioLE100File;
204+
// "rocksdb.titandb.kNumHolePunchableBlobSize" - returns the size of hole
205+
// punchable blobs (no longer referenced in SSTs) in the database.
206+
static const std::string kHolePunchableBlobSize;
204207
};
205208

206209
bool GetProperty(ColumnFamilyHandle* column_family, const Slice& property,

include/titan/options.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ struct TitanCFOptions : public ColumnFamilyOptions {
173173
// data's 0s and 0s created by punch holes).
174174
uint64_t block_size{4096};
175175
bool enable_punch_hole_gc{false};
176+
uint64_t punch_hole_threshold{4 * 1024 * 1024};
176177

177178
TitanCFOptions() = default;
178179
explicit TitanCFOptions(const ColumnFamilyOptions& options)
@@ -230,12 +231,14 @@ struct MutableTitanCFOptions {
230231
: blob_run_mode(opts.blob_run_mode),
231232
min_blob_size(opts.min_blob_size),
232233
blob_file_compression(opts.blob_file_compression),
233-
blob_file_discardable_ratio(opts.blob_file_discardable_ratio) {}
234+
blob_file_discardable_ratio(opts.blob_file_discardable_ratio),
235+
punch_hole_threshold(opts.punch_hole_threshold) {}
234236

235237
TitanBlobRunMode blob_run_mode;
236238
uint64_t min_blob_size;
237239
CompressionType blob_file_compression;
238240
double blob_file_discardable_ratio;
241+
uint64_t punch_hole_threshold;
239242
};
240243

241244
struct TitanOptions : public TitanDBOptions, public TitanCFOptions {

src/blob_file_builder.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,12 @@ void BlobFileBuilder::FlushSampleRecords(OutContexts* out_ctx) {
164164
void BlobFileBuilder::WriteEncoderData(BlobHandle* handle) {
165165
handle->offset = file_->GetFileSize();
166166
handle->size = encoder_.GetEncodedSize();
167-
live_data_size_ += handle->size;
167+
if (block_size_ > 0) {
168+
live_data_size_ +=
169+
(handle->size + block_size_ - 1) / block_size_ * block_size_;
170+
} else {
171+
live_data_size_ += handle->size;
172+
}
168173

169174
status_ = file_->Append(encoder_.GetHeader());
170175
if (ok()) {

src/blob_file_builder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ class BlobFileBuilder {
108108
const std::string& GetSmallestKey() { return smallest_key_; }
109109
const std::string& GetLargestKey() { return largest_key_; }
110110

111+
uint64_t GetBlockSize() { return block_size_; }
112+
111113
uint64_t live_data_size() const { return live_data_size_; }
112114

113115
private:

src/blob_file_manager.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,15 @@ class BlobFileManager {
7272
(void)handles;
7373
return Status::OK();
7474
}
75+
76+
// Updates the metadata of the file. This is used to update the
77+
// metadata of the file after the file is punched with holes.
78+
virtual Status BatchUpdateFiles(
79+
uint32_t cf_id, const std::vector<std::shared_ptr<BlobFileMeta>>& files) {
80+
(void)cf_id;
81+
(void)files;
82+
return Status::OK();
83+
}
7584
};
7685

7786
} // namespace titandb

src/blob_file_set.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,22 @@ class BlobFileSet {
9898

9999
bool IsOpened() { return opened_.load(std::memory_order_acquire); }
100100

101+
uint64_t GetBlockSize(uint32_t cf_id) {
102+
MutexLock l(mutex_);
103+
auto storage = GetBlobStorage(cf_id).lock();
104+
if (storage != nullptr && storage->cf_options().enable_punch_hole_gc) {
105+
return storage->cf_options().block_size;
106+
}
107+
return 0;
108+
}
109+
110+
std::unordered_map<uint64_t, uint64_t> GetFileBlockSizes(uint32_t cf_id) {
111+
MutexLock l(mutex_);
112+
auto storage = GetBlobStorage(cf_id).lock();
113+
return storage ? storage->GetFileBlockSizes()
114+
: std::unordered_map<uint64_t, uint64_t>();
115+
}
116+
101117
private:
102118
struct ManifestWriter;
103119

src/blob_file_size_collector.cc

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,13 @@ namespace titandb {
77

88
TablePropertiesCollector*
99
BlobFileSizeCollectorFactory::CreateTablePropertiesCollector(
10-
rocksdb::TablePropertiesCollectorFactory::Context /* context */) {
11-
return new BlobFileSizeCollector();
10+
rocksdb::TablePropertiesCollectorFactory::Context context) {
11+
if (blob_file_set_ != nullptr) {
12+
return new BlobFileSizeCollector(
13+
blob_file_set_->GetBlockSize(context.column_family_id),
14+
blob_file_set_->GetFileBlockSizes(context.column_family_id));
15+
}
16+
return new BlobFileSizeCollector(0, {});
1217
}
1318

1419
const std::string BlobFileSizeCollector::kPropertiesName =
@@ -57,11 +62,32 @@ Status BlobFileSizeCollector::AddUserKey(const Slice& /* key */,
5762
return s;
5863
}
5964

65+
auto size = index.blob_handle.size;
66+
if (default_block_size_ > 0 && !file_block_sizes_.empty()) {
67+
// If the blob file cannot be found in the block size map, it must be a
68+
// newly created file that has not been added to blob_file_set; in this case,
69+
// we know the block size of the file is default_block_size_.
70+
// If the blob file can be found in the block size map, it implies we are
71+
// moving the reference only, while keeping the blob at the original file,
72+
// in this case, we should use the block size of the original file.
73+
uint64_t block_size = default_block_size_;
74+
if (!file_block_sizes_.empty()) {
75+
auto iter = file_block_sizes_.find(index.file_number);
76+
if (iter != file_block_sizes_.end()) {
77+
block_size = iter->second;
78+
}
79+
}
80+
if (block_size > 0) {
81+
// Align blob size with block size.
82+
size = (size + block_size - 1) / block_size * block_size;
83+
}
84+
}
85+
6086
auto iter = blob_files_size_.find(index.file_number);
6187
if (iter == blob_files_size_.end()) {
62-
blob_files_size_[index.file_number] = index.blob_handle.size;
88+
blob_files_size_[index.file_number] = size;
6389
} else {
64-
iter->second += index.blob_handle.size;
90+
iter->second += size;
6591
}
6692

6793
return Status::OK();

src/blob_file_size_collector.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,31 @@ namespace titandb {
1313
class BlobFileSizeCollectorFactory final
1414
: public TablePropertiesCollectorFactory {
1515
public:
16+
// If punch_hole_gc is enabled, then blob_file_set must be provided.
17+
// If blob_file_set is not provided, then punch_hole_gc will be considered
18+
// disabled, blob size will not align with block size.
19+
BlobFileSizeCollectorFactory(BlobFileSet* blob_file_set = nullptr)
20+
: blob_file_set_(blob_file_set) {}
21+
BlobFileSizeCollectorFactory(const BlobFileSizeCollectorFactory&) = delete;
22+
void operator=(const BlobFileSizeCollectorFactory&) = delete;
1623
TablePropertiesCollector* CreateTablePropertiesCollector(
1724
TablePropertiesCollectorFactory::Context context) override;
1825

1926
const char* Name() const override { return "BlobFileSizeCollector"; }
27+
28+
private:
29+
BlobFileSet* blob_file_set_;
2030
};
2131

2232
class BlobFileSizeCollector final : public TablePropertiesCollector {
2333
public:
2434
const static std::string kPropertiesName;
2535

36+
BlobFileSizeCollector(uint64_t default_block_size,
37+
std::unordered_map<uint64_t, uint64_t> file_block_sizes)
38+
: default_block_size_(default_block_size),
39+
file_block_sizes_(file_block_sizes) {}
40+
2641
static bool Encode(const std::map<uint64_t, uint64_t>& blob_files_size,
2742
std::string* result);
2843
static bool Decode(Slice* slice,
@@ -38,6 +53,8 @@ class BlobFileSizeCollector final : public TablePropertiesCollector {
3853

3954
private:
4055
std::map<uint64_t, uint64_t> blob_files_size_;
56+
uint64_t default_block_size_;
57+
std::unordered_map<uint64_t, uint64_t> file_block_sizes_;
4158
};
4259

4360
} // namespace titandb

src/blob_format.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ void BlobFileMeta::EncodeTo(std::string* dst) const {
144144
PutVarint64(dst, block_size_);
145145
PutLengthPrefixedSlice(dst, smallest_key_);
146146
PutLengthPrefixedSlice(dst, largest_key_);
147+
PutVarint64(dst, effective_file_size_);
147148
}
148149

149150
Status BlobFileMeta::DecodeFromV1(Slice* src) {
@@ -191,6 +192,12 @@ Status BlobFileMeta::DecodeFrom(Slice* src) {
191192
} else {
192193
return Status::Corruption("BlobLargestKey decode failed");
193194
}
195+
uint64_t effective_file_size;
196+
if (!GetVarint64(src, &effective_file_size)) {
197+
return Status::Corruption(
198+
"BlobFileMeta hole_punchable_size_ decode failed");
199+
}
200+
effective_file_size_ = effective_file_size;
194201
return Status::OK();
195202
}
196203

src/blob_format.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,14 @@ class BlobFileMeta {
248248
uint64_t file_size() const { return file_size_; }
249249
uint64_t live_data_size() const { return live_data_size_; }
250250
uint32_t file_level() const { return file_level_; }
251+
uint64_t block_size() const { return block_size_; }
251252
const std::string& smallest_key() const { return smallest_key_; }
252253
const std::string& largest_key() const { return largest_key_; }
254+
int64_t effective_file_size() const { return effective_file_size_; }
253255

254256
void set_live_data_size(int64_t size) { live_data_size_ = size; }
257+
// This should be called with db mutex held.
258+
void set_effective_file_size(int64_t size) { effective_file_size_ = size; }
255259
uint64_t file_entries() const { return file_entries_; }
256260
FileState file_state() const { return state_; }
257261
bool is_obsolete() const { return state_ == FileState::kObsolete; }
@@ -275,6 +279,10 @@ class BlobFileMeta {
275279
(file_size_ - kBlobMaxHeaderSize - kBlobFooterSize));
276280
}
277281
TitanInternalStats::StatsType GetDiscardableRatioLevel() const;
282+
// This should be called with db mutex held.
283+
uint64_t GetHolePunchableSize() const {
284+
return effective_file_size_ - live_data_size_;
285+
}
278286
void Dump(bool with_keys) const;
279287

280288
private:
@@ -291,6 +299,18 @@ class BlobFileMeta {
291299
std::string smallest_key_;
292300
std::string largest_key_;
293301

302+
// The effective size of current file. This is different from `file_size_`, as
303+
// `file_size_` is the original size of the file, and does not consider space
304+
// reclaimed by punch hole GC.
305+
// We can't use file system's `st_blocks` to get the logical size, because
306+
// the file system's block size may be different from Titan's block size.
307+
// This is used to calculate the size of the punchable hole. i.e.
308+
// effective_file_size_ - live_data_size_.
309+
// This might be bigger than the actual size of the file, when Titan crashes
310+
// before updating the `effective_file_size_` during punch hole GC. This is
311+
// fine, as it will be corrected when the file is chosen for GC next time.
312+
int64_t effective_file_size_{0};
313+
294314
// Not persistent field
295315

296316
// Size of data with reference from SST files.
@@ -303,7 +323,11 @@ class BlobFileMeta {
303323
// So when state_ == kPendingLSM, it uses this to record the delta as a
304324
// positive number if any later compaction is trigger before previous
305325
// `OnCompactionCompleted()` is called.
326+
// The size is aligned with the block size when punch hole GC is enabled.
306327
std::atomic<int64_t> live_data_size_{0};
328+
// This is different from `file_size_`, as `file_size_` is the original size
329+
// of the file, and does not consider space reclaimed by punch hole GC.
330+
std::atomic<int64_t> disk_usage_{0};
307331
std::atomic<FileState> state_{FileState::kNone};
308332
};
309333

0 commit comments

Comments
 (0)