Skip to content

Commit b640914

Browse files
authored
[opt](memory) release packed file writer buffer after flush (#63967)
### What problem does this PR solve? PackedFileWriter buffers data for files smaller than small_file_threshold_bytes before deciding whether to pack them into a packed file or switch to direct write. The buffered data is stored in a std::string. After the buffered data is flushed to the inner writer or submitted to PackedFileManager, the old code only called clear(), which resets size but keeps capacity. When segment file writers are still retained by upper-level rowset structures after close, this retained capacity can keep a large amount of memory alive and show up under PackedFileWriter::appendv in memory profiling: <img width="800" height="1180" alt="image" src="https://github.com/user-attachments/assets/7e0e2c40-c35b-4bfc-b45b-aeed31c29771" /> This change reserves the final append size before buffering to reduce repeated std::string growth, and releases the buffer capacity after the data has been flushed or submitted.
1 parent 7c9c366 commit b640914

3 files changed

Lines changed: 32 additions & 2 deletions

File tree

be/src/io/fs/packed_file_writer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ Status PackedFileWriter::appendv(const Slice* data, size_t data_cnt) {
8080
if (_is_direct_write) {
8181
RETURN_IF_ERROR(_inner_writer->appendv(data, data_cnt));
8282
} else {
83+
_buffer.reserve(_bytes_appended + total_size);
8384
// Buffer small file data
8485
for (size_t i = 0; i < data_cnt; ++i) {
8586
_buffer.append(data[i].data, data[i].size);
@@ -181,14 +182,18 @@ Status PackedFileWriter::_wait_packed_upload() {
181182
return Status::OK();
182183
}
183184

185+
void PackedFileWriter::_release_buffer() {
186+
std::string().swap(_buffer);
187+
}
188+
184189
Status PackedFileWriter::_switch_to_direct_write() {
185190
DCHECK(!_is_direct_write);
186191

187192
// If we have buffered data, write it to inner writer first
188193
if (_buffer.size() > 0) {
189194
Slice buffer_slice(_buffer.data(), _buffer.size());
190195
RETURN_IF_ERROR(_inner_writer->appendv(&buffer_slice, 1));
191-
_buffer.clear();
196+
_release_buffer();
192197
}
193198

194199
return Status::OK();
@@ -213,7 +218,7 @@ Status PackedFileWriter::_send_to_packed_manager() {
213218

214219
Slice data_slice(_buffer.data(), _buffer.size());
215220
RETURN_IF_ERROR(_packed_file_manager->append_small_file(_file_path, data_slice, _append_info));
216-
_buffer.clear();
221+
_release_buffer();
217222
return Status::OK();
218223
}
219224

be/src/io/fs/packed_file_writer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@ class PackedFileWriter final : public FileWriter {
6060
// Returns true if this file's data was written to a packed file (not direct write)
6161
bool is_in_packed_file() const override { return !_is_direct_write; }
6262

63+
#ifdef BE_TEST
64+
size_t buffer_capacity_for_test() const { return _buffer.capacity(); }
65+
#endif
66+
6367
private:
68+
void _release_buffer();
69+
6470
// Async close: submit data without waiting
6571
Status _close_async();
6672

be/test/io/fs/packed_file_writer_test.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,25 @@ TEST_F(PackedFileWriterTest, SwitchToDirectWrite) {
172172
EXPECT_GT(inner_writer_ptr->append_calls(), 0);
173173
}
174174

175+
TEST_F(PackedFileWriterTest, SwitchToDirectWriteReleasesBufferedMemory) {
176+
Path file_path("switch_file_release_buffer");
177+
auto* inner_writer_ptr = _inner_writer.get();
178+
PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);
179+
180+
const size_t default_capacity = std::string().capacity();
181+
std::string small_data(80, 'a');
182+
Slice small_slice(small_data);
183+
ASSERT_TRUE(writer.appendv(&small_slice, 1).ok());
184+
EXPECT_GT(writer.buffer_capacity_for_test(), default_capacity);
185+
186+
std::string large_data(30, 'b');
187+
Slice large_slice(large_data);
188+
ASSERT_TRUE(writer.appendv(&large_slice, 1).ok());
189+
190+
EXPECT_GT(inner_writer_ptr->append_calls(), 0);
191+
EXPECT_LE(writer.buffer_capacity_for_test(), default_capacity);
192+
}
193+
175194
TEST_F(PackedFileWriterTest, CloseAsync) {
176195
Path file_path("async_file");
177196
PackedFileWriter writer(std::move(_inner_writer), file_path, _append_info);

0 commit comments

Comments
 (0)