Skip to content

Commit 4938d63

Browse files
authored
[opt](exec) skip result serialization for dry run queries (#63356)
### What problem does this PR solve? Issue Number: N/A. Related PR: None. Problem Summary: When dry_run_query is enabled, FE only needs the returned row count, but BE still spends most of PhysicalResultSink time serializing MySQL result rows. In a local dry-run case against numbers("number"="1000000"), the profile showed AppendBatchTime = 77.689ms, TupleConvertTime = 68.650ms, and ResultSendTime = 2.702us, which means the dry-run path was still paying almost the full result sink conversion cost. This change keeps output expr evaluation intact, but returns early in the MySQL result writers once the output block is produced in dry-run mode. That preserves returned row accounting while skipping result serialization, block copy, and sink enqueue work that dry-run queries never consume.
1 parent d5fb1e5 commit 4938d63

2 files changed

Lines changed: 26 additions & 3 deletions

File tree

be/src/exec/sink/writer/vmysql_result_writer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,12 @@ Status VMysqlResultWriter::write(RuntimeState* state, Block& input_block) {
296296
Block block;
297297
RETURN_IF_ERROR(VExprContext::get_output_block_after_execute_exprs(_output_vexpr_ctxs,
298298
input_block, &block));
299+
300+
if (_is_dry_run) {
301+
_written_rows += cast_set<int64_t>(block.rows());
302+
return Status::OK();
303+
}
304+
299305
const auto total_bytes = block.bytes();
300306

301307
if (total_bytes > config::thrift_max_message_size) [[unlikely]] {

be/test/core/data_type_serde/data_type_serde_mysql_test.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ class TestBlockSerializer final : public MySQLResultBlockBuffer {
7777
public:
7878
TestBlockSerializer(RuntimeState* state) : MySQLResultBlockBuffer(state) {}
7979
~TestBlockSerializer() override = default;
80+
size_t queue_size() {
81+
std::lock_guard<std::mutex> l(_lock);
82+
return _result_batch_queue.size();
83+
}
8084
std::shared_ptr<TFetchDataResult> get_block() {
8185
std::lock_guard<std::mutex> l(_lock);
8286
DCHECK_EQ(_result_batch_queue.size(), 1);
@@ -86,7 +90,7 @@ class TestBlockSerializer final : public MySQLResultBlockBuffer {
8690
}
8791
};
8892

89-
void serialize_and_deserialize_mysql_test() {
93+
void serialize_and_deserialize_mysql_test(bool dry_run) {
9094
Block block;
9195
// create_descriptor_tablet();
9296
std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> cols {
@@ -317,12 +321,25 @@ void serialize_and_deserialize_mysql_test() {
317321
auto serializer = std::make_shared<TestBlockSerializer>(&state);
318322
VMysqlResultWriter mysql_writer(serializer, _output_vexpr_ctxs, nullptr, false);
319323

320-
Status st = mysql_writer.write(&runtime_stat, block);
324+
TQueryOptions query_options;
325+
query_options.__set_dry_run_query(dry_run);
326+
runtime_stat.set_query_options(query_options);
327+
328+
Status st = mysql_writer.init(&runtime_stat);
321329
EXPECT_TRUE(st.ok());
330+
331+
st = mysql_writer.write(&runtime_stat, block);
332+
EXPECT_TRUE(st.ok());
333+
EXPECT_EQ(mysql_writer.get_written_rows(), row_num);
334+
EXPECT_EQ(serializer->queue_size(), dry_run ? 0 : 1);
322335
}
323336

324337
TEST(DataTypeSerDeMysqlTest, ScalaSerDeTest) {
325-
serialize_and_deserialize_mysql_test();
338+
serialize_and_deserialize_mysql_test(false);
339+
}
340+
341+
TEST(DataTypeSerDeMysqlTest, DryRunSkipsSerialization) {
342+
serialize_and_deserialize_mysql_test(true);
326343
}
327344

328345
} // namespace doris

0 commit comments

Comments
 (0)