Skip to content

Commit e84ee46

Browse files
authored
Raft: Fix coprocessor ignore new added RegionReadStatus thus cause inconsistent result (#10543)
close #10510, ref #10540 * Handling new added RegionReadStatus enum values for error response of coprocessor * Do not set the `default` branch for switch (e.status) so that the compiler will throw an error if there is not covered new enum values Signed-off-by: JaySon-Huang <tshent@qq.com>
1 parent c87c3bf commit e84ee46

6 files changed

Lines changed: 100 additions & 26 deletions

File tree

dbms/src/Flash/CoprocessorHandler.cpp

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -249,31 +249,8 @@ grpc::Status CoprocessorHandler<is_stream>::execute()
249249
response = cop_response;
250250
}
251251

252-
errorpb::Error * region_err;
253-
switch (e.status)
254-
{
255-
case RegionException::RegionReadStatus::OTHER:
256-
case RegionException::RegionReadStatus::BUCKET_EPOCH_NOT_MATCH:
257-
case RegionException::RegionReadStatus::FLASHBACK:
258-
case RegionException::RegionReadStatus::KEY_NOT_IN_REGION:
259-
case RegionException::RegionReadStatus::TIKV_SERVER_ISSUE:
260-
case RegionException::RegionReadStatus::READ_INDEX_TIMEOUT:
261-
case RegionException::RegionReadStatus::NOT_LEADER:
262-
case RegionException::RegionReadStatus::NOT_FOUND_TIKV:
263-
case RegionException::RegionReadStatus::NOT_FOUND:
264-
GET_METRIC(tiflash_coprocessor_request_error, reason_region_not_found).Increment();
265-
region_err = response->mutable_region_error();
266-
region_err->mutable_region_not_found()->set_region_id(cop_request->context().region_id());
267-
break;
268-
case RegionException::RegionReadStatus::EPOCH_NOT_MATCH:
269-
GET_METRIC(tiflash_coprocessor_request_error, reason_epoch_not_match).Increment();
270-
region_err = response->mutable_region_error();
271-
region_err->mutable_epoch_not_match();
272-
break;
273-
default:
274-
// should not happen
275-
break;
276-
}
252+
setResponseByRegionException(response, e, cop_request->context().region_id());
253+
277254
if constexpr (is_stream)
278255
{
279256
cop_writer->Write(stream_response);

dbms/src/Storages/KVStore/MultiRaft/RegionExecutionResult.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
#pragma once
1616

17-
#include <Storages/KVStore/Read/RegionException.h>
1817
#include <Storages/KVStore/Region_fwd.h>
1918
#include <Storages/KVStore/Types.h>
2019

dbms/src/Storages/KVStore/Read/LearnerReadWorker.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ void LearnerReadWorker::recordReadIndexError(
263263
const LearnerReadSnapshot & regions_snapshot,
264264
RegionsReadIndexResult & read_index_result)
265265
{
266+
// Now TiFlash set `unavailable_regions` by `read_index_result`, then `unavailable_regions` will check
267+
// and throw `LockException` or `RegionException` in `tryThrowRegionException`. Then `CoprocessorHandler`
268+
// or other similar logic handle the exceptions and retry the read request.
269+
266270
// if size of batch_read_index_result is not equal with batch_read_index_req, there must be region_error/lock, find and return directly.
267271
for (auto & [region_id, resp] : read_index_result)
268272
{
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright 2025 PingCAP, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include <Common/TiFlashMetrics.h>
16+
#include <Storages/KVStore/Read/RegionException.h>
17+
#include <kvproto/coprocessor.pb.h>
18+
#include <kvproto/errorpb.pb.h>
19+
20+
namespace DB
21+
{
22+
void setResponseByRegionException(coprocessor::Response * response, const RegionException & e, RegionID region_id)
23+
{
24+
assert(response != nullptr);
25+
26+
errorpb::Error * region_err;
27+
switch (e.status)
28+
{
29+
case RegionException::RegionReadStatus::OK:
30+
case RegionException::RegionReadStatus::MEET_LOCK:
31+
// Should not happen RegionException with RegionReadStatus::OK status
32+
// And RegionReadStatus::MEET_LOCK should be handled in LockException
33+
region_err = response->mutable_region_error();
34+
region_err->set_message(
35+
fmt::format("Unexpected RegionException with status {}", magic_enum::enum_name(e.status)));
36+
region_err->mutable_region_not_found()->set_region_id(region_id);
37+
break;
38+
case RegionException::RegionReadStatus::OTHER:
39+
case RegionException::RegionReadStatus::BUCKET_EPOCH_NOT_MATCH:
40+
case RegionException::RegionReadStatus::FLASHBACK:
41+
case RegionException::RegionReadStatus::KEY_NOT_IN_REGION:
42+
case RegionException::RegionReadStatus::TIKV_SERVER_ISSUE:
43+
case RegionException::RegionReadStatus::READ_INDEX_TIMEOUT:
44+
case RegionException::RegionReadStatus::NOT_LEADER:
45+
case RegionException::RegionReadStatus::NOT_FOUND_TIKV:
46+
case RegionException::RegionReadStatus::NOT_FOUND:
47+
case RegionException::RegionReadStatus::STALE_COMMAND:
48+
case RegionException::RegionReadStatus::STORE_NOT_MATCH:
49+
GET_METRIC(tiflash_coprocessor_request_error, reason_region_not_found).Increment();
50+
region_err = response->mutable_region_error();
51+
region_err->mutable_region_not_found()->set_region_id(region_id);
52+
break;
53+
case RegionException::RegionReadStatus::EPOCH_NOT_MATCH:
54+
GET_METRIC(tiflash_coprocessor_request_error, reason_epoch_not_match).Increment();
55+
region_err = response->mutable_region_error();
56+
region_err->mutable_epoch_not_match();
57+
// TODO: set current_regions when mutable epoch not match is hit
58+
break;
59+
// Notice: We need to handle all enum cases here to ensure correctly retrying.
60+
// Do NOT add a default case here.
61+
// default:
62+
// break;
63+
}
64+
}
65+
} // namespace DB

dbms/src/Storages/KVStore/Read/RegionException.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919

2020
#include <magic_enum.hpp>
2121

22+
namespace coprocessor
23+
{
24+
class Response;
25+
}
26+
2227
namespace DB
2328
{
2429

@@ -59,4 +64,6 @@ class RegionException : public Exception
5964
RegionReadStatus status;
6065
};
6166

67+
void setResponseByRegionException(coprocessor::Response * response, const RegionException & e, RegionID region_id);
68+
6269
} // namespace DB

dbms/src/Storages/KVStore/tests/gtest_learner_read.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@
1717
#include <Interpreters/Context.h>
1818
#include <Storages/KVStore/KVStore.h>
1919
#include <Storages/KVStore/Read/LearnerReadWorker.h>
20+
#include <Storages/KVStore/Read/RegionException.h>
2021
#include <Storages/KVStore/Region.h>
2122
#include <Storages/KVStore/Types.h>
2223
#include <Storages/PathPool.h>
2324
#include <Storages/RegionQueryInfo.h>
2425
#include <TestUtils/TiFlashTestBasic.h>
2526
#include <kvproto/kvrpcpb.pb.h>
2627

28+
#include <magic_enum.hpp>
29+
2730
namespace DB::tests
2831
{
2932
class LearnerReadTest : public ::testing::Test
@@ -261,4 +264,23 @@ try
261264
}
262265
CATCH
263266

267+
TEST_F(LearnerReadTest, SetRespByRegionException)
268+
{
269+
RegionID reg_id = 12345;
270+
RegionException::UnavailableRegions regs{reg_id};
271+
272+
size_t num_enum_values = 0;
273+
for (auto s : magic_enum::enum_values<RegionException::RegionReadStatus>())
274+
{
275+
auto tmp_regs = regs;
276+
RegionException e(std::move(tmp_regs), s, "test");
277+
coprocessor::Response response;
278+
setResponseByRegionException(&response, e, reg_id);
279+
// response must have region_error set
280+
ASSERT_TRUE(response.has_region_error());
281+
num_enum_values++;
282+
}
283+
LOG_INFO(Logger::get(), "SetRespByRegionException test passed with {} enum values.", num_enum_values);
284+
}
285+
264286
} // namespace DB::tests

0 commit comments

Comments
 (0)