Skip to content

Commit 8718153

Browse files
Fix missed low-token signal in TiFlash resource control (#10140) (#10195)
close #10137 Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io> Signed-off-by: guo-shaoge <shaoge1994@163.com> Co-authored-by: guo-shaoge <shaoge1994@163.com>
1 parent bbda147 commit 8718153

9 files changed

Lines changed: 907 additions & 648 deletions

File tree

dbms/src/Common/TiFlashMetrics.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,6 +737,9 @@ namespace DB
737737
F(type_remaining_tokens, {"type", "remaining_tokens"}), \
738738
F(type_avg_speed, {"type", "avg_speed"}), \
739739
F(type_total_consumption, {"type", "total_consumption"}), \
740+
F(type_low_token_threshold, {"type", "low_token_threshold"}), \
741+
F(type_request_gac_count, {"type", "request_gac_count"}), \
742+
F(type_enter_degrade_mode, {"type", "enter_degrade_mode"}), \
740743
F(type_bucket_fill_rate, {"type", "bucket_fill_rate"}), \
741744
F(type_bucket_capacity, {"type", "bucket_capacity"}), \
742745
F(type_compute_ru_consumption, {"type", "compute_ru_consumption"}), \

dbms/src/Flash/Pipeline/Schedule/TaskQueues/ResourceControlQueue.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,22 +114,12 @@ bool ResourceControlQueue<NestedTaskQueueType>::take(TaskPtr & task)
114114
if unlikely (updateResourceGroupInfosWithoutLock())
115115
continue;
116116

117-
UInt64 wait_dura = LocalAdmissionController::DEFAULT_FETCH_GAC_INTERVAL_MS;
117+
UInt64 wait_dura = LocalAdmissionController::DEFAULT_MAX_EST_WAIT_DURATION.count();
118118
if (!resource_group_infos.empty())
119119
{
120120
const ResourceGroupInfo & group_info = resource_group_infos.top();
121121
const bool ru_exhausted = LocalAdmissionController::isRUExhausted(group_info.priority);
122122

123-
LOG_TRACE(
124-
logger,
125-
"trying to schedule task of resource group {}, priority: {}, ru exhausted: {}, is_finished: {}, "
126-
"task_queue.empty(): {}",
127-
group_info.name,
128-
group_info.priority,
129-
ru_exhausted,
130-
is_finished,
131-
group_info.task_queue->empty());
132-
133123
// When the highest priority among the resource groups is less than zero, it means the RU of all resource groups is exhausted.
134124
// In this situation, no task should be taken from the nested task queue.
135125
if (!ru_exhausted)
@@ -143,8 +133,9 @@ bool ResourceControlQueue<NestedTaskQueueType>::take(TaskPtr & task)
143133
assert(!task);
144134
// Wakeup when:
145135
// 1. finish() is called.
146-
// 2. refill_token_callback is called by LAC.
147-
// 3. token refilled in trickle mode.
136+
// 2. a new task is submitted.
137+
// 3. LAC gets a response from GAC, or estWaitDura times out.
138+
// So wait_dura is used as a timeout to avoid getting stuck.
148139
cv.wait_for(lock, std::chrono::milliseconds(wait_dura));
149140
}
150141
}
@@ -158,7 +149,6 @@ void ResourceControlQueue<NestedTaskQueueType>::updateStatistics(
158149
assert(task);
159150
auto ru = cpuTimeToRU(inc_value);
160151
const String & resource_group_name = task->getResourceGroupName();
161-
LOG_TRACE(logger, "resource group {} will consume {} RU(or {} cpu time in ns)", resource_group_name, ru, inc_value);
162152
LocalAdmissionController::global_instance->consumeCPUResource(resource_group_name, ru, inc_value);
163153

164154
NestedTaskQueuePtr group_queue = nullptr;

dbms/src/Flash/Pipeline/Schedule/TaskQueues/tests/gtest_resource_control_queue.cpp

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ std::shared_ptr<ResourceGroup> createResourceGroupOfDynamicTokenBucket(
8181
TokenBucket::TokenBucketConfig config(
8282
user_ru_per_sec,
8383
user_ru_per_sec,
84+
static_cast<double>(std::numeric_limits<uint64_t>::max()),
8485
static_cast<double>(std::numeric_limits<uint64_t>::max()));
85-
resource_group->bucket->reConfig(config);
86+
resource_group->bucket->reConfig(config, std::chrono::steady_clock::now());
8687
return resource_group;
8788
}
8889

@@ -190,25 +191,21 @@ class TestResourceControlQueue : public ::testing::Test
190191
resource_groups = std::vector<ResourceGroupPtr>{
191192
createResourceGroupOfStaticTokenBucket(
192193
"rg1",
193-
ResourceGroup::MediumPriorityValue,
194+
ResourceGroup::UserMediumPriority,
194195
rg1_ru_per_sec,
195196
rg1_burstable),
196-
createResourceGroupOfStaticTokenBucket(
197-
"rg2",
198-
ResourceGroup::MediumPriorityValue,
199-
rg2_ru_per_sec,
200-
false),
197+
createResourceGroupOfStaticTokenBucket("rg2", ResourceGroup::UserMediumPriority, rg2_ru_per_sec, false),
201198
};
202199
else
203200
resource_groups = std::vector<ResourceGroupPtr>{
204201
createResourceGroupOfDynamicTokenBucket(
205202
"rg1",
206-
ResourceGroup::MediumPriorityValue,
203+
ResourceGroup::UserMediumPriority,
207204
rg1_ru_per_sec,
208205
rg1_burstable),
209206
createResourceGroupOfDynamicTokenBucket(
210207
"rg2",
211-
ResourceGroup::MediumPriorityValue,
208+
ResourceGroup::UserMediumPriority,
212209
rg2_ru_per_sec,
213210
false),
214211
};
@@ -247,15 +244,15 @@ class TestResourceControlQueue : public ::testing::Test
247244
std::vector<ResourceGroupPtr> resource_groups;
248245
if (static_token_bucket)
249246
resource_groups = std::vector<ResourceGroupPtr>{
250-
createResourceGroupOfStaticTokenBucket("rg-ru20", ResourceGroup::MediumPriorityValue, 20, false),
251-
createResourceGroupOfStaticTokenBucket("rg-ru100", ResourceGroup::MediumPriorityValue, 100, false),
252-
createResourceGroupOfStaticTokenBucket("rg-ru200", ResourceGroup::MediumPriorityValue, 200, false),
247+
createResourceGroupOfStaticTokenBucket("rg-ru20", ResourceGroup::UserMediumPriority, 20, false),
248+
createResourceGroupOfStaticTokenBucket("rg-ru100", ResourceGroup::UserMediumPriority, 100, false),
249+
createResourceGroupOfStaticTokenBucket("rg-ru200", ResourceGroup::UserMediumPriority, 200, false),
253250
};
254251
else
255252
resource_groups = std::vector<ResourceGroupPtr>{
256-
createResourceGroupOfDynamicTokenBucket("rg-ru20", ResourceGroup::MediumPriorityValue, 20, false),
257-
createResourceGroupOfDynamicTokenBucket("rg-ru100", ResourceGroup::MediumPriorityValue, 100, false),
258-
createResourceGroupOfDynamicTokenBucket("rg-ru200", ResourceGroup::MediumPriorityValue, 200, false),
253+
createResourceGroupOfDynamicTokenBucket("rg-ru20", ResourceGroup::UserMediumPriority, 20, false),
254+
createResourceGroupOfDynamicTokenBucket("rg-ru100", ResourceGroup::UserMediumPriority, 100, false),
255+
createResourceGroupOfDynamicTokenBucket("rg-ru200", ResourceGroup::UserMediumPriority, 200, false),
259256
};
260257

261258
setupMockLAC(resource_groups);
@@ -304,15 +301,15 @@ class TestResourceControlQueue : public ::testing::Test
304301
// RU proportion is 1:5:10.
305302
if (static_token_bucket)
306303
resource_groups = std::vector<ResourceGroupPtr>{
307-
createResourceGroupOfStaticTokenBucket("rg-ru20K", ResourceGroup::MediumPriorityValue, 20000, false),
308-
createResourceGroupOfStaticTokenBucket("rg-ru100K", ResourceGroup::MediumPriorityValue, 100000, false),
309-
createResourceGroupOfStaticTokenBucket("rg-ru200K", ResourceGroup::MediumPriorityValue, 200000, false),
304+
createResourceGroupOfStaticTokenBucket("rg-ru20K", ResourceGroup::UserMediumPriority, 20000, false),
305+
createResourceGroupOfStaticTokenBucket("rg-ru100K", ResourceGroup::UserMediumPriority, 100000, false),
306+
createResourceGroupOfStaticTokenBucket("rg-ru200K", ResourceGroup::UserMediumPriority, 200000, false),
310307
};
311308
else
312309
resource_groups = std::vector<ResourceGroupPtr>{
313-
createResourceGroupOfDynamicTokenBucket("rg-ru20K", ResourceGroup::MediumPriorityValue, 20000, false),
314-
createResourceGroupOfDynamicTokenBucket("rg-ru100K", ResourceGroup::MediumPriorityValue, 100000, false),
315-
createResourceGroupOfDynamicTokenBucket("rg-ru200K", ResourceGroup::MediumPriorityValue, 200000, false),
310+
createResourceGroupOfDynamicTokenBucket("rg-ru20K", ResourceGroup::UserMediumPriority, 20000, false),
311+
createResourceGroupOfDynamicTokenBucket("rg-ru100K", ResourceGroup::UserMediumPriority, 100000, false),
312+
createResourceGroupOfDynamicTokenBucket("rg-ru200K", ResourceGroup::UserMediumPriority, 200000, false),
316313
};
317314

318315
setupMockLAC(resource_groups);
@@ -433,7 +430,7 @@ TEST_F(TestResourceControlQueue, RunOutOfRU)
433430
// 1. When RU is exhausted, expect that task cannot be executed.
434431
const uint64_t ru_per_sec = 1;
435432
auto resource_group
436-
= createResourceGroupOfDynamicTokenBucket(rg_name, ResourceGroup::MediumPriorityValue, ru_per_sec, false);
433+
= createResourceGroupOfDynamicTokenBucket(rg_name, ResourceGroup::UserMediumPriority, ru_per_sec, false);
437434
setupMockLAC({resource_group});
438435

439436
const int thread_num = 10;
@@ -459,7 +456,7 @@ TEST_F(TestResourceControlQueue, RunOutOfRU)
459456
const uint64_t new_ru_per_sec = 1000000;
460457
resource_group = createResourceGroupOfDynamicTokenBucket(
461458
rg_name,
462-
ResourceGroup::MediumPriorityValue,
459+
ResourceGroup::UserMediumPriority,
463460
new_ru_per_sec,
464461
false);
465462
LocalAdmissionController::global_instance->resource_groups.insert({rg_name, resource_group});
@@ -571,9 +568,9 @@ TEST_F(TestResourceControlQueue, cancel)
571568
{
572569
const auto rg_names = std::vector<String>{"rg-ru20K", "rg-ru100K", "rg-ru200K"};
573570
auto resource_groups = std::vector<ResourceGroupPtr>{
574-
createResourceGroupOfDynamicTokenBucket(rg_names[0], ResourceGroup::MediumPriorityValue, 20000, false),
575-
createResourceGroupOfDynamicTokenBucket(rg_names[1], ResourceGroup::MediumPriorityValue, 100000, false),
576-
createResourceGroupOfDynamicTokenBucket(rg_names[2], ResourceGroup::MediumPriorityValue, 200000, false),
571+
createResourceGroupOfDynamicTokenBucket(rg_names[0], ResourceGroup::UserMediumPriority, 20000, false),
572+
createResourceGroupOfDynamicTokenBucket(rg_names[1], ResourceGroup::UserMediumPriority, 100000, false),
573+
createResourceGroupOfDynamicTokenBucket(rg_names[2], ResourceGroup::UserMediumPriority, 200000, false),
577574
};
578575
const String query_id_prefix = "mock_query_id";
579576
const String req_id_prefix = "mock_req_id";

0 commit comments

Comments
 (0)