Skip to content

Commit 1c03278

Browse files
authored
Merge pull request #2964 from zhoukangsheng/support_health_check_option
Support channel level application health check options
2 parents c47ec71 + ffaa7ba commit 1c03278

12 files changed

Lines changed: 101 additions & 32 deletions

docs/cn/client.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ channel.Init("http://...", "random:min_working_instances=6 hold_seconds=10", &op
291291
| ------------------------- | ----- | ---------------------------------------- | ----------------------- |
292292
| health_check_interval (R) | 3 | seconds between consecutive health-checkings | src/brpc/socket_map.cpp |
293293

294-
在默认的配置下,一旦server被连接上,它会恢复为可用状态;brpc还提供了应用层健康检查的机制,框架会发送一个HTTP GET请求到该server,请求路径通过-health\_check\_path设置(默认为空),只有当server返回200时,它才会恢复。在两种健康检查机制下,都可通过-health\_check\_timeout\_ms设置超时(默认500ms)。如果在隔离过程中,server从命名服务中删除了,brpc也会停止连接尝试。
294+
在默认的配置下,一旦server被连接上,它会恢复为可用状态,可通过-health\_check\_timeout\_ms设置超时(默认500ms);brpc还提供了应用层健康检查的机制,框架会发送一个HTTP GET请求到该server,只有当server返回200时,它才会恢复。在这种机制下,既可通过-health\_check\_path(默认为空)和-health\_check\_timeout\_ms(默认500ms)分别设置全局的健康检查请求路径和超时,也可通过ChannelOptions中的hc\_option成员变量来对不同的channel设置不同的请求路径和超时,ChannelOptions设置的健康检查参数优先级要高于gflag参数。如果在隔离过程中,server从命名服务中删除了,brpc也会停止连接尝试。
295295

296296
# 发起访问
297297

src/brpc/channel.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ namespace brpc {
4242

4343
DECLARE_bool(enable_rpcz);
4444
DECLARE_bool(usercode_in_pthread);
45+
DEFINE_string(health_check_path, "", "Http path of health check call."
46+
"By default health check succeeds if the server is connectable."
47+
"If this flag is set, health check is not completed until a http "
48+
"call to the path succeeds within -health_check_timeout_ms(to make "
49+
"sure the server functions well).");
50+
DEFINE_int32(health_check_timeout_ms, 500, "The timeout for both establishing "
51+
"the connection and the http call to -health_check_path over the connection");
4552

4653
ChannelOptions::ChannelOptions()
4754
: connect_timeout_ms(200)
@@ -70,7 +77,8 @@ ChannelSSLOptions* ChannelOptions::mutable_ssl_options() {
7077
static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) {
7178
if (opt.auth == NULL &&
7279
!opt.has_ssl_options() &&
73-
opt.connection_group.empty()) {
80+
opt.connection_group.empty() &&
81+
opt.hc_option.health_check_path.empty()) {
7482
// Returning zeroized result by default is more intuitive for users.
7583
return ChannelSignature();
7684
}
@@ -90,6 +98,12 @@ static ChannelSignature ComputeChannelSignature(const ChannelOptions& opt) {
9098
buf.append("|auth=");
9199
buf.append((char*)&opt.auth, sizeof(opt.auth));
92100
}
101+
if (!opt.hc_option.health_check_path.empty()) {
102+
buf.append("|health_check_path=");
103+
buf.append(opt.hc_option.health_check_path);
104+
buf.append("|health_check_timeout_ms=");
105+
buf.append(std::to_string(opt.hc_option.health_check_timeout_ms));
106+
}
93107
if (opt.has_ssl_options()) {
94108
const ChannelSSLOptions& ssl = opt.ssl_options();
95109
buf.push_back('|');
@@ -173,7 +187,10 @@ int Channel::InitChannelOptions(const ChannelOptions* options) {
173187
LOG(ERROR) << "Channel does not support the protocol";
174188
return -1;
175189
}
176-
190+
if (_options.hc_option.health_check_path.empty()) {
191+
_options.hc_option.health_check_path = FLAGS_health_check_path;
192+
_options.hc_option.health_check_timeout_ms = FLAGS_health_check_timeout_ms;
193+
}
177194
if (_options.use_rdma) {
178195
#if BRPC_WITH_RDMA
179196
if (!OptionsAvailableForRdma(&_options)) {
@@ -349,7 +366,7 @@ int Channel::InitSingle(const butil::EndPoint& server_addr_and_port,
349366
return -1;
350367
}
351368
if (SocketMapInsert(SocketMapKey(server_addr_and_port, sig),
352-
&_server_id, ssl_ctx, _options.use_rdma) != 0) {
369+
&_server_id, ssl_ctx, _options.use_rdma, _options.hc_option) != 0) {
353370
LOG(ERROR) << "Fail to insert into SocketMap";
354371
return -1;
355372
}
@@ -388,6 +405,7 @@ int Channel::Init(const char* ns_url,
388405
ns_opt.log_succeed_without_server = _options.log_succeed_without_server;
389406
ns_opt.use_rdma = _options.use_rdma;
390407
ns_opt.channel_signature = ComputeChannelSignature(_options);
408+
ns_opt.hc_option = _options.hc_option;
391409
if (CreateSocketSSLContext(_options, &ns_opt.ssl_ctx) != 0) {
392410
return -1;
393411
}

src/brpc/channel.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "brpc/retry_policy.h"
3737
#include "brpc/backup_request_policy.h"
3838
#include "brpc/naming_service_filter.h"
39+
#include "brpc/health_check_option.h"
3940

4041
namespace brpc {
4142

@@ -143,6 +144,10 @@ struct ChannelOptions {
143144
// Default: ""
144145
std::string connection_group;
145146

147+
// Health check parameters (request path and timeout) configured per channel.
148+
// Its priority is higher than FLAGS_health_check_path and FLAGS_health_check_timeout_ms.
149+
// When hc_option.health_check_path is empty, both the path and the timeout are taken from FLAGS_health_check_path and FLAGS_health_check_timeout_ms.
150+
HealthCheckOption hc_option;
146151
private:
147152
// SSLOptions is large and not often used, allocate it on heap to
148153
// prevent ChannelOptions from being bloated in most cases.

src/brpc/details/health_check.cpp

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ namespace brpc {
3131
// Declared at socket.cpp
3232
extern SocketVarsCollector* g_vars;
3333

34-
DEFINE_string(health_check_path, "", "Http path of health check call."
35-
"By default health check succeeds if the server is connectable."
36-
"If this flag is set, health check is not completed until a http "
37-
"call to the path succeeds within -health_check_timeout_ms(to make "
38-
"sure the server functions well).");
39-
DEFINE_int32(health_check_timeout_ms, 500, "The timeout for both establishing "
40-
"the connection and the http call to -health_check_path over the connection");
41-
4234
class HealthCheckChannel : public brpc::Channel {
4335
public:
4436
HealthCheckChannel() {}
@@ -65,6 +57,7 @@ class OnAppHealthCheckDone : public google::protobuf::Closure {
6557
SocketId id;
6658
int64_t interval_s;
6759
int64_t last_check_time_ms;
60+
HealthCheckOption hc_option;
6861
};
6962

7063
class HealthCheckManager {
@@ -81,15 +74,16 @@ void HealthCheckManager::StartCheck(SocketId id, int64_t check_interval_s) {
8174
<< " was abandoned during health checking";
8275
return;
8376
}
84-
LOG(INFO) << "Checking path=" << ptr->remote_side() << FLAGS_health_check_path;
77+
LOG(INFO) << "Checking path=" << ptr->remote_side() << ptr->health_check_path();
8578
OnAppHealthCheckDone* done = new OnAppHealthCheckDone;
8679
done->id = id;
8780
done->interval_s = check_interval_s;
81+
done->hc_option = ptr->_hc_option;
8882
brpc::ChannelOptions options;
8983
options.protocol = PROTOCOL_HTTP;
9084
options.max_retry = 0;
9185
options.timeout_ms =
92-
std::min((int64_t)FLAGS_health_check_timeout_ms, check_interval_s * 1000);
86+
std::min((int64_t)(done->hc_option.health_check_timeout_ms), check_interval_s * 1000);
9387
if (done->channel.Init(id, &options) != 0) {
9488
LOG(WARNING) << "Fail to init health check channel to SocketId=" << id;
9589
ptr->_ninflight_app_health_check.fetch_sub(
@@ -103,7 +97,7 @@ void HealthCheckManager::StartCheck(SocketId id, int64_t check_interval_s) {
10397
void* HealthCheckManager::AppCheck(void* arg) {
10498
OnAppHealthCheckDone* done = static_cast<OnAppHealthCheckDone*>(arg);
10599
done->cntl.Reset();
106-
done->cntl.http_request().uri() = FLAGS_health_check_path;
100+
done->cntl.http_request().uri() = done->hc_option.health_check_path;
107101
ControllerPrivateAccessor(&done->cntl).set_health_check_call();
108102
done->last_check_time_ms = butil::gettimeofday_ms();
109103
done->channel.CallMethod(NULL, &done->cntl, NULL, NULL, done);
@@ -121,14 +115,14 @@ void OnAppHealthCheckDone::Run() {
121115
}
122116
if (!cntl.Failed() || ptr->Failed()) {
123117
LOG_IF(INFO, !cntl.Failed()) << "Succeeded to call "
124-
<< ptr->remote_side() << FLAGS_health_check_path;
118+
<< ptr->remote_side() << hc_option.health_check_path;
125119
// if ptr->Failed(), previous SetFailed would trigger next round
126120
// of hc, just return here.
127121
ptr->_ninflight_app_health_check.fetch_sub(
128122
1, butil::memory_order_relaxed);
129123
return;
130124
}
131-
RPC_VLOG << "Fail to check path=" << FLAGS_health_check_path
125+
RPC_VLOG << "Fail to check path=" << hc_option.health_check_path
132126
<< ", " << cntl.ErrorText();
133127

134128
int64_t sleep_time_ms =
@@ -206,14 +200,14 @@ bool HealthCheckTask::OnTriggeringTask(timespec* next_abstime) {
206200
hc = ptr->CheckHealth();
207201
}
208202
if (hc == 0) {
209-
if (!FLAGS_health_check_path.empty()) {
203+
if (!ptr->health_check_path().empty()) {
210204
ptr->_ninflight_app_health_check.fetch_add(
211205
1, butil::memory_order_relaxed);
212206
}
213207
// See comments above.
214208
ptr->Revive(2/*note*/);
215209
ptr->_hc_count = 0;
216-
if (!FLAGS_health_check_path.empty()) {
210+
if (!ptr->health_check_path().empty()) {
217211
HealthCheckManager::StartCheck(_id, ptr->_health_check_interval_s);
218212
}
219213
ptr->AfterHCCompleted();

src/brpc/details/naming_service_thread.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ void NamingServiceThread::Actions::ResetServers(
126126
// to pick those Sockets with the right settings during OnAddedServers
127127
const SocketMapKey key(_added[i], _owner->_options.channel_signature);
128128
CHECK_EQ(0, SocketMapInsert(key, &tagged_id.id, _owner->_options.ssl_ctx,
129-
_owner->_options.use_rdma));
129+
_owner->_options.use_rdma, _owner->_options.hc_option));
130130
_added_sockets.push_back(tagged_id);
131131
}
132132

src/brpc/details/naming_service_thread.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ struct GetNamingServiceThreadOptions {
5050
bool succeed_without_server;
5151
bool log_succeed_without_server;
5252
bool use_rdma;
53+
HealthCheckOption hc_option;
5354
ChannelSignature channel_signature;
5455
std::shared_ptr<SocketSSLContext> ssl_ctx;
5556
};

src/brpc/health_check_option.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#ifndef BRPC_HEALTH_CHECK_OPTION_H
19+
#define BRPC_HEALTH_CHECK_OPTION_H
20+
21+
#include <string>
22+
23+
namespace brpc {
24+
25+
struct HealthCheckOption {
26+
// Http path of health check call
27+
std::string health_check_path;
28+
// The timeout for both establishing the connection and the http call to health_check_path over the connection
29+
int32_t health_check_timeout_ms{500};
30+
};
31+
32+
} // namespace brpc
33+
34+
#endif // BRPC_HEALTH_CHECK_OPTION_H

src/brpc/socket.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ DEFINE_int32(connect_timeout_as_unreachable, 3,
9999
"times *continuously*, the error is changed to ENETUNREACH which "
100100
"fails the main socket as well when this socket is pooled.");
101101

102-
DECLARE_int32(health_check_timeout_ms);
103102
DECLARE_bool(usercode_in_coroutine);
104103

105104
static bool validate_connect_timeout_as_unreachable(const char*, int32_t v) {
@@ -743,6 +742,7 @@ int Socket::OnCreated(const SocketOptions& options) {
743742
reset_parsing_context(options.initial_parsing_context);
744743
_correlation_id = 0;
745744
_health_check_interval_s = options.health_check_interval_s;
745+
_hc_option = options.hc_option;
746746
_is_hc_related_ref_held = false;
747747
_hc_started.store(false, butil::memory_order_relaxed);
748748
_ninprocess.store(1, butil::memory_order_relaxed);
@@ -2593,7 +2593,7 @@ int Socket::CheckHealth() {
25932593
LOG(INFO) << "Checking " << *this;
25942594
}
25952595
const timespec duetime =
2596-
butil::milliseconds_from_now(FLAGS_health_check_timeout_ms);
2596+
butil::milliseconds_from_now(_hc_option.health_check_timeout_ms);
25972597
const int connected_fd = Connect(&duetime, NULL, NULL);
25982598
if (connected_fd >= 0) {
25992599
::close(connected_fd);

src/brpc/socket.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "brpc/http_method.h"
4242
#include "brpc/event_dispatcher.h"
4343
#include "brpc/versioned_ref_with_id.h"
44+
#include "brpc/health_check_option.h"
4445

4546
namespace brpc {
4647
namespace policy {
@@ -285,6 +286,7 @@ struct SocketOptions {
285286
int tcp_user_timeout_ms{ -1};
286287
// Tag of this socket
287288
bthread_tag_t bthread_tag{bthread_self_tag()};
289+
HealthCheckOption hc_option;
288290
};
289291

290292
// Abstractions on reading from and writing into file descriptors.
@@ -412,6 +414,10 @@ friend void DereferenceSocket(Socket*);
412414
// Initialized by SocketOptions.health_check_interval_s.
413415
int health_check_interval() const { return _health_check_interval_s; }
414416

417+
const std::string& health_check_path() const { return _hc_option.health_check_path; }
418+
419+
int32_t health_check_timeout_ms() const {return _hc_option.health_check_timeout_ms; }
420+
415421
// True if health checking is enabled.
416422
bool HCEnabled() const {
417423
// This fence makes sure that we see change of
@@ -980,6 +986,7 @@ friend void DereferenceSocket(Socket*);
980986
int _tcp_user_timeout_ms;
981987

982988
HttpMethod _http_request_method;
989+
HealthCheckOption _hc_option;
983990
};
984991

985992
} // namespace brpc

src/brpc/socket_map.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ SocketMap* get_or_new_client_side_socket_map() {
9191

9292
int SocketMapInsert(const SocketMapKey& key, SocketId* id,
9393
const std::shared_ptr<SocketSSLContext>& ssl_ctx,
94-
bool use_rdma) {
95-
return get_or_new_client_side_socket_map()->Insert(key, id, ssl_ctx, use_rdma);
94+
bool use_rdma,
95+
const HealthCheckOption& hc_option) {
96+
return get_or_new_client_side_socket_map()->Insert(key, id, ssl_ctx, use_rdma, hc_option);
9697
}
9798

9899
int SocketMapFind(const SocketMapKey& key, SocketId* id) {
@@ -225,7 +226,8 @@ void SocketMap::ShowSocketMapInBvarIfNeed() {
225226

226227
int SocketMap::Insert(const SocketMapKey& key, SocketId* id,
227228
const std::shared_ptr<SocketSSLContext>& ssl_ctx,
228-
bool use_rdma) {
229+
bool use_rdma,
230+
const HealthCheckOption& hc_option) {
229231
ShowSocketMapInBvarIfNeed();
230232

231233
std::unique_lock<butil::Mutex> mu(_mutex);
@@ -249,6 +251,7 @@ int SocketMap::Insert(const SocketMapKey& key, SocketId* id,
249251
opt.remote_side = key.peer.addr;
250252
opt.initial_ssl_ctx = ssl_ctx;
251253
opt.use_rdma = use_rdma;
254+
opt.hc_option = hc_option;
252255
if (_options.socket_creator->CreateSocket(opt, &tmp_id) != 0) {
253256
PLOG(FATAL) << "Fail to create socket to " << key.peer;
254257
return -1;

0 commit comments

Comments
 (0)