Skip to content

Commit 0148b39

Browse files
caiconghui and caiconghui1 authored
[fix](metric) fix be down when enable_system_metrics is false (#16140)
If enable_system_metrics is set to false, the BE goes down at startup with the following message: "enable metric calculator failed, maybe you set enable_system_metrics to false ". This commit fixes that. Co-authored-by: caiconghui1 <caiconghui1@jd.com>
1 parent fa14b7e commit 0148b39

File tree

5 files changed

+56
-39
lines changed

5 files changed

+56
-39
lines changed

be/src/common/daemon.cpp

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,11 @@ void Daemon::calculate_metrics_thread() {
242242
if (last_ts == -1L) {
243243
last_ts = GetMonoTimeMicros() / 1000;
244244
lst_query_bytes = DorisMetrics::instance()->query_scan_bytes->value();
245-
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
246-
DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes,
247-
&lst_net_receive_bytes);
245+
if (config::enable_system_metrics) {
246+
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
247+
DorisMetrics::instance()->system_metrics()->get_network_traffic(
248+
&lst_net_send_bytes, &lst_net_receive_bytes);
249+
}
248250
} else {
249251
int64_t current_ts = GetMonoTimeMicros() / 1000;
250252
long interval = (current_ts - last_ts) / 1000;
@@ -256,23 +258,27 @@ void Daemon::calculate_metrics_thread() {
256258
DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 0 : qps);
257259
lst_query_bytes = current_query_bytes;
258260

259-
// 2. max disk io util
260-
DorisMetrics::instance()->max_disk_io_util_percent->set_value(
261-
DorisMetrics::instance()->system_metrics()->get_max_io_util(lst_disks_io_time,
262-
15));
263-
// update lst map
264-
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
265-
266-
// 3. max network traffic
267-
int64_t max_send = 0;
268-
int64_t max_receive = 0;
269-
DorisMetrics::instance()->system_metrics()->get_max_net_traffic(
270-
lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive);
271-
DorisMetrics::instance()->max_network_send_bytes_rate->set_value(max_send);
272-
DorisMetrics::instance()->max_network_receive_bytes_rate->set_value(max_receive);
273-
// update lst map
274-
DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes,
275-
&lst_net_receive_bytes);
261+
if (config::enable_system_metrics) {
262+
// 2. max disk io util
263+
DorisMetrics::instance()->system_metrics()->update_max_disk_io_util_percent(
264+
lst_disks_io_time, 15);
265+
266+
// update lst map
267+
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
268+
269+
// 3. max network traffic
270+
int64_t max_send = 0;
271+
int64_t max_receive = 0;
272+
DorisMetrics::instance()->system_metrics()->get_max_net_traffic(
273+
lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive);
274+
DorisMetrics::instance()->system_metrics()->update_max_network_send_bytes_rate(
275+
max_send);
276+
DorisMetrics::instance()->system_metrics()->update_max_network_receive_bytes_rate(
277+
max_receive);
278+
// update lst map
279+
DorisMetrics::instance()->system_metrics()->get_network_traffic(
280+
&lst_net_send_bytes, &lst_net_receive_bytes);
281+
}
276282
}
277283
} while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(15)));
278284
}
@@ -381,12 +387,6 @@ void Daemon::start() {
381387
CHECK(st.ok()) << st;
382388

383389
if (config::enable_metric_calculator) {
384-
CHECK(DorisMetrics::instance()->is_inited())
385-
<< "enable metric calculator failed, maybe you set enable_system_metrics to false "
386-
<< " or there may be some hardware error which causes metric init failed, please "
387-
"check log first;"
388-
<< " you can set enable_metric_calculator = false to quickly recover ";
389-
390390
st = Thread::create(
391391
"Daemon", "calculate_metrics_thread",
392392
[this]() { this->calculate_metrics_thread(); }, &_calculate_metrics_thread);

be/src/util/doris_metrics.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,6 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, MetricUnit::NOUN
142142
DEFINE_HISTOGRAM_METRIC_PROTOTYPE_2ARG(tablet_version_num_distribution, MetricUnit::NOUNIT);
143143

144144
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_scan_bytes_per_second, MetricUnit::BYTES);
145-
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT);
146-
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, MetricUnit::BYTES);
147-
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, MetricUnit::BYTES);
148145

149146
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(readable_blocks_total, MetricUnit::BLOCKS);
150147
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(writable_blocks_total, MetricUnit::BLOCKS);
@@ -275,9 +272,6 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) {
275272
HISTOGRAM_METRIC_REGISTER(_server_metric_entity, tablet_version_num_distribution);
276273

277274
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, query_scan_bytes_per_second);
278-
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_disk_io_util_percent);
279-
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_network_send_bytes_rate);
280-
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_network_receive_bytes_rate);
281275

282276
INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_rows);
283277
INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_bytes);
@@ -325,7 +319,6 @@ void DorisMetrics::initialize(bool init_system_metrics, const std::set<std::stri
325319
if (init_system_metrics) {
326320
_system_metrics.reset(
327321
new SystemMetrics(&_metric_registry, disk_devices, network_interfaces));
328-
_is_inited = true;
329322
}
330323
}
331324

be/src/util/doris_metrics.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,6 @@ class DorisMetrics {
147147
// The following metrics will be calculated
148148
// by metric calculator
149149
IntGauge* query_scan_bytes_per_second;
150-
IntGauge* max_disk_io_util_percent;
151-
IntGauge* max_network_send_bytes_rate;
152-
IntGauge* max_network_receive_bytes_rate;
153150

154151
// Metrics related with file reader/writer
155152
IntCounter* local_file_reader_total;
@@ -234,7 +231,6 @@ class DorisMetrics {
234231
MetricRegistry* metric_registry() { return &_metric_registry; }
235232
SystemMetrics* system_metrics() { return _system_metrics.get(); }
236233
MetricEntity* server_entity() { return _server_metric_entity.get(); }
237-
bool is_inited() const { return _is_inited; }
238234

239235
private:
240236
// Don't allow constructor
@@ -253,8 +249,6 @@ class DorisMetrics {
253249
std::unique_ptr<SystemMetrics> _system_metrics;
254250

255251
std::shared_ptr<MetricEntity> _server_metric_entity;
256-
257-
bool _is_inited = false;
258252
};
259253

260254
}; // namespace doris

be/src/util/system_metrics.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,10 @@ struct ProcMetrics {
301301
IntAtomicCounter* proc_procs_blocked;
302302
};
303303

304+
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_disk_io_util_percent, MetricUnit::PERCENT);
305+
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_send_bytes_rate, MetricUnit::BYTES);
306+
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_receive_bytes_rate, MetricUnit::BYTES);
307+
304308
const char* SystemMetrics::_s_hook_name = "system_metrics";
305309

306310
SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::string>& disk_devices,
@@ -318,6 +322,10 @@ SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::strin
318322
_install_snmp_metrics(_server_entity.get());
319323
_install_load_avg_metrics(_server_entity.get());
320324
_install_proc_metrics(_server_entity.get());
325+
326+
INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_disk_io_util_percent);
327+
INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_network_send_bytes_rate);
328+
INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_network_receive_bytes_rate);
321329
}
322330

323331
SystemMetrics::~SystemMetrics() {
@@ -858,6 +866,19 @@ void SystemMetrics::get_max_net_traffic(const std::map<std::string, int64_t>& ls
858866
*rcv_rate = max_rcv / interval_sec;
859867
}
860868

869+
void SystemMetrics::update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value,
870+
int64_t interval_sec) {
871+
_max_disk_io_util_percent->set_value(get_max_io_util(lst_value, interval_sec));
872+
}
873+
874+
void SystemMetrics::update_max_network_send_bytes_rate(int64_t max_send_bytes_rate) {
875+
_max_network_send_bytes_rate->set_value(max_send_bytes_rate);
876+
}
877+
878+
void SystemMetrics::update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate) {
879+
_max_network_receive_bytes_rate->set_value(max_receive_bytes_rate);
880+
}
881+
861882
void SystemMetrics::_install_proc_metrics(MetricEntity* entity) {
862883
_proc_metrics.reset(new ProcMetrics(entity));
863884
}

be/src/util/system_metrics.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ class SystemMetrics {
5151
const std::map<std::string, int64_t>& lst_rcv_map,
5252
int64_t interval_sec, int64_t* send_rate, int64_t* rcv_rate);
5353

54+
void update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value,
55+
int64_t interval_sec);
56+
void update_max_network_send_bytes_rate(int64_t max_send_bytes_rate);
57+
void update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate);
58+
5459
private:
5560
void _install_cpu_metrics();
5661
// On Intel(R) Xeon(R) CPU E5-2450 0 @ 2.10GHz;
@@ -99,6 +104,10 @@ class SystemMetrics {
99104
size_t _line_buf_size = 0;
100105
MetricRegistry* _registry = nullptr;
101106
std::shared_ptr<MetricEntity> _server_entity = nullptr;
107+
108+
IntGauge* _max_disk_io_util_percent;
109+
IntGauge* _max_network_send_bytes_rate;
110+
IntGauge* _max_network_receive_bytes_rate;
102111
};
103112

104113
} // namespace doris

0 commit comments

Comments
 (0)