Skip to content

Commit 34f8b6f

Browse files
committed
feat(host_metrics source): add temperature metrics collector
1 parent 984aa76 commit 34f8b6f

5 files changed

Lines changed: 114 additions & 11 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
The `host_metrics` source can now collect hardware temperature readings via a
2+
new `temperature` collector. When enabled, it emits `temperature_celsius`,
3+
`temperature_max_celsius`, and `temperature_critical_celsius` gauges, each
4+
tagged with the `component` label of the sensor it was read from.
5+
6+
The collector is opt-in: add `temperature` to the `collectors` list to enable
7+
it. Components that do not report a given value (for example a missing critical
8+
threshold) are skipped, and environments without temperature sensors simply
9+
produce no metrics.
10+
11+
authors: somaz94

src/sources/host_metrics/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ mod network;
4040
mod process;
4141
#[cfg(target_os = "linux")]
4242
mod tcp;
43+
mod temperature;
4344

4445
/// Collector types.
4546
#[serde_as]
@@ -78,6 +79,9 @@ pub enum Collector {
7879

7980
/// Metrics related to TCP connections.
8081
TCP,
82+
83+
/// Metrics related to component temperatures.
84+
Temperature,
8185
}
8286

8387
/// Filtering configuration.
@@ -186,7 +190,7 @@ pub fn default_namespace() -> Option<String> {
186190
Some(String::from("host"))
187191
}
188192

189-
const fn example_collectors() -> [&'static str; 9] {
193+
const fn example_collectors() -> [&'static str; 10] {
190194
[
191195
"cgroups",
192196
"cpu",
@@ -197,6 +201,7 @@ const fn example_collectors() -> [&'static str; 9] {
197201
"memory",
198202
"network",
199203
"tcp",
204+
"temperature",
200205
]
201206
}
202207

@@ -420,6 +425,9 @@ impl HostMetrics {
420425
if self.config.has_collector(Collector::TCP) {
421426
self.tcp_metrics(&mut buffer).await;
422427
}
428+
if self.config.has_collector(Collector::Temperature) {
429+
self.temperature_metrics(&mut buffer).await;
430+
}
423431

424432
let metrics = buffer.metrics;
425433
self.events_received.emit(CountByteSize(
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use sysinfo::Components;
2+
use vector_lib::metric_tags;
3+
4+
use super::HostMetrics;
5+
6+
const COMPONENT: &str = "component";
7+
const TEMPERATURE_CELSIUS: &str = "temperature_celsius";
8+
const TEMPERATURE_MAX_CELSIUS: &str = "temperature_max_celsius";
9+
const TEMPERATURE_CRITICAL_CELSIUS: &str = "temperature_critical_celsius";
10+
11+
impl HostMetrics {
12+
pub async fn temperature_metrics(&self, output: &mut super::MetricsBuffer) {
13+
output.name = "temperature";
14+
let components = Components::new_with_refreshed_list();
15+
for component in &components {
16+
let label = component.label();
17+
let tags = || metric_tags!(COMPONENT => label);
18+
if let Some(temperature) = component.temperature() {
19+
output.gauge(TEMPERATURE_CELSIUS, temperature as f64, tags());
20+
}
21+
if let Some(max) = component.max() {
22+
output.gauge(TEMPERATURE_MAX_CELSIUS, max as f64, tags());
23+
}
24+
if let Some(critical) = component.critical() {
25+
output.gauge(TEMPERATURE_CRITICAL_CELSIUS, critical as f64, tags());
26+
}
27+
}
28+
}
29+
}
30+
31+
#[cfg(test)]
mod tests {
    use super::{
        super::{HostMetrics, HostMetricsConfig, MetricsBuffer, tests::all_gauges},
        COMPONENT,
    };

    /// Runs the temperature collector once and validates the shape of
    /// whatever it produced.
    ///
    /// Containers, virtual machines, and CI runners frequently expose no
    /// temperature sensors, so an empty result is accepted. Any metric that
    /// is produced must be a gauge, have a name beginning with `temperature`,
    /// and carry the `component` tag.
    #[tokio::test]
    async fn generates_temperature_metrics() {
        let mut buffer = MetricsBuffer::new(None);
        let host_metrics = HostMetrics::new(HostMetricsConfig::default());
        host_metrics.temperature_metrics(&mut buffer).await;
        let metrics = buffer.metrics;

        assert!(all_gauges(&metrics));

        for metric in metrics.iter() {
            let name = metric.name();
            assert!(
                name.starts_with("temperature"),
                "unexpected metric name: {}",
                name
            );

            let tags = metric.tags().expect("temperature metric is missing tags");
            assert!(
                tags.contains_key(COMPONENT),
                "temperature metric is missing the `component` tag"
            );
        }
    }
}

website/cue/reference/components/sources/generated/host_metrics.cue

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,17 +80,18 @@ generated: components: sources: host_metrics: configuration: {
8080
8181
Only available on Linux.
8282
"""
83-
cpu: "Metrics related to CPU utilization."
84-
disk: "Metrics related to disk I/O utilization."
85-
filesystem: "Metrics related to filesystem space utilization."
86-
host: "Metrics related to the host."
87-
load: "Metrics related to the system load average."
88-
memory: "Metrics related to memory utilization."
89-
network: "Metrics related to network utilization."
90-
process: "Metrics related to Process utilization."
91-
tcp: "Metrics related to TCP connections."
83+
cpu: "Metrics related to CPU utilization."
84+
disk: "Metrics related to disk I/O utilization."
85+
filesystem: "Metrics related to filesystem space utilization."
86+
host: "Metrics related to the host."
87+
load: "Metrics related to the system load average."
88+
memory: "Metrics related to memory utilization."
89+
network: "Metrics related to network utilization."
90+
process: "Metrics related to Process utilization."
91+
tcp: "Metrics related to TCP connections."
92+
temperature: "Metrics related to component temperatures."
9293
}
93-
examples: ["cgroups", "cpu", "disk", "filesystem", "load", "host", "memory", "network", "tcp"]
94+
examples: ["cgroups", "cpu", "disk", "filesystem", "load", "host", "memory", "network", "tcp", "temperature"]
9495
}
9596
}
9697
}

website/cue/reference/components/sources/host_metrics.cue

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@ components: sources: host_metrics: {
193193
}
194194
}
195195

196+
// Host temperature
197+
temperature_celsius: _host & _temperature_gauge & {description: "The current temperature reported by a hardware component, in degrees Celsius."}
198+
temperature_max_celsius: _host & _temperature_gauge & {description: "The highest temperature recorded for a hardware component, in degrees Celsius."}
199+
temperature_critical_celsius: _host & _temperature_gauge & {description: "The temperature at which a hardware component is considered critical, in degrees Celsius."}
200+
196201
// Helpers
197202
_host: {
198203
default_namespace: "host"
@@ -307,5 +312,17 @@ components: sources: host_metrics: {
307312
}
308313
}
309314
}
315+
316+
// Shared definition for the gauges emitted by the `temperature` collector.
// It extends the common host metrics tags with the `component` tag.
_temperature_gauge: {
	type: "gauge"
	tags: _host_metrics_tags & {
		collector: {
			examples: ["temperature"]
		}
		// Label of the sensor the reading was taken from.
		component: {
			description: "The label of the hardware component the temperature was read from."
			required:    true
			examples: ["Core 0", "coretemp Package id 0", "nvme Composite"]
		}
	}
}
310327
}
311328
}

0 commit comments

Comments
 (0)