|
| 1 | +// Copyright 2021-Present Datadog, Inc. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use std::time::Duration; |
| 16 | + |
| 17 | +use procfs::ProcResult; |
| 18 | +use procfs::process::Process; |
| 19 | +use quickwit_common::metrics::{ |
| 20 | + IO_READ_BYTES, IO_READ_SYSCALLS, IO_WRITE_BYTES, IO_WRITE_SYSCALLS, |
| 21 | +}; |
| 22 | +use quickwit_common::rate_limited_tracing::rate_limited_warn; |
| 23 | +use quickwit_metrics::Counter; |
| 24 | +use tracing::error; |
| 25 | + |
/// Period between two consecutive reads of `/proc/self/io` in the metrics loop.
const PROC_IO_METRICS_POLLING_INTERVAL: Duration = Duration::from_secs(5);
| 27 | + |
| 28 | +/// Reads `/proc/self/io` on a fixed interval and publishes the cumulative byte and syscall |
| 29 | +/// counters as Prometheus counters. |
| 30 | +/// |
| 31 | +/// `/proc/self/io` exposes monotonic per-process counters maintained by the kernel. Prometheus |
| 32 | +/// counters are also monotonic but only expose an `inc_by(delta)` API, so we keep the previously |
| 33 | +/// observed value and increment by the difference on each poll. |
| 34 | +async fn proc_io_metrics_loop() -> ProcResult<()> { |
| 35 | + let process = Process::myself()?; |
| 36 | + |
| 37 | + let mut previous_read_bytes: u64 = 0; |
| 38 | + let mut previous_write_bytes: u64 = 0; |
| 39 | + let mut previous_read_syscalls: u64 = 0; |
| 40 | + let mut previous_write_syscalls: u64 = 0; |
| 41 | + |
| 42 | + let mut poll_interval = tokio::time::interval(PROC_IO_METRICS_POLLING_INTERVAL); |
| 43 | + poll_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); |
| 44 | + |
| 45 | + loop { |
| 46 | + poll_interval.tick().await; |
| 47 | + |
| 48 | + let io = match process.io() { |
| 49 | + Ok(io) => io, |
| 50 | + Err(error) => { |
| 51 | + rate_limited_warn!( |
| 52 | + limit_per_min = 1, |
| 53 | + %error, |
| 54 | + "failed to read /proc/self/io" |
| 55 | + ); |
| 56 | + continue; |
| 57 | + } |
| 58 | + }; |
| 59 | + increment_counter(&IO_READ_BYTES, io.read_bytes, &mut previous_read_bytes); |
| 60 | + increment_counter(&IO_WRITE_BYTES, io.write_bytes, &mut previous_write_bytes); |
| 61 | + increment_counter(&IO_READ_SYSCALLS, io.syscr, &mut previous_read_syscalls); |
| 62 | + increment_counter(&IO_WRITE_SYSCALLS, io.syscw, &mut previous_write_syscalls); |
| 63 | + } |
| 64 | +} |
| 65 | + |
| 66 | +fn increment_counter(counter: &Counter, current: u64, previous: &mut u64) { |
| 67 | + debug_assert!( |
| 68 | + current >= *previous, |
| 69 | + "/proc/self/io counters should be monotonic for a given PID" |
| 70 | + ); |
| 71 | + let diff = current.saturating_sub(*previous); |
| 72 | + counter.inc_by(diff); |
| 73 | + *previous = current; |
| 74 | +} |
| 75 | + |
| 76 | +pub fn start_proc_io_metrics_loop() { |
| 77 | + tokio::task::spawn(async { |
| 78 | + if let Err(error) = proc_io_metrics_loop().await { |
| 79 | + error!(%error, "failed to collect /proc/self/io metrics"); |
| 80 | + } |
| 81 | + }); |
| 82 | +} |
0 commit comments