Skip to content

Commit 5958c56

Browse files
committed
in_podman_metrics: add per-container disk I/O metrics
Add four counters exposing per-container block I/O, read from the cgroups v2 io.stat file and summed across block devices: - container_disk_read_bytes_total - container_disk_write_bytes_total - container_disk_reads_total - container_disk_writes_total This complements the existing CPU, memory and network metrics. The values are collected in the cgroups v2 path; on cgroups v1 hosts the counters are reported as invalid and skipped. Signed-off-by: Stefano Tondo <stondo@gmail.com>
1 parent 0481348 commit 5958c56

4 files changed

Lines changed: 94 additions & 0 deletions

File tree

plugins/in_podman_metrics/podman_metrics.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_s
158158
cnt->rss = UINT64_MAX;
159159
cnt->cpu_user = UINT64_MAX;
160160
cnt->cpu = UINT64_MAX;
161+
cnt->disk_read_bytes = UINT64_MAX;
162+
cnt->disk_write_bytes = UINT64_MAX;
163+
cnt->disk_reads = UINT64_MAX;
164+
cnt->disk_writes = UINT64_MAX;
161165

162166
mk_list_init(&cnt->net_data);
163167

@@ -303,6 +307,10 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
303307
* - container_network_receive_errors_total
304308
* - container_network_transmit_bytes_total
305309
* - container_network_transmit_errors_total
310+
* - container_disk_read_bytes_total
311+
* - container_disk_write_bytes_total
312+
* - container_disk_reads_total
313+
* - container_disk_writes_total
306314
*/
307315
static int create_counters(struct flb_in_metrics *ctx)
308316
{
@@ -328,6 +336,14 @@ static int create_counters(struct flb_in_metrics *ctx)
328336
DESCRIPTION_CPU_USER, NULL, cnt->cpu_user);
329337
create_counter(ctx, &ctx->c_cpu, cnt->id, cnt->name, cnt->image_name, COUNTER_CPU_PREFIX, FIELDS_METRIC, COUNTER_CPU,
330338
DESCRIPTION_CPU, NULL, cnt->cpu);
339+
create_counter(ctx, &ctx->c_disk_read_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_READ_BYTES,
340+
DESCRIPTION_DISK_READ_BYTES, NULL, cnt->disk_read_bytes);
341+
create_counter(ctx, &ctx->c_disk_write_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_WRITE_BYTES,
342+
DESCRIPTION_DISK_WRITE_BYTES, NULL, cnt->disk_write_bytes);
343+
create_counter(ctx, &ctx->c_disk_reads, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_READS,
344+
DESCRIPTION_DISK_READS, NULL, cnt->disk_reads);
345+
create_counter(ctx, &ctx->c_disk_writes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_WRITES,
346+
DESCRIPTION_DISK_WRITES, NULL, cnt->disk_writes);
331347
mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data)
332348
{
333349
iface = mk_list_entry(inner_head, struct net_iface, _head);
@@ -423,6 +439,10 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
423439
ctx->c_memory_limit = NULL;
424440
ctx->c_cpu_user = NULL;
425441
ctx->c_cpu = NULL;
442+
ctx->c_disk_read_bytes = NULL;
443+
ctx->c_disk_write_bytes = NULL;
444+
ctx->c_disk_reads = NULL;
445+
ctx->c_disk_writes = NULL;
426446
ctx->rx_bytes = NULL;
427447
ctx->rx_errors = NULL;
428448
ctx->tx_bytes = NULL;

plugins/in_podman_metrics/podman_metrics_config.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@
8686
#define STAT_KEY_CPU "usage_usec"
8787
#define STAT_KEY_CPU_USER "user_usec"
8888

89+
/* Field tokens in cgroups v2 io.stat (per block device, summed across devices) */
90+
#define IO_STAT_KEY_READ_BYTES "rbytes="
91+
#define IO_STAT_KEY_WRITE_BYTES "wbytes="
92+
#define IO_STAT_KEY_READS "rios="
93+
#define IO_STAT_KEY_WRITES "wios="
94+
8995
/* Static lists of fields in counters or gauges */
9096
#define FIELDS_METRIC (char*[3]){"id", "name", "image" }
9197
#define FIELDS_METRIC_WITH_IFACE (char*[4]){"id", "name", "image", "interface" }
@@ -107,6 +113,7 @@
107113
#define V2_SYSFS_FILE_CPU_STAT "cpu.stat"
108114
#define V2_SYSFS_FILE_PIDS "cgroup.procs"
109115
#define V2_SYSFS_FILE_PIDS_ALT "containers/cgroup.procs"
116+
#define V2_SYSFS_FILE_IO_STAT "io.stat"
110117

111118
/* Values used to construct counters/gauges names and descriptions */
112119
#define COUNTER_PREFIX "container"
@@ -138,6 +145,16 @@
138145
#define COUNTER_TX_ERRORS "transmit_errors_total"
139146
#define DESCRIPTION_TX_ERRORS "Network transmitted errors"
140147

148+
/*
 * Name parts and description strings of the four per-container block I/O
 * counters. create_counters() passes COUNTER_DISK_PREFIX together with one
 * COUNTER_DISK_xxx / DESCRIPTION_DISK_xxx pair to create_counter() for each
 * counter.
 */
#define COUNTER_DISK_PREFIX "disk"
149+
#define COUNTER_DISK_READ_BYTES "read_bytes_total"
150+
#define DESCRIPTION_DISK_READ_BYTES "Container block I/O bytes read"
151+
#define COUNTER_DISK_WRITE_BYTES "write_bytes_total"
152+
#define DESCRIPTION_DISK_WRITE_BYTES "Container block I/O bytes written"
153+
#define COUNTER_DISK_READS "reads_total"
154+
#define DESCRIPTION_DISK_READS "Container block I/O reads completed"
155+
#define COUNTER_DISK_WRITES "writes_total"
156+
#define DESCRIPTION_DISK_WRITES "Container block I/O writes completed"
157+
141158

142159
struct net_iface {
143160
flb_sds_t name;
@@ -160,6 +177,10 @@ struct container {
160177
uint64_t cpu;
161178
uint64_t cpu_user;
162179
uint64_t rss;
180+
uint64_t disk_read_bytes;
181+
uint64_t disk_write_bytes;
182+
uint64_t disk_reads;
183+
uint64_t disk_writes;
163184

164185
struct mk_list net_data;
165186
};
@@ -192,6 +213,10 @@ struct flb_in_metrics {
192213
struct cmt_counter *rx_errors;
193214
struct cmt_counter *tx_bytes;
194215
struct cmt_counter *tx_errors;
216+
struct cmt_counter *c_disk_read_bytes;
217+
struct cmt_counter *c_disk_write_bytes;
218+
struct cmt_counter *c_disk_reads;
219+
struct cmt_counter *c_disk_writes;
195220

196221
/* cgroup version used by host */
197222
int cgroup_version;

plugins/in_podman_metrics/podman_metrics_data.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,50 @@ uint64_t get_data_from_sysfs(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_
132132
return data;
133133
}
134134

135+
/*
136+
* Sum a cgroups v2 io.stat field across all block devices. io.stat lines look
137+
* like: "8:0 rbytes=1024 wbytes=0 rios=2 wios=0 dbytes=0 dios=0". The key is the
138+
* field token including '=', e.g. "rbytes=". Returns the summed value, or
139+
* UINT64_MAX if the file cannot be opened (interpreted as an invalid counter).
140+
* An existing but empty io.stat (no I/O yet) correctly returns 0.
141+
*/
142+
uint64_t sum_io_stat_field(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, flb_sds_t key)
143+
{
144+
char path[SYSFS_FILE_PATH_SIZE];
145+
uint64_t total = 0;
146+
FILE *fp;
147+
char *line = NULL;
148+
char *pos;
149+
size_t len = 0;
150+
size_t key_len;
151+
152+
if (dir == NULL) {
153+
return UINT64_MAX;
154+
}
155+
156+
snprintf(path, sizeof(path), "%s/%s", dir, name);
157+
158+
fp = fopen(path, "r");
159+
if (!fp) {
160+
flb_plg_warn(ctx->ins, "Failed to read %s", path);
161+
return UINT64_MAX;
162+
}
163+
164+
key_len = strlen(key);
165+
while (getline(&line, &len, fp) != -1) {
166+
pos = line;
167+
while ((pos = strstr(pos, key)) != NULL) {
168+
pos += key_len;
169+
total += strtoull(pos, NULL, 10);
170+
}
171+
}
172+
flb_free(line);
173+
fclose(fp);
174+
175+
flb_plg_debug(ctx->ins, "%s [%s]: %lu", path, key, total);
176+
return total;
177+
}
178+
135179
/*
136180
* Check if container sysfs data is present in the previously generated list of sysfs directories.
137181
* For cgroups v1, use subsystem (directory, for example memory) to search full path.
@@ -367,6 +411,10 @@ int fill_counters_with_sysfs_data_v2(struct flb_in_metrics *ctx)
367411
cnt->memory_limit = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_LIMIT, NULL);
368412
cnt->cpu_user = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU_USER);
369413
cnt->cpu = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU);
414+
cnt->disk_read_bytes = sum_io_stat_field(ctx, path, V2_SYSFS_FILE_IO_STAT, IO_STAT_KEY_READ_BYTES);
415+
cnt->disk_write_bytes = sum_io_stat_field(ctx, path, V2_SYSFS_FILE_IO_STAT, IO_STAT_KEY_WRITE_BYTES);
416+
cnt->disk_reads = sum_io_stat_field(ctx, path, V2_SYSFS_FILE_IO_STAT, IO_STAT_KEY_READS);
417+
cnt->disk_writes = sum_io_stat_field(ctx, path, V2_SYSFS_FILE_IO_STAT, IO_STAT_KEY_WRITES);
370418
pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS, NULL);
371419
if (!pid || pid == UINT64_MAX) {
372420
pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS_ALT, NULL);

plugins/in_podman_metrics/podman_metrics_data.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ int destroy_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **g);
3737
uint64_t read_from_file(struct flb_in_metrics *ctx, flb_sds_t path);
3838
uint64_t read_key_value_from_file(struct flb_in_metrics *ctx, flb_sds_t path, flb_sds_t key);
3939
uint64_t get_data_from_sysfs(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, flb_sds_t key);
40+
uint64_t sum_io_stat_field(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, flb_sds_t key);
4041

4142
int get_container_sysfs_subdirectory(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t subsystem, flb_sds_t *path);
4243
int get_net_data_from_proc(struct flb_in_metrics *ctx, struct container *cnt, uint64_t pid);

0 commit comments

Comments
 (0)