Skip to content

Commit 32fc804

Browse files
committed
in_podman_metrics: add per-container disk I/O metrics
Add four counters exposing per-container block I/O, read from the cgroups v2 io.stat file and summed across block devices: - container_disk_read_bytes_total - container_disk_write_bytes_total - container_disk_reads_total - container_disk_writes_total This complements the existing CPU, memory and network metrics. The values are collected in the cgroups v2 path; on cgroups v1 hosts the counters are reported as invalid and skipped. Signed-off-by: Stefano Tondo <stondo@gmail.com>
1 parent 0481348 commit 32fc804

4 files changed

Lines changed: 114 additions & 0 deletions

File tree

plugins/in_podman_metrics/podman_metrics.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ static int add_container_to_list(struct flb_in_metrics *ctx, flb_sds_t id, flb_s
158158
cnt->rss = UINT64_MAX;
159159
cnt->cpu_user = UINT64_MAX;
160160
cnt->cpu = UINT64_MAX;
161+
cnt->disk_read_bytes = UINT64_MAX;
162+
cnt->disk_write_bytes = UINT64_MAX;
163+
cnt->disk_reads = UINT64_MAX;
164+
cnt->disk_writes = UINT64_MAX;
161165

162166
mk_list_init(&cnt->net_data);
163167

@@ -303,6 +307,10 @@ static int create_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **gauge, fl
303307
* - container_network_receive_errors_total
304308
* - container_network_transmit_bytes_total
305309
* - container_network_transmit_errors_total
310+
* - container_disk_read_bytes_total
311+
* - container_disk_write_bytes_total
312+
* - container_disk_reads_total
313+
* - container_disk_writes_total
306314
*/
307315
static int create_counters(struct flb_in_metrics *ctx)
308316
{
@@ -328,6 +336,14 @@ static int create_counters(struct flb_in_metrics *ctx)
328336
DESCRIPTION_CPU_USER, NULL, cnt->cpu_user);
329337
create_counter(ctx, &ctx->c_cpu, cnt->id, cnt->name, cnt->image_name, COUNTER_CPU_PREFIX, FIELDS_METRIC, COUNTER_CPU,
330338
DESCRIPTION_CPU, NULL, cnt->cpu);
339+
create_counter(ctx, &ctx->c_disk_read_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_READ_BYTES,
340+
DESCRIPTION_DISK_READ_BYTES, NULL, cnt->disk_read_bytes);
341+
create_counter(ctx, &ctx->c_disk_write_bytes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_WRITE_BYTES,
342+
DESCRIPTION_DISK_WRITE_BYTES, NULL, cnt->disk_write_bytes);
343+
create_counter(ctx, &ctx->c_disk_reads, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_READS,
344+
DESCRIPTION_DISK_READS, NULL, cnt->disk_reads);
345+
create_counter(ctx, &ctx->c_disk_writes, cnt->id, cnt->name, cnt->image_name, COUNTER_DISK_PREFIX, FIELDS_METRIC, COUNTER_DISK_WRITES,
346+
DESCRIPTION_DISK_WRITES, NULL, cnt->disk_writes);
331347
mk_list_foreach_safe(inner_head, inner_tmp, &cnt->net_data)
332348
{
333349
iface = mk_list_entry(inner_head, struct net_iface, _head);
@@ -423,6 +439,10 @@ static int in_metrics_init(struct flb_input_instance *in, struct flb_config *con
423439
ctx->c_memory_limit = NULL;
424440
ctx->c_cpu_user = NULL;
425441
ctx->c_cpu = NULL;
442+
ctx->c_disk_read_bytes = NULL;
443+
ctx->c_disk_write_bytes = NULL;
444+
ctx->c_disk_reads = NULL;
445+
ctx->c_disk_writes = NULL;
426446
ctx->rx_bytes = NULL;
427447
ctx->rx_errors = NULL;
428448
ctx->tx_bytes = NULL;

plugins/in_podman_metrics/podman_metrics_config.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@
8686
#define STAT_KEY_CPU "usage_usec"
8787
#define STAT_KEY_CPU_USER "user_usec"
8888

89+
/* Field tokens in cgroups v2 io.stat (per block device, summed across devices) */
90+
#define IO_STAT_KEY_READ_BYTES "rbytes="
91+
#define IO_STAT_KEY_WRITE_BYTES "wbytes="
92+
#define IO_STAT_KEY_READS "rios="
93+
#define IO_STAT_KEY_WRITES "wios="
94+
8995
/* Static lists of fields in counters or gauges */
9096
#define FIELDS_METRIC (char*[3]){"id", "name", "image" }
9197
#define FIELDS_METRIC_WITH_IFACE (char*[4]){"id", "name", "image", "interface" }
@@ -107,6 +113,7 @@
107113
#define V2_SYSFS_FILE_CPU_STAT "cpu.stat"
108114
#define V2_SYSFS_FILE_PIDS "cgroup.procs"
109115
#define V2_SYSFS_FILE_PIDS_ALT "containers/cgroup.procs"
116+
#define V2_SYSFS_FILE_IO_STAT "io.stat"
110117

111118
/* Values used to construct counters/gauges names and descriptions */
112119
#define COUNTER_PREFIX "container"
@@ -138,6 +145,16 @@
138145
#define COUNTER_TX_ERRORS "transmit_errors_total"
139146
#define DESCRIPTION_TX_ERRORS "Network transmitedd errors"
140147

148+
#define COUNTER_DISK_PREFIX "disk"
149+
#define COUNTER_DISK_READ_BYTES "read_bytes_total"
150+
#define DESCRIPTION_DISK_READ_BYTES "Container block I/O bytes read"
151+
#define COUNTER_DISK_WRITE_BYTES "write_bytes_total"
152+
#define DESCRIPTION_DISK_WRITE_BYTES "Container block I/O bytes written"
153+
#define COUNTER_DISK_READS "reads_total"
154+
#define DESCRIPTION_DISK_READS "Container block I/O reads completed"
155+
#define COUNTER_DISK_WRITES "writes_total"
156+
#define DESCRIPTION_DISK_WRITES "Container block I/O writes completed"
157+
141158

142159
struct net_iface {
143160
flb_sds_t name;
@@ -160,6 +177,10 @@ struct container {
160177
uint64_t cpu;
161178
uint64_t cpu_user;
162179
uint64_t rss;
180+
uint64_t disk_read_bytes;
181+
uint64_t disk_write_bytes;
182+
uint64_t disk_reads;
183+
uint64_t disk_writes;
163184

164185
struct mk_list net_data;
165186
};
@@ -192,6 +213,10 @@ struct flb_in_metrics {
192213
struct cmt_counter *rx_errors;
193214
struct cmt_counter *tx_bytes;
194215
struct cmt_counter *tx_errors;
216+
struct cmt_counter *c_disk_read_bytes;
217+
struct cmt_counter *c_disk_write_bytes;
218+
struct cmt_counter *c_disk_reads;
219+
struct cmt_counter *c_disk_writes;
195220

196221
/* cgroup version used by host */
197222
int cgroup_version;

plugins/in_podman_metrics/podman_metrics_data.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,73 @@ uint64_t get_data_from_sysfs(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_
132132
return data;
133133
}
134134

135+
/*
136+
* Read all cgroups v2 io.stat counters in a single pass and store them in cnt.
137+
* io.stat lines look like:
138+
* "8:0 rbytes=1024 wbytes=0 rios=2 wios=0 dbytes=0 dios=0"
139+
* The rbytes/wbytes/rios/wios fields are summed across all block devices. On a
140+
* missing or unreadable file (for example cgroups v1, where io.stat does not
141+
* exist) the four counters are set to UINT64_MAX so they are treated as invalid
142+
* and skipped, mirroring the other sysfs readers. An existing but empty io.stat
143+
* (no I/O yet) yields 0 for each counter.
144+
*/
145+
void read_io_stat(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, struct container *cnt)
146+
{
147+
char path[SYSFS_FILE_PATH_SIZE];
148+
FILE *fp;
149+
char *line = NULL;
150+
char *pos;
151+
size_t len = 0;
152+
int i;
153+
struct {
154+
const char *key;
155+
size_t key_len;
156+
uint64_t *total;
157+
} fields[] = {
158+
{ IO_STAT_KEY_READ_BYTES, sizeof(IO_STAT_KEY_READ_BYTES) - 1, &cnt->disk_read_bytes },
159+
{ IO_STAT_KEY_WRITE_BYTES, sizeof(IO_STAT_KEY_WRITE_BYTES) - 1, &cnt->disk_write_bytes },
160+
{ IO_STAT_KEY_READS, sizeof(IO_STAT_KEY_READS) - 1, &cnt->disk_reads },
161+
{ IO_STAT_KEY_WRITES, sizeof(IO_STAT_KEY_WRITES) - 1, &cnt->disk_writes },
162+
};
163+
164+
cnt->disk_read_bytes = UINT64_MAX;
165+
cnt->disk_write_bytes = UINT64_MAX;
166+
cnt->disk_reads = UINT64_MAX;
167+
cnt->disk_writes = UINT64_MAX;
168+
169+
if (dir == NULL) {
170+
return;
171+
}
172+
173+
snprintf(path, sizeof(path), "%s/%s", dir, name);
174+
175+
fp = fopen(path, "r");
176+
if (!fp) {
177+
flb_plg_warn(ctx->ins, "Failed to read %s", path);
178+
return;
179+
}
180+
181+
for (i = 0; i < 4; i++) {
182+
*fields[i].total = 0;
183+
}
184+
185+
while (getline(&line, &len, fp) != -1) {
186+
for (i = 0; i < 4; i++) {
187+
pos = line;
188+
while ((pos = strstr(pos, fields[i].key)) != NULL) {
189+
pos += fields[i].key_len;
190+
*fields[i].total += strtoull(pos, NULL, 10);
191+
}
192+
}
193+
}
194+
flb_free(line);
195+
fclose(fp);
196+
197+
flb_plg_debug(ctx->ins, "%s: rbytes=%lu wbytes=%lu rios=%lu wios=%lu", path,
198+
cnt->disk_read_bytes, cnt->disk_write_bytes,
199+
cnt->disk_reads, cnt->disk_writes);
200+
}
201+
135202
/*
136203
* Check if container sysfs data is present in previously generated list of sysfs directories.
137204
* For cgroups v1, use subsystem (directory, for example memory) to search full path.
@@ -367,6 +434,7 @@ int fill_counters_with_sysfs_data_v2(struct flb_in_metrics *ctx)
367434
cnt->memory_limit = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_LIMIT, NULL);
368435
cnt->cpu_user = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU_USER);
369436
cnt->cpu = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU);
437+
read_io_stat(ctx, path, V2_SYSFS_FILE_IO_STAT, cnt);
370438
pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS, NULL);
371439
if (!pid || pid == UINT64_MAX) {
372440
pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS_ALT, NULL);

plugins/in_podman_metrics/podman_metrics_data.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ int destroy_gauge(struct flb_in_metrics *ctx, struct cmt_gauge **g);
3737
uint64_t read_from_file(struct flb_in_metrics *ctx, flb_sds_t path);
3838
uint64_t read_key_value_from_file(struct flb_in_metrics *ctx, flb_sds_t path, flb_sds_t key);
3939
uint64_t get_data_from_sysfs(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, flb_sds_t key);
40+
void read_io_stat(struct flb_in_metrics *ctx, flb_sds_t dir, flb_sds_t name, struct container *cnt);
4041

4142
int get_container_sysfs_subdirectory(struct flb_in_metrics *ctx, flb_sds_t id, flb_sds_t subsystem, flb_sds_t *path);
4243
int get_net_data_from_proc(struct flb_in_metrics *ctx, struct container *cnt, uint64_t pid);

0 commit comments

Comments
 (0)