From 34b34777fa9d881e3cedca1a9ecfcb066085580a Mon Sep 17 00:00:00 2001 From: stondo Date: Wed, 29 Apr 2026 14:37:14 +0200 Subject: [PATCH] in_podman_metrics: fix multiple cgroup v2 issues Fix five bugs in the podman_metrics input plugin: 1. CPU counter division: cgroup v2 cpu.stat reports usage in microseconds, not nanoseconds like cgroup v1 cpuacct. Use the correct divisor (1e6) when converting to seconds. 2. RSS memory key: cgroup v2 memory.stat does not have a "rss" field. The equivalent metric is "anon" (anonymous memory). Add V2_STAT_KEY_RSS and use it in the v2 collection path. 3. memory.max "max" keyword: cgroup v2 uses the literal string "max" in memory.max when the memory limit is unlimited. read_from_file() fails to parse this with fscanf("%lu"), causing spurious warnings. Add read_from_sysfs_or_max() helper that returns 0 for "max" (unlimited). 4. PID alt path typo: V2_SYSFS_FILE_PIDS_ALT was set to "containers/cgroup.procs" (plural) but the actual cgroup v2 subdirectory is "container/cgroup.procs" (singular). This caused PID lookup to fail for all containers, which in turn prevented all network metrics from being collected. 5. Image name NULL safety: when parsing container metadata JSON, strstr() for the closing quote of the image name field can return NULL if the metadata is malformed or truncated. The result was used directly in pointer arithmetic and strncpy(), causing undefined behaviour and potential crashes. Add a NULL guard that falls back to image="unknown" when parsing fails. 
Fixes: #7769 Signed-off-by: stondo --- plugins/in_podman_metrics/podman_metrics.c | 30 +++++++--- .../in_podman_metrics/podman_metrics_config.h | 3 +- .../in_podman_metrics/podman_metrics_data.c | 55 ++++++++++++++++++- 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/plugins/in_podman_metrics/podman_metrics.c b/plugins/in_podman_metrics/podman_metrics.c index 342649bf624..6d6f0b0b020 100644 --- a/plugins/in_podman_metrics/podman_metrics.c +++ b/plugins/in_podman_metrics/podman_metrics.c @@ -113,10 +113,15 @@ static int collect_container_data(struct flb_in_metrics *ctx) metadata_token_start = strstr(metadata, JSON_SUBFIELD_IMAGE_NAME); if (metadata_token_start) { metadata_token_stop = strstr(metadata_token_start + JSON_SUBFIELD_SIZE_IMAGE_NAME+1, "\\\""); - metadata_token_size = metadata_token_stop - metadata_token_start - JSON_SUBFIELD_SIZE_IMAGE_NAME; - - strncpy(image_name, metadata_token_start+JSON_SUBFIELD_SIZE_IMAGE_NAME, metadata_token_size); - image_name[metadata_token_size] = '\0'; + if (metadata_token_stop) { + metadata_token_size = metadata_token_stop - metadata_token_start - JSON_SUBFIELD_SIZE_IMAGE_NAME; + strncpy(image_name, metadata_token_start+JSON_SUBFIELD_SIZE_IMAGE_NAME, metadata_token_size); + image_name[metadata_token_size] = '\0'; + } + else { + strncpy(image_name, "unknown", IMAGE_NAME_SIZE - 1); + image_name[7] = '\0'; + } flb_plg_trace(ctx->ins, "Found image name %s", image_name); add_container_to_list(ctx, id, name, image_name); @@ -225,10 +230,19 @@ static int create_counter(struct flb_in_metrics *ctx, struct cmt_counter **count return -1; } - if (strcmp(metric_name, COUNTER_CPU) == 0 || strcmp(metric_name, COUNTER_CPU_USER) == 0) { - fvalue = fvalue / 1000000000; - flb_plg_trace(ctx->ins, "Converting %s from nanoseconds to seconds (%lu -> %lu)", metric_name, value, fvalue); - + if (strcmp(metric_name, COUNTER_CPU) == 0 || + strcmp(metric_name, COUNTER_CPU_USER) == 0) { + if (ctx->cgroup_version == CGROUP_V2) { + /* cgroup 
v2 cpu.stat reports in microseconds */ + fvalue = fvalue / 1000000; + } + else { + /* cgroup v1 cpuacct reports in nanoseconds */ + fvalue = fvalue / 1000000000; + } + flb_plg_trace(ctx->ins, + "Converting %s to seconds (%lu -> %lu)", + metric_name, value, fvalue); } labels = (char *[]){id, name, image_name, interface}; diff --git a/plugins/in_podman_metrics/podman_metrics_config.h b/plugins/in_podman_metrics/podman_metrics_config.h index 1f6133e199a..11a08d2a740 100644 --- a/plugins/in_podman_metrics/podman_metrics_config.h +++ b/plugins/in_podman_metrics/podman_metrics_config.h @@ -83,6 +83,7 @@ /* Key names in .stat files */ #define STAT_KEY_RSS "rss" +#define V2_STAT_KEY_RSS "anon" #define STAT_KEY_CPU "usage_usec" #define STAT_KEY_CPU_USER "user_usec" @@ -106,7 +107,7 @@ #define V2_SYSFS_FILE_MEMORY_LIMIT "memory.max" #define V2_SYSFS_FILE_CPU_STAT "cpu.stat" #define V2_SYSFS_FILE_PIDS "cgroup.procs" -#define V2_SYSFS_FILE_PIDS_ALT "containers/cgroup.procs" +#define V2_SYSFS_FILE_PIDS_ALT "container/cgroup.procs" /* Values used to construct counters/gauges names and descriptions */ #define COUNTER_PREFIX "container" diff --git a/plugins/in_podman_metrics/podman_metrics_data.c b/plugins/in_podman_metrics/podman_metrics_data.c index 28771cf5a06..6f7405418b4 100644 --- a/plugins/in_podman_metrics/podman_metrics_data.c +++ b/plugins/in_podman_metrics/podman_metrics_data.c @@ -342,6 +342,54 @@ int fill_counters_with_sysfs_data_v1(struct flb_in_metrics *ctx) return 0; } +/* + * Read uint64_t value from sysfs file, with special handling for the "max" + * keyword used by cgroup v2 to indicate an unlimited resource. + * Returns 0 when the file contains "max", or UINT64_MAX if reading fails. 
+ */ +static uint64_t read_from_sysfs_or_max(struct flb_in_metrics *ctx, + flb_sds_t dir, + flb_sds_t name) +{ + char path[SYSFS_FILE_PATH_SIZE]; + char buf[32]; + uint64_t value = UINT64_MAX; + FILE *fp; + int c; + + if (dir == NULL) { + return value; + } + + snprintf(path, sizeof(path), "%s/%s", dir, name); + + fp = fopen(path, "r"); + if (!fp) { + flb_plg_warn(ctx->ins, "Failed to read %s", path); + return value; + } + + if (fgets(buf, sizeof(buf), fp) != NULL) { + /* cgroup v2 uses "max" to indicate unlimited */ + if (strncmp(buf, "max", 3) == 0) { + flb_plg_debug(ctx->ins, "%s: max (unlimited)", path); + fclose(fp); + return 0; + } + c = sscanf(buf, "%lu", &value); + if (c != 1) { + flb_plg_warn(ctx->ins, + "Failed to read a number from %s", path); + fclose(fp); + return UINT64_MAX; + } + } + + fclose(fp); + flb_plg_debug(ctx->ins, "%s: %lu", path, value); + return value; +} + /* * Iterate over previously created container list. For each entry, generate its * path in sysfs system directory. From this path, grab data about container metrics @@ -363,8 +411,11 @@ int fill_counters_with_sysfs_data_v2(struct flb_in_metrics *ctx) cnt->memory_usage = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY, NULL); cnt->memory_max_usage = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MAX_MEMORY, NULL); - cnt->rss = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_STAT, STAT_KEY_RSS); - cnt->memory_limit = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_MEMORY_LIMIT, NULL); + cnt->rss = get_data_from_sysfs(ctx, path, + V2_SYSFS_FILE_MEMORY_STAT, + V2_STAT_KEY_RSS); + cnt->memory_limit = read_from_sysfs_or_max(ctx, path, + V2_SYSFS_FILE_MEMORY_LIMIT); cnt->cpu_user = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU_USER); cnt->cpu = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_CPU_STAT, STAT_KEY_CPU); pid = get_data_from_sysfs(ctx, path, V2_SYSFS_FILE_PIDS, NULL);