Skip to content

Commit 495ff15

Browse files
committed
in_process_exporter_metrics: extend hash key to avoid pid reuse
1 parent 67b1acb commit 495ff15

1 file changed

Lines changed: 78 additions & 23 deletions

File tree

plugins/in_process_exporter_metrics/pe_process.c

Lines changed: 78 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ static int process_thread_update(struct flb_pe *ctx, uint64_t ts, flb_sds_t pid,
488488
flb_sds_t tmp = NULL;
489489
flb_sds_t thread_name = NULL;
490490
flb_sds_t tid_str = NULL;
491+
flb_sds_t active_key = NULL;
491492
uint64_t val;
492493
const char *pattern = "/[0-9]*";
493494
struct mk_list *head;
@@ -522,10 +523,6 @@ static int process_thread_update(struct flb_pe *ctx, uint64_t ts, flb_sds_t pid,
522523
continue;
523524
}
524525

525-
if (flb_hash_table_add(active_tids, tid_str, strlen(tid_str), NULL, 0) < 0) {
526-
*active_index_complete = FLB_FALSE;
527-
}
528-
529526
if (check_path_for_proc(ctx, thread->str, "stat") != 0) {
530527
continue;
531528
}
@@ -543,6 +540,17 @@ static int process_thread_update(struct flb_pe *ctx, uint64_t ts, flb_sds_t pid,
543540
continue;
544541
}
545542

543+
active_key = flb_sds_create(name);
544+
flb_sds_cat_safe(&active_key, ":", 1);
545+
flb_sds_cat_safe(&active_key, thread_name, strlen(thread_name));
546+
flb_sds_cat_safe(&active_key, ":", 1);
547+
flb_sds_cat_safe(&active_key, tid_str, strlen(tid_str));
548+
if (flb_hash_table_add(active_tids, active_key, (int) flb_sds_len(active_key), NULL, 0) < 0) {
549+
*active_index_complete = FLB_FALSE;
550+
}
551+
flb_sds_destroy(active_key);
552+
active_key = NULL;
553+
546554
/* split with the close parenthesis.
547555
* The entry of processes stat will start after that. */
548556
tmp = strstr(entry->str, ")");
@@ -946,26 +954,51 @@ static char *get_metric_label_value(struct cmt_metric *metric, int index)
946954
return NULL;
947955
}
948956

949-
static void purge_stale_metrics(struct cmt_map *map, int label_index,
957+
/*
958+
* Build a composite lookup key from labels[0..id_label_index] (inclusive),
959+
* joined by ':'. This mirrors the key stored in active_ids so the check
960+
* matches the exact emitted identity (name, pid[, ppid] or
961+
* name, threadname, tid) rather than just the bare id.
962+
*/
963+
static void purge_stale_metrics(struct cmt_map *map,
964+
int id_label_index,
950965
struct flb_hash_table *active_ids)
951966
{
952-
char *id_val;
967+
int i;
968+
char *label_val;
953969
void *out_buf;
954970
size_t out_size;
971+
flb_sds_t key;
955972
struct cfl_list *tmp;
956973
struct cfl_list *head;
957974
struct cmt_metric *metric;
958975

959976
cfl_list_foreach_safe(head, tmp, &map->metrics) {
960977
metric = cfl_list_entry(head, struct cmt_metric, _head);
961-
id_val = get_metric_label_value(metric, label_index);
962-
if (!id_val) {
978+
key = NULL;
979+
for (i = 0; i <= id_label_index; i++) {
980+
label_val = get_metric_label_value(metric, i);
981+
if (!label_val) {
982+
flb_sds_destroy(key);
983+
key = NULL;
984+
break;
985+
}
986+
if (!key) {
987+
key = flb_sds_create(label_val);
988+
}
989+
else {
990+
flb_sds_cat_safe(&key, ":", 1);
991+
flb_sds_cat_safe(&key, label_val, strlen(label_val));
992+
}
993+
}
994+
if (!key) {
963995
continue;
964996
}
965-
if (flb_hash_table_get(active_ids, id_val, strlen(id_val),
997+
if (flb_hash_table_get(active_ids, key, (int) flb_sds_len(key),
966998
&out_buf, &out_size) < 0) {
967999
cmt_map_metric_destroy(metric);
9681000
}
1001+
flb_sds_destroy(key);
9691002
}
9701003
}
9711004

@@ -994,6 +1027,7 @@ static int process_update(struct flb_pe *ctx)
9941027
struct flb_hash_table *active_pids = NULL;
9951028
struct flb_hash_table *active_tids = NULL;
9961029
int active_index_complete = FLB_TRUE;
1030+
flb_sds_t active_key = NULL;
9971031

9981032
mk_list_init(&procfs_list);
9991033

@@ -1080,9 +1114,14 @@ static int process_update(struct flb_pe *ctx)
10801114
continue;
10811115
}
10821116

1083-
if (flb_hash_table_add(active_pids, pid_str, strlen(pid_str), NULL, 0) < 0) {
1117+
active_key = flb_sds_create(name);
1118+
flb_sds_cat_safe(&active_key, ":", 1);
1119+
flb_sds_cat_safe(&active_key, pid_str, strlen(pid_str));
1120+
if (flb_hash_table_add(active_pids, active_key, (int) flb_sds_len(active_key), NULL, 0) < 0) {
10841121
active_index_complete = FLB_FALSE;
10851122
}
1123+
flb_sds_destroy(active_key);
1124+
active_key = NULL;
10861125

10871126
mk_list_init(&split_list);
10881127

@@ -1107,6 +1146,17 @@ static int process_update(struct flb_pe *ctx)
11071146
entry = flb_slist_entry_get(&split_list, 1);
11081147
ppid_str = entry->str;
11091148

1149+
active_key = flb_sds_create(name);
1150+
flb_sds_cat_safe(&active_key, ":", 1);
1151+
flb_sds_cat_safe(&active_key, pid_str, strlen(pid_str));
1152+
flb_sds_cat_safe(&active_key, ":", 1);
1153+
flb_sds_cat_safe(&active_key, ppid_str, strlen(ppid_str));
1154+
if (flb_hash_table_add(active_pids, active_key, (int) flb_sds_len(active_key), NULL, 0) < 0) {
1155+
active_index_complete = FLB_FALSE;
1156+
}
1157+
flb_sds_destroy(active_key);
1158+
active_key = NULL;
1159+
11101160
/* State */
11111161
if (ctx->enabled_flag & METRIC_STATE) {
11121162
/* node_processes_state
@@ -1242,22 +1292,27 @@ static int process_update(struct flb_pe *ctx)
12421292
flb_slist_destroy(&procfs_list);
12431293

12441294
if (active_index_complete == FLB_TRUE) {
1245-
/* Remove metrics for processes that are no longer running */
1246-
purge_stale_metrics(ctx->cpu_seconds->map, 1, active_pids);
1247-
purge_stale_metrics(ctx->read_bytes->map, 1, active_pids);
1248-
purge_stale_metrics(ctx->write_bytes->map, 1, active_pids);
1249-
purge_stale_metrics(ctx->major_page_faults->map, 1, active_pids);
1250-
purge_stale_metrics(ctx->minor_page_faults->map, 1, active_pids);
1295+
/* Remove metrics for processes that are no longer running.
1296+
* Metrics with ppid at label[2]: key = name:pid:ppid (id_label_index=2).
1297+
* Metrics without ppid (context_switches, thread_wchan): key = name:pid
1298+
* (id_label_index=1); active_pids also holds name:pid entries for these. */
1299+
purge_stale_metrics(ctx->cpu_seconds->map, 2, active_pids);
1300+
purge_stale_metrics(ctx->read_bytes->map, 2, active_pids);
1301+
purge_stale_metrics(ctx->write_bytes->map, 2, active_pids);
1302+
purge_stale_metrics(ctx->major_page_faults->map, 2, active_pids);
1303+
purge_stale_metrics(ctx->minor_page_faults->map, 2, active_pids);
1304+
purge_stale_metrics(ctx->memory_bytes->map, 2, active_pids);
1305+
purge_stale_metrics(ctx->open_fds->map, 2, active_pids);
1306+
purge_stale_metrics(ctx->fd_ratio->map, 2, active_pids);
1307+
purge_stale_metrics(ctx->start_time->map, 2, active_pids);
1308+
purge_stale_metrics(ctx->num_threads->map, 2, active_pids);
1309+
purge_stale_metrics(ctx->states->map, 2, active_pids);
12511310
purge_stale_metrics(ctx->context_switches->map, 1, active_pids);
1252-
purge_stale_metrics(ctx->memory_bytes->map, 1, active_pids);
1253-
purge_stale_metrics(ctx->open_fds->map, 1, active_pids);
1254-
purge_stale_metrics(ctx->fd_ratio->map, 1, active_pids);
1255-
purge_stale_metrics(ctx->start_time->map, 1, active_pids);
1256-
purge_stale_metrics(ctx->num_threads->map, 1, active_pids);
1257-
purge_stale_metrics(ctx->states->map, 1, active_pids);
12581311
purge_stale_metrics(ctx->thread_wchan->map, 1, active_pids);
12591312

1260-
/* Remove metrics for threads that are no longer running */
1313+
/* Remove metrics for threads that are no longer running.
1314+
* Thread metrics: labels = {name, threadname, tid, ...},
1315+
* key = name:threadname:tid (id_label_index=2). */
12611316
purge_stale_metrics(ctx->thread_cpu_seconds->map, 2, active_tids);
12621317
purge_stale_metrics(ctx->thread_io_bytes->map, 2, active_tids);
12631318
purge_stale_metrics(ctx->thread_major_page_faults->map, 2, active_tids);

0 commit comments

Comments
 (0)