@@ -481,7 +481,8 @@ static int process_proc_thread_status(struct flb_pe *ctx, uint64_t ts,
481481}
482482
483483static int process_thread_update (struct flb_pe * ctx , uint64_t ts , flb_sds_t pid ,
484- flb_sds_t name , struct flb_hash_table * active_tids )
484+ flb_sds_t name , struct flb_hash_table * active_tids ,
485+ int * active_index_complete )
485486{
486487 int ret ;
487488 flb_sds_t tmp = NULL ;
@@ -521,7 +522,9 @@ static int process_thread_update(struct flb_pe *ctx, uint64_t ts, flb_sds_t pid,
521522 continue ;
522523 }
523524
524- flb_hash_table_add (active_tids , tid_str , strlen (tid_str ), NULL , 0 );
525+ if (flb_hash_table_add (active_tids , tid_str , strlen (tid_str ), NULL , 0 ) < 0 ) {
526+ * active_index_complete = FLB_FALSE ;
527+ }
525528
526529 if (check_path_for_proc (ctx , thread -> str , "stat" ) != 0 ) {
527530 continue ;
@@ -990,6 +993,7 @@ static int process_update(struct flb_pe *ctx)
990993 int include_flag = FLB_FALSE ;
991994 struct flb_hash_table * active_pids = NULL ;
992995 struct flb_hash_table * active_tids = NULL ;
996+ int active_index_complete = FLB_TRUE ;
993997
994998 mk_list_init (& procfs_list );
995999
@@ -1076,7 +1080,9 @@ static int process_update(struct flb_pe *ctx)
10761080 continue ;
10771081 }
10781082
1079- flb_hash_table_add (active_pids , pid_str , strlen (pid_str ), NULL , 0 );
1083+ if (flb_hash_table_add (active_pids , pid_str , strlen (pid_str ), NULL , 0 ) < 0 ) {
1084+ active_index_complete = FLB_FALSE ;
1085+ }
10801086
10811087 mk_list_init (& split_list );
10821088
@@ -1214,7 +1220,7 @@ static int process_update(struct flb_pe *ctx)
12141220
12151221 /* Collect the states of threads */
12161222 if (ctx -> enabled_flag & METRIC_THREAD ) {
1217- ret = process_thread_update (ctx , ts , pid_str , name , active_tids );
1223+ ret = process_thread_update (ctx , ts , pid_str , name , active_tids , & active_index_complete );
12181224 if (ret == -1 ) {
12191225 flb_plg_debug (ctx -> ins , "collect thread procfs is failed on the pid = %s" , pid_str );
12201226 }
@@ -1235,27 +1241,32 @@ static int process_update(struct flb_pe *ctx)
12351241
12361242 flb_slist_destroy (& procfs_list );
12371243
1238- /* Remove metrics for processes that are no longer running */
1239- purge_stale_metrics (ctx -> cpu_seconds -> map , 1 , active_pids );
1240- purge_stale_metrics (ctx -> read_bytes -> map , 1 , active_pids );
1241- purge_stale_metrics (ctx -> write_bytes -> map , 1 , active_pids );
1242- purge_stale_metrics (ctx -> major_page_faults -> map , 1 , active_pids );
1243- purge_stale_metrics (ctx -> minor_page_faults -> map , 1 , active_pids );
1244- purge_stale_metrics (ctx -> context_switches -> map , 1 , active_pids );
1245- purge_stale_metrics (ctx -> memory_bytes -> map , 1 , active_pids );
1246- purge_stale_metrics (ctx -> open_fds -> map , 1 , active_pids );
1247- purge_stale_metrics (ctx -> fd_ratio -> map , 1 , active_pids );
1248- purge_stale_metrics (ctx -> start_time -> map , 1 , active_pids );
1249- purge_stale_metrics (ctx -> num_threads -> map , 1 , active_pids );
1250- purge_stale_metrics (ctx -> states -> map , 1 , active_pids );
1251- purge_stale_metrics (ctx -> thread_wchan -> map , 1 , active_pids );
1252-
1253- /* Remove metrics for threads that are no longer running */
1254- purge_stale_metrics (ctx -> thread_cpu_seconds -> map , 2 , active_tids );
1255- purge_stale_metrics (ctx -> thread_io_bytes -> map , 2 , active_tids );
1256- purge_stale_metrics (ctx -> thread_major_page_faults -> map , 2 , active_tids );
1257- purge_stale_metrics (ctx -> thread_minor_page_faults -> map , 2 , active_tids );
1258- purge_stale_metrics (ctx -> thread_context_switches -> map , 2 , active_tids );
1244+ if (active_index_complete == FLB_TRUE ) {
1245+ /* Remove metrics for processes that are no longer running */
1246+ purge_stale_metrics (ctx -> cpu_seconds -> map , 1 , active_pids );
1247+ purge_stale_metrics (ctx -> read_bytes -> map , 1 , active_pids );
1248+ purge_stale_metrics (ctx -> write_bytes -> map , 1 , active_pids );
1249+ purge_stale_metrics (ctx -> major_page_faults -> map , 1 , active_pids );
1250+ purge_stale_metrics (ctx -> minor_page_faults -> map , 1 , active_pids );
1251+ purge_stale_metrics (ctx -> context_switches -> map , 1 , active_pids );
1252+ purge_stale_metrics (ctx -> memory_bytes -> map , 1 , active_pids );
1253+ purge_stale_metrics (ctx -> open_fds -> map , 1 , active_pids );
1254+ purge_stale_metrics (ctx -> fd_ratio -> map , 1 , active_pids );
1255+ purge_stale_metrics (ctx -> start_time -> map , 1 , active_pids );
1256+ purge_stale_metrics (ctx -> num_threads -> map , 1 , active_pids );
1257+ purge_stale_metrics (ctx -> states -> map , 1 , active_pids );
1258+ purge_stale_metrics (ctx -> thread_wchan -> map , 1 , active_pids );
1259+
1260+ /* Remove metrics for threads that are no longer running */
1261+ purge_stale_metrics (ctx -> thread_cpu_seconds -> map , 2 , active_tids );
1262+ purge_stale_metrics (ctx -> thread_io_bytes -> map , 2 , active_tids );
1263+ purge_stale_metrics (ctx -> thread_major_page_faults -> map , 2 , active_tids );
1264+ purge_stale_metrics (ctx -> thread_minor_page_faults -> map , 2 , active_tids );
1265+ purge_stale_metrics (ctx -> thread_context_switches -> map , 2 , active_tids );
1266+ } else {
1267+ flb_plg_warn (ctx -> ins ,
1268+ "skipping stale metric purge because active ID tracking is incomplete" );
1269+ }
12591270
12601271 flb_hash_table_destroy (active_pids );
12611272 flb_hash_table_destroy (active_tids );
0 commit comments