Skip to content

Commit 792ab6c

Browse files
committed
try to fix slow start
1 parent 69e7b37 commit 792ab6c

4 files changed

Lines changed: 253 additions & 54 deletions

File tree

src/extract_gpuinfo_amdgpu.c

Lines changed: 139 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,13 @@
3737
#include <libdrm/amdgpu.h>
3838
#include <libdrm/amdgpu_drm.h>
3939
#include <math.h>
40+
#include <pthread.h>
4041
#include <stdarg.h>
42+
#include <stdatomic.h>
4143
#include <stdbool.h>
4244
#include <stdint.h>
4345
#include <stdio.h>
46+
#include <stdlib.h>
4447
#include <string.h>
4548
#include <sys/stat.h>
4649
#include <sys/syscall.h>
@@ -49,6 +52,9 @@
4952
#include <uthash.h>
5053
#include <xf86drm.h>
5154

55+
extern bool nvtop_debug_amdgpu_metrics;
56+
extern bool nvtop_enable_pcie_bw_sleep;
57+
5258
// extern
5359
const char *amdgpu_parse_marketing_name(struct amdgpu_gpu_info *info);
5460

@@ -120,16 +126,24 @@ struct gpu_info_amdgpu {
120126

121127
// We poll the fan frequently enough and want to avoid the open/close overhead of the sysfs file
122128
FILE *fanSpeedFILE; // FILE* for this device current fan speed
123-
FILE *PCIeBW; // FILE* for this device PCIe bandwidth over one second
124129
FILE *powerCap; // FILE* for this device power cap
125130

131+
// gpu_metrics sysfs file descriptor for non-blocking PCIe bandwidth reading
132+
// (replaces pcie_bw which blocks for 1 second per read due to kernel msleep(1000))
133+
int gpuMetricsFD;
134+
uint64_t last_pcie_bw_acc; // Previous pcie_bandwidth_acc value for delta computation
135+
bool has_pcie_bw_acc_prev; // Whether we have a previous accumulated value
136+
126137
nvtop_device *amdgpuDevice; // The AMDGPU driver device
127138
nvtop_device *hwmonDevice; // The AMDGPU driver hwmon device
128139

129140
struct amdgpu_process_info_cache *last_update_process_cache, *current_update_process_cache; // Cached processes info
130141

131142
// Used to compute the actual fan speed
132143
unsigned maxFanValue;
144+
145+
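// Legacy pcie_bw sysfs file, kept as a fallback when gpu_metrics is unavailable or lacks PCIe bandwidth fields.
// It is read inline (there is no worker thread), and each read blocks for about one second in the kernel.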
146+
FILE *PCIeBW; // FILE* for this device PCIe bandwidth over one second
133147
};
134148

135149
unsigned amdgpu_count;
@@ -142,6 +156,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne
142156
static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info);
143157
static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info);
144158
static void gpuinfo_amdgpu_get_running_processes(struct gpu_info *_gpu_info);
159+
static int rewindAndReadPattern(FILE *file, const char *format, ...);
145160

146161
struct gpu_vendor gpu_vendor_amdgpu = {
147162
.init = gpuinfo_amdgpu_init,
@@ -235,8 +250,11 @@ static bool gpuinfo_amdgpu_init(void) {
235250
static void gpuinfo_amdgpu_shutdown(void) {
236251
for (unsigned i = 0; i < amdgpu_count; ++i) {
237252
struct gpu_info_amdgpu *gpu_info = &gpu_infos[i];
253+
238254
if (gpu_info->fanSpeedFILE)
239255
fclose(gpu_info->fanSpeedFILE);
256+
if (gpu_info->gpuMetricsFD >= 0)
257+
close(gpu_info->gpuMetricsFD);
240258
if (gpu_info->PCIeBW)
241259
fclose(gpu_info->PCIeBW);
242260
if (gpu_info->powerCap)
@@ -370,11 +388,29 @@ static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) {
370388
}
371389

372390
int sysfsFD = open(devicePath, O_RDONLY);
373-
// Open the PCIe bandwidth file for dynamic info gathering
391+
// Open the gpu_metrics file for non-blocking PCIe bandwidth reading
392+
// (pcie_bw sysfs blocks for 1 second per read due to kernel msleep(1000))
393+
gpu_info->gpuMetricsFD = openat(sysfsFD, "gpu_metrics", O_RDONLY);
394+
gpu_info->last_pcie_bw_acc = 0;
395+
gpu_info->has_pcie_bw_acc_prev = false;
396+
397+
bool metrics_has_pcie = false;
398+
if (gpu_info->gpuMetricsFD >= 0) {
399+
uint8_t header[4];
400+
if (pread(gpu_info->gpuMetricsFD, header, sizeof(header), 0) == 4) {
401+
if (header[2] == 1 && header[3] >= 4) {
402+
metrics_has_pcie = true;
403+
}
404+
}
405+
}
406+
407+
// Open the legacy pcie_bw file only as a fallback, when gpu_metrics does not expose PCIe bandwidth
374408
gpu_info->PCIeBW = NULL;
375-
int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY);
376-
if (pcieBWFD) {
377-
gpu_info->PCIeBW = fdopen(pcieBWFD, "r");
409+
if (!metrics_has_pcie) {
410+
int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY);
411+
if (pcieBWFD >= 0) {
412+
gpu_info->PCIeBW = fdopen(pcieBWFD, "r");
413+
}
378414
}
379415

380416
close(sysfsFD);
@@ -466,6 +502,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne
466502
list_add_tail(&gpu_infos[amdgpu_count].base.list, devices);
467503
// Register a fdinfo callback for this GPU
468504
processinfo_register_fdinfo_callback(parse_drm_fdinfo_amd, &gpu_infos[amdgpu_count].base);
505+
469506
amdgpu_count++;
470507
} else {
471508
_drmFreeVersion(ver);
@@ -705,11 +742,21 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
705742

706743
// Memory usage
707744
struct drm_amdgpu_memory_info memory_info;
745+
struct timespec t_query_start, t_query_end;
746+
if (nvtop_debug_amdgpu_metrics) {
747+
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
748+
}
708749
if (libdrm_amdgpu_handle && _amdgpu_query_info)
709750
last_libdrm_return_status =
710751
_amdgpu_query_info(gpu_info->amdgpu_device, AMDGPU_INFO_MEMORY, sizeof(memory_info), &memory_info);
711752
else
712753
last_libdrm_return_status = 1;
754+
if (nvtop_debug_amdgpu_metrics) {
755+
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
756+
double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
757+
(t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
758+
fprintf(stderr, "[DEBUG] AMD _amdgpu_query_info(AMDGPU_INFO_MEMORY) took %.2f ms\n", elapsed_q);
759+
}
713760
if (!last_libdrm_return_status) {
714761
if (gpu_info->base.static_info.integrated_graphics) {
715762
SET_GPUINFO_DYNAMIC(dynamic_info, total_memory,
@@ -739,7 +786,16 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
739786

740787
// Fan speed
741788
unsigned currentFanSpeed;
789+
if (nvtop_debug_amdgpu_metrics) {
790+
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
791+
}
742792
int patternsMatched = rewindAndReadPattern(gpu_info->fanSpeedFILE, "%u", &currentFanSpeed);
793+
if (nvtop_debug_amdgpu_metrics) {
794+
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
795+
double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
796+
(t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
797+
fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(fanSpeedFILE) took %.2f ms\n", elapsed_q);
798+
}
743799
if (patternsMatched == 1) {
744800
SET_GPUINFO_DYNAMIC(dynamic_info, fan_speed, currentFanSpeed * 100 / gpu_info->maxFanValue);
745801
}
@@ -762,21 +818,93 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
762818
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_link_gen, pcieGen);
763819
}
764820

765-
// PCIe bandwidth
766-
if (gpu_info->PCIeBW) {
767-
// According to https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/pm/amdgpu_pm.c, under the pcie_bw
768-
// section, we should be able to read the number of packets received and sent by the GPU and get the maximum payload
769-
// size during the last second. This is untested but should work when the file is populated by the driver.
821+
// PCIe bandwidth via gpu_metrics (non-blocking, replaces pcie_bw which has a 1-second kernel sleep)
822+
if (gpu_info->gpuMetricsFD >= 0) {
823+
// Read the gpu_metrics binary file from sysfs
824+
// The file starts with a 4-byte header: structure_size(u16), format_revision(u8), content_revision(u8)
825+
// For dGPU metrics v1_4+, pcie_bandwidth_inst is available at a known offset
826+
uint8_t metrics_buf[256]; // Large enough for the header + PCIe bandwidth fields
827+
ssize_t nread = pread(gpu_info->gpuMetricsFD, metrics_buf, sizeof(metrics_buf), 0);
828+
if (nread >= 4) {
829+
uint16_t structure_size;
830+
memcpy(&structure_size, metrics_buf, sizeof(structure_size));
831+
uint8_t format_revision = metrics_buf[2];
832+
uint8_t content_revision = metrics_buf[3];
833+
834+
// gpu_metrics v1_4+ (dGPU) has pcie_bandwidth_acc and pcie_bandwidth_inst
835+
// format_revision == 1 means dGPU metrics, content_revision >= 4 means v1_4+
836+
if (format_revision == 1 && content_revision >= 4 && nread >= (ssize_t)structure_size) {
837+
// In gpu_metrics_v1_4, the layout after the header has pcie_bandwidth_acc and pcie_bandwidth_inst
838+
// as uint64_t fields. We use pcie_bandwidth_inst (instantaneous bandwidth in GB/sec)
839+
// and split evenly as an approximation for RX/TX since the kernel doesn't separate them.
840+
//
841+
// Field offsets within gpu_metrics_v1_4, measured from the start of the buffer (the 4-byte header is included):
842+
// The pcie_bandwidth_inst field follows pcie_bandwidth_acc.
843+
// The offsets below are hard-coded from the structure definition; nothing is discovered at run time.
844+
//
845+
// Offset calculation for gpu_metrics_v1_4:
846+
// header(4) + temp_hotspot(2) + temp_mem(2) + temp_vrsoc(2) = 10
847+
// curr_socket_power(2) = 12
848+
// avg_gfx_activity(2) + avg_umc_activity(2) + vcn_activity[4](8) = 24
849+
// energy_accumulator(8) = 32
850+
// system_clock_counter(8) = 40
851+
// throttle_status(4) = 44
852+
// gfxclk_lock_status(4) = 48
853+
// pcie_link_width(2) + pcie_link_speed(2) = 52
854+
// xgmi_link_width(2) + xgmi_link_speed(2) = 56
855+
// gfx_activity_acc(4) + mem_activity_acc(4) = 64
856+
// pcie_bandwidth_acc(8) = offset 64, ends at 72
857+
// pcie_bandwidth_inst(8) = offset 72, ends at 80
858+
// const size_t pcie_bw_acc_offset = 64;
859+
const size_t pcie_bw_inst_offset = 72;
860+
if (nread >= (ssize_t)(pcie_bw_inst_offset + sizeof(uint64_t))) {
861+
uint64_t pcie_bw_inst;
862+
memcpy(&pcie_bw_inst, metrics_buf + pcie_bw_inst_offset, sizeof(pcie_bw_inst));
863+
864+
// In gpu_metrics, if a sensor is unsupported, it often reports 0xFFFFFFFFFFFFFFFF (UINT64_MAX)
865+
if (pcie_bw_inst != UINT64_MAX) {
866+
// pcie_bandwidth_inst is in GB/sec, convert to KiB/sec
867+
// Split evenly between RX and TX as a best approximation
868+
uint64_t total_kib = pcie_bw_inst * 1024 * 1024; // GB/sec -> KiB/sec
869+
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, total_kib / 2);
870+
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, total_kib / 2);
871+
}
872+
}
873+
}
874+
875+
if (nvtop_debug_amdgpu_metrics) {
876+
fprintf(stderr, "[DEBUG] AMD gpu_metrics read %zd bytes: format_revision=%u, content_revision=%u\n", nread,
877+
format_revision, content_revision);
878+
fprintf(stderr, "[DEBUG] Raw gpu_metrics hex dump:\n");
879+
for (ssize_t i = 0; i < nread; i++) {
880+
fprintf(stderr, "%02x ", metrics_buf[i]);
881+
if ((i + 1) % 16 == 0)
882+
fprintf(stderr, "\n");
883+
}
884+
fprintf(stderr, "\n");
885+
}
886+
}
887+
} else if (gpu_info->PCIeBW && nvtop_enable_pcie_bw_sleep) {
770888
uint64_t received, transmitted;
771889
int maxPayloadSize;
890+
if (nvtop_debug_amdgpu_metrics) {
891+
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
892+
}
772893
int NreadPatterns =
773894
rewindAndReadPattern(gpu_info->PCIeBW, "%" SCNu64 " %" SCNu64 " %i", &received, &transmitted, &maxPayloadSize);
895+
if (nvtop_debug_amdgpu_metrics) {
896+
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
897+
double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
898+
(t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
899+
fprintf(stderr, "[DEBUG] AMD pcie_bw inline read took %.2f ms. Matches: %d\n", elapsed_q, NreadPatterns);
900+
}
774901
if (NreadPatterns == 3) {
775902
received *= maxPayloadSize;
776903
transmitted *= maxPayloadSize;
777-
// Set in KiB
904+
// Store in KiB
778905
received /= 1024;
779906
transmitted /= 1024;
907+
780908
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, received);
781909
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, transmitted);
782910
}

src/extract_gpuinfo_nvidia.c

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <stdio.h>
3030
#include <stdlib.h>
3131
#include <string.h>
32+
#include <unistd.h>
3233

3334
#define NVML_SUCCESS 0
3435
#define NVML_ERROR_NOT_SUPPORTED 3
@@ -170,8 +171,8 @@ typedef struct {
170171
unsigned long long usedGpuMemory;
171172
unsigned int gpuInstanceId;
172173
unsigned int computeInstanceId;
173-
// This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver nvml.h
174-
// unsigned long long usedGpuCcProtectedMemory;
174+
// This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver
175+
// nvml.h:
// unsigned long long usedGpuCcProtectedMemory;
175176
} nvmlProcessInfo_v3_t;
176177

177178
static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount,
@@ -394,11 +395,11 @@ static bool gpuinfo_nvidia_init(void) {
394395
goto init_error_clean_exit;
395396

396397
nvmlDeviceGetGraphicsRunningProcesses[1] =
397-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1;
398+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1;
398399
nvmlDeviceGetGraphicsRunningProcesses[2] =
399-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2;
400+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2;
400401
nvmlDeviceGetGraphicsRunningProcesses[3] =
401-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3;
402+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3;
402403

403404
nvmlDeviceGetComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v3");
404405
nvmlDeviceGetComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v2");
@@ -408,23 +409,23 @@ static bool gpuinfo_nvidia_init(void) {
408409
goto init_error_clean_exit;
409410

410411
nvmlDeviceGetComputeRunningProcesses[1] =
411-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1;
412+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1;
412413
nvmlDeviceGetComputeRunningProcesses[2] =
413-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2;
414+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2;
414415
nvmlDeviceGetComputeRunningProcesses[3] =
415-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3;
416+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3;
416417

417418
// These functions were not available in older NVML libs; don't error if not present
418419
nvmlDeviceGetMPSComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v3");
419420
nvmlDeviceGetMPSComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v2");
420421
nvmlDeviceGetMPSComputeRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses");
421422

422423
nvmlDeviceGetMPSComputeRunningProcesses[1] =
423-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1;
424+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1;
424425
nvmlDeviceGetMPSComputeRunningProcesses[2] =
425-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2;
426+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2;
426427
nvmlDeviceGetMPSComputeRunningProcesses[3] =
427-
(nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3;
428+
(nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3;
428429

429430
// These ones might not be available
430431
nvmlDeviceGetProcessUtilization = dlsym(libnvidia_ml_handle, "nvmlDeviceGetProcessUtilization");
@@ -882,30 +883,44 @@ static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info) {
882883
}
883884
}
884885
memset(_gpu_info->processes, 0, _gpu_info->processes_count * sizeof(*_gpu_info->processes));
886+
unsigned valid_procs = 0;
885887
for (unsigned i = 0; i < graphical_count + compute_count; ++i) {
886-
if (i < graphical_count)
887-
_gpu_info->processes[i].type = gpu_process_graphical;
888-
else
889-
_gpu_info->processes[i].type = gpu_process_compute;
888+
pid_t parsed_pid = -1;
890889
switch (version) {
891890
case 2: {
892891
nvmlProcessInfo_v2_t *pinfo = (nvmlProcessInfo_v2_t *)retrieved_infos;
893-
_gpu_info->processes[i].pid = pinfo[i].pid;
894-
_gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
892+
parsed_pid = pinfo[i].pid;
893+
_gpu_info->processes[valid_procs].pid = pinfo[i].pid;
894+
_gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
895895
} break;
896896
case 3: {
897897
nvmlProcessInfo_v3_t *pinfo = (nvmlProcessInfo_v3_t *)retrieved_infos;
898-
_gpu_info->processes[i].pid = pinfo[i].pid;
899-
_gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
898+
parsed_pid = pinfo[i].pid;
899+
_gpu_info->processes[valid_procs].pid = pinfo[i].pid;
900+
_gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
900901
} break;
901902
default: {
902903
nvmlProcessInfo_v1_t *pinfo = (nvmlProcessInfo_v1_t *)retrieved_infos;
903-
_gpu_info->processes[i].pid = pinfo[i].pid;
904-
_gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
904+
parsed_pid = pinfo[i].pid;
905+
_gpu_info->processes[valid_procs].pid = pinfo[i].pid;
906+
_gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
905907
} break;
906908
}
907-
SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[i].valid);
909+
910+
// Do not display nvtop in its own interface.
911+
if (parsed_pid == getpid()) {
912+
continue;
913+
}
914+
915+
if (i < graphical_count)
916+
_gpu_info->processes[valid_procs].type = gpu_process_graphical;
917+
else
918+
_gpu_info->processes[valid_procs].type = gpu_process_compute;
919+
920+
SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[valid_procs].valid);
921+
valid_procs++;
908922
}
923+
_gpu_info->processes_count = valid_procs;
909924
}
910925
}
911926
// If the GPU is in MIG mode; process utilization is not supported

src/extract_processinfo_fdinfo.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ void processinfo_sweep_fdinfos(void) {
135135
if (!client_pid)
136136
goto next;
137137

138+
// Do not show nvtop itself in its internal processes list.
139+
if (client_pid == (unsigned int)getpid())
140+
goto next;
141+
138142
fd_dir_fd = openat(pid_dir_fd, "fd", O_DIRECTORY);
139143
if (fd_dir_fd < 0)
140144
goto next;
@@ -277,12 +281,10 @@ void processinfo_sweep_fdinfos(void) {
277281
process_info->dec_engine_used + processes_info_local.dec_engine_used);
278282
}
279283
if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, gpu_cycles)) {
280-
SET_GPUINFO_PROCESS(process_info, gpu_cycles,
281-
process_info->gpu_cycles + processes_info_local.gpu_cycles);
284+
SET_GPUINFO_PROCESS(process_info, gpu_cycles, process_info->gpu_cycles + processes_info_local.gpu_cycles);
282285
}
283286
if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, sample_delta)) {
284-
SET_GPUINFO_PROCESS(process_info, sample_delta,
285-
process_info->sample_delta + processes_info_local.sample_delta);
287+
SET_GPUINFO_PROCESS(process_info, sample_delta, process_info->sample_delta + processes_info_local.sample_delta);
286288
}
287289
}
288290

0 commit comments

Comments
 (0)