try to fix slow start

thor2002ro · thor2002ro · commit 792ab6ccfba3 · 2026-02-26T22:11:16.000+02:00
diff --git a/src/extract_gpuinfo_amdgpu.c b/src/extract_gpuinfo_amdgpu.c
@@ -37,10 +37,13 @@
 #include <libdrm/amdgpu.h>
 #include <libdrm/amdgpu_drm.h>
 #include <math.h>
+#include <pthread.h>
 #include <stdarg.h>
+#include <stdatomic.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
@@ -49,6 +52,9 @@
 #include <uthash.h>
 #include <xf86drm.h>
 
+extern bool nvtop_debug_amdgpu_metrics;
+extern bool nvtop_enable_pcie_bw_sleep;
+
 // extern
 const char *amdgpu_parse_marketing_name(struct amdgpu_gpu_info *info);
 
@@ -120,16 +126,24 @@ struct gpu_info_amdgpu {
 
   // We poll the fan frequently enough and want to avoid the open/close overhead of the sysfs file
   FILE *fanSpeedFILE; // FILE* for this device current fan speed
-  FILE *PCIeBW;       // FILE* for this device PCIe bandwidth over one second
   FILE *powerCap;     // FILE* for this device power cap
 
+  // gpu_metrics sysfs file descriptor for non-blocking PCIe bandwidth reading
+  // (replaces pcie_bw which blocks for 1 second per read due to kernel msleep(1000))
+  int gpuMetricsFD;
+  uint64_t last_pcie_bw_acc; // Previous pcie_bandwidth_acc value for delta computation
+  bool has_pcie_bw_acc_prev; // Whether we have a previous accumulated value
+
   nvtop_device *amdgpuDevice; // The AMDGPU driver device
   nvtop_device *hwmonDevice;  // The AMDGPU driver hwmon device
 
   struct amdgpu_process_info_cache *last_update_process_cache, *current_update_process_cache; // Cached processes info
 
   // Used to compute the actual fan speed
   unsigned maxFanValue;
+
+  // Legacy pcie_bw sysfs file (fallback when gpuMetricsFD < 0 or gpu_metrics lacks PCIe bandwidth fields)
+  FILE *PCIeBW; // FILE* for this device PCIe bandwidth over one second
 };
 
 unsigned amdgpu_count;
@@ -142,6 +156,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne
 static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info);
 static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info);
 static void gpuinfo_amdgpu_get_running_processes(struct gpu_info *_gpu_info);
+static int rewindAndReadPattern(FILE *file, const char *format, ...);
 
 struct gpu_vendor gpu_vendor_amdgpu = {
     .init = gpuinfo_amdgpu_init,
@@ -235,8 +250,11 @@ static bool gpuinfo_amdgpu_init(void) {
 static void gpuinfo_amdgpu_shutdown(void) {
   for (unsigned i = 0; i < amdgpu_count; ++i) {
     struct gpu_info_amdgpu *gpu_info = &gpu_infos[i];
+
     if (gpu_info->fanSpeedFILE)
       fclose(gpu_info->fanSpeedFILE);
+    if (gpu_info->gpuMetricsFD >= 0)
+      close(gpu_info->gpuMetricsFD);
     if (gpu_info->PCIeBW)
       fclose(gpu_info->PCIeBW);
     if (gpu_info->powerCap)
@@ -370,11 +388,29 @@ static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) {
   }
 
   int sysfsFD = open(devicePath, O_RDONLY);
-  // Open the PCIe bandwidth file for dynamic info gathering
+  // Open the gpu_metrics file for non-blocking PCIe bandwidth reading
+  // (pcie_bw sysfs blocks for 1 second per read due to kernel msleep(1000))
+  gpu_info->gpuMetricsFD = openat(sysfsFD, "gpu_metrics", O_RDONLY);
+  gpu_info->last_pcie_bw_acc = 0;
+  gpu_info->has_pcie_bw_acc_prev = false;
+
+  bool metrics_has_pcie = false;
+  if (gpu_info->gpuMetricsFD >= 0) {
+    uint8_t header[4];
+    if (pread(gpu_info->gpuMetricsFD, header, sizeof(header), 0) == 4) {
+      if (header[2] == 1 && header[3] >= 4) {
+        metrics_has_pcie = true;
+      }
+    }
+  }
+
+  // Open the legacy pcie_bw file as a fallback when gpu_metrics does not expose PCIe bandwidth
   gpu_info->PCIeBW = NULL;
-  int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY);
-  if (pcieBWFD) {
-    gpu_info->PCIeBW = fdopen(pcieBWFD, "r");
+  if (!metrics_has_pcie) {
+    int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY);
+    if (pcieBWFD >= 0) {
+      gpu_info->PCIeBW = fdopen(pcieBWFD, "r");
+    }
   }
 
   close(sysfsFD);
@@ -466,6 +502,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne
       list_add_tail(&gpu_infos[amdgpu_count].base.list, devices);
       // Register a fdinfo callback for this GPU
       processinfo_register_fdinfo_callback(parse_drm_fdinfo_amd, &gpu_infos[amdgpu_count].base);
+
       amdgpu_count++;
     } else {
       _drmFreeVersion(ver);
@@ -705,11 +742,21 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
 
   // Memory usage
   struct drm_amdgpu_memory_info memory_info;
+  struct timespec t_query_start, t_query_end;
+  if (nvtop_debug_amdgpu_metrics) {
+    clock_gettime(CLOCK_MONOTONIC, &t_query_start);
+  }
   if (libdrm_amdgpu_handle && _amdgpu_query_info)
     last_libdrm_return_status =
         _amdgpu_query_info(gpu_info->amdgpu_device, AMDGPU_INFO_MEMORY, sizeof(memory_info), &memory_info);
   else
     last_libdrm_return_status = 1;
+  if (nvtop_debug_amdgpu_metrics) {
+    clock_gettime(CLOCK_MONOTONIC, &t_query_end);
+    double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
+                       (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
+    fprintf(stderr, "[DEBUG] AMD _amdgpu_query_info(AMDGPU_INFO_MEMORY) took %.2f ms\n", elapsed_q);
+  }
   if (!last_libdrm_return_status) {
     if (gpu_info->base.static_info.integrated_graphics) {
       SET_GPUINFO_DYNAMIC(dynamic_info, total_memory,
@@ -739,7 +786,16 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
 
   // Fan speed
   unsigned currentFanSpeed;
+  if (nvtop_debug_amdgpu_metrics) {
+    clock_gettime(CLOCK_MONOTONIC, &t_query_start);
+  }
   int patternsMatched = rewindAndReadPattern(gpu_info->fanSpeedFILE, "%u", &currentFanSpeed);
+  if (nvtop_debug_amdgpu_metrics) {
+    clock_gettime(CLOCK_MONOTONIC, &t_query_end);
+    double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
+                       (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
+    fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(fanSpeedFILE) took %.2f ms\n", elapsed_q);
+  }
   if (patternsMatched == 1) {
     SET_GPUINFO_DYNAMIC(dynamic_info, fan_speed, currentFanSpeed * 100 / gpu_info->maxFanValue);
   }
@@ -762,21 +818,93 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
     SET_GPUINFO_DYNAMIC(dynamic_info, pcie_link_gen, pcieGen);
   }
 
-  // PCIe bandwidth
-  if (gpu_info->PCIeBW) {
-    // According to https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/pm/amdgpu_pm.c, under the pcie_bw
-    // section, we should be able to read the number of packets received and sent by the GPU and get the maximum payload
-    // size during the last second. This is untested but should work when the file is populated by the driver.
+  // PCIe bandwidth via gpu_metrics (non-blocking, replaces pcie_bw which has a 1-second kernel sleep)
+  if (gpu_info->gpuMetricsFD >= 0) {
+    // Read the gpu_metrics binary file from sysfs
+    // The file starts with a 4-byte header: structure_size(u16), format_revision(u8), content_revision(u8)
+    // For dGPU metrics v1_4+, pcie_bandwidth_inst is available at a known offset
+    uint8_t metrics_buf[256]; // Large enough for the header + PCIe bandwidth fields
+    ssize_t nread = pread(gpu_info->gpuMetricsFD, metrics_buf, sizeof(metrics_buf), 0);
+    if (nread >= 4) {
+      uint16_t structure_size;
+      memcpy(&structure_size, metrics_buf, sizeof(structure_size));
+      uint8_t format_revision = metrics_buf[2];
+      uint8_t content_revision = metrics_buf[3];
+
+      // gpu_metrics v1_4+ (dGPU) has pcie_bandwidth_acc and pcie_bandwidth_inst
+      // format_revision == 1 means dGPU metrics, content_revision >= 4 means v1_4+
+      if (format_revision == 1 && content_revision >= 4 && nread >= (ssize_t)structure_size) {
+        // In gpu_metrics_v1_4, the layout after the header has pcie_bandwidth_acc and pcie_bandwidth_inst
+        // as uint64_t fields. We use pcie_bandwidth_inst (instantaneous bandwidth in GB/sec)
+        // and split evenly as an approximation for RX/TX since the kernel doesn't separate them.
+        //
+        // Field offsets within gpu_metrics_v1_4 (counted from the start of the buffer, header included):
+        //   The pcie_bandwidth_inst field immediately follows pcie_bandwidth_acc.
+        //   Both offsets are hard-coded, derived by summing the field sizes listed below.
+        //
+        // Offset calculation for gpu_metrics_v1_4:
+        //   header(4) + temp_hotspot(2) + temp_mem(2) + temp_vrsoc(2) = 10
+        //   curr_socket_power(2) = 12
+        //   avg_gfx_activity(2) + avg_umc_activity(2) + vcn_activity[4](8) = 24
+        //   energy_accumulator(8) = 32
+        //   system_clock_counter(8) = 40
+        //   throttle_status(4) = 44
+        //   gfxclk_lock_status(4) = 48
+        //   pcie_link_width(2) + pcie_link_speed(2) = 52
+        //   xgmi_link_width(2) + xgmi_link_speed(2) = 56
+        //   gfx_activity_acc(4) + mem_activity_acc(4) = 64
+        //   pcie_bandwidth_acc(8) = offset 64, ends at 72
+        //   pcie_bandwidth_inst(8) = offset 72, ends at 80
+        // const size_t pcie_bw_acc_offset = 64;
+        const size_t pcie_bw_inst_offset = 72;
+        if (nread >= (ssize_t)(pcie_bw_inst_offset + sizeof(uint64_t))) {
+          uint64_t pcie_bw_inst;
+          memcpy(&pcie_bw_inst, metrics_buf + pcie_bw_inst_offset, sizeof(pcie_bw_inst));
+
+          // In gpu_metrics, if a sensor is unsupported, it often reports 0xFFFFFFFFFFFFFFFF (UINT64_MAX)
+          if (pcie_bw_inst != UINT64_MAX) {
+            // pcie_bandwidth_inst is in GB/sec, convert to KiB/sec
+            // Split evenly between RX and TX as a best approximation
+            uint64_t total_kib = pcie_bw_inst * 1024 * 1024; // GB/sec -> KiB/sec
+            SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, total_kib / 2);
+            SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, total_kib / 2);
+          }
+        }
+      }
+
+      if (nvtop_debug_amdgpu_metrics) {
+        fprintf(stderr, "[DEBUG] AMD gpu_metrics read %zd bytes: format_revision=%u, content_revision=%u\n", nread,
+                format_revision, content_revision);
+        fprintf(stderr, "[DEBUG] Raw gpu_metrics hex dump:\n");
+        for (ssize_t i = 0; i < nread; i++) {
+          fprintf(stderr, "%02x ", metrics_buf[i]);
+          if ((i + 1) % 16 == 0)
+            fprintf(stderr, "\n");
+        }
+        fprintf(stderr, "\n");
+      }
+    }
+  } else if (gpu_info->PCIeBW && nvtop_enable_pcie_bw_sleep) {
     uint64_t received, transmitted;
     int maxPayloadSize;
+    if (nvtop_debug_amdgpu_metrics) {
+      clock_gettime(CLOCK_MONOTONIC, &t_query_start);
+    }
     int NreadPatterns =
         rewindAndReadPattern(gpu_info->PCIeBW, "%" SCNu64 " %" SCNu64 " %i", &received, &transmitted, &maxPayloadSize);
+    if (nvtop_debug_amdgpu_metrics) {
+      clock_gettime(CLOCK_MONOTONIC, &t_query_end);
+      double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
+                         (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
+      fprintf(stderr, "[DEBUG] AMD pcie_bw inline read took %.2f ms. Matches: %d\n", elapsed_q, NreadPatterns);
+    }
     if (NreadPatterns == 3) {
       received *= maxPayloadSize;
       transmitted *= maxPayloadSize;
-      // Set in KiB
+      // Store in KiB
       received /= 1024;
       transmitted /= 1024;
+
       SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, received);
       SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, transmitted);
     }
diff --git a/src/extract_gpuinfo_nvidia.c b/src/extract_gpuinfo_nvidia.c
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 #define NVML_SUCCESS 0
 #define NVML_ERROR_NOT_SUPPORTED 3
@@ -170,8 +171,8 @@ typedef struct {
   unsigned long long usedGpuMemory;
   unsigned int gpuInstanceId;
   unsigned int computeInstanceId;
-  // This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver nvml.h
-  // unsigned long long usedGpuCcProtectedMemory;
+  // In https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not in the latest driver nvml.h:
+  // unsigned long long usedGpuCcProtectedMemory;
 } nvmlProcessInfo_v3_t;
 
 static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount,
@@ -394,11 +395,11 @@ static bool gpuinfo_nvidia_init(void) {
     goto init_error_clean_exit;
 
   nvmlDeviceGetGraphicsRunningProcesses[1] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1;
   nvmlDeviceGetGraphicsRunningProcesses[2] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2;
   nvmlDeviceGetGraphicsRunningProcesses[3] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3;
 
   nvmlDeviceGetComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v3");
   nvmlDeviceGetComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v2");
@@ -408,23 +409,23 @@ static bool gpuinfo_nvidia_init(void) {
     goto init_error_clean_exit;
 
   nvmlDeviceGetComputeRunningProcesses[1] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1;
   nvmlDeviceGetComputeRunningProcesses[2] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2;
   nvmlDeviceGetComputeRunningProcesses[3] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3;
 
   // These functions were not available in older NVML libs; don't error if not present
   nvmlDeviceGetMPSComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v3");
   nvmlDeviceGetMPSComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v2");
   nvmlDeviceGetMPSComputeRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses");
 
   nvmlDeviceGetMPSComputeRunningProcesses[1] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1;
   nvmlDeviceGetMPSComputeRunningProcesses[2] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2;
   nvmlDeviceGetMPSComputeRunningProcesses[3] =
-      (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3;
+      (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3;
 
   // These ones might not be available
   nvmlDeviceGetProcessUtilization = dlsym(libnvidia_ml_handle, "nvmlDeviceGetProcessUtilization");
@@ -882,30 +883,44 @@ static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info) {
         }
       }
       memset(_gpu_info->processes, 0, _gpu_info->processes_count * sizeof(*_gpu_info->processes));
+      unsigned valid_procs = 0;
       for (unsigned i = 0; i < graphical_count + compute_count; ++i) {
-        if (i < graphical_count)
-          _gpu_info->processes[i].type = gpu_process_graphical;
-        else
-          _gpu_info->processes[i].type = gpu_process_compute;
+        pid_t parsed_pid = -1;
         switch (version) {
         case 2: {
           nvmlProcessInfo_v2_t *pinfo = (nvmlProcessInfo_v2_t *)retrieved_infos;
-          _gpu_info->processes[i].pid = pinfo[i].pid;
-          _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
+          parsed_pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
         } break;
         case 3: {
           nvmlProcessInfo_v3_t *pinfo = (nvmlProcessInfo_v3_t *)retrieved_infos;
-          _gpu_info->processes[i].pid = pinfo[i].pid;
-          _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
+          parsed_pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
         } break;
         default: {
           nvmlProcessInfo_v1_t *pinfo = (nvmlProcessInfo_v1_t *)retrieved_infos;
-          _gpu_info->processes[i].pid = pinfo[i].pid;
-          _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory;
+          parsed_pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].pid = pinfo[i].pid;
+          _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory;
         } break;
         }
-        SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[i].valid);
+
+        // Do not display nvtop in its own interface.
+        if (parsed_pid == getpid()) {
+          continue;
+        }
+
+        if (i < graphical_count)
+          _gpu_info->processes[valid_procs].type = gpu_process_graphical;
+        else
+          _gpu_info->processes[valid_procs].type = gpu_process_compute;
+
+        SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[valid_procs].valid);
+        valid_procs++;
       }
+      _gpu_info->processes_count = valid_procs;
     }
   }
   // If the GPU is in MIG mode; process utilization is not supported
diff --git a/src/extract_processinfo_fdinfo.c b/src/extract_processinfo_fdinfo.c
@@ -135,6 +135,10 @@ void processinfo_sweep_fdinfos(void) {
     if (!client_pid)
       goto next;
 
+    // Do not show nvtop itself in its internal processes list.
+    if (client_pid == (unsigned int)getpid())
+      goto next;
+
     fd_dir_fd = openat(pid_dir_fd, "fd", O_DIRECTORY);
     if (fd_dir_fd < 0)
       goto next;
@@ -277,12 +281,10 @@ void processinfo_sweep_fdinfos(void) {
                             process_info->dec_engine_used + processes_info_local.dec_engine_used);
       }
       if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, gpu_cycles)) {
-        SET_GPUINFO_PROCESS(process_info, gpu_cycles,
-                            process_info->gpu_cycles + processes_info_local.gpu_cycles);
+        SET_GPUINFO_PROCESS(process_info, gpu_cycles, process_info->gpu_cycles + processes_info_local.gpu_cycles);
       }
       if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, sample_delta)) {
-        SET_GPUINFO_PROCESS(process_info, sample_delta,
-                            process_info->sample_delta + processes_info_local.sample_delta);
+        SET_GPUINFO_PROCESS(process_info, sample_delta, process_info->sample_delta + processes_info_local.sample_delta);
       }
     }
 
diff --git a/src/nvtop.c b/src/nvtop.c

Original file line number	Diff line number	Diff line change
`@@ -135,6 +135,10 @@ void processinfo_sweep_fdinfos(void) {`
`135`	`135`	`if (!client_pid)`
`136`	`136`	`goto next;`
`137`	`137`
	`138`	`+ // Do not show nvtop itself in its internal processes list.`
	`139`	`+ if (client_pid == (unsigned int)getpid())`
	`140`	`+ goto next;`
	`141`	`+`
`138`	`142`	`fd_dir_fd = openat(pid_dir_fd, "fd", O_DIRECTORY);`
`139`	`143`	`if (fd_dir_fd < 0)`
`140`	`144`	`goto next;`
`@@ -277,12 +281,10 @@ void processinfo_sweep_fdinfos(void) {`
`277`	`281`	`process_info->dec_engine_used + processes_info_local.dec_engine_used);`
`278`	`282`	`}`
`279`	`283`	`if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, gpu_cycles)) {`
`280`		`- SET_GPUINFO_PROCESS(process_info, gpu_cycles,`
`281`		`- process_info->gpu_cycles + processes_info_local.gpu_cycles);`
	`284`	`+ SET_GPUINFO_PROCESS(process_info, gpu_cycles, process_info->gpu_cycles + processes_info_local.gpu_cycles);`
`282`	`285`	`}`
`283`	`286`	`if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, sample_delta)) {`
`284`		`- SET_GPUINFO_PROCESS(process_info, sample_delta,`
`285`		`- process_info->sample_delta + processes_info_local.sample_delta);`
	`287`	`+ SET_GPUINFO_PROCESS(process_info, sample_delta, process_info->sample_delta + processes_info_local.sample_delta);`
`286`	`288`	`}`
`287`	`289`	`}`
`288`	`290`