diff --git a/.gitignore b/.gitignore index df7dff2a..0e8d46f4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ build/ cmake-build*/ .vscode .idea +.cache + diff --git a/include/nvtop/extract_gpuinfo_common.h b/include/nvtop/extract_gpuinfo_common.h index 9e4d1c9d..f3bc5458 100644 --- a/include/nvtop/extract_gpuinfo_common.h +++ b/include/nvtop/extract_gpuinfo_common.h @@ -61,6 +61,7 @@ enum gpuinfo_static_info_valid { gpuinfo_l2cache_size_valid, gpuinfo_n_exec_engines_valid, gpuinfo_engine_count_valid, + gpuinfo_fan_rpm_max_valid, gpuinfo_static_info_count, }; @@ -76,6 +77,7 @@ struct gpuinfo_static_info { unsigned l2cache_size; unsigned n_exec_engines; unsigned engine_count; + unsigned fan_rpm_max; bool integrated_graphics; bool encode_decode_shared; unsigned char valid[(gpuinfo_static_info_count + CHAR_BIT - 1) / CHAR_BIT]; @@ -132,7 +134,7 @@ struct gpuinfo_dynamic_info { unsigned int gpu_temp; // GPU temperature °celsius unsigned int power_draw; // Power usage in milliwatts unsigned int power_draw_max; // Max power usage in milliwatts - bool multi_instance_mode; // True if the GPU is in multi-instance mode + bool multi_instance_mode; // True if the GPU is in multi-instance mode unsigned char valid[(gpuinfo_dynamic_info_count + CHAR_BIT - 1) / CHAR_BIT]; }; diff --git a/src/extract_gpuinfo.c b/src/extract_gpuinfo.c index 896b78fb..a8752847 100644 --- a/src/extract_gpuinfo.c +++ b/src/extract_gpuinfo.c @@ -134,45 +134,69 @@ bool gpuinfo_fix_dynamic_info_from_process_info(struct list_head *devices) { unsigned reportedGpuRate = dynamic_info->gpu_util_rate; RESET_GPUINFO_DYNAMIC(dynamic_info, gpu_util_rate); - // AMDGPU does not provide encode and decode utilization through the DRM sensor info. - // Update them here since per-process sysfs exposes this information. 
- bool needGpuEncode = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate); - bool needGpuDecode = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate); + // For encode/decode, save vendor-reported values and reset, then re-aggregate + // from per-process info (same pattern as gpu_util_rate). This ensures GPUs that + // set a baseline (e.g., AMDGPU sets 0) show 0% when idle, while GPUs that never + // set encoder_rate/decoder_rate still show null. + bool validReportedEncoderRate = GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate); + unsigned reportedEncoderRate = dynamic_info->encoder_rate; + if (validReportedEncoderRate) + RESET_GPUINFO_DYNAMIC(dynamic_info, encoder_rate); + + bool validReportedDecoderRate = GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate); + unsigned reportedDecoderRate = dynamic_info->decoder_rate; + if (validReportedDecoderRate) + RESET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate); + bool needGPUMemory = !GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory) && GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, total_memory); - if (needGpuRate || needGpuEncode || needGpuDecode || needGPUMemory) { - for (unsigned processIdx = 0; processIdx < device->processes_count; ++processIdx) { - struct gpu_process *process_info = &device->processes[processIdx]; - if (needGpuRate && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_usage)) { - if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, gpu_util_rate)) { - dynamic_info->gpu_util_rate = MYMIN(100, dynamic_info->gpu_util_rate + process_info->gpu_usage); - } else { - SET_GPUINFO_DYNAMIC(dynamic_info, gpu_util_rate, MYMIN(100, process_info->gpu_usage)); - } + for (unsigned processIdx = 0; processIdx < device->processes_count; ++processIdx) { + struct gpu_process *process_info = &device->processes[processIdx]; + if (needGpuRate && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_usage)) { + if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, gpu_util_rate)) { + dynamic_info->gpu_util_rate = MYMIN(100, 
dynamic_info->gpu_util_rate + process_info->gpu_usage); + } else { + SET_GPUINFO_DYNAMIC(dynamic_info, gpu_util_rate, MYMIN(100, process_info->gpu_usage)); } - if (needGpuEncode && GPUINFO_PROCESS_FIELD_VALID(process_info, encode_usage)) { - if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate)) { - dynamic_info->encoder_rate = MYMIN(100, dynamic_info->encoder_rate + process_info->encode_usage); - } else { - SET_GPUINFO_DYNAMIC(dynamic_info, encoder_rate, MYMIN(100, process_info->encode_usage)); - } + } + if (GPUINFO_PROCESS_FIELD_VALID(process_info, encode_usage)) { + if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate)) { + dynamic_info->encoder_rate = MYMIN(100, dynamic_info->encoder_rate + process_info->encode_usage); + } else { + SET_GPUINFO_DYNAMIC(dynamic_info, encoder_rate, MYMIN(100, process_info->encode_usage)); } - if (needGpuDecode && GPUINFO_PROCESS_FIELD_VALID(process_info, decode_usage)) { - if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate)) { - dynamic_info->decoder_rate = MYMIN(100, dynamic_info->decoder_rate + process_info->decode_usage); - } else { - SET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate, MYMIN(100, process_info->decode_usage)); - } + } + if (GPUINFO_PROCESS_FIELD_VALID(process_info, decode_usage)) { + if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate)) { + dynamic_info->decoder_rate = MYMIN(100, dynamic_info->decoder_rate + process_info->decode_usage); + } else { + SET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate, MYMIN(100, process_info->decode_usage)); } - if (needGPUMemory && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_memory_usage)) { - if (GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory)) { - dynamic_info->used_memory += dynamic_info->used_memory + process_info->gpu_memory_usage; - } else { - SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, process_info->gpu_memory_usage); - } + } + if (needGPUMemory && GPUINFO_PROCESS_FIELD_VALID(process_info, gpu_memory_usage)) { + if 
(GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory)) { + dynamic_info->used_memory += process_info->gpu_memory_usage; + } else { + SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, process_info->gpu_memory_usage); } } } + // Restore vendor-reported encoder/decoder rate if process aggregation didn't produce a value, + // or use the max of vendor-reported and process-aggregated + if (validReportedEncoderRate) { + if (!GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, encoder_rate)) { + SET_GPUINFO_DYNAMIC(dynamic_info, encoder_rate, reportedEncoderRate); + } else if (dynamic_info->encoder_rate < reportedEncoderRate) { + dynamic_info->encoder_rate = reportedEncoderRate; + } + } + if (validReportedDecoderRate) { + if (!GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, decoder_rate)) { + SET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate, reportedDecoderRate); + } else if (dynamic_info->decoder_rate < reportedDecoderRate) { + dynamic_info->decoder_rate = reportedDecoderRate; + } + } // Sanitize what we got from processes: we can't have more than the total! if (needGPUMemory && GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory) && GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, total_memory) && @@ -394,13 +418,14 @@ void gpuinfo_refresh_utilisation_rate(struct gpu_info *gpu_info) { return; if (IS_VALID(gpuinfo_engine_count_valid, gpu_info->static_info.valid)) - ec = gpu_info->static_info.engine_count; + ec = gpu_info->static_info.engine_count; else - ec = 1; + ec = 1; avg_delta_secs = ((double)total_delta / gpu_info->processes_count) / 1000000000.0; max_freq_hz = gpu_info->dynamic_info.gpu_clock_speed_max * 1000000; - utilisation_rate = (unsigned int)((((double)gfx_total_process_cycles) / (((double)max_freq_hz) * avg_delta_secs * ec)) * 100); + utilisation_rate = + (unsigned int)((((double)gfx_total_process_cycles) / (((double)max_freq_hz) * avg_delta_secs * ec)) * 100); utilisation_rate = utilisation_rate > 100 ? 
100 : utilisation_rate; SET_GPUINFO_DYNAMIC(&gpu_info->dynamic_info, gpu_util_rate, utilisation_rate); diff --git a/src/extract_gpuinfo_amdgpu.c b/src/extract_gpuinfo_amdgpu.c index f64c353c..cb4331f0 100644 --- a/src/extract_gpuinfo_amdgpu.c +++ b/src/extract_gpuinfo_amdgpu.c @@ -37,10 +37,13 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include #include @@ -49,6 +52,9 @@ #include #include +extern bool nvtop_debug_amdgpu_metrics; +extern bool nvtop_enable_pcie_bw_sleep; + // extern const char *amdgpu_parse_marketing_name(struct amdgpu_gpu_info *info); @@ -120,9 +126,15 @@ struct gpu_info_amdgpu { // We poll the fan frequently enough and want to avoid the open/close overhead of the sysfs file FILE *fanSpeedFILE; // FILE* for this device current fan speed - FILE *PCIeBW; // FILE* for this device PCIe bandwidth over one second + FILE *fanRPMFILE; // FILE* for raw RPM reading (always fan1_input) FILE *powerCap; // FILE* for this device power cap + // gpu_metrics sysfs file descriptor for non-blocking PCIe bandwidth reading + // (replaces pcie_bw which blocks for 1 second per read due to kernel msleep(1000)) + int gpuMetricsFD; + uint64_t last_pcie_bw_acc; // Previous pcie_bandwidth_acc value for delta computation + bool has_pcie_bw_acc_prev; // Whether we have a previous accumulated value + nvtop_device *amdgpuDevice; // The AMDGPU driver device nvtop_device *hwmonDevice; // The AMDGPU driver hwmon device @@ -130,6 +142,9 @@ struct gpu_info_amdgpu { // Used to compute the actual fan speed unsigned maxFanValue; + + // Asynchronous PCIe Bandwidth fetching thread (Fallback if gpuMetricsFD < 0 or missing PCIe) + FILE *PCIeBW; // FILE* for this device PCIe bandwidth over one second }; unsigned amdgpu_count; @@ -142,6 +157,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info); static void 
gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info); static void gpuinfo_amdgpu_get_running_processes(struct gpu_info *_gpu_info); +static int rewindAndReadPattern(FILE *file, const char *format, ...); struct gpu_vendor gpu_vendor_amdgpu = { .init = gpuinfo_amdgpu_init, @@ -235,8 +251,14 @@ static bool gpuinfo_amdgpu_init(void) { static void gpuinfo_amdgpu_shutdown(void) { for (unsigned i = 0; i < amdgpu_count; ++i) { struct gpu_info_amdgpu *gpu_info = &gpu_infos[i]; + if (gpu_info->fanSpeedFILE) fclose(gpu_info->fanSpeedFILE); + // Only close if it's a separate file handle (not shared with fanSpeedFILE) + if (gpu_info->fanRPMFILE && gpu_info->fanRPMFILE != gpu_info->fanSpeedFILE) + fclose(gpu_info->fanRPMFILE); + if (gpu_info->gpuMetricsFD >= 0) + close(gpu_info->gpuMetricsFD); if (gpu_info->PCIeBW) fclose(gpu_info->PCIeBW); if (gpu_info->powerCap) @@ -332,15 +354,32 @@ static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) { // Look for which fan to use (PWM or RPM) gpu_info->fanSpeedFILE = NULL; + gpu_info->fanRPMFILE = NULL; unsigned pwmIsEnabled; int NreadPatterns = readAttributeFromDevice(gpu_info->hwmonDevice, "pwm1_enable", "%u", &pwmIsEnabled); bool usePWMSensor = NreadPatterns == 1 && pwmIsEnabled > 0; bool useRPMSensor = false; - if (!usePWMSensor) { + // When pwm1_enable=2 (automatic), the driver/firmware controls the fan + // directly and pwm1 often reads 0 regardless of actual speed. + // Prefer fan1_input (RPM tachometer) which always reflects reality. 
+ if (usePWMSensor && pwmIsEnabled == 2) { + // Check if RPM tachometer is available and readable + unsigned rpmCheck; + NreadPatterns = readAttributeFromDevice(gpu_info->hwmonDevice, "fan1_input", "%u", &rpmCheck); + if (NreadPatterns == 1) { + unsigned rpmMax; + NreadPatterns = readAttributeFromDevice(gpu_info->hwmonDevice, "fan1_max", "%u", &rpmMax); + if (NreadPatterns == 1 && rpmMax > 0) { + usePWMSensor = false; + useRPMSensor = true; + } + } + } + if (!usePWMSensor && !useRPMSensor) { unsigned rpmIsEnabled; NreadPatterns = readAttributeFromDevice(gpu_info->hwmonDevice, "fan1_enable", "%u", &rpmIsEnabled); - useRPMSensor = NreadPatterns && rpmIsEnabled > 0; + useRPMSensor = NreadPatterns == 1 && rpmIsEnabled > 0; } // Either RPM or PWM or neither assert((useRPMSensor ^ usePWMSensor) || (!useRPMSensor && !usePWMSensor)); @@ -360,6 +399,21 @@ static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) { } } } + + // Always try to open fan1_input for raw RPM display, independent of + // which sensor is used for percentage calculation. + // If fanSpeedFILE already reads fan1_input (useRPMSensor), reuse it. 
+ if (useRPMSensor && gpu_info->fanSpeedFILE) { + gpu_info->fanRPMFILE = gpu_info->fanSpeedFILE; + } else { + int fanRPMFD = openat(hwmonFD, "fan1_input", O_RDONLY); + if (fanRPMFD >= 0) { + gpu_info->fanRPMFILE = fdopen(fanRPMFD, "r"); + if (!gpu_info->fanRPMFILE) + close(fanRPMFD); + } + } + // Open the power cap file for dynamic info gathering gpu_info->powerCap = NULL; int powerCapFD = openat(hwmonFD, "power1_cap", O_RDONLY); @@ -370,11 +424,29 @@ static void initDeviceSysfsPaths(struct gpu_info_amdgpu *gpu_info) { } int sysfsFD = open(devicePath, O_RDONLY); - // Open the PCIe bandwidth file for dynamic info gathering + // Open the gpu_metrics file for non-blocking PCIe bandwidth reading + // (pcie_bw sysfs blocks for 1 second per read due to kernel msleep(1000)) + gpu_info->gpuMetricsFD = openat(sysfsFD, "gpu_metrics", O_RDONLY); + gpu_info->last_pcie_bw_acc = 0; + gpu_info->has_pcie_bw_acc_prev = false; + + bool metrics_has_pcie = false; + if (gpu_info->gpuMetricsFD >= 0) { + uint8_t header[4]; + if (pread(gpu_info->gpuMetricsFD, header, sizeof(header), 0) == 4) { + if (header[2] == 1 && header[3] >= 4) { + metrics_has_pcie = true; + } + } + } + + // Open the legacy PCIe bandwidth file for async worker fallback gathering gpu_info->PCIeBW = NULL; - int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY); - if (pcieBWFD) { - gpu_info->PCIeBW = fdopen(pcieBWFD, "r"); + if (!metrics_has_pcie) { + int pcieBWFD = openat(sysfsFD, "pcie_bw", O_RDONLY); + if (pcieBWFD >= 0) { + gpu_info->PCIeBW = fdopen(pcieBWFD, "r"); + } } close(sysfsFD); @@ -466,6 +538,7 @@ static bool gpuinfo_amdgpu_get_device_handles(struct list_head *devices, unsigne list_add_tail(&gpu_infos[amdgpu_count].base.list, devices); // Register a fdinfo callback for this GPU processinfo_register_fdinfo_callback(parse_drm_fdinfo_amd, &gpu_infos[amdgpu_count].base); + amdgpu_count++; } else { _drmFreeVersion(ver); @@ -627,6 +700,13 @@ static void gpuinfo_amdgpu_populate_static_info(struct gpu_info 
*_gpu_info) { if (nReadPatterns == 1) { SET_GPUINFO_STATIC(static_info, temperature_shutdown_threshold, emergencyTemp); } + + // Fan RPM max (for UI display alongside percentage) + unsigned fanRPMMax; + nReadPatterns = readAttributeFromDevice(gpu_info->hwmonDevice, "fan1_max", "%u", &fanRPMMax); + if (nReadPatterns == 1 && fanRPMMax > 0) { + SET_GPUINFO_STATIC(static_info, fan_rpm_max, fanRPMMax); + } } nvtop_pcie_link max_link_characteristics; @@ -705,16 +785,29 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { // Memory usage struct drm_amdgpu_memory_info memory_info; + struct timespec t_query_start, t_query_end; + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_start); + } if (libdrm_amdgpu_handle && _amdgpu_query_info) last_libdrm_return_status = _amdgpu_query_info(gpu_info->amdgpu_device, AMDGPU_INFO_MEMORY, sizeof(memory_info), &memory_info); else last_libdrm_return_status = 1; + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_end); + double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0; + fprintf(stderr, "[DEBUG] AMD _amdgpu_query_info(AMDGPU_INFO_MEMORY) took %.2f ms\n", elapsed_q); + } if (!last_libdrm_return_status) { if (gpu_info->base.static_info.integrated_graphics) { - SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.vram.total_heap_size + memory_info.gtt.total_heap_size); + SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, + memory_info.vram.total_heap_size + memory_info.gtt.total_heap_size); SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.vram.heap_usage + memory_info.gtt.heap_usage); - SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, memory_info.vram.total_heap_size + memory_info.gtt.total_heap_size - dynamic_info->used_memory); + SET_GPUINFO_DYNAMIC(dynamic_info, free_memory, + memory_info.vram.total_heap_size + memory_info.gtt.total_heap_size - + 
dynamic_info->used_memory); } else { SET_GPUINFO_DYNAMIC(dynamic_info, total_memory, memory_info.vram.total_heap_size); SET_GPUINFO_DYNAMIC(dynamic_info, used_memory, memory_info.vram.heap_usage); @@ -736,11 +829,37 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { // Fan speed unsigned currentFanSpeed; + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_start); + } int patternsMatched = rewindAndReadPattern(gpu_info->fanSpeedFILE, "%u", &currentFanSpeed); + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_end); + double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0; + fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(fanSpeedFILE) took %.2f ms\n", elapsed_q); + } if (patternsMatched == 1) { SET_GPUINFO_DYNAMIC(dynamic_info, fan_speed, currentFanSpeed * 100 / gpu_info->maxFanValue); } + // Fan RPM (raw tachometer reading) + if (gpu_info->fanRPMFILE) { + unsigned currentRPM; + // If fanRPMFILE is the same as fanSpeedFILE (RPM sensor used for both), + // reuse the value we already read instead of reading the file again. 
+ if (gpu_info->fanRPMFILE == gpu_info->fanSpeedFILE) { + if (patternsMatched == 1) { + SET_GPUINFO_DYNAMIC(dynamic_info, fan_rpm, currentFanSpeed); + } + } else { + patternsMatched = rewindAndReadPattern(gpu_info->fanRPMFILE, "%u", &currentRPM); + if (patternsMatched == 1) { + SET_GPUINFO_DYNAMIC(dynamic_info, fan_rpm, currentRPM); + } + } + } + // Device power usage if (libdrm_amdgpu_handle && _amdgpu_query_sensor_info) last_libdrm_return_status = @@ -759,21 +878,93 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { SET_GPUINFO_DYNAMIC(dynamic_info, pcie_link_gen, pcieGen); } - // PCIe bandwidth - if (gpu_info->PCIeBW) { - // According to https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/pm/amdgpu_pm.c, under the pcie_bw - // section, we should be able to read the number of packets received and sent by the GPU and get the maximum payload - // size during the last second. This is untested but should work when the file is populated by the driver. + // PCIe bandwidth via gpu_metrics (non-blocking, replaces pcie_bw which has a 1-second kernel sleep) + if (gpu_info->gpuMetricsFD >= 0) { + // Read the gpu_metrics binary file from sysfs + // The file starts with a 4-byte header: structure_size(u16), format_revision(u8), content_revision(u8) + // For dGPU metrics v1_4+, pcie_bandwidth_inst is available at a known offset + uint8_t metrics_buf[256]; // Large enough for the header + PCIe bandwidth fields + ssize_t nread = pread(gpu_info->gpuMetricsFD, metrics_buf, sizeof(metrics_buf), 0); + if (nread >= 4) { + uint16_t structure_size; + memcpy(&structure_size, metrics_buf, sizeof(structure_size)); + uint8_t format_revision = metrics_buf[2]; + uint8_t content_revision = metrics_buf[3]; + + // gpu_metrics v1_4+ (dGPU) has pcie_bandwidth_acc and pcie_bandwidth_inst + // format_revision == 1 means dGPU metrics, content_revision >= 4 means v1_4+. The full structure is larger than metrics_buf, so only require that it extends past pcie_bandwidth_inst (byte 80) rather than a complete read + if (format_revision == 1 && content_revision >= 4 && structure_size >= 80) { + // 
In gpu_metrics_v1_4, the layout after the header has pcie_bandwidth_acc and pcie_bandwidth_inst + // as uint64_t fields. We use pcie_bandwidth_inst (instantaneous bandwidth in GB/sec) + // and split evenly as an approximation for RX/TX since the kernel doesn't separate them. + // + // Field offsets within gpu_metrics_v1_4 (after the 4-byte header): + // The pcie_bandwidth_inst field follows pcie_bandwidth_acc. + // We scan from the structure definition to find pcie_bandwidth_acc offset. + // + // Offset calculation for gpu_metrics_v1_4: + // header(4) + temp_hotspot(2) + temp_mem(2) + temp_vrsoc(2) = 10 + // curr_socket_power(2) = 12 + // avg_gfx_activity(2) + avg_umc_activity(2) + vcn_activity[4](8) = 24 + // energy_accumulator(8) = 32 + // system_clock_counter(8) = 40 + // throttle_status(4) = 44 + // gfxclk_lock_status(4) = 48 + // pcie_link_width(2) + pcie_link_speed(2) = 52 + // xgmi_link_width(2) + xgmi_link_speed(2) = 56 + // gfx_activity_acc(4) + mem_activity_acc(4) = 64 + // pcie_bandwidth_acc(8) = offset 64, ends at 72 + // pcie_bandwidth_inst(8) = offset 72, ends at 80 + // const size_t pcie_bw_acc_offset = 64; + const size_t pcie_bw_inst_offset = 72; + if (nread >= (ssize_t)(pcie_bw_inst_offset + sizeof(uint64_t))) { + uint64_t pcie_bw_inst; + memcpy(&pcie_bw_inst, metrics_buf + pcie_bw_inst_offset, sizeof(pcie_bw_inst)); + + // In gpu_metrics, if a sensor is unsupported, it often reports 0xFFFFFFFFFFFFFFFF (UINT64_MAX) + if (pcie_bw_inst != UINT64_MAX) { + // pcie_bandwidth_inst is in GB/sec, convert to KiB/sec + // Split evenly between RX and TX as a best approximation + uint64_t total_kib = pcie_bw_inst * 1024 * 1024; // GB/sec -> KiB/sec + SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, total_kib / 2); + SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, total_kib / 2); + } + } + } + + if (nvtop_debug_amdgpu_metrics) { + fprintf(stderr, "[DEBUG] AMD gpu_metrics read %zd bytes: format_revision=%u, content_revision=%u\n", nread, + format_revision, 
content_revision); + fprintf(stderr, "[DEBUG] Raw gpu_metrics hex dump:\n"); + for (ssize_t i = 0; i < nread; i++) { + fprintf(stderr, "%02x ", metrics_buf[i]); + if ((i + 1) % 16 == 0) + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + } + } + } else if (gpu_info->PCIeBW && nvtop_enable_pcie_bw_sleep) { uint64_t received, transmitted; int maxPayloadSize; + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_start); + } int NreadPatterns = rewindAndReadPattern(gpu_info->PCIeBW, "%" SCNu64 " %" SCNu64 " %i", &received, &transmitted, &maxPayloadSize); + if (nvtop_debug_amdgpu_metrics) { + clock_gettime(CLOCK_MONOTONIC, &t_query_end); + double elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0; + fprintf(stderr, "[DEBUG] AMD pcie_bw inline read took %.2f ms. Matches: %d\n", elapsed_q, NreadPatterns); + } if (NreadPatterns == 3) { received *= maxPayloadSize; transmitted *= maxPayloadSize; - // Set in KiB + // Store in KiB received /= 1024; transmitted /= 1024; + SET_GPUINFO_DYNAMIC(dynamic_info, pcie_rx, received); SET_GPUINFO_DYNAMIC(dynamic_info, pcie_tx, transmitted); } @@ -787,6 +978,12 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) { SET_GPUINFO_DYNAMIC(dynamic_info, power_draw_max, powerCap / 1000); } } + + // AMDGPU does not expose encode/decode utilization through DRM sensor queries. + // Set baseline to 0; actual per-process usage will be aggregated in + // gpuinfo_fix_dynamic_info_from_process_info. 
+ SET_GPUINFO_DYNAMIC(dynamic_info, encoder_rate, 0); + SET_GPUINFO_DYNAMIC(dynamic_info, decoder_rate, 0); } static const char drm_amdgpu_pdev_old[] = "pdev"; diff --git a/src/extract_gpuinfo_nvidia.c b/src/extract_gpuinfo_nvidia.c index 25d22809..02cac222 100644 --- a/src/extract_gpuinfo_nvidia.c +++ b/src/extract_gpuinfo_nvidia.c @@ -29,6 +29,7 @@ #include #include #include +#include #define NVML_SUCCESS 0 #define NVML_ERROR_NOT_SUPPORTED 3 @@ -170,8 +171,8 @@ typedef struct { unsigned long long usedGpuMemory; unsigned int gpuInstanceId; unsigned int computeInstanceId; - // This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver nvml.h - // unsigned long long usedGpuCcProtectedMemory; + // This is present in https://github.com/NVIDIA/DCGM/blob/master/sdk/nvidia/nvml/nvml.h#L294 but not the latest driver + // nvml.h unsigned long long usedGpuCcProtectedMemory; } nvmlProcessInfo_v3_t; static nvmlReturn_t (*nvmlDeviceGetGraphicsRunningProcesses_v1)(nvmlDevice_t device, unsigned int *infoCount, @@ -394,11 +395,11 @@ static bool gpuinfo_nvidia_init(void) { goto init_error_clean_exit; nvmlDeviceGetGraphicsRunningProcesses[1] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v1; nvmlDeviceGetGraphicsRunningProcesses[2] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v2; nvmlDeviceGetGraphicsRunningProcesses[3] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetGraphicsRunningProcesses_v3; nvmlDeviceGetComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v3"); 
nvmlDeviceGetComputeRunningProcesses_v2 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetComputeRunningProcesses_v2"); @@ -408,11 +409,11 @@ static bool gpuinfo_nvidia_init(void) { goto init_error_clean_exit; nvmlDeviceGetComputeRunningProcesses[1] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v1; nvmlDeviceGetComputeRunningProcesses[2] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v2; nvmlDeviceGetComputeRunningProcesses[3] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetComputeRunningProcesses_v3; // These functions were not available in older NVML libs; don't error if not present nvmlDeviceGetMPSComputeRunningProcesses_v3 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses_v3"); @@ -420,11 +421,11 @@ static bool gpuinfo_nvidia_init(void) { nvmlDeviceGetMPSComputeRunningProcesses_v1 = dlsym(libnvidia_ml_handle, "nvmlDeviceGetMPSComputeRunningProcesses"); nvmlDeviceGetMPSComputeRunningProcesses[1] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v1; nvmlDeviceGetMPSComputeRunningProcesses[2] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v2; nvmlDeviceGetMPSComputeRunningProcesses[3] = - (nvmlReturn_t(*)(nvmlDevice_t, unsigned int *, void *))nvmlDeviceGetMPSComputeRunningProcesses_v3; + (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *, void 
*))nvmlDeviceGetMPSComputeRunningProcesses_v3; // These ones might not be available nvmlDeviceGetProcessUtilization = dlsym(libnvidia_ml_handle, "nvmlDeviceGetProcessUtilization"); @@ -882,30 +883,44 @@ static void gpuinfo_nvidia_get_running_processes(struct gpu_info *_gpu_info) { } } memset(_gpu_info->processes, 0, _gpu_info->processes_count * sizeof(*_gpu_info->processes)); + unsigned valid_procs = 0; for (unsigned i = 0; i < graphical_count + compute_count; ++i) { - if (i < graphical_count) - _gpu_info->processes[i].type = gpu_process_graphical; - else - _gpu_info->processes[i].type = gpu_process_compute; + pid_t parsed_pid = -1; switch (version) { case 2: { nvmlProcessInfo_v2_t *pinfo = (nvmlProcessInfo_v2_t *)retrieved_infos; - _gpu_info->processes[i].pid = pinfo[i].pid; - _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + parsed_pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory; } break; case 3: { nvmlProcessInfo_v3_t *pinfo = (nvmlProcessInfo_v3_t *)retrieved_infos; - _gpu_info->processes[i].pid = pinfo[i].pid; - _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + parsed_pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory; } break; default: { nvmlProcessInfo_v1_t *pinfo = (nvmlProcessInfo_v1_t *)retrieved_infos; - _gpu_info->processes[i].pid = pinfo[i].pid; - _gpu_info->processes[i].gpu_memory_usage = pinfo[i].usedGpuMemory; + parsed_pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].pid = pinfo[i].pid; + _gpu_info->processes[valid_procs].gpu_memory_usage = pinfo[i].usedGpuMemory; } break; } - SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[i].valid); + + // Do not display nvtop in its own interface. 
+ if (parsed_pid == getpid()) { + continue; + } + + if (i < graphical_count) + _gpu_info->processes[valid_procs].type = gpu_process_graphical; + else + _gpu_info->processes[valid_procs].type = gpu_process_compute; + + SET_VALID(gpuinfo_process_gpu_memory_usage_valid, _gpu_info->processes[valid_procs].valid); + valid_procs++; } + _gpu_info->processes_count = valid_procs; } } // If the GPU is in MIG mode; process utilization is not supported diff --git a/src/extract_processinfo_fdinfo.c b/src/extract_processinfo_fdinfo.c index b8be7dc1..a2218a48 100644 --- a/src/extract_processinfo_fdinfo.c +++ b/src/extract_processinfo_fdinfo.c @@ -135,6 +135,10 @@ void processinfo_sweep_fdinfos(void) { if (!client_pid) goto next; + // Do not show nvtop itself in its internal processes list. + if (client_pid == (unsigned int)getpid()) + goto next; + fd_dir_fd = openat(pid_dir_fd, "fd", O_DIRECTORY); if (fd_dir_fd < 0) goto next; @@ -277,12 +281,10 @@ void processinfo_sweep_fdinfos(void) { process_info->dec_engine_used + processes_info_local.dec_engine_used); } if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, gpu_cycles)) { - SET_GPUINFO_PROCESS(process_info, gpu_cycles, - process_info->gpu_cycles + processes_info_local.gpu_cycles); + SET_GPUINFO_PROCESS(process_info, gpu_cycles, process_info->gpu_cycles + processes_info_local.gpu_cycles); } if (GPUINFO_PROCESS_FIELD_VALID(&processes_info_local, sample_delta)) { - SET_GPUINFO_PROCESS(process_info, sample_delta, - process_info->sample_delta + processes_info_local.sample_delta); + SET_GPUINFO_PROCESS(process_info, sample_delta, process_info->sample_delta + processes_info_local.sample_delta); } } diff --git a/src/interface.c b/src/interface.c index 3aa80463..6b695735 100644 --- a/src/interface.c +++ b/src/interface.c @@ -19,9 +19,9 @@ * */ +#include "nvtop/interface.h" #include "nvtop/common.h" #include "nvtop/extract_gpuinfo_common.h" -#include "nvtop/interface.h" #include "nvtop/interface_common.h" #include 
"nvtop/interface_internal_common.h" #include "nvtop/interface_layout_selection.h" @@ -43,7 +43,7 @@ #include static unsigned int sizeof_device_field[device_field_count] = { - [device_name] = 11, [device_fan_speed] = 11, [device_temperature] = 10, [device_power] = 15, + [device_name] = 11, [device_fan_speed] = 26, [device_temperature] = 10, [device_power] = 15, [device_clock] = 11, [device_mem_clock] = 12, [device_pcie] = 46, [device_shadercores] = 7, [device_l2features] = 11, [device_execengines] = 11, }; @@ -761,8 +761,19 @@ static void draw_devices(struct list_head *devices, struct nvtop_interface *inte // FAN if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_speed)) { - mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%% ", - device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed); + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_rpm) && + GPUINFO_STATIC_FIELD_VALID(&device->static_info, fan_rpm_max)) { + mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%% [%u/%u RPM]", + device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed, + device->dynamic_info.fan_rpm, device->static_info.fan_rpm_max); + } else if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_rpm)) { + mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%% [%u RPM]", + device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed, + device->dynamic_info.fan_rpm); + } else { + mvwprintw(dev->fan_speed, 0, 0, " FAN %3u%% ", + device->dynamic_info.fan_speed > 100 ? 
100 : device->dynamic_info.fan_speed); + } mvwchgat(dev->fan_speed, 0, 1, 3, 0, cyan_color, NULL); } else if (device->static_info.integrated_graphics) { mvwprintw(dev->fan_speed, 0, 0, " CPU-FAN "); @@ -2094,6 +2105,18 @@ bool show_information_messages(unsigned num_messages, const char **messages) { return dontShowAgainOption; } +static void format_memory(char *buf, size_t bufsz, uint64_t bytes) { + // Always convert to MiB (1024^2 bytes) + double val = (double)bytes / (1024.0 * 1024.0); + // Format with up to two decimal places (adjust as needed) + if (val >= 100.0) + snprintf(buf, bufsz, "%.1f MiB", val); + else if (val >= 10.0) + snprintf(buf, bufsz, "%.2f MiB", val); + else + snprintf(buf, bufsz, "%.2f MiB", val); // for small values, still two decimals +} + void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { struct gpu_info *device; @@ -2104,74 +2127,145 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { const char *indent_level_six = " "; const char *indent_level_eight = " "; - const char *device_name_field = "device_name"; - const char *gpu_clock_field = "gpu_clock"; - const char *mem_clock_field = "mem_clock"; - const char *temp_field = "temp"; - const char *fan_field = "fan_speed"; - const char *power_field = "power_draw"; - const char *gpu_util_field = "gpu_util"; - const char *mem_util_field = "mem_util"; - const char *mem_total_field = "mem_total"; - const char *mem_used_field = "mem_used"; - const char *mem_free_field = "mem_free"; - printf("%s{\n", indent_level_two); - // Device Name + // ----- PCI address ----- + printf("%s\"pci\": \"%s\",\n", indent_level_four, device->pdev); + + // ----- Static info ----- + printf("%s\"integrated_graphics\": %s,\n", indent_level_four, + device->static_info.integrated_graphics ? "true" : "false"); + printf("%s\"encode_decode_shared\": %s,\n", indent_level_four, + device->static_info.encode_decode_shared ? 
"true" : "false"); + + // Slowdown threshold + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, temperature_slowdown_threshold)) { + unsigned int temp_raw = device->static_info.temperature_slowdown_threshold; + unsigned int temp_celsius = temp_raw / 1000; // convert to degrees + unsigned int temp_convert; + if (!use_fahrenheit_option) + temp_convert = temp_celsius; + else + temp_convert = (unsigned)(32 + nearbyint(temp_celsius * 1.8)); + printf("%s\"temp_slowdown_threshold\": \"%u%s\",\n", indent_level_four, temp_convert, + use_fahrenheit_option ? "F" : "C"); + } else { + printf("%s\"temp_slowdown_threshold\": null,\n", indent_level_four); + } + + // Shutdown threshold + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, temperature_shutdown_threshold)) { + unsigned int temp_raw = device->static_info.temperature_shutdown_threshold; + unsigned int temp_celsius = temp_raw / 1000; + unsigned int temp_convert; + if (!use_fahrenheit_option) + temp_convert = temp_celsius; + else + temp_convert = (unsigned)(32 + nearbyint(temp_celsius * 1.8)); + printf("%s\"temp_shutdown_threshold\": \"%u%s\",\n", indent_level_four, temp_convert, + use_fahrenheit_option ? 
"F" : "C"); + } else { + printf("%s\"temp_shutdown_threshold\": null,\n", indent_level_four); + } + + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, max_pcie_gen)) + printf("%s\"max_pcie_gen\": %u,\n", indent_level_four, device->static_info.max_pcie_gen); + else + printf("%s\"max_pcie_gen\": null,\n", indent_level_four); + + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, max_pcie_link_width)) + printf("%s\"max_pcie_link_width\": %u,\n", indent_level_four, device->static_info.max_pcie_link_width); + else + printf("%s\"max_pcie_link_width\": null,\n", indent_level_four); + + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, n_shared_cores)) + printf("%s\"n_shared_cores\": %u,\n", indent_level_four, device->static_info.n_shared_cores); + + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, l2cache_size)) + printf("%s\"l2cache_size\": %u,\n", indent_level_four, device->static_info.l2cache_size); + + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, n_exec_engines)) + printf("%s\"n_exec_engines\": %u,\n", indent_level_four, device->static_info.n_exec_engines); + + // ----- Dynamic info (current + max where available) ----- + // Device name (original field) if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, device_name)) - printf("%s\"%s\": \"%s\",\n", indent_level_four, device_name_field, device->static_info.device_name); + printf("%s\"device_name\": \"%s\",\n", indent_level_four, device->static_info.device_name); else - printf("%s\"%s\": null,\n", indent_level_four, device_name_field); + printf("%s\"device_name\": null,\n", indent_level_four); - // GPU Clock Speed + // GPU clock (current and max) if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, gpu_clock_speed)) - printf("%s\"%s\": \"%uMHz\",\n", indent_level_four, gpu_clock_field, device->dynamic_info.gpu_clock_speed); + printf("%s\"gpu_clock\": \"%uMHz\",\n", indent_level_four, device->dynamic_info.gpu_clock_speed); + else + printf("%s\"gpu_clock\": null,\n", indent_level_four); + if 
(GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, gpu_clock_speed_max)) + printf("%s\"gpu_clock_max\": \"%uMHz\",\n", indent_level_four, device->dynamic_info.gpu_clock_speed_max); else - printf("%s\"%s\": null,\n", indent_level_four, gpu_clock_field); + printf("%s\"gpu_clock_max\": null,\n", indent_level_four); - // MEM Clock Speed + // MEM clock (current and max) if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, mem_clock_speed)) - printf("%s\"%s\": \"%uMHz\",\n", indent_level_four, mem_clock_field, device->dynamic_info.mem_clock_speed); + printf("%s\"mem_clock\": \"%uMHz\",\n", indent_level_four, device->dynamic_info.mem_clock_speed); else - printf("%s\"%s\": null,\n", indent_level_four, mem_clock_field); + printf("%s\"mem_clock\": null,\n", indent_level_four); + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, mem_clock_speed_max)) + printf("%s\"mem_clock_max\": \"%uMHz\",\n", indent_level_four, device->dynamic_info.mem_clock_speed_max); + else + printf("%s\"mem_clock_max\": null,\n", indent_level_four); - // GPU Temperature + // GPU temperature (current) if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, gpu_temp)) { unsigned int temp_convert; if (!use_fahrenheit_option) temp_convert = device->dynamic_info.gpu_temp; else temp_convert = (unsigned)(32 + nearbyint(device->dynamic_info.gpu_temp * 1.8)); - - printf("%s\"%s\": \"%u%s\",\n", indent_level_four, temp_field, temp_convert, use_fahrenheit_option ? "F" : "C"); + printf("%s\"temp\": \"%u%s\",\n", indent_level_four, temp_convert, use_fahrenheit_option ? 
"F" : "C"); } else { - printf("%s\"%s\": null,\n", indent_level_four, temp_field); + printf("%s\"temp\": null,\n", indent_level_four); } - // Fan speed - if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_speed)) - printf("%s\"%s\": \"%u%%\",\n", indent_level_four, fan_field, + // Fan speed (percentage or RPM fallback) + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_speed)) { + printf("%s\"fan_speed\": \"%u%%\",\n", indent_level_four, device->dynamic_info.fan_speed > 100 ? 100 : device->dynamic_info.fan_speed); - else if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_rpm)) - printf("%s\"%s\": \"%uRPM\",\n", indent_level_four, fan_field, + } else if (device->static_info.integrated_graphics) { + printf("%s\"fan_speed\": \"CPU Fan\",\n", indent_level_four); + } else { + printf("%s\"fan_speed\": null,\n", indent_level_four); + } + + // Fan RPM (raw data) + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, fan_rpm)) { + printf("%s\"fan_rpm\": \"%u\",\n", indent_level_four, device->dynamic_info.fan_rpm > 9999 ? 
9999 : device->dynamic_info.fan_rpm); - else if (device->static_info.integrated_graphics) - printf("%s\"%s\": \"CPU Fan\",\n", indent_level_four, fan_field); - else - printf("%s\"%s\": null,\n", indent_level_four, fan_field); + } else { + printf("%s\"fan_rpm\": null,\n", indent_level_four); + } - // Power draw + // Fan RPM Max + if (GPUINFO_STATIC_FIELD_VALID(&device->static_info, fan_rpm_max)) { + printf("%s\"fan_rpm_max\": \"%u\",\n", indent_level_four, device->static_info.fan_rpm_max); + } else { + printf("%s\"fan_rpm_max\": null,\n", indent_level_four); + } + + // Power draw (current and max) if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, power_draw)) - printf("%s\"%s\": \"%uW\",\n", indent_level_four, power_field, device->dynamic_info.power_draw / 1000); + printf("%s\"power_draw\": \"%uW\",\n", indent_level_four, device->dynamic_info.power_draw / 1000); + else + printf("%s\"power_draw\": null,\n", indent_level_four); + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, power_draw_max)) + printf("%s\"power_draw_max\": \"%uW\",\n", indent_level_four, device->dynamic_info.power_draw_max / 1000); else - printf("%s\"%s\": null,\n", indent_level_four, power_field); + printf("%s\"power_draw_max\": null,\n", indent_level_four); - // GPU Utilization + // GPU utilization if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, gpu_util_rate)) - printf("%s\"%s\": \"%u%%\",\n", indent_level_four, gpu_util_field, device->dynamic_info.gpu_util_rate); + printf("%s\"gpu_util\": \"%u%%\",\n", indent_level_four, device->dynamic_info.gpu_util_rate); else - printf("%s\"%s\": null,\n", indent_level_four, gpu_util_field); + printf("%s\"gpu_util\": null,\n", indent_level_four); // Encode / Decode if (device->static_info.encode_decode_shared) { @@ -2193,28 +2287,58 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { printf("null,\n"); } - // Memory Utilization + // Memory utilization if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, 
mem_util_rate)) - printf("%s\"%s\": \"%u%%\",\n", indent_level_four, mem_util_field, device->dynamic_info.mem_util_rate); + printf("%s\"mem_util\": \"%u%%\",\n", indent_level_four, device->dynamic_info.mem_util_rate); else - printf("%s\"%s\": null,\n", indent_level_four, mem_util_field); - // Memory Total - if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, total_memory)) - printf("%s\"%s\": \"%llu\",\n", indent_level_four, mem_total_field, device->dynamic_info.total_memory); + printf("%s\"mem_util\": null,\n", indent_level_four); + + // Memory total / used / free (human-readable) + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, total_memory)) { + char mem_buf[32]; + format_memory(mem_buf, sizeof(mem_buf), device->dynamic_info.total_memory); + printf("%s\"mem_total\": \"%s\",\n", indent_level_four, mem_buf); + } else { + printf("%s\"mem_total\": null,\n", indent_level_four); + } + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, used_memory)) { + char mem_buf[32]; + format_memory(mem_buf, sizeof(mem_buf), device->dynamic_info.used_memory); + printf("%s\"mem_used\": \"%s\",\n", indent_level_four, mem_buf); + } else { + printf("%s\"mem_used\": null,\n", indent_level_four); + } + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, free_memory)) { + char mem_buf[32]; + format_memory(mem_buf, sizeof(mem_buf), device->dynamic_info.free_memory); + printf("%s\"mem_free\": \"%s\",\n", indent_level_four, mem_buf); + } else { + printf("%s\"mem_free\": null,\n", indent_level_four); + } + + // PCIe link status (current) + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_link_gen)) + printf("%s\"pcie_link_gen\": %u,\n", indent_level_four, device->dynamic_info.pcie_link_gen); else - printf("%s\"%s\": null,\n", indent_level_four, mem_total_field); - // Memory Used - if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, used_memory)) - printf("%s\"%s\": \"%llu\",\n", indent_level_four, mem_used_field, device->dynamic_info.used_memory); + 
printf("%s\"pcie_link_gen\": null,\n", indent_level_four); + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_link_width)) + printf("%s\"pcie_link_width\": %u,\n", indent_level_four, device->dynamic_info.pcie_link_width); else - printf("%s\"%s\": null,\n", indent_level_four, mem_used_field); - // Memory Available - if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, free_memory)) - printf("%s\"%s\": \"%llu\",\n", indent_level_four, mem_free_field, device->dynamic_info.free_memory); + printf("%s\"pcie_link_width\": null,\n", indent_level_four); + + // PCIe bandwidth (KiB/s) + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_rx)) + printf("%s\"pcie_rx\": \"%uKiB/s\",\n", indent_level_four, device->dynamic_info.pcie_rx); + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, pcie_tx)) + printf("%s\"pcie_tx\": \"%uKiB/s\",\n", indent_level_four, device->dynamic_info.pcie_tx); + + // Effective load rate + if (GPUINFO_DYNAMIC_FIELD_VALID(&device->dynamic_info, effective_load_rate)) + printf("%s\"effective_load_rate\": \"%u%%\",\n", indent_level_four, device->dynamic_info.effective_load_rate); else - printf("%s\"%s\": null,\n", indent_level_four, mem_free_field); + printf("%s\"effective_load_rate\": null,\n", indent_level_four); - // Processes + // ----- Processes ----- printf("%s\"processes\" : [\n", indent_level_four); for (unsigned i = 0; i < device->processes_count; ++i) { struct gpu_process *proc = &device->processes[i]; @@ -2223,9 +2347,9 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { // PID printf("%s\"pid\": \"%d\",\n", indent_level_eight, proc->pid); + // Command line (escaped) printf("%s\"cmdline\": \"", indent_level_eight); for (char *li = proc->cmdline; *li != '\0'; li++) { - // We need to escape some characters for for json strings if (*li == '\n') { printf("\\n"); continue; @@ -2242,13 +2366,13 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { printf("\\t"); continue; } 
- // escaping backslash and quotes if (*li == '\\' || *li == '"') printf("\\"); printf("%c", *li); } printf("\",\n"); + // Process type printf("%s\"kind\": ", indent_level_eight); if (proc->type != gpu_process_unknown) { printf("\""); @@ -2272,7 +2396,7 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { } printf(",\n"); - // GPU memory usage + // User printf("%s\"user\": ", indent_level_eight); if (GPUINFO_PROCESS_FIELD_VALID(proc, user_name)) printf("\"%s\",\n", proc->user_name); @@ -2286,23 +2410,25 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { else printf("null,\n"); - // GPU memory usage + // GPU memory bytes (human-readable) printf("%s\"gpu_mem_bytes_alloc\": ", indent_level_eight); - if (GPUINFO_PROCESS_FIELD_VALID(proc, gpu_memory_usage)) - printf("\"%llu\",\n", proc->gpu_memory_usage); - else + if (GPUINFO_PROCESS_FIELD_VALID(proc, gpu_memory_usage)) { + char mem_buf[32]; + format_memory(mem_buf, sizeof(mem_buf), proc->gpu_memory_usage); + printf("\"%s\",\n", mem_buf); + } else { printf("null,\n"); + } - // GPU memory usage + // GPU memory percentage printf("%s\"gpu_mem_usage\": ", indent_level_eight); if (GPUINFO_PROCESS_FIELD_VALID(proc, gpu_memory_percentage)) printf("\"%u%%\",\n", proc->gpu_memory_percentage); else printf("null,\n"); - // Encode usage + // Encode / decode if (device->static_info.encode_decode_shared) { - // (Notice: no comma at the end as it's the last field here) printf("%s\"encode_decode\": ", indent_level_eight); if (GPUINFO_PROCESS_FIELD_VALID(proc, decode_usage)) printf("\"%u%%\"\n", proc->decode_usage); @@ -2314,7 +2440,6 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { printf("\"%u%%\",\n", proc->encode_usage); else printf("null,\n"); - // (Notice: no comma at the end as it's the last field here) printf("%s\"decode\": ", indent_level_eight); if (GPUINFO_PROCESS_FIELD_VALID(proc, decode_usage)) printf("\"%u%%\"\n", proc->decode_usage); @@ 
-2327,9 +2452,9 @@ void print_snapshot(struct list_head *devices, bool use_fahrenheit_option) { printf(","); printf("\n"); } - // (Notice: no comma at the end as it's the last field here) printf("%s]\n", indent_level_four); + // Close device object if (device->list.next == devices) printf("%s}\n", indent_level_two); else diff --git a/src/nvtop.c b/src/nvtop.c index 67954448..d32b96c6 100644 --- a/src/nvtop.c +++ b/src/nvtop.c @@ -28,6 +28,7 @@ #include "nvtop/version.h" #include +#include #include #include #include @@ -58,6 +59,9 @@ static void cont_handler(int signum) { signal_cont_received = 1; } +bool nvtop_debug_amdgpu_metrics = false; +bool nvtop_enable_pcie_bw_sleep = false; + static const char helpstring[] = "Available options:\n" " -d --delay : Select the refresh rate (1 == 0.1s)\n" " -v --version : Print the version and exit\n" @@ -75,8 +79,9 @@ static const char helpstring[] = "Available options:\n" "(default 30s, negative = always on screen)\n" " -h --help : Print help and exit\n" " -s --snapshot : Output the current gpu stats without ncurses" - "(useful for scripting)\n" - " -l --loop : Output the current gpu stats without ncurses in a loop\n"; + " -l --loop : Output the current gpu stats without ncurses in a loop\n" + " -S --pciespeed : Forces 1-second delay for PCIe bandwidth fallback (AMD only)\n" + " -D --debug : Output raw gpu_metrics data to stderr (AMD only)\n"; static const char versionString[] = "nvtop version " NVTOP_VERSION_STRING; @@ -95,10 +100,12 @@ static const struct option long_opts[] = { {.name = "reverse-abs", .has_arg = no_argument, .flag = NULL, .val = 'r'}, {.name = "snapshot", .has_arg = no_argument, .flag = NULL, .val = 's'}, {.name = "loop", .has_arg = no_argument, .flag = NULL, .val = 'l'}, + {.name = "pciespeed", .has_arg = no_argument, .flag = NULL, .val = 'S'}, + {.name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'D'}, {0, 0, 0, 0}, }; -static const char opts[] = "hvd:c:CfE:pPrisl"; +static const char opts[] = 
"hvd:c:CfE:pPrislSD"; int main(int argc, char **argv) { (void)setlocale(LC_CTYPE, ""); @@ -181,6 +188,12 @@ int main(int argc, char **argv) { case 'l': loop_snapshot = true; break; + case 'D': + nvtop_debug_amdgpu_metrics = true; + break; + case 'S': + nvtop_enable_pcie_bw_sleep = true; + break; case ':': case '?': switch (optopt) { @@ -233,27 +246,53 @@ int main(int argc, char **argv) { return EXIT_SUCCESS; } + gpuinfo_populate_static_infos(&monitoredGpus); + + // Pre-warm the cycle-based metrics by taking an initial reading here. + // This allows the ensuing setup time (e.g. sysfs parsing, curses init) to + // count towards the 100ms time delta needed to calculate load percentages + // before the first frame is drawn. + gpuinfo_refresh_dynamic_info(&monitoredGpus); + gpuinfo_refresh_processes(&monitoredGpus); + gpuinfo_utilisation_rate(&monitoredGpus); + + nvtop_time time_startup_refresh; + nvtop_get_current_time(&time_startup_refresh); + if (show_snapshot || loop_snapshot) { - gpuinfo_populate_static_infos(&monitoredGpus); - - // Always do a refresh followed by a short sleep to have valid cycle based - // metrics - gpuinfo_refresh_dynamic_info(&monitoredGpus); - gpuinfo_refresh_processes(&monitoredGpus); - gpuinfo_utilisation_rate(&monitoredGpus); - // Default to 0.1 sec + // Default to 0.1 sec if not given if (!update_interval_option_set) update_interval_option = 100; + bool first_snapshot = true; + do { + if (first_snapshot) { + nvtop_time time_before_snap; + nvtop_get_current_time(&time_before_snap); + double startup_elapsed_ms = nvtop_difftime(time_startup_refresh, time_before_snap) * 1000.0; + + if (startup_elapsed_ms < update_interval_option) { + double remaining_ms = update_interval_option - startup_elapsed_ms; +#if _POSIX_C_SOURCE >= 199309L + struct timespec tv = {.tv_sec = (long)(remaining_ms / 1000.0), + .tv_nsec = (long)(fmod(remaining_ms, 1000.0) * 1000000.0)}; + nanosleep(&tv, &tv); +#else + usleep((useconds_t)(remaining_ms * 1000.0)); +#endif + } 
+ first_snapshot = false; + } else { #if _POSIX_C_SOURCE >= 199309L - struct timespec tv = {.tv_sec = update_interval_option / 1000, - .tv_nsec = (update_interval_option % 1000) * 1000000}; - nanosleep(&tv, &tv); + struct timespec tv = {.tv_sec = update_interval_option / 1000, + .tv_nsec = (update_interval_option % 1000) * 1000000}; + nanosleep(&tv, &tv); #else - int sec = update_interval_option / 1000; - sleep(sec > 0 ? sec : 1); + int sec = update_interval_option / 1000; + sleep(sec > 0 ? sec : 1); #endif + } gpuinfo_refresh_dynamic_info(&monitoredGpus); gpuinfo_refresh_processes(&monitoredGpus); gpuinfo_utilisation_rate(&monitoredGpus); @@ -308,7 +347,6 @@ int main(int argc, char **argv) { allDevicesOptions.update_interval = update_interval_option; allDevicesOptions.has_gpu_info_bar = allDevicesOptions.has_gpu_info_bar || show_gpu_info_bar; - gpuinfo_populate_static_infos(&monitoredGpus); unsigned numMonitoredGpus = interface_check_and_fix_monitored_gpus(allDevCount, &monitoredGpus, &nonMonitoredGpus, &allDevicesOptions); @@ -320,6 +358,22 @@ int main(int argc, char **argv) { } } + // Ensure at least 100ms has elapsed since the pre-warm metrics were taken + // to guarantee a valid time delta for load percent calculations. + nvtop_time time_before_ui; + nvtop_get_current_time(&time_before_ui); + double startup_elapsed_ms = nvtop_difftime(time_startup_refresh, time_before_ui) * 1000.0; + + if (startup_elapsed_ms < 100.0) { + double remaining_ms = 100.0 - startup_elapsed_ms; +#if _POSIX_C_SOURCE >= 199309L + struct timespec tv = {.tv_sec = 0, .tv_nsec = (long)(remaining_ms * 1000000.0)}; + nanosleep(&tv, &tv); +#else + usleep((useconds_t)(remaining_ms * 1000.0)); +#endif + } + struct nvtop_interface *interface = initialize_curses(allDevCount, numMonitoredGpus, interface_largest_gpu_name(&monitoredGpus), allDevicesOptions); timeout(interface_update_interval(interface));