From bd4285e2dccd364e19d825f497d64439cae9a16d Mon Sep 17 00:00:00 2001 From: Dmitriy Philimonov Date: Sat, 3 May 2025 12:13:20 +0300 Subject: [PATCH] Preliminary support for NVIDIA Jetson boards NVIDIA Jetson device is an industrial Linux-based embedded aarch64 platform with powerful builtin GPU, which is used for AI tasks, mostly for CV purposes. The support is provided via --enable-nvidia-jetson switch in the configure script. All the source code related to the NVIDIA Jetson is placed in the linux/NvidiaJetson.{h,c} source files and hidden by 'NVIDIA_JETSON' C preprocessor define. So, for x86_64 platforms the source code stays unchanged. Additional functionality added by this commit: 1. Fix for the CPU temperature reading. The Jetson device is not supported by libsensors. The CPU has 8 cores with only one CPU temperature sensor for all of them located in the thermal zone file. libsensors might be compiled in or turned off. Additional care was taken to provide a successful build with/without libsensors. 2. The Jetson GPU Meter was added: current load, frequency and temperature. 3. The exact GPU memory allocated by each process is loaded from the nvgpu kernel driver via sysfs and merged into the LinuxProcess data (field LinuxProcess::gpu_mem). The field "GPU_MEM" visualizes this field. For root user only. 4. Additional filter for processes which use GPU right now via hot key 'g', the help is supplied. For root user only. == Technical details == The code tries to find out the correct sensors during the application startup. As an example, the sensor locations for NVIDIA Jetson Orin are the following: - CPU temperature: /sys/devices/virtual/thermal/thermal_zone0/type - GPU temperature: /sys/devices/virtual/thermal/thermal_zone1/type - GPU frequency: /sys/class/devfreq/17000000.gpu/cur_freq - GPU curr load: /sys/class/devfreq/17000000.gpu/device/load Measure: - The GPU frequency is provided in Hz, shown in MHz. 
- The CPU/GPU temperatures are provided in Celsius multiplied by 1000 (millidegrees Celsius), shown in Celsius. P.S. The GUI shows all temperatures for NVIDIA Jetson with additional precision compared to the default x86_64 platform. If htop starts with root privileges (effective user id is 0), the experimental code activates. It reads the fixed sysfs file /sys/kernel/debug/nvmap/iovmm/clients with the following content, e.g.: ``` CLIENT PROCESS PID SIZE user gpu_burn 7979 23525644K user gnome_shell 8119 5800K user Xorg 2651 17876K total 23549320K ``` Unfortunately, the /sys/kernel/debug/* files are readable only by the root user, which is why the restriction applies. The patch also adds a separate field 'GPU_MEM', which reads data from the added LinuxProcess::gpu_mem field. The field stores memory allocated for GPU in kilobytes. It is populated by the function NvidiaJetson_LoadGpuProcessTable (the implementation is located in NvidiaJetson.c), which is called at the end of the function Machine_scanTables. Additionally, a new Action is added: actionToggleGpuFilter, which is activated by the 'g' hot key (the help is updated appropriately). The GpuFilter shows only the processes which currently utilize the GPU (i.e. highly extended nvmap/iovmm/clients table). It is achieved by the filtering machinery associated with ProcessTable::pidMatchList. The code below constructs the GPU_PID_MATCH_LIST hash table, then actionToggleGpuFilter either stores it in ProcessTable::pidMatchList or restores the old value of ProcessTable::pidMatchList. A separate LinuxProcess flag, PROCESS_FLAG_LINUX_GPU_JETSON (or something ...), isn't added for GPU_MEM, because currently the code that populates LinuxProcess::gpu_mem is shared with the construction of the GPU consumers filter. So, even if the GPU_MEM field is not activated, the filter showing GPU consumers should work. 
This kind of architecture is chosen intentionally since it saves memory for the hash table GPU_PID_MATCH_LIST (which is now actually a set), and therefore increases performance. All other approaches convert GPU_PID_MATCH_LIST to a true key/value storage (key = pid, value = gpu memory allocated) with further merge code. == NVIDIA Jetson models == Tested for NVIDIA Jetson Orin and Xavier boards. --- Action.c | 28 ++++ Action.h | 3 + CPUMeter.c | 28 +++- CRT.c | 7 + CRT.h | 1 + DisplayOptionsPanel.c | 7 +- MainPanel.c | 6 + Makefile.am | 5 + Settings.c | 4 + Settings.h | 1 + XUtils.h | 27 ++++ configure.ac | 43 ++++- linux/LinuxMachine.c | 10 +- linux/LinuxMachine.h | 5 +- linux/LinuxProcess.c | 11 ++ linux/LinuxProcess.h | 4 + linux/LinuxProcessTable.c | 22 +-- linux/NvidiaJetson.c | 323 ++++++++++++++++++++++++++++++++++++++ linux/NvidiaJetson.h | 17 ++ linux/Platform.c | 6 +- linux/ProcessField.h | 1 + 21 files changed, 531 insertions(+), 28 deletions(-) create mode 100644 linux/NvidiaJetson.c create mode 100644 linux/NvidiaJetson.h diff --git a/Action.c b/Action.c index 1d3bccc51..42f35727d 100644 --- a/Action.c +++ b/Action.c @@ -27,9 +27,11 @@ in the source distribution for its full text. 
#include "ListItem.h" #include "Macros.h" #include "MainPanel.h" +#include "NvidiaJetson.h" #include "OpenFilesScreen.h" #include "Process.h" #include "ProcessLocksScreen.h" +#include "ProcessTable.h" #include "ProvideCurses.h" #include "Row.h" #include "RowField.h" @@ -646,6 +648,26 @@ static Htop_Reaction actionTogglePauseUpdate(State* st) { return HTOP_REFRESH | HTOP_REDRAW_BAR | HTOP_KEEP_FOLLOWING; } +#ifdef NVIDIA_JETSON +static Htop_Reaction actionToggleGpuFilter(State* st) { + static Hashtable *stash = NULL; + + Hashtable *GpuPidMatchList = NvidiaJetson_GetPidMatchList(); + if (GpuPidMatchList) { + st->showGpuProcesses = !st->showGpuProcesses; + + ProcessTable *pt = (ProcessTable *)st->host->activeTable; + if (st->showGpuProcesses) { + stash = pt->pidMatchList; + pt->pidMatchList = GpuPidMatchList; + } else { + pt->pidMatchList = stash; + } + } + return HTOP_REFRESH | HTOP_REDRAW_BAR | HTOP_KEEP_FOLLOWING; +} +#endif + static const struct { const char* key; bool roInactive; @@ -658,6 +680,9 @@ static const struct { { .key = " F3 /: ", .roInactive = false, .info = "incremental name search" }, { .key = " F4 \\: ", .roInactive = false, .info = "incremental name filtering" }, { .key = " F5 t: ", .roInactive = false, .info = "tree view" }, +#ifdef NVIDIA_JETSON + { .key = " g: ", .roInactive = false, .info = "show GPU processes (root only)" }, +#endif { .key = " p: ", .roInactive = false, .info = "toggle program path" }, { .key = " m: ", .roInactive = false, .info = "toggle merged command" }, { .key = " Z: ", .roInactive = false, .info = "pause/resume process updates" }, @@ -933,6 +958,9 @@ void Action_setBindings(Htop_Action* keys) { keys['a'] = actionSetAffinity; keys['c'] = actionTagAllChildren; keys['e'] = actionShowEnvScreen; +#ifdef NVIDIA_JETSON + keys['g'] = actionToggleGpuFilter; +#endif keys['h'] = actionHelp; keys['k'] = actionKill; keys['l'] = actionLsof; diff --git a/Action.h b/Action.h index 282ba4189..d65ee08b4 100644 --- a/Action.h +++ b/Action.h 
@@ -40,6 +40,9 @@ typedef struct State_ { bool pauseUpdate; bool hideSelection; bool hideMeters; +#ifdef NVIDIA_JETSON + bool showGpuProcesses; +#endif } State; static inline bool State_hideFunctionBar(const State* st) { diff --git a/CPUMeter.c b/CPUMeter.c index 69da88db0..22fc1175b 100644 --- a/CPUMeter.c +++ b/CPUMeter.c @@ -98,15 +98,33 @@ static void CPUMeter_updateValues(Meter* this) { } } + /* + --enable-sensors turns on BUILD_WITH_CPU_TEMP only + --enable-nvidia-jetson turns on both NVIDIA_JETSON and BUILD_WITH_CPU_TEMP + */ #ifdef BUILD_WITH_CPU_TEMP if (settings->showCPUTemperature) { double cpuTemperature = this->values[CPU_METER_TEMPERATURE]; - if (isNaN(cpuTemperature)) + if (isNaN(cpuTemperature)) { xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "N/A"); - else if (settings->degreeFahrenheit) - xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "%3d%sF", (int)(cpuTemperature * 9 / 5 + 32), CRT_degreeSign); - else - xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "%d%sC", (int)cpuTemperature, CRT_degreeSign); + } else if (settings->degreeFahrenheit) { + cpuTemperature = convertCelsiusToFahrenheit(cpuTemperature); + /* Fahrenheit scale gives almost x2 more precise value than Celsius scale => no need to show fractional part */ + xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "%3d%sF", (int)cpuTemperature, CRT_degreeSign); + } else if (settings->showCPUTemperatureFractional) { + /* + - Modern CPUs has temperature sensors which give a precise value with 3 digits in the fractional part, + see hwmon files, e.g. /sys/class/hwmon/hwmon.../temp1_input, one digit in the fractional part is quite + enough right now. + - If your CPU is above 100C - you have a real problem, no need to print it pretty. + - The formatter "%04.1f" guarantees filling zero in the fractional part, e.g. 
strings like "37.0C" appears, + the side effect is that temperature value '5C' is shown as "05.0C" + */ + xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "%04.1f%sC", cpuTemperature, CRT_degreeSign); + } else { + /* if your CPU is above 100C - you have a real problem, no need to print it pretty */ + xSnprintf(cpuTemperatureBuffer, sizeof(cpuTemperatureBuffer), "%2d%sC", (int)cpuTemperature, CRT_degreeSign); + } } #endif diff --git a/CRT.c b/CRT.c index e700cdbd7..c24873914 100644 --- a/CRT.c +++ b/CRT.c @@ -202,6 +202,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = ColorPair(Yellow, Black), [GPU_ENGINE_3] = ColorPair(Red, Black), [GPU_ENGINE_4] = A_BOLD | ColorPair(Blue, Black), + [GPU_FILTER] = A_BOLD | ColorPair(Red, Cyan), [GPU_RESIDUE] = ColorPair(Magenta, Black), [PANEL_EDIT] = ColorPair(White, Blue), [SCREENS_OTH_BORDER] = ColorPair(Blue, Blue), @@ -320,6 +321,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = A_NORMAL, [GPU_ENGINE_3] = A_REVERSE | A_BOLD, [GPU_ENGINE_4] = A_REVERSE, + [GPU_FILTER] = A_REVERSE, [GPU_RESIDUE] = A_BOLD, [PANEL_EDIT] = A_BOLD, [SCREENS_OTH_BORDER] = A_DIM, @@ -438,6 +440,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = ColorPair(Yellow, White), [GPU_ENGINE_3] = ColorPair(Red, White), [GPU_ENGINE_4] = ColorPair(Blue, White), + [GPU_FILTER] = ColorPair(Blue, White), [GPU_RESIDUE] = ColorPair(Magenta, White), [PANEL_EDIT] = ColorPair(White, Blue), [SCREENS_OTH_BORDER] = A_BOLD | ColorPair(Black, White), @@ -556,6 +559,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = ColorPair(Yellow, Black), [GPU_ENGINE_3] = ColorPair(Red, Black), [GPU_ENGINE_4] = ColorPair(Blue, Black), + [GPU_FILTER] = A_BOLD | ColorPair(Yellow, Cyan), [GPU_RESIDUE] = ColorPair(Magenta, Black), [PANEL_EDIT] = ColorPair(White, Blue), [SCREENS_OTH_BORDER] = ColorPair(Blue, Black), @@ 
-674,6 +678,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = A_BOLD | ColorPair(Yellow, Blue), [GPU_ENGINE_3] = A_BOLD | ColorPair(Red, Blue), [GPU_ENGINE_4] = A_BOLD | ColorPair(White, Blue), + [GPU_FILTER] = A_BOLD | ColorPair(White, Cyan), [GPU_RESIDUE] = A_BOLD | ColorPair(Magenta, Blue), [PANEL_EDIT] = ColorPair(White, Blue), [SCREENS_OTH_BORDER] = A_BOLD | ColorPair(Yellow, Blue), @@ -790,6 +795,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = ColorPair(Yellow, Black), [GPU_ENGINE_3] = ColorPair(Red, Black), [GPU_ENGINE_4] = ColorPair(Blue, Black), + [GPU_FILTER] = A_BOLD | ColorPair(Yellow, Green), [GPU_RESIDUE] = ColorPair(Magenta, Black), [PANEL_EDIT] = ColorPair(White, Cyan), [SCREENS_OTH_BORDER] = ColorPair(White, Black), @@ -905,6 +911,7 @@ static int CRT_colorSchemes[LAST_COLORSCHEME][LAST_COLORELEMENT] = { [GPU_ENGINE_2] = A_NORMAL, [GPU_ENGINE_3] = A_BOLD | ColorPair(Cyan, Black), [GPU_ENGINE_4] = A_BOLD | ColorPair(Cyan, Black), + [GPU_FILTER] = A_BOLD | ColorPair(Red, Cyan), [GPU_RESIDUE] = A_BOLD, [PANEL_EDIT] = A_BOLD, [SCREENS_OTH_BORDER] = A_BOLD | ColorPairGrayBlack, diff --git a/CRT.h b/CRT.h index 776b170ea..07fdc733f 100644 --- a/CRT.h +++ b/CRT.h @@ -129,6 +129,7 @@ typedef enum ColorElements_ { GPU_ENGINE_2, GPU_ENGINE_3, GPU_ENGINE_4, + GPU_FILTER, GPU_RESIDUE, PANEL_EDIT, SCREENS_OTH_BORDER, diff --git a/DisplayOptionsPanel.c b/DisplayOptionsPanel.c index 234df4b68..85259a0c8 100644 --- a/DisplayOptionsPanel.c +++ b/DisplayOptionsPanel.c @@ -185,14 +185,15 @@ DisplayOptionsPanel* DisplayOptionsPanel_new(Settings* settings, ScreenManager* Panel_add(super, (Object*) CheckItem_newByRef("Also show CPU frequency", &(settings->showCPUFrequency))); #ifdef BUILD_WITH_CPU_TEMP Panel_add(super, (Object*) CheckItem_newByRef( - #if defined(HTOP_LINUX) - "Also show CPU temperature (requires libsensors)", - #elif defined(HTOP_FREEBSD) + #if defined(HTOP_FREEBSD) || 
defined(NVIDIA_JETSON) "Also show CPU temperature", + #elif defined(HTOP_LINUX) + "Also show CPU temperature (requires libsensors)", #else #error Unknown temperature implementation! #endif &(settings->showCPUTemperature))); + Panel_add(super, (Object*) CheckItem_newByRef("- Show fractional CPU temperature for Celsius", (&settings->showCPUTemperatureFractional))); Panel_add(super, (Object*) CheckItem_newByRef("- Show temperature in degree Fahrenheit instead of Celsius", &(settings->degreeFahrenheit))); #endif Panel_add(super, (Object*) CheckItem_newByRef("Show cached memory in graph and bar modes", &(settings->showCachedMemory))); diff --git a/MainPanel.c b/MainPanel.c index f82439d2c..2f17b1faa 100644 --- a/MainPanel.c +++ b/MainPanel.c @@ -197,6 +197,12 @@ static void MainPanel_drawFunctionBar(Panel* super, bool hideFunctionBar) { } else if (this->state->failedUpdate) { FunctionBar_append(this->state->failedUpdate, CRT_colors[FAILED_READ]); } + +#ifdef NVIDIA_JETSON + if (this->state->showGpuProcesses) { + FunctionBar_append("GPU", CRT_colors[GPU_FILTER]); + } +#endif } static void MainPanel_printHeader(Panel* super) { diff --git a/Makefile.am b/Makefile.am index cf2ccee43..214f64c52 100644 --- a/Makefile.am +++ b/Makefile.am @@ -215,6 +215,11 @@ linux_platform_sources = \ zfs/ZfsArcMeter.c \ zfs/ZfsCompressedArcMeter.c +if NVIDIA_JETSON +linux_platform_headers += linux/NvidiaJetson.h +linux_platform_sources += linux/NvidiaJetson.c +endif + if HAVE_DELAYACCT linux_platform_headers += linux/LibNl.h linux_platform_sources += linux/LibNl.c diff --git a/Settings.c b/Settings.c index ae5402fba..aeae7683a 100644 --- a/Settings.c +++ b/Settings.c @@ -472,6 +472,8 @@ static bool Settings_read(Settings* this, const char* fileName, const Machine* h #ifdef BUILD_WITH_CPU_TEMP } else if (String_eq(option[0], "show_cpu_temperature")) { this->showCPUTemperature = atoi(option[1]); + } else if (String_eq(option[0], "show_cpu_temperature_fractional")) { + 
this->showCPUTemperatureFractional = atoi(option[1]); } else if (String_eq(option[0], "degree_fahrenheit")) { this->degreeFahrenheit = atoi(option[1]); #endif @@ -703,6 +705,7 @@ int Settings_write(const Settings* this, bool onCrash) { printSettingInteger("show_cpu_frequency", this->showCPUFrequency); #ifdef BUILD_WITH_CPU_TEMP printSettingInteger("show_cpu_temperature", this->showCPUTemperature); + printSettingInteger("show_cpu_temperature_fractional", this->showCPUTemperatureFractional); printSettingInteger("degree_fahrenheit", this->degreeFahrenheit); #endif printSettingInteger("show_cached_memory", this->showCachedMemory); @@ -808,6 +811,7 @@ Settings* Settings_new(const Machine* host, Hashtable* dynamicMeters, Hashtable* this->showCPUFrequency = false; #ifdef BUILD_WITH_CPU_TEMP this->showCPUTemperature = false; + this->showCPUTemperatureFractional = false; this->degreeFahrenheit = false; #endif this->showCachedMemory = true; diff --git a/Settings.h b/Settings.h index 01e808e86..22aebe070 100644 --- a/Settings.h +++ b/Settings.h @@ -79,6 +79,7 @@ typedef struct Settings_ { bool showCPUFrequency; #ifdef BUILD_WITH_CPU_TEMP bool showCPUTemperature; + bool showCPUTemperatureFractional; bool degreeFahrenheit; #endif bool showProgramPath; diff --git a/XUtils.h b/XUtils.h index 32e9446df..52247b678 100644 --- a/XUtils.h +++ b/XUtils.h @@ -17,6 +17,7 @@ in the source distribution for its full text. #include #include #include // IWYU pragma: keep +#include #include #include // IWYU pragma: keep #include // IWYU pragma: keep @@ -123,6 +124,13 @@ char* xStrndup(const char* str, size_t len); ATTR_NONNULL ATTR_ACCESS3_W(2, 3) ssize_t xReadfile(const char* pathname, void* buffer, size_t count); + +ATTR_NONNULL ATTR_ACCESS3_W(2, 3) +static inline double xReadNumberFile(const char *pathname, char *buf, const size_t len) { + ssize_t nread = xReadfile(pathname, buf, len); + return nread > 0 ? 
strtod(buf, NULL) : NAN; +} + ATTR_NONNULL ATTR_ACCESS3_W(3, 4) ssize_t xReadfileat(openat_arg_t dirfd, const char* pathname, void* buffer, size_t count); @@ -178,4 +186,23 @@ static inline int xDirfd(DIR* dirp) { return r; } +static inline double convertCelsiusToFahrenheit(const double celsius) { + return celsius * 9 / 5 + 32; +} + +static inline uint64_t fast_strtoull_dec(char** str, int maxlen) { + uint64_t result = 0; + + if (!maxlen) + maxlen = 20; // length of maximum value of 18446744073709551615 + + while (maxlen-- && **str >= '0' && **str <= '9') { + result *= 10; + result += **str - '0'; + (*str)++; + } + + return result; +} + #endif diff --git a/configure.ac b/configure.ac index 6d7752add..526cd29e2 100644 --- a/configure.ac +++ b/configure.ac @@ -1171,7 +1171,47 @@ case "$enable_sensors" in AC_MSG_ERROR([bad value '$enable_sensors' for --enable-sensors]) ;; esac -if test "$enable_sensors" = yes || test "$my_htop_platform" = freebsd; then + +AC_ARG_ENABLE([nvidia-jetson], + [AS_HELP_STRING([--enable-nvidia-jetson], + [enable nvidia jetson support @<:@default=check@:>@])], + [], + [enable_nvidia_jetson=check]) +case "$enable_nvidia_jetson" in + no) + ;; + check) + if test -f "/etc/nv_tegra_release"; then + if grep -q "BOARD" "/etc/nv_tegra_release"; then + enable_nvidia_jetson=yes + fi + fi + ;; + yes) + if test -f "/etc/nv_tegra_release"; then + if grep -q "BOARD" "/etc/nv_tegra_release"; then + enable_nvidia_jetson=yes + else + enable_nvidia_jetson=no + fi + else + enable_nvidia_jetson=no + fi + ;; + *) + AC_MSG_ERROR([bad value '$enable_nvidia_jetson' for --enable-nvidia-jetson]) + ;; +esac + +if test "$enable_nvidia_jetson" = yes; then + AC_DEFINE([NVIDIA_JETSON], [1], [Detected correct NVIDIA Jetson board]) +else + AC_MSG_NOTICE([This is not a NVIDIA Jetson board]) +fi + +AM_CONDITIONAL([NVIDIA_JETSON], [test "$enable_nvidia_jetson" = yes]) + +if test "$enable_sensors" = yes || test "$my_htop_platform" = freebsd || test "$enable_nvidia_jetson" = yes; 
then AC_DEFINE([BUILD_WITH_CPU_TEMP], [1], [Define if CPU temperature option should be enabled.]) fi @@ -1309,6 +1349,7 @@ AC_MSG_RESULT([ (Linux) delay accounting: $enable_delayacct (Linux) sensors: $enable_sensors (Linux) capabilities: $enable_capabilities + (Linux) nvidia-jeston: $enable_nvidia_jetson unicode: $enable_unicode affinity: $enable_affinity unwind: $enable_unwind diff --git a/linux/LinuxMachine.c b/linux/LinuxMachine.c index 188358ef1..1e71e98f1 100644 --- a/linux/LinuxMachine.c +++ b/linux/LinuxMachine.c @@ -32,6 +32,7 @@ in the source distribution for its full text. #include "UsersTable.h" #include "XUtils.h" +#include "linux/NvidiaJetson.h" #include "linux/Platform.h" // needed for GNU/hurd to get PATH_MAX // IWYU pragma: keep #ifdef HAVE_SENSORS_SENSORS_H @@ -746,7 +747,10 @@ void Machine_scan(Machine* super) { ) LinuxMachine_scanCPUFrequency(this); - #ifdef HAVE_SENSORS_SENSORS_H + #ifdef NVIDIA_JETSON + if (settings->showCPUTemperature) + NvidiaJetson_getCPUTemperatures(this->cpuData, super->existingCPUs); + #elif defined(HAVE_SENSORS_SENSORS_H) if (settings->showCPUTemperature) LibSensors_getCPUTemperatures(this->cpuData, super->existingCPUs, super->activeCPUs); #endif @@ -792,6 +796,10 @@ Machine* Machine_new(UsersTable* usersTable, uid_t userId) { // Initialize CPU count LinuxMachine_updateCPUcount(this); + #ifdef NVIDIA_JETSON + NvidiaJetson_FindSensors(); + #endif + #ifdef HAVE_SENSORS_SENSORS_H // Fetch CPU topology LinuxMachine_fetchCPUTopologyFromCPUinfo(this); diff --git a/linux/LinuxMachine.h b/linux/LinuxMachine.h index 4bc334dda..d9089b6a3 100644 --- a/linux/LinuxMachine.h +++ b/linux/LinuxMachine.h @@ -46,9 +46,10 @@ typedef struct CPUData_ { double frequency; - #ifdef HAVE_SENSORS_SENSORS_H + #ifdef BUILD_WITH_CPU_TEMP double temperature; - + #endif + #ifdef HAVE_SENSORS_SENSORS_H int physicalID; /* different for each CPU socket */ int coreID; /* same for hyperthreading */ int ccdID; /* same for each AMD chiplet */ diff --git 
a/linux/LinuxProcess.c b/linux/LinuxProcess.c index 3a250283a..aabe1fc22 100644 --- a/linux/LinuxProcess.c +++ b/linux/LinuxProcess.c @@ -20,6 +20,7 @@ in the source distribution for its full text. #include "CRT.h" #include "Macros.h" +#include "NvidiaJetson.h" #include "Process.h" #include "ProvideCurses.h" #include "RichString.h" @@ -112,6 +113,9 @@ const ProcessFieldData Process_fields[LAST_PROCESSFIELD] = { #endif [GPU_TIME] = { .name = "GPU_TIME", .title = "GPU_TIME ", .description = "Total GPU time", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, }, [GPU_PERCENT] = { .name = "GPU_PERCENT", .title = " GPU% ", .description = "Percentage of the GPU time the process used in the last sampling", .flags = PROCESS_FLAG_LINUX_GPU, .defaultSortDesc = true, }, +#ifdef NVIDIA_JETSON + [GPU_MEM] = { .name = "GPU_MEM", .title = "GPU_M ", .description = "GPU memory allocated for the process", .flags = 0, .defaultSortDesc = true, }, +#endif }; Process* LinuxProcess_new(const Machine* host) { @@ -362,6 +366,9 @@ static void LinuxProcess_rowWriteField(const Row* super, RichString* str, Proces xSnprintf(buffer, n, "N/A "); } break; +#ifdef NVIDIA_JETSON + case GPU_MEM: Row_printKBytes(str, lp->gpu_mem, coloring); return; +#endif default: Process_writeField(this, str, field); return; @@ -466,6 +473,10 @@ static int LinuxProcess_compareByKey(const Process* v1, const Process* v2, Proce return SPACESHIP_NUMBER(p1->gpu_time, p2->gpu_time); case ISCONTAINER: return SPACESHIP_NUMBER(v1->isRunningInContainer, v2->isRunningInContainer); +#ifdef NVIDIA_JETSON + case GPU_MEM: + return SPACESHIP_NUMBER(p1->gpu_mem, p2->gpu_mem); +#endif default: return Process_compareByKey_Base(v1, v2, key); } diff --git a/linux/LinuxProcess.h b/linux/LinuxProcess.h index fafd7d004..2d7525f4f 100644 --- a/linux/LinuxProcess.h +++ b/linux/LinuxProcess.h @@ -115,6 +115,10 @@ typedef struct LinuxProcess_ { /* Activity of GPU: 0 if active, otherwise time of last scan in milliseconds */ uint64_t 
gpu_activityMs; + #ifdef NVIDIA_JETSON + uint64_t gpu_mem; + #endif + /* Autogroup scheduling (CFS) information */ long int autogroup_id; int autogroup_nice; diff --git a/linux/LinuxProcessTable.c b/linux/LinuxProcessTable.c index c4bd55a43..6c217936c 100644 --- a/linux/LinuxProcessTable.c +++ b/linux/LinuxProcessTable.c @@ -43,6 +43,7 @@ in the source distribution for its full text. #include "linux/GPU.h" #include "linux/LinuxMachine.h" #include "linux/LinuxProcess.h" +#include "linux/NvidiaJetson.h" #include "linux/Platform.h" // needed for GNU/hurd to get PATH_MAX // IWYU pragma: keep #ifdef HAVE_DELAYACCT @@ -78,21 +79,6 @@ static FILE* fopenat(openat_arg_t openatArg, const char* pathname, const char* m return fp; } -static inline uint64_t fast_strtoull_dec(char** str, int maxlen) { - uint64_t result = 0; - - if (!maxlen) - maxlen = 20; // length of maximum value of 18446744073709551615 - - while (maxlen-- && **str >= '0' && **str <= '9') { - result *= 10; - result += **str - '0'; - (*str)++; - } - - return result; -} - static long long fast_strtoll_dec(char** str, int maxlen) { bool neg = false; @@ -1926,4 +1912,10 @@ void ProcessTable_goThroughEntries(ProcessTable* super) { #endif LinuxProcessTable_recurseProcTree(this, rootFd, lhost, PROCDIR, NULL); + + #ifdef NVIDIA_JETSON + /* Merge GPU data only to the currently active table */ + if ((Table*)this == host->activeTable) + NvidiaJetson_LoadGpuProcessTable(((Table*)this)->table); + #endif } diff --git a/linux/NvidiaJetson.c b/linux/NvidiaJetson.c new file mode 100644 index 000000000..4a8fdcb71 --- /dev/null +++ b/linux/NvidiaJetson.c @@ -0,0 +1,323 @@ +#include "config.h" +#ifdef NVIDIA_JETSON + +#include +#include +#include +#include + +#include "CRT.h" +#include "Hashtable.h" +#include "Macros.h" +#include "Meter.h" +#include "XUtils.h" + +#include "linux/LinuxProcess.h" +#include "linux/NvidiaJetson.h" + + +/* + +NVIDIA Jetson devices support is located here. 
+ +Jetson has: +- one CPU temperature sensor per 8 cores. +- one GPU temperature sensor, on Jetson Orin it goes off if no GPU load: user gets error on file open +- the process table where kernel nvgpu driver collects GPU clients (experimental, root access only): + * process id + * process name + * memory in kilobytes allocated for GPU (Jetson device shares system RAM for GPU) + +The code tries to find out the correct sensors during the application startup. As an example, the sensors +location for NVIDIA Jetson Orin: +- CPU temperature: /sys/devices/virtual/thermal/thermal_zone0/type +- GPU temperature: /sys/devices/virtual/thermal/thermal_zone1/type +- GPU frequency: /sys/class/devfreq/17000000.gpu/cur_freq +- GPU curr load: /sys/class/devfreq/17000000.gpu/device/load + +Measure: +- The GPU frequency is provided in Hz, shown in MHz. +- The CPU/GPU temperatures are provided in Celsius multiplied by 1000 (millidegrees Celsius), shown in Celsius +- The Fahrenheit support is not provided + +If htop starts with root privileges (effective user id is 0), the experimental code activates. +It reads the fixed sysfs file /sys/kernel/debug/nvmap/iovmm/clients with the following content, e.g.: +``` +CLIENT PROCESS PID SIZE +user gpu_burn 7979 23525644K +user gnome_shell 8119 5800K +user Xorg 2651 17876K +total 23549320K +``` +Unfortunately, the /sys/kernel/debug/... files are readable only by the root user, which is why the restriction applies. + +The patch outside of this file adds a separate field 'GPU_MEM', which reads data from LinuxProcess::gpu_mem field. +The field stores memory allocated for GPU in kilobytes. It is populated by the function +NvidiaJetson_LoadGpuProcessTable (the implementation is located here), which is called at the end of the function +Machine_scanTables. + +Additionally, the new Action is added: actionToggleGpuFilter, which is activated by 'g' hot key (the help is updated +appropriately). 
The GpuFilter shows only the processes which currently utilize GPU (i.e. highly extended +nvmap/iovmm/clients table). It is achieved by the filtering machinery associated with ProcessTable::pidMatchList. +The code below constructs GPU_PID_MATCH_LIST hash table, then actionToggleGpuFilter either stores it to the +ProcessTable::pidMatchList or restores old value of ProcessTable::pidMatchList. + +The separate LinuxProcess's PROCESS_FLAG_LINUX_GPU_JETSON (or something ...) flag isn't added for GPU_MEM, because +currently the functionality of population LinuxProcess::gpu_mem is shared with the GPU consumers filter construction. +So, even if GPU_MEM field is not activated, the filter showing GPU consumers should work. This kind of architecture is +chosen intentionally since it saves memory for the hash table GPU_PID_MATCH_LIST (which is now actually a set), and therefore +increases performance. All other approaches convert GPU_PID_MATCH_LIST to a true key/value storage (key = pid, +value = gpu memory allocated) with further merge code. 
+ +*/ + +/* global paths per each sensor */ +char *CpuTempSensorPath = NULL; +char *GpuTempSensorPath = NULL; +char *GpuFreqSensorPath = NULL; +char *GpuLoadSensorPath = NULL; + +#define MAX_GPU_PROCESS_COUNT 256UL +static Hashtable *GPU_PID_MATCH_LIST = NULL; + +static void NVidiaJetsonHashtableFunctor_ResetGpuMem(ATTR_UNUSED ht_key_t key, void* val, ATTR_UNUSED void* userData) { + LinuxProcess *lp = val; + lp->gpu_mem = 0; +} + +Hashtable *NvidiaJetson_GetPidMatchList(void) { + return GPU_PID_MATCH_LIST; +} + +void NvidiaJetson_LoadGpuProcessTable(Hashtable *pidHash) { + static int IsRootUser = -1; + + /* needs root privileges */ + if (!IsRootUser) + return; + + /* first time function is called */ + if (IsRootUser == -1) { + IsRootUser = geteuid() == 0; + if (!IsRootUser) + return; + + GPU_PID_MATCH_LIST = Hashtable_new(MAX_GPU_PROCESS_COUNT, false); + } + + FILE *input = fopen("/sys/kernel/debug/nvmap/iovmm/clients", "r"); + if (input == NULL) + return; + + /* reset all knowledge about GPU allocations */ + Hashtable_foreach(pidHash, NVidiaJetsonHashtableFunctor_ResetGpuMem, NULL); + Hashtable_clear(GPU_PID_MATCH_LIST); + + /* construct new knowledge regarding GPU allocations */ + static const char sentinel = -128; + static const size_t line_sz = 256; + + char line[line_sz]; + char *last = &line[sizeof(line) - 1]; + + *last = sentinel; + while (fgets(line, sizeof(line), input)) { + /* line example: "user Xorg 2651 17876K" */ + char *saveptr; + + if (String_startsWith(line, "CLIENT")) // skip header + continue; + if (String_startsWith(line, "total")) // final line, skip for now + break; + if (*last == '\0') { + /* overflow */ + *last = sentinel; + continue; + } + /* char *user = */ strtok_r(line, " \n", &saveptr); + /* char *proc = */ strtok_r(NULL, " \n", &saveptr); + char *pidPtr = strtok_r(NULL, " \n", &saveptr); + unsigned int pid = fast_strtoull_dec(&pidPtr, 10); + + char *memPtr = strtok_r(NULL, " \n", &saveptr); + uint64_t gpumem = 
fast_strtoull_dec(&memPtr, 20); + + /* memory allocation showed in kylobytes, i.e. the token usually looks like "17876K" */ + if (memPtr && *memPtr != 'K') { + switch (*memPtr) { + case 'M': gpumem *= 1024; break; + case 'G': gpumem *= 1024*1024; break; + default: gpumem = 0; break; + } + } + + LinuxProcess *lp = Hashtable_get(pidHash, pid); + if (lp) { + lp->gpu_mem = gpumem; + } + + Hashtable_put(GPU_PID_MATCH_LIST, pid, (void*)1); + } + fclose(input); +} + +static inline bool IsJetsonOrinGPU(const char *name) { + return strstr(name, "gpu"); +} + +static inline bool IsJetsonXavierGPU(const char *name) { + return strstr(name, "gv11"); +} + +static void NvidiaJetson_FindGPUDevice(void) { + const struct dirent* entry; + + #define SYS_CLASS_DEVFREQ "/sys/class/devfreq" + + DIR* dir = opendir(SYS_CLASS_DEVFREQ); + if (!dir) + return; + + while ((entry = readdir(dir)) != NULL) { + const char* name = entry->d_name; + + if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) + continue; + + if (IsJetsonOrinGPU(name) || IsJetsonXavierGPU(name)) { + /* allocated once at the application startup, so not freed until the exit */ + xAsprintf(&GpuFreqSensorPath, SYS_CLASS_DEVFREQ "/%s/cur_freq", name); + xAsprintf(&GpuLoadSensorPath, SYS_CLASS_DEVFREQ "/%s/device/load", name); + break; + } + } + closedir(dir); + + #undef SYS_CLASS_DEVFREQ +} + +static void NvidiaJetson_GoThroughThermalZones(void) { + char path[64]; + char content[4]; + const struct dirent* entry; + + #define SYS_DEVICE_VIRTUAL_THERMAL "/sys/devices/virtual/thermal" + + DIR* dir = opendir(SYS_DEVICE_VIRTUAL_THERMAL); + if (!dir) + return; + + while ((entry = readdir(dir)) != NULL && (CpuTempSensorPath == NULL || GpuTempSensorPath == NULL)) { + const char* name = entry->d_name; + ssize_t ret; + + if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0'))) + continue; + + if (!String_startsWith(name, "thermal_zone")) + continue; + + xSnprintf(path, sizeof(path), SYS_DEVICE_VIRTUAL_THERMAL "/%s/type", name); + ret = xReadfile(path, content, sizeof(content)); + if (ret <= 0) + continue; + + content[0] = tolower(content[0]); + content[1] = tolower(content[1]); + content[2] = tolower(content[2]); + content[3] = tolower(content[3]); + + /* allocated once at the application startup, so not freed until the exit */ + + if (CpuTempSensorPath == NULL && String_startsWith(content, "cpu")) { + xAsprintf(&CpuTempSensorPath, SYS_DEVICE_VIRTUAL_THERMAL "/%s/temp", name); + } + if (GpuTempSensorPath == NULL && String_startsWith(content, "gpu")) { + xAsprintf(&GpuTempSensorPath, SYS_DEVICE_VIRTUAL_THERMAL "/%s/temp", name); + } + } + closedir(dir); + + #undef SYS_DEVICE_VIRTUAL_THERMAL +} + +void NvidiaJetson_FindSensors(void) { + NvidiaJetson_GoThroughThermalZones(); + NvidiaJetson_FindGPUDevice(); +} + +void NvidiaJetson_getCPUTemperatures(CPUData* cpus, unsigned int existingCPUs) { + char buffer[22]; + double temp = xReadNumberFile(CpuTempSensorPath, buffer, sizeof(buffer)) / 1000.0; + for (unsigned int i = 0; i <= existingCPUs; ++i) + cpus[i].temperature = temp; +} + +enum JetsonValues { + JETSON_GPU_LOAD = 0, + JETSON_GPU_TEMP = 1, + JETSON_GPU_FREQ = 2, + JETSON_GPU_TOTAL_COUNT, +}; + +static void JetsonGPUMeter_updateValues(Meter* this) { + char buffer[22]; + this->values[JETSON_GPU_LOAD] = xReadNumberFile(GpuLoadSensorPath, buffer, sizeof(buffer)); + this->curItems = 1; /* only show bar for JETSON_GPU_LOAD */ + + this->values[JETSON_GPU_TEMP] = xReadNumberFile(GpuTempSensorPath, buffer, sizeof(buffer)) / 1000.0; + this->values[JETSON_GPU_FREQ] = xReadNumberFile(GpuFreqSensorPath, buffer, sizeof(buffer)) / 1000000.0; + double percent = this->values[JETSON_GPU_LOAD] / 10.0; + + char c = 'C'; + double gpuTemperature = this->values[JETSON_GPU_TEMP]; + if (this->host->settings->degreeFahrenheit) { + 
gpuTemperature = convertCelsiusToFahrenheit(gpuTemperature); + c = 'F'; + } + + unsigned int gpuFrequency = this->values[JETSON_GPU_FREQ]; + xSnprintf(this->txtBuffer, sizeof(this->txtBuffer), "%.1f%% %3uMHz %.1f%s%c", + percent, gpuFrequency, gpuTemperature, CRT_degreeSign, c + ); +} + /* Text-mode rendering of the meter: load, frequency and temperature as separately coloured fields. The load is shown as values[JETSON_GPU_LOAD] / 10.0 with a percent sign and the temperature honours the degreeFahrenheit setting, so the output agrees with the text built by JetsonGPUMeter_updateValues. */ +static void JetsonGPUMeter_display(const Object* cast, RichString* out) { + char buffer[32]; + const Meter* this = (const Meter*)cast; + const bool fahrenheit = this->host->settings->degreeFahrenheit; + RichString_writeAscii(out, CRT_colors[METER_TEXT], ":"); + xSnprintf(buffer, sizeof(buffer), "%.1f%%", this->values[JETSON_GPU_LOAD] / 10.0); + RichString_appendAscii(out, CRT_colors[METER_VALUE], buffer); + + RichString_appendAscii(out, CRT_colors[METER_TEXT], " freq:"); + xSnprintf(buffer, sizeof(buffer), "%3uMHz", (unsigned)this->values[JETSON_GPU_FREQ]); + RichString_appendAscii(out, CRT_colors[METER_VALUE], buffer); + /* values[JETSON_GPU_TEMP] is kept in Celsius; convert only for presentation. appendWide is used because the buffer contains CRT_degreeSign. */ + RichString_appendAscii(out, CRT_colors[METER_TEXT], " temp:"); + xSnprintf(buffer, sizeof(buffer), "%.1f%s%c", fahrenheit ? convertCelsiusToFahrenheit(this->values[JETSON_GPU_TEMP]) : this->values[JETSON_GPU_TEMP], CRT_degreeSign, fahrenheit ? 'F' : 'C'); + RichString_appendWide(out, CRT_colors[METER_VALUE], buffer); +} + +static const int JetsonGPUMeter_attributes[] = { + DEFAULT_COLOR +}; + /* Meter class registration. The default mode is the bar; .total is 1000.0 while the meter text shows load / 10.0 as a percentage, so the raw load value is treated as tenths of a percent. */ +const MeterClass JetsonGPUMeter_class = { + .super = { + .extends = Class(Meter), + .delete = Meter_delete, + .display = JetsonGPUMeter_display, + }, + .updateValues = JetsonGPUMeter_updateValues, + .defaultMode = BAR_METERMODE, + .supportedModes = METERMODE_DEFAULT_SUPPORTED, + .maxItems = JETSON_GPU_TOTAL_COUNT, + .total = 1000.0, + .attributes = JetsonGPUMeter_attributes, + .name = "jetson_gpu", + .uiName = "Jetson GPU", + .caption = "GPU" +}; + +#endif diff --git a/linux/NvidiaJetson.h b/linux/NvidiaJetson.h new file mode 100644 index 000000000..51e7830b9 --- /dev/null +++ b/linux/NvidiaJetson.h @@ -0,0 +1,17 @@ +#ifndef HEADER_NVIDIA_JETSON +#define HEADER_NVIDIA_JETSON + +#include "Hashtable.h" +#include "Meter.h" + +#include "linux/LinuxMachine.h" + /* Stores the single Jetson CPU temperature reading (in degrees Celsius) in cpus[0] through cpus[existingCPUs]. */ +void NvidiaJetson_getCPUTemperatures(CPUData* cpus, unsigned int existingCPUs);
+void NvidiaJetson_FindSensors(void); /* resolves the thermal-zone and devfreq sysfs paths used by the functions in this header */ + /* Parses the per-process GPU memory table (the commit description names /sys/kernel/debug/nvmap/iovmm/clients), stores each size in LinuxProcess::gpu_mem for pids found in pidHash and records every parsed pid in the GPU pid match list. */ +void NvidiaJetson_LoadGpuProcessTable(Hashtable *pidHash); +Hashtable *NvidiaJetson_GetPidMatchList(void); /* NOTE(review): presumably returns the pid table filled by NvidiaJetson_LoadGpuProcessTable - confirm in NvidiaJetson.c */ + /* Meter showing Jetson GPU load, frequency and temperature; defined in NvidiaJetson.c */ +extern const MeterClass JetsonGPUMeter_class; + +#endif diff --git a/linux/Platform.c b/linux/Platform.c index ddaf1324d..703a958e5 100644 --- a/linux/Platform.c +++ b/linux/Platform.c @@ -56,6 +56,7 @@ in the source distribution for its full text. #include "linux/IOPriorityPanel.h" #include "linux/LinuxMachine.h" #include "linux/LinuxProcess.h" +#include "linux/NvidiaJetson.h" #include "linux/SELinuxMeter.h" #include "linux/SystemdMeter.h" #include "linux/ZramMeter.h" @@ -254,6 +255,9 @@ const MeterClass* const Platform_meterTypes[] = { &SystemdUserMeter_class, &FileDescriptorMeter_class, &GPUMeter_class, +#ifdef NVIDIA_JETSON + &JetsonGPUMeter_class, +#endif NULL }; @@ -360,7 +364,7 @@ double Platform_setCPUValues(Meter* this, unsigned int cpu) { v[CPU_METER_FREQUENCY] = cpuData->frequency; -#ifdef HAVE_SENSORS_SENSORS_H +#ifdef BUILD_WITH_CPU_TEMP v[CPU_METER_TEMPERATURE] = cpuData->temperature; #else v[CPU_METER_TEMPERATURE] = NAN; diff --git a/linux/ProcessField.h b/linux/ProcessField.h index 47c4199fe..7655dbd4b 100644 --- a/linux/ProcessField.h +++ b/linux/ProcessField.h @@ -51,6 +51,7 @@ in the source distribution for its full text. GPU_TIME = 132, \ GPU_PERCENT = 133, \ ISCONTAINER = 134, \ + GPU_MEM = 135, \ // End of list