Skip to content

Commit 272e92f

Browse files
committed
nvidia: add voltage via nvapi
1 parent de2ce99 commit 272e92f

6 files changed

Lines changed: 218 additions & 6 deletions

File tree

server/metrics/gpu/gpu.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ GPUS::GPUS() {
9090

9191
continue;
9292
}
93+
94+
if (!ptr->nvapi_available) {
95+
SPDLOG_WARN("NvAPI is not loaded. Voltage might not be available");
96+
}
9397
}
9498
} else if (driver == "panfrost") {
9599
gpu = std::make_shared<Panfrost>(drm_node, pci_dev, vendor_id, device_id);

server/metrics/gpu/meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ gpu_lib = static_library(
1313
'msm/dpu.cpp',
1414
'msm/kgsl.cpp',
1515
'nvidia/nvidia.cpp',
16-
'nvidia/nvml_loader.cpp'),
16+
'nvidia/nvml_loader.cpp',
17+
'nvidia/nvapi_loader.cpp'),
1718
include_directories: '../',
1819
dependencies: [libdrm_dep,
1920
libcap_dep,
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#include "nvapi_loader.hpp"
2+
#include <dlfcn.h>
3+
#include <spdlog/spdlog.h>
4+
5+
static std::shared_ptr<libnvapi_loader> libnvapi_;
6+
7+
std::shared_ptr<libnvapi_loader> get_libnvapi_loader()
8+
{
9+
if (!libnvapi_)
10+
libnvapi_ = std::make_shared<libnvapi_loader>();
11+
12+
return libnvapi_;
13+
}
14+
15+
// Attempts to load the library right away. The outcome is not reported from
// here; callers query it afterwards through is_loaded().
libnvapi_loader::libnvapi_loader()
{
    this->load();
}
18+
19+
// Releases the library handle and clears the resolved entry points.
libnvapi_loader::~libnvapi_loader()
{
    this->unload();
}
22+
23+
#define LOAD_NVAPI_FUNCTION(name, id) \
24+
do { \
25+
name = reinterpret_cast<decltype(this->name)>(nvapi_QueryInterface(id)); \
26+
if (!name) { \
27+
SPDLOG_ERROR("Failed to find address of {}", #name); \
28+
unload(); \
29+
return false; \
30+
} \
31+
} while(0)
32+
33+
bool libnvapi_loader::load() {
34+
if (loaded_)
35+
return true;
36+
37+
library_ = dlopen(library_name.c_str(), RTLD_LAZY | RTLD_NODELETE);
38+
39+
if (!library_) {
40+
SPDLOG_ERROR("Failed to open {}: {}", library_name, dlerror());
41+
return false;
42+
}
43+
44+
nvapi_QueryInterface =
45+
reinterpret_cast<decltype(nvapi_QueryInterface)>(dlsym(library_, "nvapi_QueryInterface"));
46+
47+
if (!nvapi_QueryInterface) {
48+
SPDLOG_ERROR("Failed to find address of nvapi_QueryInterface");
49+
return false;
50+
}
51+
52+
LOAD_NVAPI_FUNCTION(nvapi_Initialize, NVAPI_INIT_ID);
53+
LOAD_NVAPI_FUNCTION(nvapi_GetErrorMessage, NVAPI_GET_ERROR_MESSAGE);
54+
LOAD_NVAPI_FUNCTION(nvapi_EnumPhysicalGPUs, NVAPI_ENUM_PHYSICAL_GPUS);
55+
LOAD_NVAPI_FUNCTION(nvapi_GPU_GetBusId, NVAPI_GET_BUS_ID);
56+
LOAD_NVAPI_FUNCTION(nvapi_GetVoltage, NVAPI_GET_CURRENT_VOLTAGE);
57+
58+
loaded_ = true;
59+
return true;
60+
}
61+
62+
#undef LOAD_NVAPI_FUNCTION
63+
64+
void libnvapi_loader::unload() {
65+
if (library_) {
66+
dlclose(library_);
67+
library_ = NULL;
68+
}
69+
70+
loaded_ = false;
71+
72+
nvapi_QueryInterface = nullptr;
73+
nvapi_Initialize = nullptr;
74+
nvapi_GetErrorMessage = nullptr;
75+
nvapi_EnumPhysicalGPUs = nullptr;
76+
nvapi_GPU_GetBusId = nullptr;
77+
nvapi_GetVoltage = nullptr;
78+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#pragma once
2+
3+
#include <string>
4+
#include <memory>
5+
#include <atomic>
6+
7+
#define NVAPI_MAX_PHYSICAL_GPUS 64
8+
#define NVAPI_INIT_ID 0x0150e828
9+
#define NVAPI_GET_ERROR_MESSAGE 0x6c2d048c
10+
#define NVAPI_ENUM_PHYSICAL_GPUS 0xe5ac921f
11+
#define NVAPI_GET_BUS_ID 0x1be0b8e5
12+
#define NVAPI_GET_CURRENT_VOLTAGE 0x465f9bcf
13+
14+
// Runtime loader for the NvAPI shared library named in `library_name`.
//
// The constructor and destructor are declared here and defined elsewhere.
// After construction, check is_loaded() before calling any of the function
// pointers below: they are nullptr by default and stay nullptr unless load()
// assigns them.
class libnvapi_loader {
public:
    libnvapi_loader();
    ~libnvapi_loader();

    // The object owns a library handle; copying it is not allowed.
    libnvapi_loader(const libnvapi_loader&) = delete;
    libnvapi_loader& operator=(const libnvapi_loader&) = delete;

    // Argument block passed to nvapi_GetVoltage. `version` packs the structure
    // size in the low bits and the value 1 shifted left by 16. All other fields
    // start zeroed so a default-constructed instance is fully initialised.
    struct NvApiVoltage {
        unsigned int version = sizeof(NvApiVoltage) | (1 << 16);
        unsigned int flags = 0;
        unsigned int padding_1[8] = {};
        unsigned int value_microvolts = 0;
        unsigned int padding_2[8] = {};
    };

    // Loads the library and resolves the entry points; returns true on success.
    bool load();
    // True once load() has completed successfully.
    bool is_loaded() const { return loaded_; }

    // Entry points, named after the NvAPI function each one is resolved to.
    // Integer returns are status codes; the visible callers treat 0 as success.
    void* (*nvapi_QueryInterface)(unsigned int) = nullptr;
    int (*nvapi_Initialize)() = nullptr;
    int (*nvapi_GetErrorMessage)(int, char(*)[64]) = nullptr;
    int (*nvapi_EnumPhysicalGPUs)(void*, unsigned int*) = nullptr;
    int (*nvapi_GPU_GetBusId)(long, unsigned int*) = nullptr;
    int (*nvapi_GetVoltage)(long, NvApiVoltage*) = nullptr;

private:
    void unload();
    void* library_ = nullptr;

    const std::string library_name = "libnvidia-api.so.1";
    std::atomic<bool> loaded_ = false;
};
48+
49+
std::shared_ptr<libnvapi_loader> get_libnvapi_loader();

server/metrics/gpu/nvidia/nvidia.cpp

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Nvidia::Nvidia(
77
GPU(drm_node, pci_dev, vendor_id, device_id, "gpu-nvidia"
88
) {
99
nvml_available = init_nvml(pci_dev);
10+
nvapi_available = init_nvapi(pci_dev);
1011
}
1112

1213
bool Nvidia::init_nvml(const std::string& pci_dev) {
@@ -35,6 +36,67 @@ bool Nvidia::init_nvml(const std::string& pci_dev) {
3536
return true;
3637
}
3738

39+
bool Nvidia::init_nvapi(const std::string& pci_dev) {
40+
nvapi = get_libnvapi_loader();
41+
42+
unsigned int pciBusId = 0;
43+
44+
{
45+
unsigned int domain, bus, slot, func;
46+
47+
if (sscanf(pci_dev.c_str(), "%x:%02x:%02x.%x", &domain, &bus, &slot, &func) != 4) {
48+
SPDLOG_ERROR("nvapi: Failed to parse PCI device ID: '{}'", pci_dev);
49+
return false;
50+
}
51+
52+
pciBusId = bus;
53+
}
54+
55+
if (!nvapi)
56+
return false;
57+
58+
if (!nvapi->is_loaded())
59+
return false;
60+
61+
int result = nvapi->nvapi_Initialize();
62+
63+
if (result != 0) {
64+
char msg[64] = {};
65+
nvapi->nvapi_GetErrorMessage(result, &msg);
66+
SPDLOG_ERROR("nvapi_Initialize() failed: {}", msg);
67+
return false;
68+
}
69+
70+
long nvGPUHandle[NVAPI_MAX_PHYSICAL_GPUS] = {};
71+
unsigned int numOfGPUs = 0;
72+
73+
result = nvapi->nvapi_EnumPhysicalGPUs(&nvGPUHandle, &numOfGPUs);
74+
75+
if (result != 0) {
76+
char msg[64] = {};
77+
nvapi->nvapi_GetErrorMessage(result, &msg);
78+
SPDLOG_ERROR("nvapi_EnumPhysicalGPUs() failed: {}", msg);
79+
return false;
80+
}
81+
82+
for (unsigned int i = 0; i < numOfGPUs; i++) {
83+
unsigned int busId = 0;
84+
result = nvapi->nvapi_GPU_GetBusId(nvGPUHandle[i], &busId);
85+
86+
if (result == 0 && busId == pciBusId) {
87+
nvapi_device = nvGPUHandle[i];
88+
break;
89+
}
90+
}
91+
92+
if (nvapi_device == 0) {
93+
SPDLOG_ERROR("nvapi: Failed to find gpu with {}", pci_dev);
94+
return false;
95+
}
96+
97+
return true;
98+
}
99+
38100
const std::vector<nvmlProcessInfo_v1_t> Nvidia::get_processes() {
39101
unsigned int info_count = 0;
40102

@@ -121,15 +183,27 @@ int Nvidia::get_core_clock() {
121183

122184
int Nvidia::get_voltage() {
123185
uint32_t voltage = 0;
186+
bool try_nvapi = false;
124187

125-
if (nvml->nvmlInternalGetVoltage == nullptr) {
126-
return 0;
188+
if (nvml->nvmlInternalGetVoltage != nullptr) {
189+
nvmlReturn_t ret = nvml->nvmlInternalGetVoltage(device, &voltage);
190+
191+
if (ret != NVML_SUCCESS) {
192+
try_nvapi = true;
193+
}
194+
} else {
195+
try_nvapi = true;
127196
}
128197

129-
nvmlReturn_t ret = nvml->nvmlInternalGetVoltage(device, &voltage);
198+
if (try_nvapi) {
199+
libnvapi_loader::NvApiVoltage voltage_info = {};
130200

131-
if (ret != NVML_SUCCESS)
132-
return 0;
201+
int nv_ret = nvapi->nvapi_GetVoltage(nvapi_device, &voltage_info);
202+
203+
if (nv_ret == 0) {
204+
voltage = voltage_info.value_microvolts;
205+
}
206+
}
133207

134208
return voltage / 1000.f;
135209
}

server/metrics/gpu/nvidia/nvidia.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44
#include <utility>
55
#include "gpu.hpp"
66
#include "nvml_loader.hpp"
7+
#include "nvapi_loader.hpp"
78

89
class Nvidia : public GPU {
910
private:
1011
std::shared_ptr<libnvml_loader> nvml;
1112
nvmlDevice_t device = nullptr;
1213
bool init_nvml(const std::string& pci_dev);
1314

15+
std::shared_ptr<libnvapi_loader> nvapi;
16+
uint32_t nvapi_device = 0;
17+
bool init_nvapi(const std::string& pci_dev);
18+
1419
const std::vector<nvmlProcessInfo_v1_t> get_processes();
1520

1621
public:
@@ -20,6 +25,7 @@ class Nvidia : public GPU {
2025
);
2126

2227
bool nvml_available = false;
28+
bool nvapi_available = false;
2329

2430
// System-related functions
2531
int get_load() override;

0 commit comments

Comments
 (0)