Skip to content

Commit 3a5b400

Browse files
Merge pull request #383 from KernelTuner/fix-uuid-from-cuda-backends
Fix how UUID/PCI bus is acquired in CUDA backends and processed by NVML
2 parents b6d196b + 773339c commit 3a5b400

4 files changed

Lines changed: 76 additions & 39 deletions

File tree

kernel_tuner/backends/cupy.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,17 +62,11 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
6262
# default dynamically allocated shared memory size, can be overwritten using smem_args
6363
self.smem_size = 0
6464

65-
# setup observers
66-
self.observers = observers or []
67-
self.observers.append(CupyRuntimeObserver(self))
68-
for obs in self.observers:
69-
obs.register_device(self)
70-
7165
# collect environment information
7266
env = dict()
7367
cupy_info = str(get_runtime_info()).split("\n")[:-1]
7468
info_dict = {
75-
s.split(":")[0].strip(): s.split(":")[1].strip() for s in cupy_info
69+
s.split(":", 1)[0].strip(): s.split(":", 1)[1].strip() for s in cupy_info
7670
}
7771
env["device_name"] = info_dict[f"Device {device} Name"]
7872
env["pci_bus_id"] = info_dict[f"Device {device} PCI Bus ID"]
@@ -89,6 +83,12 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
8983
self.env = env
9084
self.name = env["device_name"]
9185

86+
# setup observers
87+
self.observers = observers or []
88+
self.observers.append(CupyRuntimeObserver(self))
89+
for obs in self.observers:
90+
obs.register_device(self)
91+
9292
def ready_argument_list(self, arguments):
9393
"""Ready argument list to be passed to the kernel, allocates gpu mem.
9494

kernel_tuner/backends/nvcuda.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,27 +88,33 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
8888
# default dynamically allocated shared memory size, can be overwritten using smem_args
8989
self.smem_size = 0
9090

91-
# setup observers
92-
self.observers = observers or []
93-
self.observers.append(CudaRuntimeObserver(self))
94-
for observer in self.observers:
95-
observer.register_device(self)
96-
9791
# collect environment information
9892
err, device_properties = runtime.cudaGetDeviceProperties(device)
9993
cuda_error_check(err)
10094
env = dict()
10195
env["uuid"] = str(uuid.UUID(bytes=device_properties.uuid.bytes))
10296
env["device_name"] = device_properties.name.decode()
103-
env["pci_bus_id"] = device_properties.pciBusID
10497
env["cuda_version"] = driver.CUDA_VERSION
10598
env["compute_capability"] = self.cc
10699
env["iterations"] = self.iterations
107100
env["compiler_options"] = self.compiler_options
108101
env["device_properties"] = str(device_properties).replace("\n", ", ")
102+
103+
# We must use `cudaDeviceGetPCIBusId` to get the PCI bus string
104+
# It returns a series of bytes containing a null byte, not a `str`
105+
err, pci_bus = runtime.cudaDeviceGetPCIBusId(32, device) # 32 = length?
106+
cuda_error_check(err)
107+
env["pci_bus_id"] = pci_bus.decode("ascii").split("\x00", 1)[0]
108+
109109
self.env = env
110110
self.name = env["device_name"]
111111

112+
# setup observers
113+
self.observers = observers or []
114+
self.observers.append(CudaRuntimeObserver(self))
115+
for observer in self.observers:
116+
observer.register_device(self)
117+
112118
def __del__(self):
113119
for device_memory in self.allocations:
114120
if isinstance(device_memory, driver.CUdeviceptr):

kernel_tuner/backends/pycuda.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,6 @@ def _finish_up():
130130
# default dynamically allocated shared memory size, can be overwritten using smem_args
131131
self.smem_size = 0
132132

133-
# setup observers
134-
self.observers = observers or []
135-
self.observers.append(PyCudaRuntimeObserver(self))
136-
for obs in self.observers:
137-
obs.register_device(self)
138-
139133
# collect environment information
140134
env = dict()
141135
env["device_name"] = self.context.get_device().name()
@@ -148,6 +142,12 @@ def _finish_up():
148142
self.env = env
149143
self.name = env["device_name"]
150144

145+
# setup observers
146+
self.observers = observers or []
147+
self.observers.append(PyCudaRuntimeObserver(self))
148+
for obs in self.observers:
149+
obs.register_device(self)
150+
151151
def __del__(self):
152152
for gpu_mem in self.allocations:
153153
# if needed for when using mocks during testing

kernel_tuner/observers/nvml.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,42 @@ def __init__(
2424
use_locked_clocks=False
2525
):
2626
"""Create object to control device using NVML."""
27+
# We set these first as __del__ checks these
28+
# and this __init__ may exceptions midway
29+
self.pwr_limit_default = None
30+
self.modified_clocks = False
31+
2732
pynvml.nvmlInit()
2833

29-
if sum(x is not None for x in [device_id, device_uuid, device_pci_bus]) != 1:
30-
raise ValueError("invalid device: specify either the index, the UUID, or the PCI-bus")
31-
elif device_id is not None:
34+
if device_id is not None:
3235
self.dev = pynvml.nvmlDeviceGetHandleByIndex(device_id)
3336
elif device_uuid is not None:
3437
self.dev = pynvml.nvmlDeviceGetHandleByUUID(device_uuid)
3538
elif device_pci_bus is not None:
36-
self.dev = pynvml.nvmlDeviceGetHandleByPciBusId_v2(device_pci_bus)
39+
self.dev = pynvml.nvmlDeviceGetHandleByPciBusId(device_pci_bus)
3740

3841
self.id = pynvml.nvmlDeviceGetIndex(self.dev)
42+
self.uuid = pynvml.nvmlDeviceGetUUID(self.dev)
43+
self.pci_bus = pynvml.nvmlDeviceGetPciInfo_v3(self.dev).busId
3944
self.nvidia_smi = nvidia_smi_fallback or "nvidia-smi"
4045

46+
if device_id is not None and self.id != device_id:
47+
raise ValueError(f"NVML device ID does not match requested device: {device_id} != {self.id}")
48+
49+
# Some backends have UUID starting with "GPU-"
50+
if device_uuid is not None and self.uuid.removeprefix("GPU-") != device_uuid.removeprefix("GPU-"):
51+
raise ValueError(f"NVML device UUID does not match requested device: {device_uuid} != {self.uuid}")
52+
53+
# lstrip is needed since some backends use leading zeros
54+
if device_pci_bus is not None and self.pci_bus.lstrip("0") != device_pci_bus.lstrip("0"):
55+
raise ValueError(f"NVML device PCI-bus does not match requested device: {device_pci_bus} != {self.pci_bus}")
56+
4157
try:
4258
self.pwr_limit_default = pynvml.nvmlDeviceGetPowerManagementLimit(self.dev)
4359
self.pwr_constraints = pynvml.nvmlDeviceGetPowerManagementLimitConstraints(self.dev)
4460
except pynvml.NVMLError_NotSupported:
45-
self.pwr_limit_default = None
4661
# inverted range to make all range checks fail
62+
self.pwr_limit_default = None
4763
self.pwr_constraints = [1, 0]
4864

4965
try:
@@ -58,7 +74,6 @@ def __init__(
5874
self._auto_boost = None
5975

6076
# try to initialize application clocks
61-
self.modified_clocks = False
6277
try:
6378
if not use_locked_clocks:
6479
self.gr_clock_default = pynvml.nvmlDeviceGetDefaultApplicationsClock(
@@ -287,6 +302,11 @@ def pwr_usage(self):
287302
NVML_FI_DEV_POWER_INSTANT = 186
288303
return pynvml.nvmlDeviceGetFieldValues(self.dev, [NVML_FI_DEV_POWER_INSTANT])[0].value.uiVal
289304

305+
def energy_usage(self):
306+
"""Return total energy usage since bootup in millijoules."""
307+
NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION = 83
308+
return pynvml.nvmlDeviceGetFieldValues(self.dev, [NVML_FI_DEV_TOTAL_ENERGY_CONSUMPTION])[0].value.ullVal
309+
290310
def gr_voltage(self):
291311
"""Return current graphics voltage in millivolts."""
292312
args = ["nvidia-smi", "-i", str(self.id), "-q", "-d", "VOLTAGE"]
@@ -335,7 +355,7 @@ class NVMLObserver(BenchmarkObserver):
335355
def __init__(
336356
self,
337357
observables,
338-
device=0,
358+
device=None,
339359
save_all=False,
340360
nvidia_smi_fallback=None,
341361
use_locked_clocks=False,
@@ -374,6 +394,8 @@ def __init__(
374394

375395
self.record_gr_voltage = False
376396
self.t0 = 0
397+
self.initial_energy_reading = None
398+
377399
if "gr_voltage" in observables:
378400
self.record_gr_voltage = True
379401
self.gr_voltage_readings = []
@@ -386,26 +408,34 @@ def __init__(
386408
self.iteration = {obs: [] for obs in self.during_obs}
387409

388410
def register_device(self, dev):
411+
env = getattr(dev, "env", dict())
412+
uuid = env.get("uuid")
413+
pci_bus = env.get("pci_bus_id")
414+
389415
if self.device is not None:
390416
self.nvml = nvml(device_id=self.device, **self.nvml_kwargs)
417+
elif uuid is not None and pci_bus is not None:
418+
self.nvml = nvml(device_uuid=uuid, device_pci_bus=pci_bus, **self.nvml_kwargs)
419+
elif uuid is not None:
420+
self.nvml = nvml(device_uuid=uuid, **self.nvml_kwargs)
421+
elif pci_bus is not None:
422+
self.nvml = nvml(device_pci_bus=pci_bus, **self.nvml_kwargs)
391423
else:
392-
env = getattr(dev, "env", dict())
393-
uuid = env.get("uuid")
394-
pci_bus = env.get("pci_bus_id")
395-
396-
if uuid is not None:
397-
self.nvml = nvml(device_uuid=uuid, **self.nvml_kwargs)
398-
elif pci_bus is not None:
399-
self.nvml = nvml(device_pci_bus=pci_bus, **self.nvml_kwargs)
400-
else:
401-
raise ValueError("failed to detect NVIDIA device: no UUID or PCI-bus-id in environment")
402-
403-
424+
raise ValueError("failed to detect NVIDIA device: no UUID or PCI-bus-id in environment")
404425

405426
def read_power(self):
406427
""" Return power in Watt """
407428
return self.nvml.pwr_usage() / 1e3
408429

430+
def read_energy(self):
431+
""" Return cumulative energy usage in Joule """
432+
now = self.nvml.energy_usage()
433+
434+
if self.initial_energy_reading is None:
435+
self.initial_energy_reading = now
436+
437+
return (now - self.initial_energy_reading) / 1e3
438+
409439
def before_start(self):
410440
# clear results of the observables for next measurement
411441
self.iteration = {obs: [] for obs in self.during_obs}
@@ -530,3 +560,4 @@ def get_idle_power(device, n=5, sleep_s=0.1):
530560
time.sleep(sleep_s)
531561
readings.append(d.pwr_usage())
532562
return np.mean(readings) * 1e-3 # Watt
563+

0 commit comments

Comments
 (0)