Skip to content

Commit dc3fa07

Browse files
Merge pull request #2937 from devitocodes/ncu-py
compiler: Minor tweaks to enable integrated NCU-based profiling
2 parents c71560b + afa9cd0 commit dc3fa07

6 files changed

Lines changed: 70 additions & 16 deletions

File tree

devito/__init__.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from devito.core import * # noqa
4343
from devito.logger import logger_registry, _set_log_level # noqa
4444
from devito.mpi.routines import mpi_registry
45-
from devito.operator import profiler_registry, operator_registry
45+
from devito.operator import NcuProfiling, profiler_registry, operator_registry
4646

4747
# Apply monkey-patching while we wait for our patches to be upstreamed and released
4848
from devito.mpatches import * # noqa
@@ -159,8 +159,17 @@ def autotune_callback(val): # noqa
159159
configuration.add('opt', 'advanced', list(operator_registry._accepted), deprecate='dle')
160160
configuration.add('opt-options', {}, deprecate='dle-options')
161161

162+
162163
# Setup Operator profiling
163-
configuration.add('profiling', 'basic', list(profiler_registry), impacts_jit=False)
164+
def profiling_preprocessor(i):
    """
    Normalize a value assigned to the ``profiling`` configuration entry.

    A dict value is converted into an ``NcuProfiling`` built from the entry
    stored under its ``'ncu'`` key; any other value is handed back untouched.
    """
    return NcuProfiling(i['ncu']) if isinstance(i, dict) else i
169+
170+
171+
configuration.add('profiling', 'basic', list(profiler_registry),
172+
preprocessor=profiling_preprocessor, impacts_jit=False)
164173

165174
# Initialize `configuration`
166175
init_configuration()

devito/arch/archinfo.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
__all__ = [ # noqa: RUF022
2222
'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices',
2323
'get_nvidia_cc', 'get_cuda_path', 'get_cuda_version', 'get_hip_path',
24-
'check_cuda_runtime', 'get_m1_llvm_path', 'get_advisor_path', 'Platform',
25-
'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device',
24+
'check_cuda_runtime', 'load_cudart', 'get_m1_llvm_path', 'get_advisor_path',
25+
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
26+
'Device',
2627
'NvidiaDevice', 'AmdDevice', 'IntelDevice',
2728
# Brand-agnostic
2829
'ANYCPU', 'ANYGPU',
@@ -646,13 +647,25 @@ def get_m1_llvm_path(language):
646647

647648

648649
@memoized_func
649-
def check_cuda_runtime():
650+
def load_cudart():
    """
    Load the CUDA runtime library.

    Returns
    -------
    ctypes.CDLL
        Handle to the loaded ``libcudart`` shared library.

    Raises
    ------
    RuntimeError
        If ``libcudart`` cannot be located, or if it is located but cannot
        be loaded.
    """
    libname = ctypes.util.find_library("cudart")
    if not libname:
        raise RuntimeError("Unable to find CUDA runtime library `libcudart`")

    try:
        return ctypes.CDLL(libname)
    except OSError as err:
        # A library that is found but fails to load (e.g. a broken or
        # mismatched installation) is reported with the same exception type
        # as a missing library, so that callers handling `RuntimeError` can
        # fall back instead of crashing on an unexpected `OSError`
        raise RuntimeError(
            f"Unable to load CUDA runtime library `{libname}`"
        ) from err
659+
660+
661+
@memoized_func
662+
def check_cuda_runtime():
663+
try:
664+
cuda = load_cudart()
665+
except RuntimeError:
652666
warning("Unable to check compatibility of NVidia driver and runtime")
653667
return
654668

655-
cuda = ctypes.CDLL(libname)
656669
driver_version = ctypes.c_int()
657670
runtime_version = ctypes.c_int()
658671

@@ -1115,11 +1128,10 @@ def max_shm_per_block(self):
11151128
"""
11161129
Get the maximum amount of shared memory per thread block
11171130
"""
1118-
# Load libcudart
1119-
libname = ctypes.util.find_library("cudart")
1120-
if not libname:
1131+
try:
1132+
lib = load_cudart()
1133+
except RuntimeError:
11211134
return 64 * 1024 # 64 KB default
1122-
lib = ctypes.CDLL(libname)
11231135

11241136
cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
11251137
# get current device

devito/operator/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from .operator import Operator # noqa
2-
from .profiling import profiler_registry # noqa
2+
from .profiling import NcuProfiling, profiler_registry # noqa
33
from .registry import operator_registry # noqa

devito/operator/operator.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,7 +1158,12 @@ def lower_perfentry(v):
11581158
perf(f"{indent*2}+ {n} ran in {fround(v1.time):.2f} s "
11591159
f"[{fround(v1.time/v.time*100):.2f}%] {metrics}")
11601160

1161-
# Emit performance mode and arguments
1161+
self._emit_perf_args(args)
1162+
1163+
return summary
1164+
1165+
def _emit_perf_args(self, args):
1166+
"""Emit performance mode and runtime performance arguments."""
11621167
perf_args = {}
11631168
for i in self.input + self.dimensions:
11641169
if not i.is_PerfKnob:
@@ -1176,8 +1181,6 @@ def lower_perfentry(v):
11761181
perf_args = {k: perf_args[k] for k in sorted(perf_args)}
11771182
perf(f"Performance[mode={self._mode}] arguments: {perf_args}")
11781183

1179-
return summary
1180-
11811184
# Pickling support
11821185

11831186
def __getstate__(self):

devito/operator/profiling.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from devito.symbolics import subs_op_args
2020
from devito.tools import DefaultOrderedDict, flatten
2121

22-
__all__ = ['create_profile']
22+
__all__ = ['NcuProfiling', 'create_profile']
2323

2424

2525
SectionData = namedtuple('SectionData', 'ops sops points traffic itermaps')
@@ -28,6 +28,28 @@
2828
PerfEntry = namedtuple('PerfEntry', 'time gflopss gpointss oi ops itershapes')
2929

3030

31+
class NcuProfiling(str):

    """
    Profiling mode that is, as a string, always equal to ``'ncu'``.

    Since the instance itself is the string ``'ncu'``, it can be used wherever
    a plain profiling-mode name is expected, for example as a dictionary key.
    The name of the Operator selected for NCU travels along with it in the
    ``operator_name`` attribute.
    """

    def __new__(cls, operator_name):
        # Only a non-empty string naming exactly one Operator is accepted
        if not (isinstance(operator_name, str) and operator_name):
            raise ValueError("Expected DEVITO_PROFILING=ncu:op_name")
        if operator_name.count(','):
            raise ValueError("NCU profiling supports one Operator at a time")

        instance = super().__new__(cls, 'ncu')
        instance.operator_name = operator_name

        return instance
51+
52+
3153
class Profiler:
3254

3355
_default_includes = []
@@ -532,6 +554,7 @@ def create_profile(name):
532554
'advanced': AdvancedProfiler,
533555
'advanced1': AdvancedProfilerVerbose1,
534556
'advanced2': AdvancedProfilerVerbose2,
557+
'ncu': AdvancedProfilerVerbose2,
535558
'advisor': AdvisorProfiler
536559
}
537560
"""Profiling levels."""

devito/parameters.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,14 @@ def init_configuration(configuration=configuration, env_vars_mapper=env_vars_map
199199
# Env variable format: 'var=k1:v1;k2:v2;k3:v3;...'
200200
keys, values = zip(*[i.split(':') for i in items], strict=True)
201201
# Casting
202-
values = [eval(i) for i in values]
202+
processed = []
203+
for i in values:
204+
try:
205+
processed.append(eval(i))
206+
except (NameError, SyntaxError):
207+
# Allow unquoted strings as `k:v` values.
208+
processed.append(i)
209+
values = processed
203210
except AttributeError:
204211
# Env variable format: 'var=v', 'v' is not a string
205212
keys = [v]

0 commit comments

Comments
 (0)