Skip to content

Commit a23b68d

Browse files
committed
compiler: Add skeleton support for NCU-based profiling
1 parent c71560b commit a23b68d

5 files changed

Lines changed: 63 additions & 13 deletions

File tree

devito/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@
4242
from devito.core import * # noqa
4343
from devito.logger import logger_registry, _set_log_level # noqa
4444
from devito.mpi.routines import mpi_registry
45-
from devito.operator import profiler_registry, operator_registry
45+
from devito.operator import NcuProfiling, profiler_registry, operator_registry
4646

4747
# Apply monkey-patching while we wait for our patches to be upstreamed and released
4848
from devito.mpatches import * # noqa
@@ -160,7 +160,15 @@ def autotune_callback(val): # noqa
160160
configuration.add('opt-options', {}, deprecate='dle-options')
161161

162162
# Setup Operator profiling
163-
configuration.add('profiling', 'basic', list(profiler_registry), impacts_jit=False)
163+
def profiling_preprocessor(i):
    """
    Normalize a value supplied for the 'profiling' configuration entry.

    Parameters
    ----------
    i : object
        The raw profiling value. A dict is expected to carry the name of
        the Operator to be profiled under the 'ncu' key; any other value
        is passed through unchanged.

    Returns
    -------
    object
        ``NcuProfiling(i['ncu'])`` if ``i`` is a dict, otherwise ``i``.

    Raises
    ------
    ValueError
        If ``i`` is a dict without an 'ncu' key, or if ``NcuProfiling``
        rejects the Operator name.
    """
    if not isinstance(i, dict):
        return i

    try:
        operator_name = i['ncu']
    except KeyError:
        # A dict lacking 'ncu' would otherwise surface as a bare
        # `KeyError: 'ncu'`; report it the same way NcuProfiling reports
        # malformed input instead
        raise ValueError(
            f"Expected DEVITO_PROFILING=ncu:op_name, got keys {list(i)}"
        ) from None

    return NcuProfiling(operator_name)
168+
169+
170+
configuration.add('profiling', 'basic', list(profiler_registry),
171+
preprocessor=profiling_preprocessor, impacts_jit=False)
164172

165173
# Initialize `configuration`
166174
init_configuration()

devito/arch/archinfo.py

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,9 @@
2121
__all__ = [ # noqa: RUF022
2222
'platform_registry', 'get_cpu_info', 'get_gpu_info', 'get_visible_devices',
2323
'get_nvidia_cc', 'get_cuda_path', 'get_cuda_version', 'get_hip_path',
24-
'check_cuda_runtime', 'get_m1_llvm_path', 'get_advisor_path', 'Platform',
25-
'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power', 'Device',
24+
'check_cuda_runtime', 'load_cudart', 'get_m1_llvm_path', 'get_advisor_path',
25+
'Platform', 'Cpu64', 'Intel64', 'IntelSkylake', 'Amd', 'Arm', 'Power',
26+
'Device',
2627
'NvidiaDevice', 'AmdDevice', 'IntelDevice',
2728
# Brand-agnostic
2829
'ANYCPU', 'ANYGPU',
@@ -646,13 +647,25 @@ def get_m1_llvm_path(language):
646647

647648

648649
@memoized_func
649-
def check_cuda_runtime():
650+
def load_cudart():
    """
    Load the CUDA runtime library.

    Returns
    -------
    ctypes.CDLL
        A handle to ``libcudart``.

    Raises
    ------
    RuntimeError
        If ``libcudart`` cannot be located, or if it is located but
        cannot be loaded.
    """
    libname = ctypes.util.find_library("cudart")
    if not libname:
        raise RuntimeError("Unable to find CUDA runtime library `libcudart`")

    try:
        return ctypes.CDLL(libname)
    except OSError as e:
        # `check_cuda_runtime` and `max_shm_per_block` only handle
        # RuntimeError, so a library that is found but fails to load must
        # be reported through the same exception type for their fallback
        # paths to trigger
        raise RuntimeError(
            f"Unable to load CUDA runtime library `{libname}`: {e}"
        ) from e
659+
660+
661+
@memoized_func
662+
def check_cuda_runtime():
663+
try:
664+
cuda = load_cudart()
665+
except RuntimeError:
652666
warning("Unable to check compatibility of NVidia driver and runtime")
653667
return
654668

655-
cuda = ctypes.CDLL(libname)
656669
driver_version = ctypes.c_int()
657670
runtime_version = ctypes.c_int()
658671

@@ -1115,11 +1128,10 @@ def max_shm_per_block(self):
11151128
"""
11161129
Get the maximum amount of shared memory per thread block
11171130
"""
1118-
# Load libcudart
1119-
libname = ctypes.util.find_library("cudart")
1120-
if not libname:
1131+
try:
1132+
lib = load_cudart()
1133+
except RuntimeError:
11211134
return 64 * 1024 # 64 KB default
1122-
lib = ctypes.CDLL(libname)
11231135

11241136
cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
11251137
# get current device

devito/operator/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
from .operator import Operator # noqa
2-
from .profiling import profiler_registry # noqa
2+
from .profiling import NcuProfiling, profiler_registry # noqa
33
from .registry import operator_registry # noqa

devito/operator/profiling.py

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from devito.symbolics import subs_op_args
2020
from devito.tools import DefaultOrderedDict, flatten
2121

22-
__all__ = ['create_profile']
22+
__all__ = ['create_profile', 'NcuProfiling']
2323

2424

2525
SectionData = namedtuple('SectionData', 'ops sops points traffic itermaps')
@@ -28,6 +28,28 @@
2828
PerfEntry = namedtuple('PerfEntry', 'time gflopss gpointss oi ops itershapes')
2929

3030

31+
class NcuProfiling(str):

    """
    A profiling mode that behaves like the string ``'ncu'`` while also
    recording which Operator has been selected for NCU.

    Since the instance compares and hashes as ``'ncu'``, it can be used
    as-is to look up an entry in ``profiler_registry``. The name of the
    selected Operator is available as the ``operator_name`` attribute.
    """

    def __new__(cls, operator_name):
        # Exactly one, non-empty, Operator name must be provided
        if not (isinstance(operator_name, str) and operator_name):
            raise ValueError("Expected DEVITO_PROFILING=ncu:op_name")
        if ',' in operator_name:
            raise ValueError("NCU profiling supports one Operator at a time")

        # The string payload is always 'ncu', regardless of the Operator
        mode = str.__new__(cls, 'ncu')
        mode.operator_name = operator_name

        return mode
51+
52+
3153
class Profiler:
3254

3355
_default_includes = []
@@ -532,6 +554,7 @@ def create_profile(name):
532554
'advanced': AdvancedProfiler,
533555
'advanced1': AdvancedProfilerVerbose1,
534556
'advanced2': AdvancedProfilerVerbose2,
557+
'ncu': AdvancedProfilerVerbose2,
535558
'advisor': AdvisorProfiler
536559
}
537560
"""Profiling levels."""

devito/parameters.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -199,7 +199,14 @@ def init_configuration(configuration=configuration, env_vars_mapper=env_vars_map
199199
# Env variable format: 'var=k1:v1;k2:v2;k3:v3;...'
200200
keys, values = zip(*[i.split(':') for i in items], strict=True)
201201
# Casting
202-
values = [eval(i) for i in values]
202+
processed = []
203+
for i in values:
204+
try:
205+
processed.append(eval(i))
206+
except (NameError, SyntaxError):
207+
# Allow unquoted strings as `k:v` values.
208+
processed.append(i)
209+
values = processed
203210
except AttributeError:
204211
# Env variable format: 'var=v', 'v' is not a string
205212
keys = [v]

0 commit comments

Comments
 (0)