Skip to content

Commit 6c2adc2

Browse files
Merge pull request #359 from KernelTuner/programmatic_imports
Programmatic imports
2 parents fafe0ff + a67c8f4 commit 6c2adc2

10 files changed

Lines changed: 207 additions & 104 deletions

File tree

kernel_tuner/__init__.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,21 @@
1-
from kernel_tuner.integration import store_results, create_device_targets
2-
from kernel_tuner.interface import tune_kernel, tune_kernel_T1, run_kernel
3-
41
from importlib.metadata import version
52

3+
from kernel_tuner.interface import run_kernel, tune_kernel, tune_kernel_T1
4+
65
__version__ = version(__package__)
6+
7+
__all__ = [
8+
"create_device_targets",
9+
"run_kernel",
10+
"store_results",
11+
"tune_kernel",
12+
"tune_kernel_T1",
13+
"__version__",
14+
]
15+
16+
17+
def __getattr__(name):
    """Resolve lazily exported package attributes on first access.

    ``store_results`` and ``create_device_targets`` are looked up on
    ``kernel_tuner.integration``, which is imported only when one of
    them is requested. Any other name raises ``AttributeError``.
    """
    lazy_names = ("store_results", "create_device_targets")
    if name not in lazy_names:
        raise AttributeError(f"module 'kernel_tuner' has no attribute {name!r}")

    # Deferred import: the integration module is only loaded on demand.
    from kernel_tuner import integration

    return getattr(integration, name)

kernel_tuner/core.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,15 @@
77

88
import numpy as np
99

10-
try:
11-
import cupy as cp
12-
except ImportError:
13-
cp = np
10+
def _get_cupy():
    """Return the ``cupy`` module if it can be imported, otherwise ``None``.

    The import is attempted on every call, so the result reflects whether
    ``cupy`` is importable at the moment of the call.
    """
    cupy_module = None
    try:
        import cupy as cupy_module
    except ImportError:
        # cupy is optional; leave the result as None when it is unavailable.
        pass
    return cupy_module
1416

1517
import kernel_tuner.util as util
1618
from kernel_tuner.accuracy import Tunable
17-
from kernel_tuner.backends.compiler import CompilerFunctions
18-
from kernel_tuner.backends.cupy import CupyFunctions
19-
from kernel_tuner.backends.hip import HipFunctions
20-
from kernel_tuner.backends.hypertuner import HypertunerFunctions
21-
from kernel_tuner.backends.nvcuda import CudaFunctions
22-
from kernel_tuner.backends.opencl import OpenCLFunctions
23-
from kernel_tuner.backends.pycuda import PyCudaFunctions
24-
from kernel_tuner.observers.nvml import NVMLObserver
2519
from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver, PrologueObserver
2620
from kernel_tuner.observers.tegra import TegraObserver
2721

@@ -35,6 +29,7 @@
3529
except ImportError:
3630
DeviceArray = Exception # using Exception here as a type that will never be among kernel arguments
3731

32+
3833
_KernelInstance = namedtuple(
3934
"_KernelInstance",
4035
[
@@ -272,27 +267,31 @@ def __init__(
272267
logging.debug("DeviceInterface instantiated, lang=%s", lang)
273268

274269
if lang.upper() == "CUDA":
270+
from kernel_tuner.backends.pycuda import PyCudaFunctions
275271
dev = PyCudaFunctions(
276272
device,
277273
compiler_options=compiler_options,
278274
iterations=iterations,
279275
observers=observers,
280276
)
281277
elif lang.upper() == "CUPY":
278+
from kernel_tuner.backends.cupy import CupyFunctions
282279
dev = CupyFunctions(
283280
device,
284281
compiler_options=compiler_options,
285282
iterations=iterations,
286283
observers=observers,
287284
)
288285
elif lang.upper() == "NVCUDA":
286+
from kernel_tuner.backends.nvcuda import CudaFunctions
289287
dev = CudaFunctions(
290288
device,
291289
compiler_options=compiler_options,
292290
iterations=iterations,
293291
observers=observers,
294292
)
295293
elif lang.upper() == "OPENCL":
294+
from kernel_tuner.backends.opencl import OpenCLFunctions
296295
dev = OpenCLFunctions(
297296
device,
298297
platform,
@@ -301,20 +300,23 @@ def __init__(
301300
observers=observers,
302301
)
303302
elif lang.upper() in ["C", "FORTRAN"]:
303+
from kernel_tuner.backends.compiler import CompilerFunctions
304304
dev = CompilerFunctions(
305305
compiler=compiler,
306306
compiler_options=compiler_options,
307307
iterations=iterations,
308308
observers=observers,
309309
)
310310
elif lang.upper() == "HIP":
311+
from kernel_tuner.backends.hip import HipFunctions
311312
dev = HipFunctions(
312313
device,
313314
compiler_options=compiler_options,
314315
iterations=iterations,
315316
observers=observers,
316317
)
317318
elif lang.upper() == "HYPERTUNER":
319+
from kernel_tuner.backends.hypertuner import HypertunerFunctions
318320
dev = HypertunerFunctions(
319321
iterations=iterations,
320322
compiler_options=compiler_options
@@ -333,8 +335,12 @@ def __init__(
333335
self.output_observers = []
334336
self.prologue_observers = []
335337
if observers:
338+
try:
339+
from kernel_tuner.observers.nvml import NVMLObserver as _NVMLObserver
340+
except ImportError:
341+
_NVMLObserver = None
336342
for obs in observers:
337-
if isinstance(obs, NVMLObserver):
343+
if _NVMLObserver is not None and isinstance(obs, _NVMLObserver):
338344
self.nvml = obs.nvml
339345
self.use_nvml = True
340346
if isinstance(obs, TegraObserver):
@@ -500,7 +506,12 @@ def check_kernel_output(
500506

501507
should_sync = [answer[i] is not None for i, arg in enumerate(instance.arguments)]
502508
else:
503-
should_sync = [isinstance(arg, (np.ndarray, cp.ndarray, torch.Tensor, DeviceArray)) for arg in instance.arguments]
509+
cp = _get_cupy()
510+
cupy_ndarray = (cp.ndarray,) if cp is not None else ()
511+
should_sync = [
512+
isinstance(arg, (np.ndarray, torch.Tensor, DeviceArray) + cupy_ndarray)
513+
for arg in instance.arguments
514+
]
504515

505516
# re-copy original contents of output arguments to GPU memory, to overwrite any changes
506517
# by earlier kernel runs
@@ -516,7 +527,9 @@ def check_kernel_output(
516527
result_host = []
517528
for i, arg in enumerate(instance.arguments):
518529
if should_sync[i]:
519-
if isinstance(arg, (np.ndarray, cp.ndarray)):
530+
cp = _get_cupy()
531+
cupy_ndarray = (cp.ndarray,) if cp is not None else ()
532+
if isinstance(arg, (np.ndarray,) + cupy_ndarray):
520533
result_host.append(np.zeros_like(arg))
521534
self.dev.memcpy_dtoh(result_host[-1], gpu_args[i])
522535
elif isinstance(arg, torch.Tensor) and isinstance(answer[i], torch.Tensor):
@@ -790,8 +803,10 @@ def _default_verify_function(instance, answer, result_host, atol, verbose):
790803
# for each element in the argument list, check if the types match
791804
for i, arg in enumerate(instance.arguments):
792805
if answer[i] is not None: # skip None elements in the answer list
793-
if isinstance(answer[i], (np.ndarray, cp.ndarray)) and isinstance(
794-
arg, (np.ndarray, cp.ndarray)
806+
cp = _get_cupy()
807+
cupy_ndarray = (cp.ndarray,) if cp is not None else ()
808+
if isinstance(answer[i], (np.ndarray,) + cupy_ndarray) and isinstance(
809+
arg, (np.ndarray,) + cupy_ndarray
795810
):
796811
if not np.can_cast(arg.dtype, answer[i].dtype):
797812
raise TypeError(
@@ -840,7 +855,9 @@ def _default_verify_function(instance, answer, result_host, atol, verbose):
840855
)
841856
else:
842857
# either answer[i] and argument have different types or answer[i] is not a numpy type
843-
if not isinstance(answer[i], (np.ndarray, cp.ndarray, torch.Tensor)) or not isinstance(
858+
cp = _get_cupy()
859+
cupy_ndarray = (cp.ndarray,) if cp is not None else ()
860+
if not isinstance(answer[i], (np.ndarray, torch.Tensor) + cupy_ndarray) or not isinstance(
844861
answer[i], np.number
845862
):
846863
raise TypeError(
@@ -865,7 +882,8 @@ def _flatten(a):
865882
if expected is not None:
866883
result = _ravel(result_host[i])
867884
expected = _flatten(expected)
868-
if any([isinstance(array, cp.ndarray) for array in [expected, result]]):
885+
cp = _get_cupy()
886+
if cp is not None and any([isinstance(array, cp.ndarray) for array in [expected, result]]):
869887
output_test = cp.allclose(expected, result, atol=atol)
870888
elif isinstance(expected, torch.Tensor) and isinstance(result, torch.Tensor):
871889
output_test = torch.allclose(expected, result, atol=atol)

kernel_tuner/integration.py

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,7 @@
44

55
from jsonschema import validate
66

7-
from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file
8-
9-
#specifies for a number of pre-defined objectives whether
10-
#the objective should be minimized or maximized (boolean value denotes higher is better)
11-
objective_default_map = {
12-
"time": False,
13-
"energy": False,
14-
"fitness": True,
15-
"cost": False,
16-
"loss": False,
17-
"GFLOP/s": True,
18-
"TFLOP/s": True,
19-
"GB/s": True,
20-
"TB/s": True,
21-
"GFLOPS/W": True,
22-
"TFLOPS/W": True,
23-
"GFLOP/J": True,
24-
"TFLOP/J": True
25-
}
26-
27-
def get_objective_defaults(objective, objective_higher_is_better):
28-
""" Uses time as default objective and attempts to lookup objective_higher_is_better for known objectives """
29-
objective = objective or "time"
30-
if objective_higher_is_better is None:
31-
if objective in objective_default_map:
32-
objective_higher_is_better = objective_default_map[objective]
33-
else:
34-
raise ValueError(f"Please specify objective_higher_is_better for objective {objective}")
35-
return objective, objective_higher_is_better
7+
from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file, get_objective_defaults
368

379
schema_v1_0 = {
3810
"$schema": "https://json-schema.org/draft-07/schema#",

kernel_tuner/interface.py

Lines changed: 55 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"""
2626

2727
import logging
28+
import importlib
2829
from argparse import ArgumentParser
2930
from ast import literal_eval
3031
from datetime import datetime
@@ -38,7 +39,7 @@
3839
import kernel_tuner.core as core
3940
import kernel_tuner.util as util
4041
from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results, import_class_from_file
41-
from kernel_tuner.integration import get_objective_defaults
42+
from kernel_tuner.util import get_objective_defaults
4243
from kernel_tuner.runners.sequential import SequentialRunner
4344
from kernel_tuner.runners.simulation import SimulationRunner
4445
from kernel_tuner.searchspace import Searchspace
@@ -48,52 +49,63 @@
4849
except ImportError:
4950
torch = util.TorchPlaceHolder()
5051

51-
from kernel_tuner.strategies import (
52-
basinhopping,
53-
bayes_opt,
54-
brute_force,
55-
diff_evo,
56-
dual_annealing,
57-
firefly_algorithm,
58-
genetic_algorithm,
59-
greedy_ils,
60-
greedy_mls,
61-
minimize,
62-
mls,
63-
ordered_greedy_mls,
64-
pso,
65-
pyatf_strategies,
66-
random_sample,
67-
simulated_annealing,
68-
skopt,
69-
gen_hybrid_vndx,
70-
gen_adaptive_tabu_greywolf,
71-
)
7252
from kernel_tuner.strategies.wrapper import OptAlgWrapper
7353

74-
strategy_map = {
75-
"brute_force": brute_force,
76-
"random_sample": random_sample,
77-
"minimize": minimize,
78-
"basinhopping": basinhopping,
79-
"diff_evo": diff_evo,
80-
"genetic_algorithm": genetic_algorithm,
81-
"greedy_mls": greedy_mls,
82-
"ordered_greedy_mls": ordered_greedy_mls,
83-
"greedy_ils": greedy_ils,
84-
"dual_annealing": dual_annealing,
85-
"mls": mls,
86-
"pso": pso,
87-
"simulated_annealing": simulated_annealing,
88-
"skopt": skopt,
89-
"firefly_algorithm": firefly_algorithm,
90-
"bayes_opt": bayes_opt,
91-
"pyatf_strategies": pyatf_strategies,
92-
"hybrid_vndx": gen_hybrid_vndx,
93-
"adaptive_tabu_greywolf": gen_adaptive_tabu_greywolf,
54+
_STRATEGY_IMPORTS = {
55+
"brute_force": "kernel_tuner.strategies.brute_force",
56+
"random_sample": "kernel_tuner.strategies.random_sample",
57+
"minimize": "kernel_tuner.strategies.minimize",
58+
"basinhopping": "kernel_tuner.strategies.basinhopping",
59+
"diff_evo": "kernel_tuner.strategies.diff_evo",
60+
"genetic_algorithm": "kernel_tuner.strategies.genetic_algorithm",
61+
"greedy_mls": "kernel_tuner.strategies.greedy_mls",
62+
"ordered_greedy_mls": "kernel_tuner.strategies.ordered_greedy_mls",
63+
"greedy_ils": "kernel_tuner.strategies.greedy_ils",
64+
"dual_annealing": "kernel_tuner.strategies.dual_annealing",
65+
"mls": "kernel_tuner.strategies.mls",
66+
"pso": "kernel_tuner.strategies.pso",
67+
"simulated_annealing": "kernel_tuner.strategies.simulated_annealing",
68+
"skopt": "kernel_tuner.strategies.skopt",
69+
"firefly_algorithm": "kernel_tuner.strategies.firefly_algorithm",
70+
"bayes_opt": "kernel_tuner.strategies.bayes_opt",
71+
"pyatf_strategies": "kernel_tuner.strategies.pyatf_strategies",
72+
"hybrid_vndx": "kernel_tuner.strategies.gen_hybrid_vndx",
73+
"adaptive_tabu_greywolf": "kernel_tuner.strategies.gen_adaptive_tabu_greywolf",
9474
}
9575

9676

77+
def _strategy_import_error(strategy_name, module_path, err):
    """Build (but do not raise) an ImportError describing a failed strategy import.

    The message names the strategy and the module path that failed to
    import, hints at missing optional dependencies, and appends the text
    of the original error ``err``.
    """
    message = (
        f"Failed to import strategy '{strategy_name}' from '{module_path}'. "
        "This strategy may require optional dependencies that are not installed."
        f" Original error: {err}"
    )
    return ImportError(message)
83+
84+
85+
class _LazyStrategyModule:
    """Proxy that imports a strategy module the first time it is used.

    Attribute access that is not satisfied by the proxy itself is forwarded
    to the module at ``module_path``, which is imported on first use and
    then kept on the instance.
    """

    # Attributes owned by the proxy itself. If a lookup for one of these
    # reaches __getattr__, the instance has not been initialised (e.g. it was
    # created via __new__ by copy/pickle) and delegating would recurse forever.
    _OWN_ATTRIBUTES = ("_name", "_module_path", "_module")

    def __init__(self, name, module_path):
        """Store the strategy name and module path without importing anything."""
        self._name = name
        self._module_path = module_path
        self._module = None

    def _load(self):
        """Import the wrapped module on first call and return it.

        Raises:
            ImportError: if the module cannot be imported; the original
                error is attached as the cause.
        """
        if self._module is None:
            try:
                self._module = importlib.import_module(self._module_path)
            except ImportError as err:
                # Chain explicitly so the original failure is reported as the cause.
                raise _strategy_import_error(self._name, self._module_path, err) from err
        return self._module

    def __getattr__(self, attr):
        """Forward lookups of unknown attributes to the lazily imported module."""
        if attr in _LazyStrategyModule._OWN_ATTRIBUTES:
            # Uninitialised instance: fail like a normal missing attribute
            # instead of re-entering _load() and hitting RecursionError.
            raise AttributeError(attr)
        return getattr(self._load(), attr)

    def __repr__(self):
        """Return a short description that does not trigger the import."""
        return f"<lazy strategy module '{self._name}'>"
104+
105+
106+
strategy_map = {name: _LazyStrategyModule(name, path) for name, path in _STRATEGY_IMPORTS.items()}
107+
108+
97109
class Options(dict):
98110
"""read-only class for passing options around."""
99111

@@ -657,7 +669,7 @@ def tune_kernel(
657669
tuning_options.strategy_options = Options(strategy_options or {})
658670
# if no strategy selected
659671
else:
660-
strategy = brute_force
672+
strategy = strategy_map["brute_force"]
661673

662674
# select the runner for this job based on input
663675
selected_runner = SimulationRunner if simulation_mode else SequentialRunner

kernel_tuner/searchspace.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from inspect import signature
1111

1212
import numpy as np
13-
from scipy.stats.qmc import LatinHypercube
1413
from constraint import (
1514
BacktrackingSolver,
1615
Constraint,
@@ -1333,6 +1332,7 @@ def get_distributed_random_sample(self, num_samples: int, sampling_factor=10) ->
13331332

13341333
def get_LHS_sample_indices(self, num_samples: int) -> List[int]:
13351334
"""Get a Latin Hypercube sample of parameter configuration indices."""
1335+
from scipy.stats.qmc import LatinHypercube
13361336
if num_samples > self.size:
13371337
warn(
13381338
f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})"

kernel_tuner/strategies/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from time import perf_counter
66

77
import numpy as np
8-
from scipy.spatial import distance
98

109
from kernel_tuner import util
1110
from kernel_tuner.searchspace import Searchspace
@@ -325,6 +324,7 @@ def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps):
325324

326325
if neighbors:
327326
# sort on distance to x
327+
from scipy.spatial import distance
328328
neighbors.sort(key=lambda y: distance.euclidean(x,scale_from_params(y, searchspace.tune_params, eps)))
329329

330330
# return closest valid neighbor

0 commit comments

Comments
 (0)