|
| 1 | +import hashlib |
| 2 | + |
| 3 | +import coffee |
| 4 | +import loopy as lp |
| 5 | + |
| 6 | +from . import caching, configuration as conf, datatypes, exceptions as ex, utils, version |
| 7 | + |
| 8 | + |
class Kernel(caching.Cached):

    """OP2 kernel type.

    :param code: kernel function definition, including signature; either a
        string, an AST :class:`.Node`, or a loopy object (``Program``,
        ``LoopKernel`` or ``TranslationUnit``)
    :param name: kernel function name; must match the name of the kernel
        function given in `code`
    :param opts: options dictionary for :doc:`PyOP2 IR optimisations <ir>`
        (optional, ignored if `code` is a string)
    :param include_dirs: list of additional include directories to be searched
        when compiling the kernel (optional, defaults to empty)
    :param headers: list of system headers to include when compiling the kernel
        in the form ``#include <header.h>`` (optional, defaults to empty)
    :param user_code: code snippet to be executed once at the very start of
        the generated kernel wrapper code (optional, defaults to
        empty)
    :param ldargs: A list of arguments to pass to the linker when
        compiling this Kernel.
    :param requires_zeroed_output_arguments: Does this kernel require the
        output arguments to be zeroed on entry when called? (default no)
    :param cpp: Is the kernel actually C++ rather than C?  If yes,
        then compile with the C++ compiler (kernel is wrapped in
        extern C for linkage reasons).
    :param flop_count: number of flops performed by the kernel; when given,
        :attr:`num_flops` returns this value as-is instead of estimating
        it from `code` (optional)

    Consider the case of initialising a :class:`~pyop2.Dat` with seeded random
    values in the interval 0 to 1. The corresponding :class:`~pyop2.Kernel` is
    constructed as follows: ::

      op2.Kernel("void setrand(double *x) { x[0] = (double)random()/RAND_MAX; }",
                 name="setrand",
                 headers=["#include <stdlib.h>"], user_code="srandom(10001);")

    .. note::
        When running in parallel with MPI the generated code must be the same
        on all ranks.
    """

    _cache = {}

    @classmethod
    @utils.validate_type(('name', str, ex.NameTypeError))
    def _cache_key(cls, code, name, opts=None, include_dirs=None, headers=None,
                   user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False,
                   flop_count=None):
        """Return the md5 hex digest identifying a kernel built from these arguments.

        Takes the same arguments as :meth:`__init__`. ``flop_count`` is accepted
        but does not contribute to the key.
        """
        # Both code and name are relevant since there might be multiple kernels
        # extracting different functions from the same code
        # Also include the PyOP2 version, since the Kernel class might change

        # ``None`` stands for "empty"; normalise so that the default key is the
        # same as the one produced by an explicit empty dict/list.
        opts = {} if opts is None else opts
        include_dirs = [] if include_dirs is None else include_dirs
        headers = [] if headers is None else headers

        if isinstance(code, coffee.base.Node):
            code = code.gencode()
        if isinstance(code, lp.TranslationUnit):
            from loopy.tools import LoopyKeyBuilder
            key_hash = hashlib.sha256()
            code.update_persistent_hash(key_hash, LoopyKeyBuilder())
            code = key_hash.hexdigest()
        # user_code is appended last: an empty snippet (the default) leaves the
        # hashed string unchanged, while kernels that differ only in their
        # user_code no longer collide on the same key.
        hashee = (str(code) + name + str(sorted(opts.items())) + str(include_dirs)
                  + str(headers) + version.__version__ + str(ldargs) + str(cpp)
                  + str(requires_zeroed_output_arguments) + str(user_code))
        return hashlib.md5(hashee.encode()).hexdigest()

    @utils.cached_property
    def _wrapper_cache_key_(self):
        """One-element tuple holding this kernel's ``_key``."""
        return (self._key, )

    def __init__(self, code, name, opts=None, include_dirs=None, headers=None,
                 user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False,
                 flop_count=None):
        # Protect against re-initialization when retrieved from cache
        if self._initialized:
            return
        self._name = name
        self._cpp = cpp
        # Record used optimisations
        # Fresh containers per instance: a shared mutable default would leak
        # modifications from one kernel into every other kernel using defaults.
        self._opts = opts if opts is not None else {}
        self._include_dirs = include_dirs if include_dirs is not None else []
        self._ldargs = ldargs if ldargs is not None else []
        self._headers = headers if headers is not None else []
        self._user_code = user_code
        assert isinstance(code, (str, coffee.base.Node, lp.Program, lp.LoopKernel, lp.TranslationUnit))
        self._code = code
        self._initialized = True
        self.requires_zeroed_output_arguments = requires_zeroed_output_arguments
        self.flop_count = flop_count

    @property
    def name(self):
        """Kernel name, must match the kernel function name in the code."""
        return self._name

    @property
    def code(self):
        """The kernel code exactly as passed to the constructor."""
        return self._code

    @utils.cached_property
    def num_flops(self):
        """Flop count of this kernel.

        Returns the explicit ``flop_count`` if one was given. Otherwise returns
        0 unless the ``compute_kernel_flops`` configuration option is set, in
        which case the count is estimated from the code: with COFFEE's
        ``EstimateFlops`` visitor for an AST, or from loopy's op map (add, sub,
        mul and div on ``datatypes.ScalarType``) for a ``TranslationUnit``.
        Any other kind of code yields 0.
        """
        if self.flop_count is not None:
            return self.flop_count
        if not conf.configuration["compute_kernel_flops"]:
            return 0
        if isinstance(self.code, coffee.base.Node):
            v = coffee.visitors.EstimateFlops()
            return v.visit(self.code)
        elif isinstance(self.code, lp.TranslationUnit):
            op_map = lp.get_op_map(
                self.code.copy(options=lp.Options(ignore_boostable_into=True),
                               silenced_warnings=['insn_count_subgroups_upper_bound',
                                                  'get_x_map_guessing_subgroup_size',
                                                  'summing_if_branches_ops']),
                subgroup_size='guess')
            return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], dtype=[datatypes.ScalarType]).eval_and_sum({})
        else:
            return 0

    def __str__(self):
        return "OP2 Kernel: %s" % self._name

    def __repr__(self):
        return 'Kernel("""%s""", %r)' % (self._code, self._name)

    def __eq__(self, other):
        """Kernels are equal when their cache keys are equal."""
        if not isinstance(other, Kernel):
            # Let Python try the reflected comparison instead of failing with
            # an AttributeError on objects that have no cache key.
            return NotImplemented
        return self.cache_key == other.cache_key

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None; hash on the same key
        # that __eq__ compares so kernels stay usable in sets and as dict keys.
        return hash(self.cache_key)
| 131 | + |
| 132 | + |
class PyKernel(Kernel):
    """Kernel variant that wraps a Python callable instead of source code.

    Calling the instance forwards the positional arguments to the wrapped
    callable.
    """

    @classmethod
    def _cache_key(cls, *args, **kwargs):
        """Always yield ``None`` as the key, whatever the arguments.

        NOTE(review): presumably this makes ``caching.Cached`` skip caching
        for these objects -- confirm against the base class.
        """
        return

    def __init__(self, code, name=None, **kwargs):
        """Store the callable ``code`` and the optional ``name``.

        Any further keyword arguments are accepted and ignored, and
        ``Kernel.__init__`` is not invoked.
        """
        self._name = name
        self._func = code

    def __getattr__(self, attr):
        """Return None on unrecognised attributes"""
        return None

    def __call__(self, *args):
        """Invoke the wrapped callable with ``args`` and return its result."""
        wrapped = self._func
        return wrapped(*args)

    def __repr__(self):
        fields = (self._func, self._name)
        return 'Kernel("""%s""", %r)' % fields
0 commit comments