Skip to content
1 change: 1 addition & 0 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def _normalize_kwargs(cls, **kwargs):
o['cire-maxpar'] = oo.pop('cire-maxpar', False)
o['cire-ftemps'] = oo.pop('cire-ftemps', False)
o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
o['cire-schedule'] = oo.pop('cire-schedule', cls.CIRE_SCHEDULE)

# Shared-memory parallelism
Expand Down
1 change: 1 addition & 0 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def _normalize_kwargs(cls, **kwargs):
o['cire-maxpar'] = oo.pop('cire-maxpar', True)
o['cire-ftemps'] = oo.pop('cire-ftemps', False)
o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
o['cire-schedule'] = oo.pop('cire-schedule', cls.CIRE_SCHEDULE)

# GPU parallelism
Expand Down
8 changes: 8 additions & 0 deletions devito/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ class BasicOperator(Operator):
intensity of the generated kernel.
"""

CIRE_MINMEM = True
"""
Minimize memory consumption when allocating temporaries for CIRE-optimized
expressions. This may come at the cost of slightly worse performance due to
the potential need for extra registers to hold a greater number of support
variables (e.g., strides).
"""

SCALAR_MIN_TYPE = np.float16
"""
Minimum datatype for a scalar arising from a common sub-expression or CIRE temp.
Expand Down
11 changes: 10 additions & 1 deletion devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,16 @@ class ClusterGroup(tuple):

def __new__(cls, clusters, ispace=None):
obj = super().__new__(cls, flatten(as_tuple(clusters)))
obj._ispace = ispace

if ispace is not None:
obj._ispace = ispace
else:
# Best effort attempt to infer a common IterationSpace
try:
obj._ispace, = {c.ispace for c in obj}
except ValueError:
obj._ispace = None

return obj

@classmethod
Expand Down
106 changes: 71 additions & 35 deletions devito/passes/clusters/aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,20 @@

from devito.exceptions import CompilationError
from devito.finite_differences import EvalDerivative, IndexDerivative, Weights
from devito.ir import (SEQUENTIAL, PARALLEL_IF_PVT, SEPARABLE, Forward,
IterationSpace, Interval, Cluster, ExprGeometry, Queue,
IntervalGroup, LabeledVector, Vector, normalize_properties,
relax_properties, unbounded, minimum, maximum, extrema,
vmax, vmin)
from devito.ir import (
SEQUENTIAL, PARALLEL_IF_PVT, SEPARABLE, Forward, IterationSpace, Interval,
Cluster, ClusterGroup, ExprGeometry, Queue, IntervalGroup, LabeledVector,
Vector, normalize_properties, relax_properties, unbounded, minimum, maximum,
extrema, vmax, vmin
)
from devito.passes.clusters.cse import _cse
from devito.symbolics import (Uxmapper, estimate_cost, search, reuse_if_untouched,
uxreplace, sympy_dtype)
retrieve_functions, uxreplace, sympy_dtype)
from devito.tools import (Stamp, as_mapper, as_tuple, flatten, frozendict,
is_integer, generator, split, timed_pass)
from devito.types import (Eq, Symbol, Temp, TempArray, TempFunction,
ModuloDimension, CustomDimension, IncrDimension,
StencilDimension, Indexed, Hyperplane)
StencilDimension, Indexed, Hyperplane, Size)
from devito.types.grid import MultiSubDimension

__all__ = ['cire']
Expand Down Expand Up @@ -113,23 +114,22 @@ def __init__(self, sregistry, options, platform):
self.opt_rotate = options['cire-rotate']
self.opt_ftemps = options['cire-ftemps']
self.opt_mingain = options['cire-mingain']
self.opt_minmem = options['cire-minmem']
self.opt_min_dtype = options['scalar-min-type']
self.opt_multisubdomain = True

def _aliases_from_clusters(self, clusters, exclude, meta):
exprs = flatten([c.exprs for c in clusters])

def _aliases_from_clusters(self, cgroup, exclude, meta):
# [Clusters]_n -> [Schedule]_m
variants = []
for mapper in self._generate(exprs, exclude):
for mapper in self._generate(cgroup, exclude):
# Clusters -> AliasList
found = collect(mapper.extracted, meta.ispace, self.opt_minstorage)
pexprs, aliases = choose(found, exprs, mapper, self.opt_mingain)
exprs, aliases = choose(found, cgroup, mapper, self.opt_mingain)

# AliasList -> Schedule
schedule = lower_aliases(aliases, meta, self.opt_maxpar)

variants.append(Variant(schedule, pexprs))
variants.append(Variant(schedule, exprs))

if not variants:
return []
Expand All @@ -143,14 +143,15 @@ def _aliases_from_clusters(self, clusters, exclude, meta):

# Schedule -> [Clusters]_k
processed, subs = lower_schedule(schedule, meta, self.sregistry,
self.opt_ftemps, self.opt_min_dtype)
self.opt_ftemps, self.opt_min_dtype,
self.opt_minmem)

# [Clusters]_k -> [Clusters]_k (optimization)
if self.opt_multisubdomain:
processed = optimize_clusters_msds(processed)

# [Clusters]_k -> [Clusters]_{k+n}
for c in clusters:
for c in cgroup:
n = len(c.exprs)
cexprs, exprs = exprs[:n], exprs[n:]

Expand All @@ -168,9 +169,9 @@ def _aliases_from_clusters(self, clusters, exclude, meta):
def process(self, clusters):
raise NotImplementedError

def _generate(self, exprs, exclude):
def _generate(self, cgroup, exclude):
"""
Generate one or more extractions from ``exprs``. An extraction is a
Generate one or more extractions from a ClusterGroup. An extraction is a
set of CIRE candidates which may be turned into aliases. Two different
extractions may contain overlapping sub-expressions and, therefore,
should be processed and evaluated independently. An extraction won't
Expand All @@ -189,8 +190,8 @@ def _lookup_key(self, c):

def _select(self, variants):
"""
Select the best variant out of a set of variants, weighing flops
and working set.
Select the best variant out of a set of `variants`, weighing flops and
working set.
"""
raise NotImplementedError

Expand Down Expand Up @@ -258,7 +259,7 @@ def callback(self, clusters, prefix, xtracted=None):
if not g:
continue

made = self._aliases_from_clusters(g, exclude, ak)
made = self._aliases_from_clusters(ClusterGroup(g), exclude, ak)

if made:
idx = processed.index(g[0])
Expand All @@ -283,7 +284,9 @@ def _select(self, variants):

class CireInvariantsElementary(CireInvariants):

def _generate(self, exprs, exclude):
def _generate(self, cgroup, exclude):
exprs = cgroup.exprs

# E.g., extract `sin(x)` and `sqrt(x)` from `a*sin(x)*sqrt(x)`
rule = lambda e: e.is_Function or (e.is_Pow and e.exp.is_Number and 0 < e.exp < 1)
cbk_search = lambda e: search(e, rule, 'all', 'bfs_first_hit')
Expand All @@ -306,7 +309,9 @@ def cbk_search(expr):

class CireInvariantsDivs(CireInvariants):

def _generate(self, exprs, exclude):
def _generate(self, cgroup, exclude):
exprs = cgroup.exprs

# E.g., extract `1/h_x`
rule = lambda e: e.is_Pow and (not e.exp.is_Number or e.exp < 0)
cbk_search = lambda e: search(e, rule, 'all', 'bfs_first_hit')
Expand Down Expand Up @@ -337,13 +342,17 @@ def process(self, clusters):
# TODO: to process third- and higher-order derivatives, we could
# extend this by calling `_aliases_from_clusters` repeatedly until
# `made` is empty. To be investigated
made = self._aliases_from_clusters([c], exclude, self._lookup_key(c))
made = self._aliases_from_clusters(
ClusterGroup(c), exclude, self._lookup_key(c)
)

processed.extend(flatten(made) or [c])

return processed

def _generate(self, exprs, exclude):
def _generate(self, cgroup, exclude):
exprs = cgroup.exprs

# E.g., extract `u.dx*a*b` and `u.dx*a*c` from
# `[(u.dx*a*b).dy`, `(u.dx*a*c).dy]`
basextr = self._do_generate(exprs, exclude, self._cbk_search,
Expand Down Expand Up @@ -598,14 +607,15 @@ def collect(extracted, ispace, minstorage):
return aliases


def choose(aliases, exprs, mapper, mingain):
def choose(aliases, cgroup, mapper, mingain):
"""
Analyze the detected aliases and, after applying a cost model to rule out
the aliases with a bad memory/flops trade-off, inject them into the original
expressions.
"""
aliases = AliasList(aliases)
exprs = cgroup.exprs

aliases = AliasList(aliases)
if not aliases:
return exprs, aliases

Expand Down Expand Up @@ -831,11 +841,12 @@ def optimize_schedule_rotations(schedule, sregistry):
return schedule.rebuild(*processed, rmapper=rmapper)


def lower_schedule(schedule, meta, sregistry, ftemps, min_dtype):
def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype,
opt_minmem):
"""
Turn a Schedule into a sequence of Clusters.
"""
if ftemps:
if opt_ftemps:
make = TempFunction
else:
# Typical case -- the user does *not* "see" the CIRE-created temporaries
Expand Down Expand Up @@ -865,31 +876,51 @@ def lower_schedule(schedule, meta, sregistry, ftemps, min_dtype):
dimensions = [d.parent if d.is_AbstractSub else d
for d in writeto.itdims]

# The halo must be set according to the size of `writeto`
halo = [(abs(i.lower), abs(i.upper)) for i in writeto]
# The minimum halo required along each Dimension depends on `writeto`.
# If `opt_minmem` is disabled, the halo is instead extended to be at least
# as large as that of the Functions in `pivot`; this costs extra memory but
# minimizes the number of support variables such as strides
min_halo = {i.dim: Size(abs(i.lower), abs(i.upper)) for i in writeto}

if opt_minmem:
functions = []
else:
functions = retrieve_functions(pivot)

halo = dict(min_halo)
for f in functions:
for d, h0 in list(halo.items()):
try:
h1 = f._size_halo[d]
except KeyError:
continue
halo[d] = Size(max(h0.left, h1.left), max(h0.right, h1.right))

shift = [halo[d].left - min_halo[d].left for d in writeto.itdims]
halo = tuple(halo.values())

# The indices used to write into the Array
indices = []
for i in writeto:
for i, s in zip(writeto, shift):
try:
# E.g., `xs`
sub_iterators = writeto.sub_iterators[i.dim]
assert len(sub_iterators) <= 1
indices.append(sub_iterators[0])
indices.append(sub_iterators[0] + s)
except (KeyError, IndexError):
# E.g., `z` -- a non-shifted Dimension
indices.append(i.dim - i.lower)
indices.append(i.dim - i.lower + s)

dtype = sympy_dtype(pivot, base=meta.dtype)
obj = make(name=name, dimensions=dimensions, halo=halo, dtype=dtype)
expression = Eq(obj[indices], uxreplace(pivot, subs))

callback = lambda idx: obj[idx]
callback = lambda idx: obj[[i + s for i, s in zip(idx, shift)]]
else:
# Degenerate case: scalar expression
assert writeto.size == 0

dtype = sympy_dtype(pivot, base=meta.dtype, smin=min_dtype)
dtype = sympy_dtype(pivot, base=meta.dtype, smin=opt_min_dtype)
obj = Temp(name=name, dtype=dtype)
expression = Eq(obj, uxreplace(pivot, subs))

Expand Down Expand Up @@ -980,6 +1011,11 @@ def pick_best(variants):
indexeds1 = search(i.exprs, Indexed)
functions1.update({i.function for i in indexeds1})

# Filter out objects that are extremely likely to be in cache if not
# in registers
functions0 = {f for f in functions0 if f.ndim >= 2}
functions1 = {f for f in functions1 if f.ndim >= 2}

nfunctions0 = len(functions0)
nfunctions1 = len(functions1)

Expand Down
2 changes: 1 addition & 1 deletion devito/passes/iet/linearization.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def key1(f, d):

* False if not statically linearizable, that is not linearizable via
constant symbolic sizes and strides;
* A 3-tuple `(Dimension, halo size, grid)` otherwise.
* A 3-tuple `(Dimension, halo size, pad dtype)` otherwise.
"""
if f.is_regular:
# For paddable objects the following holds:
Expand Down
7 changes: 1 addition & 6 deletions devito/types/basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import abc
import inspect
from collections import namedtuple
from ctypes import POINTER, _Pointer, c_char_p, c_char, Structure
from functools import reduce, cached_property
from operator import mul
Expand All @@ -18,16 +17,12 @@
from devito.types.args import ArgProvider
from devito.types.caching import Cached, Uncached
from devito.types.lazy import Evaluable
from devito.types.utils import DimensionTuple
from devito.types.utils import DimensionTuple, Offset, Size

__all__ = ['Symbol', 'Scalar', 'Indexed', 'IndexedData', 'DeviceMap',
'IrregularFunctionInterface']


Size = namedtuple('Size', 'left right')
Offset = namedtuple('Offset', 'left right')


class CodeSymbol:

"""
Expand Down
6 changes: 4 additions & 2 deletions devito/types/dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
from devito.types.dimension import Dimension
from devito.types.args import ArgProvider
from devito.types.caching import CacheManager
from devito.types.basic import AbstractFunction, Size
from devito.types.utils import Buffer, DimensionTuple, NODE, CELL, host_layer, Staggering
from devito.types.basic import AbstractFunction
from devito.types.utils import (
Buffer, DimensionTuple, NODE, CELL, Size, Staggering, host_layer
)

__all__ = ['Function', 'TimeFunction', 'SubFunction', 'TempFunction']

Expand Down
11 changes: 8 additions & 3 deletions devito/types/utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from collections import namedtuple
from ctypes import POINTER, Structure
from functools import cached_property

from devito.tools import EnrichedTuple, Tag
# Additional Function-related APIs

__all__ = ['Buffer', 'DimensionTuple', 'NODE', 'CELL', 'IgnoreDimSort',
'HierarchyLayer', 'HostLayer', 'DeviceLayer', 'DiskLayer',
'host_layer', 'device_layer', 'disk_layer']
__all__ = ['Buffer', 'DimensionTuple', 'NODE', 'CELL', 'Size', 'Offset',
'IgnoreDimSort', 'HierarchyLayer', 'HostLayer', 'DeviceLayer',
'DiskLayer', 'host_layer', 'device_layer', 'disk_layer']


class Buffer(Tag):
Expand All @@ -23,6 +24,10 @@ class Stagger(Tag):
CELL = Stagger('cell')


Size = namedtuple('Size', 'left right')
Offset = namedtuple('Offset', 'left right')


class DimensionTuple(EnrichedTuple):

def __getitem_hook__(self, dim):
Expand Down
Loading
Loading