Skip to content

Commit 9bd9877

Browse files
committed
compiler: refine nested parallel support with decl check
1 parent ca89c6f commit 9bd9877

6 files changed

Lines changed: 32 additions & 31 deletions

File tree

conftest.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def skipif(items, whole_module=False):
3838
accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc',
3939
'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc',
4040
'noadvisor', 'cpu64-arm', 'cpu64-icpx', 'chkpnt'})
41-
accepted.update({'nodevice', 'noomp', 'nointel'})
41+
accepted.update({'nodevice', 'noomp'})
4242
unknown = sorted(set(items) - accepted)
4343
if unknown:
4444
raise ValueError(f"Illegal skipif argument(s) `{unknown}`")
@@ -93,11 +93,6 @@ def skipif(items, whole_module=False):
9393
if i == 'noomp' and 'openmp' not in configuration['language']:
9494
skipit = "Must use openmp"
9595
break
96-
# Skip if not using an Intel compiler
97-
if i == 'nointel' and \
98-
not isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
99-
skipit = "Must use an Intel compiler"
100-
break
10196
# Skip if it won't run on Arm
10297
if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm):
10398
skipit = "Arm doesn't support x86-specific instructions"

devito/arch/compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,7 @@ def __lookup_cmds__(self):
894894
check_output(["mpiicc", f"-cc={self.CC}", "--version"]).decode("utf-8")
895895
self.MPICC = 'mpiicc'
896896
self.MPICXX = 'mpicxx'
897-
except FileNotFoundError:
897+
except (FileNotFoundError, CalledProcessError):
898898
self.MPICC = 'mpicc'
899899
self.MPICXX = 'mpicxx'
900900

devito/passes/iet/languages/openmp.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,10 @@ def _support_nested_parallelism(cls, compiler):
283283
# In case we have a CustomCompiler
284284
if isinstance(compiler, CustomCompiler):
285285
compiler = compiler._base()
286-
if isinstance(compiler, (IntelCompiler, OneapiCompiler)): # noqa: SIM103
287-
return True
288-
else:
289-
return False
286+
# Only supported by icc (IntelCompiler) but not by
287+
# OneAPI's DPC++ compiler (OneapiCompiler) that inherits from IntelCompiler
288+
return isinstance(compiler, IntelCompiler) and not \
289+
isinstance(compiler, OneapiCompiler)
290290

291291

292292
class Ompizer(AbstractOmpizer):

devito/passes/iet/parpragma.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,18 @@ def _make_parregion(self, partree, parrays):
346346
def _make_guard(self, parregion):
347347
return parregion
348348

349+
def _support_uindices(self, uindices):
350+
if not uindices:
351+
# No secondary indices, so we can apply nested parallelism
352+
return True
353+
else:
354+
# Compiler supports nested parallelism with multiple indices
355+
# such as for(int i = 0, j=1; ...)
356+
return self._support_nested_parallelism(self.compiler)
357+
349358
def _make_nested_partree(self, partree):
350359
# Apply heuristic
351-
if self.nhyperthreads <= self.nested or \
352-
not self._support_nested_parallelism(self.compiler):
360+
if self.nhyperthreads <= self.nested:
353361
return partree
354362

355363
# Note: there might be multiple sub-trees amenable to nested parallelism,
@@ -371,7 +379,8 @@ def _make_nested_partree(self, partree):
371379
# within a block)
372380
candidates = []
373381
for i in inner:
374-
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer):
382+
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer) and \
383+
self._support_uindices(i.uindices):
375384
candidates.append(i)
376385
elif candidates:
377386
# If there's at least one candidate but `i` doesn't honor the

tests/test_caching.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55
import pytest
66
from sympy import Expr
77

8-
from conftest import skipif
98
from devito import (
109
ConditionalDimension, Constant, DefaultDimension, Dimension, Eq, Function, Grid,
1110
Operator, SparseFunction, SparseTimeFunction, SubDimension, TensorFunction,
1211
TensorTimeFunction, TimeFunction, VectorFunction, VectorTimeFunction, _SymbolCache,
13-
clear_cache, solve
12+
clear_cache, solve, switchconfig
1413
)
1514
from devito.types import (
1615
DeviceID, LocalObject, NPThreads, NThreadsBase, Object, Scalar, Symbol, ThreadID
@@ -468,7 +467,7 @@ def test_grid_dtypes(self):
468467

469468
assert hash(grid0) != hash(grid1)
470469

471-
@skipif('nointel')
470+
@switchconfig(compiler='icc')
472471
def test_special_symbols(self):
473472
"""
474473
This test checks the singletonization, through the caching infrastructure,

tests/test_dle.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from devito import (
1010
CustomDimension, DefaultDimension, Dimension, Eq, Function, Grid, Inc, Operator,
1111
PrecomputedSparseTimeFunction, ReduceMax, ReduceMin, ReduceMinMax, SpaceDimension,
12-
SparseTimeFunction, SubDimension, TimeFunction, configuration, cos, dimensions, info
12+
SparseTimeFunction, SubDimension, TimeFunction, configuration, cos, dimensions, info,
13+
switchconfig
1314
)
1415
from devito.arch.compiler import IntelCompiler, OneapiCompiler
1516
from devito.exceptions import InvalidArgument
@@ -1238,7 +1239,6 @@ def test_parallel_prec_inject(self):
12381239

12391240
class TestNestedParallelism:
12401241

1241-
@skipif('nointel')
12421242
def test_basic(self):
12431243
grid = Grid(shape=(3, 3, 3))
12441244

@@ -1251,7 +1251,6 @@ def test_basic(self):
12511251
'par-dynamic-work': 0}))
12521252

12531253
# Does it compile? Honoring the OpenMP specification isn't trivial
1254-
print(op)
12551254
assert op.cfunction
12561255

12571256
# Does it produce the right result
@@ -1271,7 +1270,6 @@ def test_basic(self):
12711270
assert iterations[2].pragmas[0].ccode.value ==\
12721271
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
12731272

1274-
@skipif('nointel')
12751273
def test_collapsing(self):
12761274
grid = Grid(shape=(3, 3, 3))
12771275

@@ -1280,7 +1278,6 @@ def test_collapsing(self):
12801278

12811279
op = Operator(Eq(u.forward, u + f + 1),
12821280
opt=('blocking', 'openmp', {'par-nested': 0,
1283-
'cire-rotate': True,
12841281
'par-collapse-ncores': 1,
12851282
'par-collapse-work': 0,
12861283
'par-dynamic-work': 0}))
@@ -1302,7 +1299,7 @@ def test_collapsing(self):
13021299
('omp parallel for collapse(2) schedule(dynamic,1) '
13031300
'num_threads(nthreads_nested)')
13041301

1305-
@skipif('nointel')
1302+
@switchconfig(compiler='icc')
13061303
def test_multiple_subnests_v0(self):
13071304
grid = Grid(shape=(3, 3, 3))
13081305
x, y, z = grid.dimensions
@@ -1335,7 +1332,7 @@ def test_multiple_subnests_v0(self):
13351332
('omp parallel for collapse(2) schedule(dynamic,1) '
13361333
'num_threads(nthreads_nested)')
13371334

1338-
@skipif('nointel')
1335+
@switchconfig(compiler='icc')
13391336
def test_multiple_subnests_v1(self):
13401337
"""
13411338
Unlike ``test_multiple_subnests_v0``, now we use the ``cire-rotate=True``
@@ -1374,7 +1371,6 @@ def test_multiple_subnests_v1(self):
13741371
assert trees[-1][3].pragmas[0].ccode.value ==\
13751372
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
13761373

1377-
@skipif('nointel')
13781374
@pytest.mark.parametrize('blocklevels', [1, 2])
13791375
def test_nested_cache_blocking_structure_subdims(self, blocklevels):
13801376
"""
@@ -1438,7 +1434,6 @@ def test_nested_cache_blocking_structure_subdims(self, blocklevels):
14381434
('omp parallel for collapse(2) schedule(dynamic,1) '
14391435
'num_threads(nthreads_nested)')
14401436

1441-
@skipif('nointel')
14421437
@pytest.mark.parametrize('exprs,collapsed,scheduling', [
14431438
(['Eq(u.forward, u.dx)'], '2', 'static'),
14441439
(['Eq(u.forward, u.dy)'], '2', 'static'),
@@ -1471,7 +1466,6 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling):
14711466
assert iterations[1].pragmas[0].ccode.value ==\
14721467
"".join([ompfor_string, scheduling_string])
14731468

1474-
@skipif('device')
14751469
def test_nested_parallelism_support(self):
14761470
grid = Grid(shape=(10, 10, 10))
14771471

@@ -1484,7 +1478,9 @@ def test_nested_parallelism_support(self):
14841478
v1.data_with_halo[:] = 1.
14851479

14861480
eqn = Eq(v.forward, (v.dx * (1 + 2*f) * f).dx)
1487-
op = Operator(eqn, opt=('advanced', {'openmp': True, 'par-nested': 0}))
1481+
op = Operator(eqn, opt=('advanced', {'openmp': True,
1482+
'par-collapse-ncores': 1,
1483+
'par-nested': 0}))
14881484

14891485
bns, _ = assert_blocking(op, {'x0_blk0'})
14901486
trees = retrieve_iteration_tree(bns['x0_blk0'])
@@ -1493,13 +1489,15 @@ def test_nested_parallelism_support(self):
14931489
# Check omp pragmas
14941490
assert trees[0][0].pragmas[0].ccode.value == \
14951491
'omp for collapse(2) schedule(dynamic,1)'
1496-
if isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
1492+
if isinstance(configuration['compiler'], IntelCompiler) and \
1493+
not isinstance(configuration['compiler'], OneapiCompiler):
14971494
# Supports nested parallelism
14981495
assert trees[0][2].pragmas[0].ccode.value == \
1499-
'#pragma omp parallel for collapse(2) schedule(dynamic,1)'\
1496+
'omp parallel for collapse(2) schedule(dynamic,1)'\
15001497
' num_threads(nthreads_nested)'
15011498
assert trees[1][2].pragmas[0].ccode.value == \
1502-
trees[0][2].pragmas[0].ccode.value
1499+
'omp parallel for collapse(2) schedule(static,1)'\
1500+
' num_threads(nthreads_nested)'
15031501
else:
15041502
# Most compilers don't support nested parallelism
15051503
assert not trees[0][2].pragmas

0 commit comments

Comments
 (0)