Skip to content

Commit ca89c6f

Browse files
committed
compiler: restrict nested parallelism to supported compilers (intel)
1 parent a844bac commit ca89c6f

5 files changed

Lines changed: 73 additions & 3 deletions

File tree

conftest.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def skipif(items, whole_module=False):
3838
accepted.update({'device', 'device-C', 'device-openmp', 'device-openacc',
3939
'device-aomp', 'cpu64-icc', 'cpu64-icx', 'cpu64-nvc',
4040
'noadvisor', 'cpu64-arm', 'cpu64-icpx', 'chkpnt'})
41-
accepted.update({'nodevice', 'noomp'})
41+
accepted.update({'nodevice', 'noomp', 'nointel'})
4242
unknown = sorted(set(items) - accepted)
4343
if unknown:
4444
raise ValueError(f"Illegal skipif argument(s) `{unknown}`")
@@ -93,6 +93,11 @@ def skipif(items, whole_module=False):
9393
if i == 'noomp' and 'openmp' not in configuration['language']:
9494
skipit = "Must use openmp"
9595
break
96+
# Skip if not using an Intel compiler
97+
if i == 'nointel' and \
98+
not isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
99+
skipit = "Must use an Intel compiler"
100+
break
96101
# Skip if it won't run on Arm
97102
if i == 'cpu64-arm' and isinstance(configuration['platform'], Arm):
98103
skipit = "Arm doesn't support x86-specific instructions"

devito/passes/iet/languages/openmp.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from sympy import And, Ne, Not
66

77
from devito.arch import AMDGPUX, INTELGPUX, NVIDIAX, PVC
8-
from devito.arch.compiler import CustomCompiler, GNUCompiler, NvidiaCompiler
8+
from devito.arch.compiler import (
9+
CustomCompiler, GNUCompiler, IntelCompiler, NvidiaCompiler, OneapiCompiler
10+
)
911
from devito.ir import (
1012
Call, Conditional, DeviceCall, FindSymbols, List, ParallelBlock, PointerCast, Pragma,
1113
Prodder, While
@@ -276,6 +278,16 @@ def _support_complex_reduction(cls, compiler):
276278
# GCC doesn't support complex reduction
277279
return not isinstance(compiler, GNUCompiler)
278280

281+
@classmethod
def _support_nested_parallelism(cls, compiler):
    """
    Tell whether nested parallelism may be generated for ``compiler``.

    Only the Intel compilers (``IntelCompiler``, ``OneapiCompiler``) are
    reported as supporting it. For a ``CustomCompiler`` the check is made
    on the object returned by ``compiler._base()`` instead.

    Parameters
    ----------
    compiler : Compiler
        The compiler the generated code will be built with.

    Returns
    -------
    bool
        True for an Intel compiler, False otherwise.
    """
    # A CustomCompiler is judged by what it is based on
    target = compiler._base() if isinstance(compiler, CustomCompiler) else compiler
    return isinstance(target, (IntelCompiler, OneapiCompiler))
290+
279291

280292
class Ompizer(AbstractOmpizer):
281293
langbb = OmpBB

devito/passes/iet/parpragma.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ def _support_array_reduction(cls, compiler):
5454
def _support_complex_reduction(cls, compiler):
5555
return False
5656

57+
@classmethod
def _support_nested_parallelism(cls, compiler):
    """
    Tell whether nested parallelism may be generated for ``compiler``.

    This implementation answers False whatever ``compiler`` is.
    NOTE(review): presumably language-specific subclasses override this
    -- confirm against the class hierarchy.
    """
    return False
60+
5761
@property
5862
def simd_reg_nbytes(self):
5963
return self.platform.simd_reg_nbytes
@@ -344,7 +348,8 @@ def _make_guard(self, parregion):
344348

345349
def _make_nested_partree(self, partree):
346350
# Apply heuristic
347-
if self.nhyperthreads <= self.nested:
351+
if self.nhyperthreads <= self.nested or \
352+
not self._support_nested_parallelism(self.compiler):
348353
return partree
349354

350355
# Note: there might be multiple sub-trees amenable to nested parallelism,

tests/test_caching.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66
from sympy import Expr
77

8+
from conftest import skipif
89
from devito import (
910
ConditionalDimension, Constant, DefaultDimension, Dimension, Eq, Function, Grid,
1011
Operator, SparseFunction, SparseTimeFunction, SubDimension, TensorFunction,
@@ -467,6 +468,7 @@ def test_grid_dtypes(self):
467468

468469
assert hash(grid0) != hash(grid1)
469470

471+
@skipif('nointel')
470472
def test_special_symbols(self):
471473
"""
472474
This test checks the singletonization, through the caching infrastructure,

tests/test_dle.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
PrecomputedSparseTimeFunction, ReduceMax, ReduceMin, ReduceMinMax, SpaceDimension,
1212
SparseTimeFunction, SubDimension, TimeFunction, configuration, cos, dimensions, info
1313
)
14+
from devito.arch.compiler import IntelCompiler, OneapiCompiler
1415
from devito.exceptions import InvalidArgument
1516
from devito.ir.iet import (
1617
Expression, FindNodes, IsPerfectIteration, Iteration, retrieve_iteration_tree
@@ -1237,6 +1238,7 @@ def test_parallel_prec_inject(self):
12371238

12381239
class TestNestedParallelism:
12391240

1241+
@skipif('nointel')
12401242
def test_basic(self):
12411243
grid = Grid(shape=(3, 3, 3))
12421244

@@ -1249,6 +1251,7 @@ def test_basic(self):
12491251
'par-dynamic-work': 0}))
12501252

12511253
# Does it compile? Honoring the OpenMP specification isn't trivial
1254+
print(op)
12521255
assert op.cfunction
12531256

12541257
# Does it produce the right result
@@ -1268,6 +1271,7 @@ def test_basic(self):
12681271
assert iterations[2].pragmas[0].ccode.value ==\
12691272
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
12701273

1274+
@skipif('nointel')
12711275
def test_collapsing(self):
12721276
grid = Grid(shape=(3, 3, 3))
12731277

@@ -1276,6 +1280,7 @@ def test_collapsing(self):
12761280

12771281
op = Operator(Eq(u.forward, u + f + 1),
12781282
opt=('blocking', 'openmp', {'par-nested': 0,
1283+
'cire-rotate': True,
12791284
'par-collapse-ncores': 1,
12801285
'par-collapse-work': 0,
12811286
'par-dynamic-work': 0}))
@@ -1297,6 +1302,7 @@ def test_collapsing(self):
12971302
('omp parallel for collapse(2) schedule(dynamic,1) '
12981303
'num_threads(nthreads_nested)')
12991304

1305+
@skipif('nointel')
13001306
def test_multiple_subnests_v0(self):
13011307
grid = Grid(shape=(3, 3, 3))
13021308
x, y, z = grid.dimensions
@@ -1329,6 +1335,7 @@ def test_multiple_subnests_v0(self):
13291335
('omp parallel for collapse(2) schedule(dynamic,1) '
13301336
'num_threads(nthreads_nested)')
13311337

1338+
@skipif('nointel')
13321339
def test_multiple_subnests_v1(self):
13331340
"""
13341341
Unlike ``test_multiple_subnests_v0``, now we use the ``cire-rotate=True``
@@ -1367,6 +1374,7 @@ def test_multiple_subnests_v1(self):
13671374
assert trees[-1][3].pragmas[0].ccode.value ==\
13681375
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
13691376

1377+
@skipif('nointel')
13701378
@pytest.mark.parametrize('blocklevels', [1, 2])
13711379
def test_nested_cache_blocking_structure_subdims(self, blocklevels):
13721380
"""
@@ -1430,6 +1438,7 @@ def test_nested_cache_blocking_structure_subdims(self, blocklevels):
14301438
('omp parallel for collapse(2) schedule(dynamic,1) '
14311439
'num_threads(nthreads_nested)')
14321440

1441+
@skipif('nointel')
14331442
@pytest.mark.parametrize('exprs,collapsed,scheduling', [
14341443
(['Eq(u.forward, u.dx)'], '2', 'static'),
14351444
(['Eq(u.forward, u.dy)'], '2', 'static'),
@@ -1461,3 +1470,40 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling):
14611470

14621471
assert iterations[1].pragmas[0].ccode.value ==\
14631472
"".join([ompfor_string, scheduling_string])
1473+
1474+
@skipif('device')
def test_nested_parallelism_support(self):
    """
    Check that nested parallel pragmas are only generated when the
    configured compiler is an Intel one (``IntelCompiler`` or
    ``OneapiCompiler``); with any other compiler the inner loops must carry
    no pragmas. In both cases the Operator must still compile.
    """
    grid = Grid(shape=(10, 10, 10))

    f = Function(name='f', grid=grid, space_order=4)
    v = TimeFunction(name="v", grid=grid, space_order=4)
    v1 = TimeFunction(name="v1", grid=grid, space_order=4)

    f.data_with_halo[:] = 0.5
    v.data_with_halo[:] = 1.
    v1.data_with_halo[:] = 1.

    eqn = Eq(v.forward, (v.dx * (1 + 2*f) * f).dx)
    op = Operator(eqn, opt=('advanced', {'openmp': True, 'par-nested': 0}))

    bns, _ = assert_blocking(op, {'x0_blk0'})
    trees = retrieve_iteration_tree(bns['x0_blk0'])
    assert len(trees) == 2

    # Check omp pragmas
    assert trees[0][0].pragmas[0].ccode.value == \
        'omp for collapse(2) schedule(dynamic,1)'
    if isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
        # Supports nested parallelism. The expected text has no leading
        # `#pragma`, consistently with the other `ccode.value` checks
        nested = trees[0][2].pragmas[0].ccode.value
        assert nested == \
            'omp parallel for collapse(2) schedule(dynamic,1)'\
            ' num_threads(nthreads_nested)'
        assert trees[1][2].pragmas[0].ccode.value == nested
    else:
        # Most compilers don't support nested parallelism
        assert not trees[0][2].pragmas
        assert not trees[1][2].pragmas

    # Should compile properly
    op.cfunction  # noqa: B018

0 commit comments

Comments
 (0)