Skip to content

Commit 43cd78b

Browse files
committed
compiler: refine nested parallel support with decl check
1 parent d06beed commit 43cd78b

3 files changed

Lines changed: 16 additions & 14 deletions

File tree

devito/passes/iet/parpragma.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,18 @@ def _make_parregion(self, partree, parrays):
346346
def _make_guard(self, parregion):
347347
return parregion
348348

349+
def _support_uindices(self, uindices):
350+
if not uindices:
351+
# No secondary indices, so we can apply nested parallelism
352+
return True
353+
else:
354+
# Secondary indices present: check whether the compiler supports nested parallelism with multiple indices
355+
# such as for(int i = 0, j=1; ...)
356+
return self._support_nested_parallelism(self.compiler)
357+
349358
def _make_nested_partree(self, partree):
350359
# Apply heuristic
351-
if self.nhyperthreads <= self.nested or \
352-
not self._support_nested_parallelism(self.compiler):
360+
if self.nhyperthreads <= self.nested:
353361
return partree
354362

355363
# Note: there might be multiple sub-trees amenable to nested parallelism,
@@ -371,7 +379,8 @@ def _make_nested_partree(self, partree):
371379
# within a block)
372380
candidates = []
373381
for i in inner:
374-
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer):
382+
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer) and \
383+
self._support_uindices(i.uindices):
375384
candidates.append(i)
376385
elif candidates:
377386
# If there's at least one candidate but `i` doesn't honor the

tests/test_caching.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66
from sympy import Expr
77

8-
from conftest import skipif
98
from devito import (
109
ConditionalDimension, Constant, DefaultDimension, Dimension, Eq, Function, Grid,
1110
Operator, SparseFunction, SparseTimeFunction, SubDimension, TensorFunction,
@@ -468,7 +467,6 @@ def test_grid_dtypes(self):
468467

469468
assert hash(grid0) != hash(grid1)
470469

471-
@skipif('nointel')
472470
def test_special_symbols(self):
473471
"""
474472
This test checks the singletonization, through the caching infrastructure,

tests/test_dle.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,6 @@ def test_parallel_prec_inject(self):
12381238

12391239
class TestNestedParallelism:
12401240

1241-
@skipif('nointel')
12421241
def test_basic(self):
12431242
grid = Grid(shape=(3, 3, 3))
12441243

@@ -1251,7 +1250,6 @@ def test_basic(self):
12511250
'par-dynamic-work': 0}))
12521251

12531252
# Does it compile? Honoring the OpenMP specification isn't trivial
1254-
print(op)
12551253
assert op.cfunction
12561254

12571255
# Does it produce the right result?
@@ -1271,7 +1269,6 @@ def test_basic(self):
12711269
assert iterations[2].pragmas[0].ccode.value ==\
12721270
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
12731271

1274-
@skipif('nointel')
12751272
def test_collapsing(self):
12761273
grid = Grid(shape=(3, 3, 3))
12771274

@@ -1280,7 +1277,6 @@ def test_collapsing(self):
12801277

12811278
op = Operator(Eq(u.forward, u + f + 1),
12821279
opt=('blocking', 'openmp', {'par-nested': 0,
1283-
'cire-rotate': True,
12841280
'par-collapse-ncores': 1,
12851281
'par-collapse-work': 0,
12861282
'par-dynamic-work': 0}))
@@ -1374,7 +1370,6 @@ def test_multiple_subnests_v1(self):
13741370
assert trees[-1][3].pragmas[0].ccode.value ==\
13751371
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
13761372

1377-
@skipif('nointel')
13781373
@pytest.mark.parametrize('blocklevels', [1, 2])
13791374
def test_nested_cache_blocking_structure_subdims(self, blocklevels):
13801375
"""
@@ -1438,7 +1433,6 @@ def test_nested_cache_blocking_structure_subdims(self, blocklevels):
14381433
('omp parallel for collapse(2) schedule(dynamic,1) '
14391434
'num_threads(nthreads_nested)')
14401435

1441-
@skipif('nointel')
14421436
@pytest.mark.parametrize('exprs,collapsed,scheduling', [
14431437
(['Eq(u.forward, u.dx)'], '2', 'static'),
14441438
(['Eq(u.forward, u.dy)'], '2', 'static'),
@@ -1471,7 +1465,6 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling):
14711465
assert iterations[1].pragmas[0].ccode.value ==\
14721466
"".join([ompfor_string, scheduling_string])
14731467

1474-
@skipif('device')
14751468
def test_nested_parallelism_support(self):
14761469
grid = Grid(shape=(10, 10, 10))
14771470

@@ -1484,7 +1477,9 @@ def test_nested_parallelism_support(self):
14841477
v1.data_with_halo[:] = 1.
14851478

14861479
eqn = Eq(v.forward, (v.dx * (1 + 2*f) * f).dx)
1487-
op = Operator(eqn, opt=('advanced', {'openmp': True, 'par-nested': 0}))
1480+
op = Operator(eqn, opt=('advanced', {'openmp': True,
1481+
'par-collapse-ncores': 1,
1482+
'par-nested': 0}))
14881483

14891484
bns, _ = assert_blocking(op, {'x0_blk0'})
14901485
trees = retrieve_iteration_tree(bns['x0_blk0'])
@@ -1496,7 +1491,7 @@ def test_nested_parallelism_support(self):
14961491
if isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
14971492
# Supports nested parallelism
14981493
assert trees[0][2].pragmas[0].ccode.value == \
1499-
'#pragma omp parallel for collapse(2) schedule(dynamic,1)'\
1494+
'omp parallel for collapse(2) schedule(dynamic,1)'\
15001495
' num_threads(nthreads_nested)'
15011496
assert trees[1][2].pragmas[0].ccode.value == \
15021497
trees[0][2].pragmas[0].ccode.value

0 commit comments

Comments
 (0)