Skip to content

Commit 052aeb3

Browse files
committed
compiler: refine nested parallel support with decl check
1 parent 84589c4 commit 052aeb3

3 files changed

Lines changed: 12 additions & 12 deletions

File tree

devito/passes/iet/parpragma.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,18 @@ def _make_parregion(self, partree, parrays):
346346
def _make_guard(self, parregion):
347347
return parregion
348348

349+
def _support_uindices(self, uindices):
350+
if not uindices:
351+
# No secondary indices, so we can apply nested parallelism
352+
return True
353+
else:
354+
# Secondary indices present: only OK if the compiler supports nested parallelism with multiple indices
355+
# such as for(int i = 0, j=1; ...)
356+
return self._support_nested_parallelism(self.compiler)
357+
349358
def _make_nested_partree(self, partree):
350359
# Apply heuristic
351-
if self.nhyperthreads <= self.nested or \
352-
not self._support_nested_parallelism(self.compiler):
360+
if self.nhyperthreads <= self.nested:
353361
return partree
354362

355363
# Note: there might be multiple sub-trees amenable to nested parallelism,
@@ -371,7 +379,8 @@ def _make_nested_partree(self, partree):
371379
# within a block)
372380
candidates = []
373381
for i in inner:
374-
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer):
382+
if self.key(i) and any((j.dim.root is i.dim.root) for j in outer) and \
383+
self._support_uindices(i.uindices):
375384
candidates.append(i)
376385
elif candidates:
377386
# If there's at least one candidate but `i` doesn't honor the

tests/test_caching.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66
from sympy import Expr
77

8-
from conftest import skipif
98
from devito import (
109
ConditionalDimension, Constant, DefaultDimension, Dimension, Eq, Function, Grid,
1110
Operator, SparseFunction, SparseTimeFunction, SubDimension, TensorFunction,
@@ -468,7 +467,6 @@ def test_grid_dtypes(self):
468467

469468
assert hash(grid0) != hash(grid1)
470469

471-
@skipif('nointel')
472470
def test_special_symbols(self):
473471
"""
474472
This test checks the singletonization, through the caching infrastructure,

tests/test_dle.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,6 @@ def test_parallel_prec_inject(self):
12381238

12391239
class TestNestedParallelism:
12401240

1241-
@skipif('nointel')
12421241
def test_basic(self):
12431242
grid = Grid(shape=(3, 3, 3))
12441243

@@ -1251,7 +1250,6 @@ def test_basic(self):
12511250
'par-dynamic-work': 0}))
12521251

12531252
# Does it compile? Honoring the OpenMP specification isn't trivial
1254-
print(op)
12551253
assert op.cfunction
12561254

12571255
# Does it produce the right result?
@@ -1271,7 +1269,6 @@ def test_basic(self):
12711269
assert iterations[2].pragmas[0].ccode.value ==\
12721270
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
12731271

1274-
@skipif('nointel')
12751272
def test_collapsing(self):
12761273
grid = Grid(shape=(3, 3, 3))
12771274

@@ -1280,7 +1277,6 @@ def test_collapsing(self):
12801277

12811278
op = Operator(Eq(u.forward, u + f + 1),
12821279
opt=('blocking', 'openmp', {'par-nested': 0,
1283-
'cire-rotate': True,
12841280
'par-collapse-ncores': 1,
12851281
'par-collapse-work': 0,
12861282
'par-dynamic-work': 0}))
@@ -1374,7 +1370,6 @@ def test_multiple_subnests_v1(self):
13741370
assert trees[-1][3].pragmas[0].ccode.value ==\
13751371
'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
13761372

1377-
@skipif('nointel')
13781373
@pytest.mark.parametrize('blocklevels', [1, 2])
13791374
def test_nested_cache_blocking_structure_subdims(self, blocklevels):
13801375
"""
@@ -1438,7 +1433,6 @@ def test_nested_cache_blocking_structure_subdims(self, blocklevels):
14381433
('omp parallel for collapse(2) schedule(dynamic,1) '
14391434
'num_threads(nthreads_nested)')
14401435

1441-
@skipif('nointel')
14421436
@pytest.mark.parametrize('exprs,collapsed,scheduling', [
14431437
(['Eq(u.forward, u.dx)'], '2', 'static'),
14441438
(['Eq(u.forward, u.dy)'], '2', 'static'),
@@ -1471,7 +1465,6 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling):
14711465
assert iterations[1].pragmas[0].ccode.value ==\
14721466
"".join([ompfor_string, scheduling_string])
14731467

1474-
@skipif('device')
14751468
def test_nested_parallelism_support(self):
14761469
grid = Grid(shape=(10, 10, 10))
14771470

0 commit comments

Comments
 (0)