1111 PrecomputedSparseTimeFunction , ReduceMax , ReduceMin , ReduceMinMax , SpaceDimension ,
1212 SparseTimeFunction , SubDimension , TimeFunction , configuration , cos , dimensions , info
1313)
14+ from devito .arch .compiler import IntelCompiler , OneapiCompiler
1415from devito .exceptions import InvalidArgument
1516from devito .ir .iet import (
1617 Expression , FindNodes , IsPerfectIteration , Iteration , retrieve_iteration_tree
@@ -1237,6 +1238,7 @@ def test_parallel_prec_inject(self):
12371238
12381239class TestNestedParallelism :
12391240
1241+ @skipif ('nointel' )
12401242 def test_basic (self ):
12411243 grid = Grid (shape = (3 , 3 , 3 ))
12421244
@@ -1249,6 +1251,7 @@ def test_basic(self):
12491251 'par-dynamic-work' : 0 }))
12501252
12511253 # Does it compile? Honoring the OpenMP specification isn't trivial
1254+ print (op )
12521255 assert op .cfunction
12531256
12541257 # Does it produce the right result
@@ -1268,6 +1271,7 @@ def test_basic(self):
12681271 assert iterations [2 ].pragmas [0 ].ccode .value == \
12691272 'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
12701273
1274+ @skipif ('nointel' )
12711275 def test_collapsing (self ):
12721276 grid = Grid (shape = (3 , 3 , 3 ))
12731277
@@ -1276,6 +1280,7 @@ def test_collapsing(self):
12761280
12771281 op = Operator (Eq (u .forward , u + f + 1 ),
12781282 opt = ('blocking' , 'openmp' , {'par-nested' : 0 ,
1283+ 'cire-rotate' : True ,
12791284 'par-collapse-ncores' : 1 ,
12801285 'par-collapse-work' : 0 ,
12811286 'par-dynamic-work' : 0 }))
@@ -1297,6 +1302,7 @@ def test_collapsing(self):
12971302 ('omp parallel for collapse(2) schedule(dynamic,1) '
12981303 'num_threads(nthreads_nested)' )
12991304
1305+ @skipif ('nointel' )
13001306 def test_multiple_subnests_v0 (self ):
13011307 grid = Grid (shape = (3 , 3 , 3 ))
13021308 x , y , z = grid .dimensions
@@ -1329,6 +1335,7 @@ def test_multiple_subnests_v0(self):
13291335 ('omp parallel for collapse(2) schedule(dynamic,1) '
13301336 'num_threads(nthreads_nested)' )
13311337
1338+ @skipif ('nointel' )
13321339 def test_multiple_subnests_v1 (self ):
13331340 """
13341341 Unlike ``test_multiple_subnestes_v0``, now we use the ``cire-rotate=True``
@@ -1367,6 +1374,7 @@ def test_multiple_subnests_v1(self):
13671374 assert trees [- 1 ][3 ].pragmas [0 ].ccode .value == \
13681375 'omp parallel for schedule(dynamic,1) num_threads(nthreads_nested)'
13691376
1377+ @skipif ('nointel' )
13701378 @pytest .mark .parametrize ('blocklevels' , [1 , 2 ])
13711379 def test_nested_cache_blocking_structure_subdims (self , blocklevels ):
13721380 """
@@ -1430,6 +1438,7 @@ def test_nested_cache_blocking_structure_subdims(self, blocklevels):
14301438 ('omp parallel for collapse(2) schedule(dynamic,1) '
14311439 'num_threads(nthreads_nested)' )
14321440
1441+ @skipif ('nointel' )
14331442 @pytest .mark .parametrize ('exprs,collapsed,scheduling' , [
14341443 (['Eq(u.forward, u.dx)' ], '2' , 'static' ),
14351444 (['Eq(u.forward, u.dy)' ], '2' , 'static' ),
@@ -1461,3 +1470,40 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling):
14611470
14621471 assert iterations [1 ].pragmas [0 ].ccode .value == \
14631472 "" .join ([ompfor_string , scheduling_string ])
1473+
@skipif('device')
def test_nested_parallelism_support(self):
    """
    Check that nested-parallelism pragmas are emitted only when the
    configured compiler is an ``IntelCompiler`` or ``OneapiCompiler``.

    With any other compiler, the inner Iterations of the blocked nest
    are expected to carry no pragmas at all. In both cases the generated
    code must still compile.
    """
    grid = Grid(shape=(10, 10, 10))

    f = Function(name='f', grid=grid, space_order=4)
    v = TimeFunction(name="v", grid=grid, space_order=4)
    v1 = TimeFunction(name="v1", grid=grid, space_order=4)

    f.data_with_halo[:] = 0.5
    v.data_with_halo[:] = 1.
    v1.data_with_halo[:] = 1.

    eqn = Eq(v.forward, (v.dx * (1 + 2*f) * f).dx)
    op = Operator(eqn, opt=('advanced', {'openmp': True, 'par-nested': 0}))

    bns, _ = assert_blocking(op, {'x0_blk0'})
    trees = retrieve_iteration_tree(bns['x0_blk0'])
    assert len(trees) == 2

    # Check the OpenMP pragmas. Note that `ccode.value` is compared
    # against the pragma text *without* the leading `#pragma `, as in
    # the outermost-loop check right below
    assert trees[0][0].pragmas[0].ccode.value == \
        'omp for collapse(2) schedule(dynamic,1)'
    if isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)):
        # Supports nested parallelism
        assert trees[0][2].pragmas[0].ccode.value == \
            'omp parallel for collapse(2) schedule(dynamic,1)' \
            ' num_threads(nthreads_nested)'
        assert trees[1][2].pragmas[0].ccode.value == \
            trees[0][2].pragmas[0].ccode.value
    else:
        # Most compilers don't support nested parallelism
        assert not trees[0][2].pragmas
        assert not trees[1][2].pragmas

    # Should compile properly; accessing the attribute is what triggers
    # the check, hence the otherwise useless expression
    op.cfunction  # noqa: B018
0 commit comments