|
11 | 11 | PrecomputedSparseTimeFunction, ReduceMax, ReduceMin, ReduceMinMax, SpaceDimension, |
12 | 12 | SparseTimeFunction, SubDimension, TimeFunction, configuration, cos, dimensions, info |
13 | 13 | ) |
| 14 | +from devito.arch.compiler import IntelCompiler, OneapiCompiler |
14 | 15 | from devito.exceptions import InvalidArgument |
15 | 16 | from devito.ir.iet import ( |
16 | 17 | Expression, FindNodes, IsPerfectIteration, Iteration, retrieve_iteration_tree |
@@ -1461,3 +1462,40 @@ def test_collapsing_w_wo_halo(self, exprs, collapsed, scheduling): |
1461 | 1462 |
|
1462 | 1463 | assert iterations[1].pragmas[0].ccode.value ==\ |
1463 | 1464 | "".join([ompfor_string, scheduling_string]) |
| 1465 | + |
| 1466 | + @skipif('device') |
| 1467 | + def test_nested_parallelism_support(self): |
| 1468 | + grid = Grid(shape=(10, 10, 10)) |
| 1469 | + |
| 1470 | + f = Function(name='f', grid=grid, space_order=4) |
| 1471 | + v = TimeFunction(name="v", grid=grid, space_order=4) |
| 1472 | + v1 = TimeFunction(name="v1", grid=grid, space_order=4) |
| 1473 | + |
| 1474 | + f.data_with_halo[:] = 0.5 |
| 1475 | + v.data_with_halo[:] = 1. |
| 1476 | + v1.data_with_halo[:] = 1. |
| 1477 | + |
| 1478 | + eqn = Eq(v.forward, (v.dx * (1 + 2*f) * f).dx) |
| 1479 | + op = Operator(eqn, opt=('advanced', {'openmp': True, 'par-nested': 0})) |
| 1480 | + |
| 1481 | + bns, _ = assert_blocking(op, {'x0_blk0'}) |
| 1482 | + trees = retrieve_iteration_tree(bns['x0_blk0']) |
| 1483 | + assert len(trees) == 2 |
| 1484 | + |
| 1485 | + # Check omp pragmas |
| 1486 | + assert trees[0][0].pragmas[0].ccode.value == \ |
| 1487 | + 'omp for collapse(2) schedule(dynamic,1)' |
| 1488 | + if isinstance(configuration['compiler'], (IntelCompiler, OneapiCompiler)): |
| 1489 | + # Supports nested parallelism |
| 1490 | + assert trees[0][2].pragmas[0].ccode.value == \ |
| 1491 | + '#pragma omp parallel for collapse(2) schedule(dynamic,1)'\ |
| 1492 | + ' num_threads(nthreads_nested)' |
| 1493 | + assert trees[1][2].pragmas[0].ccode.value == \ |
| 1494 | + trees[0][2].pragmas[0].ccode.value |
| 1495 | + else: |
| 1496 | + # Most compilers don't support nested parallelism |
| 1497 | + assert not trees[0][2].pragmas |
| 1498 | + assert not trees[1][2].pragmas |
| 1499 | + |
| 1500 | + # Should compile properly |
| 1501 | + op.cfunction # noqa: B018 |
0 commit comments