|
6 | 6 |
|
7 | 7 | #include <cassert> |
8 | 8 | #include <type_traits> |
| 9 | +#include <utility> |
9 | 10 |
|
10 | 11 | #include <Kokkos_Core.hpp> |
11 | 12 |
|
12 | | -#include "chunk_span.hpp" |
13 | 13 | #include "chunk_traits.hpp" |
14 | | -#include "parallel_for_each.hpp" |
| 14 | +#include "ddc_to_kokkos_execution_policy.hpp" |
15 | 15 |
|
16 | 16 | namespace ddc { |
17 | 17 |
|
18 | 18 | namespace detail { |
19 | 19 |
|
// Review note: rows marked '-' are the removed implementation, rows marked '+' are its replacement.
// The removed class was parameterised on seven types (element types, domains, memory space, layouts)
// and spelled out both ddc::ChunkSpan member types itself. The replacement takes the two span types
// directly plus an index-sequence type.
20 | | -template < |
21 | | - typename Tsrc, |
22 | | - typename Tdst, |
23 | | - typename DDomSrc, |
24 | | - typename DDomDst, |
25 | | - typename MemorySpace, |
26 | | - typename LayoutSrc, |
27 | | - typename LayoutDst> |
// New primary template: its body is empty; all members live in the partial specialization
// for std::index_sequence<Idx...> that follows.
| 20 | +template <typename ChunkSpanDst, typename ChunkSpanSrc, typename IndexSequence> |
28 | 21 | class CopyKokkosLambdaAdapter |
29 | 22 | { |
30 | | - ddc::ChunkSpan<Tdst, DDomDst, LayoutDst, MemorySpace> m_dst; |
| 23 | +}; |
| 24 | + |
| 25 | +template <typename ChunkSpanDst, typename ChunkSpanSrc, std::size_t... Idx> |
| 26 | +class CopyKokkosLambdaAdapter<ChunkSpanDst, ChunkSpanSrc, std::index_sequence<Idx...>> |
| 27 | +{ |
// index_type<I> is DiscreteVectorElement for every I; the parameter I is unused in the alias and
// only lets `index_type<Idx>...` expand to one function parameter per entry of Idx.
| 28 | + template <std::size_t I> |
| 29 | + using index_type = DiscreteVectorElement; |
31 | 30 |
|
32 | | - ddc::ChunkSpan<Tsrc const, DDomSrc, LayoutSrc, MemorySpace> m_src; |
// The adapter stores copies of both spans (constructor takes them by const reference and
// copy-initialises the members).
| 31 | + ChunkSpanDst m_dst; |
| 32 | + |
| 33 | + ChunkSpanSrc m_src; |
33 | 34 |
|
34 | 35 | public: |
35 | | - explicit CopyKokkosLambdaAdapter( |
36 | | - ddc::ChunkSpan<Tdst, DDomDst, LayoutDst, MemorySpace> const& dst, |
37 | | - ddc::ChunkSpan<Tsrc const, DDomSrc, LayoutSrc, MemorySpace> const& src) |
| 36 | + explicit CopyKokkosLambdaAdapter(ChunkSpanDst const& dst, ChunkSpanSrc const& src) |
38 | 37 | : m_dst(dst) |
39 | 38 | , m_src(src) |
40 | 39 | { |
41 | 40 | } |
42 | 41 |
|
// The removed call operator took a single discrete element of the destination domain and
// converted it to the source domain's discrete element type before reading the source.
43 | | - KOKKOS_FUNCTION void operator()(DDomDst::discrete_element_type idst) const |
// New overload, enabled only when Idx is empty: it accepts one index argument that is ignored
// and assigns m_src() to m_dst(), both called without indices.
// NOTE(review): presumably the single ignored argument exists because the execution policy still
// invokes the functor with one index for a rank-0 copy — confirm against ddc_to_kokkos_execution_policy.
| 42 | + KOKKOS_FUNCTION void operator()(index_type<0> /*id*/) const |
| 43 | + requires(sizeof...(Idx) == 0) |
44 | 44 | { |
45 | | - m_dst(idst) = m_src(typename DDomSrc::discrete_element_type(idst)); |
| 45 | + m_dst() = m_src(); |
| 46 | + } |
| 47 | + |
// New overload, enabled only when Idx is non-empty: it receives one DiscreteVectorElement per
// entry of Idx, builds the destination's discrete vector from them, converts that vector to the
// source's discrete vector type, and assigns the addressed source element to the destination.
| 48 | + KOKKOS_FUNCTION void operator()(index_type<Idx>... ids) const |
| 49 | + requires(sizeof...(Idx) > 0) |
| 50 | + { |
// NOTE(review): the two dependent type names below are written without `typename`; that is
// accepted under C++20 rules, which the `requires` clauses in this class already rely on.
| 51 | + using DVectDst = ChunkSpanDst::discrete_vector_type; |
| 52 | + using DVectSrc = ChunkSpanSrc::discrete_vector_type; |
| 53 | + DVectDst const ddst(ids...); |
| 54 | + m_dst(ddst) = m_src(DVectSrc(ddst)); |
46 | 55 | } |
47 | 56 | }; |
48 | 57 |
|
// Deduction guide: constructing CopyKokkosLambdaAdapter(dst, src) without explicit template
// arguments deduces the two span types from the arguments and fixes the third template argument
// to std::make_index_sequence<ChunkSpanDst::rank()>, i.e. the indices 0 .. rank()-1.
// NOTE(review): rank() is presumably the number of dimensions of the destination span, and it must
// be usable in a constant expression here — confirm against the ChunkSpan definition.
| 58 | +template <typename ChunkSpanDst, typename ChunkSpanSrc> |
| 59 | +CopyKokkosLambdaAdapter(ChunkSpanDst const& dst, ChunkSpanSrc const& src) |
| 60 | + -> CopyKokkosLambdaAdapter< |
| 61 | + ChunkSpanDst, |
| 62 | + ChunkSpanSrc, |
| 63 | + std::make_index_sequence<ChunkSpanDst::rank()>>; |
| 64 | + |
49 | 65 | } // namespace detail |
50 | 66 |
|
51 | 67 | /** Copy the content of a borrowed chunk into another. It supports transposition and broadcasting at the same time. |
@@ -82,9 +98,11 @@ auto parallel_copy(ExecSpace const& execution_space, ChunkDst&& dst, ChunkSrc&& |
82 | 98 | // Alternative implementations: |
83 | 99 | // - outer loop over src dimensions and inner loop over batch dimensions |
84 | 100 | // - outer loop over batch dimensions and inner loop over src dimensions |
85 | | - ddc::parallel_for_each( |
86 | | - execution_space, |
87 | | - dst.domain(), |
| 101 | + Kokkos::parallel_for( |
| 102 | + "ddc_copy_default", |
| 103 | + detail::ddc_to_kokkos_execution_policy( |
| 104 | + execution_space, |
| 105 | + detail::array(dst.domain().extents())), |
88 | 106 | detail::CopyKokkosLambdaAdapter(dst.span_view(), src.span_cview())); |
89 | 107 | } |
90 | 108 | return dst.span_view(); |
|
0 commit comments