diff --git a/README.md b/README.md
index 7c2766c1e..eef846925 100644
--- a/README.md
+++ b/README.md
@@ -338,7 +338,7 @@ Supported compilers include:
| NVIDIA GPU |
NVIDIA cuFFT |
- Open DPC++ |
+ Open DPC++AdaptiveCpp |
Dynamic, Static |
@@ -349,7 +349,7 @@ Supported compilers include:
| AMD GPU |
AMD rocFFT |
- Open DPC++ |
+ Open DPC++AdaptiveCpp |
Dynamic, Static |
diff --git a/docs/building_the_project_with_adaptivecpp.rst b/docs/building_the_project_with_adaptivecpp.rst
index 41e5b03f7..c4e1ccfca 100644
--- a/docs/building_the_project_with_adaptivecpp.rst
+++ b/docs/building_the_project_with_adaptivecpp.rst
@@ -57,7 +57,7 @@ additional guidance. The target architectures must be specified with
``HIP_TARGETS``. See the `AdaptiveCpp documentation
`_.
-If a backend library supports multiple domains (i.e. BLAS, RNG), it may be
+If a backend library supports multiple domains (i.e. BLAS, DFT, RNG), it may be
desirable to only enable selected domains. For this, the ``TARGET_DOMAINS``
variable should be set. For further details, see :ref:`_build_target_domains`.
@@ -81,6 +81,9 @@ The most important supported build options are:
* - ENABLE_CUBLAS_BACKEND
- True, False
- False
+ * - ENABLE_CUFFT_BACKEND
+ - True, False
+ - False
* - ENABLE_CURAND_BACKEND
- True, False
- False
@@ -93,6 +96,9 @@ The most important supported build options are:
* - ENABLE_ROCBLAS_BACKEND
- True, False
- False
+ * - ENABLE_ROCFFT_BACKEND
+ - True, False
+ - False
* - ENABLE_ROCRAND_BACKEND
- True, False
- False
@@ -106,7 +112,7 @@ The most important supported build options are:
- True, False
- True
* - TARGET_DOMAINS (list)
- - blas, rng
+ - blas, dft, rng
- All supported domains
Some additional build options are given in
@@ -120,8 +126,8 @@ Backends
Building for CUDA
~~~~~~~~~~~~~~~~~
-The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND`` and
-``ENABLE_CURAND_BACKEND``.
+The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
+``ENABLE_CUFFT_BACKEND`` and ``ENABLE_CURAND_BACKEND``.
The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. For
example, to target a Nvidia A100 (Ampere architecture), set
@@ -140,8 +146,8 @@ the CUDA libraries should be found automatically by CMake.
Building for ROCm
~~~~~~~~~~~~~~~~~
-The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND`` and
-``ENABLE_ROCRAND_BACKEND``.
+The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``,
+``ENABLE_ROCFFT_BACKEND`` and ``ENABLE_ROCRAND_BACKEND``.
The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. See
the `AdaptiveCpp documentation
diff --git a/src/dft/backends/cufft/backward.cpp b/src/dft/backends/cufft/backward.cpp
index 475f1ea49..41483062e 100644
--- a/src/dft/backends/cufft/backward.cpp
+++ b/src/dft/backends/cufft/backward.cpp
@@ -76,7 +76,7 @@ ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
auto stream = detail::setup_stream(func_name, ih, plan);
auto inout_native = reinterpret_cast*>(
- ih.get_native_mem(inout_acc));
+ ih.get_native_mem(inout_acc));
detail::cufft_execute>(
func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]),
reinterpret_cast(inout_native + offsets[1]));
@@ -121,14 +121,14 @@ ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto stream = detail::setup_stream(func_name, ih, plan);
- auto in_native = reinterpret_cast(
- reinterpret_cast*>(
- ih.get_native_mem(in_acc)) +
- offsets[0]);
- auto out_native = reinterpret_cast(
- reinterpret_cast*>(
- ih.get_native_mem(out_acc)) +
- offsets[1]);
+ auto in_native =
+ reinterpret_cast(reinterpret_cast*>(
+ ih.get_native_mem(in_acc)) +
+ offsets[0]);
+ auto out_native =
+ reinterpret_cast(reinterpret_cast*>(
+ ih.get_native_mem(out_acc)) +
+ offsets[1]);
detail::cufft_execute>(
func_name, stream, plan, in_native, out_native);
});
diff --git a/src/dft/backends/cufft/commit.cpp b/src/dft/backends/cufft/commit.cpp
index b6d2164ff..91cd17971 100644
--- a/src/dft/backends/cufft/commit.cpp
+++ b/src/dft/backends/cufft/commit.cpp
@@ -34,6 +34,8 @@
#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
#include "oneapi/math/dft/types.hpp"
+#include "execute_helper.hpp"
+#include "../../execute_helper_generic.hpp"
#include "../stride_helper.hpp"
#include
@@ -84,7 +86,7 @@ class cufft_commit final : public dft::detail::commit_impl {
if (fix_context) {
// cufftDestroy changes the context so change it back.
CUdevice interopDevice =
- sycl::get_native(this->get_queue().get_device());
+ sycl::get_native(this->get_queue().get_device());
CUcontext interopContext;
if (cuDevicePrimaryCtxRetain(&interopContext, interopDevice) != CUDA_SUCCESS) {
throw math::exception("dft/backends/cufft", __FUNCTION__,
@@ -353,8 +355,8 @@ class cufft_commit final : public dft::detail::commit_impl {
.submit([&](sycl::handler& cgh) {
auto workspace_acc =
buffer_workspace.template get_access(cgh);
- cgh.host_task([=](sycl::interop_handle ih) {
- auto stream = ih.get_native_queue();
+ dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
+ auto stream = ih.get_native_queue();
auto result = cufftSetStream(plan, stream);
if (result != CUFFT_SUCCESS) {
throw oneapi::math::exception(
@@ -362,7 +364,7 @@ class cufft_commit final : public dft::detail::commit_impl {
"cufftSetStream returned " + std::to_string(result));
}
auto workspace_native = reinterpret_cast(
- ih.get_native_mem(workspace_acc));
+ ih.get_native_mem(workspace_acc));
cufftSetWorkArea(plan, workspace_native);
});
})
diff --git a/src/dft/backends/cufft/execute_helper.hpp b/src/dft/backends/cufft/execute_helper.hpp
index da485fea2..3d5f6e791 100644
--- a/src/dft/backends/cufft/execute_helper.hpp
+++ b/src/dft/backends/cufft/execute_helper.hpp
@@ -36,6 +36,12 @@
namespace oneapi::math::dft::cufft::detail {
+#if defined(__ADAPTIVECPP__) || defined(__HIPSYCL__)
+constexpr auto sycl_cuda_backend{ sycl::backend::cuda };
+#else // DPC++
+constexpr auto sycl_cuda_backend{ sycl::backend::ext_oneapi_cuda };
+#endif
+
template
inline dft::detail::commit_impl* checked_get_commit(
dft::detail::descriptor& desc) {
@@ -142,7 +148,7 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
}
inline CUstream setup_stream(const std::string& func, sycl::interop_handle ih, cufftHandle plan) {
- auto stream = ih.get_native_queue();
+ auto stream = ih.get_native_queue();
auto result = cufftSetStream(plan, stream);
if (result != CUFFT_SUCCESS) {
throw oneapi::math::exception("dft/backends/cufft", func,
diff --git a/src/dft/backends/cufft/forward.cpp b/src/dft/backends/cufft/forward.cpp
index 6b2867b5f..324c83142 100644
--- a/src/dft/backends/cufft/forward.cpp
+++ b/src/dft/backends/cufft/forward.cpp
@@ -79,7 +79,7 @@ ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
auto stream = detail::setup_stream(func_name, ih, plan);
auto inout_native = reinterpret_cast*>(
- ih.get_native_mem(inout_acc));
+ ih.get_native_mem(inout_acc));
detail::cufft_execute>(
func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]),
reinterpret_cast(inout_native + offsets[1]));
@@ -124,14 +124,14 @@ ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto stream = detail::setup_stream(func_name, ih, plan);
- auto in_native = reinterpret_cast(
- reinterpret_cast*>(
- ih.get_native_mem(in_acc)) +
- offsets[0]);
- auto out_native = reinterpret_cast(
- reinterpret_cast*>(
- ih.get_native_mem(out_acc)) +
- offsets[1]);
+ auto in_native =
+ reinterpret_cast(reinterpret_cast*>(
+ ih.get_native_mem(in_acc)) +
+ offsets[0]);
+ auto out_native =
+ reinterpret_cast(reinterpret_cast*>(
+ ih.get_native_mem(out_acc)) +
+ offsets[1]);
detail::cufft_execute>(
func_name, stream, plan, in_native, out_native);
});
diff --git a/src/dft/backends/rocfft/commit.cpp b/src/dft/backends/rocfft/commit.cpp
index 4c5d51d2f..47f12f336 100644
--- a/src/dft/backends/rocfft/commit.cpp
+++ b/src/dft/backends/rocfft/commit.cpp
@@ -34,6 +34,8 @@
#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
#include "oneapi/math/dft/types.hpp"
+#include "execute_helper.hpp"
+#include "../../execute_helper_generic.hpp"
#include "../stride_helper.hpp"
#include "rocfft_handle.hpp"
@@ -557,9 +559,9 @@ class rocfft_commit final : public dft::detail::commit_impl {
this->get_queue().submit([&](sycl::handler& cgh) {
auto workspace_acc =
buffer_workspace.template get_access(cgh);
- cgh.host_task([=](sycl::interop_handle ih) {
+ dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto workspace_native = reinterpret_cast(
- ih.get_native_mem(workspace_acc));
+ ih.get_native_mem(workspace_acc));
set_workspace_impl(handle, workspace_native, workspace_bytes, "set_workspace");
});
});
diff --git a/src/dft/backends/rocfft/execute_helper.hpp b/src/dft/backends/rocfft/execute_helper.hpp
index c1ee6302b..27218d0ad 100644
--- a/src/dft/backends/rocfft/execute_helper.hpp
+++ b/src/dft/backends/rocfft/execute_helper.hpp
@@ -36,6 +36,12 @@
namespace oneapi::math::dft::rocfft::detail {
+#if defined(__ADAPTIVECPP__) || defined(__HIPSYCL__)
+constexpr auto sycl_hip_backend{ sycl::backend::hip };
+#else // DPC++
+constexpr auto sycl_hip_backend{ sycl::backend::ext_oneapi_hip };
+#endif
+
template
inline dft::detail::commit_impl* checked_get_commit(
dft::detail::descriptor& desc) {
@@ -60,12 +66,12 @@ inline auto expect_config(DescT& desc, const char* message) {
template
inline void* native_mem(sycl::interop_handle& ih, Acc& buf) {
- return ih.get_native_mem(buf);
+ return ih.get_native_mem(buf);
}
inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& ih,
rocfft_execution_info info) {
- auto stream = ih.get_native_queue();
+ auto stream = ih.get_native_queue();
auto result = rocfft_execution_info_set_stream(info, stream);
if (result != rocfft_status_success) {
throw oneapi::math::exception(
diff --git a/src/dft/execute_helper_generic.hpp b/src/dft/execute_helper_generic.hpp
index 4f5ae6727..a40a3f465 100644
--- a/src/dft/execute_helper_generic.hpp
+++ b/src/dft/execute_helper_generic.hpp
@@ -39,7 +39,11 @@ namespace oneapi::math::dft::detail {
*/
template
static inline void fft_enqueue_task(HandlerT&& cgh, FnT&& f) {
-#ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND
+#if defined(__ADAPTIVECPP__)
+ cgh.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle ih) {
+#elif defined(__HIPSYCL__)
+ cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) {
+#elif defined(SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND)
cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) {
#else
cgh.host_task([=](sycl::interop_handle ih) {
diff --git a/tests/unit_tests/dft/source/descriptor_tests.cpp b/tests/unit_tests/dft/source/descriptor_tests.cpp
index a4290e553..a825a1539 100644
--- a/tests/unit_tests/dft/source/descriptor_tests.cpp
+++ b/tests/unit_tests/dft/source/descriptor_tests.cpp
@@ -571,7 +571,9 @@ inline void recommit_values(sycl::queue& sycl_queue) {
}
template
-inline void change_queue_causes_wait(sycl::queue& busy_queue) {
+inline void change_queue_causes_wait([[maybe_unused]] sycl::queue& busy_queue) {
+// Skip this test in AdaptiveCpp, which doesn't support host_task
+#if !defined(__ADAPTIVECPP__) && !defined(__HIPSYCL__)
// create a queue with work on it, and then show that work is waited on when the descriptor
// is committed to a new queue.
// its possible to have a false positive result, but a false negative should not be possible.
@@ -616,6 +618,7 @@ inline void change_queue_causes_wait(sycl::queue& busy_queue) {
// busy queue task has now completed.
auto after_status = e.template get_info();
ASSERT_EQ(after_status, sycl::info::event_command_status::complete);
+#endif
}
template