Skip to content

Commit 72d6b2d

Browse files
authored
Merge branch 'master' into update-ndarray-docstrings
2 parents d2c3879 + 78179a2 commit 72d6b2d

72 files changed

Lines changed: 2222 additions & 351 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ repos:
6666
name: isort (pyi)
6767
types: [pyi]
6868
- repo: https://github.com/pycqa/flake8
69-
rev: 7.1.2
69+
rev: 7.2.0
7070
hooks:
7171
- id: flake8
7272
args: ["--config=.flake8"]
@@ -79,7 +79,7 @@ repos:
7979
- id: clang-format
8080
args: ["-i"]
8181
- repo: https://github.com/gitleaks/gitleaks
82-
rev: v8.24.0
82+
rev: v8.26.0
8383
hooks:
8484
- id: gitleaks
8585
- repo: https://github.com/jumanjihouse/pre-commit-hooks

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ This release achieves 100% compliance with Python Array API specification (revis
1616
* Added implementation of `dpnp.bartlett` [#2366](https://github.com/IntelPython/dpnp/pull/2366)
1717
* Added implementation of `dpnp.convolve` [#2205](https://github.com/IntelPython/dpnp/pull/2205)
1818
* Added implementation of `dpnp.kaiser` [#2387](https://github.com/IntelPython/dpnp/pull/2387)
19+
* Added implementation of `dpnp.interp` [#2417](https://github.com/IntelPython/dpnp/pull/2417)
20+
* Added support to build `dpnp` for specified AMD GPU architecture using [CodePlay oneAPI plug-in](https://developer.codeplay.com/products/oneapi/amd/home/) [#2302](https://github.com/IntelPython/dpnp/pull/2302)
1921

2022
### Changed
2123

@@ -27,7 +29,16 @@ This release achieves 100% compliance with Python Array API specification (revis
2729
* Updated `dpnp.einsum` to add support for `order=None` [#2411](https://github.com/IntelPython/dpnp/pull/2411)
2830
* Updated Python Array API specification version supported to `2024.12` [#2416](https://github.com/IntelPython/dpnp/pull/2416)
2931
* Removed `einsum_call` keyword from `dpnp.einsum_path` signature [#2421](https://github.com/IntelPython/dpnp/pull/2421)
32+
* Updated `dpnp.vdot` to return a 0-D array when one of the inputs is a scalar [#2295](https://github.com/IntelPython/dpnp/pull/2295)
33+
* Updated `dpnp.outer` to return the same dtype as NumPy when multiplying an array with a scalar [#2295](https://github.com/IntelPython/dpnp/pull/2295)
3034
* Changed `"max dimensions"` to `None` in array API capabilities [#2432](https://github.com/IntelPython/dpnp/pull/2432)
35+
* Updated kernel header `i0.hpp` to expose `cyl_bessel_i0` function depending on build target [#2440](https://github.com/IntelPython/dpnp/pull/2440)
36+
* Added MKL functions `arg`, `copysign`, `i0`, and `inv` from VM namespace to be used by implementation of the appropriate element-wise functions [#2445](https://github.com/IntelPython/dpnp/pull/2445)
37+
* Clarified details about conda install instructions in `Quick Start Guide` and `README` [#2446](https://github.com/IntelPython/dpnp/pull/2446)
38+
* Bumped oneMKL version up to `0.7` [#2448](https://github.com/IntelPython/dpnp/pull/2448)
39+
* The parameter `axis` in the `dpnp.take_along_axis` function now has a default value of `-1` [#2442](https://github.com/IntelPython/dpnp/pull/2442)
40+
* Updated the list of required Python versions documented in the `Quick Start Guide` [#2449](https://github.com/IntelPython/dpnp/pull/2449)
41+
* Updated FFT module to ensure an input array is Hermitian before calling complex-to-real FFT [#2444](https://github.com/IntelPython/dpnp/pull/2444)
3142

3243
### Fixed
3344

CMakeLists.txt

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -75,27 +75,60 @@ option(DPNP_USE_ONEMKL_INTERFACES
7575
"Build DPNP with oneMKL Interfaces"
7676
OFF
7777
)
78+
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
79+
7880
set(_dpnp_sycl_targets)
81+
set(_use_onemkl_interfaces OFF)
7982
set(_use_onemkl_interfaces_cuda OFF)
83+
set(_use_onemkl_interfaces_hip OFF)
84+
85+
set(_dpnp_sycl_target_compile_options)
86+
set(_dpnp_sycl_target_link_options)
87+
8088
if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
81-
if(DPNP_TARGET_CUDA)
82-
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
83-
set(_use_onemkl_interfaces_cuda ON)
84-
else()
85-
if(DEFINED ENV{DPNP_TARGET_CUDA})
86-
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
87-
set(_use_onemkl_interfaces_cuda ON)
88-
endif()
89-
endif()
89+
if(DPNP_TARGET_CUDA)
90+
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
91+
set(_use_onemkl_interfaces_cuda ON)
92+
endif()
93+
94+
if (NOT "x${HIP_TARGETS}" STREQUAL "x")
95+
set(_use_onemkl_interfaces_hip ON)
96+
97+
if ("x${_dpnp_sycl_targets}" STREQUAL "x")
98+
set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},spir64-unknown-unknown")
99+
else()
100+
set(_dpnp_sycl_targets "amd_gpu_${HIP_TARGETS},${_dpnp_sycl_targets}")
101+
endif()
102+
endif()
90103
else()
91-
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
104+
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
105+
106+
if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
107+
set(_use_onemkl_interfaces_cuda ON)
108+
endif()
109+
110+
if ("${DPNP_SYCL_TARGETS}" MATCHES "amd_gpu_")
111+
set(_use_onemkl_interfaces_hip ON)
112+
113+
if ("x${HIP_TARGETS}" STREQUAL "x")
114+
message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend")
115+
endif()
116+
endif()
117+
118+
if ("${DPNP_SYCL_TARGETS}" MATCHES "amdgcn-amd-amdhsa")
119+
message(FATAL_ERROR
120+
"Legacy target 'amdgcn-amd-amdhsa' is not supported. "
121+
"Use alias form 'amd_gpu_<arch>' instead"
122+
)
123+
endif()
92124
endif()
93125

94-
if(_dpnp_sycl_targets)
126+
if (_dpnp_sycl_targets)
95127
message(STATUS "Compiling for -fsycl-targets=${_dpnp_sycl_targets}")
128+
list(APPEND _dpnp_sycl_target_compile_options -fsycl-targets=${_dpnp_sycl_targets})
129+
list(APPEND _dpnp_sycl_target_link_options -fsycl-targets=${_dpnp_sycl_targets})
96130
endif()
97131

98-
set(_use_onemkl_interfaces OFF)
99132
if(DPNP_USE_ONEMKL_INTERFACES)
100133
set(_use_onemkl_interfaces ON)
101134
else()
@@ -107,26 +140,33 @@ endif()
107140
if(_use_onemkl_interfaces)
108141
set(BUILD_FUNCTIONAL_TESTS False)
109142
set(BUILD_EXAMPLES False)
143+
set(ENABLE_MKLGPU_BACKEND True)
144+
set(ENABLE_MKLCPU_BACKEND True)
145+
110146
if(_use_onemkl_interfaces_cuda)
111147
set(ENABLE_CUBLAS_BACKEND True)
112148
set(ENABLE_CUSOLVER_BACKEND True)
113149
set(ENABLE_CUFFT_BACKEND True)
114150
# set(ENABLE_CURAND_BACKEND True)
115-
set(ENABLE_MKLGPU_BACKEND True)
116-
set(ENABLE_MKLCPU_BACKEND True)
151+
endif()
152+
if(_use_onemkl_interfaces_hip)
153+
set(ENABLE_ROCBLAS_BACKEND True)
154+
set(ENABLE_ROCSOLVER_BACKEND True)
155+
set(ENABLE_ROCFFT_BACKEND True)
156+
# set(ENABLE_ROCRAND_BACKEND True)
117157
endif()
118158

119159
if(DPNP_ONEMKL_INTERFACES_DIR)
120-
FetchContent_Declare(onemkl_interfaces_library SOURCE_DIR "${DPNP_ONEMKL_INTERFACES_DIR}")
160+
FetchContent_Declare(onemath_library SOURCE_DIR "${DPNP_ONEMKL_INTERFACES_DIR}")
121161
else()
122162
FetchContent_Declare(
123-
onemkl_interfaces_library
163+
onemath_library
124164
GIT_REPOSITORY https://github.com/uxlfoundation/oneMath.git
125-
GIT_TAG 8f4312ef966420b9b8b4b82b9d5c22e2c91a1fe7 # v0.6
165+
GIT_TAG 20ba6fd7ae4af6ed693246cfd22c343e6522edbe # v0.7
126166
)
127167
endif()
128168

129-
FetchContent_MakeAvailable(onemkl_interfaces_library)
169+
FetchContent_MakeAvailable(onemath_library)
130170
if(TARGET onemath)
131171
set(MKL_INTERFACES_LIB "onemath" CACHE INTERNAL "OneMath lib target")
132172
elseif(TARGET onemkl)
@@ -136,6 +176,13 @@ if(_use_onemkl_interfaces)
136176
endif()
137177
message(STATUS "MKL interfaces lib target used: ${MKL_INTERFACES_LIB}")
138178
set(CMAKE_INSTALL_RPATH "${CMAKE_BINARY_DIR}/lib")
179+
else()
180+
if(_use_onemkl_interfaces_cuda OR _use_onemkl_interfaces_hip)
181+
message(FATAL_ERROR
182+
"CUDA or HIP targets are enabled, but oneMKL Interfaces are not. "
183+
"Please set DPNP_USE_ONEMKL_INTERFACES=ON to enable them."
184+
)
185+
endif()
139186
endif()
140187

141188
if(WIN32)

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ To get the library from the latest release, follow the instructions from
4141
To install `dpnp` from the Intel(R) conda channel, use the following command:
4242

4343
```bash
44-
conda install dpnp -c https://software.repos.intel.com/python/conda/ -c conda-forge
44+
conda install dpnp -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels
4545
```
4646

4747
## Pip
@@ -60,7 +60,7 @@ To try out the latest features, install `dpnp` using our development channel on
6060
Anaconda cloud:
6161

6262
```bash
63-
conda install dpnp -c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge
63+
conda install dpnp -c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels
6464
```
6565

6666

doc/quick_start_guide.rst

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Follow device driver installation instructions to complete the step.
2424
Python Interpreter
2525
==================
2626

27-
You will need Python 3.8, 3.9, or 3.10 installed on your system. If you
27+
You will need Python 3.9, 3.10, 3.11, or 3.12 installed on your system. If you
2828
do not have one yet the easiest way to do that is to install
2929
`Intel Distribution for Python*`_. It installs all essential Python numerical
3030
and machine learning packages optimized for the Intel hardware, including
@@ -42,14 +42,20 @@ Install Package from Intel(R) channel
4242

4343
You will need one of the commands below:
4444

45-
* Conda: ``conda install dpnp -c https://software.repos.intel.com/python/conda/ -c conda-forge``
45+
* Conda: ``conda install dpnp -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels``
4646

4747
* Pip: ``python -m pip install --index-url https://software.repos.intel.com/python/pypi dpnp``
4848

4949
These commands install dpnp package along with its dependencies, including
5050
``dpctl`` package with `Data Parallel Control Library`_ and all required
5151
compiler runtimes and OneMKL.
5252

53+
.. warning::
54+
Packages from the Intel channel are meant to be used together with dependencies from the **conda-forge** channel, and might not
55+
work correctly when used in an environment where packages from the ``anaconda`` default channel have been installed. It is
56+
advisable to use the `miniforge <https://github.com/conda-forge/miniforge>`__ installer for ``conda``/``mamba``, as it comes with
57+
``conda-forge`` as the only default channel.
58+
5359
.. note::
5460
Before installing with conda or pip it is strongly advised to update ``conda`` and ``pip`` to latest versions
5561

@@ -68,7 +74,7 @@ And to build dpnp package from the sources:
6874

6975
.. code-block:: bash
7076
71-
conda build conda-recipe -c https://software.repos.intel.com/python/conda/ -c conda-forge
77+
conda build conda-recipe -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels
7278
7379
Finally, to install the result package:
7480

@@ -90,7 +96,7 @@ On Linux:
9096
9197
conda create -n build-env dpctl cython dpcpp_linux-64 mkl-devel-dpcpp tbb-devel \
9298
onedpl-devel cmake scikit-build ninja pytest intel-gpu-ocl-icd-system \
93-
-c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge
99+
-c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels
94100
conda activate build-env
95101
96102
On Windows:
@@ -99,7 +105,7 @@ On Windows:
99105
100106
conda create -n build-env dpctl cython dpcpp_win-64 mkl-devel-dpcpp tbb-devel \
101107
onedpl-devel cmake scikit-build ninja pytest intel-gpu-ocl-icd-system \
102-
-c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge
108+
-c dppy/label/dev -c https://software.repos.intel.com/python/conda/ -c conda-forge --override-channels
103109
conda activate build-env
104110
105111
To build and install the package on Linux OS, run:
@@ -130,18 +136,52 @@ Building ``dpnp`` for these targets requires that these CodePlay plugins be inst
130136
installation layout of compatible version. The following plugins from CodePlay are supported:
131137

132138
- `oneAPI for NVIDIA(R) GPUs <codeplay_nv_plugin_>`_
139+
- `oneAPI for AMD GPUs <codeplay_amd_plugin_>`_
133140

134141
.. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/
142+
.. _codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/
135143

136144
Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
137145
<https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
138146

139-
Build ``dpnp`` as follows:
147+
``dpnp`` can be built for CUDA devices as follows:
140148

141149
.. code-block:: bash
142150
143151
python scripts/build_locally.py --target=cuda
144152
153+
And for AMD devices:
154+
155+
.. code-block:: bash
156+
157+
python scripts/build_locally.py --target-hip=<arch>
158+
159+
Note that the *oneAPI for AMD GPUs* plugin requires the architecture be specified and only
160+
one architecture can be specified at a time.
161+
162+
To determine the architecture code (``<arch>``) for your AMD GPU, run:
163+
164+
.. code-block:: bash
165+
166+
rocminfo | grep 'Name: *gfx.*'
167+
168+
This will print names like ``gfx90a``, ``gfx1030``, etc.
169+
You can then use one of them as the argument to ``--target-hip``.
170+
171+
For example:
172+
173+
.. code-block:: bash
174+
python scripts/build_locally.py --target-hip=gfx90a
175+
176+
177+
It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
178+
architecture all at once:
179+
180+
.. code-block:: bash
181+
182+
python scripts/build_locally.py --target=cuda --target-hip=gfx90a
183+
184+
145185
Testing
146186
=======
147187

dpnp/backend/extensions/blas/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,12 @@ if(_dpnp_sycl_targets)
4040
target_compile_options(
4141
${python_module_name}
4242
PRIVATE
43-
-fsycl-targets=${_dpnp_sycl_targets}
43+
${_dpnp_sycl_target_compile_options}
4444
)
4545
target_link_options(
4646
${python_module_name}
4747
PRIVATE
48-
-fsycl-targets=${_dpnp_sycl_targets}
48+
${_dpnp_sycl_target_link_options}
4949
)
5050
endif()
5151

dpnp/backend/extensions/common/ext/common.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,27 @@ struct IsNan
106106
}
107107
};
108108

109+
template <typename T, bool hasValueType>
110+
struct value_type_of_impl;
111+
112+
template <typename T>
113+
struct value_type_of_impl<T, false>
114+
{
115+
using type = T;
116+
};
117+
118+
template <typename T>
119+
struct value_type_of_impl<T, true>
120+
{
121+
using type = typename T::value_type;
122+
};
123+
124+
template <typename T>
125+
using value_type_of = value_type_of_impl<T, type_utils::is_complex_v<T>>;
126+
127+
template <typename T>
128+
using value_type_of_t = typename value_type_of<T>::type;
129+
109130
size_t get_max_local_size(const sycl::device &device);
110131
size_t get_max_local_size(const sycl::device &device,
111132
int cpu_local_size_limit,

0 commit comments

Comments
 (0)