xtensor-stack
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 0 deletions b/‎.gitignore‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 79 additions & 26 deletions b/‎CMakeLists.txt‎
Lines changed: 79 additions & 26 deletions
diff --git a/‎README.md‎
Lines changed: 80 additions & 4 deletions b/‎README.md‎
Lines changed: 80 additions & 4 deletions
diff --git a/‎bench/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion b/‎bench/CMakeLists.txt‎
Lines changed: 1 addition & 1 deletion
@@ -33,3 +33,9 @@
 *.exe
 *.out
 *.app
+
+# cmake
+build
+
+# vscode
+.vscode
@@ -15,7 +15,8 @@ cmake_minimum_required(VERSION 3.1.3)  # 3.1.3 for set(CMAKE_CXX_STANDARD 14)
 project(xtensor-fftw)
 
 set(XTENSOR_FFTW_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
-
+# Optional manual FFTW configuration. When FFTW_INCLUDE_CUSTOM_DIRS is non-empty, the FFTW section
+# further down adds it to the include path instead of calling find_package(FFTW); FFTW_LINK_FLAGS
+# is the value handed to target_link_libraries() for the benchmark executable.
+set(FFTW_INCLUDE_CUSTOM_DIRS "" CACHE STRING "Set the FFTW include dir without the requirement of FFTW installation.")
+set(FFTW_LINK_FLAGS "" CACHE STRING "Set the CXX library to link, e.g.: -L/usr/local -lfftw3")
 #--------------------------------------- versioning
 
 file(STRINGS "${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/xtensor-fftw_config.hpp" xtensor-fftw_version_defines
@@ -46,6 +47,35 @@ OPTION(FIX_RPATH "Correctly set rpath for the linker" OFF)
 OPTION(DEFAULT_COLUMN_MAJOR "Set xtensor default layout to column major. This is currently not supported, since FFTW demands row major layout." OFF)
 OPTION(COVERAGE "Enable coverage compile flags (gcc only!)" OFF)
 OPTION(DISABLE_EXCEPTIONS "Disable C++ exceptions" OFF)
+# Precision selection: each enabled precision adds an XTENSOR_FFTW_USE_* compile definition and
+# records the FindFFTW component name that will be requested later via find_package(FFTW).
+OPTION(FFTW_USE_FLOAT "Enable FFTW Float type" ON)
+OPTION(FFTW_USE_DOUBLE "Enable FFTW Double type" ON)
+OPTION(FFTW_USE_LONG_DOUBLE "Enable FFTW Long Double type" ON)
+
+# Single precision.
+set(REQUIRE_FLOAT_LIB "")
+if(FFTW_USE_FLOAT)
+    set(REQUIRE_FLOAT_LIB "FLOAT_LIB")
+    add_definitions(-DXTENSOR_FFTW_USE_FLOAT)
+endif()
+
+# Double precision.
+set(REQUIRE_DOUBLE_LIB "")
+if(FFTW_USE_DOUBLE)
+    set(REQUIRE_DOUBLE_LIB "DOUBLE_LIB")
+    add_definitions(-DXTENSOR_FFTW_USE_DOUBLE)
+endif()
+
+# Long double precision is never enabled under MSVC; in that case the option is also
+# switched off so that later checks of FFTW_USE_LONG_DOUBLE see the effective value.
+set(REQUIRE_LONG_DOUBLE_LIB "")
+if(FFTW_USE_LONG_DOUBLE AND NOT MSVC)
+    set(REQUIRE_LONG_DOUBLE_LIB "LONGDOUBLE_LIB")
+    add_definitions(-DXTENSOR_FFTW_USE_LONG_DOUBLE)
+else()
+    set(FFTW_USE_LONG_DOUBLE OFF)
+endif()
+
+# Refuse to configure when no precision at all ends up enabled.
+if(NOT (REQUIRE_FLOAT_LIB OR REQUIRE_DOUBLE_LIB OR REQUIRE_LONG_DOUBLE_LIB))
+    message(FATAL_ERROR "Please, select at least one of the available FFTW type libraries")
+endif()
 
 if (COVERAGE)
     if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU")
@@ -65,21 +95,6 @@ set(CMAKE_CXX_EXTENSIONS NO)
 # .. our own
 include_directories(${XTENSOR_FFTW_INCLUDE_DIR})
 
-# .. xtensor
-set(xtensor_REQUIRED_VERSION 0.20.9)
-if(TARGET xtensor)
-    set(xtensor_VERSION ${XTENSOR_VERSION_MAJOR}.${XTENSOR_VERSION_MINOR}.${XTENSOR_VERSION_PATCH})
-    # Note: This is not SEMVER compatible comparison
-    if( NOT ${xtensor_VERSION} VERSION_GREATER_EQUAL ${xtensor_REQUIRED_VERSION})
-        message(ERROR "Mismatch xtensor versions. Found '${xtensor_VERSION}' but requires: '${xtensor_REQUIRED_VERSION}'")
-    else()
-        message(STATUS "Found xtensor v${xtensor_VERSION}")
-    endif()
-else()
-    find_package(xtensor ${xtensor_REQUIRED_VERSION} REQUIRED)
-    message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor")
-endif()
-
 # .. xtl
 set(xtl_REQUIRED_VERSION 0.6.9)
 if(TARGET xtl)
@@ -95,18 +110,51 @@ else()
     message(STATUS "Found xtl: ${xtl_INCLUDE_DIRS}/xtl")
 endif()
 
+# .. xtensor
+# Minimum xtensor version accepted by this project.
+set(xtensor_REQUIRED_VERSION 0.20.9)
+if(TARGET xtensor)
+    # An xtensor target already exists in this build, so find_package() is skipped and the
+    # version is assembled from the XTENSOR_VERSION_* variables (presumably exported by xtensor).
+    set(xtensor_VERSION ${XTENSOR_VERSION_MAJOR}.${XTENSOR_VERSION_MINOR}.${XTENSOR_VERSION_PATCH})
+    # Note: This is not SEMVER compatible comparison
+    # VERSION_LESS is used (rather than NOT ... VERSION_GREATER_EQUAL) because the latter needs
+    # CMake >= 3.7 while this project declares 3.1.3 as its minimum; operands are quoted so an
+    # empty or odd version string cannot break the if() syntax.
+    if("${xtensor_VERSION}" VERSION_LESS "${xtensor_REQUIRED_VERSION}")
+        # FATAL_ERROR is a real message mode; a bare ERROR is printed as text and does not stop.
+        message(FATAL_ERROR "Mismatch xtensor versions. Found '${xtensor_VERSION}' but requires: '${xtensor_REQUIRED_VERSION}'")
+    else()
+        message(STATUS "Found xtensor v${xtensor_VERSION}")
+    endif()
+else()
+    # No target available: locate an installed xtensor of at least the required version.
+    find_package(xtensor ${xtensor_REQUIRED_VERSION} REQUIRED)
+    message(STATUS "Found xtensor: ${xtensor_INCLUDE_DIRS}/xtensor")
+endif()
+
 # .. fftw
-if(MSVC)
-    # no long double component, since in the Windows conda-forge build it is not available
-    # and the "official" prebuilt long double library can only be used from MinGW
-    find_package(FFTW REQUIRED
-            COMPONENTS FLOAT_LIB DOUBLE_LIB)
-    add_definitions(-DFFTW_NO_LONGDOUBLE)
+if(NOT "${FFTW_INCLUDE_CUSTOM_DIRS}" STREQUAL "")
+    include_directories(${FFTW_INCLUDE_CUSTOM_DIRS})
 else()
-    find_package(FFTW REQUIRED
-            COMPONENTS FLOAT_LIB DOUBLE_LIB LONGDOUBLE_LIB)
-endif(MSVC)
-include_directories(${FFTW_INCLUDE_DIRS})
+    # Request only the FFTW components for the enabled precisions. REQUIRE_LONG_DOUBLE_LIB is
+    # always empty under MSVC (it is only filled when NOT MSVC), so a single call covers both cases.
+    find_package(FFTW REQUIRED
+            COMPONENTS ${REQUIRE_FLOAT_LIB} ${REQUIRE_DOUBLE_LIB} ${REQUIRE_LONG_DOUBLE_LIB})
+    if(MSVC)
+        # no long double component, since in the Windows conda-forge build it is not available
+        # and the "official" prebuilt long double library can only be used from MinGW
+        add_definitions(-DFFTW_NO_LONGDOUBLE)
+    endif()
+    include_directories(${FFTW_INCLUDE_DIRS})
+
+    # link only with selected libraries
+    # A normal variable shadows the FFTW_LINK_FLAGS cache entry here. list(APPEND) is used instead
+    # of set(VAR ${VALUE}) because set() with an empty expansion would unset the normal variable
+    # and silently re-expose the cached value.
+    set(FFTW_LINK_FLAGS "")
+    if(FFTW_USE_FLOAT)
+        list(APPEND FFTW_LINK_FLAGS ${FFTW_FLOAT_LIB})
+    endif()
+
+    if(FFTW_USE_DOUBLE)
+        list(APPEND FFTW_LINK_FLAGS ${FFTW_DOUBLE_LIB})
+    endif()
+
+    if(FFTW_USE_LONG_DOUBLE)
+        list(APPEND FFTW_LINK_FLAGS ${FFTW_LONGDOUBLE_LIB})
+    endif()
+endif()
 
 # warnings (gcc and clang)
 if (COMPILE_WARNINGS)
@@ -129,7 +177,12 @@ endif(FIX_RPATH)
 #--------------------------------------- library contents
 
 set(XTENSOR_FFTW_HEADERS
+        ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/basic_double.hpp
+        ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/basic_float.hpp
+        ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/basic_long_double.hpp
+        ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/basic_option.hpp
         ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/basic.hpp
+        ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/common.hpp
         ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/helper.hpp
         ${XTENSOR_FFTW_INCLUDE_DIR}/xtensor-fftw/xtensor-fftw_config.hpp
 )
 
@@ -58,6 +58,25 @@ _xtensor-fftw_ is a header-only library.
 To use, include one of the header files in the `include` directory, e.g. `xtensor-fftw/basic.hpp`, in your c++ code.
 To compile, one should also include the paths to the FFTW header and libraries and link to the appropriate FFTW library.
 
+FFTW supports three floating-point precisions: `float`, `double` and `long double`.  
+The impact of the precision type can be seen below in the benchmark results.  
+Use the following matrix to include, compile and link the right target:  
+
+| `#include`                         | `precision types`          | `xtensor-fftw compile options`    | `FFTW compile options`  |
+|------------------------------------|----------------------------|-----------------------------------|-------------------------|
+| xtensor-fftw/basic_float.hpp       | float                      | -DXTENSOR_FFTW_USE_FLOAT=ON       | -DENABLE_FLOAT=ON       |
+| xtensor-fftw/basic_double.hpp      | double                     | -DXTENSOR_FFTW_USE_DOUBLE=ON      | -DENABLE_DOUBLE=ON      |
+| xtensor-fftw/basic_long_double.hpp | long double                | -DXTENSOR_FFTW_USE_LONG_DOUBLE=ON | -DENABLE_LONGDOUBLE=ON  |
+| xtensor-fftw/basic_option.hpp      | depends on compile options | subset of above options           | subset of above options |
+| xtensor-fftw/basic.hpp             | all types                  | no option                         | all above options       |
+
+Specify only the precision types you actually need in order to reduce the size of your application's dependencies (this matters, for example, for a mobile app). FFTW has to build a separate library for each precision, which results in:
+* `libfftw3f` for float precision
+* `libfftw3` for double precision
+* `libfftw3l` for long double precision
+
+>__*Notes*__: FFTW supports SIMD instructions (SSE, SSE2, AVX, AVX2), OpenMP and threads optimizations. Take a look at the available options before compiling it.  
+
 The functions in `xtensor-fftw/basic.hpp` mimic the behavior of `numpy.fft` as much as possible.
 In most cases transforms on identical input data should produce identical results within reasonable machine precision error bounds.
 However, there are a few differences that one should keep in mind:
@@ -126,11 +145,10 @@ What follows are instructions for compiling and running the _xtensor-fftw_ tests
 These also serve as an example of how to do build your own code using _xtensor-fftw_ (excluding the GoogleTest specific parts).
 
 ### Dependencies for building tests
+
 The main dependency is a version of FFTW 3.
-For the tests, we need the floating point version which is enabled in the FFTW configuration step using:
-```bash
-./configure --enable-float
-```
+To enable all the precision types, FFTW must be compiled with the related flags:  
+`cmake -DENABLE_FLOAT:BOOL=ON -DENABLE_LONGDOUBLE:BOOL=ON /path/of/fftw3-src`
 
 CMake and _xtensor_ must also be installed in order to compile the _xtensor-fftw_ tests.
 Both can either be installed through Conda or built/installed manually.
@@ -179,6 +197,64 @@ cd test
 ./test_xtensor-fftw
 ```
 
+## Advanced Setting
+
+This section shows how to configure `cmake` in order to exploit advanced settings.  
+
+### Use only Double precision
+
+After a standard installation of the FFTW library without any particular options, this command allows you to run the tests and benchmarks using only `double` precision: 
+
+```cmake
+cmake -DBUILD_BENCHMARK=ON -DDOWNLOAD_GBENCH=ON -DBUILD_TESTS=ON -DDOWNLOAD_GTEST=ON -DFFTW_USE_FLOAT=OFF  -DFFTW_USE_LONG_DOUBLE=OFF -DFFTW_USE_DOUBLE=ON  -DCMAKE_BUILD_TYPE=Release  ..
+```
+
+Let's see what `./bench/benchmark_xtensor-fftw` produces:
+
+```
+Run on (16 X 2300 MHz CPU s)
+-------------------------------------------------------------------------------
+Benchmark                                        Time           CPU Iterations
+-------------------------------------------------------------------------------
+rfft1Dxarray_double/TransformAndInvert         66375 ns      66354 ns      10149
+rfft1Dxarray_double/TransformAndInvert_nD      70856 ns      70829 ns      10128
+rfft2Dxarray_double/TransformAndInvert         61264 ns      61256 ns      11456
+rfft2Dxarray_double/TransformAndInvert_nD      62297 ns      62269 ns      10851
+```
+
+### Manually specify FFTW headers and link flags
+
+This can be very useful: in this case FFTW does not need to be installed, only compiled.  
+The following command produces the same results as before:  
+
+```cmake
+cmake -DBUILD_BENCHMARK=ON -DDOWNLOAD_GBENCH=ON -DBUILD_TESTS=ON -DDOWNLOAD_GTEST=ON -DFFTW_USE_FLOAT=OFF -DFFTW_USE_LONG_DOUBLE=OFF -DFFTW_USE_DOUBLE=ON -DFFTW_INCLUDE_CUSTOM_DIRS=/path/to/fftw3/api -DFFTW_LINK_FLAGS="-L/path/to/fftw3/build -lfftw3" ..
+```
+
+### Use Intel MKL
+
+Since 2018, Intel has released a version of its famous MKL (Math Kernel Library) with C++ and Fortran wrappers for FFTW.  
+Once MKL (or oneAPI MKL) is installed on the system, enter the following command, adjusting the paths for your system:
+
+```cmake
+cmake -DBUILD_BENCHMARK=ON -DDOWNLOAD_GBENCH=ON -DBUILD_TESTS=ON -DDOWNLOAD_GTEST=ON -DFFTW_USE_FLOAT=OFF  -DFFTW_USE_LONG_DOUBLE=OFF -DFFTW_USE_DOUBLE=ON -DFFTW_INCLUDE_CUSTOM_DIRS=/opt/intel/oneapi/mkl/2021.2.0/include/fftw -DFFTW_LINK_FLAGS="-L/opt/intel/oneapi/mkl/2021.2.0/lib -L/opt/intel/oneapi/compiler/2021.2.0/mac/compiler/lib -lmkl_core -lmkl_intel_thread -lmkl_intel_lp64 -liomp5" -DRUN_HAVE_STD_REGEX=0 -DCMAKE_BUILD_TYPE=Release ..
+```
+
+Let's see what `./bench/benchmark_xtensor-fftw` now produces:
+
+```
+Run on (16 X 2300 MHz CPU s)
+-------------------------------------------------------------------------------
+Benchmark                                        Time           CPU Iterations
+-------------------------------------------------------------------------------
+rfft1Dxarray_double/TransformAndInvert          9265 ns       9258 ns      58371
+rfft1Dxarray_double/TransformAndInvert_nD       9636 ns       9602 ns      73961
+rfft2Dxarray_double/TransformAndInvert         34428 ns      34427 ns      20216
+rfft2Dxarray_double/TransformAndInvert_nD      37401 ns      37393 ns      19480
+```
+
+>__*Note*__: Before running the tests or benchmarks, remember to export the Intel library path, e.g. on OS X: `export DYLD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2021.2.0/lib/:/opt/intel/oneapi/compiler/2021.2.0/mac/compiler/lib/`
+
 ## License
 
 We use a shared copyright model that enables all contributors to maintain the
 
@@ -101,6 +101,6 @@ add_executable(${XTENSOR_FFTW_TARGET} ${XTENSOR_FFTW_BENCHMARKS} ${XTENSOR_HEADE
 if(DOWNLOAD_GBENCH OR GBENCH_SRC_DIR)
     add_dependencies(${XTENSOR_FFTW_TARGET} benchmark)
 endif()
-target_link_libraries(${XTENSOR_FFTW_TARGET} ${benchmark_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}  ${FFTW_FLOAT_LIB} ${FFTW_DOUBLE_LIB} ${FFTW_LONGDOUBLE_LIB})
+# Link the benchmark against Google Benchmark, the thread library and whatever FFTW_LINK_FLAGS
+# holds (presumably set by the top-level CMakeLists.txt or supplied by the user -- confirm).
+target_link_libraries(${XTENSOR_FFTW_TARGET} ${benchmark_BOTH_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${FFTW_LINK_FLAGS})
 
 add_custom_target(xbench COMMAND benchmark_xtensor-fftw DEPENDS ${XTENSOR_FFTW_TARGET})
-Original file line number
+Diff line change
 *.exe
 *.out
 *.app
++
 +# cmake
 +build
++
 +# vscode
 +.vscode