|
1 | 1 | # |
2 | 2 | # Encapsulates building FFTW as an External Project. |
3 | 3 | # |
4 | | -# NOTE: internal building of fftw is for convenience, |
5 | | -# and the version of fftw built here does not |
6 | | -# use modern hardware optimzations. |
| 4 | +# SIMD codelet selection |
| 5 | +# ---------------------- |
| 6 | +# FFTW SIMD codelets are machine-generated (genfft) kernels written with |
| 7 | +# compiler SIMD intrinsics and compiled into the library only when enabled at FFTW build time. Passing -march=native to the ITK build does |
| 8 | +# NOT activate them; they must be requested explicitly via FFTW's own |
| 9 | +# CMake options (ENABLE_NEON, ENABLE_SSE, ENABLE_SSE2, ENABLE_AVX, ENABLE_AVX2). |
7 | 10 | # |
8 | | -# The build configuration chosen to be |
9 | | -# generalizable to as many hardware platforms. |
10 | | -# Being backward compatible for decades |
11 | | -# old hardware is the goal of this internal |
12 | | -# representation. |
| 11 | +# This file detects appropriate defaults at cmake configure time: |
13 | 12 | # |
14 | | -# This is primarily used to support testing |
15 | | -# and should not be used for production |
16 | | -# builds where performance is a concern. |
| 13 | +# Native builds (CMAKE_CROSSCOMPILING is false): |
| 14 | +# - ARM64 (aarch64/arm64/ARM64): NEON=ON (mandatory in ARMv8); x86 SIMD off. |
| 15 | +# - x86/x86_64 with GCC/Clang: each of SSE, SSE2, AVX, AVX2 is probed |
| 16 | +# individually via __builtin_cpu_supports() / CheckCSourceRuns so that |
| 17 | +# the detected flags match the actual build-host CPU. A pre-AVX CPU |
| 18 | +# (e.g. Nehalem/Westmere) gets SSE+SSE2 only; Sandy Bridge/Ivy Bridge add AVX; a Haswell or later gets all four. |
| 19 | +# On MSVC the probes are skipped (intrinsic unavailable) and SIMD |
| 20 | +# defaults to off; users can override via FFTW_ENABLE_* options. |
| 21 | +# - Other architectures: all SIMD off (conservative fallback). |
| 22 | +# |
| 23 | +# Cross-compiled builds (CMAKE_CROSSCOMPILING is true): |
| 24 | +# - ARM64: NEON=ON (mandatory); x86 SIMD off. |
| 25 | +# - x86_64: SSE+SSE2 only (baseline; AVX/AVX2 not assumed for target). |
| 26 | +# - Other: all SIMD off. |
| 27 | +# |
| 28 | +# Every flag is an individually overridable cache option, e.g.: |
| 29 | +# cmake -DFFTW_ENABLE_AVX2=OFF ... |
17 | 30 | # |
18 | 31 | # These instructions follow the guidance provided for modern cmake usage as described: |
19 | 32 | # https://github.com/dev-cafe/cmake-cookbook/blob/master/chapter-08/recipe-03/c-example/external/upstream/fftw3/CMakeLists.txt |
@@ -64,6 +77,74 @@ if(NOT ITK_USE_SYSTEM_FFTW) |
64 | 77 |
|
65 | 78 | set(FFTW_STAGED_INSTALL_PREFIX "${ITK_BINARY_DIR}/fftw") |
66 | 79 |
|
| 80 | + # Detect SIMD defaults (see file header for full policy description). |
| 81 | + # CheckCSourceRuns results are cached after the first cmake configure run. |
| 82 | + include(CheckCSourceRuns) |
| 83 | + |
| 84 | + set(_fftw_default_neon OFF) |
| 85 | + set(_fftw_default_sse OFF) |
| 86 | + set(_fftw_default_sse2 OFF) |
| 87 | + set(_fftw_default_avx OFF) |
| 88 | + set(_fftw_default_avx2 OFF) |
| 89 | + |
| 90 | + if(NOT CMAKE_CROSSCOMPILING) |
| 91 | + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") |
| 92 | + # NEON is mandatory in ARMv8/AArch64 — every arm64 CPU has it. |
| 93 | + set(_fftw_default_neon ON) |
| 94 | + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i686") |
| 95 | + # Probe each x86 SIMD level individually via CPUID so the defaults |
| 96 | + # are accurate for the actual build-host CPU (e.g. pre-AVX Westmere |
| 97 | + # or pre-AVX2 Ivy Bridge get only the levels their hardware supports). |
| 98 | + # __builtin_cpu_supports is a GCC/Clang intrinsic; skip on MSVC. |
| 99 | + if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|AppleClang") |
| 100 | + foreach(_fftw_simd IN ITEMS sse sse2 avx avx2) |
| 101 | + check_c_source_runs( |
| 102 | + "int main(void){return __builtin_cpu_supports(\"${_fftw_simd}\")?0:1;}" |
| 103 | + _fftw_cpu_has_${_fftw_simd} |
| 104 | + ) |
| 105 | + if(_fftw_cpu_has_${_fftw_simd}) |
| 106 | + set(_fftw_default_${_fftw_simd} ON) |
| 107 | + endif() |
| 108 | + endforeach() |
| 109 | + endif() |
| 110 | + endif() |
| 111 | + else() |
| 112 | + # Cross-compiling: conservative architecture-level fallback. |
| 113 | + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") |
| 114 | + set(_fftw_default_neon ON) |
| 115 | + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64") |
| 116 | + # SSE/SSE2 are baseline on all 64-bit x86 CPUs; AVX/AVX2 not assumed. |
| 117 | + set(_fftw_default_sse ON) |
| 118 | + set(_fftw_default_sse2 ON) |
| 119 | + endif() |
| 120 | + endif() |
| 121 | + |
| 122 | + option( |
| 123 | + FFTW_ENABLE_NEON |
| 124 | + "Enable FFTW NEON SIMD codelets (ARM64)" |
| 125 | + ${_fftw_default_neon} |
| 126 | + ) |
| 127 | + option( |
| 128 | + FFTW_ENABLE_SSE |
| 129 | + "Enable FFTW SSE SIMD codelets (x86)" |
| 130 | + ${_fftw_default_sse} |
| 131 | + ) |
| 132 | + option( |
| 133 | + FFTW_ENABLE_SSE2 |
| 134 | + "Enable FFTW SSE2 SIMD codelets (x86)" |
| 135 | + ${_fftw_default_sse2} |
| 136 | + ) |
| 137 | + option( |
| 138 | + FFTW_ENABLE_AVX |
| 139 | + "Enable FFTW AVX SIMD codelets (x86)" |
| 140 | + ${_fftw_default_avx} |
| 141 | + ) |
| 142 | + option( |
| 143 | + FFTW_ENABLE_AVX2 |
| 144 | + "Enable FFTW AVX2 SIMD codelets (x86)" |
| 145 | + ${_fftw_default_avx2} |
| 146 | + ) |
| 147 | + |
67 | 148 | # Macro to generate library filename with appropriate prefix/suffix |
68 | 149 | # Args: output_var library_base_name |
69 | 150 | macro(_library_name_to_filename output_var library_base_name) |
@@ -111,10 +192,12 @@ if(NOT ITK_USE_SYSTEM_FFTW) |
111 | 192 | -DCMAKE_INSTALL_PREFIX:PATH=${FFTW_STAGED_INSTALL_PREFIX} |
112 | 193 | -DCMAKE_INSTALL_LIBDIR:STRING=${CMAKE_INSTALL_LIBDIR} |
113 | 194 | -DCMAKE_INSTALL_BINDIR:STRING=${CMAKE_INSTALL_BINDIR} |
114 | | - -DDISABLE_FORTRAN:BOOL=ON -DENABLE_AVX:BOOL=OFF -DENABLE_AVX2:BOOL=OFF |
115 | | - -DENABLE_FLOAT:BOOL=ON -DENABLE_LONG_DOUBLE:BOOL=OFF |
| 195 | + -DDISABLE_FORTRAN:BOOL=ON -DENABLE_AVX:BOOL=${FFTW_ENABLE_AVX} |
| 196 | + -DENABLE_AVX2:BOOL=${FFTW_ENABLE_AVX2} -DENABLE_FLOAT:BOOL=ON |
| 197 | + -DENABLE_LONG_DOUBLE:BOOL=OFF -DENABLE_NEON:BOOL=${FFTW_ENABLE_NEON} |
116 | 198 | -DENABLE_OPENMP:BOOL=OFF -DENABLE_QUAD_PRECISION:BOOL=OFF |
117 | | - -DENABLE_SSE:BOOL=OFF -DENABLE_SSE2:BOOL=OFF -DENABLE_THREADS:BOOL=ON |
| 199 | + -DENABLE_SSE:BOOL=${FFTW_ENABLE_SSE} |
| 200 | + -DENABLE_SSE2:BOOL=${FFTW_ENABLE_SSE2} -DENABLE_THREADS:BOOL=ON |
118 | 201 | -DCMAKE_APPLE_SILICON_PROCESSOR:STRING=${CMAKE_APPLE_SILICON_PROCESSOR} |
119 | 202 | -DCMAKE_C_COMPILER_LAUNCHER:PATH=${CMAKE_C_COMPILER_LAUNCHER} |
120 | 203 | -DCMAKE_C_COMPILER:PATH=${CMAKE_C_COMPILER} |
@@ -175,10 +258,12 @@ if(NOT ITK_USE_SYSTEM_FFTW) |
175 | 258 | -DCMAKE_INSTALL_PREFIX:PATH=${FFTW_STAGED_INSTALL_PREFIX} |
176 | 259 | -DCMAKE_INSTALL_LIBDIR:STRING=${CMAKE_INSTALL_LIBDIR} |
177 | 260 | -DCMAKE_INSTALL_BINDIR:STRING=${CMAKE_INSTALL_BINDIR} |
178 | | - -DDISABLE_FORTRAN:BOOL=ON -DENABLE_AVX:BOOL=OFF -DENABLE_AVX2:BOOL=OFF |
179 | | - -DENABLE_FLOAT:BOOL=OFF -DENABLE_LONG_DOUBLE:BOOL=OFF |
| 261 | + -DDISABLE_FORTRAN:BOOL=ON -DENABLE_AVX:BOOL=${FFTW_ENABLE_AVX} |
| 262 | + -DENABLE_AVX2:BOOL=${FFTW_ENABLE_AVX2} -DENABLE_FLOAT:BOOL=OFF |
| 263 | + -DENABLE_LONG_DOUBLE:BOOL=OFF -DENABLE_NEON:BOOL=${FFTW_ENABLE_NEON} |
180 | 264 | -DENABLE_OPENMP:BOOL=OFF -DENABLE_QUAD_PRECISION:BOOL=OFF |
181 | | - -DENABLE_SSE:BOOL=OFF -DENABLE_SSE2:BOOL=OFF -DENABLE_THREADS:BOOL=ON |
| 265 | + -DENABLE_SSE:BOOL=${FFTW_ENABLE_SSE} |
| 266 | + -DENABLE_SSE2:BOOL=${FFTW_ENABLE_SSE2} -DENABLE_THREADS:BOOL=ON |
182 | 267 | -DCMAKE_APPLE_SILICON_PROCESSOR:STRING=${CMAKE_APPLE_SILICON_PROCESSOR} |
183 | 268 | -DCMAKE_C_COMPILER_LAUNCHER:PATH=${CMAKE_C_COMPILER_LAUNCHER} |
184 | 269 | -DCMAKE_C_COMPILER:PATH=${CMAKE_C_COMPILER} |
|
0 commit comments