|
1 | 1 | # |
2 | 2 | # Encapsulates building FFTW as an External Project. |
3 | 3 | # |
4 | | -# SIMD codelet selection |
5 | | -# ---------------------- |
6 | | -# FFTW SIMD codelets are hand-written assembly routines baked into the |
7 | | -# library at compile time. Passing -march=native to the ITK build does |
8 | | -# NOT activate them; they must be requested explicitly via FFTW's own |
9 | | -# CMake options (ENABLE_NEON, ENABLE_SSE, ENABLE_SSE2, ENABLE_AVX, ENABLE_AVX2). |
| 4 | +# SIMD codelet selection and binary redistribution policy |
| 5 | +# ------------------------------------------------------- |
| 6 | +# FFTW SIMD codelets are generated C routines (using SIMD intrinsics) that are |
| 7 | +# compiled INTO the library at build time. Passing -march=native to the ITK |
| 8 | +# build does NOT enable them; they must be requested via FFTW's own CMake options |
| 9 | +# (ENABLE_NEON, ENABLE_SSE, ENABLE_SSE2, ENABLE_AVX, ENABLE_AVX2). |
| 10 | +# |
| 11 | +# For redistributable binary packages (conda, pip/PyPI, manylinux Docker |
| 12 | +# images, etc.) SIMD codelets must only be enabled when the resulting binary |
| 13 | +# will run correctly on ALL machines in the target distribution. The ISA |
| 14 | +# baseline mandated by each architecture ABI is universally safe: |
| 15 | +# |
| 16 | +# x86_64 / AMD64 : SSE and SSE2 are required by the AMD64 ABI. Every |
| 17 | +# 64-bit x86 CPU (including all manylinux2014 / |
| 18 | +# manylinux_2_28 targets) supports them. DEFAULT ON. |
10 | 19 | # |
11 | | -# This file detects appropriate defaults at cmake configure time: |
| 20 | +# aarch64 / arm64 : NEON is required by the AArch64 ABI. Every 64-bit ARM |
| 21 | +# CPU (Apple Silicon, all Linux aarch64 targets) supports |
| 22 | +# it. DEFAULT ON. |
12 | 23 | # |
13 | | -# Native builds (CMAKE_CROSSCOMPILING is false): |
14 | | -# - ARM64 (aarch64/arm64/ARM64): NEON=ON (mandatory in ARMv8); x86 SIMD off. |
15 | | -# - x86/x86_64 with GCC/Clang: each of SSE, SSE2, AVX, AVX2 is probed |
16 | | -# individually via __builtin_cpu_supports() / CheckCSourceRuns so that |
17 | | -# the detected flags match the actual build-host CPU. A pre-AVX |
18 | | -# Sandy Bridge gets SSE+SSE2 only; a Haswell or later gets all four. |
19 | | -# On MSVC the probes are skipped (intrinsic unavailable) and SIMD |
20 | | -# defaults to off; users can override via FFTW_ENABLE_* options. |
21 | | -# - Other architectures: all SIMD off (conservative fallback). |
| 24 | +# AVX / AVX2 : Not part of the baseline ABI. AVX requires Sandy |
| 25 | +# Bridge (2011) or newer; AVX2 requires Haswell (2013) |
| 26 | +# or newer. Enabling them by default would produce |
| 27 | +# binaries that SIGILL on older (but spec-compliant) |
| 28 | +# x86_64 CPUs. DEFAULT OFF unless the compiler is already |
| 29 | +# targeting a micro-architecture that includes them. |
22 | 30 | # |
23 | | -# Cross-compiled builds (CMAKE_CROSSCOMPILING is true): |
24 | | -# - ARM64: NEON=ON (mandatory); x86 SIMD off. |
25 | | -# - x86_64: SSE+SSE2 only (baseline; AVX/AVX2 not assumed for target). |
26 | | -# - Other: all SIMD off. |
| 31 | +# Opt-in to AVX / AVX2 |
| 32 | +# --------------------- |
| 33 | +# If the user's toolchain is already generating AVX/AVX2 instructions |
| 34 | +# (because they passed -march=native, -mavx2, -march=haswell, or an |
| 35 | +# equivalent MSVC /arch: flag), the compiler pre-defines __AVX__ / __AVX2__. |
| 36 | +# This file detects those macros at cmake configure time via |
| 37 | +# check_c_source_compiles (compile-time, NOT runtime — no build-host CPU |
| 38 | +# probe is performed) and auto-enables the matching FFTW codelets so that |
| 39 | +# FFTW's generated code aligns with the rest of the ITK build. |
| 40 | +# To ship AVX2 in a redistributed package anyway (it will SIGILL on pre-Haswell CPUs), set: |
| 41 | +# cmake -DFFTW_ENABLE_AVX2=ON ... |
| 42 | +# |
| 43 | +# macOS universal binary |
| 44 | +# ---------------------- |
| 45 | +# When CMAKE_OSX_ARCHITECTURES lists more than one value (e.g. "arm64;x86_64") |
| 46 | +# a single FFTW configure/build pass cannot correctly serve both slices. |
| 47 | +# SIMD defaults are set to OFF in this case; use ITK_USE_SYSTEM_FFTW with a |
| 48 | +# proper universal FFTW installation (e.g., per-arch builds merged with lipo) if SIMD |
| 49 | +# performance is required in a macOS universal build. |
27 | 50 | # |
28 | | -# Every flag is an individually overridable cache option, e.g.: |
29 | | -# cmake -DFFTW_ENABLE_AVX2=OFF ... |
| 51 | +# Every flag remains individually overridable, e.g.: |
| 52 | +# cmake -DFFTW_ENABLE_AVX2=ON # opt in to AVX2 for a non-redistributed build |
| 53 | +# cmake -DFFTW_ENABLE_SSE2=OFF # opt out of SSE2 (unusual) |
30 | 54 | # Note: option() defaults are only applied on the first configure. |
31 | 55 | # To re-detect after a toolchain change, delete the CMake cache or use |
32 | 56 | # cmake --fresh, or pass explicit -DFFTW_ENABLE_*= overrides. |
@@ -84,70 +108,111 @@ if(NOT ITK_USE_SYSTEM_FFTW) |
84 | 108 | set(FFTW_STAGED_INSTALL_PREFIX "${ITK_BINARY_DIR}/fftw") |
85 | 109 |
|
86 | 110 | # Detect SIMD defaults (see file header for full policy description). |
87 | | - # CheckCSourceRuns results are cached after the first cmake configure run. |
88 | | - include(CheckCSourceRuns) |
| 111 | + # |
| 112 | + # Architecture-guaranteed ISA baselines (no runtime probe needed): |
| 113 | + # - x86_64 mandates SSE + SSE2 in the AMD64 ABI. |
| 114 | + # - arm64/aarch64 mandates NEON in the AArch64 ABI. |
| 115 | + # |
| 116 | + # AVX/AVX2 opt-in via compiler predefined macros: |
| 117 | + # check_c_source_compiles (not _runs) reflects what the compiler is |
| 118 | + # generating for the TARGET architecture, not what the BUILD HOST's CPU |
| 119 | + # can execute. This is safe for cross-compilation and redistribution. |
| 120 | + include(CheckCSourceCompiles) |
89 | 121 |
|
90 | 122 | set(_fftw_default_neon OFF) |
91 | 123 | set(_fftw_default_sse OFF) |
92 | 124 | set(_fftw_default_sse2 OFF) |
93 | 125 | set(_fftw_default_avx OFF) |
94 | 126 | set(_fftw_default_avx2 OFF) |
95 | 127 |
|
96 | | - if(NOT CMAKE_CROSSCOMPILING) |
97 | | - if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") |
98 | | - # NEON is mandatory in ARMv8/AArch64 — every arm64 CPU has it. |
99 | | - set(_fftw_default_neon ON) |
100 | | - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i686") |
101 | | - # Probe each x86 SIMD level individually via CPUID so the defaults |
102 | | - # are accurate for the actual build-host CPU (e.g. pre-AVX Sandy Bridge |
103 | | - # or pre-AVX2 Ivy Bridge get only the levels their hardware supports). |
104 | | - # __builtin_cpu_supports is a GCC/Clang intrinsic; skip on MSVC. |
105 | | - if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|AppleClang") |
106 | | - foreach(_fftw_simd IN ITEMS sse sse2 avx avx2) |
107 | | - check_c_source_runs( |
108 | | - "int main(void){return __builtin_cpu_supports(\"${_fftw_simd}\")?0:1;}" |
109 | | - _fftw_cpu_has_${_fftw_simd} |
110 | | - ) |
111 | | - if(_fftw_cpu_has_${_fftw_simd}) |
112 | | - set(_fftw_default_${_fftw_simd} ON) |
113 | | - endif() |
114 | | - endforeach() |
115 | | - endif() |
| 128 | + # Detect macOS universal binary build: a single configure+build pass cannot |
| 129 | + # simultaneously produce correct SIMD for both arm64 and x86_64 slices. |
| 130 | + set(_fftw_is_universal FALSE) |
| 131 | + if(APPLE AND CMAKE_OSX_ARCHITECTURES) |
| 132 | + list(LENGTH CMAKE_OSX_ARCHITECTURES _fftw_arch_count) |
| 133 | + if(_fftw_arch_count GREATER 1) |
| 134 | + set(_fftw_is_universal TRUE) |
| 135 | + message( |
| 136 | + STATUS |
| 137 | + "FFTW: macOS universal binary (${CMAKE_OSX_ARCHITECTURES}): " |
| 138 | + "per-architecture SIMD defaults disabled. " |
| 139 | + "Use ITK_USE_SYSTEM_FFTW with a universal FFTW to enable SIMD." |
| 140 | + ) |
116 | 141 | endif() |
117 | | - else() |
118 | | - # Cross-compiling: conservative architecture-level fallback. |
| 142 | + endif() |
| 143 | + |
| 144 | + if(NOT _fftw_is_universal) |
119 | 145 | if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64") |
| 146 | + # NEON is mandatory in the AArch64 ABI — every arm64 CPU has it. |
| 147 | + # Safe for all conda/pip arm64 packages and manylinux aarch64. |
120 | 148 | set(_fftw_default_neon ON) |
121 | 149 | elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64") |
122 | | - # SSE/SSE2 are baseline on all 64-bit x86 CPUs; AVX/AVX2 not assumed. |
| 150 | + # SSE and SSE2 are required by the AMD64 ABI — universally present on |
| 151 | + # every 64-bit x86 CPU, including the oldest manylinux build targets. |
| 152 | + # Safe for all conda/pip x86_64 packages. |
123 | 153 | set(_fftw_default_sse ON) |
124 | 154 | set(_fftw_default_sse2 ON) |
| 155 | + # AVX and AVX2 are NOT part of the AMD64 baseline. Auto-enable them |
| 156 | + # only when the compiler is already producing those instructions — i.e. |
| 157 | + # when the user explicitly asked for a specific micro-architecture via |
| 158 | + # -march=native, -mavx2, /arch:AVX2, etc. This compile-time check |
| 159 | + # mirrors the approach recommended by seanm in ITK PR #6006: |
| 160 | + # "the compiler knows what CPU it's compiling for." |
| 161 | + # |
| 162 | + # check_c_source_compiles caches its result by variable name. Unset |
| 163 | + # the cache entry first so the probe always re-runs against the current |
| 164 | + # CMAKE_C_FLAGS; this ensures that adding -march=native on a subsequent |
| 165 | + # configure is correctly reflected in the auto-detected default. |
| 166 | + # Note: FFTW_ENABLE_AVX / FFTW_ENABLE_AVX2 follow standard option() |
| 167 | + # caching — they are only auto-set from the detected default when not |
| 168 | + # already present in the cache. To force re-evaluation of the option |
| 169 | + # after a CMAKE_C_FLAGS change, delete those entries from the CMake cache or |
| 170 | + # pass -DFFTW_ENABLE_AVX2=ON explicitly. |
| 171 | + unset(_fftw_compiler_targets_avx CACHE) |
| 172 | + check_c_source_compiles( |
| 173 | + "#ifndef __AVX__\n#error AVX not enabled\n#endif\nint main(void){return 0;}" |
| 174 | + _fftw_compiler_targets_avx |
| 175 | + ) |
| 176 | + if(_fftw_compiler_targets_avx) |
| 177 | + set(_fftw_default_avx ON) |
| 178 | + endif() |
| 179 | + unset(_fftw_compiler_targets_avx2 CACHE) |
| 180 | + check_c_source_compiles( |
| 181 | + "#ifndef __AVX2__\n#error AVX2 not enabled\n#endif\nint main(void){return 0;}" |
| 182 | + _fftw_compiler_targets_avx2 |
| 183 | + ) |
| 184 | + if(_fftw_compiler_targets_avx2) |
| 185 | + set(_fftw_default_avx2 ON) |
| 186 | + endif() |
| 187 | + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686|i386") |
| 188 | + # 32-bit x86 ABI does not mandate SSE/SSE2. Leave defaults OFF; |
| 189 | + # users may opt in explicitly if their minimum target CPU supports them. |
125 | 190 | endif() |
126 | 191 | endif() |
127 | 192 |
|
128 | 193 | option( |
129 | 194 | FFTW_ENABLE_NEON |
130 | | - "Enable FFTW NEON SIMD codelets (ARM64)" |
| 195 | + "Enable FFTW NEON SIMD codelets (ARM64; ON by default on aarch64/arm64)" |
131 | 196 | ${_fftw_default_neon} |
132 | 197 | ) |
133 | 198 | option( |
134 | 199 | FFTW_ENABLE_SSE |
135 | | - "Enable FFTW SSE SIMD codelets (x86)" |
| 200 | + "Enable FFTW SSE SIMD codelets (x86; ON by default on x86_64 — required by AMD64 ABI)" |
136 | 201 | ${_fftw_default_sse} |
137 | 202 | ) |
138 | 203 | option( |
139 | 204 | FFTW_ENABLE_SSE2 |
140 | | - "Enable FFTW SSE2 SIMD codelets (x86)" |
| 205 | + "Enable FFTW SSE2 SIMD codelets (x86; ON by default on x86_64 — required by AMD64 ABI)" |
141 | 206 | ${_fftw_default_sse2} |
142 | 207 | ) |
143 | 208 | option( |
144 | 209 | FFTW_ENABLE_AVX |
145 | | - "Enable FFTW AVX SIMD codelets (x86)" |
| 210 | + "Enable FFTW AVX SIMD codelets (Sandy Bridge+; OFF by default for redistribution safety)" |
146 | 211 | ${_fftw_default_avx} |
147 | 212 | ) |
148 | 213 | option( |
149 | 214 | FFTW_ENABLE_AVX2 |
150 | | - "Enable FFTW AVX2 SIMD codelets (x86)" |
| 215 | + "Enable FFTW AVX2 SIMD codelets (Haswell+; OFF by default for redistribution safety)" |
151 | 216 | ${_fftw_default_avx2} |
152 | 217 | ) |
153 | 218 |
|
|
0 commit comments