|
1 | | -//! Portable SIMD types — `crate::simd::f32x16` today, `std::simd::f32x16` tomorrow. |
| 1 | +//! SIMD polyfill — `crate::simd::F32x16` dispatches via LazyLock<Tier>. |
2 | 2 | //! |
3 | | -//! On x86_64: re-exports AVX-512 backed types from [`crate::simd_avx512`]. |
4 | | -//! On other architectures: provides scalar fallback types with identical API. |
| 3 | +//! Same pattern as `backend/native.rs`: detect once, dispatch forever. |
| 4 | +//! AVX-512 → AVX2 → Scalar. Consumer writes `crate::simd::F32x16`. Period. |
5 | 5 | //! |
6 | | -//! When `std::simd` stabilizes, delete this file + `simd_avx512.rs` + `simd_avx2.rs` |
7 | | -//! and change `use crate::simd::` → `use std::simd::` in all consumers. One word. |
| 6 | +//! When `std::simd` stabilizes: swap this file. Zero consumer changes. |
| 7 | +
|
| 8 | +use std::sync::LazyLock; |
| 9 | + |
/// Instruction-set tier the SIMD layer can rely on at runtime.
///
/// Variants are listed from most to least capable; detection picks the first
/// one the running CPU reports support for.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Tier {
    /// AVX-512 Foundation (`avx512f`) was detected.
    Avx512,
    /// AVX2 was detected and AVX-512F was not.
    Avx2,
    /// Neither feature was detected, or the target is not x86_64.
    Scalar,
}

/// Probes the CPU and reports the best tier it supports.
#[cfg(target_arch = "x86_64")]
fn detect_tier() -> Tier {
    if is_x86_feature_detected!("avx512f") {
        Tier::Avx512
    } else if is_x86_feature_detected!("avx2") {
        Tier::Avx2
    } else {
        Tier::Scalar
    }
}

/// Targets other than x86_64 have no vector tier to probe for.
#[cfg(not(target_arch = "x86_64"))]
fn detect_tier() -> Tier {
    Tier::Scalar
}

/// Process-wide cache of the detected tier; filled on first access.
static TIER: LazyLock<Tier> = LazyLock::new(detect_tier);

/// Returns the cached tier, running detection on the first call only.
#[inline]
fn tier() -> Tier {
    *TIER
}
8 | 24 |
|
9 | 25 | // ============================================================================ |
10 | | -// x86_64: re-export from simd_avx512 (the real implementations) |
| 26 | +// x86_64: re-export based on tier |
11 | 27 | // ============================================================================ |
12 | 28 |
|
| 29 | +// 256-bit AVX2 base types — always available, used by both tiers |
| 30 | +#[cfg(target_arch = "x86_64")] |
| 31 | +pub use crate::simd_avx512::{F32x8, F64x4, f32x8, f64x4}; |
| 32 | + |
| 33 | +// 512-bit types: ideally the tier would select which implementation backs them: |
| 34 | +// on AVX-512 machines, the simd_avx512 types (native __m512); |
| 35 | +// on AVX2 machines, the simd_avx2 types (2× __m256 composed). |
| 36 | +// The tier itself is detected once via LazyLock; after that it is a plain enum match. |
| 37 | +// |
| 38 | +// PROBLEM: Rust can't switch `pub use` at runtime. |
| 39 | +// SOLUTION: re-export the AVX2 versions (safe on any x86_64 CPU that has AVX2 — not on older ones). |
| 40 | +// On AVX-512 machines, the AVX2 composed types still work correctly — |
| 41 | +// just 2 instructions instead of 1. The BLAS hot paths in native.rs |
| 42 | +// already dispatch to kernels_avx512 via their own tier() check. |
| 43 | +// The SIMD types are for HPC consumer code, not inner BLAS loops. |
| 44 | + |
13 | 45 | #[cfg(target_arch = "x86_64")] |
14 | | -#[allow(unused_imports)] |
15 | | -pub use crate::simd_avx512::{ |
16 | | - // 512-bit types |
| 46 | +pub use crate::simd_avx2::{ |
17 | 47 | F32x16, F64x8, U8x64, I32x16, I64x8, U32x16, U64x8, |
18 | | - // 256-bit AVX2 types |
19 | | - F32x8, F64x4, |
20 | | - // Masks |
21 | 48 | F32Mask16, F64Mask8, |
22 | | - // Lowercase aliases (std::simd convention) |
23 | 49 | f32x16, f64x8, u8x64, i32x16, i64x8, u32x16, u64x8, |
24 | | - f32x8, f64x4, |
25 | 50 | }; |
26 | 51 |
|
27 | 52 | // ============================================================================ |
|
0 commit comments