Skip to content

Commit 1b06969

Browse files
committed
feat: PREFERRED_F64/F32/U64/I16_LANES compile-time constants for array_windows dispatch
Compile-time const (not LazyLock) — resolved by #[cfg(target_feature)]: AVX-512: F64=8, F32=16, U64=8, I16=32 AVX2: F64=4, F32=8, U64=4, I16=16 Scalar: same as AVX2 Enables consumers to use array_windows::<{PREFERRED_F64_LANES}>() for native-width SIMD processing without runtime branching. https://claude.ai/code/session_01ChLvBfpJS8dQhHxRD4pYNp
1 parent 75b45e9 commit 1b06969

1 file changed

Lines changed: 53 additions & 0 deletions

File tree

src/simd.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,59 @@ fn tier() -> Tier { *TIER }
2727
// The check is cheap (reads a cached cpuid result) and the batch
2828
// function uses as_chunks::<16>() + as_chunks::<8>() for SIMD widths.
2929

30+
// ============================================================================
31+
// Preferred SIMD lane widths — compile-time constants for array_windows
32+
// ============================================================================
33+
//
34+
// Consumer code uses these to select array_windows size at compile time:
35+
//
36+
// for window in data.array_windows::<{crate::simd::PREFERRED_F64_LANES}>() {
37+
// let v = F64x8::from_array(*window); // AVX-512: native 8-wide
38+
// // or
39+
// let v = F64x4::from_array(*window); // AVX2: native 4-wide
40+
// }
41+
//
42+
// generic_const_exprs is nightly, so consumers must #[cfg] branch on window size.
43+
// These constants document the preferred width for the compile-time target features (they do not follow the runtime-detected tier).
44+
45+
/// Preferred f64 SIMD width (elements per register).
///
/// AVX-512: 8 lanes (`__m512d`). AVX2/scalar: 4 lanes (`__m256d`); the scalar
/// fallback deliberately keeps the AVX2 shape.
///
/// The value is fixed by the compile-time `target_feature` set, not by runtime
/// CPU detection. A single `cfg!` expression guarantees exactly one definition
/// on every target, including non-x86_64 targets built with `avx512f`
/// (e.g. 32-bit x86).
pub const PREFERRED_F64_LANES: usize = if cfg!(target_feature = "avx512f") { 8 } else { 4 };
53+
54+
/// Preferred f32 SIMD width (elements per register).
///
/// AVX-512: 16 lanes (`__m512`). AVX2/scalar: 8 lanes (`__m256`); the scalar
/// fallback deliberately keeps the AVX2 shape.
///
/// The value is fixed by the compile-time `target_feature` set, not by runtime
/// CPU detection. A single `cfg!` expression guarantees exactly one definition
/// on every target, including non-x86_64 targets built with `avx512f`
/// (e.g. 32-bit x86).
pub const PREFERRED_F32_LANES: usize = if cfg!(target_feature = "avx512f") { 16 } else { 8 };
62+
63+
/// Preferred u64 SIMD width (elements per register).
///
/// AVX-512: 8 lanes. AVX2/scalar: 4 lanes; the scalar fallback deliberately
/// keeps the AVX2 shape.
///
/// The value is fixed by the compile-time `target_feature` set, not by runtime
/// CPU detection. A single `cfg!` expression guarantees exactly one definition
/// on every target, including non-x86_64 targets built with `avx512f`
/// (e.g. 32-bit x86).
pub const PREFERRED_U64_LANES: usize = if cfg!(target_feature = "avx512f") { 8 } else { 4 };
71+
72+
/// Preferred i16 SIMD width (for Base17 L1 on `i16[17]`).
///
/// AVX-512: 32 lanes (`__m512i` via epi16). AVX2/scalar: 16 lanes (`__m256i`).
/// Base17 has 17 dims — AVX-512 covers 32 (load 17 + 15 padding),
/// AVX2 covers 16 + 1 scalar.
///
/// The value is fixed by the compile-time `target_feature` set, not by runtime
/// CPU detection. A single `cfg!` expression guarantees exactly one definition
/// on every target, including non-x86_64 targets built with `avx512f`
/// (e.g. 32-bit x86).
///
/// NOTE(review): 512-bit `epi16` arithmetic is an AVX-512BW feature, while this
/// constant is keyed on `avx512f` alone — confirm that the 32-lane i16 code
/// path is additionally gated on `avx512bw`.
pub const PREFERRED_I16_LANES: usize = if cfg!(target_feature = "avx512f") { 32 } else { 16 };
82+
3083
// ============================================================================
3184
// x86_64: re-export based on tier
3285
// ============================================================================

0 commit comments

Comments
 (0)