@@ -70,6 +70,24 @@ fn mask_byte(byte: u8, bit_offset: usize, bit_len: usize) -> u8 {
7070fn count_ones_aligned ( bytes : & [ u8 ] ) -> usize {
7171 #[ cfg( target_arch = "x86_64" ) ]
7272 {
73+ // When the target feature is guaranteed at compile time, skip runtime detection.
74+ #[ cfg( all( target_feature = "avx512f" , target_feature = "avx512vpopcntdq" ) ) ]
75+ if bytes. len ( ) >= 64 {
76+ // SAFETY: Compile-time target feature guarantees availability.
77+ return unsafe { count_ones_aligned_avx512 ( bytes) } ;
78+ }
79+
80+ #[ cfg( all(
81+ not( all( target_feature = "avx512f" , target_feature = "avx512vpopcntdq" ) ) ,
82+ target_feature = "avx2"
83+ ) ) ]
84+ if bytes. len ( ) >= 32 {
85+ // SAFETY: Compile-time target feature guarantees availability.
86+ return unsafe { count_ones_aligned_avx2 ( bytes) } ;
87+ }
88+
89+ // Fall back to runtime detection when features aren't compile-time guaranteed.
90+ #[ cfg( not( all( target_feature = "avx512f" , target_feature = "avx512vpopcntdq" ) ) ) ]
7391 if bytes. len ( ) >= 64
7492 && is_x86_feature_detected ! ( "avx512f" )
7593 && is_x86_feature_detected ! ( "avx512vpopcntdq" )
@@ -78,6 +96,7 @@ fn count_ones_aligned(bytes: &[u8]) -> usize {
7896 return unsafe { count_ones_aligned_avx512 ( bytes) } ;
7997 }
8098
99+ #[ cfg( not( target_feature = "avx2" ) ) ]
81100 if bytes. len ( ) >= 32 && is_x86_feature_detected ! ( "avx2" ) {
82101 // SAFETY: Runtime detection guarantees the required target features.
83102 return unsafe { count_ones_aligned_avx2 ( bytes) } ;
0 commit comments