@@ -517,6 +517,283 @@ pub fn dgemm_blocked(
517517 }
518518}
519519
// ============================================================================
// AVX2 512-bit types: float vectors composed from 2× 256-bit halves
//
// Same API as simd_avx512::F32x16 etc., but each float vector is a pair of
// 256-bit halves (e.g. F32x16 wraps two f32x8 values, F64x8 two f64x4 values).
// The integer vectors further down are plain arrays processed with scalar ops.
// Consumer sees crate::simd::F32x16 — simd.rs picks avx512 or avx2 via LazyLock.
// ============================================================================
526+
527+ use core:: fmt;
528+ use core:: ops:: { Add , AddAssign , Sub , SubAssign , Mul , MulAssign , Div , DivAssign , Neg ,
529+ BitAnd , BitAndAssign , BitOr , BitOrAssign , BitXor , BitXorAssign , Not } ;
530+
531+ /// 16×f32 via 2× AVX2 F32x8 (__m256). Same API as simd_avx512::F32x16.
532+ #[ derive( Copy , Clone ) ]
533+ #[ repr( align( 64 ) ) ]
534+ pub struct F32x16 ( pub f32x8 , pub f32x8 ) ;
535+
536+ impl F32x16 {
537+ pub const LANES : usize = 16 ;
538+ #[ inline( always) ] pub fn splat ( v : f32 ) -> Self { Self ( f32x8:: splat ( v) , f32x8:: splat ( v) ) }
539+ #[ inline( always) ] pub fn from_slice ( s : & [ f32 ] ) -> Self {
540+ assert ! ( s. len( ) >= 16 ) ;
541+ Self ( f32x8:: from_slice ( & s[ ..8 ] ) , f32x8:: from_slice ( & s[ 8 ..16 ] ) )
542+ }
543+ #[ inline( always) ] pub fn from_array ( a : [ f32 ; 16 ] ) -> Self {
544+ Self ( f32x8:: from_array ( a[ ..8 ] . try_into ( ) . unwrap ( ) ) , f32x8:: from_array ( a[ 8 ..] . try_into ( ) . unwrap ( ) ) )
545+ }
546+ #[ inline( always) ] pub fn to_array ( self ) -> [ f32 ; 16 ] {
547+ let mut out = [ 0.0f32 ; 16 ] ;
548+ out[ ..8 ] . copy_from_slice ( & self . 0 . to_array ( ) ) ;
549+ out[ 8 ..] . copy_from_slice ( & self . 1 . to_array ( ) ) ;
550+ out
551+ }
552+ #[ inline( always) ] pub fn copy_to_slice ( self , s : & mut [ f32 ] ) {
553+ assert ! ( s. len( ) >= 16 ) ;
554+ self . 0 . copy_to_slice ( & mut s[ ..8 ] ) ;
555+ self . 1 . copy_to_slice ( & mut s[ 8 ..16 ] ) ;
556+ }
557+ #[ inline( always) ] pub fn reduce_sum ( self ) -> f32 { self . 0 . reduce_sum ( ) + self . 1 . reduce_sum ( ) }
558+ #[ inline( always) ] pub fn reduce_min ( self ) -> f32 {
559+ let a = self . to_array ( ) ;
560+ a. iter ( ) . copied ( ) . fold ( f32:: INFINITY , f32:: min)
561+ }
562+ #[ inline( always) ] pub fn reduce_max ( self ) -> f32 {
563+ let a = self . to_array ( ) ;
564+ a. iter ( ) . copied ( ) . fold ( f32:: NEG_INFINITY , f32:: max)
565+ }
566+ #[ inline( always) ] pub fn abs ( self ) -> Self { Self ( self . 0 . abs ( ) , self . 1 . abs ( ) ) }
567+ #[ inline( always) ] pub fn sqrt ( self ) -> Self {
568+ let a = self . to_array ( ) ;
569+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . sqrt ( ) ; } Self :: from_array ( o)
570+ }
571+ #[ inline( always) ] pub fn round ( self ) -> Self {
572+ let a = self . to_array ( ) ;
573+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . round ( ) ; } Self :: from_array ( o)
574+ }
575+ #[ inline( always) ] pub fn floor ( self ) -> Self {
576+ let a = self . to_array ( ) ;
577+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . floor ( ) ; } Self :: from_array ( o)
578+ }
579+ #[ inline( always) ] pub fn mul_add ( self , b : Self , c : Self ) -> Self {
580+ let a = self . to_array ( ) ; let ba = b. to_array ( ) ; let ca = c. to_array ( ) ;
581+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . mul_add ( ba[ i] , ca[ i] ) ; } Self :: from_array ( o)
582+ }
583+ #[ inline( always) ] pub fn simd_min ( self , other : Self ) -> Self {
584+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
585+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . min ( b[ i] ) ; } Self :: from_array ( o)
586+ }
587+ #[ inline( always) ] pub fn simd_max ( self , other : Self ) -> Self {
588+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
589+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . max ( b[ i] ) ; } Self :: from_array ( o)
590+ }
591+ #[ inline( always) ] pub fn simd_clamp ( self , lo : Self , hi : Self ) -> Self { self . simd_max ( lo) . simd_min ( hi) }
592+ #[ inline( always) ] pub fn simd_lt ( self , other : Self ) -> F32Mask16 {
593+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
594+ let mut bits: u16 = 0 ; for i in 0 ..16 { if a[ i] < b[ i] { bits |= 1 << i; } } F32Mask16 ( bits)
595+ }
596+ #[ inline( always) ] pub fn simd_le ( self , other : Self ) -> F32Mask16 {
597+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
598+ let mut bits: u16 = 0 ; for i in 0 ..16 { if a[ i] <= b[ i] { bits |= 1 << i; } } F32Mask16 ( bits)
599+ }
600+ #[ inline( always) ] pub fn simd_gt ( self , other : Self ) -> F32Mask16 { other. simd_lt ( self ) }
601+ #[ inline( always) ] pub fn simd_ge ( self , other : Self ) -> F32Mask16 { other. simd_le ( self ) }
602+ #[ inline( always) ] pub fn simd_eq ( self , other : Self ) -> F32Mask16 {
603+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
604+ let mut bits: u16 = 0 ; for i in 0 ..16 { if a[ i] == b[ i] { bits |= 1 << i; } } F32Mask16 ( bits)
605+ }
606+ #[ inline( always) ] pub fn simd_ne ( self , other : Self ) -> F32Mask16 {
607+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
608+ let mut bits: u16 = 0 ; for i in 0 ..16 { if a[ i] != b[ i] { bits |= 1 << i; } } F32Mask16 ( bits)
609+ }
610+ #[ inline( always) ] pub fn to_bits ( self ) -> U32x16 {
611+ let a = self . to_array ( ) ;
612+ let mut o = [ 0u32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] . to_bits ( ) ; } U32x16 ( o)
613+ }
614+ #[ inline( always) ] pub fn from_bits ( bits : U32x16 ) -> Self {
615+ let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = f32:: from_bits ( bits. 0 [ i] ) ; } Self :: from_array ( o)
616+ }
617+ #[ inline( always) ] pub fn cast_i32 ( self ) -> I32x16 {
618+ let a = self . to_array ( ) ;
619+ let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = a[ i] as i32 ; } I32x16 ( o)
620+ }
621+ }
622+
623+ impl Add for F32x16 { type Output = Self ; #[ inline( always) ] fn add ( self , rhs : Self ) -> Self { Self ( self . 0 + rhs. 0 , self . 1 + rhs. 1 ) } }
624+ impl Sub for F32x16 { type Output = Self ; #[ inline( always) ] fn sub ( self , rhs : Self ) -> Self { Self ( self . 0 - rhs. 0 , self . 1 - rhs. 1 ) } }
625+ impl Mul for F32x16 { type Output = Self ; #[ inline( always) ] fn mul ( self , rhs : Self ) -> Self { Self ( self . 0 * rhs. 0 , self . 1 * rhs. 1 ) } }
626+ impl Div for F32x16 { type Output = Self ; #[ inline( always) ] fn div ( self , rhs : Self ) -> Self { Self ( self . 0 / rhs. 0 , self . 1 / rhs. 1 ) } }
627+ impl AddAssign for F32x16 { #[ inline( always) ] fn add_assign ( & mut self , rhs : Self ) { * self = * self + rhs; } }
628+ impl SubAssign for F32x16 { #[ inline( always) ] fn sub_assign ( & mut self , rhs : Self ) { * self = * self - rhs; } }
629+ impl MulAssign for F32x16 { #[ inline( always) ] fn mul_assign ( & mut self , rhs : Self ) { * self = * self * rhs; } }
630+ impl DivAssign for F32x16 { #[ inline( always) ] fn div_assign ( & mut self , rhs : Self ) { * self = * self / rhs; } }
631+ impl Neg for F32x16 { type Output = Self ; #[ inline( always) ] fn neg ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = -a[ i] ; } Self :: from_array ( o) } }
632+ impl fmt:: Debug for F32x16 { fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result { write ! ( f, "F32x16({:?})" , self . to_array( ) ) } }
633+ impl PartialEq for F32x16 { fn eq ( & self , other : & Self ) -> bool { self . to_array ( ) == other. to_array ( ) } }
634+ impl Default for F32x16 { fn default ( ) -> Self { Self :: splat ( 0.0 ) } }
635+
636+ #[ derive( Copy , Clone , Debug ) ]
637+ pub struct F32Mask16 ( pub u16 ) ;
638+ impl F32Mask16 {
639+ #[ inline( always) ]
640+ pub fn select ( self , true_val : F32x16 , false_val : F32x16 ) -> F32x16 {
641+ let t = true_val. to_array ( ) ; let f = false_val. to_array ( ) ;
642+ let mut o = [ 0.0f32 ; 16 ] ;
643+ for i in 0 ..16 { o[ i] = if ( self . 0 >> i) & 1 == 1 { t[ i] } else { f[ i] } ; }
644+ F32x16 :: from_array ( o)
645+ }
646+ }
647+
648+ /// 8×f64 via 2× AVX2 F64x4 (__m256d). Same API as simd_avx512::F64x8.
649+ #[ derive( Copy , Clone ) ]
650+ #[ repr( align( 64 ) ) ]
651+ pub struct F64x8 ( pub f64x4 , pub f64x4 ) ;
652+
653+ impl F64x8 {
654+ pub const LANES : usize = 8 ;
655+ #[ inline( always) ] pub fn splat ( v : f64 ) -> Self { Self ( f64x4:: splat ( v) , f64x4:: splat ( v) ) }
656+ #[ inline( always) ] pub fn from_slice ( s : & [ f64 ] ) -> Self {
657+ assert ! ( s. len( ) >= 8 ) ;
658+ Self ( f64x4:: from_slice ( & s[ ..4 ] ) , f64x4:: from_slice ( & s[ 4 ..8 ] ) )
659+ }
660+ #[ inline( always) ] pub fn from_array ( a : [ f64 ; 8 ] ) -> Self {
661+ Self ( f64x4:: from_array ( a[ ..4 ] . try_into ( ) . unwrap ( ) ) , f64x4:: from_array ( a[ 4 ..] . try_into ( ) . unwrap ( ) ) )
662+ }
663+ #[ inline( always) ] pub fn to_array ( self ) -> [ f64 ; 8 ] {
664+ let mut out = [ 0.0f64 ; 8 ] ;
665+ out[ ..4 ] . copy_from_slice ( & self . 0 . to_array ( ) ) ;
666+ out[ 4 ..] . copy_from_slice ( & self . 1 . to_array ( ) ) ;
667+ out
668+ }
669+ #[ inline( always) ] pub fn copy_to_slice ( self , s : & mut [ f64 ] ) {
670+ assert ! ( s. len( ) >= 8 ) ;
671+ self . 0 . copy_to_slice ( & mut s[ ..4 ] ) ;
672+ self . 1 . copy_to_slice ( & mut s[ 4 ..8 ] ) ;
673+ }
674+ #[ inline( always) ] pub fn reduce_sum ( self ) -> f64 { self . 0 . reduce_sum ( ) + self . 1 . reduce_sum ( ) }
675+ #[ inline( always) ] pub fn reduce_min ( self ) -> f64 { let a = self . to_array ( ) ; a. iter ( ) . copied ( ) . fold ( f64:: INFINITY , f64:: min) }
676+ #[ inline( always) ] pub fn reduce_max ( self ) -> f64 { let a = self . to_array ( ) ; a. iter ( ) . copied ( ) . fold ( f64:: NEG_INFINITY , f64:: max) }
677+ #[ inline( always) ] pub fn abs ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . abs ( ) ; } Self :: from_array ( o) }
678+ #[ inline( always) ] pub fn sqrt ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . sqrt ( ) ; } Self :: from_array ( o) }
679+ #[ inline( always) ] pub fn round ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . round ( ) ; } Self :: from_array ( o) }
680+ #[ inline( always) ] pub fn floor ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . floor ( ) ; } Self :: from_array ( o) }
681+ #[ inline( always) ] pub fn mul_add ( self , b : Self , c : Self ) -> Self {
682+ let a = self . to_array ( ) ; let ba = b. to_array ( ) ; let ca = c. to_array ( ) ;
683+ let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . mul_add ( ba[ i] , ca[ i] ) ; } Self :: from_array ( o)
684+ }
685+ #[ inline( always) ] pub fn simd_min ( self , other : Self ) -> Self { let a = self . to_array ( ) ; let b = other. to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . min ( b[ i] ) ; } Self :: from_array ( o) }
686+ #[ inline( always) ] pub fn simd_max ( self , other : Self ) -> Self { let a = self . to_array ( ) ; let b = other. to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . max ( b[ i] ) ; } Self :: from_array ( o) }
687+ #[ inline( always) ] pub fn simd_clamp ( self , lo : Self , hi : Self ) -> Self { self . simd_max ( lo) . simd_min ( hi) }
688+ #[ inline( always) ] pub fn simd_ge ( self , other : Self ) -> F64Mask8 {
689+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
690+ let mut bits: u8 = 0 ; for i in 0 ..8 { if a[ i] >= b[ i] { bits |= 1 << i; } } F64Mask8 ( bits)
691+ }
692+ #[ inline( always) ] pub fn simd_le ( self , other : Self ) -> F64Mask8 {
693+ let a = self . to_array ( ) ; let b = other. to_array ( ) ;
694+ let mut bits: u8 = 0 ; for i in 0 ..8 { if a[ i] <= b[ i] { bits |= 1 << i; } } F64Mask8 ( bits)
695+ }
696+ #[ inline( always) ] pub fn to_bits ( self ) -> U64x8 {
697+ let a = self . to_array ( ) ; let mut o = [ 0u64 ; 8 ] ; for i in 0 ..8 { o[ i] = a[ i] . to_bits ( ) ; } U64x8 ( o)
698+ }
699+ #[ inline( always) ] pub fn from_bits ( bits : U64x8 ) -> Self {
700+ let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = f64:: from_bits ( bits. 0 [ i] ) ; } Self :: from_array ( o)
701+ }
702+ }
703+
704+ impl Add for F64x8 { type Output = Self ; #[ inline( always) ] fn add ( self , rhs : Self ) -> Self { Self ( self . 0 + rhs. 0 , self . 1 + rhs. 1 ) } }
705+ impl Sub for F64x8 { type Output = Self ; #[ inline( always) ] fn sub ( self , rhs : Self ) -> Self { Self ( self . 0 - rhs. 0 , self . 1 - rhs. 1 ) } }
706+ impl Mul for F64x8 { type Output = Self ; #[ inline( always) ] fn mul ( self , rhs : Self ) -> Self { Self ( self . 0 * rhs. 0 , self . 1 * rhs. 1 ) } }
707+ impl Div for F64x8 { type Output = Self ; #[ inline( always) ] fn div ( self , rhs : Self ) -> Self { Self ( self . 0 / rhs. 0 , self . 1 / rhs. 1 ) } }
708+ impl AddAssign for F64x8 { #[ inline( always) ] fn add_assign ( & mut self , rhs : Self ) { * self = * self + rhs; } }
709+ impl SubAssign for F64x8 { #[ inline( always) ] fn sub_assign ( & mut self , rhs : Self ) { * self = * self - rhs; } }
710+ impl MulAssign for F64x8 { #[ inline( always) ] fn mul_assign ( & mut self , rhs : Self ) { * self = * self * rhs; } }
711+ impl DivAssign for F64x8 { #[ inline( always) ] fn div_assign ( & mut self , rhs : Self ) { * self = * self / rhs; } }
712+ impl Neg for F64x8 { type Output = Self ; #[ inline( always) ] fn neg ( self ) -> Self { let a = self . to_array ( ) ; let mut o = [ 0.0f64 ; 8 ] ; for i in 0 ..8 { o[ i] = -a[ i] ; } Self :: from_array ( o) } }
713+ impl fmt:: Debug for F64x8 { fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result { write ! ( f, "F64x8({:?})" , self . to_array( ) ) } }
714+ impl PartialEq for F64x8 { fn eq ( & self , other : & Self ) -> bool { self . to_array ( ) == other. to_array ( ) } }
715+ impl Default for F64x8 { fn default ( ) -> Self { Self :: splat ( 0.0 ) } }
716+
717+ #[ derive( Copy , Clone , Debug ) ]
718+ pub struct F64Mask8 ( pub u8 ) ;
719+ impl F64Mask8 {
720+ #[ inline( always) ]
721+ pub fn select ( self , true_val : F64x8 , false_val : F64x8 ) -> F64x8 {
722+ let t = true_val. to_array ( ) ; let f = false_val. to_array ( ) ;
723+ let mut o = [ 0.0f64 ; 8 ] ;
724+ for i in 0 ..8 { o[ i] = if ( self . 0 >> i) & 1 == 1 { t[ i] } else { f[ i] } ; }
725+ F64x8 :: from_array ( o)
726+ }
727+ }
728+
729+ // ── Integer types: array-backed, use scalar ops (no AVX2 integer 512-bit) ──
730+
/// Generates an array-backed integer vector type.
///
/// `$name` is the type to define, `$elem` its lane type, `$lanes` the lane
/// count and `$zero` a typed zero literal for `$elem`. All arithmetic is done
/// lane by lane with scalar operations; additions and subtractions wrap.
macro_rules! avx2_int_type {
    ($name:ident, $elem:ty, $lanes:expr, $zero:expr) => {
        #[derive(Copy, Clone)]
        #[repr(align(64))]
        pub struct $name(pub [$elem; $lanes]);

        impl Default for $name {
            /// All lanes zero.
            #[inline(always)]
            fn default() -> Self {
                Self::splat($zero)
            }
        }

        impl $name {
            /// Number of lanes in the vector.
            pub const LANES: usize = $lanes;

            /// Broadcasts `v` into every lane.
            #[inline(always)]
            pub fn splat(v: $elem) -> Self {
                Self([v; $lanes])
            }

            /// Loads the first `LANES` elements of `s`; panics if `s` is shorter.
            #[inline(always)]
            pub fn from_slice(s: &[$elem]) -> Self {
                assert!(s.len() >= $lanes);
                let mut lanes = [$zero; $lanes];
                for (dst, src) in lanes.iter_mut().zip(s) {
                    *dst = *src;
                }
                Self(lanes)
            }

            /// Wraps an array; element `i` becomes lane `i`.
            #[inline(always)]
            pub fn from_array(a: [$elem; $lanes]) -> Self {
                Self(a)
            }

            /// Returns the lanes as an array.
            #[inline(always)]
            pub fn to_array(self) -> [$elem; $lanes] {
                let Self(lanes) = self;
                lanes
            }

            /// Stores the lanes into the front of `s`; panics if `s` is shorter.
            #[inline(always)]
            pub fn copy_to_slice(self, s: &mut [$elem]) {
                assert!(s.len() >= $lanes);
                for (dst, src) in s[..$lanes].iter_mut().zip(self.0.iter()) {
                    *dst = *src;
                }
            }

            /// Wrapping sum of all lanes.
            #[inline(always)]
            pub fn reduce_sum(self) -> $elem {
                self.0
                    .iter()
                    .fold($zero, |acc: $elem, &lane| acc.wrapping_add(lane))
            }
        }

        impl Add for $name {
            type Output = Self;
            #[inline(always)]
            fn add(self, r: Self) -> Self {
                Self(core::array::from_fn(|i| self.0[i].wrapping_add(r.0[i])))
            }
        }

        impl Sub for $name {
            type Output = Self;
            #[inline(always)]
            fn sub(self, r: Self) -> Self {
                Self(core::array::from_fn(|i| self.0[i].wrapping_sub(r.0[i])))
            }
        }

        impl BitAnd for $name {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, r: Self) -> Self {
                Self(core::array::from_fn(|i| self.0[i] & r.0[i]))
            }
        }

        impl BitOr for $name {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, r: Self) -> Self {
                Self(core::array::from_fn(|i| self.0[i] | r.0[i]))
            }
        }

        impl BitXor for $name {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, r: Self) -> Self {
                Self(core::array::from_fn(|i| self.0[i] ^ r.0[i]))
            }
        }

        impl BitAndAssign for $name {
            #[inline(always)]
            fn bitand_assign(&mut self, r: Self) {
                for (dst, src) in self.0.iter_mut().zip(r.0.iter()) {
                    *dst &= *src;
                }
            }
        }

        impl BitOrAssign for $name {
            #[inline(always)]
            fn bitor_assign(&mut self, r: Self) {
                for (dst, src) in self.0.iter_mut().zip(r.0.iter()) {
                    *dst |= *src;
                }
            }
        }

        impl BitXorAssign for $name {
            #[inline(always)]
            fn bitxor_assign(&mut self, r: Self) {
                for (dst, src) in self.0.iter_mut().zip(r.0.iter()) {
                    *dst ^= *src;
                }
            }
        }

        impl Not for $name {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self {
                Self(self.0.map(|lane| !lane))
            }
        }

        impl AddAssign for $name {
            #[inline(always)]
            fn add_assign(&mut self, r: Self) {
                for (dst, src) in self.0.iter_mut().zip(r.0.iter()) {
                    *dst = dst.wrapping_add(*src);
                }
            }
        }

        impl SubAssign for $name {
            #[inline(always)]
            fn sub_assign(&mut self, r: Self) {
                for (dst, src) in self.0.iter_mut().zip(r.0.iter()) {
                    *dst = dst.wrapping_sub(*src);
                }
            }
        }

        impl fmt::Debug for $name {
            /// Prints the type name followed by the lanes as a slice, e.g. `U64x8([..])`.
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let lanes: &[$elem] = &self.0;
                write!(f, concat!(stringify!($name), "({:?})"), lanes)
            }
        }

        impl PartialEq for $name {
            fn eq(&self, other: &Self) -> bool {
                self.0.iter().eq(other.0.iter())
            }
        }
    };
}

// Concrete 512-bit-wide integer vectors: 64×u8, 16×i32, 8×i64, 16×u32, 8×u64.
avx2_int_type!(U8x64, u8, 64, 0u8);
avx2_int_type!(I32x16, i32, 16, 0i32);
avx2_int_type!(I64x8, i64, 8, 0i64);
avx2_int_type!(U32x16, u32, 16, 0u32);
avx2_int_type!(U64x8, u64, 8, 0u64);
768+
769+ impl I32x16 {
770+ #[ inline( always) ] pub fn reduce_min ( self ) -> i32 { * self . 0 . iter ( ) . min ( ) . unwrap ( ) }
771+ #[ inline( always) ] pub fn reduce_max ( self ) -> i32 { * self . 0 . iter ( ) . max ( ) . unwrap ( ) }
772+ #[ inline( always) ] pub fn simd_min ( self , other : Self ) -> Self { let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = self . 0 [ i] . min ( other. 0 [ i] ) ; } Self ( o) }
773+ #[ inline( always) ] pub fn simd_max ( self , other : Self ) -> Self { let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = self . 0 [ i] . max ( other. 0 [ i] ) ; } Self ( o) }
774+ #[ inline( always) ] pub fn cast_f32 ( self ) -> F32x16 { let mut o = [ 0.0f32 ; 16 ] ; for i in 0 ..16 { o[ i] = self . 0 [ i] as f32 ; } F32x16 :: from_array ( o) }
775+ #[ inline( always) ] pub fn abs ( self ) -> Self { let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = self . 0 [ i] . abs ( ) ; } Self ( o) }
776+ }
777+ impl Mul for I32x16 { type Output = Self ; #[ inline( always) ] fn mul ( self , r : Self ) -> Self { let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = self . 0 [ i] . wrapping_mul ( r. 0 [ i] ) ; } Self ( o) } }
778+ impl MulAssign for I32x16 { #[ inline( always) ] fn mul_assign ( & mut self , r : Self ) { * self = * self * r; } }
779+ impl Neg for I32x16 { type Output = Self ; #[ inline( always) ] fn neg ( self ) -> Self { let mut o = [ 0i32 ; 16 ] ; for i in 0 ..16 { o[ i] = -self . 0 [ i] ; } Self ( o) } }
780+
781+ impl I64x8 {
782+ #[ inline( always) ] pub fn reduce_min ( self ) -> i64 { * self . 0 . iter ( ) . min ( ) . unwrap ( ) }
783+ #[ inline( always) ] pub fn reduce_max ( self ) -> i64 { * self . 0 . iter ( ) . max ( ) . unwrap ( ) }
784+ #[ inline( always) ] pub fn simd_min ( self , other : Self ) -> Self { let mut o = [ 0i64 ; 8 ] ; for i in 0 ..8 { o[ i] = self . 0 [ i] . min ( other. 0 [ i] ) ; } Self ( o) }
785+ #[ inline( always) ] pub fn simd_max ( self , other : Self ) -> Self { let mut o = [ 0i64 ; 8 ] ; for i in 0 ..8 { o[ i] = self . 0 [ i] . max ( other. 0 [ i] ) ; } Self ( o) }
786+ }
787+
788+ /// Lowercase aliases (std::simd convention)
789+ pub type f32x16 = F32x16 ;
790+ pub type f64x8 = F64x8 ;
791+ pub type u8x64 = U8x64 ;
792+ pub type i32x16 = I32x16 ;
793+ pub type i64x8 = I64x8 ;
794+ pub type u32x16 = U32x16 ;
795+ pub type u64x8 = U64x8 ;
796+
520797#[ cfg( test) ]
521798mod tests {
522799 use super :: * ;
0 commit comments