11use std:: cmp:: Ordering ;
2- use std:: marker:: PhantomData ;
32use std:: ops:: { Index , IndexMut , Neg } ;
43use std:: { alloc, fmt, mem, ptr} ;
54
6- use na:: { ComplexField , RealField , SimdRealField } ;
7- use num_traits:: identities:: One ;
5+ #[ cfg( feature = "fearless_simd" ) ]
6+ use fearless_simd:: { Simd , SimdBase , SimdElement , SimdFloat } ;
7+ use na:: { RealField , SimdRealField } ;
88
99/// A dense, fixed-resolution, warped cube map
1010///
@@ -651,17 +651,19 @@ impl Coords {
651651 ///
652652 /// Because this returns data in batches of `S::f32s::N` lanes, a few excess values will be
653653 /// computed at the end for any `chunk_resolution` whose square is not a multiple of the batch size.
// Diff view of `Coords::samples_ps`: `-` lines are the removed nalgebra-generic version, `+` lines
// are the added `fearless_simd` version, and lines with two fused numbers are unchanged context.
//
// The added version is compiled only with the `fearless_simd` feature and takes the SIMD token
// `simd: S` as its first argument after `&self`.
// NOTE(review): the extra leading `simd` argument changes the call shape; callers written as
// `samples_ps::<T>(face_resolution, chunk_resolution)` must be updated (the test in this diff is).
654- pub fn samples_ps < S > ( & self , face_resolution : u32 , chunk_resolution : u32 ) -> SampleIterSimd < S >
655- where
656- S : SimdRealField + Copy ,
657- S :: Element : RealField + Copy ,
658- {
654+ #[ cfg( feature = "fearless_simd" ) ]
655+ pub fn samples_ps < S : Simd > (
656+ & self ,
657+ simd : S ,
658+ face_resolution : u32 ,
659+ chunk_resolution : u32 ,
660+ ) -> SampleIterSimd < S > {
// The iterator gets a copy of these coordinates, both resolutions unchanged, and starts at
// linear sample index 0.
659661 SampleIterSimd {
660662 coords : * self ,
661663 face_resolution,
662664 chunk_resolution,
663665 index : 0 ,
// The `simd` value itself is stored where the old code kept only a `PhantomData` type marker.
664- _simd : PhantomData ,
666+ simd ,
665667 }
666668 }
667669}
@@ -677,6 +679,14 @@ pub(crate) fn warp<N: SimdRealField + Copy>(x: N) -> N {
677679 * x
678680}
679681
682+ /// Vector-typed variant of `warp` used by the SIMD sample iterator; see `warp`.
// Added by this diff and compiled only with the `fearless_simd` feature.
// `F` is a `SimdFloat` vector with element type `N`; each constant is converted to `N` with
// `na::convert` before being combined with the vector.
683+ #[ cfg( feature = "fearless_simd" ) ]
684+ fn warp_ps < S : Simd , N : RealField + SimdElement , F : SimdFloat < N , S > > ( x : F ) -> F {
685+ let x2 = x * x;
// Evaluates the odd polynomial x * ((a * x^2 + b) * x^2 + c) in nested (Horner) form.
// The three coefficients sum to 1.0, so an input of +/-1 maps to +/-1 up to rounding.
686+ x * ( ( x2 * na:: convert :: < _ , N > ( 0.123894434214 ) + na:: convert :: < _ , N > ( 0.130546850193 ) ) * x2
687+ + na:: convert :: < _ , N > ( 0.745558715593 ) )
688+ }
689+
680690/// Derivative of `warp`
681691fn dwarp < N : RealField + Copy > ( x : N ) -> N {
682692 let x2 = x * x;
@@ -804,98 +814,83 @@ impl ExactSizeIterator for SampleIter {
804814///
805815/// Hand-vectorized, returning batches of each dimension in a separate register.
// Diff view of the `SampleIterSimd` struct: the added `+` lines gate it on the `fearless_simd`
// feature and replace the `PhantomData` marker with a stored `simd: S` value.
806816#[ derive( Debug ) ]
817+ #[ cfg( feature = "fearless_simd" ) ]
807818pub struct SampleIterSimd < S > {
// Coordinates the iterator was created from; `next` reads `coords.x`, `coords.y` and `coords.face`.
808819 coords : Coords ,
// Passed to `Coords::edge_length` on every `next` call.
809820 face_resolution : u32 ,
// Samples per side; iteration stops once `index >= chunk_resolution * chunk_resolution`.
810821 chunk_resolution : u32 ,
// Linear index of the first lane of the next batch (column = index % chunk_resolution,
// row = index / chunk_resolution).
811822 index : u32 ,
// Removed: zero-sized marker that only named the SIMD type.
812- _simd : PhantomData < fn ( ) -> S > ,
// Added: SIMD token handed to `S::f32s::splat` whenever a vector is built.
823+ simd : S ,
813824}
814825
// Diff view of `impl Iterator for SampleIterSimd`: `-` lines are the removed nalgebra-generic
// code, `+` lines are the added `fearless_simd` code, fused-number lines are unchanged context.
//
// Each `next` call yields one batch `[x, y, z]`, with one `S::f32s` vector per component, and
// then advances `index` by the lane count `S::f32s::N`.
815- impl < S > Iterator for SampleIterSimd < S >
816- where
817- S : SimdRealField + Copy ,
818- S :: Element : RealField + Copy ,
819- {
820- type Item = [ S ; 3 ] ;
826+ #[ cfg( feature = "fearless_simd" ) ]
827+ impl < S : Simd > Iterator for SampleIterSimd < S > {
828+ type Item = [ S :: f32s ; 3 ] ;
821829 fn next ( & mut self ) -> Option < Self :: Item > {
// Finished once every sample index below chunk_resolution^2 has been covered by a batch.
// This guard also returns early for chunk_resolution == 0, before `chunk_resolution - 1` below.
822830 if self . index >= self . chunk_resolution * self . chunk_resolution {
823831 return None ;
824832 }
825833 {
826- let edge_length = Coords :: edge_length :: < S :: Element > ( self . face_resolution ) ;
827- let origin_on_face_x = na:: convert :: < _ , S :: Element > ( self . coords . x as f32 )
828- . mul_add ( edge_length, -S :: Element :: one ( ) ) ;
829- let origin_on_face_y = na:: convert :: < _ , S :: Element > ( self . coords . y as f32 )
830- . mul_add ( edge_length, -S :: Element :: one ( ) ) ;
// Scalar origin on the face: coords.{x,y} * edge_length - 1.0, now computed directly in f32.
834+ let edge_length = Coords :: edge_length :: < f32 > ( self . face_resolution ) ;
835+ let origin_on_face_x = ( self . coords . x as f32 ) . mul_add ( edge_length, -1.0 ) ;
836+ let origin_on_face_y = ( self . coords . y as f32 ) . mul_add ( edge_length, -1.0 ) ;
831837 let max = self . chunk_resolution - 1 ;
// With a single sample per side (max == 0) every lane uses the offset 0.5 * edge_length.
832838 let ( offset_x, offset_y) = if max == 0 {
833- let v = S :: splat ( na :: convert :: < _ , S :: Element > ( 0.5 ) * edge_length) ;
839+ let v = S :: f32s :: splat ( self . simd , 0.5 * edge_length) ;
834840 ( v, v)
835841 } else {
// Otherwise lane i takes column (index + i) % chunk_resolution and row
// (index + i) / chunk_resolution, each scaled by step = edge_length / max.
// NOTE(review): in the final batch, lanes with index + i >= chunk_resolution^2 get a row equal
// to chunk_resolution or more. These appear to be the "excess values" mentioned in the
// `samples_ps` docs; confirm callers ignore them.
836- let step = edge_length / na:: convert ( max as f32 ) ;
837- let mut xs = S :: zero ( ) ;
838- for i in 0 ..S :: LANES {
839- xs. replace (
840- i,
841- na:: convert ( ( ( self . index + i as u32 ) % self . chunk_resolution ) as f32 ) ,
842- ) ;
842+ let step = edge_length / max as f32 ;
843+ let mut xs = S :: f32s:: splat ( self . simd , 0.0 ) ;
844+ for ( i, x) in xs. as_mut_slice ( ) . iter_mut ( ) . enumerate ( ) {
845+ * x = ( ( self . index + i as u32 ) % self . chunk_resolution ) as f32 ;
843846 }
844- let mut ys = S :: zero ( ) ;
845- for i in 0 ..S :: LANES {
846- ys. replace (
847- i,
848- na:: convert ( ( ( self . index + i as u32 ) / self . chunk_resolution ) as f32 ) ,
849- ) ;
847+ let mut ys = S :: f32s:: splat ( self . simd , 0.0 ) ;
848+ for ( i, y) in ys. as_mut_slice ( ) . iter_mut ( ) . enumerate ( ) {
849+ * y = ( ( self . index + i as u32 ) / self . chunk_resolution ) as f32 ;
850850 }
851- ( xs * S :: splat ( step) , ys * S :: splat ( step) )
851+ ( xs * step, ys * step)
852852 } ;
// Per-lane position on the face = scalar origin + per-lane offset.
853- let pos_on_face_x = S :: splat ( origin_on_face_x ) + offset_x ;
854- let pos_on_face_y = S :: splat ( origin_on_face_y ) + offset_y ;
853+ let pos_on_face_x = offset_x + origin_on_face_x ;
854+ let pos_on_face_y = offset_y + origin_on_face_y ;
855855
856- let warped_x = warp ( pos_on_face_x) ;
857- let warped_y = warp ( pos_on_face_y) ;
856+ let warped_x = warp_ps ( pos_on_face_x) ;
857+ let warped_y = warp_ps ( pos_on_face_y) ;
858858
// len = sqrt(warped_x^2 + warped_y^2 + 1), assuming `a.madd(b, c)` computes a * b + c
// (TODO confirm against the fearless_simd docs). Dividing by `len` normalizes the vector
// (warped_x, warped_y, 1); the z component is now an explicit 1.0 / len rather than `simd_recip`.
859- let len = warped_y
860- . simd_mul_add ( warped_y, warped_x. simd_mul_add ( warped_x, S :: one ( ) ) )
861- . simd_sqrt ( ) ;
859+ let len = warped_y. madd ( warped_y, warped_x. madd ( warped_x, 1.0 ) ) . sqrt ( ) ;
862860 let dir_x = warped_x / len;
863861 let dir_y = warped_y / len;
864- let dir_z = len . simd_recip ( ) ;
862+ let dir_z = S :: f32s :: splat ( self . simd , 1.0 ) / len ;
865863
// Combine the direction with the face's basis matrix, one matrix row per output component:
// x = m11 * dir_x + m12 * dir_y + m13 * dir_z, and likewise for y and z (same `madd` assumption).
866- let basis = self . coords . face . basis :: < S :: Element > ( ) ;
864+ let basis = self . coords . face . basis :: < f32 > ( ) ;
867865 let basis = basis. matrix ( ) ;
868- let x = S :: splat ( basis. m11 ) . simd_mul_add (
866+ let x = S :: f32s :: splat ( self . simd , basis. m11 ) . madd (
869867 dir_x,
870- S :: splat ( basis. m12 ) . simd_mul_add ( dir_y, S :: splat ( basis . m13 ) * dir_z ) ,
868+ S :: f32s :: splat ( self . simd , basis. m12 ) . madd ( dir_y, dir_z * basis . m13 ) ,
871869 ) ;
872- let y = S :: splat ( basis. m21 ) . simd_mul_add (
870+ let y = S :: f32s :: splat ( self . simd , basis. m21 ) . madd (
873871 dir_x,
874- S :: splat ( basis. m22 ) . simd_mul_add ( dir_y, S :: splat ( basis . m23 ) * dir_z ) ,
872+ S :: f32s :: splat ( self . simd , basis. m22 ) . madd ( dir_y, dir_z * basis . m23 ) ,
875873 ) ;
876- let z = S :: splat ( basis. m31 ) . simd_mul_add (
874+ let z = S :: f32s :: splat ( self . simd , basis. m31 ) . madd (
877875 dir_x,
878- S :: splat ( basis. m32 ) . simd_mul_add ( dir_y, S :: splat ( basis . m33 ) * dir_z ) ,
876+ S :: f32s :: splat ( self . simd , basis. m32 ) . madd ( dir_y, dir_z * basis . m33 ) ,
879877 ) ;
880878
// Advance by a whole batch; `index` can therefore end up larger than chunk_resolution^2.
881- self . index += S :: LANES as u32 ;
879+ self . index += S :: f32s :: N as u32 ;
882880 Some ( [ x, y, z] )
883881 }
884882 }
885883
// Reports the number of batches still to be yielded: ceil(remaining samples / lane count).
// NOTE(review): `total - self.index` is an unchecked u32 subtraction. After the last batch
// `index` may exceed `total` (whenever chunk_resolution^2 is not a multiple of the lane count),
// so calling `size_hint`/`len` on an exhausted iterator underflows: a panic in debug builds and
// a huge wrapped value in release. Suggested fix: `total.saturating_sub(self.index)`.
886884 fn size_hint ( & self ) -> ( usize , Option < usize > ) {
887885 let total = self . chunk_resolution * self . chunk_resolution ;
888886 let remaining = ( total - self . index ) as usize ;
889- let x = remaining. div_ceil ( S :: LANES ) ;
887+ let x = remaining. div_ceil ( S :: f32s :: N ) ;
890888 ( x, Some ( x) )
891889 }
892890}
893891
// Diff view of `impl ExactSizeIterator for SampleIterSimd`: the nalgebra bounds are removed and
// the impl is re-declared for `S: Simd` behind the `fearless_simd` feature. The impl's closing
// brace lies outside this hunk.
894- impl < S > ExactSizeIterator for SampleIterSimd < S >
895- where
896- S : SimdRealField + Copy ,
897- S :: Element : RealField + Copy ,
898- {
892+ #[ cfg( feature = "fearless_simd" ) ]
893+ impl < S : Simd > ExactSizeIterator for SampleIterSimd < S > {
// `len` is the lower bound reported by `size_hint`, i.e. the number of batches left.
// NOTE(review): `size_hint` subtracts `index` from chunk_resolution^2 without a guard, so
// `len()` on an exhausted iterator may underflow. Confirm and guard in `size_hint`.
899894 fn len ( & self ) -> usize {
900895 self . size_hint ( ) . 0
901896 }
@@ -1131,20 +1126,32 @@ mod test {
11311126 }
11321127
// Diff view of the test `simd_samples_consistent`, which checks that the batched iterator from
// `samples_ps` produces the same directions as the scalar iterator from `samples`.
// The added version runs only with the `fearless_simd` feature and uses the `Fallback` SIMD level.
11331128 #[ test]
1129+ #[ cfg( feature = "fearless_simd" ) ]
11341130 fn simd_samples_consistent ( ) {
1131+ use fearless_simd:: Fallback ;
1132+
11351133 const COORDS : Coords = Coords {
11361134 x : 0 ,
11371135 y : 0 ,
11381136 face : Face :: Py ,
11391137 } ;
11401138 const FACE_RES : u32 = 1 ;
// 17 * 17 = 289 samples. The new loop below tolerates a partly filled final batch.
11411139 const CHUNK_RES : u32 = 17 ;
1142- let scalar = COORDS . samples ( FACE_RES , CHUNK_RES ) ;
1143- let simd = COORDS . samples_ps :: < f32 > ( FACE_RES , CHUNK_RES ) ;
1144- assert_eq ! ( simd. len( ) , scalar. len( ) ) ;
1145- for ( scalar, [ x, y, z] ) in scalar. zip ( simd) {
1146- dbg ! ( x, y, z) ;
1147- assert_abs_diff_eq ! ( scalar, na:: Unit :: new_unchecked( na:: Vector3 :: new( x, y, z) ) ) ;
// The scalar iterator is now `mut` because it is advanced by hand, one item per SIMD lane.
1140+ let mut scalar = COORDS . samples ( FACE_RES , CHUNK_RES ) ;
1141+ let simd = COORDS . samples_ps ( Fallback :: new ( ) , FACE_RES , CHUNK_RES ) ;
// The batched iterator must report ceil(scalar sample count / lane count) batches.
1142+ assert_eq ! (
1143+ simd. len( ) ,
1144+ scalar. len( ) . div_ceil( <Fallback as Simd >:: f32s:: N )
1145+ ) ;
1146+ for vs in simd {
1147+ for i in 0 ..<Fallback as Simd >:: f32s:: N {
// Pull lane `i` out of each of the three component vectors.
1148+ let [ x, y, z] = vs. map ( |c| c. as_slice ( ) [ i] ) ;
// Once the scalar iterator runs dry, the remaining lanes of the batch are excess and are skipped.
1149+ let Some ( scalar) = scalar. next ( ) else {
1150+ break ;
1151+ } ;
1152+ let simd = na:: Unit :: new_unchecked ( na:: Vector3 :: new ( x, y, z) ) ;
1153+ assert_abs_diff_eq ! ( scalar, simd) ;
1154+ }
11481155 }
11491156 }
11501157
0 commit comments