@@ -835,13 +835,42 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
835835 }
836836
837837 fn fptoui_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
838- // NVVM does not have support for saturated conversion. Setting rustc flag
839- // `-Z saturating_float_casts=false` falls back to non-saturated, UB-prone
840- // conversion, and should prevent this codegen. Otherwise, fall back to UB
841- // prone conversion.
842- self . cx ( ) . sess ( ) . dcx ( )
843- . warn ( "Saturated float to int conversion is not supported on NVVM. Defaulting to UB prone conversion." ) ;
844- self . fptoui ( val, dest_ty)
838+ // NVVM does not support saturating casts, however, they are relatively simple to implement.
839+ // (at least for unsigned ints). So, we emulate them here.
840+
841+ // In order to clamp the value, we need to know it's type.
842+ let val_ty = self . val_ty ( val) ;
843+ // Find the min / max intrinsics
844+ let ( min, max) = match self . cx ( ) . float_width ( val_ty) {
845+ 64 => ( "__nv_fmin" , "__nv_fmax" ) ,
846+ 32 => ( "__nv_fminf" , "__nv_fmaxf" ) ,
847+ _ => {
848+ self . cx ( ) . sess ( ) . dcx ( )
849+ . warn ( "Saturated float to int conversion is not supported in NVVM for type {val_ty:?}. Defaulting to UB prone conversion." ) ;
850+ return self . fptoui ( val, dest_ty) ;
851+ }
852+ } ;
853+ let ( max_ty, max) = self . cx ( ) . get_intrinsic ( max) ;
854+ let ( min_ty, min) = self . cx ( ) . get_intrinsic ( min) ;
855+ // Find the zero value, and the max value of a given int.
856+ let zero = self . const_real ( val_ty, 0.0 ) ;
857+ let max_value = match self . int_width ( dest_ty) {
858+ 8 => u8:: MAX as f64 ,
859+ 16 => u16:: MAX as f64 ,
860+ 32 => u32:: MAX as f64 ,
861+ 64 => u64:: MAX as f64 ,
862+ 128 => u128:: MAX as f64 ,
863+ _ => todo ! ( "Unsupported int type {dest_ty:?}" ) ,
864+ } ;
865+ let max_value = self . const_real ( val_ty, max_value) ;
866+ // Compute max(val, 0). This will clamp negative values to zero **AND**
867+ // replace NaNs with 0s(just like how Rust is specified to behave)
868+ let res = self . call ( max_ty, None , None , max, & [ val, zero] , None , None ) ;
869+ // Clamp all values higher than max to max
870+ let res = self . call ( min_ty, None , None , min, & [ res, max_value] , None , None ) ;
871+ // Now, we know that `res` is non-nan, and in range (min, max). So, it is well-defined
872+ // for all inputs :D!
873+ self . fptoui ( res, dest_ty)
845874 }
846875
847876 fn fptosi_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
0 commit comments