@@ -194,6 +194,10 @@ pub struct CudaBuilder {
194194 /// An optional path where to dump LLVM IR of the final output the codegen will feed to libnvvm. Usually
195195 /// used for debugging.
196196 pub final_module_path : Option < PathBuf > ,
197+ /// The threshold for LLVM's loop unrolling optimization pass. Higher values allow more
198+ /// aggressive unrolling, which can improve performance but increases code size.
199+ /// When `None`, LLVM uses its default threshold.
200+ pub unroll_threshold : Option < u32 > ,
197201}
198202
199203impl CudaBuilder {
@@ -216,6 +220,7 @@ impl CudaBuilder {
216220 debug : DebugInfo :: None ,
217221 build_args : vec ! [ ] ,
218222 final_module_path : None ,
223+ unroll_threshold : None ,
219224 }
220225 }
221226
@@ -351,6 +356,13 @@ impl CudaBuilder {
351356 self
352357 }
353358
359+ /// Sets the threshold for LLVM's loop unrolling optimization pass; when set, it is added to the LLVM arguments as `-unroll-threshold=<threshold>`.
360+ /// Higher values allow more aggressive unrolling, which can improve performance but increases code size. If never set, the field stays `None` and LLVM uses its default threshold.
361+ pub fn unroll_threshold ( mut self , threshold : u32 ) -> Self {
362+ self . unroll_threshold = Some ( threshold) ; // replaces any previously configured threshold
363+ self // hand the builder back by value so calls can be chained
364+ }
365+
354366 /// Runs rustc to build the codegen and codegens the gpu crate, returning the path of the final
355367 /// ptx file. If [`ptx_file_copy_path`](Self::ptx_file_copy_path) is set, this returns the copied path.
356368 pub fn build ( self ) -> Result < PathBuf , CudaBuilderError > {
@@ -748,6 +760,10 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
748760 llvm_args. push ( path. to_str ( ) . unwrap ( ) . to_string ( ) ) ;
749761 }
750762
763+ if let Some ( threshold) = builder. unroll_threshold {
764+ llvm_args. push ( format ! ( "-unroll-threshold={threshold}" ) ) ;
765+ }
766+
751767 if builder. debug != DebugInfo :: None {
752768 let ( nvvm_flag, rustc_flag) = builder. debug . into_nvvm_and_rustc_options ( ) ;
753769 llvm_args. push ( nvvm_flag) ;
0 commit comments