@@ -235,6 +235,18 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
235235 codegen_offload ( self , tcx, instance, args) ;
236236 return Ok ( ( ) ) ;
237237 }
238+ sym:: preload => {
239+ if tcx. sess . opts . unstable_opts . offload . is_empty ( ) {
240+ let _ = tcx. dcx ( ) . emit_almost_fatal ( OffloadWithoutEnable ) ;
241+ }
242+
243+ if tcx. sess . lto ( ) != rustc_session:: config:: Lto :: Fat {
244+ let _ = tcx. dcx ( ) . emit_almost_fatal ( OffloadWithoutFatLTO ) ;
245+ }
246+
247+ codegen_offload_preload ( self , tcx, instance, args) ;
248+ return Ok ( ( ) ) ;
249+ }
238250 sym:: is_val_statically_known => {
239251 if let OperandValue :: Immediate ( imm) = args[ 0 ] . val {
240252 self . call_intrinsic (
@@ -1847,6 +1859,43 @@ fn codegen_autodiff<'ll, 'tcx>(
18471859 ) ;
18481860}
18491861
1862+ // For each PreLoad *call*, we now use some of our previous declared globals to move data to the gpu.
1863+ // For now, we only handle the data transfer part of it. Consecutive calls become a no-op on the
1864+ // LLVM side.
1865+ //
1866+ // Current steps:
1867+ // 0. Alloca some variables for the following steps
1868+ // 1. set insert point before PreLoad call.
1869+ // 2. generate all the GEPS and stores, to be used in 3)
1870+ // 3. generate __tgt_target_data_begin calls to move data to the GPU
1871+ //
1872+ // unchanged: keep kernel call. Later move the kernel to the GPU
1873+ //
1874+ // 4. set insert point after kernel call.
1875+ // 5. generate all the GEPS and stores, to be used in 6)
1876+ // 6. generate __tgt_target_data_end calls to move data from the GPU
1877+ fn codegen_offload_preload < ' ll , ' tcx > (
1878+ bx : & mut Builder < ' _ , ' ll , ' tcx > ,
1879+ tcx : TyCtxt < ' tcx > ,
1880+ instance : ty:: Instance < ' tcx > ,
1881+ args : & [ OperandRef < ' tcx , & ' ll Value > ] ,
1882+ ) {
1883+ let cx = bx. cx ;
1884+ //let fn_args = instance.args;
1885+
1886+ register_offload ( cx) ;
1887+ let a = OffloadMetadata :: from_ty ( tcx, args[ 0 ] ) ;
1888+
1889+ let offload_globals_ref = cx. offload_globals . borrow ( ) ;
1890+ let offload_globals = match offload_globals_ref. as_ref ( ) {
1891+ Some ( globals) => globals,
1892+ None => {
1893+ // Offload is not initialized, cannot continue
1894+ return ;
1895+ }
1896+ } ;
1897+ }
1898+
18501899// Generates the LLVM code to offload a Rust function to a target device (e.g., GPU).
18511900// For each kernel call, it generates the necessary globals (including metadata such as
18521901// size and pass mode), manages memory mapping to and from the device, handles all
0 commit comments