@@ -235,6 +235,18 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
235235 codegen_offload ( self , tcx, instance, args) ;
236236 return Ok ( ( ) ) ;
237237 }
238+ sym:: preload => {
239+ if tcx. sess . opts . unstable_opts . offload . is_empty ( ) {
240+ let _ = tcx. dcx ( ) . emit_almost_fatal ( OffloadWithoutEnable ) ;
241+ }
242+
243+ if tcx. sess . lto ( ) != rustc_session:: config:: Lto :: Fat {
244+ let _ = tcx. dcx ( ) . emit_almost_fatal ( OffloadWithoutFatLTO ) ;
245+ }
246+
247+ codegen_offload_preload ( self , tcx, instance, args) ;
248+ return Ok ( ( ) ) ;
249+ }
238250 sym:: is_val_statically_known => {
239251 if let OperandValue :: Immediate ( imm) = args[ 0 ] . val {
240252 self . call_intrinsic (
@@ -1847,6 +1859,60 @@ fn codegen_autodiff<'ll, 'tcx>(
18471859 ) ;
18481860}
18491861
1862+ // For each PreLoad *call*, we now use some of our previous declared globals to move data to the gpu.
1863+ // For now, we only handle the data transfer part of it. Consecutive calls become a no-op on the
1864+ // LLVM side.
1865+ //
1866+ // Current steps:
1867+ // 0. Alloca some variables for the following steps
1868+ // 1. set insert point before PreLoad call.
1869+ // 2. generate all the GEPS and stores, to be used in 3)
1870+ // 3. generate __tgt_target_data_begin calls to move data to the GPU
1871+ //
1872+ // unchanged: keep kernel call. Later move the kernel to the GPU
1873+ //
1874+ // 4. set insert point after kernel call.
1875+ // 5. generate all the GEPS and stores, to be used in 6)
1876+ // 6. generate __tgt_target_data_end calls to move data from the GPU
1877+ fn codegen_offload_preload < ' ll , ' tcx > (
1878+ bx : & mut Builder < ' _ , ' ll , ' tcx > ,
1879+ tcx : TyCtxt < ' tcx > ,
1880+ instance : ty:: Instance < ' tcx > ,
1881+ args : & [ OperandRef < ' tcx , & ' ll Value > ] ,
1882+ ) {
1883+ let cx = bx. cx ;
1884+ //let fn_args = instance.args;
1885+
1886+ register_offload ( cx) ;
1887+
1888+ //let OperandValue::Ref(place) = args[0].val else {
1889+ // bug!("expected array operand by reference");
1890+ //};
1891+ //let arg_ty = place.layout.llvm_type(bx.cx);
1892+ ////let workgroup_val = bx.load(arr_ty, place.llval, four);
1893+
1894+ //let a = OffloadMetadata::from_ty(tcx, arg_ty);
1895+
1896+ let arg = & args[ 0 ] ;
1897+
1898+ let arg_ty = arg. layout . ty ;
1899+
1900+ let ty:: Ref ( _, pointee_ty, _) = * arg_ty. kind ( ) else {
1901+ bug ! ( "expected preload argument to be a reference, got {arg_ty:?}" ) ;
1902+ } ;
1903+
1904+ let meta = OffloadMetadata :: from_ty ( tcx, pointee_ty) ;
1905+
1906+ let offload_globals_ref = cx. offload_globals . borrow ( ) ;
1907+ let offload_globals = match offload_globals_ref. as_ref ( ) {
1908+ Some ( globals) => globals,
1909+ None => {
1910+ // Offload is not initialized, cannot continue
1911+ return ;
1912+ }
1913+ } ;
1914+ }
1915+
18501916// Generates the LLVM code to offload a Rust function to a target device (e.g., GPU).
18511917// For each kernel call, it generates the necessary globals (including metadata such as
18521918// size and pass mode), manages memory mapping to and from the device, handles all
0 commit comments