forked from Rust-GPU/rust-cuda
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlib.rs
More file actions
442 lines (394 loc) · 13.7 KB
/
lib.rs
File metadata and controls
442 lines (394 loc) · 13.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
#![feature(rustc_private)]
// This crate is perma-unstable because of `rustc_private`, so we might as well
// make our lives a lot easier for LLVM FFI with this. And since rustc's core infra
// relies on it, it's almost guaranteed not to be removed or broken.
#![feature(extern_types)]
#![feature(slice_as_array)]
extern crate rustc_abi;
extern crate rustc_arena;
extern crate rustc_ast;
extern crate rustc_attr_data_structures;
extern crate rustc_attr_parsing;
extern crate rustc_codegen_ssa;
extern crate rustc_data_structures;
extern crate rustc_driver;
extern crate rustc_errors;
extern crate rustc_fs_util;
extern crate rustc_hash;
extern crate rustc_hashes;
extern crate rustc_hir;
extern crate rustc_index;
extern crate rustc_interface;
extern crate rustc_macros;
extern crate rustc_metadata;
extern crate rustc_middle;
extern crate rustc_query_system;
extern crate rustc_session;
extern crate rustc_span;
extern crate rustc_symbol_mangling;
extern crate rustc_target;
extern crate rustc_type_ir;
mod abi;
mod allocator;
mod asm;
mod attributes;
mod back;
mod builder;
mod common;
mod const_ty;
mod consts;
mod context;
mod ctx_intrinsics;
mod debug_info;
mod init;
mod int_replace;
mod intrinsic;
mod link;
mod llvm;
mod lto;
mod mono_item;
mod nvvm;
mod override_fns;
mod ptx_filter;
mod target;
mod ty;
use abi::readjust_fn_abi;
use back::target_machine_factory;
use lto::ThinBuffer;
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
use rustc_codegen_ssa::{
CodegenResults, CompiledModule, ModuleCodegen, TargetConfig,
back::{
lto::{SerializedModule, ThinModule},
write::{CodegenContext, FatLtoInput, ModuleConfig, OngoingCodegen},
},
traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods},
};
use rustc_data_structures::fx::FxIndexMap;
use rustc_errors::{DiagCtxtHandle, FatalError};
use rustc_metadata::creader::MetadataLoaderDyn;
use rustc_middle::util::Providers;
use rustc_middle::{
dep_graph::{WorkProduct, WorkProductId},
ty::TyCtxt,
};
use rustc_session::{
Session,
config::{self, OutputFilenames},
};
use tracing::debug;
use std::ffi::CString;
use std::path::PathBuf;
/// Codegen dylib entrypoint.
///
/// Installs an ICE hook that points bug reports at the rust-cuda issue
/// tracker and attaches the crate version as a note, then returns the
/// backend as a boxed [`CodegenBackend`] trait object.
#[unsafe(no_mangle)]
pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
    // Where the ICE hook tells users to file bug reports.
    const BUG_REPORT_URL: &str = "https://github.com/Rust-GPU/rust-cuda/issues/new";
    // Extra note appended to ICE output so reports carry the crate version.
    const VERSION_NOTE: &str = concat!(
        "`rust-cuda` version `",
        env!("CARGO_PKG_VERSION"),
        "`"
    );

    rustc_driver::install_ice_hook(BUG_REPORT_URL, |handler| {
        handler.handle().note(VERSION_NOTE);
    });

    Box::new(NvvmCodegenBackend)
}
/// The NVVM codegen backend handed to rustc.
///
/// This is a field-less unit struct, so it carries no state of its own.
/// Because it has no fields, the compiler already implements `Send` and
/// `Sync` for it automatically; no hand-written `unsafe impl` is needed, and
/// leaving them out means a future non-thread-safe field would be caught by
/// the compiler instead of being silently asserted as safe.
#[derive(Clone)]
pub struct NvvmCodegenBackend;
impl CodegenBackend for NvvmCodegenBackend {
    /// This backend ships no locale resource, so an empty string is returned.
    fn locale_resource(&self) -> &'static str {
        ""
    }

    /// Installs the global `tracing` subscriber (filtered through the
    /// `NVVM_LOG` environment variable) and then runs the backend's own
    /// initialisation.
    ///
    /// # Panics
    ///
    /// Panics if setting the global default subscriber fails.
    fn init(&self, sess: &Session) {
        let subscriber = tracing_subscriber::fmt()
            .with_env_filter(tracing_subscriber::EnvFilter::from_env("NVVM_LOG"))
            .without_time()
            .with_ansi(false)
            .compact()
            .finish();
        tracing::subscriber::set_global_default(subscriber).expect("no default subscriber");

        init::init(sess);
    }

    // FIXME If we can use the default metadata loader in the LLVM backend
    // we can remove this and use the default provided impl instead.
    fn metadata_loader(&self) -> Box<MetadataLoaderDyn> {
        Box::new(link::NvvmMetadataLoader)
    }

    /// Overrides the query providers this backend customises.
    fn provide(&self, providers: &mut Providers) {
        // Synthesize compute capability target features from the architecture given in
        // llvm-args (e.g. "-arch=compute_70"), so code can use
        // `#[cfg(target_feature = "compute_60")]` etc. for conditional compilation.
        // Following NVIDIA semantics this is "at least this capability" matching: targeting
        // compute_70 also enables compute_60, compute_50, and all lower capabilities, which
        // lets libraries gate features on a minimum required compute capability.
        providers.global_backend_features = |tcx, ()| {
            let args = context::CodegenArgs::from_session(tcx.sess);

            // Only the first architecture option is considered.
            let implied = args.nvvm_options.iter().find_map(|opt| match opt {
                ::nvvm::NvvmOption::Arch(arch) => Some(arch.all_target_features()),
                _ => None,
            });

            let mut features = vec![];
            if let Some(implied) = implied {
                features.extend(implied);
            }
            features
        };

        // Both ABI queries defer to rustc's default provider and then post-process
        // a successful result with `readjust_fn_abi`; errors pass through untouched.
        providers.fn_abi_of_fn_ptr = |tcx, key| {
            (rustc_interface::DEFAULT_QUERY_PROVIDERS.fn_abi_of_fn_ptr)(tcx, key)
                .map(|fn_abi| readjust_fn_abi(tcx, fn_abi))
        };
        providers.fn_abi_of_instance = |tcx, key| {
            (rustc_interface::DEFAULT_QUERY_PROVIDERS.fn_abi_of_instance)(tcx, key)
                .map(|fn_abi| readjust_fn_abi(tcx, fn_abi))
        };
    }

    /// Starts codegen for the crate and returns the in-flight state as an
    /// opaque box.
    ///
    /// The target CPU comes from the codegen options when set there, and from
    /// the session's target description otherwise.
    fn codegen_crate(&self, tcx: TyCtxt<'_>) -> Box<dyn std::any::Any> {
        debug!("Codegen crate");

        let target_cpu = tcx
            .sess
            .opts
            .cg
            .target_cpu
            .clone()
            .unwrap_or_else(|| tcx.sess.target.cpu.to_string());

        Box::new(rustc_codegen_ssa::base::codegen_crate(
            Self, tcx, target_cpu,
        ))
    }

    /// Downcasts the opaque state produced by `codegen_crate` and joins it.
    ///
    /// # Panics
    ///
    /// Panics if `ongoing_codegen` is not an `OngoingCodegen<Self>`.
    fn join_codegen(
        &self,
        ongoing_codegen: Box<dyn std::any::Any>,
        sess: &Session,
        _outputs: &OutputFilenames,
    ) -> (CodegenResults, FxIndexMap<WorkProductId, WorkProduct>) {
        debug!("Join codegen");

        ongoing_codegen
            .downcast::<OngoingCodegen<Self>>()
            .expect("Expected OngoingCodegen, found Box<Any>")
            .join(sess)
    }

    /// Hands the codegen results to the backend's linker, naming the output
    /// after the local crate.
    fn link(
        &self,
        sess: &rustc_session::Session,
        codegen_results: rustc_codegen_ssa::CodegenResults,
        metadata: rustc_metadata::EncodedMetadata,
        outputs: &config::OutputFilenames,
    ) {
        let crate_name = codegen_results.crate_info.local_crate_name.as_str();
        link::link(sess, &codegen_results, outputs, crate_name, metadata);
    }

    /// Builds the target configuration: the enabled target features plus the
    /// float-reliability flags (all reported as `false`).
    fn target_config(&self, sess: &Session) -> TargetConfig {
        // Collect the `+feature` entries from the target spec first, then from the
        // command line; entries without a leading `+` and bare `+` are dropped.
        let from_target = sess.target.options.features.split(',');
        let from_cmdline = sess.opts.cg.target_feature.split(',');
        let mut target_features: Vec<_> = from_target
            .chain(from_cmdline)
            .filter_map(|entry| entry.strip_prefix('+'))
            .filter(|name| !name.is_empty())
            .map(rustc_span::Symbol::intern)
            .collect();

        // Add backend-synthesized features (e.g., hierarchical compute capabilities),
        // taken from the first architecture option found in llvm-args.
        let args = context::CodegenArgs::from_session(sess);
        let implied = args.nvvm_options.iter().find_map(|opt| match opt {
            ::nvvm::NvvmOption::Arch(arch) => Some(arch.all_target_features()),
            _ => None,
        });
        if let Some(backend_features) = implied {
            target_features.extend(
                backend_features
                    .iter()
                    .map(|f| rustc_span::Symbol::intern(f)),
            );
        }

        // For NVPTX all target features are treated as stable, so both feature
        // lists carry exactly the same set.
        TargetConfig {
            unstable_target_features: target_features.clone(),
            target_features,
            has_reliable_f16: false,
            has_reliable_f16_math: false,
            has_reliable_f128: false,
            has_reliable_f128_math: false,
        }
    }
}
impl WriteBackendMethods for NvvmCodegenBackend {
    type Module = LlvmMod;
    type ModuleBuffer = lto::ModuleBuffer;
    type TargetMachine = &'static mut llvm::TargetMachine;
    type TargetMachineError = String;
    type ThinData = ();
    type ThinBuffer = ThinBuffer;

    /// Not implemented yet; calling this panics via `todo!`.
    fn run_link(
        _cgcx: &CodegenContext<Self>,
        _diag_handler: DiagCtxtHandle<'_>,
        _modules: Vec<ModuleCodegen<Self::Module>>,
    ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
        // TODO(Rdambrosio016):
        // we can probably call the llvm codegen to do this, but cgcx
        // is a codegen context of NvvmCodegenBackend not LlvmCodegenBackend
        // and to make a new cgcx we need to make a new LlvmCodegenBackend which
        // cannot be done through the API currently
        todo!()
    }

    /// Not implemented yet; calling this panics via `todo!`.
    fn run_and_optimize_fat_lto(
        _cgcx: &CodegenContext<Self>,
        _exported_symbols_for_lto: &[String],
        _each_linked_rlib_for_lto: &[PathBuf],
        _modules: Vec<FatLtoInput<Self>>,
        _diff_fncs: Vec<AutoDiffItem>,
    ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
        todo!()
    }

    /// Runs thin LTO by delegating to `lto::run_thin`; the exported-symbol and
    /// rlib lists are currently ignored.
    fn run_thin_lto(
        cgcx: &CodegenContext<Self>,
        // FIXME: Limit LTO exports to these symbols
        _exported_symbols_for_lto: &[String],
        // FIXME: handle these? but only relevant for non-thin LTO?
        _each_linked_rlib_for_lto: &[PathBuf],
        modules: Vec<(String, Self::ThinBuffer)>,
        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
    ) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError> {
        lto::run_thin(cgcx, modules, cached_modules)
    }

    /// Intentionally a no-op.
    fn print_pass_timings(&self) {
        // Not applicable: NVVM doesn't expose pass timing info. Maybe we could
        // print LLVM pass information here instead.
    }

    /// Intentionally a no-op.
    fn print_statistics(&self) {
        // Not applicable: NVVM doesn't expose this information. Maybe we could
        // print LLVM pass information here instead.
    }

    /// Optimizes `module` in place by delegating to `back::optimize`.
    fn optimize(
        cgcx: &CodegenContext<Self>,
        diag_handler: DiagCtxtHandle<'_>,
        module: &mut ModuleCodegen<Self::Module>,
        config: &ModuleConfig,
    ) -> Result<(), FatalError> {
        unsafe { back::optimize(cgcx, diag_handler, module, config) }
    }

    /// Optimizes a thin-LTO module by delegating to `lto::optimize_thin`.
    fn optimize_thin(
        cgcx: &CodegenContext<Self>,
        thin_module: ThinModule<Self>,
    ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
        unsafe { lto::optimize_thin(cgcx, thin_module) }
    }

    /// Turns `module` into a compiled module by delegating to `back::codegen`.
    fn codegen(
        cgcx: &CodegenContext<Self>,
        module: ModuleCodegen<Self::Module>,
        config: &ModuleConfig,
    ) -> Result<CompiledModule, FatalError> {
        unsafe { back::codegen(cgcx, module, config) }
    }

    /// Builds a thin buffer from the module's LLVM module and pairs it with
    /// the module name. `_want_summary` is ignored.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped LLVM module pointer is null.
    fn prepare_thin(
        module: ModuleCodegen<Self::Module>,
        _want_summary: bool,
    ) -> (String, Self::ThinBuffer) {
        debug!("Prepare thin");

        let thin_buffer = unsafe {
            let llmod = module.module_llvm.llmod.as_ref().unwrap();
            lto::ThinBuffer::new(llmod)
        };
        (module.name, thin_buffer)
    }

    /// Builds a module buffer from the module's LLVM module and pairs it with
    /// the module name.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped LLVM module pointer is null.
    fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
        debug!("Serializing module");

        let module_buffer = unsafe {
            let llmod = module.module_llvm.llmod.as_ref().unwrap();
            lto::ModuleBuffer::new(llmod)
        };
        (module.name, module_buffer)
    }
}
impl ExtraBackendMethods for NvvmCodegenBackend {
    /// Creates a new module named `module_name`, lets `allocator::codegen`
    /// populate it, and returns it.
    fn codegen_allocator(
        &self,
        tcx: TyCtxt<'_>,
        module_name: &str,
        kind: AllocatorKind,
        alloc_error_handler_kind: AllocatorKind,
    ) -> LlvmMod {
        let mut allocator_module = LlvmMod::new(module_name);
        unsafe {
            allocator::codegen(
                tcx,
                &mut allocator_module,
                module_name,
                kind,
                alloc_error_handler_kind,
            );
        }
        allocator_module
    }

    /// Compiles the codegen unit `cgu_name` by delegating to
    /// `back::compile_codegen_unit`.
    fn compile_codegen_unit(
        &self,
        tcx: TyCtxt<'_>,
        cgu_name: rustc_span::Symbol,
    ) -> (rustc_codegen_ssa::ModuleCodegen<Self::Module>, u64) {
        back::compile_codegen_unit(tcx, cgu_name)
    }

    /// Returns the target machine factory for this session and optimization
    /// level; `_target_features` is not consulted.
    fn target_machine_factory(
        &self,
        sess: &Session,
        opt_level: config::OptLevel,
        _target_features: &[String],
    ) -> rustc_codegen_ssa::back::write::TargetMachineFactoryFn<Self> {
        target_machine_factory(sess, opt_level)
    }
}
/// Create the LLVM module for the rest of the compilation. It houses
/// the LLVM bitcode we then add to the NVVM program and feed to libnvvm;
/// LLVM's codegen is never actually called.
///
/// The new module is created inside `llcx` under the name `mod_name`, and its
/// data layout and target triple are set from `target::DATA_LAYOUT` and
/// `target::TARGET_TRIPLE`.
///
/// # Panics
///
/// Panics if `mod_name` contains an interior nul byte.
///
/// # Safety
///
/// NOTE(review): the exact caller contract is not written down here;
/// presumably `llcx` must be a live, valid LLVM context. Confirm.
pub(crate) unsafe fn create_module<'ll>(
    llcx: &'ll llvm::Context,
    mod_name: &str,
) -> &'ll llvm::Module {
    debug!("Creating llvm module with name `{}`", mod_name);

    // Convert everything the C API needs up front; the CStrings stay alive
    // until the end of the function, past every call that borrows them.
    let c_name = CString::new(mod_name).expect("nul in module name");
    let c_layout = CString::new(target::DATA_LAYOUT).unwrap();
    let c_triple = CString::new(target::TARGET_TRIPLE).unwrap();

    unsafe {
        let llmod = llvm::LLVMModuleCreateWithNameInContext(c_name.as_ptr(), llcx);
        llvm::LLVMSetDataLayout(llmod, c_layout.as_ptr());
        llvm::LLVMSetTarget(llmod, c_triple.as_ptr());
        llmod
    }
}
/// Wrapper over raw llvm structures
///
/// Pairs an LLVM context with a module created inside it. `LlvmMod::new`
/// creates both, and the `Drop` impl disposes of the context.
pub struct LlvmMod {
/// The LLVM context, created via `LLVMRustContextCreate` in `LlvmMod::new`
/// and passed to `LLVMContextDispose` when this value is dropped.
llcx: &'static mut llvm::Context,
/// Raw pointer to the module that `create_module` built inside `llcx`.
llmod: *const llvm::Module,
}
// NOTE(review): these marker impls assert thread-safety by hand, since the raw
// pointer field would otherwise make the type neither `Send` nor `Sync`. The
// invariant that justifies them is not stated in this file; confirm.
unsafe impl Send for LlvmMod {}
unsafe impl Sync for LlvmMod {}
impl LlvmMod {
    /// Creates a fresh LLVM context and, inside it, a module called `name`.
    ///
    /// # Panics
    ///
    /// Panics if `name` contains an interior nul byte (see `create_module`).
    pub fn new(name: &str) -> Self {
        unsafe {
            // TODO(RDambrosio016): does shouldDiscardNames affect NVVM at all?
            let llcx = llvm::LLVMRustContextCreate(false);
            // Stored as a raw pointer so the module does not keep `llcx` borrowed.
            let llmod: *const llvm::Module = create_module(llcx, name);
            Self { llcx, llmod }
        }
    }
}
impl Drop for LlvmMod {
    /// Disposes of the LLVM context held by this wrapper.
    fn drop(&mut self) {
        // Go through a raw pointer: only `&mut self` is available here, so the
        // context reference itself cannot be moved out of the struct.
        let raw_ctx = self.llcx as *mut llvm::Context;
        unsafe {
            llvm::LLVMContextDispose(&mut *raw_ctx);
        }
    }
}