Skip to content

Commit f670790

Browse files
committed
Auto merge of #153430 - JonathanBrouwer:rollup-347i8P6, r=<try>
Rollup of 8 pull requests

try-job: test-various
try-job: x86_64-gnu-aux
try-job: x86_64-gnu-llvm-21-3
try-job: x86_64-msvc-1
try-job: aarch64-apple
try-job: x86_64-mingw-1
2 parents: f8704be + 005fc16; commit: f670790

19 files changed

Lines changed: 499 additions & 170 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1217,9 +1217,9 @@ dependencies = [
12171217

12181218
[[package]]
12191219
name = "dispatch2"
1220-
version = "0.3.0"
1220+
version = "0.3.1"
12211221
source = "registry+https://github.com/rust-lang/crates.io-index"
1222-
checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec"
1222+
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
12231223
dependencies = [
12241224
"bitflags",
12251225
"block2",

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 51 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,12 @@ use std::ffi::CString;
33
use bitflags::Flags;
44
use llvm::Linkage::*;
55
use rustc_abi::Align;
6+
use rustc_codegen_ssa::MemFlags;
67
use rustc_codegen_ssa::common::TypeKind;
78
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
89
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
910
use rustc_middle::bug;
10-
use rustc_middle::ty::offload_meta::{MappingFlags, OffloadMetadata};
11+
use rustc_middle::ty::offload_meta::{MappingFlags, OffloadMetadata, OffloadSize};
1112

1213
use crate::builder::Builder;
1314
use crate::common::CodegenCx;
@@ -450,7 +451,15 @@ pub(crate) fn gen_define_handling<'ll>(
450451
// FIXME(offload): add `OMP_MAP_TARGET_PARAM = 0x20` only if necessary
451452
let transfer_kernel = vec![MappingFlags::TARGET_PARAM.bits(); transfer_to.len()];
452453

453-
let offload_sizes = add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &sizes);
454+
let actual_sizes = sizes
455+
.iter()
456+
.map(|s| match s {
457+
OffloadSize::Static(sz) => *sz,
458+
OffloadSize::Dynamic => 0,
459+
})
460+
.collect::<Vec<_>>();
461+
let offload_sizes =
462+
add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &actual_sizes);
454463
let memtransfer_begin =
455464
add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{symbol}.begin"), &transfer_to);
456465
let memtransfer_kernel =
@@ -499,9 +508,6 @@ pub(crate) fn gen_define_handling<'ll>(
499508
region_id,
500509
};
501510

502-
// FIXME(Sa4dUs): use this global for constant offload sizes
503-
cx.add_compiler_used_global(result.offload_sizes);
504-
505511
cx.offload_kernel_cache.borrow_mut().insert(symbol, result);
506512

507513
result
@@ -535,6 +541,15 @@ pub(crate) fn scalar_width<'ll>(cx: &'ll SimpleCx<'_>, ty: &'ll Type) -> u64 {
535541
}
536542
}
537543

544+
fn get_runtime_size<'ll, 'tcx>(
545+
_cx: &CodegenCx<'ll, 'tcx>,
546+
_val: &'ll Value,
547+
_meta: &OffloadMetadata,
548+
) -> &'ll Value {
549+
// FIXME(Sa4dUs): handle dynamic-size data (e.g. slices)
550+
bug!("offload does not support dynamic sizes yet");
551+
}
552+
538553
// For each kernel *call*, we now use some of our previous declared globals to move data to and from
539554
// the gpu. For now, we only handle the data transfer part of it.
540555
// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu.
@@ -564,15 +579,17 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
564579
) {
565580
let cx = builder.cx;
566581
let OffloadKernelGlobals {
582+
offload_sizes,
567583
memtransfer_begin,
568584
memtransfer_kernel,
569585
memtransfer_end,
570586
region_id,
571-
..
572587
} = offload_data;
573588
let OffloadKernelDims { num_workgroups, threads_per_block, workgroup_dims, thread_dims } =
574589
offload_dims;
575590

591+
let has_dynamic = metadata.iter().any(|m| matches!(m.payload_size, OffloadSize::Dynamic));
592+
576593
let tgt_decl = offload_globals.launcher_fn;
577594
let tgt_target_kernel_ty = offload_globals.launcher_ty;
578595

@@ -596,7 +613,24 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
596613
let a2 = builder.direct_alloca(ty, Align::EIGHT, ".offload_ptrs");
597614
// These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
598615
let ty2 = cx.type_array(cx.type_i64(), num_args);
599-
let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");
616+
617+
let a4 = if has_dynamic {
618+
let alloc = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");
619+
620+
builder.memcpy(
621+
alloc,
622+
Align::EIGHT,
623+
offload_sizes,
624+
Align::EIGHT,
625+
cx.get_const_i64(8 * args.len() as u64),
626+
MemFlags::empty(),
627+
None,
628+
);
629+
630+
alloc
631+
} else {
632+
offload_sizes
633+
};
600634

601635
//%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
602636
let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
@@ -648,9 +682,12 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
648682
builder.store(vals[i as usize], gep1, Align::EIGHT);
649683
let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]);
650684
builder.store(geps[i as usize], gep2, Align::EIGHT);
651-
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]);
652-
// FIXME(offload): write an offload frontend and handle arbitrary types.
653-
builder.store(cx.get_const_i64(metadata[i as usize].payload_size), gep3, Align::EIGHT);
685+
686+
if matches!(metadata[i as usize].payload_size, OffloadSize::Dynamic) {
687+
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]);
688+
let size_val = get_runtime_size(cx, args[i as usize], &metadata[i as usize]);
689+
builder.store(size_val, gep3, Align::EIGHT);
690+
}
654691
}
655692

656693
// For now we have a very simplistic indexing scheme into our
@@ -662,13 +699,14 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
662699
a1: &'ll Value,
663700
a2: &'ll Value,
664701
a4: &'ll Value,
702+
is_dynamic: bool,
665703
) -> [&'ll Value; 3] {
666704
let cx = builder.cx;
667705
let i32_0 = cx.get_const_i32(0);
668706

669707
let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, i32_0]);
670708
let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, i32_0]);
671-
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]);
709+
let gep3 = if is_dynamic { builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]) } else { a4 };
672710
[gep1, gep2, gep3]
673711
}
674712

@@ -692,7 +730,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
692730

693731
// Step 2)
694732
let s_ident_t = offload_globals.ident_t_global;
695-
let geps = get_geps(builder, ty, ty2, a1, a2, a4);
733+
let geps = get_geps(builder, ty, ty2, a1, a2, a4, has_dynamic);
696734
generate_mapper_call(
697735
builder,
698736
geps,
@@ -725,7 +763,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
725763
// %41 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)
726764

727765
// Step 4)
728-
let geps = get_geps(builder, ty, ty2, a1, a2, a4);
766+
let geps = get_geps(builder, ty, ty2, a1, a2, a4, has_dynamic);
729767
generate_mapper_call(
730768
builder,
731769
geps,

compiler/rustc_codegen_ssa/src/mir/block.rs

Lines changed: 28 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1256,55 +1256,37 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
12561256
}
12571257
}
12581258
CallKind::Tail => {
1259-
match fn_abi.args[i].mode {
1260-
PassMode::Indirect { on_stack: false, .. } => {
1261-
let Some(tmp) = tail_call_temporaries[i].take() else {
1262-
span_bug!(
1263-
fn_span,
1264-
"missing temporary for indirect tail call argument #{i}"
1265-
)
1266-
};
1267-
1268-
let local = self.mir.args_iter().nth(i).unwrap();
1269-
1270-
match &self.locals[local] {
1271-
LocalRef::Place(arg) => {
1272-
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
1273-
op.val = Ref(arg.val);
1274-
}
1275-
LocalRef::Operand(arg) => {
1276-
let Ref(place_value) = arg.val else {
1277-
bug!("only `Ref` should use `PassMode::Indirect`");
1278-
};
1279-
bx.typed_place_copy(
1280-
place_value,
1281-
tmp.val,
1282-
fn_abi.args[i].layout,
1283-
);
1284-
op.val = arg.val;
1285-
}
1286-
LocalRef::UnsizedPlace(_) => {
1287-
span_bug!(fn_span, "unsized types are not supported")
1288-
}
1289-
LocalRef::PendingOperand => {
1290-
span_bug!(fn_span, "argument local should not be pending")
1291-
}
1292-
};
1293-
1294-
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
1295-
}
1296-
PassMode::Indirect { on_stack: true, .. } => {
1297-
// FIXME: some LLVM backends (notably x86) do not correctly pass byval
1298-
// arguments to tail calls (as of LLVM 21). See also:
1299-
//
1300-
// - https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841
1301-
// - https://github.com/rust-lang/rust/issues/144855
1259+
if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode {
1260+
let Some(tmp) = tail_call_temporaries[i].take() else {
13021261
span_bug!(
13031262
fn_span,
1304-
"arguments using PassMode::Indirect {{ on_stack: true, .. }} are currently not supported for tail calls"
1263+
"missing temporary for indirect tail call argument #{i}"
13051264
)
1306-
}
1307-
_ => (),
1265+
};
1266+
1267+
let local = self.mir.args_iter().nth(i).unwrap();
1268+
1269+
match &self.locals[local] {
1270+
LocalRef::Place(arg) => {
1271+
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
1272+
op.val = Ref(arg.val);
1273+
}
1274+
LocalRef::Operand(arg) => {
1275+
let Ref(place_value) = arg.val else {
1276+
bug!("only `Ref` should use `PassMode::Indirect`");
1277+
};
1278+
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
1279+
op.val = arg.val;
1280+
}
1281+
LocalRef::UnsizedPlace(_) => {
1282+
span_bug!(fn_span, "unsized types are not supported")
1283+
}
1284+
LocalRef::PendingOperand => {
1285+
span_bug!(fn_span, "argument local should not be pending")
1286+
}
1287+
};
1288+
1289+
bx.lifetime_end(tmp.val.llval, tmp.layout.size);
13081290
}
13091291
}
13101292
}

compiler/rustc_lexer/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,8 @@ pub fn is_whitespace(c: char) -> bool {
367367

368368
/// True if `c` is considered horizontal whitespace according to Rust language definition.
369369
pub fn is_horizontal_whitespace(c: char) -> bool {
370-
// This is Pattern_White_Space.
370+
// This is the horizontal space subset of `Pattern_White_Space` as
371+
// categorized by UAX #31, Section 4.1.
371372
//
372373
// Note that this set is stable (ie, it doesn't change with different
373374
// Unicode versions), so it's ok to just hard-code the values.

compiler/rustc_llvm/build.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -476,15 +476,15 @@ fn main() {
476476
// C++ runtime library
477477
if !target.contains("msvc") {
478478
if let Some(s) = llvm_static_stdcpp {
479-
assert!(cxxflags_iter.all(|flag| flag != "stdlib=libc++"));
479+
assert!(cxxflags_iter.all(|flag| flag != "-stdlib=libc++"));
480480
let path = PathBuf::from(s);
481481
println!("cargo:rustc-link-search=native={}", path.parent().unwrap().display());
482482
if target.contains("windows") {
483483
println!("cargo:rustc-link-lib=static:-bundle={stdcppname}");
484484
} else {
485485
println!("cargo:rustc-link-lib=static={stdcppname}");
486486
}
487-
} else if cxxflags_iter.any(|flag| flag == "stdlib=libc++") {
487+
} else if cxxflags_iter.any(|flag| flag == "-stdlib=libc++") {
488488
println!("cargo:rustc-link-lib=c++");
489489
} else {
490490
println!("cargo:rustc-link-lib={stdcppname}");

compiler/rustc_middle/src/query/plumbing.rs

Lines changed: 10 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ use rustc_macros::HashStable;
1111
use rustc_span::{ErrorGuaranteed, Span};
1212
pub use sealed::IntoQueryParam;
1313

14-
use crate::dep_graph::{DepKind, DepNode, DepNodeIndex, SerializedDepNodeIndex};
14+
use crate::dep_graph::{DepKind, DepNodeIndex, SerializedDepNodeIndex};
1515
use crate::ich::StableHashingContext;
1616
use crate::queries::{ExternProviders, Providers, QueryArenas, QueryVTables};
1717
use crate::query::on_disk_cache::OnDiskCache;
@@ -113,7 +113,6 @@ pub struct QueryVTable<'tcx, C: QueryCache> {
113113
pub cycle_error_handling: CycleErrorHandling,
114114
pub state: QueryState<'tcx, C::Key>,
115115
pub cache: C,
116-
pub will_cache_on_disk_for_key_fn: Option<fn(tcx: TyCtxt<'tcx>, key: &C::Key) -> bool>,
117116

118117
/// Function pointer that calls `tcx.$query(key)` for this query and
119118
/// discards the returned value.
@@ -129,17 +128,17 @@ pub struct QueryVTable<'tcx, C: QueryCache> {
129128
/// This should be the only code that calls the provider function.
130129
pub invoke_provider_fn: fn(tcx: TyCtxt<'tcx>, key: C::Key) -> C::Value,
131130

132-
pub try_load_from_disk_fn: Option<
133-
fn(
134-
tcx: TyCtxt<'tcx>,
135-
key: &C::Key,
136-
prev_index: SerializedDepNodeIndex,
137-
index: DepNodeIndex,
138-
) -> Option<C::Value>,
139-
>,
131+
pub will_cache_on_disk_for_key_fn: fn(tcx: TyCtxt<'tcx>, key: &C::Key) -> bool,
132+
133+
pub try_load_from_disk_fn: fn(
134+
tcx: TyCtxt<'tcx>,
135+
key: &C::Key,
136+
prev_index: SerializedDepNodeIndex,
137+
index: DepNodeIndex,
138+
) -> Option<C::Value>,
140139

141140
pub is_loadable_from_disk_fn:
142-
Option<fn(tcx: TyCtxt<'tcx>, key: &C::Key, index: SerializedDepNodeIndex) -> bool>,
141+
fn(tcx: TyCtxt<'tcx>, key: &C::Key, index: SerializedDepNodeIndex) -> bool,
143142

144143
/// Function pointer that hashes this query's result values.
145144
///
@@ -180,49 +179,6 @@ impl<'tcx, C: QueryCache> fmt::Debug for QueryVTable<'tcx, C> {
180179
}
181180
}
182181

183-
impl<'tcx, C: QueryCache> QueryVTable<'tcx, C> {
184-
#[inline(always)]
185-
pub fn will_cache_on_disk_for_key(&self, tcx: TyCtxt<'tcx>, key: &C::Key) -> bool {
186-
self.will_cache_on_disk_for_key_fn.map_or(false, |f| f(tcx, key))
187-
}
188-
189-
#[inline(always)]
190-
pub fn try_load_from_disk(
191-
&self,
192-
tcx: TyCtxt<'tcx>,
193-
key: &C::Key,
194-
prev_index: SerializedDepNodeIndex,
195-
index: DepNodeIndex,
196-
) -> Option<C::Value> {
197-
// `?` will return None immediately for queries that never cache to disk.
198-
self.try_load_from_disk_fn?(tcx, key, prev_index, index)
199-
}
200-
201-
#[inline]
202-
pub fn is_loadable_from_disk(
203-
&self,
204-
tcx: TyCtxt<'tcx>,
205-
key: &C::Key,
206-
index: SerializedDepNodeIndex,
207-
) -> bool {
208-
self.is_loadable_from_disk_fn.map_or(false, |f| f(tcx, key, index))
209-
}
210-
211-
/// Synthesize an error value to let compilation continue after a cycle.
212-
pub fn value_from_cycle_error(
213-
&self,
214-
tcx: TyCtxt<'tcx>,
215-
cycle_error: CycleError,
216-
guar: ErrorGuaranteed,
217-
) -> C::Value {
218-
(self.value_from_cycle_error)(tcx, cycle_error, guar)
219-
}
220-
221-
pub fn construct_dep_node(&self, tcx: TyCtxt<'tcx>, key: &C::Key) -> DepNode {
222-
DepNode::construct(tcx, self.dep_kind, key)
223-
}
224-
}
225-
226182
pub struct QuerySystem<'tcx> {
227183
pub arenas: WorkerLocal<QueryArenas<'tcx>>,
228184
pub query_vtables: QueryVTables<'tcx>,

0 commit comments

Comments
 (0)