Skip to content

Commit 3d95eb9

Browse files
Auto merge of #150848 - cuviper:beta-next, r=cuviper
[beta] backports - Revert "Rollup merge of #149147 - chenyukang:yukang-fix-unused_assignments-macro-gen-147648, r=JonathanBrouwer" #149657 - Don't lint on interior mutable `const` item coming from derefs #150166 - stdarch subtree update #150639 (partial) r? cuviper
2 parents 72b6488 + d851fe6 commit 3d95eb9

10 files changed

Lines changed: 153 additions & 48 deletions

File tree

compiler/rustc_lint/src/interior_mutable_consts.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use rustc_hir::attrs::AttributeKind;
22
use rustc_hir::def::{DefKind, Res};
33
use rustc_hir::{Expr, ExprKind, ItemKind, Node, find_attr};
4+
use rustc_middle::ty::adjustment::Adjust;
45
use rustc_session::{declare_lint, declare_lint_pass};
56

67
use crate::lints::{ConstItemInteriorMutationsDiag, ConstItemInteriorMutationsSuggestionStatic};
@@ -77,6 +78,13 @@ impl<'tcx> LateLintPass<'tcx> for InteriorMutableConsts {
7778
if let ExprKind::Path(qpath) = &receiver.kind
7879
&& let Res::Def(DefKind::Const | DefKind::AssocConst, const_did) =
7980
typeck.qpath_res(qpath, receiver.hir_id)
81+
// Don't consider derefs as those can do arbitrary things
82+
// like using thread local (see rust-lang/rust#150157)
83+
&& !cx
84+
.typeck_results()
85+
.expr_adjustments(receiver)
86+
.into_iter()
87+
.any(|adj| matches!(adj.kind, Adjust::Deref(_)))
8088
// Let's do the attribute check after the other checks for perf reasons
8189
&& find_attr!(
8290
cx.tcx.get_all_attrs(method_did),

compiler/rustc_mir_transform/src/liveness.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,6 @@ pub(crate) fn check_liveness<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> Den
7575
return DenseBitSet::new_empty(0);
7676
}
7777

78-
// Don't run unused pass for items generated by foreign macros
79-
if tcx.def_span(parent).in_external_macro(tcx.sess.source_map()) {
80-
return DenseBitSet::new_empty(0);
81-
}
82-
8378
let mut body = &*tcx.mir_promoted(def_id).0.borrow();
8479
let mut body_mem;
8580

library/stdarch/crates/core_arch/src/x86/avx2.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1754,12 +1754,19 @@ pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m25
17541754
#[cfg_attr(test, assert_instr(vpmaddwd))]
17551755
#[stable(feature = "simd_x86", since = "1.27.0")]
17561756
pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1757-
unsafe {
1758-
let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
1759-
let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
1760-
let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
1761-
simd_add(even, odd).as_m256i()
1762-
}
1757+
// It's a trick used in the Adler-32 algorithm to perform a widening addition.
1758+
//
1759+
// ```rust
1760+
// #[target_feature(enable = "avx2")]
1761+
// unsafe fn widening_add(mad: __m256i) -> __m256i {
1762+
// _mm256_madd_epi16(mad, _mm256_set1_epi16(1))
1763+
// }
1764+
// ```
1765+
//
1766+
// If we implement this using generic vector intrinsics, the optimizer
1767+
// will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
1768+
// For this reason, we use x86 intrinsics.
1769+
unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
17631770
}
17641771

17651772
/// Vertically multiplies each unsigned 8-bit integer from `a` with the
@@ -3701,6 +3708,8 @@ unsafe extern "C" {
37013708
fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
37023709
#[link_name = "llvm.x86.avx2.phsub.sw"]
37033710
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3711+
#[link_name = "llvm.x86.avx2.pmadd.wd"]
3712+
fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
37043713
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
37053714
fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
37063715
#[link_name = "llvm.x86.avx2.mpsadbw"]

library/stdarch/crates/core_arch/src/x86/avx512bw.rs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5847,20 +5847,19 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
58475847
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
58485848
#[cfg_attr(test, assert_instr(vpmaddwd))]
58495849
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
5850-
unsafe {
5851-
let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
5852-
let even: i32x16 = simd_shuffle!(
5853-
r,
5854-
r,
5855-
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
5856-
);
5857-
let odd: i32x16 = simd_shuffle!(
5858-
r,
5859-
r,
5860-
[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
5861-
);
5862-
simd_add(even, odd).as_m512i()
5863-
}
5850+
// It's a trick used in the Adler-32 algorithm to perform a widening addition.
5851+
//
5852+
// ```rust
5853+
// #[target_feature(enable = "avx512bw")]
5854+
// unsafe fn widening_add(mad: __m512i) -> __m512i {
5855+
// _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
5856+
// }
5857+
// ```
5858+
//
5859+
// If we implement this using generic vector intrinsics, the optimizer
5860+
// will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
5861+
// For this reason, we use x86 intrinsics.
5862+
unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
58645863
}
58655864

58665865
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11687,6 +11686,8 @@ unsafe extern "C" {
1168711686
#[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
1168811687
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
1168911688

11689+
#[link_name = "llvm.x86.avx512.pmaddw.d.512"]
11690+
fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
1169011691
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
1169111692
fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
1169211693

library/stdarch/crates/core_arch/src/x86/sse2.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,12 +201,19 @@ pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
201201
#[cfg_attr(test, assert_instr(pmaddwd))]
202202
#[stable(feature = "simd_x86", since = "1.27.0")]
203203
pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
204-
unsafe {
205-
let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8()));
206-
let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]);
207-
let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]);
208-
simd_add(even, odd).as_m128i()
209-
}
204+
// It's a trick used in the Adler-32 algorithm to perform a widening addition.
205+
//
206+
// ```rust
207+
// #[target_feature(enable = "sse2")]
208+
// unsafe fn widening_add(mad: __m128i) -> __m128i {
209+
// _mm_madd_epi16(mad, _mm_set1_epi16(1))
210+
// }
211+
// ```
212+
//
213+
// If we implement this using generic vector intrinsics, the optimizer
214+
// will eliminate this pattern, and `pmaddwd` will no longer be emitted.
215+
// For this reason, we use x86 intrinsics.
216+
unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
210217
}
211218

212219
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -3054,6 +3061,8 @@ unsafe extern "C" {
30543061
fn lfence();
30553062
#[link_name = "llvm.x86.sse2.mfence"]
30563063
fn mfence();
3064+
#[link_name = "llvm.x86.sse2.pmadd.wd"]
3065+
fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
30573066
#[link_name = "llvm.x86.sse2.psad.bw"]
30583067
fn psadbw(a: u8x16, b: u8x16) -> u64x2;
30593068
#[link_name = "llvm.x86.sse2.psll.w"]

src/tools/miri/src/shims/x86/avx2.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,42 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
245245

246246
shift_simd_by_scalar(this, left, right, which, dest)?;
247247
}
248+
// Used to implement the _mm256_madd_epi16 function.
249+
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
250+
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
251+
// intermediate 32-bit integers, and pack the results in `dest`.
252+
"pmadd.wd" => {
253+
let [left, right] =
254+
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
255+
256+
let (left, left_len) = this.project_to_simd(left)?;
257+
let (right, right_len) = this.project_to_simd(right)?;
258+
let (dest, dest_len) = this.project_to_simd(dest)?;
259+
260+
assert_eq!(left_len, right_len);
261+
assert_eq!(dest_len.strict_mul(2), left_len);
262+
263+
for i in 0..dest_len {
264+
let j1 = i.strict_mul(2);
265+
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
266+
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
267+
268+
let j2 = j1.strict_add(1);
269+
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
270+
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
271+
272+
let dest = this.project_index(&dest, i)?;
273+
274+
// Multiplications are i16*i16->i32, which will not overflow.
275+
let mul1 = i32::from(left1).strict_mul(right1.into());
276+
let mul2 = i32::from(left2).strict_mul(right2.into());
277+
// However, this addition can overflow in the most extreme case
278+
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
279+
let res = mul1.wrapping_add(mul2);
280+
281+
this.write_scalar(Scalar::from_i32(res), &dest)?;
282+
}
283+
}
248284
_ => return interp_ok(EmulateItemResult::NotSupported),
249285
}
250286
interp_ok(EmulateItemResult::NeedsReturn)

src/tools/miri/src/shims/x86/sse2.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,42 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
278278
this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
279279
}
280280
}
281+
// Used to implement the _mm_madd_epi16 function.
282+
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
283+
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
284+
// intermediate 32-bit integers, and pack the results in `dest`.
285+
"pmadd.wd" => {
286+
let [left, right] =
287+
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
288+
289+
let (left, left_len) = this.project_to_simd(left)?;
290+
let (right, right_len) = this.project_to_simd(right)?;
291+
let (dest, dest_len) = this.project_to_simd(dest)?;
292+
293+
assert_eq!(left_len, right_len);
294+
assert_eq!(dest_len.strict_mul(2), left_len);
295+
296+
for i in 0..dest_len {
297+
let j1 = i.strict_mul(2);
298+
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
299+
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
300+
301+
let j2 = j1.strict_add(1);
302+
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
303+
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
304+
305+
let dest = this.project_index(&dest, i)?;
306+
307+
// Multiplications are i16*i16->i32, which will not overflow.
308+
let mul1 = i32::from(left1).strict_mul(right1.into());
309+
let mul2 = i32::from(left2).strict_mul(right2.into());
310+
// However, this addition can overflow in the most extreme case
311+
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
312+
let res = mul1.wrapping_add(mul2);
313+
314+
this.write_scalar(Scalar::from_i32(res), &dest)?;
315+
}
316+
}
281317
_ => return interp_ok(EmulateItemResult::NotSupported),
282318
}
283319
interp_ok(EmulateItemResult::NeedsReturn)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Regression test for <https://github.com/rust-lang/rust/issues/150157>
2+
//
3+
// We shouldn't lint on user types, including through deref.
4+
5+
//@ check-pass
6+
7+
use std::cell::Cell;
8+
use std::ops::Deref;
9+
10+
// Cut down version of the issue reproducer without the thread local to just a Deref
11+
pub struct LocalKey<T> {
12+
inner: T,
13+
}
14+
15+
impl<T> Deref for LocalKey<T> {
16+
type Target = T;
17+
18+
fn deref(&self) -> &Self::Target {
19+
&self.inner
20+
}
21+
}
22+
23+
const LOCAL_COUNT: LocalKey<Cell<usize>> = LocalKey { inner: Cell::new(8) };
24+
25+
fn main() {
26+
let count = LOCAL_COUNT.get();
27+
LOCAL_COUNT.set(count);
28+
}

tests/ui/liveness/auxiliary/aux_issue_147648.rs

Lines changed: 0 additions & 7 deletions
This file was deleted.

tests/ui/liveness/unused-assignments-from-macro-147648.rs

Lines changed: 0 additions & 10 deletions
This file was deleted.

0 commit comments

Comments
 (0)