Skip to content

Commit 0022920

Browse files
authored
Merge pull request #2141 from jlpetz/clflushopt-intrinsic
Add `_mm_clflushopt` intrinsic
2 parents 2513831 + 206b5a4 commit 0022920

6 files changed

Lines changed: 69 additions & 11 deletions

File tree

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,6 @@
3333
</p></details>
3434

3535

36-
<details><summary>["CLFLUSHOPT"]</summary><p>
37-
38-
* [ ] [`_mm_clflushopt`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflushopt)
39-
</p></details>
40-
41-
4236
<details><summary>["CLWB"]</summary><p>
4337

4438
* [ ] [`_mm_clwb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clwb)

crates/core_arch/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
const_eval_select,
4141
maybe_uninit_as_bytes,
4242
movrs_target_feature,
43+
clflushopt_target_feature,
4344
min_adt_const_params
4445
)]
4546
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
crates/core_arch/src/x86/clflushopt.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//! `CLFLUSHOPT` cache-line flush.
2+
3+
#[cfg(test)]
4+
use stdarch_test::assert_instr;
5+
6+
#[allow(improper_ctypes)]
7+
unsafe extern "unadjusted" {
8+
#[link_name = "llvm.x86.clflushopt"]
9+
fn clflushopt(p: *const u8);
10+
}
11+
12+
/// Invalidates from every level of the cache hierarchy the cache line that
13+
/// contains `p`.
14+
///
15+
/// Unlike [`_mm_clflush`], `CLFLUSHOPT` is only ordered with respect to older
16+
/// writes to the flushed cache line and with respect to fence/locked
17+
/// operations; it is *not* serialized against other `CLFLUSHOPT`/`CLFLUSH`
18+
/// instructions or unrelated stores. This makes flushing a range of lines
19+
/// substantially faster, but a fence (e.g. [`_mm_sfence`] or [`_mm_mfence`]) is
20+
/// required afterward to order the flushes against subsequent operations.
21+
///
22+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflushopt)
23+
///
24+
/// # Safety
25+
///
26+
/// Unlike the prefetch intrinsics, `CLFLUSHOPT` is subject to all the
27+
/// permission checking and faults associated with a byte load, so `p` must
28+
/// point to a byte that is valid for reads.
29+
///
30+
/// [`_mm_clflush`]: crate::arch::x86::_mm_clflush
31+
/// [`_mm_sfence`]: crate::arch::x86::_mm_sfence
32+
/// [`_mm_mfence`]: crate::arch::x86::_mm_mfence
33+
#[inline]
34+
#[target_feature(enable = "clflushopt")]
35+
#[cfg_attr(test, assert_instr(clflushopt))]
36+
#[unstable(feature = "simd_x86_clflushopt", issue = "157096")]
37+
pub unsafe fn _mm_clflushopt(p: *const u8) {
38+
clflushopt(p);
39+
}
40+
41+
#[cfg(test)]
42+
mod tests {
43+
use crate::core_arch::x86::*;
44+
use stdarch_test::simd_test;
45+
46+
#[simd_test(enable = "clflushopt")]
47+
unsafe fn test_mm_clflushopt() {
48+
let x = 0_u8;
49+
_mm_clflushopt(core::ptr::addr_of!(x));
50+
}
51+
}

crates/core_arch/src/x86/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,10 @@ mod adx;
692692
#[stable(feature = "simd_x86_adx", since = "1.33.0")]
693693
pub use self::adx::*;
694694

695+
mod clflushopt;
696+
#[unstable(feature = "simd_x86_clflushopt", issue = "157096")]
697+
pub use self::clflushopt::*;
698+
695699
#[cfg(test)]
696700
use stdarch_test::assert_instr;
697701

crates/core_arch/src/x86/sse2.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ pub fn _mm_pause() {
2929
/// the cache hierarchy.
3030
///
3131
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
32+
///
33+
/// # Safety
34+
///
35+
/// Unlike the prefetch intrinsics, `CLFLUSH` is subject to all the permission
36+
/// checking and faults associated with a byte load, so `p` must point to a
37+
/// byte that is valid for reads.
3238
#[inline]
3339
#[target_feature(enable = "sse2")]
3440
#[cfg_attr(test, assert_instr(clflush))]

crates/stdarch-verify/tests/x86-intel.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -663,12 +663,14 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
663663
fn pointed_type(intrinsic: &Intrinsic) -> Result<Type, String> {
664664
Ok(
665665
if intrinsic.tech == "AMX"
666-
|| intrinsic
667-
.cpuid
668-
.iter()
669-
.any(|cpuid| matches!(&**cpuid, "KEYLOCKER" | "KEYLOCKER_WIDE" | "XSAVE" | "FXSR"))
666+
|| intrinsic.cpuid.iter().any(|cpuid| {
667+
matches!(
668+
&**cpuid,
669+
"KEYLOCKER" | "KEYLOCKER_WIDE" | "XSAVE" | "FXSR" | "CLFLUSHOPT"
670+
)
671+
})
670672
{
671-
// AMX, KEYLOCKER and XSAVE intrinsics should take `*u8`
673+
// AMX, KEYLOCKER, XSAVE, FXSR and CLFLUSHOPT intrinsics should take `*u8`
672674
U8
673675
} else if intrinsic.name == "_mm_clflush" {
674676
// Just a false match in the following logic

0 commit comments

Comments
 (0)