Skip to content

Commit 6398eb0

Browse files
authored
Merge pull request #2081 from sayantn/avx512vp2intersect
Implement AVX512-VP2INTERSECT intrinsics
2 parents 8539773 + 10936fd commit 6398eb0

3 files changed

Lines changed: 248 additions & 16 deletions

File tree

crates/core_arch/missing-x86.md

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,22 +44,6 @@
4444
</p></details>
4545

4646

47-
<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
48-
49-
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
50-
* [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
51-
</p></details>
52-
53-
54-
<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
55-
56-
* [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
57-
* [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
58-
* [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
59-
* [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
60-
</p></details>
61-
62-
6347
<details><summary>["CET_SS"]</summary><p>
6448

6549
* [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
//! Vector Pair Intersection to a Pair of Mask Registers (VP2INTERSECT)
2+
3+
use crate::core_arch::{simd::*, x86::*};
4+
5+
#[cfg(test)]
6+
use stdarch_test::assert_instr;
7+
8+
/// Compute intersection of packed 32-bit integer vectors a and b,
9+
/// and store indication of match in the corresponding bit of two mask registers
10+
/// specified by k1 and k2. A match in corresponding elements of a and b is
11+
/// indicated by a set bit in the corresponding bit of the mask registers.
12+
///
13+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32&expand=0)
14+
#[inline]
15+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
16+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
17+
#[cfg_attr(test, assert_instr(vp2intersectd))]
18+
pub unsafe fn _mm_2intersect_epi32(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
19+
(*k1, *k2) = vp2intersectd_128(a.as_i32x4(), b.as_i32x4());
20+
}
21+
22+
/// Compute intersection of packed 64-bit integer vectors a and b,
23+
/// and store indication of match in the corresponding bit of two mask registers
24+
/// specified by k1 and k2. A match in corresponding elements of a and b is
25+
/// indicated by a set bit in the corresponding bit of the mask registers.
26+
///
27+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64&expand=0)
28+
#[inline]
29+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
30+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
31+
#[cfg_attr(test, assert_instr(vp2intersectq))]
32+
pub unsafe fn _mm_2intersect_epi64(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
33+
(*k1, *k2) = vp2intersectq_128(a.as_i64x2(), b.as_i64x2());
34+
}
35+
36+
/// Compute intersection of packed 32-bit integer vectors a and b,
37+
/// and store indication of match in the corresponding bit of two mask registers
38+
/// specified by k1 and k2. A match in corresponding elements of a and b is
39+
/// indicated by a set bit in the corresponding bit of the mask registers.
40+
///
41+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32&expand=0)
42+
#[inline]
43+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
44+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
45+
#[cfg_attr(test, assert_instr(vp2intersectd))]
46+
pub unsafe fn _mm256_2intersect_epi32(
47+
a: __m256i,
48+
b: __m256i,
49+
k1: *mut __mmask8,
50+
k2: *mut __mmask8,
51+
) {
52+
(*k1, *k2) = vp2intersectd_256(a.as_i32x8(), b.as_i32x8());
53+
}
54+
55+
/// Compute intersection of packed 64-bit integer vectors a and b,
56+
/// and store indication of match in the corresponding bit of two mask registers
57+
/// specified by k1 and k2. A match in corresponding elements of a and b is
58+
/// indicated by a set bit in the corresponding bit of the mask registers.
59+
///
60+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64&expand=0)
61+
#[inline]
62+
#[target_feature(enable = "avx512vp2intersect,avx512vl")]
63+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
64+
#[cfg_attr(test, assert_instr(vp2intersectq))]
65+
pub unsafe fn _mm256_2intersect_epi64(
66+
a: __m256i,
67+
b: __m256i,
68+
k1: *mut __mmask8,
69+
k2: *mut __mmask8,
70+
) {
71+
(*k1, *k2) = vp2intersectq_256(a.as_i64x4(), b.as_i64x4());
72+
}
73+
74+
/// Compute intersection of packed 32-bit integer vectors a and b,
75+
/// and store indication of match in the corresponding bit of two mask registers
76+
/// specified by k1 and k2. A match in corresponding elements of a and b is
77+
/// indicated by a set bit in the corresponding bit of the mask registers.
78+
///
79+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32&expand=0)
80+
#[inline]
81+
#[target_feature(enable = "avx512vp2intersect,avx512f")]
82+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
83+
#[cfg_attr(test, assert_instr(vp2intersectd))]
84+
pub unsafe fn _mm512_2intersect_epi32(
85+
a: __m512i,
86+
b: __m512i,
87+
k1: *mut __mmask16,
88+
k2: *mut __mmask16,
89+
) {
90+
(*k1, *k2) = vp2intersectd_512(a.as_i32x16(), b.as_i32x16());
91+
}
92+
93+
/// Compute intersection of packed 64-bit integer vectors a and b,
94+
/// and store indication of match in the corresponding bit of two mask registers
95+
/// specified by k1 and k2. A match in corresponding elements of a and b is
96+
/// indicated by a set bit in the corresponding bit of the mask registers.
97+
///
98+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64&expand=0)
99+
#[inline]
100+
#[target_feature(enable = "avx512vp2intersect,avx512f")]
101+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
102+
#[cfg_attr(test, assert_instr(vp2intersectq))]
103+
pub unsafe fn _mm512_2intersect_epi64(
104+
a: __m512i,
105+
b: __m512i,
106+
k1: *mut __mmask8,
107+
k2: *mut __mmask8,
108+
) {
109+
(*k1, *k2) = vp2intersectq_512(a.as_i64x8(), b.as_i64x8());
110+
}
111+
112+
#[allow(improper_ctypes)]
113+
unsafe extern "C" {
114+
#[link_name = "llvm.x86.avx512.vp2intersect.d.128"]
115+
fn vp2intersectd_128(a: i32x4, b: i32x4) -> (u8, u8);
116+
#[link_name = "llvm.x86.avx512.vp2intersect.q.128"]
117+
fn vp2intersectq_128(a: i64x2, b: i64x2) -> (u8, u8);
118+
119+
#[link_name = "llvm.x86.avx512.vp2intersect.d.256"]
120+
fn vp2intersectd_256(a: i32x8, b: i32x8) -> (u8, u8);
121+
#[link_name = "llvm.x86.avx512.vp2intersect.q.256"]
122+
fn vp2intersectq_256(a: i64x4, b: i64x4) -> (u8, u8);
123+
124+
#[link_name = "llvm.x86.avx512.vp2intersect.d.512"]
125+
fn vp2intersectd_512(a: i32x16, b: i32x16) -> (u16, u16);
126+
#[link_name = "llvm.x86.avx512.vp2intersect.q.512"]
127+
fn vp2intersectq_512(a: i64x8, b: i64x8) -> (u8, u8);
128+
}
129+
130+
#[cfg(test)]
131+
mod tests {
132+
use crate::core_arch::x86::*;
133+
use stdarch_test::simd_test;
134+
135+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
136+
unsafe fn test_mm_2intersect_epi32() {
137+
let mut k1 = 0;
138+
let mut k2 = 0;
139+
140+
let a = _mm_set_epi32(1, 2, 3, 4);
141+
let b = _mm_set_epi32(3, 4, 5, 6);
142+
_mm_2intersect_epi32(a, b, &mut k1, &mut k2);
143+
assert_eq!(k1, 0b0011);
144+
assert_eq!(k2, 0b1100);
145+
146+
let a = _mm_set_epi32(1, 2, 3, 4);
147+
let b = _mm_set_epi32(2, 3, 4, 5);
148+
_mm_2intersect_epi32(a, b, &mut k1, &mut k2);
149+
assert_eq!(k1, 0b0111);
150+
assert_eq!(k2, 0b1110);
151+
}
152+
153+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
154+
unsafe fn test_mm_2intersect_epi64() {
155+
let mut k1 = 0;
156+
let mut k2 = 0;
157+
158+
let a = _mm_set_epi64x(1, 2);
159+
let b = _mm_set_epi64x(2, 3);
160+
_mm_2intersect_epi64(a, b, &mut k1, &mut k2);
161+
assert_eq!(k1, 0b01);
162+
assert_eq!(k2, 0b10);
163+
164+
let a = _mm_set_epi64x(1, 2);
165+
let b = _mm_set_epi64x(2, 2);
166+
_mm_2intersect_epi64(a, b, &mut k1, &mut k2);
167+
assert_eq!(k1, 0b01);
168+
assert_eq!(k2, 0b11);
169+
}
170+
171+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
172+
unsafe fn test_mm256_2intersect_epi32() {
173+
let mut k1 = 0;
174+
let mut k2 = 0;
175+
176+
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
177+
let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
178+
_mm256_2intersect_epi32(a, b, &mut k1, &mut k2);
179+
assert_eq!(k1, 0b00001111);
180+
assert_eq!(k2, 0b11110000);
181+
182+
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
183+
let b = _mm256_set_epi32(2, 3, 4, 5, 6, 7, 8, 9);
184+
_mm256_2intersect_epi32(a, b, &mut k1, &mut k2);
185+
assert_eq!(k1, 0b01111111);
186+
assert_eq!(k2, 0b11111110);
187+
}
188+
189+
#[simd_test(enable = "avx512vp2intersect,avx512vl")]
190+
unsafe fn test_mm256_2intersect_epi64() {
191+
let mut k1 = 0;
192+
let mut k2 = 0;
193+
194+
let a = _mm256_set_epi64x(1, 2, 3, 4);
195+
let b = _mm256_set_epi64x(3, 4, 5, 6);
196+
_mm256_2intersect_epi64(a, b, &mut k1, &mut k2);
197+
assert_eq!(k1, 0b0011);
198+
assert_eq!(k2, 0b1100);
199+
200+
let a = _mm256_set_epi64x(1, 2, 3, 4);
201+
let b = _mm256_set_epi64x(2, 3, 4, 5);
202+
_mm256_2intersect_epi64(a, b, &mut k1, &mut k2);
203+
assert_eq!(k1, 0b0111);
204+
assert_eq!(k2, 0b1110);
205+
}
206+
207+
#[simd_test(enable = "avx512vp2intersect,avx512f")]
208+
unsafe fn test_mm512_2intersect_epi32() {
209+
let mut k1 = 0;
210+
let mut k2 = 0;
211+
212+
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
213+
let b = _mm512_set_epi32(
214+
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
215+
);
216+
_mm512_2intersect_epi32(a, b, &mut k1, &mut k2);
217+
assert_eq!(k1, 0b0000000011111111);
218+
assert_eq!(k2, 0b1111111100000000);
219+
220+
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
221+
let b = _mm512_set_epi32(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
222+
_mm512_2intersect_epi32(a, b, &mut k1, &mut k2);
223+
assert_eq!(k1, 0b0111111111111111);
224+
assert_eq!(k2, 0b1111111111111110);
225+
}
226+
227+
#[simd_test(enable = "avx512vp2intersect,avx512f")]
228+
unsafe fn test_mm512_2intersect_epi64() {
229+
let mut k1 = 0;
230+
let mut k2 = 0;
231+
232+
let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
233+
let b = _mm512_set_epi64(5, 6, 7, 8, 9, 10, 11, 12);
234+
_mm512_2intersect_epi64(a, b, &mut k1, &mut k2);
235+
assert_eq!(k1, 0b00001111);
236+
assert_eq!(k2, 0b11110000);
237+
238+
let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
239+
let b = _mm512_set_epi64(2, 3, 4, 5, 6, 7, 8, 9);
240+
_mm512_2intersect_epi64(a, b, &mut k1, &mut k2);
241+
assert_eq!(k1, 0b01111111);
242+
assert_eq!(k2, 0b11111110);
243+
}
244+
}

crates/core_arch/src/x86/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,3 +778,7 @@ pub use self::kl::*;
778778
mod movrs;
779779
#[unstable(feature = "movrs_target_feature", issue = "137976")]
780780
pub use self::movrs::*;
781+
782+
mod avx512vp2intersect;
783+
#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
784+
pub use self::avx512vp2intersect::*;

0 commit comments

Comments
 (0)