Skip to content

Commit 8dfb357

Browse files
authored
Merge pull request #2106 from sayantn/cvtf16s16
Use LLVM intrinsics for `f16` to `{i,u}16` intrinsics
2 parents a8f7da3 + f8df4ea commit 8dfb357

6 files changed

Lines changed: 103 additions & 139 deletions

File tree

ci/docker/aarch64-unknown-linux-gnu/Dockerfile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1010
qemu-user \
1111
make \
1212
file \
13-
clang \
14-
lld
13+
xz-utils \
14+
wget
15+
16+
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
17+
RUN mkdir llvm
18+
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
19+
20+
ENV PATH="/llvm/bin:$PATH"
1521

1622
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
1723
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \

ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
99
qemu-user \
1010
make \
1111
file \
12-
clang \
1312
curl \
1413
xz-utils \
15-
lld
14+
wget
1615

1716
ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
1817

@@ -21,6 +20,12 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
2120
RUN tar -xvf "${TOOLCHAIN}.tar.xz"
2221
RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
2322

23+
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
24+
RUN mkdir llvm
25+
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
26+
27+
ENV PATH="/llvm/bin:$PATH"
28+
2429
ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
2530
ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
2631

ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1010
qemu-user \
1111
make \
1212
file \
13-
clang \
14-
lld
13+
wget
14+
15+
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
16+
RUN mkdir llvm
17+
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
18+
19+
ENV PATH="/llvm/bin:$PATH"
20+
1521
ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
1622
CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
1723
OBJDUMP=arm-linux-gnueabihf-objdump

ci/docker/x86_64-unknown-linux-gnu/Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1515
RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
1616
RUN mkdir intel-sde
1717
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
18+
19+
RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
20+
RUN mkdir llvm
21+
RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
22+
23+
ENV PATH="/llvm/bin:$PATH"
24+
1825
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
1926
-cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
2027
-rtm-mode full -tsx --"

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5516,7 +5516,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
55165516
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
55175517
#[cfg(not(target_arch = "arm64ec"))]
55185518
pub fn vcvtah_s16_f16(a: f16) -> i16 {
5519-
vcvtah_s32_f16(a) as i16
5519+
unsafe extern "unadjusted" {
5520+
#[cfg_attr(
5521+
any(target_arch = "aarch64", target_arch = "arm64ec"),
5522+
link_name = "llvm.aarch64.neon.fcvtas.i16.f16"
5523+
)]
5524+
fn _vcvtah_s16_f16(a: f16) -> i16;
5525+
}
5526+
unsafe { _vcvtah_s16_f16(a) }
55205527
}
55215528
#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
55225529
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"]
@@ -5560,7 +5567,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
55605567
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
55615568
#[cfg(not(target_arch = "arm64ec"))]
55625569
pub fn vcvtah_u16_f16(a: f16) -> u16 {
5563-
vcvtah_u32_f16(a) as u16
5570+
unsafe extern "unadjusted" {
5571+
#[cfg_attr(
5572+
any(target_arch = "aarch64", target_arch = "arm64ec"),
5573+
link_name = "llvm.aarch64.neon.fcvtau.i16.f16"
5574+
)]
5575+
fn _vcvtah_u16_f16(a: f16) -> u16;
5576+
}
5577+
unsafe { _vcvtah_u16_f16(a) }
55645578
}
55655579
#[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
55665580
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"]
@@ -6202,7 +6216,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
62026216
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
62036217
#[cfg(not(target_arch = "arm64ec"))]
62046218
pub fn vcvtmh_s16_f16(a: f16) -> i16 {
6205-
vcvtmh_s32_f16(a) as i16
6219+
unsafe extern "unadjusted" {
6220+
#[cfg_attr(
6221+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6222+
link_name = "llvm.aarch64.neon.fcvtms.i16.f16"
6223+
)]
6224+
fn _vcvtmh_s16_f16(a: f16) -> i16;
6225+
}
6226+
unsafe { _vcvtmh_s16_f16(a) }
62066227
}
62076228
#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
62086229
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"]
@@ -6238,15 +6259,22 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
62386259
}
62396260
unsafe { _vcvtmh_s64_f16(a) }
62406261
}
6241-
#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
6262+
#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
62426263
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"]
62436264
#[inline]
62446265
#[cfg_attr(test, assert_instr(fcvtmu))]
62456266
#[target_feature(enable = "neon,fp16")]
62466267
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
62476268
#[cfg(not(target_arch = "arm64ec"))]
62486269
pub fn vcvtmh_u16_f16(a: f16) -> u16 {
6249-
vcvtmh_u32_f16(a) as u16
6270+
unsafe extern "unadjusted" {
6271+
#[cfg_attr(
6272+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6273+
link_name = "llvm.aarch64.neon.fcvtmu.i16.f16"
6274+
)]
6275+
fn _vcvtmh_u16_f16(a: f16) -> u16;
6276+
}
6277+
unsafe { _vcvtmh_u16_f16(a) }
62506278
}
62516279
#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
62526280
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"]
@@ -6550,7 +6578,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
65506578
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
65516579
#[cfg(not(target_arch = "arm64ec"))]
65526580
pub fn vcvtnh_s16_f16(a: f16) -> i16 {
6553-
vcvtnh_s32_f16(a) as i16
6581+
unsafe extern "unadjusted" {
6582+
#[cfg_attr(
6583+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6584+
link_name = "llvm.aarch64.neon.fcvtns.i16.f16"
6585+
)]
6586+
fn _vcvtnh_s16_f16(a: f16) -> i16;
6587+
}
6588+
unsafe { _vcvtnh_s16_f16(a) }
65546589
}
65556590
#[doc = "Floating-point convert to integer, rounding to nearest with ties to even"]
65566591
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"]
@@ -6594,7 +6629,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
65946629
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
65956630
#[cfg(not(target_arch = "arm64ec"))]
65966631
pub fn vcvtnh_u16_f16(a: f16) -> u16 {
6597-
vcvtnh_u32_f16(a) as u16
6632+
unsafe extern "unadjusted" {
6633+
#[cfg_attr(
6634+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6635+
link_name = "llvm.aarch64.neon.fcvtnu.i16.f16"
6636+
)]
6637+
fn _vcvtnh_u16_f16(a: f16) -> u16;
6638+
}
6639+
unsafe { _vcvtnh_u16_f16(a) }
65986640
}
65996641
#[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
66006642
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"]
@@ -6898,7 +6940,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
68986940
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
68996941
#[cfg(not(target_arch = "arm64ec"))]
69006942
pub fn vcvtph_s16_f16(a: f16) -> i16 {
6901-
vcvtph_s32_f16(a) as i16
6943+
unsafe extern "unadjusted" {
6944+
#[cfg_attr(
6945+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6946+
link_name = "llvm.aarch64.neon.fcvtps.i16.f16"
6947+
)]
6948+
fn _vcvtph_s16_f16(a: f16) -> i16;
6949+
}
6950+
unsafe { _vcvtph_s16_f16(a) }
69026951
}
69036952
#[doc = "Floating-point convert to integer, rounding to plus infinity"]
69046953
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"]
@@ -6942,7 +6991,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
69426991
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
69436992
#[cfg(not(target_arch = "arm64ec"))]
69446993
pub fn vcvtph_u16_f16(a: f16) -> u16 {
6945-
vcvtph_u32_f16(a) as u16
6994+
unsafe extern "unadjusted" {
6995+
#[cfg_attr(
6996+
any(target_arch = "aarch64", target_arch = "arm64ec"),
6997+
link_name = "llvm.aarch64.neon.fcvtpu.i16.f16"
6998+
)]
6999+
fn _vcvtph_u16_f16(a: f16) -> u16;
7000+
}
7001+
unsafe { _vcvtph_u16_f16(a) }
69467002
}
69477003
#[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"]
69487004
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"]

0 commit comments

Comments
 (0)