Skip to content

Commit 55add51

Browse files
committed
Change implementation of vld1_dup
1 parent 00a2f68 commit 55add51

2 files changed

Lines changed: 9 additions & 9 deletions

File tree

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14634,8 +14634,8 @@ pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
14634 14634   | #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
14635 14635   | #[cfg(not(target_arch = "arm64ec"))]
14636 14636   | pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
14637       - | let x: float16x4_t = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
14638       - | simd_shuffle!(x, x, [0, 0, 0, 0])
      14637 + | let x = vld1_lane_f16::<0>(ptr, transmute(f16x4::splat(0.0)));
      14638 + | vdup_lane_f16::<0>(x)
14639 14639   | }
14640 14640   | #[doc = "Load one single-element structure and replicate to all lanes of one register"]
14641 14641   | #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
@@ -14653,8 +14653,8 @@ pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
14653 14653   | #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
14654 14654   | #[cfg(not(target_arch = "arm64ec"))]
14655 14655   | pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
14656       - | let x: float16x8_t = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
14657       - | simd_shuffle!(x, x, [0, 0, 0, 0, 0, 0, 0, 0])
      14656 + | let x = vld1q_lane_f16::<0>(ptr, transmute(f16x8::splat(0.0)));
      14657 + | vdupq_laneq_f16::<0>(x)
14658 14658   | }
14659 14659   | #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
14660 14660   | #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]

crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2835,7 +2835,7 @@ intrinsics:
2835 2835   | - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
2836 2836   | - FnCall: [simd_insert!, [src, "LANE as u32", "*ptr"]]
2837 2837   |
2838      - | - name: "vld1{type[2]}_{neon_type[1]}"
     2838 + | - name: "vld1{neon_type[1].dup_nox}"
2839 2839   | doc: "Load one single-element structure and replicate to all lanes of one register"
2840 2840   | arguments: ["ptr: {type[0]}"]
2841 2841   | return_type: "{neon_type[1]}"
@@ -2849,11 +2849,11 @@ intrinsics:
2849 2849   | safety:
2850 2850   | unsafe: [neon]
2851 2851   | types:
2852      - | - ["*const f16", float16x4_t, '_dup', 'f16x4', "[0, 0, 0, 0]"]
2853      - | - ["*const f16", float16x8_t, 'q_dup', 'f16x8', "[0, 0, 0, 0, 0, 0, 0, 0]"]
     2852 + | - ["*const f16", float16x4_t, '_lane', 'f16x4']
     2853 + | - ["*const f16", float16x8_t, 'q_laneq', 'f16x8']
2854 2854   | compose:
2855      - | - Let: [x, "{neon_type[1]}", "vld1{neon_type[1].lane_nox}::<0>(ptr, transmute({type[3]}::splat(0.0)))"]
2856      - | - FnCall: [simd_shuffle!, [x, x, "{type[4]}"]]
     2855 + | - Let: [x, {FnCall: ["vld1{neon_type[1].lane_nox}", [ptr, {FnCall: [transmute, ["{type[3]}::splat(0.0)"]]}], [0]]}]
     2856 + | - FnCall: ['vdup{type[2]}_{neon_type[1]}', [x], [0]]
2857 2857   |
2858 2858   |
2859 2859   | - name: "vld2{neon_type[1].nox}"

0 commit comments

Comments
 (0)