Skip to content

Commit fefc785

Browse files
committed
neon/cvtn: basic implementation of a few functions
Fixes: #1008
1 parent 51bf6f2 commit fefc785

4 files changed

Lines changed: 489 additions & 0 deletions

File tree

meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ simde_neon_families = [
4545
'cmla_rot270',
4646
'cnt',
4747
'cvt',
48+
'cvtn',
4849
'combine',
4950
'create',
5051
'dot',

simde/arm/neon.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
#include "neon/cmla_rot270.h"
6666
#include "neon/cnt.h"
6767
#include "neon/cvt.h"
68+
#include "neon/cvtn.h"
6869
#include "neon/combine.h"
6970
#include "neon/create.h"
7071
#include "neon/dot.h"

simde/arm/neon/cvtn.h

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/* SPDX-License-Identifier: MIT
2+
*
3+
* Permission is hereby granted, free of charge, to any person
4+
* obtaining a copy of this software and associated documentation
5+
* files (the "Software"), to deal in the Software without
6+
* restriction, including without limitation the rights to use, copy,
7+
* modify, merge, publish, distribute, sublicense, and/or sell copies
8+
* of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be
12+
* included in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
* SOFTWARE.
22+
*
23+
* Copyright:
24+
* 2023 Michael R. Crusoe <crusoe@debian.org>
25+
*/
26+
27+
#if !defined(SIMDE_ARM_NEON_CVTN_H)
28+
#define SIMDE_ARM_NEON_CVTN_H
29+
30+
#include "types.h"
31+
32+
HEDLEY_DIAGNOSTIC_PUSH
33+
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
34+
SIMDE_BEGIN_DECLS_
35+
36+
SIMDE_FUNCTION_ATTRIBUTES
37+
simde_int32x4_t
38+
simde_vcvtnq_s32_f32(simde_float32x4_t a) {
39+
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE)
40+
return vcvtnq_s32_f32(a);
41+
#else
42+
simde_float32x4_private a_ = simde_float32x4_to_private(a);
43+
simde_int32x4_private r_;
44+
45+
SIMDE_VECTORIZE
46+
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
47+
r_.values[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_.values[i]));
48+
}
49+
50+
return simde_int32x4_from_private(r_);
51+
#endif
52+
}
53+
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
54+
#undef vcvtnq_s32_f32
55+
#define vcvtnq_s32_f32(a) simde_vcvtnq_s32_f32(a)
56+
#endif
57+
58+
SIMDE_FUNCTION_ATTRIBUTES
59+
simde_int64x2_t
60+
simde_vcvtnq_s64_f64(simde_float64x2_t a) {
61+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
62+
return vcvtnq_s64_f64(a);
63+
#else
64+
simde_float64x2_private a_ = simde_float64x2_to_private(a);
65+
simde_int64x2_private r_;
66+
67+
SIMDE_VECTORIZE
68+
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
69+
r_.values[i] = HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a_.values[i]));
70+
}
71+
72+
return simde_int64x2_from_private(r_);
73+
#endif
74+
}
75+
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
76+
#undef vcvtnq_s64_f64
77+
#define vcvtnq_s64_f64(a) simde_vcvtnq_s64_f64(a)
78+
#endif
79+
80+
SIMDE_FUNCTION_ATTRIBUTES
81+
uint32_t
82+
simde_vcvtns_u32_f32(simde_float32 a) {
83+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
84+
return vcvtns_u32_f32(a);
85+
#elif defined(SIMDE_FAST_CONVERSION_RANGE)
86+
return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a));
87+
#else
88+
if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) {
89+
return 0;
90+
} else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) {
91+
return UINT32_MAX;
92+
} else if (simde_math_isnanf(a)) {
93+
return 0;
94+
} else {
95+
return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a));
96+
}
97+
#endif
98+
}
99+
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
100+
#undef vcvtns_u32_f32
101+
#define vcvtns_u32_f32(a) simde_vcvtns_u32_f32(a)
102+
#endif
103+
104+
SIMDE_FUNCTION_ATTRIBUTES
105+
simde_uint32x4_t
106+
simde_vcvtnq_u32_f32(simde_float32x4_t a) {
107+
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844)
108+
return vcvtnq_u32_f32(a);
109+
#else
110+
simde_float32x4_private a_ = simde_float32x4_to_private(a);
111+
simde_uint32x4_private r_;
112+
113+
SIMDE_VECTORIZE
114+
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
115+
r_.values[i] = simde_vcvtns_u32_f32(a_.values[i]);
116+
}
117+
118+
return simde_uint32x4_from_private(r_);
119+
#endif
120+
}
121+
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
122+
#undef vcvtnq_u32_f32
123+
#define vcvtnq_u32_f32(a) simde_vcvtnq_u32_f32(a)
124+
#endif
125+
126+
SIMDE_FUNCTION_ATTRIBUTES
127+
uint64_t
128+
simde_vcvtnd_u64_f64(simde_float64 a) {
129+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
130+
return vcvtnd_u64_f64(a);
131+
#elif defined(SIMDE_FAST_CONVERSION_RANGE)
132+
return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a));
133+
#else
134+
if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT64_C(0.0))) {
135+
return 0;
136+
} else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) {
137+
return UINT64_MAX;
138+
} else if (simde_math_isnan(a)) {
139+
return 0;
140+
} else {
141+
return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a));
142+
}
143+
#endif
144+
}
145+
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
146+
#undef vcvtnd_u64_f64
147+
#define vcvtnd_u64_f64(a) simde_vcvtnd_u64_f64(a)
148+
#endif
149+
150+
SIMDE_FUNCTION_ATTRIBUTES
151+
simde_uint64x2_t
152+
simde_vcvtnq_u64_f64(simde_float64x2_t a) {
153+
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
154+
return vcvtnq_u64_f64(a);
155+
#else
156+
simde_float64x2_private a_ = simde_float64x2_to_private(a);
157+
simde_uint64x2_private r_;
158+
159+
SIMDE_VECTORIZE
160+
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
161+
r_.values[i] = simde_vcvtnd_u64_f64(a_.values[i]);
162+
}
163+
164+
return simde_uint64x2_from_private(r_);
165+
#endif
166+
}
167+
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
168+
#undef vcvtnq_u64_f64
169+
#define vcvtnq_u64_f64(a) simde_vcvtnq_u64_f64(a)
170+
#endif
171+
172+
SIMDE_END_DECLS_
173+
HEDLEY_DIAGNOSTIC_POP
174+
175+
#endif /* SIMDE_ARM_NEON_CVTN_H */

0 commit comments

Comments
 (0)