@@ -18,24 +18,9 @@ using executorch::runtime::Result;
1818
1919/* Potential NNLIB function/APIs */
2020
21- extern " C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32 (
22- FLOAT32* __restrict__ p_out,
23- const WORD32* const p_out_shape,
24- const FLOAT32* __restrict__ p_inp1,
25- const WORD32* const p_inp1_shape,
26- const FLOAT32* __restrict__ p_inp2,
27- const WORD32* const p_inp2_shape);
28-
2921extern " C" void
3022xa_nn_elm_atan2_f32 (FLOAT32* z, const FLOAT32* y, const FLOAT32* x, WORD32 N);
3123
32- extern " C" WORD32 xa_nn_elm_clamp_f32xf32xf32_f32 (
33- FLOAT32* __restrict__ p_out,
34- const FLOAT32* __restrict__ p_inp,
35- const FLOAT32* __restrict__ p_min,
36- const FLOAT32* __restrict__ p_max,
37- WORD32 num_elm);
38-
3924extern " C" WORD32 xa_nn_elm_clamp_broadcast_4D_f32Xf32xf32_f32 (
4025 FLOAT32* __restrict__ p_out,
4126 const WORD32* const p_out_shape,
@@ -46,14 +31,6 @@ extern "C" WORD32 xa_nn_elm_clamp_broadcast_4D_f32Xf32xf32_f32(
4631 const FLOAT32* __restrict__ p_max,
4732 const WORD32* const p_max_shape);
4833
49- extern " C" WORD32 xa_nn_elm_div_broadcast_4D_f32xf32_f32 (
50- FLOAT32* __restrict__ p_out,
51- const WORD32* const p_out_shape,
52- const FLOAT32* __restrict__ p_inp1,
53- const WORD32* const p_inp1_shape,
54- const FLOAT32* __restrict__ p_inp2,
55- const WORD32* const p_inp2_shape);
56-
5734extern " C" WORD32 xa_nn_elm_div_mode_f32xf32_f32 (
5835 FLOAT32* __restrict__ p_out,
5936 const FLOAT32* __restrict__ p_inp1,
@@ -70,22 +47,6 @@ extern "C" WORD32 xa_nn_elm_div_mode_broadcast_4D_f32xf32_f32(
7047 const WORD32* const p_inp2_shape,
7148 WORD32 mode);
7249
73- extern " C" WORD32 xa_nn_elm_greater_lesser_equal_f32xf32_f32 (
74- WORD8* __restrict__ p_out,
75- const FLOAT32* __restrict__ p_inp1,
76- const FLOAT32* __restrict__ p_inp2,
77- WORD32 num_elm,
78- WORD32 kernel_type);
79-
80- extern " C" WORD32 xa_nn_elm_greater_lesser_equal_broadcast_4D_f32xf32_f32 (
81- WORD8* __restrict__ p_out,
82- const WORD32* const p_out_shape,
83- const FLOAT32* __restrict__ p_inp1,
84- const WORD32* const p_inp1_shape,
85- const FLOAT32* __restrict__ p_inp2,
86- const WORD32* const p_inp2_shape,
87- WORD32 kernel_type);
88-
8950extern " C" WORD32 xa_nn_elm_fmod_f32xf32_f32 (
9051 FLOAT32* __restrict__ p_out,
9152 const FLOAT32* __restrict__ p_inp1,
@@ -106,42 +67,6 @@ extern "C" WORD32 xa_nn_elm_logicalxor_boolxbool_bool(
10667 const WORD8* __restrict__ p_inp2,
10768 WORD32 num_elm);
10869
109- extern " C" WORD32 xa_nn_elm_maximum_f32xf32_f32 (
110- FLOAT32* __restrict__ p_out,
111- const FLOAT32* __restrict__ p_inp1,
112- const FLOAT32* __restrict__ p_inp2,
113- WORD32 num_elm);
114-
115- extern " C" WORD32 xa_nn_elm_maximum_broadcast_4D_f32xf32_f32 (
116- FLOAT32* __restrict__ p_out,
117- const WORD32* const p_out_shape,
118- const FLOAT32* __restrict__ p_inp1,
119- const WORD32* const p_inp1_shape,
120- const FLOAT32* __restrict__ p_inp2,
121- const WORD32* const p_inp2_shape);
122-
123- extern " C" WORD32 xa_nn_elm_minimum_f32xf32_f32 (
124- FLOAT32* __restrict__ p_out,
125- const FLOAT32* __restrict__ p_inp1,
126- const FLOAT32* __restrict__ p_inp2,
127- WORD32 num_elm);
128-
129- extern " C" WORD32 xa_nn_elm_minimum_broadcast_4D_f32xf32_f32 (
130- FLOAT32* __restrict__ p_out,
131- const WORD32* const p_out_shape,
132- const FLOAT32* __restrict__ p_inp1,
133- const WORD32* const p_inp1_shape,
134- const FLOAT32* __restrict__ p_inp2,
135- const WORD32* const p_inp2_shape);
136-
137- extern " C" WORD32 xa_nn_elm_mul_broadcast_4D_f32xf32_f32 (
138- FLOAT32* __restrict__ p_out,
139- const WORD32* const p_out_shape,
140- const FLOAT32* __restrict__ p_inp1,
141- const WORD32* const p_inp1_shape,
142- const FLOAT32* __restrict__ p_inp2,
143- const WORD32* const p_inp2_shape);
144-
14570extern " C" void xa_nn_elm_pow_f32 (
14671 FLOAT32* __restrict__ z,
14772 const FLOAT32* __restrict__ x,
@@ -162,23 +87,6 @@ extern "C" WORD32 xa_nn_elm_remainder_broadcast_4D_f32xf32_f32(
16287 const FLOAT32* __restrict__ p_inp2,
16388 const WORD32* const p_inp2_shape);
16489
165- extern " C" WORD32 xa_nn_elm_where_f32xf32_f32 (
166- FLOAT32* __restrict__ p_out,
167- const FLOAT32* __restrict__ p_inp1,
168- const FLOAT32* __restrict__ p_inp2,
169- const unsigned char * __restrict__ p_condition,
170- WORD32 num_elm);
171-
172- extern " C" WORD32 xa_nn_elm_where_broadcast_4D_f32xf32_f32 (
173- FLOAT32* __restrict__ p_out,
174- const WORD32* const p_out_shape,
175- const FLOAT32* __restrict__ p_inp1,
176- const WORD32* const p_inp1_shape,
177- const FLOAT32* __restrict__ p_inp2,
178- const WORD32* const p_inp2_shape,
179- const unsigned char * __restrict__ p_condition,
180- const WORD32* const p_condition_shape);
181-
18290extern " C" WORD32 xa_nn_im2row_quantized (
18391 const WORD8* __restrict__ data_im,
18492 const WORD32 in_zero_point,
@@ -212,60 +120,12 @@ extern "C" WORD32 xa_nn_reduce_mean_4D_f32_f32(
212120 WORD32 num_axis_dims,
213121 void * __restrict__ p_scratch_in);
214122
215- extern " C" WORD32 xa_nn_transpose_32_32 (
216- WORD32* __restrict__ p_out,
217- const WORD32* const p_out_shape,
218- const WORD32* __restrict__ p_inp,
219- const WORD32* const p_inp_shape,
220- const WORD32* __restrict__ p_permute_vec,
221- WORD32 num_out_dims,
222- WORD32 num_inp_dims);
223-
224123namespace impl {
225124namespace HiFi {
226125namespace kernels {
227126
228127void * allocate_temp_memory (KernelRuntimeContext& ctx, size_t size);
229128
230- void memcpy (void * dst, const void * src, size_t num_bytes);
231-
232- WORD32 matmul_asym8uxasym8u_asym8u (
233- UWORD8* __restrict__ p_out, // output uint8 matrix
234- const UWORD8* __restrict__ p_mat1, // weight uint8 matrix
235- const UWORD8* __restrict__ p_vec1, // input uint8 matrix
236- const WORD32* __restrict__ p_bias, // bias int32 vec
237- WORD32 rows, // rows of p_mat1
238- WORD32 cols1, // columns of p_mat1
239- WORD32 row_stride1, // row stride of p_mat1
240- WORD32 vec_count, // rows of p_mat2
241- WORD32 vec_offset, // vec_offset of p_mat2.
242- WORD32 out_offset, // out_offset, i.e., offset of next output element
243- WORD32 out_stride, // out_stride, i.e., stride to go to next output row
244- WORD32 mat1_zero_bias, // zero_point of p_mat1
245- WORD32 vec1_zero_bias, // zero_point of p_vec1
246- const WORD32* __restrict__ out_multiplier,
247- const WORD32* __restrict__ out_shift,
248- WORD32 out_zero_bias,
249- bool per_channel_quantized = false ); // per-channel quantized weight
250-
251- WORD32 xa_nn_matmul_asym8uxasym8u_asym8u (
252- UWORD8* __restrict__ p_out,
253- const UWORD8* __restrict__ p_mat1,
254- const UWORD8* __restrict__ p_mat2,
255- const WORD32* __restrict__ p_bias,
256- WORD32 rows,
257- WORD32 cols,
258- WORD32 row_stride,
259- WORD32 vec_count,
260- WORD32 vec_offset,
261- WORD32 out_offset,
262- WORD32 out_stride,
263- WORD32 mat1_zero_bias,
264- WORD32 vec1_zero_bias,
265- WORD32 out_multiplier,
266- WORD32 out_shift,
267- WORD32 out_zero_bias);
268-
269129template <typename T>
270130T quantize (const float x, float scale, int32_t zero_point);
271131
0 commit comments