Skip to content

Commit fd3457a

Browse files
committed
bench: Gate native component bench per-function and cover full native API
Replace the AArch64-only `MLK_ARITH_BACKEND_AARCH64` guard with per-function `MLK_USE_NATIVE_xxx` / `MLK_USE_FIPS202_xxx_NATIVE` gates, so each native component benchmark is enabled exactly when the active backend provides that function. Extend coverage to all entry points in `mlkem/src/native/api.h` (adds rej_uniform, poly_frombytes, and the D4/D5/D10/D11 compress/decompress families) and `mlkem/src/fips202/native/api.h` (adds keccak_f1600 x1/x4 and the x4 xor_bytes/extract_bytes natives).

Signed-off-by: Matthias J. Kannwischer <matthias@zerorisc.com>
1 parent 101f214 commit fd3457a

1 file changed

Lines changed: 139 additions & 34 deletions

File tree

test/bench/bench_components_mlkem.c

Lines changed: 139 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,12 @@ static int cmp_uint64_t(const void *a, const void *b)
6464
qsort((cyc), NTESTS, sizeof(uint64_t), cmp_uint64_t); \
6565
printf(txt " cycles=%" PRIu64 "\n", (cyc)[NTESTS >> 1] / NITERATIONS);
6666

67+
#define BENCH_NATIVE_OK(txt, call) \
68+
BENCH(txt, CHECK((call) == MLK_NATIVE_FUNC_SUCCESS))
69+
70+
#define BENCH_NATIVE_NOT_FALLBACK(txt, call) \
71+
BENCH(txt, CHECK((call) != MLK_NATIVE_FUNC_FALLBACK))
72+
6773
static int bench(void)
6874
{
6975
MLK_ALIGN uint64_t data0[1024];
@@ -207,46 +213,145 @@ static int bench(void)
207213
BENCH("mlk_gen_matrix",
208214
mlk_gen_matrix((mlk_polymat *)data0, (uint8_t *)data1, 0))
209215

216+
/* Native backend components */
217+
218+
#if defined(MLK_USE_NATIVE_NTT)
219+
BENCH_NATIVE_OK("mlk_ntt_native", mlk_ntt_native((int16_t *)data0));
220+
#endif
221+
222+
#if defined(MLK_USE_NATIVE_INTT)
223+
BENCH_NATIVE_OK("mlk_intt_native", mlk_intt_native((int16_t *)data0));
224+
#endif
225+
226+
#if defined(MLK_USE_NATIVE_POLY_REDUCE)
227+
BENCH_NATIVE_OK("mlk_poly_reduce_native",
228+
mlk_poly_reduce_native((int16_t *)data0));
229+
#endif
230+
231+
#if defined(MLK_USE_NATIVE_POLY_TOMONT)
232+
BENCH_NATIVE_OK("mlk_poly_tomont_native",
233+
mlk_poly_tomont_native((int16_t *)data0));
234+
#endif
235+
236+
#if defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE)
237+
BENCH_NATIVE_OK(
238+
"mlk_poly_mulcache_compute_native",
239+
mlk_poly_mulcache_compute_native((int16_t *)data0, (int16_t *)data1));
240+
#endif
210241

211-
#if defined(MLK_ARITH_BACKEND_AARCH64)
212-
213-
printf("---AArch64 native backend components---\n");
214-
215-
BENCH("ntt-native",
216-
CHECK(mlk_ntt_native((int16_t *)data0) == MLK_NATIVE_FUNC_SUCCESS));
217-
BENCH("intt-native",
218-
CHECK(mlk_intt_native((int16_t *)data0) == MLK_NATIVE_FUNC_SUCCESS));
219-
BENCH("mlk_poly-reduce-native",
220-
CHECK(mlk_poly_reduce_native((int16_t *)data0) ==
221-
MLK_NATIVE_FUNC_SUCCESS));
222-
BENCH("mlk_poly-tomont-native",
223-
CHECK(mlk_poly_tomont_native((int16_t *)data0) ==
224-
MLK_NATIVE_FUNC_SUCCESS));
225-
BENCH("mlk_poly-tobytes-native",
226-
CHECK(mlk_poly_tobytes_native((uint8_t *)data0, (int16_t *)data1) ==
227-
MLK_NATIVE_FUNC_SUCCESS));
228-
BENCH("mlk_poly-mulcache-compute-native",
229-
CHECK(mlk_poly_mulcache_compute_native((int16_t *)data0,
230-
(int16_t *)data1) ==
231-
MLK_NATIVE_FUNC_SUCCESS));
242+
#if defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
232243
#if MLKEM_K == 2
233-
BENCH("mlk_polyvec-basemul-acc-montgomery-cached-k2-native",
234-
CHECK(mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
235-
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
236-
(int16_t *)data3) == MLK_NATIVE_FUNC_SUCCESS));
244+
BENCH_NATIVE_OK("mlk_polyvec_basemul_acc_montgomery_cached_k2_native",
245+
mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
246+
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
247+
(int16_t *)data3));
237248
#elif MLKEM_K == 3
238-
BENCH("mlk_polyvec-basemul-acc-montgomery-cached-k3-native",
239-
CHECK(mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
240-
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
241-
(int16_t *)data3) == MLK_NATIVE_FUNC_SUCCESS));
249+
BENCH_NATIVE_OK("mlk_polyvec_basemul_acc_montgomery_cached_k3_native",
250+
mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
251+
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
252+
(int16_t *)data3));
242253
#elif MLKEM_K == 4
243-
BENCH("mlk_polyvec-basemul-acc-montgomery-cached-k4-native",
244-
CHECK(mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
245-
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
246-
(int16_t *)data3) == MLK_NATIVE_FUNC_SUCCESS));
254+
BENCH_NATIVE_OK("mlk_polyvec_basemul_acc_montgomery_cached_k4_native",
255+
mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
256+
(int16_t *)data0, (int16_t *)data1, (int16_t *)data2,
257+
(int16_t *)data3));
247258
#endif /* MLKEM_K == 4 */
259+
#endif /* MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
260+
261+
#if defined(MLK_USE_NATIVE_POLY_TOBYTES)
262+
BENCH_NATIVE_OK("mlk_poly_tobytes_native",
263+
mlk_poly_tobytes_native((uint8_t *)data0, (int16_t *)data1));
264+
#endif
265+
266+
#if defined(MLK_USE_NATIVE_POLY_FROMBYTES)
267+
BENCH_NATIVE_OK(
268+
"mlk_poly_frombytes_native",
269+
mlk_poly_frombytes_native((int16_t *)data0, (uint8_t *)data1));
270+
#endif
271+
272+
#if defined(MLK_USE_NATIVE_REJ_UNIFORM)
273+
BENCH_NATIVE_NOT_FALLBACK(
274+
"mlk_rej_uniform_native",
275+
mlk_rej_uniform_native((int16_t *)data0, MLKEM_N, (uint8_t *)data1, 768));
276+
#endif
277+
278+
#if MLKEM_K == 2 || MLKEM_K == 3
279+
#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D4)
280+
BENCH_NATIVE_OK(
281+
"mlk_poly_compress_d4_native",
282+
mlk_poly_compress_d4_native((uint8_t *)data0, (int16_t *)data1));
283+
#endif
284+
285+
#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D10)
286+
BENCH_NATIVE_OK(
287+
"mlk_poly_compress_d10_native",
288+
mlk_poly_compress_d10_native((uint8_t *)data0, (int16_t *)data1));
289+
#endif
290+
291+
#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4)
292+
BENCH_NATIVE_OK(
293+
"mlk_poly_decompress_d4_native",
294+
mlk_poly_decompress_d4_native((int16_t *)data0, (uint8_t *)data1));
295+
#endif
296+
297+
#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10)
298+
BENCH_NATIVE_OK(
299+
"mlk_poly_decompress_d10_native",
300+
mlk_poly_decompress_d10_native((int16_t *)data0, (uint8_t *)data1));
301+
#endif
302+
#endif /* MLKEM_K == 2 || MLKEM_K == 3 */
303+
304+
#if MLKEM_K == 4
305+
#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D5)
306+
BENCH_NATIVE_OK(
307+
"mlk_poly_compress_d5_native",
308+
mlk_poly_compress_d5_native((uint8_t *)data0, (int16_t *)data1));
309+
#endif
310+
311+
#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D11)
312+
BENCH_NATIVE_OK(
313+
"mlk_poly_compress_d11_native",
314+
mlk_poly_compress_d11_native((uint8_t *)data0, (int16_t *)data1));
315+
#endif
316+
317+
#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5)
318+
BENCH_NATIVE_OK(
319+
"mlk_poly_decompress_d5_native",
320+
mlk_poly_decompress_d5_native((int16_t *)data0, (uint8_t *)data1));
321+
#endif
322+
323+
#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11)
324+
BENCH_NATIVE_OK(
325+
"mlk_poly_decompress_d11_native",
326+
mlk_poly_decompress_d11_native((int16_t *)data0, (uint8_t *)data1));
327+
#endif
328+
#endif /* MLKEM_K == 4 */
329+
330+
#if defined(MLK_USE_FIPS202_X1_NATIVE)
331+
BENCH_NATIVE_OK("mlk_keccak_f1600_x1_native",
332+
mlk_keccak_f1600_x1_native(data0));
333+
#endif
334+
335+
#if defined(MLK_USE_FIPS202_X4_NATIVE)
336+
BENCH_NATIVE_OK("mlk_keccak_f1600_x4_native",
337+
mlk_keccak_f1600_x4_native(data0));
338+
#endif
248339

249-
#endif /* MLK_ARITH_BACKEND_AARCH64 */
340+
#if defined(MLK_USE_FIPS202_X4_XOR_BYTES_NATIVE)
341+
BENCH_NATIVE_OK(
342+
"mlk_keccakf1600_xor_bytes_x4_native",
343+
mlk_keccakf1600_xor_bytes_x4_native(
344+
data0, (uint8_t *)data1, (uint8_t *)data2, (uint8_t *)data3,
345+
(uint8_t *)data4, 0, 25 * sizeof(uint64_t)));
346+
#endif /* MLK_USE_FIPS202_X4_XOR_BYTES_NATIVE */
347+
348+
#if defined(MLK_USE_FIPS202_X4_EXTRACT_BYTES_NATIVE)
349+
BENCH_NATIVE_OK(
350+
"mlk_keccakf1600_extract_bytes_x4_native",
351+
mlk_keccakf1600_extract_bytes_x4_native(
352+
data0, (uint8_t *)data1, (uint8_t *)data2, (uint8_t *)data3,
353+
(uint8_t *)data4, 0, 25 * sizeof(uint64_t)));
354+
#endif /* MLK_USE_FIPS202_X4_EXTRACT_BYTES_NATIVE */
250355

251356
return 0;
252357
}

0 commit comments

Comments
 (0)