From ef6f11abc9381f454982488ff79a969a672e116c Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 19 Mar 2026 15:10:42 +0000 Subject: [PATCH 1/4] Use posix_memalign in custom heap allocation config Replace aligned_alloc + MLK_ALIGN_UP with posix_memalign in custom_heap_alloc_config.h. Unlike aligned_alloc, posix_memalign does not require the size to be a multiple of the alignment, removing the need for MLK_ALIGN_UP rounding. This ensures that allocations are exact-sized, allowing memory-safety tests like valgrind and ASan to detect overflows at precise buffer boundaries. On Windows, where posix_memalign is not available, we use _aligned_malloc instead. This, too, does not require the size to be a multiple of the alignment. Signed-off-by: Hanno Becker --- test/configs/configs.yml | 23 +++++++++++++++++++---- test/configs/custom_heap_alloc_config.h | 21 ++++++++++++++++++++- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/test/configs/configs.yml b/test/configs/configs.yml index 177411cd09..8ad847f82c 100644 --- a/test/configs/configs.yml +++ b/test/configs/configs.yml @@ -397,13 +397,29 @@ configs: defines: MLK_CONFIG_CUSTOM_ALLOC_FREE: content: | + /* In practice, one could just use aligned_alloc here. However, this + * requires aligning up the size to a multiple of the alignment, which + * weakens some of the memory-safety tests we run using this config. 
*/ #define MLK_CONFIG_CUSTOM_ALLOC_FREE #if !defined(__ASSEMBLER__) + #if defined(_WIN32) + #include <malloc.h> + #define MLK_CUSTOM_ALLOC(v, T, N) \ + T *v = (T *)_aligned_malloc(sizeof(T) * (N), MLK_DEFAULT_ALIGN) + #define MLK_CUSTOM_FREE(v, T, N) _aligned_free(v) + #else #include <stdlib.h> - #define MLK_CUSTOM_ALLOC(v, T, N) \ - T* v = (T *)aligned_alloc(MLK_DEFAULT_ALIGN, \ - MLK_ALIGN_UP(sizeof(T) * (N))) + static inline void *mlk_posix_memalign(size_t align, size_t sz) + { + void *ptr = NULL; + if (posix_memalign(&ptr, align, sz) != 0) + return NULL; + return ptr; + } + #define MLK_CUSTOM_ALLOC(v, T, N) \ + T *v = (T *)mlk_posix_memalign(MLK_DEFAULT_ALIGN, sizeof(T) * (N)) #define MLK_CUSTOM_FREE(v, T, N) free(v) + #endif /* _WIN32 */ #endif /* !__ASSEMBLER__ */ - path: examples/basic_deterministic/mlkem_native/mlkem_native_config.h @@ -449,4 +465,3 @@ configs: #endif /* !__ASSEMBLER__ */ MLK_CONFIG_FILE: comment: "/* No need to set this -- we _are_ already in a custom config */" - diff --git a/test/configs/custom_heap_alloc_config.h b/test/configs/custom_heap_alloc_config.h index e57b8231d8..4bf3b89211 100644 --- a/test/configs/custom_heap_alloc_config.h +++ b/test/configs/custom_heap_alloc_config.h @@ -498,12 +498,31 @@ * code will handle this case and invoke MLK_CUSTOM_FREE. * *****************************************************************************/ +/* In practice, one could just use aligned_alloc here. However, this + * requires aligning up the size to a multiple of the alignment, which + * weakens some of the memory-safety tests we run using this config. 
*/ #define MLK_CONFIG_CUSTOM_ALLOC_FREE #if !defined(__ASSEMBLER__) +#if defined(_WIN32) +#include <malloc.h> +#define MLK_CUSTOM_ALLOC(v, T, N) \ + T *v = (T *)_aligned_malloc(sizeof(T) * (N), MLK_DEFAULT_ALIGN) +#define MLK_CUSTOM_FREE(v, T, N) _aligned_free(v) +#else /* _WIN32 */ #include <stdlib.h> +static inline void *mlk_posix_memalign(size_t align, size_t sz) +{ + void *ptr = NULL; + if (posix_memalign(&ptr, align, sz) != 0) + { + return NULL; + } + return ptr; +} #define MLK_CUSTOM_ALLOC(v, T, N) \ - T *v = (T *)aligned_alloc(MLK_DEFAULT_ALIGN, MLK_ALIGN_UP(sizeof(T) * (N))) + T *v = (T *)mlk_posix_memalign(MLK_DEFAULT_ALIGN, sizeof(T) * (N)) #define MLK_CUSTOM_FREE(v, T, N) free(v) +#endif /* !_WIN32 */ #endif /* !__ASSEMBLER__ */ From e3edccfd05b1b4b06959eea2563363c3dda26046 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 19 Mar 2026 15:10:52 +0000 Subject: [PATCH 2/4] Heap-allocate all buffers in backend unit tests Replace all stack-allocated buffers in test_unit.c with heap allocations via MLK_ALLOC/MLK_FREE, using the custom_heap_alloc_config. This enables valgrind to detect buffer overflows in assembly backends, which operate on these buffers. Build the unit test objects with custom_heap_alloc_config.h by adding the appropriate -DMLK_CONFIG_FILE, -std=c11, and -D_GNU_SOURCE flags in components.mk. 
Signed-off-by: Hanno Becker --- test/mk/components.mk | 21 +- test/src/test_unit.c | 486 +++++++++++++++++++++++++++++++----------- 2 files changed, 372 insertions(+), 135 deletions(-) diff --git a/test/mk/components.mk b/test/mk/components.mk index af34a048e9..71f22f1284 100644 --- a/test/mk/components.mk +++ b/test/mk/components.mk @@ -32,13 +32,16 @@ $(MLKEM768_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=768 MLKEM1024_OBJS = $(call MAKE_OBJS,$(MLKEM1024_DIR),$(SOURCES) $(FIPS202_SRCS)) $(MLKEM1024_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=1024 -# Unit test object files - same sources but with MLK_STATIC_TESTABLE= +# Unit test object files - same sources but with MLK_STATIC_TESTABLE= and custom heap alloc config +UNIT_CFLAGS = -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes \ + -DMLK_CONFIG_FILE=\"../test/configs/custom_heap_alloc_config.h\" -std=c11 -D_GNU_SOURCE + MLKEM512_UNIT_OBJS = $(call MAKE_OBJS,$(MLKEM512_DIR)/unit,$(SOURCES) $(FIPS202_SRCS)) -$(MLKEM512_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=512 -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes +$(MLKEM512_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=512 $(UNIT_CFLAGS) MLKEM768_UNIT_OBJS = $(call MAKE_OBJS,$(MLKEM768_DIR)/unit,$(SOURCES) $(FIPS202_SRCS)) -$(MLKEM768_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=768 -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes +$(MLKEM768_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=768 $(UNIT_CFLAGS) MLKEM1024_UNIT_OBJS = $(call MAKE_OBJS,$(MLKEM1024_DIR)/unit,$(SOURCES) $(FIPS202_SRCS)) -$(MLKEM1024_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=1024 -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes +$(MLKEM1024_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=1024 $(UNIT_CFLAGS) # Alloc test object files - same sources but with custom alloc config MLKEM512_ALLOC_OBJS = $(call MAKE_OBJS,$(MLKEM512_DIR)/alloc,$(SOURCES) $(FIPS202_SRCS)) @@ -81,9 +84,13 @@ $(MLKEM512_DIR)/test/src/test_alloc.c.o: CFLAGS += -DMLK_CONFIG_FILE=\"../test/c 
$(MLKEM768_DIR)/test/src/test_alloc.c.o: CFLAGS += -DMLK_CONFIG_FILE=\"../test/configs/test_alloc_config.h\" $(MLKEM1024_DIR)/test/src/test_alloc.c.o: CFLAGS += -DMLK_CONFIG_FILE=\"../test/configs/test_alloc_config.h\" -$(MLKEM512_DIR)/bin/test_unit512: CFLAGS += -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes -$(MLKEM768_DIR)/bin/test_unit768: CFLAGS += -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes -$(MLKEM1024_DIR)/bin/test_unit1024: CFLAGS += -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes +$(MLKEM512_DIR)/test/src/test_unit.c.o: CFLAGS += $(UNIT_CFLAGS) +$(MLKEM768_DIR)/test/src/test_unit.c.o: CFLAGS += $(UNIT_CFLAGS) +$(MLKEM1024_DIR)/test/src/test_unit.c.o: CFLAGS += $(UNIT_CFLAGS) + +$(MLKEM512_DIR)/bin/test_unit512: CFLAGS += $(UNIT_CFLAGS) +$(MLKEM768_DIR)/bin/test_unit768: CFLAGS += $(UNIT_CFLAGS) +$(MLKEM1024_DIR)/bin/test_unit1024: CFLAGS += $(UNIT_CFLAGS) # Unit library object files compiled with MLK_STATIC_TESTABLE= $(MLKEM512_DIR)/unit_%: CFLAGS += -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes diff --git a/test/src/test_unit.c b/test/src/test_unit.c index b5315a877c..9aaeaecebc 100644 --- a/test/src/test_unit.c +++ b/test/src/test_unit.c @@ -273,17 +273,29 @@ static int compare_i16_arrays(const int16_t *a, const int16_t *b, unsigned len, #ifdef MLK_USE_NATIVE_POLY_REDUCE static int test_poly_reduce_core(const int16_t *input, const char *test_name) { - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); - memcpy(test_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); - memcpy(ref_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); + if (test_poly == NULL || ref_poly == NULL) + { + goto cleanup; + } - mlk_poly_reduce(&test_poly); - mlk_poly_reduce_c(&ref_poly); + memcpy(test_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(ref_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); - CHECK(compare_i16_arrays(test_poly.coeffs, ref_poly.coeffs, MLKEM_N, + mlk_poly_reduce(test_poly); 
+ mlk_poly_reduce_c(ref_poly); + + CHECK(compare_i16_arrays(test_poly->coeffs, ref_poly->coeffs, MLKEM_N, test_name, NULL)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + return ret; } static int test_native_poly_reduce(void) @@ -313,21 +325,33 @@ static int test_native_poly_reduce(void) #ifdef MLK_USE_NATIVE_POLY_TOMONT static int test_poly_tomont_core(const int16_t *input, const char *test_name) { - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); + + if (test_poly == NULL || ref_poly == NULL) + { + goto cleanup; + } - memcpy(test_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); - memcpy(ref_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(test_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(ref_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); - mlk_poly_tomont(&test_poly); - mlk_poly_tomont_c(&ref_poly); + mlk_poly_tomont(test_poly); + mlk_poly_tomont_c(ref_poly); /* Normalize */ - mlk_poly_reduce_c(&ref_poly); - mlk_poly_reduce_c(&test_poly); + mlk_poly_reduce_c(ref_poly); + mlk_poly_reduce_c(test_poly); - CHECK(compare_i16_arrays(test_poly.coeffs, ref_poly.coeffs, MLKEM_N, + CHECK(compare_i16_arrays(test_poly->coeffs, ref_poly->coeffs, MLKEM_N, test_name, NULL)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + return ret; } static int test_native_poly_tomont(void) @@ -357,25 +381,37 @@ static int test_native_poly_tomont(void) #ifdef MLK_USE_NATIVE_NTT static int test_ntt_core(const int16_t *input, const char *test_name) { - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); + + if (test_poly == NULL || ref_poly == NULL) + { + goto cleanup; + } - memcpy(test_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); - memcpy(ref_poly.coeffs, input, 
MLKEM_N * sizeof(int16_t)); + memcpy(test_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(ref_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); - mlk_poly_ntt(&test_poly); - mlk_poly_ntt_c(&ref_poly); + mlk_poly_ntt(test_poly); + mlk_poly_ntt_c(ref_poly); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(ref_poly.coeffs); + mlk_poly_permute_bitrev_to_custom(ref_poly->coeffs); #endif /* Normalize */ - mlk_poly_reduce_c(&ref_poly); - mlk_poly_reduce_c(&test_poly); + mlk_poly_reduce_c(ref_poly); + mlk_poly_reduce_c(test_poly); - CHECK(compare_i16_arrays(test_poly.coeffs, ref_poly.coeffs, MLKEM_N, + CHECK(compare_i16_arrays(test_poly->coeffs, ref_poly->coeffs, MLKEM_N, test_name, input)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + return ret; } static int test_native_ntt(void) @@ -405,25 +441,37 @@ static int test_native_ntt(void) #ifdef MLK_USE_NATIVE_INTT static int test_intt_core(const int16_t *input, const char *test_name) { - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); - memcpy(test_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); - memcpy(ref_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); + if (test_poly == NULL || ref_poly == NULL) + { + goto cleanup; + } + + memcpy(test_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(ref_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(test_poly.coeffs); + mlk_poly_permute_bitrev_to_custom(test_poly->coeffs); #endif - mlk_poly_invntt_tomont(&test_poly); - mlk_poly_invntt_tomont_c(&ref_poly); + mlk_poly_invntt_tomont(test_poly); + mlk_poly_invntt_tomont_c(ref_poly); /* Normalize */ - mlk_poly_reduce_c(&ref_poly); - mlk_poly_reduce_c(&test_poly); + mlk_poly_reduce_c(ref_poly); + mlk_poly_reduce_c(test_poly); - CHECK(compare_i16_arrays(test_poly.coeffs, 
ref_poly.coeffs, MLKEM_N, + CHECK(compare_i16_arrays(test_poly->coeffs, ref_poly->coeffs, MLKEM_N, test_name, input)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + return ret; } static int test_native_intt(void) @@ -480,21 +528,37 @@ static int test_native_intt(void) #ifdef MLK_USE_NATIVE_POLY_TOBYTES static int test_poly_tobytes_core(const int16_t *input, const char *test_name) { - uint8_t test_result[MLKEM_POLYBYTES], ref_result[MLKEM_POLYBYTES]; - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_result, uint8_t, MLKEM_POLYBYTES, NULL); + MLK_ALLOC(ref_result, uint8_t, MLKEM_POLYBYTES, NULL); + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); + + if (test_result == NULL || ref_result == NULL || test_poly == NULL || + ref_poly == NULL) + { + goto cleanup; + } - memcpy(test_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); - memcpy(ref_poly.coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(test_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); + memcpy(ref_poly->coeffs, input, MLKEM_N * sizeof(int16_t)); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(test_poly.coeffs); + mlk_poly_permute_bitrev_to_custom(test_poly->coeffs); #endif - mlk_poly_tobytes(test_result, &test_poly); - mlk_poly_tobytes_c(ref_result, &ref_poly); + mlk_poly_tobytes(test_result, test_poly); + mlk_poly_tobytes_c(ref_result, ref_poly); CHECK(compare_u8_arrays(test_result, ref_result, MLKEM_POLYBYTES, test_name)); - return 1; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + MLK_FREE(ref_result, uint8_t, MLKEM_POLYBYTES, NULL); + MLK_FREE(test_result, uint8_t, MLKEM_POLYBYTES, NULL); + return ret; } static int test_native_poly_tobytes(void) @@ -503,18 +567,18 @@ static int test_native_poly_tobytes(void) int pos, i; generate_i16_array_zeros(test_data, MLKEM_N); - 
CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_zeros") == 1); + CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_zeros") == 0); for (pos = 0; pos < MLKEM_N; pos += MLKEM_N / 8) { generate_i16_array_single(test_data, MLKEM_N, (size_t)pos, 1); - CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_single") == 1); + CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_single") == 0); } for (i = 0; i < NUM_RANDOM_TESTS; i++) { generate_i16_array_ranged(test_data, MLKEM_N, 0, MLKEM_Q); - CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_random") == 1); + CHECK(test_poly_tobytes_core(test_data, "poly_tobytes_random") == 0); } return 0; @@ -525,18 +589,30 @@ static int test_native_poly_tobytes(void) static int test_poly_frombytes_core(const uint8_t *input_bytes, const char *test_name) { - mlk_poly test_poly, ref_poly; + int ret = 1; + MLK_ALLOC(test_poly, mlk_poly, 1, NULL); + MLK_ALLOC(ref_poly, mlk_poly, 1, NULL); - mlk_poly_frombytes(&test_poly, input_bytes); - mlk_poly_frombytes_c(&ref_poly, input_bytes); + if (test_poly == NULL || ref_poly == NULL) + { + goto cleanup; + } + + mlk_poly_frombytes(test_poly, input_bytes); + mlk_poly_frombytes_c(ref_poly, input_bytes); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(ref_poly.coeffs); + mlk_poly_permute_bitrev_to_custom(ref_poly->coeffs); #endif - CHECK(compare_i16_arrays(test_poly.coeffs, ref_poly.coeffs, MLKEM_N, + CHECK(compare_i16_arrays(test_poly->coeffs, ref_poly->coeffs, MLKEM_N, test_name, NULL)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_poly, mlk_poly, 1, NULL); + MLK_FREE(test_poly, mlk_poly, 1, NULL); + return ret; } static int test_native_poly_frombytes(void) @@ -574,44 +650,68 @@ static int test_native_poly_frombytes(void) static int test_polyvec_basemul_core(const int16_t *a, const int16_t *b, const char *test_name) { - mlk_poly test_result, ref_result; - mlk_polyvec test_a, test_b, ref_a, ref_b; - mlk_polyvec_mulcache test_cache, ref_cache; + int ret = 
1; int i; + MLK_ALLOC(test_result, mlk_poly, 1, NULL); + MLK_ALLOC(ref_result, mlk_poly, 1, NULL); + MLK_ALLOC(test_a, mlk_polyvec, 1, NULL); + MLK_ALLOC(test_b, mlk_polyvec, 1, NULL); + MLK_ALLOC(ref_a, mlk_polyvec, 1, NULL); + MLK_ALLOC(ref_b, mlk_polyvec, 1, NULL); + MLK_ALLOC(test_cache, mlk_polyvec_mulcache, 1, NULL); + MLK_ALLOC(ref_cache, mlk_polyvec_mulcache, 1, NULL); + + if (test_result == NULL || ref_result == NULL || test_a == NULL || + test_b == NULL || ref_a == NULL || ref_b == NULL || test_cache == NULL || + ref_cache == NULL) + { + goto cleanup; + } /* Copy test data to structures */ for (i = 0; i < MLKEM_K; i++) { - memcpy(test_a.vec[i].coeffs, &a[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); - memcpy(test_b.vec[i].coeffs, &b[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); - memcpy(ref_a.vec[i].coeffs, &a[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); - memcpy(ref_b.vec[i].coeffs, &b[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); + memcpy(test_a->vec[i].coeffs, &a[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); + memcpy(test_b->vec[i].coeffs, &b[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); + memcpy(ref_a->vec[i].coeffs, &a[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); + memcpy(ref_b->vec[i].coeffs, &b[i * MLKEM_N], MLKEM_N * sizeof(int16_t)); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(test_a.vec[i].coeffs); - mlk_poly_permute_bitrev_to_custom(test_b.vec[i].coeffs); + mlk_poly_permute_bitrev_to_custom(test_a->vec[i].coeffs); + mlk_poly_permute_bitrev_to_custom(test_b->vec[i].coeffs); #endif - mlk_poly_mulcache_compute_c(&ref_cache.vec[i], &ref_b.vec[i]); - mlk_poly_mulcache_compute(&test_cache.vec[i], &test_b.vec[i]); + mlk_poly_mulcache_compute_c(&ref_cache->vec[i], &ref_b->vec[i]); + mlk_poly_mulcache_compute(&test_cache->vec[i], &test_b->vec[i]); } - mlk_polyvec_basemul_acc_montgomery_cached(&test_result, &test_a, &test_b, - &test_cache); - mlk_polyvec_basemul_acc_montgomery_cached_c(&ref_result, &ref_a, &ref_b, - &ref_cache); + 
mlk_polyvec_basemul_acc_montgomery_cached(test_result, test_a, test_b, + test_cache); + mlk_polyvec_basemul_acc_montgomery_cached_c(ref_result, ref_a, ref_b, + ref_cache); #ifdef MLK_USE_NATIVE_NTT_CUSTOM_ORDER - mlk_poly_permute_bitrev_to_custom(ref_result.coeffs); + mlk_poly_permute_bitrev_to_custom(ref_result->coeffs); #endif /* Normalize */ - mlk_poly_reduce_c(&ref_result); - mlk_poly_reduce_c(&test_result); + mlk_poly_reduce_c(ref_result); + mlk_poly_reduce_c(test_result); - CHECK(compare_i16_arrays(test_result.coeffs, ref_result.coeffs, MLKEM_N, + CHECK(compare_i16_arrays(test_result->coeffs, ref_result->coeffs, MLKEM_N, test_name, NULL)); - return 0; + ret = 0; + +cleanup: + MLK_FREE(ref_cache, mlk_polyvec_mulcache, 1, NULL); + MLK_FREE(test_cache, mlk_polyvec_mulcache, 1, NULL); + MLK_FREE(ref_b, mlk_polyvec, 1, NULL); + MLK_FREE(ref_a, mlk_polyvec, 1, NULL); + MLK_FREE(test_b, mlk_polyvec, 1, NULL); + MLK_FREE(test_a, mlk_polyvec, 1, NULL); + MLK_FREE(ref_result, mlk_poly, 1, NULL); + MLK_FREE(test_result, mlk_poly, 1, NULL); + return ret; } static int test_native_polyvec_basemul(void) @@ -640,14 +740,20 @@ static int test_native_polyvec_basemul(void) #ifdef MLK_USE_FIPS202_X1_NATIVE static int test_keccakf1600_permute(void) { - uint64_t state[MLK_KECCAK_LANES]; - uint64_t state_ref[MLK_KECCAK_LANES]; + int ret = 1; int i; + MLK_ALLOC(state, uint64_t, MLK_KECCAK_LANES, NULL); + MLK_ALLOC(state_ref, uint64_t, MLK_KECCAK_LANES, NULL); + + if (state == NULL || state_ref == NULL) + { + goto cleanup; + } for (i = 0; i < NUM_RANDOM_TESTS; i++) { - randombytes((uint8_t *)state, sizeof(state)); - memcpy(state_ref, state, sizeof(state)); + randombytes((uint8_t *)state, MLK_KECCAK_LANES * sizeof(uint64_t)); + memcpy(state_ref, state, MLK_KECCAK_LANES * sizeof(uint64_t)); mlk_keccakf1600_permute(state); mlk_keccakf1600_permute_c(state_ref); @@ -656,7 +762,12 @@ static int test_keccakf1600_permute(void) "keccakf1600_permute")); } - return 0; + ret = 0; + +cleanup: + 
MLK_FREE(state_ref, uint64_t, MLK_KECCAK_LANES, NULL); + MLK_FREE(state, uint64_t, MLK_KECCAK_LANES, NULL); + return ret; } #endif /* MLK_USE_FIPS202_X1_NATIVE */ @@ -669,13 +780,39 @@ static int test_keccakf1600_permute(void) static int test_keccakf1600x4_xor_permute_extract(void) { - uint64_t state_x4[MLK_KECCAK_LANES * MLK_KECCAK_WAY]; - uint64_t state_x1[MLK_KECCAK_LANES]; - unsigned char output_x4[MLK_KECCAK_WAY][MAX_RATE]; - unsigned char output_x1[MAX_RATE]; - unsigned char input[MLK_KECCAK_WAY][MAX_RATE]; - uint8_t xor_offset, xor_length, ext_offset, ext_length; + int ret = 1; int i, j; + uint8_t xor_offset, xor_length, ext_offset, ext_length; + MLK_ALLOC(state_x4, uint64_t, MLK_KECCAK_LANES *MLK_KECCAK_WAY, NULL); + MLK_ALLOC(state_x1, uint64_t, MLK_KECCAK_LANES, NULL); + MLK_ALLOC(output_x1, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(output_x4_0, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(output_x4_1, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(output_x4_2, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(output_x4_3, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(input_0, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(input_1, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(input_2, unsigned char, MAX_RATE, NULL); + MLK_ALLOC(input_3, unsigned char, MAX_RATE, NULL); + + unsigned char *output_x4[MLK_KECCAK_WAY]; + unsigned char *input[MLK_KECCAK_WAY]; + output_x4[0] = output_x4_0; + output_x4[1] = output_x4_1; + output_x4[2] = output_x4_2; + output_x4[3] = output_x4_3; + input[0] = input_0; + input[1] = input_1; + input[2] = input_2; + input[3] = input_3; + + if (state_x4 == NULL || state_x1 == NULL || output_x1 == NULL || + output_x4_0 == NULL || output_x4_1 == NULL || output_x4_2 == NULL || + output_x4_3 == NULL || input_0 == NULL || input_1 == NULL || + input_2 == NULL || input_3 == NULL) + { + goto cleanup; + } for (i = 0; i < NUM_RANDOM_TESTS; i++) { @@ -698,7 +835,7 @@ static int test_keccakf1600x4_xor_permute_extract(void) } /* Run x4 implementation */ - 
memset(state_x4, 0, sizeof(state_x4)); + memset(state_x4, 0, MLK_KECCAK_LANES * MLK_KECCAK_WAY * sizeof(uint64_t)); mlk_keccakf1600x4_xor_bytes(state_x4, input[0], input[1], input[2], input[3], xor_offset, xor_length); mlk_keccakf1600x4_permute(state_x4); @@ -709,7 +846,7 @@ static int test_keccakf1600x4_xor_permute_extract(void) /* Compare each lane against x1 C reference */ for (j = 0; j < MLK_KECCAK_WAY; j++) { - memset(state_x1, 0, sizeof(state_x1)); + memset(state_x1, 0, MLK_KECCAK_LANES * sizeof(uint64_t)); mlk_keccakf1600_xor_bytes(state_x1, input[j], xor_offset, xor_length); mlk_keccakf1600_permute_c(state_x1); mlk_keccakf1600_extract_bytes(state_x1, output_x1, ext_offset, @@ -719,7 +856,21 @@ static int test_keccakf1600x4_xor_permute_extract(void) } } - return 0; + ret = 0; + +cleanup: + MLK_FREE(input_3, unsigned char, MAX_RATE, NULL); + MLK_FREE(input_2, unsigned char, MAX_RATE, NULL); + MLK_FREE(input_1, unsigned char, MAX_RATE, NULL); + MLK_FREE(input_0, unsigned char, MAX_RATE, NULL); + MLK_FREE(output_x4_3, unsigned char, MAX_RATE, NULL); + MLK_FREE(output_x4_2, unsigned char, MAX_RATE, NULL); + MLK_FREE(output_x4_1, unsigned char, MAX_RATE, NULL); + MLK_FREE(output_x4_0, unsigned char, MAX_RATE, NULL); + MLK_FREE(output_x1, unsigned char, MAX_RATE, NULL); + MLK_FREE(state_x1, uint64_t, MLK_KECCAK_LANES, NULL); + MLK_FREE(state_x4, uint64_t, MLK_KECCAK_LANES *MLK_KECCAK_WAY, NULL); + return ret; } #undef MAX_RATE @@ -777,69 +928,133 @@ static int test_backend_units(void) /* This test invokes the polynomial (de)compression routines - * with minimally sized buffers. When run with address sanitization, - * this ensures that no buffer overflow is happening. This is of interest - * because the compressed buffers sometimes have unaligned lengths and + * with minimally sized buffers. When run under valgrind or with address + * sanitization, this ensures that no buffer overflow is happening. 
This is of + * interest because the compressed buffers sometimes have unaligned lengths and * are therefore at risk of being overflowed by vectorized code. */ static int test_poly_compress_no_overflow(void) { #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4]; - mlk_poly s; - memset((uint8_t *)&s, 0, sizeof(s)); - mlk_poly_compress_d4(r, &s); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); + return 1; + } + memset((uint8_t *)s, 0, sizeof(mlk_poly)); + mlk_poly_compress_d4(r, s); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4]; - mlk_poly s; - memset(r, 0, sizeof(r)); - mlk_poly_decompress_d4(&s, r); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); + return 1; + } + memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D4); + mlk_poly_decompress_d4(s, r); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10]; - mlk_poly s; - memset((uint8_t *)&s, 0, sizeof(s)); - mlk_poly_compress_d10(r, &s); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); + return 1; + } + memset((uint8_t *)s, 0, sizeof(mlk_poly)); + mlk_poly_compress_d10(r, s); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10]; - mlk_poly s; - memset(r, 0, sizeof(r)); - 
mlk_poly_decompress_d10(&s, r); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); + return 1; + } + memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D10); + mlk_poly_decompress_d10(s, r); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); } #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4 { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5]; - mlk_poly s; - memset((uint8_t *)&s, 0, sizeof(s)); - mlk_poly_compress_d5(r, &s); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); + return 1; + } + memset((uint8_t *)s, 0, sizeof(mlk_poly)); + mlk_poly_compress_d5(r, s); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5]; - mlk_poly s; - memset(r, 0, sizeof(r)); - mlk_poly_decompress_d5(&s, r); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); + return 1; + } + memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D5); + mlk_poly_decompress_d5(s, r); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11]; - mlk_poly s; - memset((uint8_t *)&s, 0, sizeof(s)); - mlk_poly_compress_d11(r, &s); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, 
MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); + return 1; + } + memset((uint8_t *)s, 0, sizeof(mlk_poly)); + mlk_poly_compress_d11(r, s); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); } { - uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11]; - mlk_poly s; - memset(r, 0, sizeof(r)); - mlk_poly_decompress_d11(&s, r); + MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); + MLK_ALLOC(s, mlk_poly, 1, NULL); + if (r == NULL || s == NULL) + { + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); + return 1; + } + memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D11); + mlk_poly_decompress_d11(s, r); + MLK_FREE(s, mlk_poly, 1, NULL); + MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); } #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */ @@ -853,26 +1068,35 @@ static int test_poly_compress_no_overflow(void) #if !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) static int test_poly_rej_uniform_consistency(void) { - mlk_poly vec_x4[4], vec_x1[4]; - MLK_ALIGN uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]; + int ret = 1; int i, j; + MLK_ALLOC(vec_x4, mlk_poly, 4, NULL); + MLK_ALLOC(vec_x1, mlk_poly, 4, NULL); + MLK_ALLOC(seed, uint8_t, 4 * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2), NULL); + if (vec_x4 == NULL || vec_x1 == NULL || seed == NULL) + { + goto cleanup; + } for (i = 0; i < NUM_RANDOM_TESTS_REJ_UNIFORM; i++) { for (j = 0; j < 4; j++) { - randombytes(seed[j], MLKEM_SYMBYTES + 2); + randombytes(seed + j * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2), + MLKEM_SYMBYTES + 2); } /* Test x4 version */ mlk_poly_rej_uniform_x4(&vec_x4[0], &vec_x4[1], &vec_x4[2], &vec_x4[3], - seed); + (uint8_t (*)[MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]) + seed); /* Test x1 version with same seeds */ for (j = 0; j < 4; j++) { - mlk_poly_rej_uniform(&vec_x1[j], seed[j]); + mlk_poly_rej_uniform(&vec_x1[j], + seed + j * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)); } /* Compare results */ @@ -883,7 +1107,13 @@ static int 
test_poly_rej_uniform_consistency(void) } } - return 0; + ret = 0; + +cleanup: + MLK_FREE(seed, uint8_t, 4 * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2), NULL); + MLK_FREE(vec_x1, mlk_poly, 4, NULL); + MLK_FREE(vec_x4, mlk_poly, 4, NULL); + return ret; } #endif /* !MLK_CONFIG_SERIAL_FIPS202_ONLY */ From c02cda992d3a455a53d097dd3de22bb5e154dbf1 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 19 Mar 2026 15:20:23 +0000 Subject: [PATCH 3/4] Add backend unit tests for rej_uniform and poly compress/decompress Add native-vs-C consistency tests for previously untested backends: - mlk_rej_uniform_native: compare against mlk_rej_uniform_c - mlk_poly_compress_d{4,5,10,11}_native: compare against C reference - mlk_poly_decompress_d{4,5,10,11}_native: compare against C reference These tests call the assembly backends directly with heap-allocated buffers, enabling valgrind to detect buffer overflows. In particular, the rej_uniform test would have caught the 4-byte overread in the AVX2 rejection sampling fixed in commit f10b80194. Previous ad-hoc tests for detecting overflow in (de)compression routines are now subsumed by the unit tests, and removed. 
Signed-off-by: Hanno Becker --- test/src/test_unit.c | 384 +++++++++++++++++++++++++++---------------- 1 file changed, 239 insertions(+), 145 deletions(-) diff --git a/test/src/test_unit.c b/test/src/test_unit.c index 9aaeaecebc..55430a5466 100644 --- a/test/src/test_unit.c +++ b/test/src/test_unit.c @@ -43,6 +43,24 @@ void mlk_polyvec_basemul_acc_montgomery_cached_c( const mlk_polyvec_mulcache *b_cache); void mlk_poly_mulcache_compute_c(mlk_poly_mulcache *x, const mlk_poly *a); void mlk_keccakf1600_permute_c(uint64_t *state); +unsigned mlk_rej_uniform_c(int16_t *r, unsigned target, unsigned offset, + const uint8_t *buf, unsigned buflen); +void mlk_poly_compress_d4_c(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], + const mlk_poly *a); +void mlk_poly_decompress_d4_c(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); +void mlk_poly_compress_d5_c(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], + const mlk_poly *a); +void mlk_poly_decompress_d5_c(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); +void mlk_poly_compress_d10_c(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], + const mlk_poly *a); +void mlk_poly_decompress_d10_c(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +void mlk_poly_compress_d11_c(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], + const mlk_poly *a); +void mlk_poly_decompress_d11_c(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); #define CHECK(x) \ do \ @@ -118,7 +136,15 @@ static int compare_u64_arrays(const uint64_t *a, const uint64_t *b, defined(MLK_USE_NATIVE_POLY_TOMONT) || defined(MLK_USE_NATIVE_NTT) || \ defined(MLK_USE_NATIVE_INTT) || defined(MLK_USE_NATIVE_POLY_TOBYTES) || \ defined(MLK_USE_NATIVE_POLY_FROMBYTES) || \ - defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) + defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D4) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D5) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D10) || \ + 
defined(MLK_USE_NATIVE_POLY_COMPRESS_D11) || \ + defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4) || \ + defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5) || \ + defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10) || \ + defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11) static void print_i16_array(const char *label, const int16_t *array, size_t len) { size_t i; @@ -184,12 +210,23 @@ static void generate_i16_array_ranged(int16_t *data, size_t len, int min_incl, ((unsigned)data[i] % (unsigned)(max_excl - min_incl))); } } -#endif /* MLK_USE_NATIVE_POLY_REDUCE || MLK_USE_NATIVE_POLY_TOMONT || \ - MLK_USE_NATIVE_NTT || MLK_USE_NATIVE_INTT || \ - MLK_USE_NATIVE_POLY_TOBYTES || MLK_USE_NATIVE_POLY_FROMBYTES || \ - MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ - -#if defined(MLK_USE_NATIVE_POLY_TOBYTES) +#endif /* MLK_USE_NATIVE_POLY_REDUCE || MLK_USE_NATIVE_POLY_TOMONT || \ + MLK_USE_NATIVE_NTT || MLK_USE_NATIVE_INTT || \ + MLK_USE_NATIVE_POLY_TOBYTES || MLK_USE_NATIVE_POLY_FROMBYTES || \ + MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED || \ + MLK_USE_NATIVE_POLY_COMPRESS_D4 || MLK_USE_NATIVE_POLY_COMPRESS_D5 \ + || MLK_USE_NATIVE_POLY_COMPRESS_D10 || \ + MLK_USE_NATIVE_POLY_COMPRESS_D11 || \ + MLK_USE_NATIVE_POLY_DECOMPRESS_D4 || \ + MLK_USE_NATIVE_POLY_DECOMPRESS_D5 || \ + MLK_USE_NATIVE_POLY_DECOMPRESS_D10 || \ + MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */ + +#if defined(MLK_USE_NATIVE_POLY_TOBYTES) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D4) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D5) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D10) || \ + defined(MLK_USE_NATIVE_POLY_COMPRESS_D11) static void print_u8_array(const char *label, const uint8_t *array, size_t len) { size_t i; @@ -235,7 +272,9 @@ static int compare_u8_arrays(const uint8_t *a, const uint8_t *b, unsigned len, } return 1; } -#endif /* MLK_USE_NATIVE_POLY_TOBYTES */ +#endif /* MLK_USE_NATIVE_POLY_TOBYTES || MLK_USE_NATIVE_POLY_COMPRESS_D4 || \ + MLK_USE_NATIVE_POLY_COMPRESS_D5 || MLK_USE_NATIVE_POLY_COMPRESS_D10 \ + || 
MLK_USE_NATIVE_POLY_COMPRESS_D11 */ #if defined(MLK_USE_NATIVE_POLY_REDUCE) || \ defined(MLK_USE_NATIVE_POLY_TOMONT) || defined(MLK_USE_NATIVE_NTT) || \ @@ -876,6 +915,165 @@ static int test_keccakf1600x4_xor_permute_extract(void) #undef MAX_RATE #endif /* MLK_USE_FIPS202_X4_NATIVE */ +#ifdef MLK_USE_NATIVE_REJ_UNIFORM +#define REJ_UNIFORM_BUFLEN 504 /* 3 * 168, divisible by 3 */ +static int test_native_rej_uniform(void) +{ + int ret = 1; + int i; + MLK_ALLOC(r_test, int16_t, MLKEM_N, NULL); + MLK_ALLOC(r_ref, int16_t, MLKEM_N, NULL); + MLK_ALLOC(buf, uint8_t, REJ_UNIFORM_BUFLEN, NULL); + + if (r_test == NULL || r_ref == NULL || buf == NULL) + { + goto cleanup; + } + + for (i = 0; i < NUM_RANDOM_TESTS; i++) + { + int native_ret; + unsigned c_ret; + randombytes(buf, REJ_UNIFORM_BUFLEN); + + native_ret = + mlk_rej_uniform_native(r_test, MLKEM_N, buf, REJ_UNIFORM_BUFLEN); + if (native_ret == MLK_NATIVE_FUNC_FALLBACK) + { + ret = 0; + goto cleanup; + } + + c_ret = mlk_rej_uniform_c(r_ref, MLKEM_N, 0, buf, REJ_UNIFORM_BUFLEN); + + CHECK((unsigned)native_ret == c_ret); + CHECK(compare_i16_arrays(r_test, r_ref, (unsigned)native_ret, "rej_uniform", + NULL)); + } + + ret = 0; + +cleanup: + MLK_FREE(buf, uint8_t, REJ_UNIFORM_BUFLEN, NULL); + MLK_FREE(r_ref, int16_t, MLKEM_N, NULL); + MLK_FREE(r_test, int16_t, MLKEM_N, NULL); + return ret; +} +#undef REJ_UNIFORM_BUFLEN +#endif /* MLK_USE_NATIVE_REJ_UNIFORM */ + +/* Backend unit tests for poly compress/decompress native implementations. + * For each variant, we compare the native output against the C reference. 
*/ + +#define DEFINE_COMPRESS_TEST(D, BYTES) \ + static int test_native_poly_compress_d##D(void) \ + { \ + int ret = 1; \ + int i; \ + MLK_ALLOC(r_test, uint8_t, BYTES, NULL); \ + MLK_ALLOC(r_ref, uint8_t, BYTES, NULL); \ + MLK_ALLOC(a, mlk_poly, 1, NULL); \ + \ + if (r_test == NULL || r_ref == NULL || a == NULL) \ + { \ + goto cleanup; \ + } \ + \ + for (i = 0; i < NUM_RANDOM_TESTS; i++) \ + { \ + int native_ret; \ + generate_i16_array_ranged(a->coeffs, MLKEM_N, 0, MLKEM_Q); \ + \ + native_ret = mlk_poly_compress_d##D##_native(r_test, a->coeffs); \ + if (native_ret == MLK_NATIVE_FUNC_FALLBACK) \ + { \ + ret = 0; \ + goto cleanup; \ + } \ + \ + mlk_poly_compress_d##D##_c(r_ref, a); \ + CHECK(compare_u8_arrays(r_test, r_ref, BYTES, "poly_compress_d" #D)); \ + } \ + \ + ret = 0; \ + \ + cleanup: \ + MLK_FREE(a, mlk_poly, 1, NULL); \ + MLK_FREE(r_ref, uint8_t, BYTES, NULL); \ + MLK_FREE(r_test, uint8_t, BYTES, NULL); \ + return ret; \ + } + +#define DEFINE_DECOMPRESS_TEST(D, BYTES) \ + static int test_native_poly_decompress_d##D(void) \ + { \ + int ret = 1; \ + int i; \ + MLK_ALLOC(r_test, mlk_poly, 1, NULL); \ + MLK_ALLOC(r_ref, mlk_poly, 1, NULL); \ + MLK_ALLOC(a, uint8_t, BYTES, NULL); \ + \ + if (r_test == NULL || r_ref == NULL || a == NULL) \ + { \ + goto cleanup; \ + } \ + \ + for (i = 0; i < NUM_RANDOM_TESTS; i++) \ + { \ + int native_ret; \ + randombytes(a, BYTES); \ + \ + native_ret = mlk_poly_decompress_d##D##_native(r_test->coeffs, a); \ + if (native_ret == MLK_NATIVE_FUNC_FALLBACK) \ + { \ + ret = 0; \ + goto cleanup; \ + } \ + \ + mlk_poly_decompress_d##D##_c(r_ref, a); \ + CHECK(compare_i16_arrays(r_test->coeffs, r_ref->coeffs, MLKEM_N, \ + "poly_decompress_d" #D, NULL)); \ + } \ + \ + ret = 0; \ + \ + cleanup: \ + MLK_FREE(a, uint8_t, BYTES, NULL); \ + MLK_FREE(r_ref, mlk_poly, 1, NULL); \ + MLK_FREE(r_test, mlk_poly, 1, NULL); \ + return ret; \ + } + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) +#ifdef 
MLK_USE_NATIVE_POLY_COMPRESS_D4 +DEFINE_COMPRESS_TEST(4, MLKEM_POLYCOMPRESSEDBYTES_D4) +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D4 +DEFINE_DECOMPRESS_TEST(4, MLKEM_POLYCOMPRESSEDBYTES_D4) +#endif +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D10 +DEFINE_COMPRESS_TEST(10, MLKEM_POLYCOMPRESSEDBYTES_D10) +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D10 +DEFINE_DECOMPRESS_TEST(10, MLKEM_POLYCOMPRESSEDBYTES_D10) +#endif +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */ + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4 +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D5 +DEFINE_COMPRESS_TEST(5, MLKEM_POLYCOMPRESSEDBYTES_D5) +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D5 +DEFINE_DECOMPRESS_TEST(5, MLKEM_POLYCOMPRESSEDBYTES_D5) +#endif +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D11 +DEFINE_COMPRESS_TEST(11, MLKEM_POLYCOMPRESSEDBYTES_D11) +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D11 +DEFINE_DECOMPRESS_TEST(11, MLKEM_POLYCOMPRESSEDBYTES_D11) +#endif +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */ + static int test_backend_units(void) { /* Set fixed seed for reproducible tests */ @@ -917,150 +1115,49 @@ static int test_backend_units(void) CHECK(test_keccakf1600x4_xor_permute_extract() == 0); #endif - return 0; -} - -#endif /* MLK_USE_NATIVE_POLY_REDUCE || MLK_USE_NATIVE_POLY_TOMONT || \ - MLK_USE_NATIVE_NTT || MLK_USE_NATIVE_INTT || \ - MLK_USE_NATIVE_POLY_TOBYTES || MLK_USE_NATIVE_POLY_FROMBYTES || \ - MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED || \ - MLK_USE_FIPS202_X1_NATIVE || MLK_USE_FIPS202_X4_NATIVE */ - +#ifdef MLK_USE_NATIVE_REJ_UNIFORM + CHECK(test_native_rej_uniform() == 0); +#endif -/* This test invokes the polynomial (de)compression routines - * with minimally sized buffers. When run under valgrind or with address - * sanitization, this ensures that no buffer overflow is happening. 
This is of - * interest because the compressed buffers sometimes have unaligned lengths and - * are therefore at risk of being overflowed by vectorized code. */ -static int test_poly_compress_no_overflow(void) -{ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - return 1; - } - memset((uint8_t *)s, 0, sizeof(mlk_poly)); - mlk_poly_compress_d4(r, s); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - return 1; - } - memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D4); - mlk_poly_decompress_d4(s, r); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D4, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - return 1; - } - memset((uint8_t *)s, 0, sizeof(mlk_poly)); - mlk_poly_compress_d10(r, s); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - return 1; - } - memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D10); - mlk_poly_decompress_d10(s, r); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D10, NULL); - } +#ifdef 
MLK_USE_NATIVE_POLY_COMPRESS_D4 + CHECK(test_native_poly_compress_d4() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D4 + CHECK(test_native_poly_decompress_d4() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D10 + CHECK(test_native_poly_compress_d10() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D10 + CHECK(test_native_poly_decompress_d10() == 0); +#endif #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */ #if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4 - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - return 1; - } - memset((uint8_t *)s, 0, sizeof(mlk_poly)); - mlk_poly_compress_d5(r, s); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - return 1; - } - memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D5); - mlk_poly_decompress_d5(s, r); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D5, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - return 1; - } - memset((uint8_t *)s, 0, sizeof(mlk_poly)); - mlk_poly_compress_d11(r, s); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - } - - { - MLK_ALLOC(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - MLK_ALLOC(s, mlk_poly, 1, NULL); - if (r == NULL || s == NULL) - { - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, 
MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - return 1; - } - memset(r, 0, MLKEM_POLYCOMPRESSEDBYTES_D11); - mlk_poly_decompress_d11(s, r); - MLK_FREE(s, mlk_poly, 1, NULL); - MLK_FREE(r, uint8_t, MLKEM_POLYCOMPRESSEDBYTES_D11, NULL); - } +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D5 + CHECK(test_native_poly_compress_d5() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D5 + CHECK(test_native_poly_decompress_d5() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_COMPRESS_D11 + CHECK(test_native_poly_compress_d11() == 0); +#endif +#ifdef MLK_USE_NATIVE_POLY_DECOMPRESS_D11 + CHECK(test_native_poly_decompress_d11() == 0); +#endif #endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */ return 0; } +#endif /* MLK_USE_NATIVE_POLY_REDUCE || MLK_USE_NATIVE_POLY_TOMONT || \ + MLK_USE_NATIVE_NTT || MLK_USE_NATIVE_INTT || \ + MLK_USE_NATIVE_POLY_TOBYTES || MLK_USE_NATIVE_POLY_FROMBYTES || \ + MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED || \ + MLK_USE_FIPS202_X1_NATIVE || MLK_USE_FIPS202_X4_NATIVE */ + /* poly_rej_uniform and poly_rej_uniform_4x implement the same * functionality with different degrees of batching. This unit * test makes sure these functions indeed produce the same @@ -1139,9 +1236,6 @@ int main(void) MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED || \ MLK_USE_FIPS202_X1_NATIVE || MLK_USE_FIPS202_X4_NATIVE */ - /* Test poly compress no overflow */ - CHECK(test_poly_compress_no_overflow() == 0); - #if !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) CHECK(test_poly_rej_uniform_consistency() == 0); #endif From a74b3c4ed7c720eae8794be843dfa3b9bf9c261b Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Thu, 19 Mar 2026 15:21:08 +0000 Subject: [PATCH 4/4] CI: Run backend unit tests under valgrind Add unit_valgrind job to ci.yml that runs the unit tests under valgrind on x86_64 and aarch64 runners. This catches buffer overflows in hand-written assembly that ASan cannot detect, since ASan only instruments compiler-generated code. 
Signed-off-by: Hanno Becker --- .github/workflows/ci.yml | 39 +++++++++++++++++++++++++++++++++++++++ test/mk/components.mk | 5 ++--- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28732278f5..4b67b5189c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -469,6 +469,45 @@ jobs: examples: false stack: true check_namespace: false + unit_valgrind: + name: Unit tests + valgrind (${{ matrix.target.name }}, ${{ matrix.cflags }}) + strategy: + fail-fast: false + matrix: + external: + - ${{ github.repository_owner != 'pq-code-package' }} + target: + - runner: ubuntu-latest + name: x86_64 + - runner: ubuntu-24.04-arm + name: aarch64 + cflags: ['-O3', '-Os'] + exclude: + - external: true + runs-on: ${{ matrix.target.runner }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Unit tests under valgrind + uses: ./.github/actions/functest + with: + gh_token: ${{ secrets.GITHUB_TOKEN }} + nix-shell: valgrind-varlat_gcc15 + nix-cache: false + opt: opt + cflags: "${{ matrix.cflags }} -std=c11 -D_GNU_SOURCE -DMLK_CONFIG_FILE=\\\\\\\"../test/configs/custom_heap_alloc_config.h\\\\\\\"" + func: false + kat: false + acvp: false + wycheproof: false + examples: false + stack: false + unit: true + alloc: false + rng_fail: false + check_namespace: false + # Disable AArch64 SHA3 extension: valgrind cannot emulate it + extra_env: "MK_COMPILER_SUPPORTS_SHA3=0" + exec_wrapper: "valgrind --error-exitcode=1" config_variations: name: Non-standard configurations strategy: diff --git a/test/mk/components.mk b/test/mk/components.mk index 71f22f1284..4a3768c6c7 100644 --- a/test/mk/components.mk +++ b/test/mk/components.mk @@ -32,9 +32,8 @@ $(MLKEM768_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=768 MLKEM1024_OBJS = $(call MAKE_OBJS,$(MLKEM1024_DIR),$(SOURCES) $(FIPS202_SRCS)) $(MLKEM1024_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=1024 -# Unit test object files - 
same sources but with MLK_STATIC_TESTABLE= and custom heap alloc config -UNIT_CFLAGS = -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes \ - -DMLK_CONFIG_FILE=\"../test/configs/custom_heap_alloc_config.h\" -std=c11 -D_GNU_SOURCE +# Unit test object files - same sources but with MLK_STATIC_TESTABLE= +UNIT_CFLAGS = -DMLK_STATIC_TESTABLE= -Wno-missing-prototypes MLKEM512_UNIT_OBJS = $(call MAKE_OBJS,$(MLKEM512_DIR)/unit,$(SOURCES) $(FIPS202_SRCS)) $(MLKEM512_UNIT_OBJS): CFLAGS += -DMLK_CONFIG_PARAMETER_SET=512 $(UNIT_CFLAGS)