From 63e950e24d739d5f4f0989644c46cc82c6c8d3c4 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sat, 11 Jan 2025 19:30:23 +0000
Subject: [PATCH 1/3] ML-KEM: Remove reference implementation

This commit removes the reference implementation of ML-KEM from
the source tree, in preparation for the integration of mlkem-native.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 crypto/fipsmodule/ml_kem/ml_kem.c             | 228 -----------
 crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.c     | 124 ------
 crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.h     |  14 -
 crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.c  | 372 ------------------
 crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.h  |  30 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/kem.c     | 350 ----------------
 crypto/fipsmodule/ml_kem/ml_kem_ref/kem.h     |  22 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.c     | 146 -------
 crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.h     |  19 -
 crypto/fipsmodule/ml_kem/ml_kem_ref/params.c  |  41 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/params.h  |  53 ---
 crypto/fipsmodule/ml_kem/ml_kem_ref/poly.c    | 371 -----------------
 crypto/fipsmodule/ml_kem/ml_kem_ref/poly.h    |  53 ---
 crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.c | 248 ------------
 crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.h |  36 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.c  |  42 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.h  |  16 -
 .../ml_kem/ml_kem_ref/symmetric-shake.c       | 107 -----
 .../fipsmodule/ml_kem/ml_kem_ref/symmetric.h  |  32 --
 crypto/fipsmodule/ml_kem/ml_kem_ref/verify.c  |  46 ---
 crypto/fipsmodule/ml_kem/ml_kem_ref/verify.h  |  14 -
 21 files changed, 2364 deletions(-)
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/kem.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/kem.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/params.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/params.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/poly.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/poly.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric-shake.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric.h
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/verify.c
 delete mode 100644 crypto/fipsmodule/ml_kem/ml_kem_ref/verify.h

diff --git a/crypto/fipsmodule/ml_kem/ml_kem.c b/crypto/fipsmodule/ml_kem/ml_kem.c
deleted file mode 100644
index 771a49c240e..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem.c
+++ /dev/null
@@ -1,228 +0,0 @@
-// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
-// SPDX-License-Identifier: Apache-2.0 OR ISC
-
-#include "./ml_kem.h"
-
-#include "./ml_kem_ref/kem.h"
-#include "./ml_kem_ref/params.h"
-
-#include "./ml_kem_ref/cbd.c"
-#include "./ml_kem_ref/indcpa.c"
-#include "./ml_kem_ref/kem.c"
-#include "./ml_kem_ref/ntt.c"
-#include "./ml_kem_ref/params.c"
-#include "./ml_kem_ref/poly.c"
-#include "./ml_kem_ref/polyvec.c"
-#include "./ml_kem_ref/reduce.c"
-#include "./ml_kem_ref/symmetric-shake.c"
-#include "./ml_kem_ref/verify.c"
-#include "../../internal.h"
-
-// Note: These methods currently default to using the reference code for ML_KEM.
-// In a future where AWS-LC has optimized options available, those can be
-// conditionally (or based on compile-time flags) called here, depending on
-// platform support.
-
-int ml_kem_512_keypair_deterministic(uint8_t *public_key  /* OUT */,
-                                         uint8_t *secret_key  /* OUT */,
-                                         const uint8_t *seed  /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  return ml_kem_512_keypair_deterministic_no_self_test(public_key, secret_key, seed);
-}
-
-int ml_kem_512_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
-                                                  uint8_t *secret_key  /* OUT */,
-                                                  const uint8_t *seed  /* IN */) {
-  ml_kem_params params;
-  int res;
-  ml_kem_512_params_init(&params);
-  res = ml_kem_keypair_derand_ref(&params, public_key, secret_key, seed);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_512_keypair(uint8_t *public_key /* OUT */,
-                           uint8_t *secret_key /* OUT */) {
-  boringssl_ensure_ml_kem_self_test();
-  int res;
-  ml_kem_params params;
-  ml_kem_512_params_init(&params);
-  res = ml_kem_keypair_ref(&params, public_key, secret_key);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_512_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
-                                             uint8_t *shared_secret    /* OUT */,
-                                             const uint8_t *public_key /* IN  */,
-                                             const uint8_t *seed       /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  return ml_kem_512_encapsulate_deterministic_no_self_test(ciphertext, shared_secret, public_key, seed);
-}
-
-int ml_kem_512_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
-                                                      uint8_t *shared_secret    /* OUT */,
-                                                      const uint8_t *public_key /* IN  */,
-                                                      const uint8_t *seed       /* IN */) {
-  ml_kem_params params;
-  ml_kem_512_params_init(&params);
-  return ml_kem_enc_derand_ref(&params, ciphertext, shared_secret, public_key,
-                               seed);
-}
-
-int ml_kem_512_encapsulate(uint8_t *ciphertext       /* OUT */,
-                               uint8_t *shared_secret    /* OUT */,
-                               const uint8_t *public_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_512_params_init(&params);
-  return ml_kem_enc_ref(&params, ciphertext, shared_secret, public_key);
-}
-
-int ml_kem_512_decapsulate(uint8_t *shared_secret    /* OUT */,
-                               const uint8_t *ciphertext /* IN  */,
-                               const uint8_t *secret_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  return ml_kem_512_decapsulate_no_self_test(shared_secret, ciphertext, secret_key);
-}
-
-int ml_kem_512_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
-                                        const uint8_t *ciphertext /* IN  */,
-                                        const uint8_t *secret_key /* IN  */) {
-  ml_kem_params params;
-  ml_kem_512_params_init(&params);
-  return ml_kem_dec_ref(&params, shared_secret, ciphertext, secret_key);
-}
-
-
-int ml_kem_768_keypair_deterministic(uint8_t *public_key  /* OUT */,
-                                         uint8_t *secret_key  /* OUT */,
-                                         const uint8_t *seed  /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  int res;
-  ml_kem_768_params_init(&params);
-  res = ml_kem_keypair_derand_ref(&params, public_key, secret_key, seed);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_768_keypair(uint8_t *public_key /* OUT */,
-                           uint8_t *secret_key /* OUT */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  int res;
-  ml_kem_768_params_init(&params);
-  res = ml_kem_keypair_ref(&params, public_key, secret_key);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_768_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
-                                             uint8_t *shared_secret    /* OUT */,
-                                             const uint8_t *public_key /* IN  */,
-                                             const uint8_t *seed       /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_768_params_init(&params);
-  return ml_kem_enc_derand_ref(&params, ciphertext, shared_secret, public_key, seed);
-}
-
-int ml_kem_768_encapsulate(uint8_t *ciphertext       /* OUT */,
-                               uint8_t *shared_secret    /* OUT */,
-                               const uint8_t *public_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_768_params_init(&params);
-  return ml_kem_enc_ref(&params, ciphertext, shared_secret, public_key);
-}
-
-int ml_kem_768_decapsulate(uint8_t *shared_secret    /* OUT */,
-                               const uint8_t *ciphertext /* IN  */,
-                               const uint8_t *secret_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_768_params_init(&params);
-  return ml_kem_dec_ref(&params, shared_secret, ciphertext, secret_key);
-}
-
-int ml_kem_1024_keypair_deterministic(uint8_t *public_key  /* OUT */,
-                                          uint8_t *secret_key  /* OUT */,
-                                          const uint8_t *seed  /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  int res;
-  ml_kem_1024_params_init(&params);
-  res = ml_kem_keypair_derand_ref(&params, public_key, secret_key, seed);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_1024_keypair(uint8_t *public_key /* OUT */,
-                            uint8_t *secret_key /* OUT */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  int res;
-  ml_kem_1024_params_init(&params);
-  res = ml_kem_keypair_ref(&params, public_key, secret_key);
-#if defined(AWSLC_FIPS)
-  /* PCT failure is the only failure condition for key generation. */
-  if (res != 0) {
-      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
-  }
-#endif
-  return res;
-}
-
-int ml_kem_1024_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
-                                              uint8_t *shared_secret    /* OUT */,
-                                              const uint8_t *public_key /* IN  */,
-                                              const uint8_t *seed       /* IN */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_1024_params_init(&params);
-  return ml_kem_enc_derand_ref(&params, ciphertext, shared_secret, public_key, seed);
-}
-
-int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
-                                uint8_t *shared_secret    /* OUT */,
-                                const uint8_t *public_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_1024_params_init(&params);
-  return ml_kem_enc_ref(&params, ciphertext, shared_secret, public_key);
-}
-
-int ml_kem_1024_decapsulate(uint8_t *shared_secret    /* OUT */,
-                                const uint8_t *ciphertext /* IN  */,
-                                const uint8_t *secret_key /* IN  */) {
-  boringssl_ensure_ml_kem_self_test();
-  ml_kem_params params;
-  ml_kem_1024_params_init(&params);
-  return ml_kem_dec_ref(&params, shared_secret, ciphertext, secret_key);
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.c
deleted file mode 100644
index d99a3dd59ff..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.c
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <stdint.h>
-#include <assert.h>
-
-#include "params.h"
-#include "cbd.h"
-
-/*************************************************
-* Name:        load32_littleendian
-*
-* Description: load 4 bytes into a 32-bit integer
-*              in little-endian order
-*
-* Arguments:   - const uint8_t *x: pointer to input byte array
-*
-* Returns 32-bit unsigned integer loaded from x
-**************************************************/
-static uint32_t load32_littleendian(const uint8_t x[4])
-{
-  uint32_t r;
-  r  = (uint32_t)x[0];
-  r |= (uint32_t)x[1] << 8;
-  r |= (uint32_t)x[2] << 16;
-  r |= (uint32_t)x[3] << 24;
-  return r;
-}
-
-/*************************************************
-* Name:        load24_littleendian
-*
-* Description: load 3 bytes into a 32-bit integer
-*              in little-endian order.
-*              This function is only needed for Kyber-512
-*
-* Arguments:   - const uint8_t *x: pointer to input byte array
-*
-* Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
-**************************************************/
-static uint32_t load24_littleendian(const uint8_t x[3])
-{
-  uint32_t r;
-  r  = (uint32_t)x[0];
-  r |= (uint32_t)x[1] << 8;
-  r |= (uint32_t)x[2] << 16;
-  return r;
-}
-
-/*************************************************
-* Name:        cbd2
-*
-* Description: Given an array of uniformly random bytes, compute
-*              polynomial with coefficients distributed according to
-*              a centered binomial distribution with parameter eta=2
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *buf: pointer to input byte array
-**************************************************/
-static void cbd2(poly *r, const uint8_t buf[2*KYBER_N/4])
-{
-  unsigned int i,j;
-  uint32_t t,d;
-  int16_t a,b;
-
-  for(i=0;i<KYBER_N/8;i++) {
-    t  = load32_littleendian(buf+4*i);
-    d  = t & 0x55555555;
-    d += (t>>1) & 0x55555555;
-
-    for(j=0;j<8;j++) {
-      a = (d >> (4*j+0)) & 0x3;
-      b = (d >> (4*j+2)) & 0x3;
-      r->coeffs[8*i+j] = a - b;
-    }
-  }
-}
-
-/*************************************************
-* Name:        cbd3
-*
-* Description: Given an array of uniformly random bytes, compute
-*              polynomial with coefficients distributed according to
-*              a centered binomial distribution with parameter eta=3.
-*              This function is only needed for Kyber-512
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *buf: pointer to input byte array
-**************************************************/
-static void cbd3(poly *r, const uint8_t buf[3*KYBER_N/4])
-{
-  unsigned int i,j;
-  uint32_t t,d;
-  int16_t a,b;
-
-  for(i=0;i<KYBER_N/4;i++) {
-    t  = load24_littleendian(buf+3*i);
-    d  = t & 0x00249249;
-    d += (t>>1) & 0x00249249;
-    d += (t>>2) & 0x00249249;
-
-    for(j=0;j<4;j++) {
-      a = (d >> (6*j+0)) & 0x7;
-      b = (d >> (6*j+3)) & 0x7;
-      r->coeffs[4*i+j] = a - b;
-    }
-  }
-}
-
-void poly_cbd_eta1(ml_kem_params *params, poly *r, const uint8_t *buf)
-{
-  assert((params->eta1 == 2) || (params->eta1 == 3));
-  if (params->eta1 == 2) {
-    cbd2(r, buf);
-  } else {
-    cbd3(r, buf);
-  }
-}
-
-void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4])
-{
-#if KYBER_ETA2 == 2
-  cbd2(r, buf);
-#else
-#error "This implementation requires eta2 = 2"
-#endif
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.h
deleted file mode 100644
index 0ed2facc278..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/cbd.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ML_KEM_CBD_H
-#define ML_KEM_CBD_H
-
-#include <stdint.h>
-#include "params.h"
-#include "poly.h"
-
-#define poly_cbd_eta1 KYBER_NAMESPACE(poly_cbd_eta1)
-void poly_cbd_eta1(ml_kem_params *params, poly *r, const uint8_t *buf);
-
-#define poly_cbd_eta2 KYBER_NAMESPACE(poly_cbd_eta2)
-void poly_cbd_eta2(poly *r, const uint8_t buf[KYBER_ETA2*KYBER_N/4]);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.c
deleted file mode 100644
index f0002d3a3b8..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.c
+++ /dev/null
@@ -1,372 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-
-#include "../../../internal.h"
-
-#include "params.h"
-#include "indcpa.h"
-#include "polyvec.h"
-#include "poly.h"
-#include "ntt.h"
-#include "symmetric.h"
-
-/*************************************************
-* Name:        pack_pk
-*
-* Description: Serialize the public key as concatenation of the
-*              serialized vector of polynomials pk
-*              and the public seed used to generate the matrix A.
-*
-* Arguments:   uint8_t *r: pointer to the output serialized public key
-*              polyvec *pk: pointer to the input public-key polyvec
-*              const uint8_t *seed: pointer to the input public seed
-**************************************************/
-static void pack_pk(ml_kem_params *params,
-                    uint8_t *r,
-                    polyvec *pk,
-                    const uint8_t *seed)
-{
-  polyvec_tobytes(params, r, pk);
-  memcpy(r+params->poly_vec_bytes, seed, KYBER_SYMBYTES);
-}
-
-/*************************************************
-* Name:        unpack_pk
-*
-* Description: De-serialize public key from a byte array;
-*              approximate inverse of pack_pk
-*
-* Arguments:   - polyvec *pk: pointer to output public-key polynomial vector
-*              - uint8_t *seed: pointer to output seed to generate matrix A
-*              - const uint8_t *packedpk: pointer to input serialized public key
-**************************************************/
-static void unpack_pk(ml_kem_params *params,
-                      polyvec *pk,
-                      uint8_t seed[KYBER_SYMBYTES],
-                      const uint8_t *packedpk)
-{
-  polyvec_frombytes(params, pk, packedpk);
-  memcpy(seed, packedpk+params->poly_vec_bytes, KYBER_SYMBYTES);
-}
-
-/*************************************************
-* Name:        pack_sk
-*
-* Description: Serialize the secret key
-*
-* Arguments:   - uint8_t *r: pointer to output serialized secret key
-*              - polyvec *sk: pointer to input vector of polynomials (secret key)
-**************************************************/
-static void pack_sk(ml_kem_params *params, uint8_t *r, polyvec *sk)
-{
-  polyvec_tobytes(params, r, sk);
-}
-
-/*************************************************
-* Name:        unpack_sk
-*
-* Description: De-serialize the secret key; inverse of pack_sk
-*
-* Arguments:   - polyvec *sk: pointer to output vector of polynomials (secret key)
-*              - const uint8_t *packedsk: pointer to input serialized secret key
-**************************************************/
-static void unpack_sk(ml_kem_params *params, polyvec *sk, const uint8_t *packedsk)
-{
-  polyvec_frombytes(params, sk, packedsk);
-}
-
-/*************************************************
-* Name:        pack_ciphertext
-*
-* Description: Serialize the ciphertext as concatenation of the
-*              compressed and serialized vector of polynomials b
-*              and the compressed and serialized polynomial v
-*
-* Arguments:   uint8_t *r: pointer to the output serialized ciphertext
-*              poly *pk: pointer to the input vector of polynomials b
-*              poly *v: pointer to the input polynomial v
-**************************************************/
-static void pack_ciphertext(ml_kem_params *params, uint8_t *r, polyvec *b, poly *v)
-{
-  polyvec_compress(params, r, b);
-  poly_compress(params, r+params->poly_vec_compressed_bytes, v);
-}
-
-/*************************************************
-* Name:        unpack_ciphertext
-*
-* Description: De-serialize and decompress ciphertext from a byte array;
-*              approximate inverse of pack_ciphertext
-*
-* Arguments:   - polyvec *b: pointer to the output vector of polynomials b
-*              - poly *v: pointer to the output polynomial v
-*              - const uint8_t *c: pointer to the input serialized ciphertext
-**************************************************/
-static void unpack_ciphertext(ml_kem_params *params, polyvec *b, poly *v, const uint8_t *c)
-{
-  polyvec_decompress(params, b, c);
-  poly_decompress(params, v, c+params->poly_vec_compressed_bytes);
-}
-
-/*************************************************
-* Name:        rej_uniform
-*
-* Description: Run rejection sampling on uniform random bytes to generate
-*              uniform random integers mod q
-*
-* Arguments:   - int16_t *r: pointer to output buffer
-*              - unsigned int len: requested number of 16-bit integers (uniform mod q)
-*              - const uint8_t *buf: pointer to input buffer (assumed to be uniformly random bytes)
-*              - unsigned int buflen: length of input buffer in bytes
-*
-* Returns number of sampled 16-bit integers (at most len)
-**************************************************/
-static unsigned int rej_uniform(int16_t *r,
-                                unsigned int len,
-                                const uint8_t *buf,
-                                unsigned int buflen)
-{
-  unsigned int ctr, pos;
-  uint16_t val0, val1;
-
-  ctr = pos = 0;
-  while(ctr < len && pos + 3 <= buflen) {
-    val0 = ((buf[pos+0] >> 0) | ((uint16_t)buf[pos+1] << 8)) & 0xFFF;
-    val1 = ((buf[pos+1] >> 4) | ((uint16_t)buf[pos+2] << 4)) & 0xFFF;
-    pos += 3;
-
-    if(val0 < KYBER_Q)
-      r[ctr++] = val0;
-    if(ctr < len && val1 < KYBER_Q)
-      r[ctr++] = val1;
-  }
-
-  return ctr;
-}
-
-#define gen_a(PARAMS,A,B)  gen_matrix(PARAMS,A,B,0)
-#define gen_at(PARAMS,A,B) gen_matrix(PARAMS,A,B,1)
-
-/*************************************************
-* Name:        gen_matrix
-*
-* Description: Deterministically generate matrix A (or the transpose of A)
-*              from a seed. Entries of the matrix are polynomials that look
-*              uniformly random. Performs rejection sampling on output of
-*              a XOF
-*
-* Arguments:   - polyvec *a: pointer to ouptput matrix A
-*              - const uint8_t *seed: pointer to input seed
-*              - int transposed: boolean deciding whether A or A^T is generated
-**************************************************/
-#define GEN_MATRIX_NBLOCKS ((12*KYBER_N/8*(1 << 12)/KYBER_Q + XOF_BLOCKBYTES)/XOF_BLOCKBYTES)
-// Not static for benchmarking
-void gen_matrix(ml_kem_params *params, polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed)
-{
-  unsigned int ctr, i, j, k;
-  unsigned int buflen, off;
-  uint8_t buf[GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES+2];
-  KECCAK1600_CTX ctx;
-
-  for(i=0;i<params->k;i++) {
-    for(j=0;j<params->k;j++) {
-      if(transposed)
-        xof_absorb(&ctx, seed, i, j);
-      else
-        xof_absorb(&ctx, seed, j, i);
-
-      xof_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &ctx);
-      buflen = GEN_MATRIX_NBLOCKS*XOF_BLOCKBYTES;
-      ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, buflen);
-
-      while(ctr < KYBER_N) {
-        off = buflen % 3;
-        for(k = 0; k < off; k++)
-          buf[k] = buf[buflen - off + k];
-        xof_squeezeblocks(buf + off, 1, &ctx);
-        buflen = off + XOF_BLOCKBYTES;
-        ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, buflen);
-      }
-    }
-  }
-  
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(buf, sizeof(buf));
-}
-
-/*************************************************
-* Name:        indcpa_keypair_derand
-*
-* Description: Generates public and private key for the CPA-secure
-*              public-key encryption scheme underlying Kyber
-*
-* Arguments:   - uint8_t *pk: pointer to output public key
-*                             (of length KYBER_INDCPA_PUBLICKEYBYTES bytes)
-*              - uint8_t *sk: pointer to output private key
-*                             (of length KYBER_INDCPA_SECRETKEYBYTES bytes)
-*              - const uint8_t *coins: pointer to input randomness
-*                             (of length KYBER_SYMBYTES bytes)
-**************************************************/
-void indcpa_keypair_derand(ml_kem_params *params,
-                           uint8_t *pk,
-                           uint8_t *sk,
-                           const uint8_t coins[KYBER_SYMBYTES])
-{
-  unsigned int i;
-  uint8_t buf[2*KYBER_SYMBYTES];
-  const uint8_t *publicseed = buf;
-  const uint8_t *noiseseed = buf+KYBER_SYMBYTES;
-  uint8_t nonce = 0;
-  polyvec a[KYBER_K_MAX], e, pkpv, skpv;
-
-  uint8_t coins_with_domain_separator[KYBER_SYMBYTES + 1];
-  memcpy(coins_with_domain_separator, coins, KYBER_SYMBYTES);
-  coins_with_domain_separator[KYBER_SYMBYTES] = params->k;
-
-  hash_g(buf, coins_with_domain_separator, KYBER_SYMBYTES + 1);
-
-  gen_a(params, a, publicseed);
-
-  for(i=0;i<params->k;i++)
-    poly_getnoise_eta1(params, &skpv.vec[i], noiseseed, nonce++);
-  for(i=0;i<params->k;i++)
-    poly_getnoise_eta1(params, &e.vec[i], noiseseed, nonce++);
-
-  polyvec_ntt(params, &skpv);
-  polyvec_ntt(params, &e);
-
-  // matrix-vector multiplication
-  for(i=0;i<params->k;i++) {
-    polyvec_basemul_acc_montgomery(params, &pkpv.vec[i], &a[i], &skpv);
-    poly_tomont(&pkpv.vec[i]);
-  }
-
-  polyvec_add(params, &pkpv, &pkpv, &e);
-  polyvec_reduce(params, &pkpv);
-
-  pack_sk(params, sk, &skpv);
-  pack_pk(params, pk, &pkpv, publicseed);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(buf, sizeof(buf));
-  OPENSSL_cleanse(coins_with_domain_separator, sizeof(coins_with_domain_separator));
-  OPENSSL_cleanse(a, sizeof(a));
-  OPENSSL_cleanse(&e, sizeof(e));
-  OPENSSL_cleanse(&pkpv, sizeof(pkpv));
-  OPENSSL_cleanse(&skpv, sizeof(skpv));
-}
-
-
-/*************************************************
-* Name:        indcpa_enc
-*
-* Description: Encryption function of the CPA-secure
-*              public-key encryption scheme underlying Kyber.
-*
-* Arguments:   - uint8_t *c: pointer to output ciphertext
-*                            (of length KYBER_INDCPA_BYTES bytes)
-*              - const uint8_t *m: pointer to input message
-*                                  (of length KYBER_INDCPA_MSGBYTES bytes)
-*              - const uint8_t *pk: pointer to input public key
-*                                   (of length KYBER_INDCPA_PUBLICKEYBYTES)
-*              - const uint8_t *coins: pointer to input random coins used as seed
-*                                      (of length KYBER_SYMBYTES) to deterministically
-*                                      generate all randomness
-**************************************************/
-void indcpa_enc(ml_kem_params *params,
-                uint8_t *c,
-                const uint8_t *m,
-                const uint8_t *pk,
-                const uint8_t coins[KYBER_SYMBYTES])
-{
-  unsigned int i;
-  uint8_t seed[KYBER_SYMBYTES];
-  uint8_t nonce = 0;
-  polyvec sp, pkpv, ep, at[KYBER_K_MAX], b;
-  poly v, k, epp;
-
-  unpack_pk(params, &pkpv, seed, pk);
-  poly_frommsg(&k, m);
-  gen_at(params, at, seed);
-
-  for(i=0;i<params->k;i++)
-    poly_getnoise_eta1(params, sp.vec+i, coins, nonce++);
-  for(i=0;i<params->k;i++)
-    poly_getnoise_eta2(ep.vec+i, coins, nonce++);
-  poly_getnoise_eta2(&epp, coins, nonce++);
-
-  polyvec_ntt(params, &sp);
-
-  // matrix-vector multiplication
-  for(i=0;i<params->k;i++)
-    polyvec_basemul_acc_montgomery(params, &b.vec[i], &at[i], &sp);
-
-  polyvec_basemul_acc_montgomery(params, &v, &pkpv, &sp);
-
-  polyvec_invntt_tomont(params, &b);
-  poly_invntt_tomont(&v);
-
-  polyvec_add(params, &b, &b, &ep);
-  poly_add(&v, &v, &epp);
-  poly_add(&v, &v, &k);
-  polyvec_reduce(params, &b);
-  poly_reduce(&v);
-
-  pack_ciphertext(params, c, &b, &v);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(seed, sizeof(seed));
-  OPENSSL_cleanse(&sp, sizeof(sp));
-  OPENSSL_cleanse(&pkpv, sizeof(pkpv));
-  OPENSSL_cleanse(&ep, sizeof(ep));
-  OPENSSL_cleanse(at, sizeof(at));
-  OPENSSL_cleanse(&b, sizeof(b));
-  OPENSSL_cleanse(&v, sizeof(v));
-  OPENSSL_cleanse(&k, sizeof(k));
-  OPENSSL_cleanse(&epp, sizeof(epp));
-}
-
-/*************************************************
-* Name:        indcpa_dec
-*
-* Description: Decryption function of the CPA-secure
-*              public-key encryption scheme underlying Kyber.
-*
-* Arguments:   - uint8_t *m: pointer to output decrypted message
-*                            (of length KYBER_INDCPA_MSGBYTES)
-*              - const uint8_t *c: pointer to input ciphertext
-*                                  (of length KYBER_INDCPA_BYTES)
-*              - const uint8_t *sk: pointer to input secret key
-*                                   (of length KYBER_INDCPA_SECRETKEYBYTES)
-**************************************************/
-void indcpa_dec(ml_kem_params *params,
-                uint8_t *m,
-                const uint8_t *c,
-                const uint8_t *sk)
-{
-  polyvec b, skpv;
-  poly v, mp;
-
-  // work-around for gcc-12 which complains that skpv may be used uninitialized.
-  OPENSSL_memset(&skpv, 0, sizeof(polyvec));
-
-  unpack_ciphertext(params, &b, &v, c);
-  unpack_sk(params, &skpv, sk);
-
-  polyvec_ntt(params, &b);
-  polyvec_basemul_acc_montgomery(params, &mp, &skpv, &b);
-  poly_invntt_tomont(&mp);
-
-  poly_sub(&mp, &v, &mp);
-  poly_reduce(&mp);
-
-  poly_tomsg(m, &mp);
-
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(&b, sizeof(b));
-  OPENSSL_cleanse(&skpv, sizeof(skpv));
-  OPENSSL_cleanse(&v, sizeof(v));
-  OPENSSL_cleanse(&mp, sizeof(mp));
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.h
deleted file mode 100644
index d13fc48faf7..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/indcpa.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef ML_KEM_INDCPA_H
-#define ML_KEM_INDCPA_H
-
-#include <stdint.h>
-#include "params.h"
-#include "polyvec.h"
-
-#define gen_matrix KYBER_NAMESPACE(gen_matrix)
-void gen_matrix(ml_kem_params *params, polyvec *a, const uint8_t seed[KYBER_SYMBYTES], int transposed);
-
-#define indcpa_keypair_derand KYBER_NAMESPACE(indcpa_keypair_derand)
-void indcpa_keypair_derand(ml_kem_params *params,
-                           uint8_t *pk,
-                           uint8_t *sk,
-                           const uint8_t coins[KYBER_SYMBYTES]);
-
-#define indcpa_enc KYBER_NAMESPACE(indcpa_enc)
-void indcpa_enc(ml_kem_params *params,
-                uint8_t *c,
-                const uint8_t *m,
-                const uint8_t *pk,
-                const uint8_t coins[KYBER_SYMBYTES]);
-
-#define indcpa_dec KYBER_NAMESPACE(indcpa_dec)
-void indcpa_dec(ml_kem_params *params,
-                uint8_t *m,
-                const uint8_t *c,
-                const uint8_t *sk);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.c
deleted file mode 100644
index 3aaf55ef000..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.c
+++ /dev/null
@@ -1,350 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include "./params.h"
-#include "./kem.h"
-#include "./indcpa.h"
-#include "./verify.h"
-#include "./reduce.h"
-#include "./symmetric.h"
-
-#include "openssl/rand.h"
-
-#if defined(AWSLC_FIPS)
-// FIPS 203. Pair-wise Consistency Test (PCT) required per [FIPS 140-3 IG](https://csrc.nist.gov/csrc/media/Projects/cryptographic-module-validation-program/documents/fips%20140-3/FIPS%20140-3%20IG.pdf):
-// The PCT consists of applying the encapsulation key to encapsulate a shared
-// secret leading to ciphertext, and then applying decapsulation key to
-// retrieve the same shared secret. Returns 0 if the PCT passes, 1 otherwise.
-static int keygen_pct(ml_kem_params *params, const uint8_t *ek, const uint8_t *dk) {
-  uint8_t ct[KYBER_CIPHERTEXTBYTES_MAX];
-  uint8_t ss_enc[KYBER_SSBYTES];
-  uint8_t ss_dec[KYBER_SSBYTES];
-
-  crypto_kem_enc(params, ct, ss_enc, ek);
-  crypto_kem_dec(params, ss_dec, ct, dk);
-
-  if (boringssl_fips_break_test("MLKEM_PWCT")) {
-    ss_enc[0] = ~ss_enc[0];
-  }
-
-  return verify(ss_enc, ss_dec, KYBER_SSBYTES);
-}
-#endif
-
-/*************************************************
-* Name:        crypto_kem_keypair_derand
-*
-* Description: Generates public and private key
-*              for CCA-secure Kyber key encapsulation mechanism
-*
-* Arguments:   - uint8_t *pk: pointer to output public key
-*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
-*              - uint8_t *sk: pointer to output private key
-*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
-*              - uint8_t *coins: pointer to input randomness
-*                (an already allocated array filled with 2*KYBER_SYMBYTES random bytes)
-*
-* Returns:     - 0 on success
-*              - -1 upon PCT failure (if AWSLC_FIPS is set)
-**************************************************/
-int crypto_kem_keypair_derand(ml_kem_params *params,
-                              uint8_t *pk,
-                              uint8_t *sk,
-                              const uint8_t *coins)
-{
-  indcpa_keypair_derand(params, pk, sk, coins);
-  memcpy(sk+params->indcpa_secret_key_bytes, pk, params->public_key_bytes);
-  hash_h(sk+params->secret_key_bytes-2*KYBER_SYMBYTES, pk, params->public_key_bytes);
-  /* Value z for pseudo-random output on reject */
-  memcpy(sk+params->secret_key_bytes-KYBER_SYMBYTES, coins+KYBER_SYMBYTES, KYBER_SYMBYTES);
-
-#if defined(AWSLC_FIPS)
-  if (keygen_pct(params, pk, sk)) {
-    return -1;
-  }
-#endif
-  return 0;
-}
-
-/*************************************************
-* Name:        crypto_kem_keypair
-*
-* Description: Generates public and private key
-*              for CCA-secure Kyber key encapsulation mechanism
-*
-* Arguments:   - uint8_t *pk: pointer to output public key
-*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
-*              - uint8_t *sk: pointer to output private key
-*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
-*
-* Returns:     - 0 on success
-*              - -1 upon PCT failure (if AWSLC_FIPS is set)
-**************************************************/
-int crypto_kem_keypair(ml_kem_params *params,
-                       uint8_t *pk,
-                       uint8_t *sk)
-{
-  uint8_t coins[2*KYBER_SYMBYTES];
-  RAND_bytes(coins, 2*KYBER_SYMBYTES);
-  int res = crypto_kem_keypair_derand(params, pk, sk, coins);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(coins, sizeof(coins));
-
-  return res;
-}
-
-// Converts a centered representative |in| which is an integer in
-// {-(q-1)/2, ..., (q-1)/2}, to a positive representative in {0, ..., q-1}.
-// It implements in constant-time the following operation:
-//   return (in < 0) ? in + KYBER_Q : in;
-static int16_t centered_to_positive_representative(int16_t in) {
-  // mask = (in < 0) ? b11..11 : b00..00;
-  crypto_word_t mask = constant_time_is_zero_w(in >> 15);
-  int16_t in_fixed = in + KYBER_Q;
-  return constant_time_select_int(mask, in, in_fixed);
-}
-
-#define BYTE_ENCODE_12_IN_SIZE  (256)
-#define BYTE_ENCODE_12_OUT_SIZE (32 * 12)
-#define BYTE_DECODE_12_OUT_SIZE (BYTE_ENCODE_12_IN_SIZE)
-#define BYTE_DECODE_12_IN_SIZE  (BYTE_ENCODE_12_OUT_SIZE)
-
-// FIPS 203. Algorithm 5 ByteEncode_12
-// Encodes an array of 256 12-bit integers into a byte array.
-// Intuition for the implementation:
-//   in:  |xxxxxxxxyyyy| |yyyyzzzzzzzz| ...
-//   out: |xxxxxxxx| |yyyyyyyy| |zzzzzzzz| ...
-// We divide the input in pairs of elements (2 x 12 bits = 24 bits),
-// and the output in triplets (3 x 8 bits = 24 bits). For each pair/triplet we:
-//   - out0 <-- first eight bits of in0,
-//   - out1 <-- concatenate last 4 bits of in0 and first 4 bits of in1,
-//   - out2 <-- last 8 bits of in1.
-static void byte_encode_12(uint8_t out[BYTE_ENCODE_12_OUT_SIZE],
-                           const int16_t in[BYTE_ENCODE_12_IN_SIZE]) {
-  for (size_t i = 0; i < BYTE_ENCODE_12_IN_SIZE / 2; i++) {
-    int16_t in0 = in[2 * i];
-    int16_t in1 = in[2 * i + 1];
-    out[3 * i]     = in0 & 0xff;
-    out[3 * i + 1] = ((in0 >> 8) & 0xf) | ((in1 & 0xf) << 4);
-    out[3 * i + 2] = (in1 >> 4) & 0xff;
-  }
-}
-
-// FIPS 203. Algorithm 6 ByteDecode_12
-// Decodes a byte array into an array of 256 12-bit integers.
-// Intuition for the implementation:
-//   in:  |xxxxxxxx| |yyyyyyyy| |zzzzzzzz| ...
-//   out: |xxxxxxxxyyyy| |yyyyzzzzzzzz| ...
-// We divide the input in triplets of elements (3 x 8 bits = 24 bits),
-// and the output in pairs (2 x 12 bits = 24 bits). For each pair/triplet we:
-//   - out[0] <-- concatenate eight bits of in[0] and first 4 bits of in[1],
-//   - out[1] <-- concatenate last 4 bits of in[1] and 8 bits of in[2].
-// Additionally we reduce the output elements mod Q as specified in FIPS 203.
-static void byte_decode_12(int16_t out[BYTE_DECODE_12_OUT_SIZE],
-                           const uint8_t in[BYTE_DECODE_12_IN_SIZE]) {
-  for(size_t i = 0; i < BYTE_DECODE_12_OUT_SIZE / 2; i++) {
-    // Cast to 16-bit wide uint's to avoid any issues
-    // with shifting and implicit casting.
-    uint16_t in0 = (uint16_t) in[3 * i];
-    uint16_t in1 = (uint16_t) in[3 * i + 1];
-    uint16_t in2 = (uint16_t) in[3 * i + 2];
-
-    // Build the output pair.
-    uint16_t out0 = in0 | ((in1 & 0xf) << 8);
-    uint16_t out1 = (in1 >> 4) | (in2 << 4);
-
-    // Reduce mod Q.
-    out[2 * i] = centered_to_positive_representative(barrett_reduce(out0));
-    out[2 * i + 1] = centered_to_positive_representative(barrett_reduce(out1));
-  }
-}
-
-#define ENCAPS_KEY_ENCODED_MAX_SIZE (BYTE_ENCODE_12_OUT_SIZE * KYBER_K_MAX)
-#define ENCAPS_KEY_DECODED_MAX_SIZE (BYTE_DECODE_12_OUT_SIZE * KYBER_K_MAX)
-
-// FIPS 203. Section 7.2 Encapsulation key check.
-// This function implements the encapsulation key modulus check. The other
-// check specified in Section 7.2 is a type check the key. We can safely omit
-// that check here because it is done in higher level functions. The required
-// lengths for all variants of ML-KEM are hard-coded in: fipsmodule/kem/kem.c.
-// If a key is generated by aws-lc then it satisfies the length requirements.
-// If a key is generated outside of aws-lc, it has to be imported into an
-// `EVP_PKEY` object to be used within aws-lc. We provide only these three
-// functions to do that:  `EVP_PKEY_kem_new_raw_key`,
-// `EVP_PKEY_kem_new_raw_secret_key`, `EVP_PKEY_kem_new_raw_public_key`.
-// The lengths are checked in all three functions.
-static int encapsulation_key_modulus_check(ml_kem_params *params, const uint8_t *ek) {
-
-  int16_t ek_decoded[ENCAPS_KEY_DECODED_MAX_SIZE];
-  uint8_t ek_recoded[ENCAPS_KEY_ENCODED_MAX_SIZE];
-
-  for (size_t i = 0; i < params->k; i++) {
-    byte_decode_12(&ek_decoded[i * BYTE_DECODE_12_OUT_SIZE], &ek[i * BYTE_DECODE_12_IN_SIZE]);
-    byte_encode_12(&ek_recoded[i * BYTE_ENCODE_12_OUT_SIZE], &ek_decoded[i * BYTE_ENCODE_12_IN_SIZE]);
-  }
-
-  return verify(ek_recoded, ek, params->k * BYTE_ENCODE_12_OUT_SIZE);
-}
-
-// FIPS 203. Section 7.3 Decapsulation key hash check
-// The spec defines the decapsulation key as following:
-//   dk <-- (dk_pke || ek || H(ek) || z).
-// This check takes |ek| out of |dk|, computes H(ek), and verifies that it is
-// the same as the H(ek) portion stored in |dk|.
-//
-// This function implements the decapsulation key hash check. The other checks
-// specified in Section 7.3 are the ciphertext and the key type check. We can
-// safely omit those checks here because they are done in higher level functions.
-// The required lengths for all variants of ML-KEM are hard-coded in
-// fipsmodule/kem/kem.c. If a key is generated by aws-lc then it satisfies
-// the length requirements. If a key is generated outside of aws-lc, it has to
-// be imported into an `EVP_PKEY` object to be used within aws-lc. We provide
-// only these three functions to do that:  `EVP_PKEY_kem_new_raw_key`,
-// `EVP_PKEY_kem_new_raw_secret_key`, `EVP_PKEY_kem_new_raw_public_key`.
-// The lengths are checked in all three functions. Additionally, the ciphertext
-// length is checked in function pkey_kem_decapsulate in fipsmodule/evp/p_kem.c.
-static int decapsulation_key_hash_check(ml_kem_params *params, const uint8_t *dk) {
-  uint8_t dk_pke_hash_computed[KYBER_SYMBYTES] = {0};
-
-  hash_h(dk_pke_hash_computed, &dk[params->indcpa_secret_key_bytes],
-                               params->indcpa_public_key_bytes);
-  const uint8_t *dk_pke_hash_expected = &dk[params->indcpa_secret_key_bytes +
-                                            params->indcpa_public_key_bytes];
-
-  return verify(dk_pke_hash_computed, dk_pke_hash_expected, KYBER_SYMBYTES);
-}
-
-/*************************************************
-* Name:        crypto_kem_enc_derand
-*
-* Description: Generates cipher text and shared
-*              secret for given public key
-*
-* Arguments:   - uint8_t *ct: pointer to output cipher text
-*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
-*              - uint8_t *ss: pointer to output shared secret
-*                (an already allocated array of KYBER_SSBYTES bytes)
-*              - const uint8_t *pk: pointer to input public key
-*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
-*              - const uint8_t *coins: pointer to input randomness
-*                (an already allocated array filled with KYBER_SYMBYTES random bytes)
-**
-* Returns 0 (success)
-**************************************************/
-int crypto_kem_enc_derand(ml_kem_params *params,
-                          uint8_t *ct,
-                          uint8_t *ss,
-                          const uint8_t *pk,
-                          const uint8_t *coins)
-{
-  uint8_t buf[2*KYBER_SYMBYTES];
-  /* Will contain key, coins */
-  uint8_t kr[2*KYBER_SYMBYTES];
-
-  memcpy(buf, coins, KYBER_SYMBYTES);
-
-  /* Multitarget countermeasure for coins + contributory KEM */
-  hash_h(buf+KYBER_SYMBYTES, pk, params->public_key_bytes);
-  hash_g(kr, buf, 2*KYBER_SYMBYTES);
-
-  /* coins are in kr+KYBER_SYMBYTES */
-  indcpa_enc(params, ct, buf, pk, kr+KYBER_SYMBYTES);
-
-  memcpy(ss,kr,KYBER_SYMBYTES);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(buf, sizeof(buf));
-  OPENSSL_cleanse(kr, sizeof(kr));
-  return 0;
-}
-
-/*************************************************
-* Name:        crypto_kem_enc
-*
-* Description: Generates cipher text and shared
-*              secret for given public key
-*
-* Arguments:   - uint8_t *ct: pointer to output cipher text
-*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
-*              - uint8_t *ss: pointer to output shared secret
-*                (an already allocated array of KYBER_SSBYTES bytes)
-*              - const uint8_t *pk: pointer to input public key
-*                (an already allocated array of KYBER_PUBLICKEYBYTES bytes)
-*
-* Returns 0 (success), or 1 when the encapsulation key check fails.
-**************************************************/
-int crypto_kem_enc(ml_kem_params *params,
-                   uint8_t *ct,
-                   uint8_t *ss,
-                   const uint8_t *pk)
-{
-  if (encapsulation_key_modulus_check(params, pk) != 0) {
-    return 1;
-  }
-
-  uint8_t coins[KYBER_SYMBYTES];
-  RAND_bytes(coins, KYBER_SYMBYTES);
-  crypto_kem_enc_derand(params, ct, ss, pk, coins);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(coins, sizeof(coins));
-  return 0;
-}
-
-/*************************************************
-* Name:        crypto_kem_dec
-*
-* Description: Generates shared secret for given
-*              cipher text and private key
-*
-* Arguments:   - uint8_t *ss: pointer to output shared secret
-*                (an already allocated array of KYBER_SSBYTES bytes)
-*              - const uint8_t *ct: pointer to input cipher text
-*                (an already allocated array of KYBER_CIPHERTEXTBYTES bytes)
-*              - const uint8_t *sk: pointer to input private key
-*                (an already allocated array of KYBER_SECRETKEYBYTES bytes)
-*
-* Returns 0.
-*
-* On failure, ss will contain a pseudo-random value.
-**************************************************/
-int crypto_kem_dec(ml_kem_params *params,
-                   uint8_t *ss,
-                   const uint8_t *ct,
-                   const uint8_t *sk)
-{
-  if (decapsulation_key_hash_check(params, sk) != 0) {
-    return 1;
-  }
-
-  int fail;
-  uint8_t buf[2*KYBER_SYMBYTES];
-  /* Will contain key, coins */
-  uint8_t kr[2*KYBER_SYMBYTES];
-  uint8_t cmp[KYBER_CIPHERTEXTBYTES_MAX+KYBER_SYMBYTES];
-  const uint8_t *pk = sk+params->indcpa_secret_key_bytes;
-
-  indcpa_dec(params, buf, ct, sk);
-
-  /* Multitarget countermeasure for coins + contributory KEM */
-  memcpy(buf+KYBER_SYMBYTES, sk+params->secret_key_bytes-2*KYBER_SYMBYTES, KYBER_SYMBYTES);
-  hash_g(kr, buf, 2*KYBER_SYMBYTES);
-
-  /* coins are in kr+KYBER_SYMBYTES */
-  indcpa_enc(params, cmp, buf, pk, kr+KYBER_SYMBYTES);
-
-  fail = verify(ct, cmp, params->ciphertext_bytes);
-
-  /* Compute rejection key */
-  rkprf(params, ss,sk+params->secret_key_bytes-KYBER_SYMBYTES,ct);
-
-  /* Copy true key to return buffer if fail is false */
-  cmov(ss,kr,KYBER_SYMBYTES,!fail);
-
-  // FIPS 203. Section 3.3 Destruction of intermediate values.
-  OPENSSL_cleanse(buf, sizeof(buf));
-  OPENSSL_cleanse(kr, sizeof(kr));
-  OPENSSL_cleanse(cmp, sizeof(cmp));
-  return 0;
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.h
deleted file mode 100644
index 39873a5d8a7..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/kem.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef KEM_H
-#define KEM_H
-
-#include <stdint.h>
-#include "params.h"
-
-#define crypto_kem_keypair_derand KYBER_NAMESPACE(keypair_derand)
-int crypto_kem_keypair_derand(ml_kem_params *params, uint8_t *pk, uint8_t *sk, const uint8_t *coins);
-
-#define crypto_kem_keypair KYBER_NAMESPACE(keypair)
-int crypto_kem_keypair(ml_kem_params *params, uint8_t *pk, uint8_t *sk);
-
-#define crypto_kem_enc_derand KYBER_NAMESPACE(enc_derand)
-int crypto_kem_enc_derand(ml_kem_params * params, uint8_t *ct, uint8_t *ss, const uint8_t *pk, const uint8_t *coins);
-
-#define crypto_kem_enc KYBER_NAMESPACE(enc)
-int crypto_kem_enc(ml_kem_params * params, uint8_t *ct, uint8_t *ss, const uint8_t *pk);
-
-#define crypto_kem_dec KYBER_NAMESPACE(dec)
-int crypto_kem_dec(ml_kem_params * params, uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.c
deleted file mode 100644
index 2f2eb10b2f2..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.c
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <stdint.h>
-#include "params.h"
-#include "ntt.h"
-#include "reduce.h"
-
-/* Code to generate zetas and zetas_inv used in the number-theoretic transform:
-
-#define KYBER_ROOT_OF_UNITY 17
-
-static const uint8_t tree[128] = {
-  0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
-  4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
-  2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
-  6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
-  1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
-  5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
-  3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
-  7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
-};
-
-void init_ntt() {
-  unsigned int i;
-  int16_t tmp[128];
-
-  tmp[0] = MONT;
-  for(i=1;i<128;i++)
-    tmp[i] = fqmul(tmp[i-1],MONT*KYBER_ROOT_OF_UNITY % KYBER_Q);
-
-  for(i=0;i<128;i++) {
-    zetas[i] = tmp[tree[i]];
-    if(zetas[i] > KYBER_Q/2)
-      zetas[i] -= KYBER_Q;
-    if(zetas[i] < -KYBER_Q/2)
-      zetas[i] += KYBER_Q;
-  }
-}
-*/
-
-const int16_t zetas[128] = {
-  -1044,  -758,  -359, -1517,  1493,  1422,   287,   202,
-   -171,   622,  1577,   182,   962, -1202, -1474,  1468,
-    573, -1325,   264,   383,  -829,  1458, -1602,  -130,
-   -681,  1017,   732,   608, -1542,   411,  -205, -1571,
-   1223,   652,  -552,  1015, -1293,  1491,  -282, -1544,
-    516,    -8,  -320,  -666, -1618, -1162,   126,  1469,
-   -853,   -90,  -271,   830,   107, -1421,  -247,  -951,
-   -398,   961, -1508,  -725,   448, -1065,   677, -1275,
-  -1103,   430,   555,   843, -1251,   871,  1550,   105,
-    422,   587,   177,  -235,  -291,  -460,  1574,  1653,
-   -246,   778,  1159,  -147,  -777,  1483,  -602,  1119,
-  -1590,   644,  -872,   349,   418,   329,  -156,   -75,
-    817,  1097,   603,   610,  1322, -1285, -1465,   384,
-  -1215,  -136,  1218, -1335,  -874,   220, -1187, -1659,
-  -1185, -1530, -1278,   794, -1510,  -854,  -870,   478,
-   -108,  -308,   996,   991,   958, -1460,  1522,  1628
-};
-
-/*************************************************
-* Name:        fqmul
-*
-* Description: Multiplication followed by Montgomery reduction
-*
-* Arguments:   - int16_t a: first factor
-*              - int16_t b: second factor
-*
-* Returns 16-bit integer congruent to a*b*R^{-1} mod q
-**************************************************/
-static int16_t fqmul(int16_t a, int16_t b) {
-  return montgomery_reduce((int32_t)a*b);
-}
-
-/*************************************************
-* Name:        ntt
-*
-* Description: Inplace number-theoretic transform (NTT) in Rq.
-*              input is in standard order, output is in bitreversed order
-*
-* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
-**************************************************/
-void ntt(int16_t r[256]) {
-  unsigned int len, start, j, k;
-  int16_t t, zeta;
-
-  k = 1;
-  for(len = 128; len >= 2; len >>= 1) {
-    for(start = 0; start < 256; start = j + len) {
-      zeta = zetas[k++];
-      for(j = start; j < start + len; j++) {
-        t = fqmul(zeta, r[j + len]);
-        r[j + len] = r[j] - t;
-        r[j] = r[j] + t;
-      }
-    }
-  }
-}
-
-/*************************************************
-* Name:        invntt_tomont
-*
-* Description: Inplace inverse number-theoretic transform in Rq and
-*              multiplication by Montgomery factor 2^16.
-*              Input is in bitreversed order, output is in standard order
-*
-* Arguments:   - int16_t r[256]: pointer to input/output vector of elements of Zq
-**************************************************/
-void invntt(int16_t r[256]) {
-  unsigned int start, len, j, k;
-  int16_t t, zeta;
-  const int16_t f = 1441; // mont^2/128
-
-  k = 127;
-  for(len = 2; len <= 128; len <<= 1) {
-    for(start = 0; start < 256; start = j + len) {
-      zeta = zetas[k--];
-      for(j = start; j < start + len; j++) {
-        t = r[j];
-        r[j] = barrett_reduce(t + r[j + len]);
-        r[j + len] = r[j + len] - t;
-        r[j + len] = fqmul(zeta, r[j + len]);
-      }
-    }
-  }
-
-  for(j = 0; j < 256; j++)
-    r[j] = fqmul(r[j], f);
-}
-
-/*************************************************
-* Name:        basemul
-*
-* Description: Multiplication of polynomials in Zq[X]/(X^2-zeta)
-*              used for multiplication of elements in Rq in NTT domain
-*
-* Arguments:   - int16_t r[2]: pointer to the output polynomial
-*              - const int16_t a[2]: pointer to the first factor
-*              - const int16_t b[2]: pointer to the second factor
-*              - int16_t zeta: integer defining the reduction polynomial
-**************************************************/
-void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta)
-{
-  r[0]  = fqmul(a[1], b[1]);
-  r[0]  = fqmul(r[0], zeta);
-  r[0] += fqmul(a[0], b[0]);
-  r[1]  = fqmul(a[0], b[1]);
-  r[1] += fqmul(a[1], b[0]);
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.h
deleted file mode 100644
index 04636ad0c58..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/ntt.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef ML_KEM_NTT_H
-#define ML_KEM_NTT_H
-
-#include <stdint.h>
-#include "params.h"
-
-#define zetas KYBER_NAMESPACE(zetas)
-extern const int16_t zetas[128];
-
-#define ntt KYBER_NAMESPACE(ntt)
-void ntt(int16_t poly[256]);
-
-#define invntt KYBER_NAMESPACE(invntt)
-void invntt(int16_t poly[256]);
-
-#define basemul KYBER_NAMESPACE(basemul)
-void basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/params.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/params.c
deleted file mode 100644
index bc685b1bbdb..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/params.c
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <openssl/base.h>
-
-#include "params.h"
-
-static void ml_kem_params_init(ml_kem_params *params, size_t k) {
-  assert((k == 2) || (k == 3) || (k == 4));
-
-  size_t eta1 = (k == 2) ? 3 : 2;
-  size_t poly_compressed_bytes = (k == 4) ? 160 : 128;
-  size_t poly_vec_bytes = k * KYBER_POLYBYTES;
-  size_t poly_vec_compressed_bytes = (k == 4) ? (352 * k) : (320 * k);
-  size_t indcpa_public_key_bytes = poly_vec_bytes + KYBER_SYMBYTES;
-  size_t indcpa_secret_key_bytes = poly_vec_bytes;
-  size_t indcpa_bytes = poly_vec_compressed_bytes + poly_compressed_bytes;
-  size_t public_key_bytes = indcpa_public_key_bytes;
-  size_t secret_key_bytes = indcpa_secret_key_bytes + indcpa_public_key_bytes + 2*KYBER_SYMBYTES;
-  size_t ciphertext_bytes = indcpa_bytes;
-
-  params->k = k;
-  params->eta1 = eta1;
-  params->poly_compressed_bytes = poly_compressed_bytes;
-  params->poly_vec_bytes = poly_vec_bytes;
-  params->poly_vec_compressed_bytes = poly_vec_compressed_bytes;
-  params->indcpa_public_key_bytes = indcpa_public_key_bytes;
-  params->indcpa_secret_key_bytes = indcpa_secret_key_bytes;
-  params->indcpa_bytes = indcpa_bytes;
-  params->public_key_bytes = public_key_bytes;
-  params->secret_key_bytes = secret_key_bytes;
-  params->ciphertext_bytes = ciphertext_bytes;
-}
-
-void ml_kem_512_params_init(ml_kem_params *params) {
-  ml_kem_params_init(params, 2);
-}
-void ml_kem_768_params_init(ml_kem_params *params) {
-  ml_kem_params_init(params, 3);
-}
-void ml_kem_1024_params_init(ml_kem_params *params) {
-  ml_kem_params_init(params, 4);
-}
-
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/params.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/params.h
deleted file mode 100644
index fd796614f48..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/params.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef ML_KEM_PARAMS_H
-#define ML_KEM_PARAMS_H
-
-#include <openssl/base.h>
-
-// The only defined parameters are those that don't depend
-// on the parameter set. All other parameters are specified
-// in ml_kem_params structure that is unique for each parameter
-// set (ML-KEM 512/768/1024).
-#define KYBER_N 256
-#define KYBER_Q 3329
-
-#define KYBER_SYMBYTES 32   /* size in bytes of hashes, and seeds */
-#define KYBER_SSBYTES  32   /* size in bytes of shared key */
-
-#define KYBER_POLYBYTES		384
-
-#define KYBER_ETA2 2
-
-#define KYBER_INDCPA_MSGBYTES       (KYBER_SYMBYTES)
-
-// Structure for ML-KEM parameters that depend on the parameter set.
-typedef struct {
-  size_t k;
-  size_t eta1;
-  size_t poly_compressed_bytes;
-  size_t poly_vec_bytes;
-  size_t poly_vec_compressed_bytes;
-  size_t indcpa_public_key_bytes;
-  size_t indcpa_secret_key_bytes;
-  size_t indcpa_bytes;
-  size_t public_key_bytes;
-  size_t secret_key_bytes;
-  size_t ciphertext_bytes;
-} ml_kem_params;
-
-// We define max values for some parameters because they are used
-// for static allocation.
-#define KYBER_K_MAX (4)
-#define KYBER_ETA1_MAX (3)
-#define KYBER_POLYCOMPRESSEDBYTES_MAX    (160)
-#define KYBER_POLYVECCOMPRESSEDBYTES_MAX (4 * 352)
-
-#define KYBER_INDCPA_BYTES_MAX    (KYBER_POLYVECCOMPRESSEDBYTES_MAX + KYBER_POLYCOMPRESSEDBYTES_MAX)
-#define KYBER_CIPHERTEXTBYTES_MAX (KYBER_INDCPA_BYTES_MAX)
-
-#define KYBER_NAMESPACE(s) ml_kem_##s##_ref
-
-void ml_kem_512_params_init(ml_kem_params *params);
-void ml_kem_768_params_init(ml_kem_params *params);
-void ml_kem_1024_params_init(ml_kem_params *params);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.c
deleted file mode 100644
index 58f92aff611..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.c
+++ /dev/null
@@ -1,371 +0,0 @@
-#include <stdint.h>
-
-#include <openssl/type_check.h>
-
-#include "params.h"
-#include "poly.h"
-#include "ntt.h"
-#include "reduce.h"
-#include "cbd.h"
-#include "symmetric.h"
-
-#include "../../../internal.h"
-
-/*************************************************
-* Name:        poly_compress
-*
-* Description: Compression and subsequent serialization of a polynomial
-*
-* Arguments:   - uint8_t *r: pointer to output byte array
-*                            (of length KYBER_POLYCOMPRESSEDBYTES)
-*              - const poly *a: pointer to input polynomial
-**************************************************/
-void poly_compress(ml_kem_params *params, uint8_t *r, const poly *a)
-{
-  unsigned int i,j;
-  int32_t u;
-  uint32_t d0;
-  uint8_t t[8];
-
-  assert((params->poly_compressed_bytes == 128) ||
-         (params->poly_compressed_bytes == 160));
-
-  if (params->poly_compressed_bytes == 128) {
-    for(i=0;i<KYBER_N/8;i++) {
-      for(j=0;j<8;j++) {
-        // map to positive standard representatives
-        u  = a->coeffs[8*i+j];
-        u += (u >> 15) & KYBER_Q;
-        // t[j] = ((((uint16_t)u << 4) + KYBER_Q/2)/KYBER_Q) & 15;
-        d0 = u << 4;
-        d0 += 1665;
-        d0 *= 80635;
-        d0 >>= 28;
-        t[j] = d0 & 0xf;
-      }
-  
-      r[0] = t[0] | (t[1] << 4);
-      r[1] = t[2] | (t[3] << 4);
-      r[2] = t[4] | (t[5] << 4);
-      r[3] = t[6] | (t[7] << 4);
-      r += 4;
-    }
-  } else {
-    for(i=0;i<KYBER_N/8;i++) {
-      for(j=0;j<8;j++) {
-        // map to positive standard representatives
-        u  = a->coeffs[8*i+j];
-        u += (u >> 15) & KYBER_Q;
-        // t[j] = ((((uint32_t)u << 5) + KYBER_Q/2)/KYBER_Q) & 31;
-        d0 = u << 5;
-        d0 += 1664;
-        d0 *= 40318;
-        d0 >>= 27;
-        t[j] = d0 & 0x1f;
-      }
-  
-      r[0] = (t[0] >> 0) | (t[1] << 5);
-      r[1] = (t[1] >> 3) | (t[2] << 2) | (t[3] << 7);
-      r[2] = (t[3] >> 1) | (t[4] << 4);
-      r[3] = (t[4] >> 4) | (t[5] << 1) | (t[6] << 6);
-      r[4] = (t[6] >> 2) | (t[7] << 3);
-      r += 5;
-    }
-  }
-}
-
-/*************************************************
-* Name:        poly_decompress
-*
-* Description: De-serialization and subsequent decompression of a polynomial;
-*              approximate inverse of poly_compress
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *a: pointer to input byte array
-*                                  (of length KYBER_POLYCOMPRESSEDBYTES bytes)
-**************************************************/
-void poly_decompress(ml_kem_params *params, poly *r, const uint8_t *a)
-{
-  unsigned int i;
-
-  assert((params->poly_compressed_bytes == 128) ||
-         (params->poly_compressed_bytes == 160));
-
-  if (params->poly_compressed_bytes == 128) {
-    for(i=0;i<KYBER_N/2;i++) {
-      r->coeffs[2*i+0] = (((uint16_t)(a[0] & 15)*KYBER_Q) + 8) >> 4;
-      r->coeffs[2*i+1] = (((uint16_t)(a[0] >> 4)*KYBER_Q) + 8) >> 4;
-      a += 1;
-    }
-  } else {
-    unsigned int j;
-    uint8_t t[8];
-    for(i=0;i<KYBER_N/8;i++) {
-      t[0] = (a[0] >> 0);
-      t[1] = (a[0] >> 5) | (a[1] << 3);
-      t[2] = (a[1] >> 2);
-      t[3] = (a[1] >> 7) | (a[2] << 1);
-      t[4] = (a[2] >> 4) | (a[3] << 4);
-      t[5] = (a[3] >> 1);
-      t[6] = (a[3] >> 6) | (a[4] << 2);
-      t[7] = (a[4] >> 3);
-      a += 5;
-
-      for(j=0;j<8;j++)
-        r->coeffs[8*i+j] = ((uint32_t)(t[j] & 31)*KYBER_Q + 16) >> 5;
-    }
-  }
-}
-
-/*************************************************
-* Name:        poly_tobytes
-*
-* Description: Serialization of a polynomial
-*
-* Arguments:   - uint8_t *r: pointer to output byte array
-*                            (needs space for KYBER_POLYBYTES bytes)
-*              - const poly *a: pointer to input polynomial
-**************************************************/
-void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a)
-{
-  unsigned int i;
-  uint16_t t0, t1;
-
-  for(i=0;i<KYBER_N/2;i++) {
-    // map to positive standard representatives
-    t0  = a->coeffs[2*i];
-    t0 += ((int16_t)t0 >> 15) & KYBER_Q;
-    t1 = a->coeffs[2*i+1];
-    t1 += ((int16_t)t1 >> 15) & KYBER_Q;
-    r[3*i+0] = (t0 >> 0);
-    r[3*i+1] = (t0 >> 8) | (t1 << 4);
-    r[3*i+2] = (t1 >> 4);
-  }
-}
-
-/*************************************************
-* Name:        poly_frombytes
-*
-* Description: De-serialization of a polynomial;
-*              inverse of poly_tobytes
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *a: pointer to input byte array
-*                                  (of KYBER_POLYBYTES bytes)
-**************************************************/
-void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES])
-{
-  unsigned int i;
-  for(i=0;i<KYBER_N/2;i++) {
-    r->coeffs[2*i]   = ((a[3*i+0] >> 0) | ((uint16_t)a[3*i+1] << 8)) & 0xFFF;
-    r->coeffs[2*i+1] = ((a[3*i+1] >> 4) | ((uint16_t)a[3*i+2] << 4)) & 0xFFF;
-  }
-}
-
-/*************************************************
-* Name:        poly_frommsg
-*
-* Description: Convert 32-byte message to polynomial
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *msg: pointer to input message
-**************************************************/
-void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES])
-{
-  unsigned int i,j;
-  crypto_word_t mask;
-
-#if (KYBER_INDCPA_MSGBYTES != KYBER_N/8)
-#error "KYBER_INDCPA_MSGBYTES must be equal to KYBER_N/8 bytes!"
-#endif
-
-  for(i=0;i<KYBER_N/8;i++) {
-    for(j=0;j<8;j++) {
-      mask = constant_time_is_zero_w((msg[i] >> j) & 1);
-      // We cast the result of constant_time_select_w, which is a crypto_word_t,
-      // to int16_t. The constants must be within the range of int16_t.
-      OPENSSL_STATIC_ASSERT(((KYBER_Q+1)/2) <= INT16_MAX,
-                            value_exceeds_int16_max);
-      r->coeffs[8*i+j] = (int16_t) constant_time_select_w(mask,
-                                                          0, ((KYBER_Q+1)/2));
-    }
-  }
-}
-
-/*************************************************
-* Name:        poly_tomsg
-*
-* Description: Convert polynomial to 32-byte message
-*
-* Arguments:   - uint8_t *msg: pointer to output message
-*              - const poly *a: pointer to input polynomial
-**************************************************/
-void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *a)
-{
-  unsigned int i,j;
-  uint32_t t;
-
-  for(i=0;i<KYBER_N/8;i++) {
-    msg[i] = 0;
-    for(j=0;j<8;j++) {
-      t  = a->coeffs[8*i+j];
-      // t += ((int16_t)t >> 15) & KYBER_Q;
-      // t  = (((t << 1) + KYBER_Q/2)/KYBER_Q) & 1;
-      t <<= 1;
-      t += 1665;
-      t *= 80635;
-      t >>= 28;
-      t &= 1;
-      msg[i] |= t << j;
-    }
-  }
-}
-
-/*************************************************
-* Name:        poly_getnoise_eta1
-*
-* Description: Sample a polynomial deterministically from a seed and a nonce,
-*              with output polynomial close to centered binomial distribution
-*              with parameter KYBER_ETA1
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *seed: pointer to input seed
-*                                     (of length KYBER_SYMBYTES bytes)
-*              - uint8_t nonce: one-byte input nonce
-**************************************************/
-void poly_getnoise_eta1(ml_kem_params *params, poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
-{
-  uint8_t buf[KYBER_ETA1_MAX*KYBER_N/4];
-  prf(buf, sizeof(buf), seed, nonce);
-  poly_cbd_eta1(params, r, buf);
-}
-
-/*************************************************
-* Name:        poly_getnoise_eta2
-*
-* Description: Sample a polynomial deterministically from a seed and a nonce,
-*              with output polynomial close to centered binomial distribution
-*              with parameter KYBER_ETA2
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const uint8_t *seed: pointer to input seed
-*                                     (of length KYBER_SYMBYTES bytes)
-*              - uint8_t nonce: one-byte input nonce
-**************************************************/
-void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce)
-{
-  uint8_t buf[KYBER_ETA2*KYBER_N/4];
-  prf(buf, sizeof(buf), seed, nonce);
-  poly_cbd_eta2(r, buf);
-}
-
-
-/*************************************************
-* Name:        poly_ntt
-*
-* Description: Computes negacyclic number-theoretic transform (NTT) of
-*              a polynomial in place;
-*              inputs assumed to be in normal order, output in bitreversed order
-*
-* Arguments:   - uint16_t *r: pointer to in/output polynomial
-**************************************************/
-void poly_ntt(poly *r)
-{
-  ntt(r->coeffs);
-  poly_reduce(r);
-}
-
-/*************************************************
-* Name:        poly_invntt_tomont
-*
-* Description: Computes inverse of negacyclic number-theoretic transform (NTT)
-*              of a polynomial in place;
-*              inputs assumed to be in bitreversed order, output in normal order
-*
-* Arguments:   - uint16_t *a: pointer to in/output polynomial
-**************************************************/
-void poly_invntt_tomont(poly *r)
-{
-  invntt(r->coeffs);
-}
-
-/*************************************************
-* Name:        poly_basemul_montgomery
-*
-* Description: Multiplication of two polynomials in NTT domain
-*
-* Arguments:   - poly *r: pointer to output polynomial
-*              - const poly *a: pointer to first input polynomial
-*              - const poly *b: pointer to second input polynomial
-**************************************************/
-void poly_basemul_montgomery(poly *r, const poly *a, const poly *b)
-{
-  unsigned int i;
-  for(i=0;i<KYBER_N/4;i++) {
-    basemul(&r->coeffs[4*i], &a->coeffs[4*i], &b->coeffs[4*i], zetas[64+i]);
-    basemul(&r->coeffs[4*i+2], &a->coeffs[4*i+2], &b->coeffs[4*i+2], -zetas[64+i]);
-  }
-}
-
-/*************************************************
-* Name:        poly_tomont
-*
-* Description: Inplace conversion of all coefficients of a polynomial
-*              from normal domain to Montgomery domain
-*
-* Arguments:   - poly *r: pointer to input/output polynomial
-**************************************************/
-void poly_tomont(poly *r)
-{
-  unsigned int i;
-  const int16_t f = (1ULL << 32) % KYBER_Q;
-  for(i=0;i<KYBER_N;i++)
-    r->coeffs[i] = montgomery_reduce((int32_t)r->coeffs[i]*f);
-}
-
-/*************************************************
-* Name:        poly_reduce
-*
-* Description: Applies Barrett reduction to all coefficients of a polynomial
-*              for details of the Barrett reduction see comments in reduce.c
-*
-* Arguments:   - poly *r: pointer to input/output polynomial
-**************************************************/
-void poly_reduce(poly *r)
-{
-  unsigned int i;
-  for(i=0;i<KYBER_N;i++)
-    r->coeffs[i] = barrett_reduce(r->coeffs[i]);
-}
-
-/*************************************************
-* Name:        poly_add
-*
-* Description: Add two polynomials; no modular reduction is performed
-*
-* Arguments: - poly *r: pointer to output polynomial
-*            - const poly *a: pointer to first input polynomial
-*            - const poly *b: pointer to second input polynomial
-**************************************************/
-void poly_add(poly *r, const poly *a, const poly *b)
-{
-  unsigned int i;
-  for(i=0;i<KYBER_N;i++)
-    r->coeffs[i] = a->coeffs[i] + b->coeffs[i];
-}
-
-/*************************************************
-* Name:        poly_sub
-*
-* Description: Subtract two polynomials; no modular reduction is performed
-*
-* Arguments: - poly *r:       pointer to output polynomial
-*            - const poly *a: pointer to first input polynomial
-*            - const poly *b: pointer to second input polynomial
-**************************************************/
-void poly_sub(poly *r, const poly *a, const poly *b)
-{
-  unsigned int i;
-  for(i=0;i<KYBER_N;i++)
-    r->coeffs[i] = a->coeffs[i] - b->coeffs[i];
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.h
deleted file mode 100644
index df9b1fab140..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/poly.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef ML_KEM_POLY_H
-#define ML_KEM_POLY_H
-
-#include <stdint.h>
-#include "params.h"
-
-/*
- * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
- * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
- */
-typedef struct{
-  int16_t coeffs[KYBER_N];
-} poly;
-
-#define poly_compress KYBER_NAMESPACE(poly_compress)
-void poly_compress(ml_kem_params *params, uint8_t *r, const poly *a);
-#define poly_decompress KYBER_NAMESPACE(poly_decompress)
-void poly_decompress(ml_kem_params *params, poly *r, const uint8_t *a);
-
-#define poly_tobytes KYBER_NAMESPACE(poly_tobytes)
-void poly_tobytes(uint8_t r[KYBER_POLYBYTES], const poly *a);
-#define poly_frombytes KYBER_NAMESPACE(poly_frombytes)
-void poly_frombytes(poly *r, const uint8_t a[KYBER_POLYBYTES]);
-
-#define poly_frommsg KYBER_NAMESPACE(poly_frommsg)
-void poly_frommsg(poly *r, const uint8_t msg[KYBER_INDCPA_MSGBYTES]);
-#define poly_tomsg KYBER_NAMESPACE(poly_tomsg)
-void poly_tomsg(uint8_t msg[KYBER_INDCPA_MSGBYTES], const poly *r);
-
-#define poly_getnoise_eta1 KYBER_NAMESPACE(poly_getnoise_eta1)
-void poly_getnoise_eta1(ml_kem_params *params, poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
-
-#define poly_getnoise_eta2 KYBER_NAMESPACE(poly_getnoise_eta2)
-void poly_getnoise_eta2(poly *r, const uint8_t seed[KYBER_SYMBYTES], uint8_t nonce);
-
-#define poly_ntt KYBER_NAMESPACE(poly_ntt)
-void poly_ntt(poly *r);
-#define poly_invntt_tomont KYBER_NAMESPACE(poly_invntt_tomont)
-void poly_invntt_tomont(poly *r);
-#define poly_basemul_montgomery KYBER_NAMESPACE(poly_basemul_montgomery)
-void poly_basemul_montgomery(poly *r, const poly *a, const poly *b);
-#define poly_tomont KYBER_NAMESPACE(poly_tomont)
-void poly_tomont(poly *r);
-
-#define poly_reduce KYBER_NAMESPACE(poly_reduce)
-void poly_reduce(poly *r);
-
-#define poly_add KYBER_NAMESPACE(poly_add)
-void poly_add(poly *r, const poly *a, const poly *b);
-#define poly_sub KYBER_NAMESPACE(poly_sub)
-void poly_sub(poly *r, const poly *a, const poly *b);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.c
deleted file mode 100644
index 973dc8cd8ba..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.c
+++ /dev/null
@@ -1,248 +0,0 @@
-#include <stdint.h>
-#include "params.h"
-#include "poly.h"
-#include "polyvec.h"
-
-/*************************************************
-* Name:        polyvec_compress
-*
-* Description: Compress and serialize vector of polynomials
-*
-* Arguments:   - uint8_t *r: pointer to output byte array
-*                            (needs space for KYBER_POLYVECCOMPRESSEDBYTES)
-*              - const polyvec *a: pointer to input vector of polynomials
-**************************************************/
-void polyvec_compress(ml_kem_params *params, uint8_t *r, const polyvec *a)
-{
-  unsigned int i,j,k;
-  uint64_t d0;
-
-  assert((params->poly_vec_compressed_bytes == params->k * 352) ||
-         (params->poly_vec_compressed_bytes == params->k * 320));
-
-  if (params->poly_vec_compressed_bytes == params->k * 352) {
-    uint16_t t[8];
-    for(i=0;i<params->k;i++) {
-      for(j=0;j<KYBER_N/8;j++) {
-        for(k=0;k<8;k++) {
-          t[k]  = a->vec[i].coeffs[8*j+k];
-          t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
-          // t[k]  = ((((uint32_t)t[k] << 11) + KYBER_Q/2)/KYBER_Q) & 0x7ff;
-          d0 = t[k];
-          d0 <<= 11;
-          d0 += 1664;
-          d0 *= 645084;
-          d0 >>= 31;
-          t[k] = d0 & 0x7ff;
-        }
-
-        r[ 0] = (t[0] >>  0);
-        r[ 1] = (t[0] >>  8) | (t[1] << 3);
-        r[ 2] = (t[1] >>  5) | (t[2] << 6);
-        r[ 3] = (t[2] >>  2);
-        r[ 4] = (t[2] >> 10) | (t[3] << 1);
-        r[ 5] = (t[3] >>  7) | (t[4] << 4);
-        r[ 6] = (t[4] >>  4) | (t[5] << 7);
-        r[ 7] = (t[5] >>  1);
-        r[ 8] = (t[5] >>  9) | (t[6] << 2);
-        r[ 9] = (t[6] >>  6) | (t[7] << 5);
-        r[10] = (t[7] >>  3);
-        r += 11;
-      }
-    }
-  } else {
-    uint16_t t[4];
-    for(i=0;i<params->k;i++) {
-      for(j=0;j<KYBER_N/4;j++) {
-        for(k=0;k<4;k++) {
-          t[k]  = a->vec[i].coeffs[4*j+k];
-          t[k] += ((int16_t)t[k] >> 15) & KYBER_Q;
-          // t[k]  = ((((uint32_t)t[k] << 10) + KYBER_Q/2)/ KYBER_Q) & 0x3ff;
-          d0 = t[k];
-          d0 <<= 10;
-          d0 += 1665;
-          d0 *= 1290167;
-          d0 >>= 32;
-          t[k] = d0 & 0x3ff;
-        }
-
-        r[0] = (t[0] >> 0);
-        r[1] = (t[0] >> 8) | (t[1] << 2);
-        r[2] = (t[1] >> 6) | (t[2] << 4);
-        r[3] = (t[2] >> 4) | (t[3] << 6);
-        r[4] = (t[3] >> 2);
-        r += 5;
-      }
-    }
-  }
-}
-
-/*************************************************
-* Name:        polyvec_decompress
-*
-* Description: De-serialize and decompress vector of polynomials;
-*              approximate inverse of polyvec_compress
-*
-* Arguments:   - polyvec *r:       pointer to output vector of polynomials
-*              - const uint8_t *a: pointer to input byte array
-*                                  (of length KYBER_POLYVECCOMPRESSEDBYTES)
-**************************************************/
-void polyvec_decompress(ml_kem_params *params, polyvec *r, const uint8_t *a)
-{
-  unsigned int i,j,k;
-
-  assert((params->poly_vec_compressed_bytes == params->k * 352) ||
-         (params->poly_vec_compressed_bytes == params->k * 320));
-
-  if (params->poly_vec_compressed_bytes == params->k * 352) {
-    uint16_t t[8];
-    for(i=0;i<params->k;i++) {
-      for(j=0;j<KYBER_N/8;j++) {
-        t[0] = (a[0] >> 0) | ((uint16_t)a[ 1] << 8);
-        t[1] = (a[1] >> 3) | ((uint16_t)a[ 2] << 5);
-        t[2] = (a[2] >> 6) | ((uint16_t)a[ 3] << 2) | ((uint16_t)a[4] << 10);
-        t[3] = (a[4] >> 1) | ((uint16_t)a[ 5] << 7);
-        t[4] = (a[5] >> 4) | ((uint16_t)a[ 6] << 4);
-        t[5] = (a[6] >> 7) | ((uint16_t)a[ 7] << 1) | ((uint16_t)a[8] << 9);
-        t[6] = (a[8] >> 2) | ((uint16_t)a[ 9] << 6);
-        t[7] = (a[9] >> 5) | ((uint16_t)a[10] << 3);
-        a += 11;
-  
-        for(k=0;k<8;k++)
-          r->vec[i].coeffs[8*j+k] = ((uint32_t)(t[k] & 0x7FF)*KYBER_Q + 1024) >> 11;
-      }
-    }
-  } else {
-    uint16_t t[4];
-    for(i=0;i<params->k;i++) {
-      for(j=0;j<KYBER_N/4;j++) {
-        t[0] = (a[0] >> 0) | ((uint16_t)a[1] << 8);
-        t[1] = (a[1] >> 2) | ((uint16_t)a[2] << 6);
-        t[2] = (a[2] >> 4) | ((uint16_t)a[3] << 4);
-        t[3] = (a[3] >> 6) | ((uint16_t)a[4] << 2);
-        a += 5;
-  
-        for(k=0;k<4;k++)
-          r->vec[i].coeffs[4*j+k] = ((uint32_t)(t[k] & 0x3FF)*KYBER_Q + 512) >> 10;
-      }
-    }
-  }
-}
-
-/*************************************************
-* Name:        polyvec_tobytes
-*
-* Description: Serialize vector of polynomials
-*
-* Arguments:   - uint8_t *r: pointer to output byte array
-*                            (needs space for KYBER_POLYVECBYTES)
-*              - const polyvec *a: pointer to input vector of polynomials
-**************************************************/
-void polyvec_tobytes(ml_kem_params *params, uint8_t *r, const polyvec *a)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_tobytes(r+i*KYBER_POLYBYTES, &a->vec[i]);
-}
-
-/*************************************************
-* Name:        polyvec_frombytes
-*
-* Description: De-serialize vector of polynomials;
-*              inverse of polyvec_tobytes
-*
-* Arguments:   - uint8_t *r:       pointer to output byte array
-*              - const polyvec *a: pointer to input vector of polynomials
-*                                  (of length KYBER_POLYVECBYTES)
-**************************************************/
-void polyvec_frombytes(ml_kem_params *params, polyvec *r, const uint8_t *a)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_frombytes(&r->vec[i], a+i*KYBER_POLYBYTES);
-}
-
-/*************************************************
-* Name:        polyvec_ntt
-*
-* Description: Apply forward NTT to all elements of a vector of polynomials
-*
-* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
-**************************************************/
-void polyvec_ntt(ml_kem_params *params, polyvec *r)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_ntt(&r->vec[i]);
-}
-
-/*************************************************
-* Name:        polyvec_invntt_tomont
-*
-* Description: Apply inverse NTT to all elements of a vector of polynomials
-*              and multiply by Montgomery factor 2^16
-*
-* Arguments:   - polyvec *r: pointer to in/output vector of polynomials
-**************************************************/
-void polyvec_invntt_tomont(ml_kem_params *params, polyvec *r)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_invntt_tomont(&r->vec[i]);
-}
-
-/*************************************************
-* Name:        polyvec_basemul_acc_montgomery
-*
-* Description: Multiply elements of a and b in NTT domain, accumulate into r,
-*              and multiply by 2^-16.
-*
-* Arguments: - poly *r: pointer to output polynomial
-*            - const polyvec *a: pointer to first input vector of polynomials
-*            - const polyvec *b: pointer to second input vector of polynomials
-**************************************************/
-void polyvec_basemul_acc_montgomery(ml_kem_params *params, poly *r, const polyvec *a, const polyvec *b)
-{
-  unsigned int i;
-  poly t;
-
-  poly_basemul_montgomery(r, &a->vec[0], &b->vec[0]);
-  for(i=1;i<params->k;i++) {
-    poly_basemul_montgomery(&t, &a->vec[i], &b->vec[i]);
-    poly_add(r, r, &t);
-  }
-
-  poly_reduce(r);
-}
-
-/*************************************************
-* Name:        polyvec_reduce
-*
-* Description: Applies Barrett reduction to each coefficient
-*              of each element of a vector of polynomials;
-*              for details of the Barrett reduction see comments in reduce.c
-*
-* Arguments:   - polyvec *r: pointer to input/output polynomial
-**************************************************/
-void polyvec_reduce(ml_kem_params *params, polyvec *r)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_reduce(&r->vec[i]);
-}
-
-/*************************************************
-* Name:        polyvec_add
-*
-* Description: Add vectors of polynomials
-*
-* Arguments: - polyvec *r: pointer to output vector of polynomials
-*            - const polyvec *a: pointer to first input vector of polynomials
-*            - const polyvec *b: pointer to second input vector of polynomials
-**************************************************/
-void polyvec_add(ml_kem_params *params, polyvec *r, const polyvec *a, const polyvec *b)
-{
-  unsigned int i;
-  for(i=0;i<params->k;i++)
-    poly_add(&r->vec[i], &a->vec[i], &b->vec[i]);
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.h
deleted file mode 100644
index a7b57cafc62..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/polyvec.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef ML_KEM_POLYVEC_H
-#define ML_KEM_POLYVEC_H
-
-#include <stdint.h>
-#include "params.h"
-#include "poly.h"
-
-typedef struct{
-  poly vec[KYBER_K_MAX];
-} polyvec;
-
-#define polyvec_compress KYBER_NAMESPACE(polyvec_compress)
-void polyvec_compress(ml_kem_params *params, uint8_t *r, const polyvec *a);
-#define polyvec_decompress KYBER_NAMESPACE(polyvec_decompress)
-void polyvec_decompress(ml_kem_params *params, polyvec *r, const uint8_t *a);
-
-#define polyvec_tobytes KYBER_NAMESPACE(polyvec_tobytes)
-void polyvec_tobytes(ml_kem_params *params, uint8_t *r, const polyvec *a);
-#define polyvec_frombytes KYBER_NAMESPACE(polyvec_frombytes)
-void polyvec_frombytes(ml_kem_params *params, polyvec *r, const uint8_t *a);
-
-#define polyvec_ntt KYBER_NAMESPACE(polyvec_ntt)
-void polyvec_ntt(ml_kem_params *params, polyvec *r);
-#define polyvec_invntt_tomont KYBER_NAMESPACE(polyvec_invntt_tomont)
-void polyvec_invntt_tomont(ml_kem_params *params, polyvec *r);
-
-#define polyvec_basemul_acc_montgomery KYBER_NAMESPACE(polyvec_basemul_acc_montgomery)
-void polyvec_basemul_acc_montgomery(ml_kem_params *params, poly *r, const polyvec *a, const polyvec *b);
-
-#define polyvec_reduce KYBER_NAMESPACE(polyvec_reduce)
-void polyvec_reduce(ml_kem_params *params, polyvec *r);
-
-#define polyvec_add KYBER_NAMESPACE(polyvec_add)
-void polyvec_add(ml_kem_params *params, polyvec *r, const polyvec *a, const polyvec *b);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.c
deleted file mode 100644
index 9d8e7edf832..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.c
+++ /dev/null
@@ -1,42 +0,0 @@
-#include <stdint.h>
-#include "params.h"
-#include "reduce.h"
-
-/*************************************************
-* Name:        montgomery_reduce
-*
-* Description: Montgomery reduction; given a 32-bit integer a, computes
-*              16-bit integer congruent to a * R^-1 mod q, where R=2^16
-*
-* Arguments:   - int32_t a: input integer to be reduced;
-*                           has to be in {-q2^15,...,q2^15-1}
-*
-* Returns:     integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q.
-**************************************************/
-int16_t montgomery_reduce(int32_t a)
-{
-  int16_t t;
-
-  t = (int16_t)a*QINV;
-  t = (a - (int32_t)t*KYBER_Q) >> 16;
-  return t;
-}
-
-/*************************************************
-* Name:        barrett_reduce
-*
-* Description: Barrett reduction; given a 16-bit integer a, computes
-*              centered representative congruent to a mod q in {-(q-1)/2,...,(q-1)/2}
-*
-* Arguments:   - int16_t a: input integer to be reduced
-*
-* Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
-**************************************************/
-int16_t barrett_reduce(int16_t a) {
-  int16_t t;
-  const int16_t v = ((1<<26) + KYBER_Q/2)/KYBER_Q;
-
-  t  = ((int32_t)v*a + (1<<25)) >> 26;
-  t *= KYBER_Q;
-  return a - t;
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.h
deleted file mode 100644
index d4b6603ed6a..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/reduce.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef ML_KEM_REDUCE_H
-#define ML_KEM_REDUCE_H
-
-#include <stdint.h>
-#include "params.h"
-
-#define MONT -1044 // 2^16 mod q
-#define QINV -3327 // q^-1 mod 2^16
-
-#define montgomery_reduce KYBER_NAMESPACE(montgomery_reduce)
-int16_t montgomery_reduce(int32_t a);
-
-#define barrett_reduce KYBER_NAMESPACE(barrett_reduce)
-int16_t barrett_reduce(int16_t a);
-
-#endif
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric-shake.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric-shake.c
deleted file mode 100644
index 72bf98c2f2c..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric-shake.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-#include <string.h>
-#include "params.h"
-#include "symmetric.h"
-
-/*************************************************
-* Name:        kyber_shake128_absorb
-*
-* Description: Absorb step of the SHAKE128 specialized for the Kyber context.
-*
-* Arguments:   - KECCAK1600_CTX *ctx: pointer to (uninitialized) output Keccak state
-*              - const uint8_t *seed: pointer to KYBER_SYMBYTES input to be absorbed into state
-*              - uint8_t i: additional byte of input
-*              - uint8_t j: additional byte of input
-**************************************************/
-void kyber_shake128_absorb(KECCAK1600_CTX *ctx,
-                           const uint8_t seed[KYBER_SYMBYTES],
-                           uint8_t x,
-                           uint8_t y)
-{
-  uint8_t extseed[KYBER_SYMBYTES+2];
-
-  memcpy(extseed, seed, KYBER_SYMBYTES);
-  extseed[KYBER_SYMBYTES+0] = x;
-  extseed[KYBER_SYMBYTES+1] = y;
-
-  // Return code checks can be omitted
-  // SHAKE_Init always returns 1 when called with correct block size value
-  SHAKE_Init(ctx, SHAKE128_BLOCKSIZE);
-
-  // SHAKE_Absorb always returns 1 on first call of sizeof(extseed) (34 bytes)
-  SHAKE_Absorb(ctx, extseed, sizeof(extseed));
-}
-
-/*************************************************
-* Name:        kyber_shake128_squeeze
-*
-* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
-*              SHAKE128_BLOCKSIZE bytes each. Can be called multiple times
-*              to keep squeezing. Assumes new block has not yet been
-*              started.
-*
-* Arguments:   - uint8_t *out: pointer to output blocks
-*              - size_t nblocks: number of blocks to be squeezed (written to output)
-*              - KECCAK1600_CTX *ctx: pointer to input/output Keccak state
-**************************************************/
-void kyber_shake128_squeeze(KECCAK1600_CTX *ctx, uint8_t *out, int nblocks)
-{
-  // Return code checks can be omitted
-  // SHAKE_Squeeze always returns 1 when |ctx->state| flag is different 
-  // from |KECCAK1600_STATE_FINAL|
-  SHAKE_Squeeze(out, ctx, nblocks * SHAKE128_BLOCKSIZE);
-}
-
-/*************************************************
-* Name:        kyber_shake256_prf
-*
-* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input
-*              and then generates outlen bytes of SHAKE256 output
-*
-* Arguments:   - uint8_t *out: pointer to output
-*              - size_t outlen: number of requested output bytes
-*              - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
-*              - uint8_t nonce: single-byte nonce (public PRF input)
-**************************************************/
-void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce)
-{
-  uint8_t extkey[KYBER_SYMBYTES+1];
-
-  memcpy(extkey, key, KYBER_SYMBYTES);
-  extkey[KYBER_SYMBYTES] = nonce;
-
-  // Return code checks can be omitted
-  // SHAKE256 never returns NULL when the internal SHAKE_Init is called with correct block size value
-  SHAKE256(extkey, sizeof(extkey), out, outlen);
-}
-
-/*************************************************
-* Name:        kyber_shake256_prf
-*
-* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input
-*              and then generates outlen bytes of SHAKE256 output
-*
-* Arguments:   - uint8_t *out: pointer to output
-*              - size_t outlen: number of requested output bytes
-*              - const uint8_t *key: pointer to the key (of length KYBER_SYMBYTES)
-*              - uint8_t nonce: single-byte nonce (public PRF input)
-**************************************************/
-void kyber_shake256_rkprf(ml_kem_params *params, uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t *input)
-{
-  KECCAK1600_CTX ctx;
-
-  // Return code checks can be omitted
-  // SHAKE_Init always returns 1 when called with correct block size value
-  SHAKE_Init(&ctx, SHAKE256_BLOCKSIZE);
-
-  // SHAKE_Absorb always returns 1 on first call of KYBER_SYMBYTES (32 bytes)
-  SHAKE_Absorb(&ctx, key, KYBER_SYMBYTES);
-
-  // SHAKE_Absorb always returns 1 processing all data blocks that don't need pad
-  SHAKE_Absorb(&ctx, input, params->ciphertext_bytes);
-
-  // SHAKE_Final always returns 1 when |ctx->state| flag is set to  
-  // |KECCAK1600_STATE_ABSORB| (no previous calls to SHAKE_Final)
-  SHAKE_Final(out, &ctx, KYBER_SSBYTES);
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric.h
deleted file mode 100644
index 93d8b63a6fb..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/symmetric.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ML_KEM_SYMMETRIC_H
-#define ML_KEM_SYMMETRIC_H
-
-#include <stddef.h>
-#include <stdint.h>
-#include "params.h"
-
-#include "../../sha/internal.h"
-
-#define kyber_shake128_absorb KYBER_NAMESPACE(kyber_shake128_absorb)
-void kyber_shake128_absorb(KECCAK1600_CTX *ctx,
-                           const uint8_t seed[KYBER_SYMBYTES],
-                           uint8_t x,
-                           uint8_t y);
-
-#define kyber_shake256_prf KYBER_NAMESPACE(kyber_shake256_prf)
-void kyber_shake256_prf(uint8_t *out, size_t outlen, const uint8_t key[KYBER_SYMBYTES], uint8_t nonce);
-
-#define kyber_shake256_rkprf KYBER_NAMESPACE(kyber_shake256_rkprf)
-void kyber_shake256_rkprf(ml_kem_params *params, uint8_t out[KYBER_SSBYTES], const uint8_t key[KYBER_SYMBYTES], const uint8_t *input);
-
-#define kyber_shake128_squeeze KYBER_NAMESPACE(kyber_shake128_squeeze)
-void kyber_shake128_squeeze(KECCAK1600_CTX *ctx, uint8_t *out, int nblocks);
-
-#define hash_h(OUT, IN, INBYTES) SHA3_256(IN, INBYTES, OUT)
-#define hash_g(OUT, IN, INBYTES) SHA3_512(IN, INBYTES, OUT)
-#define xof_absorb(STATE, SEED, X, Y) kyber_shake128_absorb(STATE, SEED, X, Y)
-#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) kyber_shake128_squeeze(STATE, OUT, OUTBLOCKS)
-#define prf(OUT, OUTBYTES, KEY, NONCE) kyber_shake256_prf(OUT, OUTBYTES, KEY, NONCE)
-#define rkprf(PARAMS, OUT, KEY, INPUT) kyber_shake256_rkprf(PARAMS, OUT, KEY, INPUT)
-
-#endif /* SYMMETRIC_H */
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.c b/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.c
deleted file mode 100644
index 799ba586f47..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <stddef.h>
-#include <stdint.h>
-#include "verify.h"
-
-#include "../../../internal.h"
-
-/*************************************************
-* Name:        verify
-*
-* Description: Compare two arrays for equality in constant time.
-*
-* Arguments:   const uint8_t *a: pointer to first byte array
-*              const uint8_t *b: pointer to second byte array
-*              size_t len:       length of the byte arrays
-*
-* Returns 0 if the byte arrays are equal, 1 otherwise
-**************************************************/
-int verify(const uint8_t *a, const uint8_t *b, size_t len)
-{
-  size_t i;
-  uint8_t r = 0;
-
-  for(i=0;i<len;i++)
-    r |= a[i] ^ b[i];
-
-  return (-(uint64_t)r) >> 63;
-}
-
-/*************************************************
-* Name:        cmov
-*
-* Description: Copy len bytes from x to r if b is 1;
-*              don't modify x if b is 0. Requires b to be in {0,1};
-*              assumes two's complement representation of negative integers.
-*              Runs in constant time.
-*
-* Arguments:   uint8_t *r:       pointer to output byte array
-*              const uint8_t *x: pointer to input byte array
-*              size_t len:       Amount of bytes to be copied
-*              uint8_t b:        Condition bit; has to be in {0,1}
-**************************************************/
-void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b)
-{
-  uint8_t mask = constant_time_is_zero_8(b);
-  constant_time_select_array_8(r, r, (uint8_t*)x, mask, len);
-}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.h b/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.h
deleted file mode 100644
index 1ae4196fb43..00000000000
--- a/crypto/fipsmodule/ml_kem/ml_kem_ref/verify.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef ML_KEM_VERIFY_H
-#define ML_KEM_VERIFY_H
-
-#include <stddef.h>
-#include <stdint.h>
-#include "params.h"
-
-#define verify KYBER_NAMESPACE(verify)
-int verify(const uint8_t *a, const uint8_t *b, size_t len);
-
-#define cmov KYBER_NAMESPACE(cmov)
-void cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b);
-
-#endif

From b21119d5f69189ec5d62e4b21f07ec51f65ef875 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Sun, 12 Jan 2025 05:52:55 +0000
Subject: [PATCH 2/3] Use mlkem-native as AWS-LC's ML-KEM implementation

This imports mlkem-native (https://github.com/pq-code-package/mlkem-native,
maintained by myself and @mkannwischer) into AWS-LC, replacing the
reference implementation.

This PR focuses on the minimal configuration of mlkem-native: No assembly
and no FIPS-202 code are imported.

mlkem-native is a high-performance, high-assurance C90 implementation of
ML-KEM developed under the Post-Quantum Cryptography Alliance (PQCA) and
the Linux Foundation. It is a fork of the reference implementation that
AWS-LC previously relied on, and remains close to it. mlkem-native is the
default ML-KEM implementation in
[libOQS](https://github.com/open-quantum-safe/liboqs).

**Import Mechanism**

The mlkem-native source code is unmodified and imported using the importer
script `crypto/fipsmodule/ml_kem/importer.sh`; the details of the import
are in META.yml.

A custom config is provided for mlkem-native which in particular includes a
small 'compatibility layer' between AWS-LC/OpenSSL and mlkem-native -- see
below.

**Import Scope**

mlkem-native has a C-only version as well as native 'backends' in AVX2 and
Neon for high performance. This commit only imports the C-only
version. Integration of native backends will be done separately.

mlkem-native offers its own FIPS-202 implementation, including fast
versions of batched FIPS-202. However, this commit does not import those,
but instead provides glue-code around AWS-LC's own FIPS-202
implementation. The path to leveraging the FIPS-202 performance
improvements in mlkem-native would be to integrate them directly into
[crypto/fipsmodule/sha](crypto/fipsmodule/sha).

**Impact on build**

None. No build-files are modified.

**Compatibility layer**

The configuration file `mlkem_native_config.h` includes a compatibility
layer between AWS-LC/OpenSSL and mlkem-native, covering:
* FIPS/PCT: If `AWSLC_FIPS` is set, `MLK_CONFIG_KEYGEN_PCT` is
  enabled to include a PCT.
* FIPS/PCT: If `BORINGSSL_FIPS_BREAK_TESTS` is set,
  `MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST` is set and `mlk_break_pct`
  defined via `boringssl_fips_break_test("MLKEM_PWCT")`, to include
  runtime-breakage of the PCT for testing purposes.
* CT: If `BORINGSSL_CONSTANT_TIME_VALIDATION` is set, then
  `MLK_CONFIG_CT_TESTING_ENABLED` is set to enable valgrind testing.
* Zeroization: `MLK_CONFIG_CUSTOM_ZEROIZE` is set and `mlk_zeroize`
  mapped to `OPENSSL_cleanse` to use OpenSSL's zeroization function.
* Randombytes: `MLK_CONFIG_CUSTOM_RANDOMBYTES` is set and `mlk_randombytes`
  mapped to `RAND_bytes` to use AWS-LC's randombytes function.

**Side-channels**

mlkem-native's CI uses a patched version of valgrind to check for various
compilers and compile flags that there are no secret-dependent memory
accesses, branches, or divisions. The relevant assertions have been kept
but are unused unless `MLK_CONFIG_CT_TESTING_ENABLED` is set, which is
the case if and only if `BORINGSSL_CONSTANT_TIME_VALIDATION` is set.

Similar to AWS-LC, mlkem-native uses value barriers to block potentially
harmful compiler reasoning and optimization. Where standard gcc/clang
inline assembly is not available, mlkem-native falls back to a slower 'opt
blocker' based on a volatile global (an idea we picked up from DjB) -- both
are described in
[verify.h](https://github.com/aws/aws-lc/blob/df5b09029e27d54b2b117eeddb6abd983528ae15/crypto/fipsmodule/ml_kem/mlkem/verify.h). It
will be interesting to see if the opt-blocker variant works on all
platforms that AWS-LC cares about.

**Formal Verification**

All C-code imported in this commit is formally verified using the C Bounded
Model Checker ([CBMC](https://github.com/diffblue/cbmc/)) to be free of
various classes of undefined behaviour, including out-of-bounds memory
accesses and arithmetic overflow; the latter is of particular interest for
ML-KEM because of the use of lazy modular reduction for improved
performance.

The heart of the CBMC proofs are function contract and loop annotations to
the C-code. Function contracts are denoted `__contract__(...)` clauses and
occur at the time of declaration, while loop contracts are denoted
`__loop__` and follow the `for` statement.

The function contract and loop statements are kept in the source, but
removed by the preprocessor so long as the CBMC macro is undefined. Keeping
them simplifies the import, and care has been taken to make them readable
to the non-expert, and thereby serve as precise documentation of
assumptions and guarantees upheld by the code.

The CBMC proofs are automatic and don't require further proofs scripts;
yet, they come with their own build system and toolchain dependencies,
which this commit does not attempt to import. See
[proofs/cbmc](https://github.com/pq-code-package/mlkem-native/tree/main/proofs/cbmc)
in the mlkem-native repository. Mid-term, however, CI infrastructure should
be set up to import and check the CBMC proofs as part of the
AWS-LC CI.

**FIPS Compliance**

The current reference implementation in AWS-LC accommodates FIPS (IG)
requirements via:
* Adding explicit stack buffer zeroization via `OPENSSL_cleanse`
* Adding a Pairwise Consistency Test (PCT) after key generation (only for
the FIPS-build)

mlkem-native unconditionally includes stack zeroization. mlkem-native's
default secure `memset` is replaced by `OPENSSL_cleanse`.

mlkem-native conditionally includes a PCT, guarded by
`MLK_CONFIG_KEYGEN_PCT`. This is set in the config if and only if
`AWSLC_FIPS` is set.

**Formatting**

Code in `crypto/fipsmodule/ml_kem/mlkem` is directly imported from
mlkem-native and comes with its own
`crypto/fipsmodule/ml_kem/mlkem/.clang-format`.

**Prefix build**

The prefix build should not be affected by the import, since no definitions
of external linkage are imported (everything is tagged either `static`
directly, or via `MLK_CONFIG_EXTERNAL_API_QUALIFIER` or
`MLK_CONFIG_INTERNAL_API_QUALIFIER`, both of which are set to `static`
in the context of the import, too).

**Performance**

It is expected -- but should be checked! -- that the ML-KEM performance
with this PR is comparable to that of the reference implementation. This is
because the mlkem-native's fast backends are not yet imported, the FIPS-202
code remains that of AWS-LC, and mlkem-native is otherwise close to the
reference implementation.

**Multilevel build**

At the core, mlkem-native is currently a 'single-level' implementation of
ML-KEM: A build of the main source tree provides an implementation of
exactly one of ML-KEM-512/768/1024, depending on the MLKEM_K
parameter. This property is inherited from the ML-KEM reference
implementation, while AWS-LC's fork of the reference implementation has
changed this behaviour and passes the security level as a runtime
parameter.

To build all security levels, level-specific sources are built 3 times,
once per security level, and linked with a single build of the
level-independent code. The single-compilation-unit approach pursued by
AWS-LC makes this process fairly simple since one merely needs to include
the single-compilation-unit file provided by mlkem-native three times, and
configure it so that the level-independent code is included only once. The
final include moreover `#undef`'ines all macros defined by mlkem-native,
reducing the risk of name clashes with other parts of
crypto/fipsmodule/bcm.c.

Note that this process is entirely internal to `ml_kem.c`, and does not
affect the AWS-LC build.

**Main differences from reference implementation**

mlkem-native is a fork of the ML-KEM [reference
implementation](https://github.com/pq-crystals/kyber).

The following gives an overview of the major changes:

- CBMC and debug annotations, and minor code restructurings or signature
  changes to facilitate the CBMC proofs. For example, `poly_add(x,a)` only
  comes in a destructive variant to avoid specifying aliasing constraints;
  `poly_rej_uniform` has an additional `offset` parameter indicating the
  position in the sampling buffer, to avoid passing shifted pointers).
- Introduction of 4x-batched versions of some functions from the reference
  implementation. This is to leverage 4x-batched Keccak-f1600 implementations
  if present. The batching happens at the C level even if no native backend
  for FIPS 202 is present.
- FIPS 203 compliance: Introduced PK (FIPS 203, Section 7.2, 'modulus
  check') and SK (FIPS 203, Section 7.3, 'hash check') check, as well as
  optional PCT (FIPS 203, Section 7.1, Pairwise Consistency). Also,
  introduced zeroization of stack buffers as required by (FIPS 203, Section
  3.3, Destruction of intermediate values).
- Introduction of native backend implementations. With the exception of the
  native backend for `poly_rej_uniform()`, which may fail and fall back to
  the C implementation, those are drop-in replacements for the corresponding
  C functions and dispatched at compile-time.
- Restructuring of files to separate level-specific from level-generic
  functionality. This is needed to enable a multi-level build of mlkem-native
  where level-generic code is shared between levels.
- More pervasive use of value barriers to harden constant-time primitives,
  even when Link-Time-Optimization (LTO) is enabled. The use of LTO can lead
  to insecure compilation in case of the reference implementation.
- Use of a multiplication cache ('mulcache') structure to simplify and
  speedup the base multiplication.
- Different placement of modular reductions: We reduce to _unsigned_
  canonical representatives in `poly_reduce()`, and _assume_ such in all
  polynomial compression functions. The reference implementation works with a
  _signed_ `poly_reduce()`, and embeds various signed->unsigned conversions
  in the compression functions.
- More inlining: Modular multiplication and primitives are in a header
  rather than a separate compilation unit.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 crypto/fipsmodule/ml_kem/META.yml             |   5 +
 crypto/fipsmodule/ml_kem/README.md            | 163 ++++-
 crypto/fipsmodule/ml_kem/fips202_glue.h       |  64 ++
 crypto/fipsmodule/ml_kem/fips202x4_glue.h     |  58 ++
 crypto/fipsmodule/ml_kem/importer.sh          | 119 ++++
 crypto/fipsmodule/ml_kem/ml_kem.c             | 196 ++++++
 crypto/fipsmodule/ml_kem/ml_kem.h             |  31 +
 crypto/fipsmodule/ml_kem/mlkem/.clang-format  |  26 +
 crypto/fipsmodule/ml_kem/mlkem/cbmc.h         | 140 ++++
 crypto/fipsmodule/ml_kem/mlkem/common.h       | 144 ++++
 crypto/fipsmodule/ml_kem/mlkem/compress.c     | 532 ++++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/compress.h     | 651 ++++++++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/debug.c        |  64 ++
 crypto/fipsmodule/ml_kem/mlkem/debug.h        | 129 ++++
 crypto/fipsmodule/ml_kem/mlkem/indcpa.c       | 517 ++++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/indcpa.h       | 133 ++++
 crypto/fipsmodule/ml_kem/mlkem/kem.c          | 350 ++++++++++
 crypto/fipsmodule/ml_kem/mlkem/kem.h          | 214 ++++++
 crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h | 275 ++++++++
 .../ml_kem/mlkem/mlkem_native_bcm.c           | 300 ++++++++
 crypto/fipsmodule/ml_kem/mlkem/params.h       |  85 +++
 crypto/fipsmodule/ml_kem/mlkem/poly.c         | 506 ++++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/poly.h         | 336 +++++++++
 crypto/fipsmodule/ml_kem/mlkem/poly_k.c       | 434 ++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/poly_k.h       | 645 +++++++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/randombytes.h  |  22 +
 crypto/fipsmodule/ml_kem/mlkem/sampling.c     | 338 +++++++++
 crypto/fipsmodule/ml_kem/mlkem/sampling.h     | 101 +++
 crypto/fipsmodule/ml_kem/mlkem/symmetric.h    |  59 ++
 crypto/fipsmodule/ml_kem/mlkem/sys.h          | 171 +++++
 crypto/fipsmodule/ml_kem/mlkem/verify.c       |  20 +
 crypto/fipsmodule/ml_kem/mlkem/verify.h       | 423 ++++++++++++
 crypto/fipsmodule/ml_kem/mlkem/zetas.inc      |  30 +
 .../fipsmodule/ml_kem/mlkem_native_config.h   |  66 ++
 34 files changed, 7334 insertions(+), 13 deletions(-)
 create mode 100644 crypto/fipsmodule/ml_kem/META.yml
 create mode 100644 crypto/fipsmodule/ml_kem/fips202_glue.h
 create mode 100644 crypto/fipsmodule/ml_kem/fips202x4_glue.h
 create mode 100755 crypto/fipsmodule/ml_kem/importer.sh
 create mode 100644 crypto/fipsmodule/ml_kem/ml_kem.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/.clang-format
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/cbmc.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/common.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/compress.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/compress.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/debug.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/debug.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/indcpa.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/indcpa.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/kem.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/kem.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/mlkem_native_bcm.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/params.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/poly.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/poly.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/poly_k.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/poly_k.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/randombytes.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/sampling.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/sampling.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/symmetric.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/sys.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/verify.c
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/verify.h
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem/zetas.inc
 create mode 100644 crypto/fipsmodule/ml_kem/mlkem_native_config.h

diff --git a/crypto/fipsmodule/ml_kem/META.yml b/crypto/fipsmodule/ml_kem/META.yml
new file mode 100644
index 00000000000..de460c74194
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/META.yml
@@ -0,0 +1,5 @@
+name: mlkem-native
+source: pq-code-package/mlkem-native.git
+branch: main
+commit: 83d85fe224bd6cf1b75f096a2b2fa01033b3dfda
+imported-at: 2025-04-03T12:37:06+0100
diff --git a/crypto/fipsmodule/ml_kem/README.md b/crypto/fipsmodule/ml_kem/README.md
index 5ad7b64b5d5..0391088af7e 100644
--- a/crypto/fipsmodule/ml_kem/README.md
+++ b/crypto/fipsmodule/ml_kem/README.md
@@ -1,17 +1,154 @@
-# AWS-LC ML-KEM readme file
+# ML-KEM
 
-The source code in this folder implements ML-KEM as defined in FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard ([link](https://csrc.nist.gov/pubs/fips/203/final).
+The source code in this directory implements ML-KEM as defined in
+the [FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard](https://csrc.nist.gov/pubs/fips/203/final).
+It is imported from [mlkem-native](https://github.com/pq-code-package/mlkem-native)
+using [importer.sh](importer.sh); see [META.yml](META.yml) for import details.
 
-**Source code origin and modifications.** The source code was imported from a branch of the official repository of the Crystals-Kyber team that follows the standard draft: https://github.com/pq-crystals/kyber/tree/standard. The code was taken at [commit](https://github.com/pq-crystals/kyber/commit/11d00ff1f20cfca1f72d819e5a45165c1e0a2816) as of 03/26/2024. At the moment, only the reference C implementation is imported.
+## Running the importer
 
-The code was refactored in [this PR](https://github.com/aws/aws-lc/pull/1763) by parametrizing all functions that depend on values that are specific to a parameter set, i.e., that directly or indirectly depend on the value of KYBER_K. To do this, in `params.h` we defined a structure that holds those ML-KEM parameters and functions
-that initialize a given structure with values corresponding to a parameter set. This structure is then passed to every function that requires it as a function argument. In addition, the following changes were made to the source code in `ml_kem_ref` directory:
-- `randombytes.{h|c}` are deleted because we are using the randomness generation functions provided by AWS-LC.
-- `kem.c`: call to randombytes function is replaced with a call to RAND_bytes and the appropriate header file is included (openssl/rand.h).
-- `fips202.{h|c}` are deleted as all SHA3/SHAKE functionality is provided instead by AWS-LC fipsmodule/sha rather than the reference implementation.
-- `symmetric-shake.c`: unnecessary include of fips202.h is removed.
-- `api.h`: `pqcrystals` prefix substituted with `ml_kem` (to be able to build alongside `crypto/kyber`).
-- `poly.c`: the `poly_frommsg` function was modified to address the constant-time issue described [here](https://github.com/pq-crystals/kyber/commit/9b8d30698a3e7449aeb34e62339d4176f11e3c6c).
-- All internal header files were updated with unique `ML_KEM_*` include guards.
+To re-run the importer, do
 
-**Testing.** The KATs were obtained from an independent implementation of ML-KEM written in SPARK Ada subset: https://github.com/awslabs/LibMLKEM.
+```bash
+rm -rf mlkem # Remove old mlkem source
+./importer.sh
+```
+
+By default, the importer will not run if [mlkem](mlkem) already/still exists. To force removal of any existing [mlkem](mlkem), use `./importer.sh --force`.
+
+The repository and branch to be used for the import can be configured through the environment variables `GITHUB_REPOSITORY` and `GITHUB_SHA`, respectively. The default is equivalent to
+
+```bash
+GITHUB_REPOSITORY=pq-code-package/mlkem-native.git GITHUB_SHA=main ./importer.sh
+```
+
+That is, by default importer.sh will clone and install the latest [main](https://github.com/pq-code-package/mlkem-native/tree/main) of mlkem-native.
+
+After a successful import, [META.yml](META.yml) will reflect the source, branch, commit and timestamp of the import.
+
+### Import Scope
+
+mlkem-native has a C-only version as well as native 'backends' in AVX2 and
+Neon for high performance. At present, [importer.sh](importer.sh) imports only
+the C-only version.
+
+mlkem-native offers its own FIPS-202 implementation, including fast
+versions of batched FIPS-202. [importer.sh](importer.sh) does _not_ import those.
+Instead, glue-code around AWS-LC's own FIPS-202 implementation is provided in
+[fips202_glue.h](fips202_glue.h) and [fips202x4_glue.h](fips202x4_glue.h).
+
+## Configuration and compatibility layer
+
+mlkem-native is used with a custom configuration file [mlkem_native_config.h](mlkem_native_config.h). This file includes
+a compatibility layer between AWS-LC/OpenSSL and mlkem-native, covering:
+
+* FIPS/PCT: If `AWSLC_FIPS` is set, `MLK_CONFIG_KEYGEN_PCT` is
+  enabled to include a PCT.
+* FIPS/PCT: If `BORINGSSL_FIPS_BREAK_TESTS` is set,
+  `MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST` is set and `mlk_break_pct`
+  defined via `boringssl_fips_break_test("MLKEM_PWCT")`, to include
+  runtime-breakage of the PCT for testing purposes.
+* CT: If `BORINGSSL_CONSTANT_TIME_VALIDATION` is set, then
+  `MLK_CONFIG_CT_TESTING_ENABLED` is set to enable valgrind testing.
+* Zeroization: `MLK_CONFIG_CUSTOM_ZEROIZE` is set and `mlk_zeroize`
+  mapped to `OPENSSL_cleanse` to use OpenSSL's zeroization function.
+* Randombytes: `MLK_CONFIG_CUSTOM_RANDOMBYTES` is set and `mlk_randombytes`
+  mapped to `RAND_bytes` to use AWS-LC's randombytes function.
+
+## Build process
+
+At the core, mlkem-native is a 'single-level' implementation of ML-KEM:
+A build of the main source tree provides an implementation of
+exactly one of ML-KEM-512/768/1024, depending on the MLK_CONFIG_PARAMETER_SET
+parameter. All source files for a single-build of mlkem-native are bundled in
+[mlkem_native_bcm.c](mlkem/mlkem_native_bcm.c), which is also imported from
+mlkem-native.
+
+To build all security levels, [mlkem_native_bcm.c](mlkem/mlkem_native_bcm.c)
+is included three times into [ml_kem.c](ml_kem.c), once per security level.
+Level-independent code is included only once and shared across the levels;
+this is controlled through the configuration options
+`MLK_CONFIG_MULTILEVEL_WITH_SHARED` and `MLK_CONFIG_MULTILEVEL_NO_SHARED`
+used prior to importing the instances of [mlkem_native_bcm.c](mlkem/mlkem_native_bcm.c) into [ml_kem.c](ml_kem.c).
+
+Note that the multilevel build process is entirely internal to `ml_kem.c`,
+and does not affect the AWS-LC build otherwise.
+
+## Formal Verification
+
+All C-code imported by [importer.sh](importer.sh) is formally verified using the
+C Bounded Model Checker ([CBMC](https://github.com/diffblue/cbmc/)) to be free of
+various classes of undefined behaviour, including out-of-bounds memory accesses and
+arithmetic overflow; the latter is of particular interest for ML-KEM because of
+the use of lazy modular reduction for improved performance.
+
+The heart of the CBMC proofs are function contract and loop annotations to
+the C-code. Function contracts are denoted `__contract__(...)` clauses and
+occur at the time of declaration, while loop contracts are denoted
+`__loop__` and follow the `for` statement.
+
+The function contract and loop statements are kept in the source, but
+removed by the preprocessor so long as the CBMC macro is undefined. Keeping
+them simplifies the import, and care has been taken to make them readable
+to the non-expert, and thereby serve as precise documentation of
+assumptions and guarantees upheld by the code.
+
+## Testing
+
+The KATs were obtained from an independent implementation of ML-KEM written
+in SPARK Ada subset: https://github.com/awslabs/LibMLKEM.
+
+## Side-channels
+
+mlkem-native's CI uses a patched version of valgrind to check for various
+compilers and compile flags that there are no secret-dependent memory
+accesses, branches, or divisions. The relevant assertions are kept
+and used if `MLK_CONFIG_CT_TESTING_ENABLED` is set, which is the case
+if and only if `BORINGSSL_CONSTANT_TIME_VALIDATION` is set.
+
+mlkem-native uses value barriers to block
+potentially harmful compiler reasoning and optimization. Where standard
+gcc/clang inline assembly is not available, mlkem-native falls back to a
+slower 'opt blocker' based on a volatile global -- both are described in
+[verify.h](https://github.com/aws/aws-lc/blob/df5b09029e27d54b2b117eeddb6abd983528ae15/crypto/fipsmodule/ml_kem/mlkem/verify.h).
+
+## Comparison to reference implementation
+
+mlkem-native is a fork of the ML-KEM [reference
+implementation](https://github.com/pq-crystals/kyber).
+
+The following gives an overview of the major changes:
+
+- CBMC and debug annotations, and minor code restructurings or signature
+  changes to facilitate the CBMC proofs. For example, `poly_add(x,a)` only
+  comes in a destructive variant to avoid specifying aliasing constraints;
+  `poly_rej_uniform` has an additional `offset` parameter indicating the
+  position in the sampling buffer, to avoid passing shifted pointers).
+- Introduction of 4x-batched versions of some functions from the reference
+  implementation. This is to leverage 4x-batched Keccak-f1600 implementations
+  if present. The batching happens at the C level even if no native backend
+  for FIPS 202 is present.
+- FIPS 203 compliance: Introduced PK (FIPS 203, Section 7.2, 'modulus
+  check') and SK (FIPS 203, Section 7.3, 'hash check') check, as well as
+  optional PCT (FIPS 203, Section 7.1, Pairwise Consistency). Also,
+  introduced zeroization of stack buffers as required by (FIPS 203, Section
+  3.3, Destruction of intermediate values).
+- Introduction of native backend implementations. With the exception of the
+  native backend for `poly_rej_uniform()`, which may fail and fall back to
+  the C implementation, those are drop-in replacements for the corresponding
+  C functions and dispatched at compile-time.
+- Restructuring of files to separate level-specific from level-generic
+  functionality. This is needed to enable a multi-level build of mlkem-native
+  where level-generic code is shared between levels.
+- More pervasive use of value barriers to harden constant-time primitives,
+  even when Link-Time-Optimization (LTO) is enabled. The use of LTO can lead
+  to insecure compilation in case of the reference implementation.
+- Use of a multiplication cache ('mulcache') structure to simplify and
+  speedup the base multiplication.
+- Different placement of modular reductions: We reduce to _unsigned_
+  canonical representatives in `poly_reduce()`, and _assume_ such in all
+  polynomial compression functions. The reference implementation works with a
+  _signed_ `poly_reduce()`, and embeds various signed->unsigned conversions
+  in the compression functions.
+- More inlining: Modular multiplication and primitives are in a header
+  rather than a separate compilation unit.
diff --git a/crypto/fipsmodule/ml_kem/fips202_glue.h b/crypto/fipsmodule/ml_kem/fips202_glue.h
new file mode 100644
index 00000000000..404ed3a2047
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/fips202_glue.h
@@ -0,0 +1,64 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+#ifndef MLK_AWSLC_FIPS202_GLUE_H
+#define MLK_AWSLC_FIPS202_GLUE_H
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../sha/internal.h"
+
+#define SHAKE128_RATE 168
+#define SHAKE256_RATE 136
+#define SHA3_256_RATE 136
+#define SHA3_384_RATE 104
+#define SHA3_512_RATE 72
+
+#define mlk_shake128ctx KECCAK1600_CTX
+
+static MLK_INLINE void mlk_shake128_init(mlk_shake128ctx *state) {
+  // Return code checks can be omitted
+  // SHAKE_Init always returns 1 when called with correct block size value.
+  (void) SHAKE_Init(state, SHAKE128_BLOCKSIZE);
+}
+
+static MLK_INLINE void mlk_shake128_release(mlk_shake128ctx *state) {
+  (void) state;
+}
+
+static MLK_INLINE void mlk_shake128_absorb_once(mlk_shake128ctx *state,
+						const uint8_t *input, size_t inlen) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE_Absorb(state, input, inlen);
+}
+
+static MLK_INLINE void mlk_shake128_squeezeblocks(uint8_t *output, size_t nblocks,
+						  mlk_shake128ctx *state) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE_Squeeze(output, state, nblocks * SHAKE128_RATE);
+}
+
+static MLK_INLINE void mlk_shake256(uint8_t *output, size_t outlen,
+				    const uint8_t *input, size_t inlen) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE256(input, inlen, output, outlen);
+}
+
+static MLK_INLINE void mlk_sha3_256(uint8_t *output, const uint8_t *input,
+				    size_t inlen) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHA3_256(input, inlen, output);
+}
+
+static MLK_INLINE void mlk_sha3_512(uint8_t *output, const uint8_t *input,
+				    size_t inlen) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHA3_512(input, inlen, output);
+}
+
+#endif // MLK_AWSLC_FIPS202_GLUE_H
diff --git a/crypto/fipsmodule/ml_kem/fips202x4_glue.h b/crypto/fipsmodule/ml_kem/fips202x4_glue.h
new file mode 100644
index 00000000000..67fb656e3f6
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/fips202x4_glue.h
@@ -0,0 +1,58 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+//
+// This is a shim establishing the FIPS-202 API required by
+// mlkem-native from the API exposed by AWS-LC.
+//
+
+#ifndef MLK_AWSLC_FIPS202X4_GLUE_H
+#define MLK_AWSLC_FIPS202X4_GLUE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "fips202_glue.h"
+
+#define mlk_shake128x4ctx KECCAK1600_CTX_x4
+
+static MLK_INLINE void mlk_shake128x4_absorb_once(mlk_shake128x4ctx *state,
+						  const uint8_t *in0,
+						  const uint8_t *in1,
+						  const uint8_t *in2,
+						  const uint8_t *in3, size_t inlen) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE128_Absorb_once_x4(state, in0, in1, in2, in3, inlen);
+}
+
+static MLK_INLINE void mlk_shake128x4_squeezeblocks(uint8_t *out0, uint8_t *out1,
+						    uint8_t *out2, uint8_t *out3,
+						    size_t nblocks,
+						    mlk_shake128x4ctx *state) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE128_Squeezeblocks_x4(out0, out1, out2, out3, state, nblocks);
+}
+
+static MLK_INLINE void mlk_shake128x4_init(mlk_shake128x4ctx *state) {
+  // Return code check can be omitted
+  // since mlkem-native adheres to call discipline
+  (void) SHAKE128_Init_x4(state);
+}
+
+static MLK_INLINE void mlk_shake128x4_release(mlk_shake128x4ctx *state) {
+  (void) state;
+}
+
+static MLK_INLINE void mlk_shake256x4(uint8_t *out0, uint8_t *out1, uint8_t *out2,
+				      uint8_t *out3, size_t outlen, uint8_t *in0,
+				      uint8_t *in1, uint8_t *in2, uint8_t *in3,
+				      size_t inlen) {
+  // Return code check can be omitted
+  // since SHAKE256_x4 is documented not to fail for valid inputs.
+  (void) SHAKE256_x4(in0, in1, in2, in3, inlen,
+		     out0, out1, out2, out3, outlen);
+}
+
+#endif // MLK_AWSLC_FIPS202X4_GLUE_H
diff --git a/crypto/fipsmodule/ml_kem/importer.sh b/crypto/fipsmodule/ml_kem/importer.sh
new file mode 100755
index 00000000000..ecd29ba8be7
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/importer.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 OR ISC
+
+#
+# mlkem-native -> AWS-LC importer script
+#
+# This script imports a version of mlkem-native into AWS-LC.
+# It is meant to do all import work and leave AWS-LC in a fully
+# working state.
+#
+# Usage:
+#
+# ```
+# rm -rf ./mlkem # Remove any previous import
+# ./importer.sh
+# ```
+#
+# This imports github.com/pq-code-package/mlkem-native/main
+# and leaves commit hash and timestamp in META.yml.
+#
+# If you want to import a specific commit, and/or change the
+# upstream repository (for example, to your fork of mlkem-native), use
+#
+# ```
+# GITHUB_REPOSITORY={YOUR REPOSITORY} GITHUB_SHA={COMMIT_HASH} ./importer.sh [--force]
+# ```
+#
+
+# Dependencies:
+# - unifdef
+
+GITHUB_SERVER_URL=https://github.com
+GITHUB_REPOSITORY=${GITHUB_REPOSITORY:=pq-code-package/mlkem-native.git}
+GITHUB_SHA=${GITHUB_SHA:=main}
+
+SRC=mlkem
+
+# Check if necessary tools are installed
+if ! command -v unifdef >/dev/null 2>&1; then
+    echo "You need to install 'unifdef' to run the importer script."
+    exit 1
+fi
+
+# Check if source directory already exists
+if [ -d "$SRC" ]; then
+    if [[ "$1" == "--force" ]]; then
+        echo "Removing previous source directory $SRC as requested by --force"
+        rm -rf "$SRC"
+    else
+        echo "Source directory $SRC does already exist -- please remove it before re-running the importer or pass --force to force removal"
+        exit 1
+    fi
+fi
+
+# Work in temporary directory (created only once all precondition checks passed)
+TMP=$(mktemp -d) || exit 1
+echo "Temporary working directory: $TMP"
+pushd "$TMP" || exit 1
+
+# Fetch repository; abort on failure so nothing is imported from a bad checkout
+echo "Fetching repository ..."
+git init >/dev/null || exit 1
+git remote add origin "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" >/dev/null || exit 1
+git fetch origin --depth 1 "$GITHUB_SHA" >/dev/null || exit 1
+git checkout FETCH_HEAD >/dev/null || exit 1
+GITHUB_COMMIT=$(git rev-parse FETCH_HEAD) || exit 1
+
+# Get back to AWS-LC
+popd || exit 1
+
+echo "Pull source code from remote repository..."
+
+# Copy mlkem-native source tree -- C-only, no FIPS-202
+mkdir "$SRC" || exit 1
+cp "$TMP"/mlkem/* "$SRC"
+
+# We use the custom `mlkem_native_config.h`, so can remove the default one
+rm "$SRC/config.h"
+
+# Copy formatting file
+cp "$TMP/.clang-format" "$SRC"
+
+# Copy and statically simplify BCM file
+# The static simplification is not necessary, but improves readability
+# by removing directives related to native backends that are irrelevant
+# for the C-only import.
+unifdef -DMLK_CONFIG_MONOBUILD_CUSTOM_FIPS202                          \
+        -UMLK_CONFIG_MONOBUILD_WITH_NATIVE_ARITH                       \
+        -UMLK_CONFIG_MONOBUILD_WITH_NATIVE_FIPS202                     \
+        "$TMP/examples/monolithic_build/mlkem_native_monobuild.c" \
+        > "$SRC/mlkem_native_bcm.c"
+
+# Modify include paths to match position of mlkem_native_bcm.c
+# In mlkem-native, the include path is "mlkem/*", while here we
+# embed mlkem_native_bcm.c in the main source directory of mlkem-native,
+# hence the relative import path is just ".".
+if [[ "$(uname)" == "Darwin" ]]; then
+  SED_I=(-i "")
+else
+  SED_I=(-i)
+fi
+echo "Fixup include paths"
+sed "${SED_I[@]}" 's/#include "mlkem\/\([^"]*\)"/#include "\1"/' "$SRC/mlkem_native_bcm.c"
+
+echo "Remove temporary artifacts ..."
+rm -rf "$TMP"
+
+# Log timestamp, repository, and commit
+
+echo "Generating META.yml file ..."
+cat <<EOF > META.yml
+name: mlkem-native
+source: $GITHUB_REPOSITORY
+branch: $GITHUB_SHA
+commit: $GITHUB_COMMIT
+imported-at: $(date "+%Y-%m-%dT%H:%M:%S%z")
+EOF
diff --git a/crypto/fipsmodule/ml_kem/ml_kem.c b/crypto/fipsmodule/ml_kem/ml_kem.c
new file mode 100644
index 00000000000..2d6f5ad917b
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/ml_kem.c
@@ -0,0 +1,196 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+// mlkem-native source code
+
+// Common configuration, defined once before the per-level includes below
+#define MLK_CONFIG_FILE "../mlkem_native_config.h"
+#define MLK_CONFIG_FIPS202_CUSTOM_HEADER "../fips202_glue.h"
+#define MLK_CONFIG_FIPS202X4_CUSTOM_HEADER "../fips202x4_glue.h"
+#define MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS
+
+// MLKEM-512
+#define MLK_CONFIG_PARAMETER_SET 512
+#define MLK_CONFIG_MULTILEVEL_WITH_SHARED // Include level-independent code
+#include "mlkem/mlkem_native_bcm.c"
+// MLKEM-768
+#undef MLK_CONFIG_PARAMETER_SET
+#define MLK_CONFIG_PARAMETER_SET 768
+#define MLK_CONFIG_MULTILEVEL_NO_SHARED // Exclude level-independent code
+#include "mlkem/mlkem_native_bcm.c"
+// MLKEM-1024
+#undef MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS
+#undef MLK_CONFIG_PARAMETER_SET
+#define MLK_CONFIG_PARAMETER_SET 1024
+#include "mlkem/mlkem_native_bcm.c"
+
+// End of mlkem-native source code
+
+#include "./ml_kem.h"
+
+int ml_kem_512_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                     uint8_t *secret_key  /* OUT */,
+                                     const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_keypair_deterministic_no_self_test(public_key, secret_key, seed);
+}
+
+int ml_kem_512_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */) {
+  int res = mlkem512_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_512_keypair(uint8_t *public_key /* OUT */,
+                           uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem512_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_512_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                         uint8_t *shared_secret    /* OUT */,
+                                         const uint8_t *public_key /* IN  */,
+                                         const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_encapsulate_deterministic_no_self_test(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_512_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */) {
+  return mlkem512_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_512_encapsulate(uint8_t *ciphertext       /* OUT */,
+                           uint8_t *shared_secret    /* OUT */,
+                           const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem512_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_512_decapsulate(uint8_t *shared_secret    /* OUT */,
+                           const uint8_t *ciphertext /* IN  */,
+                           const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return ml_kem_512_decapsulate_no_self_test(shared_secret, ciphertext, secret_key);
+}
+
+int ml_kem_512_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */) {
+  return mlkem512_dec(shared_secret, ciphertext, secret_key);
+}
+
+
+int ml_kem_768_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                     uint8_t *secret_key  /* OUT */,
+                                     const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem768_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_768_keypair(uint8_t *public_key /* OUT */,
+                       uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem768_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_768_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                         uint8_t *shared_secret    /* OUT */,
+                                         const uint8_t *public_key /* IN  */,
+                                         const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_768_encapsulate(uint8_t *ciphertext       /* OUT */,
+                           uint8_t *shared_secret    /* OUT */,
+                           const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_768_decapsulate(uint8_t *shared_secret    /* OUT */,
+                           const uint8_t *ciphertext /* IN  */,
+                           const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem768_dec(shared_secret, ciphertext, secret_key);
+}
+
+int ml_kem_1024_keypair_deterministic(uint8_t *public_key  /* OUT */,
+                                      uint8_t *secret_key  /* OUT */,
+                                      const uint8_t *seed  /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem1024_keypair_derand(public_key, secret_key, seed);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_1024_keypair(uint8_t *public_key /* OUT */,
+                        uint8_t *secret_key /* OUT */) {
+  boringssl_ensure_ml_kem_self_test();
+  int res = mlkem1024_keypair(public_key, secret_key);
+#if defined(AWSLC_FIPS)
+  /* PCT failure is the only failure condition for key generation. */
+  if (res != 0) {
+      AWS_LC_FIPS_failure("ML-KEM keygen PCT failed");
+  }
+#endif
+  return res;
+}
+
+int ml_kem_1024_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
+                                          uint8_t *shared_secret    /* OUT */,
+                                          const uint8_t *public_key /* IN  */,
+                                          const uint8_t *seed       /* IN */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_enc_derand(ciphertext, shared_secret, public_key, seed);
+}
+
+int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
+                            uint8_t *shared_secret    /* OUT */,
+                            const uint8_t *public_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_enc(ciphertext, shared_secret, public_key);
+}
+
+int ml_kem_1024_decapsulate(uint8_t *shared_secret    /* OUT */,
+                            const uint8_t *ciphertext /* IN  */,
+                            const uint8_t *secret_key /* IN  */) {
+  boringssl_ensure_ml_kem_self_test();
+  return mlkem1024_dec(shared_secret, ciphertext, secret_key);
+}
diff --git a/crypto/fipsmodule/ml_kem/ml_kem.h b/crypto/fipsmodule/ml_kem/ml_kem.h
index 752855764fe..0341cd3daff 100644
--- a/crypto/fipsmodule/ml_kem/ml_kem.h
+++ b/crypto/fipsmodule/ml_kem/ml_kem.h
@@ -65,6 +65,11 @@ int ml_kem_768_keypair_deterministic(uint8_t *public_key /* OUT */,
                                          uint8_t *secret_key /* OUT */,
                                          const uint8_t *seed /* IN */);
 
+int ml_kem_768_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */);
+
+
 int ml_kem_768_keypair(uint8_t *public_key /* OUT */,
                            uint8_t *secret_key /* OUT */);
 
@@ -73,6 +78,12 @@ int ml_kem_768_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
                                              const uint8_t *public_key /* IN  */,
                                              const uint8_t *seed /* IN */);
 
+int ml_kem_768_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */);
+
+
 int ml_kem_768_encapsulate(uint8_t *ciphertext       /* OUT */,
                                uint8_t *shared_secret    /* OUT */,
                                const uint8_t *public_key /* IN  */);
@@ -81,10 +92,18 @@ int ml_kem_768_decapsulate(uint8_t *shared_secret    /* OUT */,
                                const uint8_t *ciphertext /* IN  */,
                                const uint8_t *secret_key /* IN  */);
 
+int ml_kem_768_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */);
+
 int ml_kem_1024_keypair_deterministic(uint8_t *public_key /* OUT */,
                                           uint8_t *secret_key /* OUT */,
                                           const uint8_t *seed /* IN */);
 
+int ml_kem_1024_keypair_deterministic_no_self_test(uint8_t *public_key  /* OUT */,
+                                                  uint8_t *secret_key  /* OUT */,
+                                                  const uint8_t *seed  /* IN */);
+
 int ml_kem_1024_keypair(uint8_t *public_key /* OUT */,
                             uint8_t *secret_key /* OUT */);
 
@@ -93,6 +112,12 @@ int ml_kem_1024_encapsulate_deterministic(uint8_t *ciphertext       /* OUT */,
                                               const uint8_t *public_key /* IN  */,
                                               const uint8_t *seed /* IN */);
 
+int ml_kem_1024_encapsulate_deterministic_no_self_test(uint8_t *ciphertext       /* OUT */,
+                                                      uint8_t *shared_secret    /* OUT */,
+                                                      const uint8_t *public_key /* IN  */,
+                                                      const uint8_t *seed       /* IN */);
+
+
 int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
                                 uint8_t *shared_secret    /* OUT */,
                                 const uint8_t *public_key /* IN  */);
@@ -100,4 +125,10 @@ int ml_kem_1024_encapsulate(uint8_t *ciphertext       /* OUT */,
 int ml_kem_1024_decapsulate(uint8_t *shared_secret    /* OUT */,
                                 const uint8_t *ciphertext /* IN  */,
                                 const uint8_t *secret_key /* IN  */);
+
+int ml_kem_1024_decapsulate_no_self_test(uint8_t *shared_secret    /* OUT */,
+                                        const uint8_t *ciphertext /* IN  */,
+                                        const uint8_t *secret_key /* IN  */);
+
+
 #endif // ML_KEM_H
diff --git a/crypto/fipsmodule/ml_kem/mlkem/.clang-format b/crypto/fipsmodule/ml_kem/mlkem/.clang-format
new file mode 100644
index 00000000000..6f14c0257ac
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/.clang-format
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# clang-format style file for mlkem-native
+#
+BasedOnStyle: Google
+MaxEmptyLinesToKeep: 3
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+DerivePointerAlignment: false
+PointerAlignment: Right
+# TODO(davidben): The default for Google style is now Regroup, but the default
+# IncludeCategories does not recognize <openssl/header.h>. We should
+# reconfigure IncludeCategories to match. For now, keep it at Preserve.
+IncludeBlocks: Preserve
+
+# Designate CBMC contracts/macros that appear in .h files
+# as "attributes" so they don't get increasingly indented line after line
+BreakBeforeBraces: Allman
+InsertBraces: true
+WhitespaceSensitiveMacros: ['__contract__', '__loop__' ]
+Macros:
+ # Make this artificially long to avoid function bodies after short contracts
+ - __contract__(x)={ void a; void b; void c; void d; void e; void f; } void abcdefghijklmnopqrstuvw()
+ - __loop__(x)={}
+ # Make this artificially long to force line break
+ - MLK_INTERNAL_API=void abcdefghijklmnopqrstuvwabcdefghijklmnopqrstuvwabcdefg();
diff --git a/crypto/fipsmodule/ml_kem/mlkem/cbmc.h b/crypto/fipsmodule/ml_kem/mlkem/cbmc.h
new file mode 100644
index 00000000000..38b6ac2b40f
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/cbmc.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_CBMC_H
+#define MLK_CBMC_H
+/***************************************************
+ * Basic replacements for __CPROVER_XXX contracts
+ ***************************************************/
+
+#ifndef CBMC
+
+#define __contract__(x)
+#define __loop__(x)
+
+#else /* !CBMC */
+
+#define __contract__(x) x
+#define __loop__(x) x
+
+/* https://diffblue.github.io/cbmc/contracts-assigns.html */
+#define assigns(...) __CPROVER_assigns(__VA_ARGS__)
+
+/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */
+#define requires(...) __CPROVER_requires(__VA_ARGS__)
+#define ensures(...) __CPROVER_ensures(__VA_ARGS__)
+/* https://diffblue.github.io/cbmc/contracts-loops.html */
+#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__)
+#define decreases(...) __CPROVER_decreases(__VA_ARGS__)
+/* cassert to avoid confusion with in-built assert */
+#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed")
+#define assume(...) __CPROVER_assume(__VA_ARGS__)
+
+/***************************************************
+ * Macros for "expression" forms that may appear
+ * _inside_ top-level contracts.
+ ***************************************************/
+
+/*
+ * function return value - useful inside ensures
+ * https://diffblue.github.io/cbmc/contracts-functions.html
+ */
+#define return_value (__CPROVER_return_value)
+
+/*
+ * assigns l-value targets
+ * https://diffblue.github.io/cbmc/contracts-assigns.html
+ */
+#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__)
+#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__)
+#define same_object(...) __CPROVER_same_object(__VA_ARGS__)
+
+/*
+ * Pointer-related predicates
+ * https://diffblue.github.io/cbmc/contracts-memory-predicates.html
+ */
+#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__)
+#define readable(...) __CPROVER_r_ok(__VA_ARGS__)
+#define writeable(...) __CPROVER_w_ok(__VA_ARGS__)
+
+/*
+ * History variables
+ * https://diffblue.github.io/cbmc/contracts-history-variables.html
+ */
+#define old(...) __CPROVER_old(__VA_ARGS__)
+#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__)
+
+/*
+ * Quantifiers
+ * Note that the range on qvar is qvar_lb (inclusive) .. qvar_ub (exclusive)
+ * https://diffblue.github.io/cbmc/contracts-quantifiers.html
+ */
+
+/*
+ * Prevent clang-format from corrupting CBMC's special ==> operator
+ */
+/* clang-format off */
+#define forall(qvar, qvar_lb, qvar_ub, predicate)                 \
+  __CPROVER_forall                                                \
+  {                                                               \
+    unsigned qvar;                                                \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate)   \
+  }
+
+#define EXISTS(qvar, qvar_lb, qvar_ub, predicate)         \
+  __CPROVER_exists                                              \
+  {                                                             \
+    unsigned qvar;                                              \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate)  \
+  }
+/* clang-format on */
+
+/***************************************************
+ * Convenience macros for common contract patterns
+ ***************************************************/
+
+/*
+ * Boolean-valued predicate that asserts that "all values of array_var are in
+ * range value_lb (inclusive) .. value_ub (exclusive)"
+ * Example:
+ *  array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)
+ * expands to (modulo quantifier name and casts)
+ *  __CPROVER_forall { unsigned k; (0 <= k && k < MLKEM_N) ==> (
+ *  (0 <= a->coeffs[k]) && (a->coeffs[k] < MLKEM_Q)) }
+ */
+
+/*
+ * Prevent clang-format from corrupting CBMC's special ==> operator
+ */
+/* clang-format off */
+#define CBMC_CONCAT_(left, right) left##right
+#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right)
+
+#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var,            \
+                         value_lb, value_ub)                           \
+  __CPROVER_forall                                                     \
+  {                                                                    \
+    unsigned qvar;                                                     \
+    ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==>                    \
+        (((int)(value_lb) <= ((array_var)[(qvar)])) &&		       \
+         (((array_var)[(qvar)]) < (int)(value_ub)))		       \
+  }
+
+#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \
+  array_bound_core(CBMC_CONCAT(_cbmc_idx, __LINE__), (qvar_lb),      \
+      (qvar_ub), (array_var), (value_lb), (value_ub))
+/* clang-format on */
+
+/* Wrapper around array_bound operating on absolute values.
+ *
+ * Note that since the absolute bound is exclusive, but the lower
+ * bound in array_bound is inclusive, we have to raise it by 1.
+ */
+#define array_abs_bound(arr, lb, ub, k) \
+  array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k))
+
+#endif /* CBMC */
+
+#endif /* !MLK_CBMC_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/common.h b/crypto/fipsmodule/ml_kem/mlkem/common.h
new file mode 100644
index 00000000000..8cdb841c3d8
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/common.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_COMMON_H
+#define MLK_COMMON_H
+
+#if defined(MLK_CONFIG_FILE)
+#include MLK_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#include "params.h"
+#include "sys.h"
+
+/* Internal and public API have external linkage by default, but
+ * this can be overwritten by the user, e.g. for single-CU builds. */
+#if !defined(MLK_CONFIG_INTERNAL_API_QUALIFIER)
+#define MLK_INTERNAL_API
+#else
+#define MLK_INTERNAL_API MLK_CONFIG_INTERNAL_API_QUALIFIER
+#endif
+
+#if !defined(MLK_CONFIG_EXTERNAL_API_QUALIFIER)
+#define MLK_EXTERNAL_API
+#else
+#define MLK_EXTERNAL_API MLK_CONFIG_EXTERNAL_API_QUALIFIER
+#endif
+
+#if defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) || \
+    defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED)
+#define MLK_MULTILEVEL_BUILD
+#endif
+
+#define MLK_CONCAT_(x1, x2) x1##x2
+#define MLK_CONCAT(x1, x2) MLK_CONCAT_(x1, x2)
+
+#if defined(MLK_MULTILEVEL_BUILD)
+#define MLK_ADD_LEVEL(s) MLK_CONCAT(s, MLKEM_LVL)
+#else
+#define MLK_ADD_LEVEL(s) s
+#endif
+
+#define MLK_NAMESPACE(s) \
+  MLK_CONCAT(MLK_CONCAT(MLK_CONFIG_NAMESPACE_PREFIX, _), s)
+#define MLK_NAMESPACE_K(s) \
+  MLK_CONCAT(MLK_CONCAT(MLK_ADD_LEVEL(MLK_CONFIG_NAMESPACE_PREFIX), _), s)
+
+/* On Apple platforms, we need to emit leading underscore
+ * in front of assembly symbols. We thus introduce a separate
+ * namespace wrapper for ASM symbols. */
+#if !defined(__APPLE__)
+#define MLK_ASM_NAMESPACE(sym) MLK_NAMESPACE(sym)
+#else
+#define MLK_ASM_NAMESPACE(sym) MLK_CONCAT(_, MLK_NAMESPACE(sym))
+#endif
+
+/*
+ * On X86_64 if control-flow protections (CET) are enabled (through
+ * -fcf-protection=), we add an endbr64 instruction at every global function
+ * label.  See sys.h for more details
+ */
+#if defined(MLK_SYS_X86_64)
+#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) : MLK_CET_ENDBR
+#else
+#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) :
+#endif
+
+/* We aim to simplify the user's life by supporting builds where
+ * all source files are included, even those that are not needed.
+ * Those files are appropriately guarded and will be empty when unneeded.
+ * The following is to avoid compilers complaining about this. */
+#define MLK_EMPTY_CU(s) extern int MLK_NAMESPACE_K(empty_cu_##s);
+
+/* MLK_CONFIG_NO_ASM takes precedence over MLK_CONFIG_USE_NATIVE_BACKEND_XXX */
+#if defined(MLK_CONFIG_NO_ASM)
+#undef MLK_CONFIG_USE_NATIVE_BACKEND_ARITH
+#undef MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202
+#endif
+
+#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH) && \
+    !defined(MLK_CONFIG_ARITH_BACKEND_FILE)
+#error Bad configuration: MLK_CONFIG_USE_NATIVE_BACKEND_ARITH is set, but MLK_CONFIG_ARITH_BACKEND_FILE is not.
+#endif
+
+#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) && \
+    !defined(MLK_CONFIG_FIPS202_BACKEND_FILE)
+#error Bad configuration: MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 is set, but MLK_CONFIG_FIPS202_BACKEND_FILE is not.
+#endif
+
+#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH)
+#include MLK_CONFIG_ARITH_BACKEND_FILE
+/* Include to enforce consistency of API and implementation,
+ * and conduct sanity checks on the backend.
+ *
+ * Keep this _after_ the inclusion of the backend; otherwise,
+ * the sanity checks won't have an effect. */
+#if defined(MLK_CHECK_APIS) && !defined(__ASSEMBLER__)
+#include "native/api.h"
+#endif
+#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */
+
+#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202)
+#include MLK_CONFIG_FIPS202_BACKEND_FILE
+/* Include to enforce consistency of API and implementation,
+ * and conduct sanity checks on the backend.
+ *
+ * Keep this _after_ the inclusion of the backend; otherwise,
+ * the sanity checks won't have an effect. */
+#if defined(MLK_CHECK_APIS) && !defined(__ASSEMBLER__)
+#include "fips202/native/api.h"
+#endif
+#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 */
+
+#if !defined(MLK_CONFIG_FIPS202_CUSTOM_HEADER)
+#define MLK_FIPS202_HEADER_FILE "fips202/fips202.h"
+#else
+#define MLK_FIPS202_HEADER_FILE MLK_CONFIG_FIPS202_CUSTOM_HEADER
+#endif
+
+#if !defined(MLK_CONFIG_FIPS202X4_CUSTOM_HEADER)
+#define MLK_FIPS202X4_HEADER_FILE "fips202/fips202x4.h"
+#else
+#define MLK_FIPS202X4_HEADER_FILE MLK_CONFIG_FIPS202X4_CUSTOM_HEADER
+#endif
+
+/* Just in case we want to include mlkem_native.h, set the configuration
+ * for that header in accordance with the configuration used here. */
+
+/* Double-check that this is not conflicting with pre-existing definitions. */
+#if defined(MLK_CONFIG_API_PARAMETER_SET) ||    \
+    defined(MLK_CONFIG_API_NAMESPACE_PREFIX) || \
+    defined(MLK_CONFIG_API_NO_SUPERCOP) ||      \
+    defined(MLK_CONFIG_API_CONSTANTS_ONLY)
+#error Pre-existing MLK_CONFIG_API_XXX configuration is neither useful nor allowed during an mlkem-native build
+#endif /* MLK_CONFIG_API_PARAMETER_SET || MLK_CONFIG_API_NAMESPACE_PREFIX || \
+          MLK_CONFIG_API_NO_SUPERCOP || MLK_CONFIG_API_CONSTANTS_ONLY */
+
+#define MLK_CONFIG_API_PARAMETER_SET MLK_CONFIG_PARAMETER_SET
+#define MLK_CONFIG_API_NAMESPACE_PREFIX \
+  MLK_ADD_LEVEL(MLK_CONFIG_NAMESPACE_PREFIX)
+
+#endif /* !MLK_COMMON_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/compress.c b/crypto/fipsmodule/ml_kem/mlkem/compress.c
new file mode 100644
index 00000000000..2707ebcbbf8
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/compress.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+
+#include <stdint.h>
+#include <string.h>
+#include "cbmc.h"
+#include "compress.h"
+#include "debug.h"
+#include "verify.h"
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3)
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D4)
+/* Reference: `poly_compress()` in the reference implementation,
+ *            for ML-KEM-{512,768}.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4],
+                          const mlk_poly *a)
+{
+  unsigned i;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    uint8_t t[8] = {0};
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(t, 0, j, 0, 16)))
+    {
+      t[j] = mlk_scalar_compress_d4(a->coeffs[8 * i + j]);
+    }
+
+    r[i * 4] = t[0] | (t[1] << 4);
+    r[i * 4 + 1] = t[2] | (t[3] << 4);
+    r[i * 4 + 2] = t[4] | (t[5] << 4);
+    r[i * 4 + 3] = t[6] | (t[7] << 4);
+  }
+}
+#else  /* !MLK_USE_NATIVE_POLY_COMPRESS_D4 */
+MLK_INTERNAL_API
+void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4],
+                          const mlk_poly *a)
+{
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  mlk_poly_compress_d4_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D10)
+/* Reference: Embedded into `polyvec_compress()` in the
+ *            reference implementation, for ML-KEM-{512,768}.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10],
+                           const mlk_poly *a)
+{
+  unsigned j;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  for (j = 0; j < MLKEM_N / 4; j++)
+  __loop__(invariant(j <= MLKEM_N / 4))
+  {
+    unsigned k;
+    uint16_t t[4];
+    for (k = 0; k < 4; k++)
+    __loop__(
+      invariant(k <= 4)
+      invariant(forall(r, 0, k, t[r] < (1u << 10))))
+    {
+      t[k] = mlk_scalar_compress_d10(a->coeffs[4 * j + k]);
+    }
+
+    /*
+     * Make all implicit truncation explicit. No data is being
+     * truncated for the LHS's since each t[i] is 10-bit in size.
+     */
+    r[5 * j + 0] = (t[0] >> 0) & 0xFF;
+    r[5 * j + 1] = (t[0] >> 8) | ((t[1] << 2) & 0xFF);
+    r[5 * j + 2] = (t[1] >> 6) | ((t[2] << 4) & 0xFF);
+    r[5 * j + 3] = (t[2] >> 4) | ((t[3] << 6) & 0xFF);
+    r[5 * j + 4] = (t[3] >> 2);
+  }
+}
+#else  /* !MLK_USE_NATIVE_POLY_COMPRESS_D10 */
+MLK_INTERNAL_API
+void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10],
+                           const mlk_poly *a)
+{
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+  mlk_poly_compress_d10_native(r, a->coeffs);
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D10 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4)
+/* Reference: `poly_decompress()` in the reference implementation,
+ *            for ML-KEM-{512,768}. */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d4(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 2)
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q)))
+  {
+    r->coeffs[2 * i + 0] = mlk_scalar_decompress_d4((a[i] >> 0) & 0xF);
+    r->coeffs[2 * i + 1] = mlk_scalar_decompress_d4((a[i] >> 4) & 0xF);
+  }
+
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d4(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4])
+{
+  mlk_poly_decompress_d4_native(r->coeffs, a);
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10)
+/* Reference: Embedded into `polyvec_decompress()` in the
+ *            reference implementation, for ML-KEM-{512,768}. */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d10(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
+{
+  unsigned j;
+  for (j = 0; j < MLKEM_N / 4; j++)
+  __loop__(
+    invariant(j <= MLKEM_N / 4)
+    invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q)))
+  {
+    unsigned k;
+    uint16_t t[4];
+    uint8_t const *base = &a[5 * j];
+
+    t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
+    t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6));
+    t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4));
+    t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2));
+
+    for (k = 0; k < 4; k++)
+    __loop__(
+      invariant(k <= 4)
+      invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q)))
+    {
+      r->coeffs[4 * j + k] = mlk_scalar_decompress_d10(t[k]);
+    }
+  }
+
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d10(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10])
+{
+  mlk_poly_decompress_d10_native(r->coeffs, a);
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D5)
+/* Reference: `poly_compress()` in the reference implementation,
+ *            for ML-KEM-1024.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5],
+                          const mlk_poly *a)
+{
+  unsigned i;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    uint8_t t[8] = {0};
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(t, 0, j, 0, 32)))
+    {
+      t[j] = mlk_scalar_compress_d5(a->coeffs[8 * i + j]);
+    }
+
+    /*
+     * Explicitly truncate to avoid warning about
+     * implicit truncation in CBMC, and use array indexing into
+     * r rather than pointer-arithmetic to simplify verification
+     */
+    r[i * 5] = 0xFF & ((t[0] >> 0) | (t[1] << 5));
+    r[i * 5 + 1] = 0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7));
+    r[i * 5 + 2] = 0xFF & ((t[3] >> 1) | (t[4] << 4));
+    r[i * 5 + 3] = 0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6));
+    r[i * 5 + 4] = 0xFF & ((t[6] >> 2) | (t[7] << 3));
+  }
+}
+#else  /* !MLK_USE_NATIVE_POLY_COMPRESS_D5 */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_compress_d5 */
+void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5],
+                          const mlk_poly *a)
+{
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); /* input must be in [0, MLKEM_Q) */
+  mlk_poly_compress_d5_native(r, a->coeffs); /* raw coefficient array */
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D5 */
+
+#if !defined(MLK_USE_NATIVE_POLY_COMPRESS_D11)
+/* Reference: Embedded into `polyvec_compress()` in the
+ *            reference implementation, for ML-KEM-1024.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11],
+                           const mlk_poly *a)
+{
+  unsigned j;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (j = 0; j < MLKEM_N / 8; j++) /* 8 coefficients -> 11 bytes per round */
+  __loop__(invariant(j <= MLKEM_N / 8))
+  {
+    unsigned k;
+    uint16_t t[8]; /* 11-bit compressed values (each < 2^11) */
+    for (k = 0; k < 8; k++)
+    __loop__(
+      invariant(k <= 8)
+      invariant(forall(r, 0, k, t[r] < (1u << 11))))
+    {
+      t[k] = mlk_scalar_compress_d11(a->coeffs[8 * j + k]);
+    }
+
+    /*
+     * Pack eight 11-bit values into 11 bytes, low bits first. Truncation is
+     * made explicit; unmasked right shifts fit in 8 bits as t[i] < 2^11.
+     */
+    r[11 * j + 0] = (t[0] >> 0) & 0xFF;
+    r[11 * j + 1] = (t[0] >> 8) | ((t[1] << 3) & 0xFF);
+    r[11 * j + 2] = (t[1] >> 5) | ((t[2] << 6) & 0xFF);
+    r[11 * j + 3] = (t[2] >> 2) & 0xFF;
+    r[11 * j + 4] = (t[2] >> 10) | ((t[3] << 1) & 0xFF);
+    r[11 * j + 5] = (t[3] >> 7) | ((t[4] << 4) & 0xFF);
+    r[11 * j + 6] = (t[4] >> 4) | ((t[5] << 7) & 0xFF);
+    r[11 * j + 7] = (t[5] >> 1) & 0xFF;
+    r[11 * j + 8] = (t[5] >> 9) | ((t[6] << 2) & 0xFF);
+    r[11 * j + 9] = (t[6] >> 6) | ((t[7] << 5) & 0xFF);
+    r[11 * j + 10] = (t[7] >> 3);
+  }
+}
+#else  /* !MLK_USE_NATIVE_POLY_COMPRESS_D11 */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_compress_d11 */
+void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11],
+                           const mlk_poly *a)
+{
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); /* input must be in [0, MLKEM_Q) */
+  mlk_poly_compress_d11_native(r, a->coeffs); /* raw coefficient array */
+}
+#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D11 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5)
+/* Reference: `poly_decompress()` in the reference implementation,
+ *            for ML-KEM-1024. */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d5(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 8; i++) /* 5 bytes -> 8 coefficients per round */
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
+  {
+    unsigned j;
+    uint8_t t[8];
+    const unsigned offset = i * 5; /* first of this round's 5 input bytes */
+    /*
+     * The 0x1F masks truncate explicitly to avoid CBMC warnings about
+     * implicit truncation; the unpacking is written out (unrolled) for
+     * ease of proof.
+     */
+
+    /*
+     * Unpack 5 8-bit bytes (so 40 bits) into
+     * 8 5-bit values stored in t[]
+     */
+    t[0] = 0x1F & (a[offset + 0] >> 0);
+    t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3));
+    t[2] = 0x1F & (a[offset + 1] >> 2);
+    t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1));
+    t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4));
+    t[5] = 0x1F & (a[offset + 3] >> 1);
+    t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2));
+    t[7] = 0x1F & (a[offset + 4] >> 3);
+
+    /* decompress each value and store it in the matching slot of r[] */
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(j <= 8 && i <= MLKEM_N / 8)
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
+    {
+      r->coeffs[8 * i + j] = mlk_scalar_decompress_d5(t[j]);
+    }
+  }
+
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_decompress_d5 */
+void mlk_poly_decompress_d5(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5])
+{
+  mlk_poly_decompress_d5_native(r->coeffs, a); /* raw coefficient array */
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); /* output is in [0, MLKEM_Q) */
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */
+
+#if !defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11)
+/* Reference: Embedded into `polyvec_decompress()` in the
+ *            reference implementation, for ML-KEM-1024. */
+MLK_INTERNAL_API
+void mlk_poly_decompress_d11(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
+{
+  unsigned j;
+  for (j = 0; j < MLKEM_N / 8; j++) /* 11 bytes -> 8 coefficients per round */
+  __loop__(
+    invariant(j <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q)))
+  {
+    unsigned k;
+    uint16_t t[8]; /* unpacked 11-bit values (masked with 0x7FF below) */
+    uint8_t const *base = &a[11 * j]; /* this round's 11 input bytes */
+    t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8));
+    t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5));
+    t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) |
+                    ((uint16_t)base[4] << 10));
+    t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7));
+    t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4));
+    t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) |
+                    ((uint16_t)base[8] << 9));
+    t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6));
+    t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3));
+
+    for (k = 0; k < 8; k++) /* decompress into the matching slots of r */
+    __loop__(
+      invariant(k <= 8)
+      invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q)))
+    {
+      r->coeffs[8 * j + k] = mlk_scalar_decompress_d11(t[k]);
+    }
+  }
+
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_decompress_d11 */
+void mlk_poly_decompress_d11(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11])
+{
+  mlk_poly_decompress_d11_native(r->coeffs, a); /* raw coefficient array */
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); /* output is in [0, MLKEM_Q) */
+}
+#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */
+
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
+
+#if !defined(MLK_USE_NATIVE_POLY_TOBYTES)
+/* Reference: `poly_tobytes()` in the reference implementation.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a)
+{
+  unsigned i;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 2; i++) /* 2 coefficients -> 3 bytes per round */
+  __loop__(invariant(i <= MLKEM_N / 2))
+  {
+    const uint16_t t0 = a->coeffs[2 * i];
+    const uint16_t t1 = a->coeffs[2 * i + 1];
+    /*
+     * t0 and t1 are both < MLKEM_Q, so each contains at most 12 bits of
+     * significant data; together they pack into 24 bits, i.e. exactly
+     * 3 bytes, as follows.
+     */
+
+    /* Least significant bits 0 - 7 of t0. */
+    r[3 * i + 0] = t0 & 0xFF;
+
+    /*
+     * Most significant bits 8 - 11 of t0 become the least significant
+     * nibble of the second byte. The least significant 4 bits
+     * of t1 become the upper nibble of the second byte.
+     */
+    r[3 * i + 1] = (t0 >> 8) | ((t1 << 4) & 0xF0);
+
+    /* Bits 4 - 11 of t1 become the third byte. */
+    r[3 * i + 2] = t1 >> 4;
+  }
+}
+#else  /* !MLK_USE_NATIVE_POLY_TOBYTES */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_tobytes */
+void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a)
+{
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); /* input must be in [0, MLKEM_Q) */
+  mlk_poly_tobytes_native(r, a->coeffs); /* raw coefficient array */
+}
+#endif /* MLK_USE_NATIVE_POLY_TOBYTES */
+
+#if !defined(MLK_USE_NATIVE_POLY_FROMBYTES)
+/* Reference: `poly_frombytes()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 2; i++) /* 3 bytes -> 2 coefficients per round */
+  __loop__(
+    invariant(i <= MLKEM_N / 2)
+    invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_UINT12_LIMIT)))
+  {
+    const uint8_t t0 = a[3 * i + 0]; /* bits 0-7 of the first coefficient */
+    const uint8_t t1 = a[3 * i + 1]; /* split between both coefficients */
+    const uint8_t t2 = a[3 * i + 2]; /* bits 4-11 of the second coefficient */
+    r->coeffs[2 * i + 0] = t0 | ((t1 << 8) & 0xFFF);
+    r->coeffs[2 * i + 1] = (t1 >> 4) | (t2 << 4);
+  }
+
+  /* Coefficients are 12-bit values and need not be canonical (< MLKEM_Q) */
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+}
+#else  /* !MLK_USE_NATIVE_POLY_FROMBYTES */
+MLK_INTERNAL_API /* native-backend variant of mlk_poly_frombytes */
+void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES])
+{
+  mlk_poly_frombytes_native(r->coeffs, a); /* NOTE(review): no bound assert */
+}
+#endif /* MLK_USE_NATIVE_POLY_FROMBYTES */
+
+/* Reference: `poly_frommsg()` in the reference implementation.
+ *            - We use a value barrier around the bit-selection mask to
+ *              reduce the risk of compiler-introduced branches.
+ *              The reference implementation contains the expression
+ *              `(msg[i] >> j) & 1` which the compiler can reason must
+ *              be either 0 or 1. */
+MLK_INTERNAL_API
+void mlk_poly_frommsg(mlk_poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
+{
+  unsigned i;
+#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8)
+#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!"
+#endif
+
+  for (i = 0; i < MLKEM_N / 8; i++) /* one message byte -> 8 coefficients */
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q)))
+  {
+    unsigned j;
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <  MLKEM_N / 8 && j <= 8)
+      invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q)))
+    {
+      /* mlk_ct_sel_int16(MLKEM_Q_HALF, 0, b) is `Decompress_1(b != 0)`
+       * as per [FIPS 203, Eq (4.8)]. */
+
+      /* Prevent the compiler from recognizing this as a bit selection */
+      uint8_t mask = mlk_value_barrier_u8(1u << j);
+      r->coeffs[8 * i + j] = mlk_ct_sel_int16(MLKEM_Q_HALF, 0, msg[i] & mask);
+    }
+  }
+  mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q); /* loop proves [0, MLKEM_Q) */
+}
+
+/* Reference: `poly_tomsg()` in the reference implementation.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
+ */
+MLK_INTERNAL_API
+void mlk_poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const mlk_poly *a)
+{
+  unsigned i;
+  mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q);
+
+  for (i = 0; i < MLKEM_N / 8; i++) /* 8 coefficients -> one message byte */
+  __loop__(invariant(i <= MLKEM_N / 8))
+  {
+    unsigned j;
+    msg[i] = 0; /* start from zero; bits are OR-ed in below */
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8))
+    {
+      uint32_t t = mlk_scalar_compress_d1(a->coeffs[8 * i + j]);
+      msg[i] |= t << j; /* t < 2, so this sets or leaves bit j */
+    }
+  }
+}
+
+#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */
+
+MLK_EMPTY_CU(compress)
+
+#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/compress.h b/crypto/fipsmodule/ml_kem/mlkem/compress.h
new file mode 100644
index 00000000000..0cbafa7e055
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/compress.h
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_COMPRESS_H
+#define MLK_COMPRESS_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "debug.h"
+#include "poly.h"
+#include "verify.h"
+
+/************************************************************
+ * Name: mlk_scalar_compress_d1
+ *
+ * Description: Computes round(u * 2 / q) % 2
+ *
+ * Arguments: - u: Unsigned canonical value modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_1 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+
+/*
+ * For u < MLKEM_Q the product fits in 32 bits, but adding the rounding
+ * constant can wrap around modulo 2^32. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/* Reference: Embedded in poly_tomsg() in the reference implementation. */
+static MLK_INLINE uint32_t mlk_scalar_compress_d1(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 2)
+  ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2)  )
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 2 / MLKEM_Q)
+   *   = round(u * 2 * (2^31 / MLKEM_Q) / 2^31)
+   *  ~= round(u * 2 * round(2^31 / MLKEM_Q) / 2^31)
+   * ```
+   */
+  /* check-magic: 1290168 == 2*round(2^31 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290168;
+  return (d0 + (1u << 30)) >> 31; /* round(d0/2^31), reduced mod 2 by wrap */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: mlk_scalar_compress_d4
+ *
+ * Description: Computes round(u * 16 / q) % 16
+ *
+ * Arguments: - u: Unsigned canonical value modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_4 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * For u < MLKEM_Q the product fits in 32 bits, but adding the rounding
+ * constant can wrap around modulo 2^32. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/* Reference: Embedded into `poly_compress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint32_t mlk_scalar_compress_d4(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 16)
+  ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 16 / MLKEM_Q)
+   *   = round(u * 16 * (2^28 / MLKEM_Q) / 2^28)
+   *  ~= round(u * 16 * round(2^28 / MLKEM_Q) / 2^28)
+   * ```
+   */
+  /* check-magic: 1290160 == 16 * round(2^28 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290160;
+  return (d0 + (1u << 27)) >> 28; /* round(d0/2^28), reduced mod 16 by wrap */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: mlk_scalar_decompress_d4
+ *
+ * Description: Computes round(u * q / 16)
+ *
+ * Arguments: - u: Unsigned canonical value modulo 16
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_4 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+
+/* Reference: Embedded into `poly_decompress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint16_t mlk_scalar_decompress_d4(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 16)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 8) >> 4; } /* +8: round to nearest */
+
+/************************************************************
+ * Name: mlk_scalar_compress_d5
+ *
+ * Description: Computes round(u * 32 / q) % 32
+ *
+ * Arguments: - u: Unsigned canonical value modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_5 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * For u < MLKEM_Q the product fits in 32 bits, but adding the rounding
+ * constant can wrap around modulo 2^32. This is expected and required.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/* Reference: Embedded into `poly_compress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint32_t mlk_scalar_compress_d5(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < 32)
+  ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32)  )
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 32 / MLKEM_Q)
+   *   = round(u * 32 * (2^27 / MLKEM_Q) / 2^27)
+   *  ~= round(u * 32 * round(2^27 / MLKEM_Q) / 2^27)
+   * ```
+   */
+  /* check-magic: 1290176 == 2^5 * round(2^27 / MLKEM_Q) */
+  uint32_t d0 = (uint32_t)u * 1290176;
+  return (d0 + (1u << 26)) >> 27; /* round(d0/2^27), reduced mod 32 by wrap */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: mlk_scalar_decompress_d5
+ *
+ * Description: Computes round(u * q / 32)
+ *
+ * Arguments: - u: Unsigned canonical value modulo 32
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_5 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+
+/* Reference: Embedded into `poly_decompress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint16_t mlk_scalar_decompress_d5(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 32)
+  ensures(return_value <= MLKEM_Q - 1)
+) { return ((u * MLKEM_Q) + 16) >> 5; } /* +16: round to nearest */
+
+/************************************************************
+ * Name: mlk_scalar_compress_d10
+ *
+ * Description: Computes round(u * 2**10 / q) % 2**10
+ *
+ * Arguments: - u: Unsigned canonical value modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_10 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The product generally exceeds UINT32_MAX, so it is computed in 64 bits.
+ * Nothing wraps around here; the result is reduced by an explicit mask.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/* Reference: Embedded into `polyvec_compress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint32_t mlk_scalar_compress_d10(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < (1u << 10))
+  ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10)))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 1024 / MLKEM_Q)
+   *   = round(u * 1024 * (2^33 / MLKEM_Q) / 2^33)
+   *  ~= round(u * 1024 * round(2^33 / MLKEM_Q) / 2^33)
+   * ```
+   */
+  /* check-magic: 2642263040 == 2^10 * round(2^33 / MLKEM_Q) */
+  uint64_t d0 = (uint64_t)u * 2642263040;
+  d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */
+  return (d0 & 0x3FF); /* reduce modulo 2^10 */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: mlk_scalar_decompress_d10
+ *
+ * Description: Computes round(u * q / 1024)
+ *
+ * Arguments: - u: Unsigned canonical value modulo 1024
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_10 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+
+/* Reference: Embedded into `polyvec_decompress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint16_t mlk_scalar_decompress_d10(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 1024)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 512) >> 10; } /* +512: round to nearest */
+
+/************************************************************
+ * Name: mlk_scalar_compress_d11
+ *
+ * Description: Computes round(u * 2**11 / q) % 2**11
+ *
+ * Arguments: - u: Unsigned canonical value modulo q
+ *                 to be compressed.
+ *
+ * Specification: Compress_11 from [FIPS 203, Eq (4.7)].
+ *
+ ************************************************************/
+/*
+ * The product generally exceeds UINT32_MAX, so it is computed in 64 bits.
+ * Nothing wraps around here; the result is reduced by an explicit mask.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/* Reference: Embedded into `polyvec_compress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint32_t mlk_scalar_compress_d11(uint16_t u)
+__contract__(
+  requires(u <= MLKEM_Q - 1)
+  ensures(return_value < (1u << 11))
+  ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11)))
+{
+  /* Compute as follows:
+   * ```
+   * round(u * 2048 / MLKEM_Q)
+   *   = round(u * 2048 * (2^33 / MLKEM_Q) / 2^33)
+   *  ~= round(u * 2048 * round(2^33 / MLKEM_Q) / 2^33)
+   * ```
+   */
+  /* check-magic: 5284526080 == 2^11 * round(2^33 / MLKEM_Q) */
+  uint64_t d0 = (uint64_t)u * 5284526080;
+  d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */
+  return (d0 & 0x7FF); /* reduce modulo 2^11 */
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/************************************************************
+ * Name: mlk_scalar_decompress_d11
+ *
+ * Description: Computes round(u * q / 2048)
+ *
+ * Arguments: - u: Unsigned canonical value modulo 2048
+ *                 to be decompressed.
+ *
+ * Specification: Decompress_11 from [FIPS 203, Eq (4.8)].
+ *
+ ************************************************************/
+
+/* Reference: Embedded into `polyvec_decompress()` in the
+ *            reference implementation. */
+static MLK_INLINE uint16_t mlk_scalar_decompress_d11(uint32_t u)
+__contract__(
+  requires(0 <= u && u < 2048)
+  ensures(return_value <= (MLKEM_Q - 1))
+) { return ((u * MLKEM_Q) + 1024) >> 11; } /* +1024: round to nearest */
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3)
+#define mlk_poly_compress_d4 MLK_NAMESPACE(poly_compress_d4)
+/*************************************************
+ * Name:        mlk_poly_compress_d4
+ *
+ * Description: Compression (4 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_4 (Compress_4 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23],
+ *                  where `d_v=4` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4],
+                          const mlk_poly *a);
+
+#define mlk_poly_compress_d10 MLK_NAMESPACE(poly_compress_d10)
+/*************************************************
+ * Name:        mlk_poly_compress_d10
+ *
+ * Description: Compression (10 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_10 (Compress_10 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                  where `d_u=10` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10],
+                           const mlk_poly *a);
+
+#define mlk_poly_decompress_d4 MLK_NAMESPACE(poly_decompress_d4)
+/*************************************************
+ * Name:        mlk_poly_decompress_d4
+ *
+ * Description: De-serialization and subsequent decompression (4 bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_d4
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_4 (ByteDecode_4 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4],
+ *                  where `d_v=4` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_decompress_d4(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]);
+
+#define mlk_poly_decompress_d10 MLK_NAMESPACE(poly_decompress_d10)
+/*************************************************
+ * Name:        mlk_poly_decompress_d10
+ *
+ * Description: De-serialization and subsequent decompression (10 bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_d10
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_10 (ByteDecode_10 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3],
+ *                  where `d_u=10` for ML-KEM-{512,768} [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_decompress_d10(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]);
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4
+#define mlk_poly_compress_d5 MLK_NAMESPACE(poly_compress_d5)
+/*************************************************
+ * Name:        mlk_poly_compress_d5
+ *
+ * Description: Compression (5 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_5 (Compress_5 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23],
+ *                  where `d_v=5` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5],
+                          const mlk_poly *a);
+
+#define mlk_poly_compress_d11 MLK_NAMESPACE(poly_compress_d11)
+/*************************************************
+ * Name:        mlk_poly_compress_d11
+ *
+ * Description: Compression (11 bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_11 (Compress_11 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                  where `d_u=11` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11],
+                           const mlk_poly *a);
+
+#define mlk_poly_decompress_d5 MLK_NAMESPACE(poly_decompress_d5)
+/*************************************************
+ * Name:        mlk_poly_decompress_d5
+ *
+ * Description: De-serialization and subsequent decompression (5 bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_d5
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_5 (ByteDecode_5 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4],
+ *                  where `d_v=5` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_decompress_d5(mlk_poly *r,
+                            const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]);
+
+#define mlk_poly_decompress_d11 MLK_NAMESPACE(poly_decompress_d11)
+/*************************************************
+ * Name:        mlk_poly_decompress_d11
+ *
+ * Description: De-serialization and subsequent decompression (11 bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_d11
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_11 (ByteDecode_11 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3],
+ *                  where `d_u=11` for ML-KEM-1024 [FIPS 203, Table 2].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_decompress_d11(mlk_poly *r,
+                             const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]);
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */
+
+#define mlk_poly_tobytes MLK_NAMESPACE(poly_tobytes)
+/*************************************************
+ * Name:        mlk_poly_tobytes
+ *
+ * Description: Serialization of a polynomial.
+ *              Coefficients must already be in unsigned
+ *              canonical form; no conversion is performed.
+ *
+ * Arguments:   INPUT:
+ *              - a: const pointer to input polynomial,
+ *                with each coefficient in the range [0,1,..,Q-1]
+ *              OUTPUT
+ *              - r: pointer to output byte array
+ *                   (of MLKEM_POLYBYTES bytes)
+ *
+ * Specification: Implements ByteEncode_12 [FIPS 203, Algorithm 5].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYBYTES))
+  requires(memory_no_alias(a, sizeof(mlk_poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(r))
+);
+
+
+#define mlk_poly_frombytes MLK_NAMESPACE(poly_frombytes)
+/*************************************************
+ * Name:        mlk_poly_frombytes
+ *
+ * Description: De-serialization of a polynomial.
+ *
+ * Arguments:   INPUT
+ *              - a: pointer to input byte array
+ *                   (of MLKEM_POLYBYTES bytes)
+ *              OUTPUT
+ *              - r: pointer to output polynomial, with
+ *                   each coefficient unsigned and in the range
+ *                   0 .. 4095
+ *
+ * Specification: Implements ByteDecode_12 [FIPS 203, Algorithm 6].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYBYTES))
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))
+);
+
+
+#define mlk_poly_frommsg MLK_NAMESPACE(poly_frommsg)
+/*************************************************
+ * Name:        mlk_poly_frommsg
+ *
+ * Description: Convert 32-byte message to polynomial
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *msg: pointer to input message
+ *
+ * Specification: Implements `Decompress_1 (ByteDecode_1 (a))`:
+ *                - ByteDecode_d: [FIPS 203, Algorithm 6],
+ *                - Decompress_d: [FIPS 203, Eq (4.8)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `Decompress_1 (ByteDecode_1 (w))` appears in
+ *                  [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L20].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_frommsg(mlk_poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES])
+__contract__(
+  requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(object_whole(r))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+);
+
+#define mlk_poly_tomsg MLK_NAMESPACE(poly_tomsg)
+/*************************************************
+ * Name:        mlk_poly_tomsg
+ *
+ * Description: Convert polynomial to 32-byte message
+ *
+ * Arguments:   - uint8_t *msg: pointer to output message
+ *              - const mlk_poly *r: pointer to input polynomial
+ *                Coefficients must be unsigned canonical
+ *
+ * Specification: Implements `ByteEncode_1 (Compress_1 (a))`:
+ *                - ByteEncode_d: [FIPS 203, Algorithm 5],
+ *                - Compress_d: [FIPS 203, Eq (4.7)]
+ *                  Extended to vectors as per
+ *                  [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                - `ByteEncode_1 (Compress_1 (w))` appears in
+ *                  [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L7].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const mlk_poly *r)
+__contract__(
+  requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(msg))
+);
+
+#endif /* !MLK_COMPRESS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/debug.c b/crypto/fipsmodule/ml_kem/mlkem/debug.c
new file mode 100644
index 00000000000..5a644a33c10
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/debug.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG. */
+
+#include "common.h"
+
+#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && defined(MLKEM_DEBUG)
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "debug.h"
+
+#define MLK_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] "
+
+void mlk_debug_check_assert(const char *file, int line, const int val)
+{
+  if (val == 0)
+  {
+    fprintf(stderr, MLK_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n",
+            file, line, val);
+    exit(1);
+  }
+}
+
+void mlk_debug_check_bounds(const char *file, int line, const int16_t *ptr,
+                            unsigned len, int lower_bound_exclusive,
+                            int upper_bound_exclusive)
+{
+  int err = 0;
+  unsigned i;
+  for (i = 0; i < len; i++)
+  {
+    int16_t val = ptr[i];
+    if (!(val > lower_bound_exclusive && val < upper_bound_exclusive))
+    {
+      fprintf(
+          stderr,
+          MLK_DEBUG_ERROR_HEADER
+          "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n",
+          file, line, i, (int)val, lower_bound_exclusive,
+          upper_bound_exclusive);
+      err = 1;
+    }
+  }
+
+  if (err == 1)
+  {
+    exit(1);
+  }
+}
+
+#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED && MLKEM_DEBUG */
+
+MLK_EMPTY_CU(debug)
+
+#endif /* !(!MLK_CONFIG_MULTILEVEL_NO_SHARED && MLKEM_DEBUG) */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef MLK_DEBUG_ERROR_HEADER
diff --git a/crypto/fipsmodule/ml_kem/mlkem/debug.h b/crypto/fipsmodule/ml_kem/mlkem/debug.h
new file mode 100644
index 00000000000..bc03c230b7c
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/debug.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_DEBUG_H
+#define MLK_DEBUG_H
+#include "common.h"
+
+#if defined(MLKEM_DEBUG)
+#include <stdint.h>
+
+/*************************************************
+ * Name:        mlk_debug_check_assert
+ *
+ * Description: Check debug assertion
+ *
+ *              Prints an error message to stderr and calls
+ *              exit(1) if not.
+ *
+ * Arguments:   - file: filename
+ *              - line: line number
+ *              - val: Value asserted to be non-zero
+ **************************************************/
+#define mlk_debug_check_assert MLK_NAMESPACE(mlkem_debug_assert)
+void mlk_debug_check_assert(const char *file, int line, const int val);
+
+/*************************************************
+ * Name:        mlk_debug_check_bounds
+ *
+ * Description: Check whether values in an array of int16_t
+ *              are within specified bounds.
+ *
+ *              Prints an error message to stderr and calls
+ *              exit(1) if not.
+ *
+ * Arguments:   - file: filename
+ *              - line: line number
+ *              - ptr: Base of array to be checked
+ *              - len: Number of int16_t in ptr
+ *              - lower_bound_exclusive: Exclusive lower bound
+ *              - upper_bound_exclusive: Exclusive upper bound
+ **************************************************/
+#define mlk_debug_check_bounds MLK_NAMESPACE(mlkem_debug_check_bounds)
+void mlk_debug_check_bounds(const char *file, int line, const int16_t *ptr,
+                            unsigned len, int lower_bound_exclusive,
+                            int upper_bound_exclusive);
+
+/* Check assertion, calling exit() upon failure
+ *
+ * val: Value that's asserted to be non-zero
+ */
+#define mlk_assert(val) mlk_debug_check_assert(__FILE__, __LINE__, (val))
+
+/* Check bounds in array of int16_t's
+ * ptr: Base of int16_t array; will be explicitly cast to int16_t*,
+ *      so you may pass a byte-compatible type such as mlk_poly or mlk_polyvec.
+ * len: Number of int16_t in array
+ * value_lb: Inclusive lower value bound
+ * value_ub: Exclusive upper value bound */
+#define mlk_assert_bound(ptr, len, value_lb, value_ub)                      \
+  mlk_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \
+                         (value_lb) - 1, (value_ub))
+
+/* Check absolute bounds in array of int16_t's
+ * ptr: Base of array, expression of type int16_t*
+ * len: Number of int16_t in array
+ * value_abs_bd: Exclusive absolute upper bound */
+#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \
+  mlk_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd))
+
+/* Version of bounds assertions for 2-dimensional arrays */
+#define mlk_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \
+  mlk_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub))
+
+#define mlk_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \
+  mlk_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd))
+
+/* When running CBMC, convert debug assertions into proof obligations */
+#elif defined(CBMC)
+#include "cbmc.h"
+
+#define mlk_assert(val) cassert(val)
+
+#define mlk_assert_bound(ptr, len, value_lb, value_ub) \
+  cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub)))
+
+#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \
+  cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd)))
+
+/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't
+ * just use a single flattened array_bound(...) here. */
+#define mlk_assert_bound_2d(ptr, M, N, value_lb, value_ub)             \
+  cassert(forall(kN, 0, (M),                                           \
+                 array_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \
+                             (value_lb), (value_ub))))
+
+#define mlk_assert_abs_bound_2d(ptr, M, N, value_abs_bd)                   \
+  cassert(forall(kN, 0, (M),                                               \
+                 array_abs_bound(&((int16_t(*)[(N)])(ptr))[kN][0], 0, (N), \
+                                 (value_abs_bd))))
+
+#else /* !MLKEM_DEBUG && CBMC */
+
+#define mlk_assert(val) \
+  do                    \
+  {                     \
+  } while (0)
+#define mlk_assert_bound(ptr, len, value_lb, value_ub) \
+  do                                                   \
+  {                                                    \
+  } while (0)
+#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \
+  do                                                 \
+  {                                                  \
+  } while (0)
+
+#define mlk_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \
+  do                                                             \
+  {                                                              \
+  } while (0)
+
+#define mlk_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \
+  do                                                           \
+  {                                                            \
+  } while (0)
+
+
+#endif /* !MLKEM_DEBUG && !CBMC */
+#endif /* !MLK_DEBUG_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/indcpa.c b/crypto/fipsmodule/ml_kem/mlkem/indcpa.c
new file mode 100644
index 00000000000..87bf550d842
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/indcpa.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "cbmc.h"
+#include "debug.h"
+#include "indcpa.h"
+#include "poly.h"
+#include "poly_k.h"
+#include "randombytes.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Level namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define mlk_pack_pk MLK_ADD_LEVEL(mlk_pack_pk)
+#define mlk_unpack_pk MLK_ADD_LEVEL(mlk_unpack_pk)
+#define mlk_pack_sk MLK_ADD_LEVEL(mlk_pack_sk)
+#define mlk_unpack_sk MLK_ADD_LEVEL(mlk_unpack_sk)
+#define mlk_pack_ciphertext MLK_ADD_LEVEL(mlk_pack_ciphertext)
+#define mlk_unpack_ciphertext MLK_ADD_LEVEL(mlk_unpack_ciphertext)
+#define mlk_matvec_mul MLK_ADD_LEVEL(mlk_matvec_mul)
+/* End of level namespacing */
+
+/*************************************************
+ * Name:        mlk_pack_pk
+ *
+ * Description: Serialize the public key as concatenation of the
+ *              serialized vector of polynomials pk
+ *              and the public seed used to generate the matrix A.
+ *
+ * Arguments:   uint8_t *r: pointer to the output serialized public key
+ *              mlk_polyvec pk: pointer to the input public-key mlk_polyvec.
+ *                Must have coefficients within [0,..,q-1].
+ *              const uint8_t *seed: pointer to the input public seed
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L19]
+ *
+ **************************************************/
+static void mlk_pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], mlk_polyvec pk,
+                        const uint8_t seed[MLKEM_SYMBYTES])
+{
+  mlk_assert_bound_2d(pk, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  mlk_polyvec_tobytes(r, pk);
+  memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES);
+}
+
+/*************************************************
+ * Name:        mlk_unpack_pk
+ *
+ * Description: De-serialize public key from a byte array;
+ *              approximate inverse of mlk_pack_pk
+ *
+ * Arguments:   - mlk_polyvec pk: pointer to output public-key polynomial
+ *                vector, with coefficients below MLKEM_UINT12_LIMIT.
+ *              - uint8_t *seed: pointer to output seed to generate matrix A
+ *              - const uint8_t *packedpk: pointer to input serialized public
+ *                  key.
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L2-3]
+ *
+ **************************************************/
+static void mlk_unpack_pk(mlk_polyvec pk, uint8_t seed[MLKEM_SYMBYTES],
+                          const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES])
+{
+  mlk_polyvec_frombytes(pk, packedpk);
+  memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES);
+
+  /* NOTE: If a modulus check was conducted on the PK, we know at this
+   * point that the coefficients of `pk` are unsigned canonical. The
+   * specifications and proofs, however, do _not_ assume this, and instead
+   * work with the easily provable bound by MLKEM_UINT12_LIMIT. */
+}
+
+/*************************************************
+ * Name:        mlk_pack_sk
+ *
+ * Description: Serialize the secret key
+ *
+ * Arguments:   - uint8_t *r: pointer to output serialized secret key
+ *              - mlk_polyvec sk: pointer to input vector of polynomials
+ *                (secret key)
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L20]
+ *
+ **************************************************/
+static void mlk_pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], mlk_polyvec sk)
+{
+  mlk_assert_bound_2d(sk, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  mlk_polyvec_tobytes(r, sk);
+}
+
+/*************************************************
+ * Name:        mlk_unpack_sk
+ *
+ * Description: De-serialize the secret key; inverse of mlk_pack_sk
+ *
+ * Arguments:   - mlk_polyvec sk: pointer to output vector of polynomials
+ *                (secret key)
+ *              - const uint8_t *packedsk: pointer to input serialized secret
+ *                key
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L5]
+ *
+ **************************************************/
+static void mlk_unpack_sk(mlk_polyvec sk,
+                          const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES])
+{
+  mlk_polyvec_frombytes(sk, packedsk);
+}
+
+/*************************************************
+ * Name:        mlk_pack_ciphertext
+ *
+ * Description: Serialize the ciphertext as concatenation of the
+ *              compressed and serialized vector of polynomials b
+ *              and the compressed and serialized polynomial v
+ *
+ * Arguments:   uint8_t *r: pointer to the output serialized ciphertext
+ *              mlk_polyvec b: pointer to the input vector of polynomials b
+ *              mlk_poly *v: pointer to the input polynomial v
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22-23]
+ *
+ **************************************************/
+static void mlk_pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], mlk_polyvec b,
+                                mlk_poly *v)
+{
+  mlk_polyvec_compress_du(r, b);
+  mlk_poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v);
+}
+
+/*************************************************
+ * Name:        mlk_unpack_ciphertext
+ *
+ * Description: De-serialize and decompress ciphertext from a byte array;
+ *              approximate inverse of mlk_pack_ciphertext
+ *
+ * Arguments:   - mlk_polyvec b: pointer to the output vector of polynomials b
+ *              - mlk_poly *v: pointer to the output polynomial v
+ *              - const uint8_t *c: pointer to the input serialized ciphertext
+ *
+ * Specification:
+ * Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L1-4]
+ *
+ **************************************************/
+static void mlk_unpack_ciphertext(mlk_polyvec b, mlk_poly *v,
+                                  const uint8_t c[MLKEM_INDCPA_BYTES])
+{
+  mlk_polyvec_decompress_du(b, c);
+  mlk_poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU);
+}
+
+#if !defined(MLK_USE_NATIVE_NTT_CUSTOM_ORDER)
+/* This namespacing is not done at the top to avoid a naming conflict
+ * with native backends, which are currently not yet namespaced. */
+#define mlk_poly_permute_bitrev_to_custom \
+  MLK_ADD_LEVEL(mlk_poly_permute_bitrev_to_custom)
+
+static MLK_INLINE void mlk_poly_permute_bitrev_to_custom(int16_t data[MLKEM_N])
+__contract__(
+  /* We don't specify that this should be a permutation, but only
+   * that it does not change the bound established at the end of mlk_gen_matrix. */
+  requires(memory_no_alias(data, sizeof(int16_t) * MLKEM_N))
+  requires(array_bound(data, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(memory_slice(data, sizeof(mlk_poly)))
+  ensures(array_bound(data, 0, MLKEM_N, 0, MLKEM_Q))) { ((void)data); }
+#endif /* !MLK_USE_NATIVE_NTT_CUSTOM_ORDER */
+
+/* Reference: `gen_matrix()` in the reference implementation.
+ *            - We use a special subroutine to generate 4 polynomials
+ *              at a time, to be able to leverage batched Keccak-f1600
+ *              implementations. The reference implementation generates
+ *              one matrix entry at a time.
+ *
+ * Not static for benchmarking */
+MLK_INTERNAL_API
+void mlk_gen_matrix(mlk_polymat a, const uint8_t seed[MLKEM_SYMBYTES],
+                    int transposed)
+{
+  unsigned i, j;
+  /*
+   * We generate four separate seed arrays rather than a single one to work
+   * around limitations in CBMC function contracts dealing with disjoint slices
+   * of the same parent object.
+   */
+
+  MLK_ALIGN uint8_t seed_ext[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)];
+
+  for (j = 0; j < 4; j++)
+  {
+    memcpy(seed_ext[j], seed, MLKEM_SYMBYTES);
+  }
+
+  /* Sample 4 matrix entries at a time. */
+  for (i = 0; i < (MLKEM_K * MLKEM_K / 4) * 4; i += 4)
+  {
+    uint8_t x, y;
+
+    for (j = 0; j < 4; j++)
+    {
+      x = (i + j) / MLKEM_K;
+      y = (i + j) % MLKEM_K;
+      if (transposed)
+      {
+        seed_ext[j][MLKEM_SYMBYTES + 0] = x;
+        seed_ext[j][MLKEM_SYMBYTES + 1] = y;
+      }
+      else
+      {
+        seed_ext[j][MLKEM_SYMBYTES + 0] = y;
+        seed_ext[j][MLKEM_SYMBYTES + 1] = x;
+      }
+    }
+
+    /*
+     * This call writes across mlk_polyvec boundaries for K=2 and K=3.
+     * This is intentional and safe.
+     */
+    mlk_poly_rej_uniform_x4(&a[i], seed_ext);
+  }
+
+  /* For MLKEM_K == 3, sample the last entry individually. */
+  if (i < MLKEM_K * MLKEM_K)
+  {
+    uint8_t x, y;
+    x = i / MLKEM_K;
+    y = i % MLKEM_K;
+
+    if (transposed)
+    {
+      seed_ext[0][MLKEM_SYMBYTES + 0] = x;
+      seed_ext[0][MLKEM_SYMBYTES + 1] = y;
+    }
+    else
+    {
+      seed_ext[0][MLKEM_SYMBYTES + 0] = y;
+      seed_ext[0][MLKEM_SYMBYTES + 1] = x;
+    }
+
+    mlk_poly_rej_uniform(&a[i], seed_ext[0]);
+    i++;
+  }
+
+  mlk_assert(i == MLKEM_K * MLKEM_K);
+
+  /*
+   * The public matrix is generated in NTT domain. If the native backend
+   * uses a custom order in NTT domain, permute A accordingly.
+   */
+  for (i = 0; i < MLKEM_K * MLKEM_K; i++)
+  {
+    mlk_poly_permute_bitrev_to_custom(a[i].coeffs);
+  }
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(seed_ext, sizeof(seed_ext));
+}
+
+/*************************************************
+ * Name:        mlk_matvec_mul
+ *
+ * Description: Computes matrix-vector product in NTT domain,
+ *              via Montgomery multiplication.
+ *
+ * Arguments:   - mlk_polyvec out: Pointer to output polynomial vector
+ *              - mlk_polymat a: Input matrix. Must be in NTT domain
+ *                  and have coefficients in [0, MLKEM_UINT12_LIMIT).
+ *              - mlk_polyvec v: Input polynomial vector. Must be in NTT
+ *                  domain.
+ *              - mlk_polyvec_mulcache vc: Mulcache for v, computed via
+ *                  mlk_polyvec_mulcache_compute().
+ *
+ * Specification: Implements [FIPS 203, Section 2.4.7, Eq (2.12), (2.13)]
+ *
+ **************************************************/
+static void mlk_matvec_mul(mlk_polyvec out, const mlk_polymat a,
+                           const mlk_polyvec v, const mlk_polyvec_mulcache vc)
+__contract__(
+  requires(memory_no_alias(out, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(a, sizeof(mlk_polymat)))
+  requires(memory_no_alias(v, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(vc, sizeof(mlk_polyvec_mulcache)))
+  requires(forall(k0, 0, MLKEM_K * MLKEM_K,
+    array_bound(a[k0].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)))
+  assigns(object_whole(out)))
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  __loop__(
+    assigns(i, object_whole(out))
+    invariant(i <= MLKEM_K))
+  {
+    mlk_polyvec_basemul_acc_montgomery_cached(&out[i], &a[MLKEM_K * i], v, vc);
+  }
+}
+
+/* Reference: `indcpa_keypair_derand()` in the reference implementation.
+ *            - We use x4-batched versions of `poly_getnoise` to leverage
+ *              x4-batched Keccak-f1600.
+ *            - We use a different implementation of `gen_matrix()` which
+ *              uses x4-batched Keccak-f1600 (see `mlk_gen_matrix()` above).
+ *            - We use a mulcache to speed up matrix-vector multiplication.
+ *            - We include buffer zeroization.
+ */
+MLK_INTERNAL_API
+void mlk_indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                               uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                               const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  const uint8_t *publicseed = buf;
+  const uint8_t *noiseseed = buf + MLKEM_SYMBYTES;
+  mlk_polymat a;
+  mlk_polyvec e, pkpv, skpv;
+  mlk_polyvec_mulcache skpv_cache;
+
+  MLK_ALIGN uint8_t coins_with_domain_separator[MLKEM_SYMBYTES + 1];
+  /* Concatenate coins with MLKEM_K for domain separation of security levels */
+  memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES);
+  coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K;
+
+  mlk_hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1);
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because all output of randombytes is marked as secret
+   * (=undefined)
+   */
+  MLK_CT_TESTING_DECLASSIFY(publicseed, MLKEM_SYMBYTES);
+
+  mlk_gen_matrix(a, publicseed, 0 /* no transpose */);
+
+#if MLKEM_K == 2
+  mlk_poly_getnoise_eta1_4x(&skpv[0], &skpv[1], &e[0], &e[1], noiseseed, 0, 1,
+                            2, 3);
+#elif MLKEM_K == 3
+  /*
+   * Only the first three output buffers are needed.
+   * The last parameter is a dummy that's overwritten later.
+   */
+  mlk_poly_getnoise_eta1_4x(&skpv[0], &skpv[1], &skpv[2],
+                            &pkpv[0] /* irrelevant */, noiseseed, 0, 1, 2,
+                            0xFF /* irrelevant */);
+  /* Same here */
+  mlk_poly_getnoise_eta1_4x(&e[0], &e[1], &e[2], &pkpv[0] /* irrelevant */,
+                            noiseseed, 3, 4, 5, 0xFF /* irrelevant */);
+#elif MLKEM_K == 4
+  mlk_poly_getnoise_eta1_4x(&skpv[0], &skpv[1], &skpv[2], &skpv[3], noiseseed,
+                            0, 1, 2, 3);
+  mlk_poly_getnoise_eta1_4x(&e[0], &e[1], &e[2], &e[3], noiseseed, 4, 5, 6, 7);
+#endif
+
+  mlk_polyvec_ntt(skpv);
+  mlk_polyvec_ntt(e);
+
+  mlk_polyvec_mulcache_compute(skpv_cache, skpv);
+  mlk_matvec_mul(pkpv, a, skpv, skpv_cache);
+  mlk_polyvec_tomont(pkpv);
+
+  mlk_polyvec_add(pkpv, e);
+  mlk_polyvec_reduce(pkpv);
+  mlk_polyvec_reduce(skpv);
+
+  mlk_pack_sk(sk, skpv);
+  mlk_pack_pk(pk, pkpv, publicseed);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(coins_with_domain_separator, sizeof(coins_with_domain_separator));
+  mlk_zeroize(a, sizeof(a));
+  mlk_zeroize(&e, sizeof(e));
+  mlk_zeroize(&skpv, sizeof(skpv));
+  mlk_zeroize(&skpv_cache, sizeof(skpv_cache));
+}
+
+/* Reference: `indcpa_enc()` in the reference implementation.
+ *            - We use x4-batched versions of `poly_getnoise` to leverage
+ *              x4-batched Keccak-f1600.
+ *            - We use a different implementation of `gen_matrix()` which
+ *              uses x4-batched Keccak-f1600 (see `mlk_gen_matrix()` above).
+ *            - We use a mulcache to speed up matrix-vector multiplication.
+ *            - We include buffer zeroization.
+ */
+MLK_INTERNAL_API
+void mlk_indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                    const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                    const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                    const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t seed[MLKEM_SYMBYTES];
+  mlk_polymat at;
+  mlk_polyvec sp, pkpv, ep, b;
+  mlk_poly v, k, epp;
+  mlk_polyvec_mulcache sp_cache;
+
+  mlk_unpack_pk(pkpv, seed, pk);
+  mlk_poly_frommsg(&k, m);
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because in re-encryption the publicseed originated from sk
+   * which is marked undefined.
+   */
+  MLK_CT_TESTING_DECLASSIFY(seed, MLKEM_SYMBYTES);
+
+  mlk_gen_matrix(at, seed, 1 /* transpose */);
+
+#if MLKEM_K == 2
+  mlk_poly_getnoise_eta1122_4x(&sp[0], &sp[1], &ep[0], &ep[1], coins, 0, 1, 2,
+                               3);
+  mlk_poly_getnoise_eta2(&epp, coins, 4);
+#elif MLKEM_K == 3
+  /*
+   * In this call, only the first three output buffers are needed.
+   * The last parameter is a dummy that's overwritten later.
+   */
+  mlk_poly_getnoise_eta1_4x(&sp[0], &sp[1], &sp[2], &b[0], coins, 0, 1, 2,
+                            0xFF);
+  /* The fourth output buffer in this call _is_ used. */
+  mlk_poly_getnoise_eta2_4x(&ep[0], &ep[1], &ep[2], &epp, coins, 3, 4, 5, 6);
+#elif MLKEM_K == 4
+  mlk_poly_getnoise_eta1_4x(&sp[0], &sp[1], &sp[2], &sp[3], coins, 0, 1, 2, 3);
+  mlk_poly_getnoise_eta2_4x(&ep[0], &ep[1], &ep[2], &ep[3], coins, 4, 5, 6, 7);
+  mlk_poly_getnoise_eta2(&epp, coins, 8);
+#endif
+
+  mlk_polyvec_ntt(sp);
+
+  mlk_polyvec_mulcache_compute(sp_cache, sp);
+  mlk_matvec_mul(b, at, sp, sp_cache);
+  mlk_polyvec_basemul_acc_montgomery_cached(&v, pkpv, sp, sp_cache);
+
+  mlk_polyvec_invntt_tomont(b);
+  mlk_poly_invntt_tomont(&v);
+
+  mlk_polyvec_add(b, ep);
+  mlk_poly_add(&v, &epp);
+  mlk_poly_add(&v, &k);
+
+  mlk_polyvec_reduce(b);
+  mlk_poly_reduce(&v);
+
+  mlk_pack_ciphertext(c, b, &v);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(seed, sizeof(seed));
+  mlk_zeroize(&sp, sizeof(sp));
+  mlk_zeroize(&sp_cache, sizeof(sp_cache));
+  mlk_zeroize(&b, sizeof(b));
+  mlk_zeroize(&v, sizeof(v));
+  mlk_zeroize(at, sizeof(at));
+  mlk_zeroize(&k, sizeof(k));
+  mlk_zeroize(&ep, sizeof(ep));
+  mlk_zeroize(&epp, sizeof(epp));
+}
+
+/* Reference: `indcpa_dec()` in the reference implementation.
+ *            - We use a mulcache for the scalar product.
+ *            - We include buffer zeroization. */
+MLK_INTERNAL_API
+void mlk_indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                    const uint8_t c[MLKEM_INDCPA_BYTES],
+                    const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
+{
+  mlk_polyvec b, skpv;
+  mlk_poly v, sb;
+  mlk_polyvec_mulcache b_cache;
+
+  mlk_unpack_ciphertext(b, &v, c);
+  mlk_unpack_sk(skpv, sk);
+
+  mlk_polyvec_ntt(b);
+  mlk_polyvec_mulcache_compute(b_cache, b);
+  mlk_polyvec_basemul_acc_montgomery_cached(&sb, skpv, b, b_cache);
+  mlk_poly_invntt_tomont(&sb);
+
+  mlk_poly_sub(&v, &sb);
+  mlk_poly_reduce(&v);
+
+  mlk_poly_tomsg(m, &v);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(&skpv, sizeof(skpv));
+  mlk_zeroize(&b, sizeof(b));
+  mlk_zeroize(&b_cache, sizeof(b_cache));
+  mlk_zeroize(&v, sizeof(v));
+  mlk_zeroize(&sb, sizeof(sb));
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef mlk_pack_pk
+#undef mlk_unpack_pk
+#undef mlk_pack_sk
+#undef mlk_unpack_sk
+#undef mlk_pack_ciphertext
+#undef mlk_unpack_ciphertext
+#undef mlk_matvec_mul
+#undef mlk_poly_permute_bitrev_to_custom
diff --git a/crypto/fipsmodule/ml_kem/mlkem/indcpa.h b/crypto/fipsmodule/ml_kem/mlkem/indcpa.h
new file mode 100644
index 00000000000..d87bbd5af89
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/indcpa.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_INDCPA_H
+#define MLK_INDCPA_H
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "poly_k.h"
+
+#define mlk_gen_matrix MLK_NAMESPACE_K(gen_matrix)
+/*************************************************
+ * Name:        mlk_gen_matrix
+ *
+ * Description: Deterministically generate matrix A (or the transpose of A)
+ *              from a seed. Entries of the matrix are polynomials that look
+ *              uniformly random. Performs rejection sampling on output of
+ *              a XOF
+ *
+ * Arguments:   - mlk_polymat a: pointer to output matrix A
+ *              - const uint8_t *seed: pointer to input seed
+ *              - int transposed: boolean deciding whether A or A^T is generated
+ *
+ * Specification: Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen), L3-7]
+ *                and [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L4-8].
+ *                The `transposed` parameter only affects internal presentation.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_gen_matrix(mlk_polymat a, const uint8_t seed[MLKEM_SYMBYTES],
+                    int transposed)
+__contract__(
+  requires(memory_no_alias(a, sizeof(mlk_polymat)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires(transposed == 0 || transposed == 1)
+  assigns(object_whole(a))
+  ensures(forall(x, 0, MLKEM_K * MLKEM_K,
+    array_bound(a[x].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+);
+
+#define mlk_indcpa_keypair_derand MLK_NAMESPACE_K(indcpa_keypair_derand)
+/*************************************************
+ * Name:        mlk_indcpa_keypair_derand
+ *
+ * Description: Generates public and private key for the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                             (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                             (of length MLKEM_INDCPA_SECRETKEYBYTES bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                             (of length MLKEM_SYMBYTES bytes)
+ *
+ * Specification: Implements [FIPS 203, Algorithm 13 (K-PKE.KeyGen)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                               uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                               const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+#define mlk_indcpa_enc MLK_NAMESPACE_K(indcpa_enc)
+/*************************************************
+ * Name:        mlk_indcpa_enc
+ *
+ * Description: Encryption function of the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM.
+ *
+ * Arguments:   - uint8_t *c: pointer to output ciphertext
+ *                            (of length MLKEM_INDCPA_BYTES bytes)
+ *              - const uint8_t *m: pointer to input message
+ *                                  (of length MLKEM_INDCPA_MSGBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                                   (of length MLKEM_INDCPA_PUBLICKEYBYTES)
+ *              - const uint8_t *coins: pointer to input random coins used as
+ *                 seed (of length MLKEM_SYMBYTES) to deterministically generate
+ *                 all randomness
+ *
+ * Specification: Implements [FIPS 203, Algorithm 14 (K-PKE.Encrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                    const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                    const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                    const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(c))
+);
+
+#define mlk_indcpa_dec MLK_NAMESPACE_K(indcpa_dec)
+/*************************************************
+ * Name:        mlk_indcpa_dec
+ *
+ * Description: Decryption function of the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM.
+ *
+ * Arguments:   - uint8_t *m: pointer to output decrypted message
+ *                            (of length MLKEM_INDCPA_MSGBYTES)
+ *              - const uint8_t *c: pointer to input ciphertext
+ *                                  (of length MLKEM_INDCPA_BYTES)
+ *              - const uint8_t *sk: pointer to input secret key
+ *                                   (of length MLKEM_INDCPA_SECRETKEYBYTES)
+ *
+ * Specification: Implements [FIPS 203, Algorithm 15 (K-PKE.Decrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                    const uint8_t c[MLKEM_INDCPA_BYTES],
+                    const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  assigns(object_whole(m))
+);
+
+#endif /* !MLK_INDCPA_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/kem.c b/crypto/fipsmodule/ml_kem/mlkem/kem.c
new file mode 100644
index 00000000000..64d61f0da28
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/kem.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "indcpa.h"
+#include "kem.h"
+#include "randombytes.h"
+#include "symmetric.h"
+#include "verify.h"
+
+/* Level namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define mlk_check_pk MLK_ADD_LEVEL(mlk_check_pk)
+#define mlk_check_sk MLK_ADD_LEVEL(mlk_check_sk)
+#define mlk_check_pct MLK_ADD_LEVEL(mlk_check_pct)
+/* End of level namespacing */
+
+#if defined(CBMC)
+/* Redeclaration with contract needed for CBMC only */
+int memcmp(const void *str1, const void *str2, size_t n)
+__contract__(
+  requires(memory_no_alias(str1, n))
+  requires(memory_no_alias(str2, n))
+);
+#endif /* CBMC */
+
+/*************************************************
+ * Name:        mlk_check_pk
+ *
+ * Description: Implements modulus check mandated by FIPS 203,
+ *              i.e., ensures that coefficients are in [0,q-1].
+ *
+ * Arguments:   - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 on failure
+ *
+ * Specification: Implements [FIPS 203, Section 7.2, 'modulus check']
+ *
+ **************************************************/
+
+/* Reference: Not implemented in the reference implementation. */
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+{
+  int res;
+  mlk_polyvec p;
+  uint8_t p_reencoded[MLKEM_POLYVECBYTES];
+
+  mlk_polyvec_frombytes(p, pk);
+  mlk_polyvec_reduce(p);
+  mlk_polyvec_tobytes(p_reencoded, p);
+
+  /* We use a constant-time memcmp here to avoid having to
+   * declassify the PK before the PCT has succeeded. */
+  res = mlk_ct_memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES) ? -1 : 0;
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(p_reencoded, sizeof(p_reencoded));
+  mlk_zeroize(&p, sizeof(p));
+  return res;
+}
+
+/*************************************************
+ * Name:        mlk_check_sk
+ *
+ * Description: Implements public key hash check mandated by FIPS 203,
+ *              i.e., ensures that
+ *              sk[768k+32 : 768k+64] = H(pk) = H(sk[384k : 768k+32])
+ *
+ * Arguments:   - const uint8_t *sk: pointer to input private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 on failure
+ *
+ * Specification: Implements [FIPS 203, Section 7.3, 'hash check']
+ *
+ **************************************************/
+
+/* Reference: Not implemented in the reference implementation. */
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t test[MLKEM_SYMBYTES];
+  /*
+   * The parts of `sk` being hashed and compared here are public, so
+   * no secret information is leaked through the runtime or the return value
+   * of this function.
+   */
+
+  /* Declassify the public part of the secret key */
+  MLK_CT_TESTING_DECLASSIFY(sk + MLKEM_INDCPA_SECRETKEYBYTES,
+                            MLKEM_INDCCA_PUBLICKEYBYTES);
+  MLK_CT_TESTING_DECLASSIFY(
+      sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+
+  mlk_hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES,
+             MLKEM_INDCCA_PUBLICKEYBYTES);
+  res = memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, test,
+               MLKEM_SYMBYTES)
+            ? -1
+            : 0;
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(test, sizeof(test));
+  return res;
+}
+
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)));
+
+#if defined(MLK_CONFIG_KEYGEN_PCT)
+/* Specification:
+ * Partially implements 'Pairwise Consistency Test' [FIPS 140-3 IG] and
+ * [FIPS 203, Section 7.1, Pairwise Consistency]. */
+
+/* Reference: Not implemented in the reference implementation. */
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES];
+  uint8_t ss_enc[MLKEM_SSBYTES], ss_dec[MLKEM_SSBYTES];
+
+  res = crypto_kem_enc(ct, ss_enc, pk);
+  if (res != 0)
+  {
+    goto cleanup;
+  }
+
+  res = crypto_kem_dec(ss_dec, ct, sk);
+  if (res != 0)
+  {
+    goto cleanup;
+  }
+
+#if defined(MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST)
+  /* Deliberately break PCT for testing purposes */
+  if (mlk_break_pct())
+  {
+    ss_enc[0] = ~ss_enc[0];
+  }
+#endif /* MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST */
+
+  res = mlk_ct_memcmp(ss_enc, ss_dec, sizeof(ss_dec));
+
+cleanup:
+  /* The result of the PCT is public. */
+  MLK_CT_TESTING_DECLASSIFY(&res, sizeof(res));
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(ct, sizeof(ct));
+  mlk_zeroize(ss_enc, sizeof(ss_enc));
+  mlk_zeroize(ss_dec, sizeof(ss_dec));
+  return res;
+}
+#else  /* MLK_CONFIG_KEYGEN_PCT */
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  /* PCT disabled at build time: nothing to check, report success */
+  ((void)pk);
+  ((void)sk);
+  return 0;
+}
+#endif /* !MLK_CONFIG_KEYGEN_PCT */
+
+/* Reference: `crypto_kem_keypair_derand()` in the reference implementation
+ *            - We optionally include PCT which is not present in
+ *              the reference code. */
+MLK_EXTERNAL_API
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t coins[2 * MLKEM_SYMBYTES])
+{
+  mlk_indcpa_keypair_derand(pk, sk, coins);
+  memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  mlk_hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+             MLKEM_INDCCA_PUBLICKEYBYTES);
+  /* Value z for pseudo-random output on reject */
+  memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+
+  /* Declassify public key */
+  MLK_CT_TESTING_DECLASSIFY(pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+
+  /* Pairwise Consistency Test (PCT) (FIPS 140-3 IG) */
+  if (mlk_check_pct(pk, sk))
+  {
+    return -1;
+  }
+
+  return 0;
+}
+
+/* Reference: `crypto_kem_keypair()` in the reference implementation
+ *            - We zeroize the stack buffer */
+MLK_EXTERNAL_API
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t coins[2 * MLKEM_SYMBYTES];
+
+  /* Acquire necessary randomness, and mark it as secret. */
+  mlk_randombytes(coins, 2 * MLKEM_SYMBYTES);
+  MLK_CT_TESTING_SECRET(coins, sizeof(coins));
+
+  res = crypto_kem_keypair_derand(pk, sk, coins);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(coins, sizeof(coins));
+  return res;
+}
+
+/* Reference: `crypto_kem_enc_derand()` in the reference implementation
+ *            - We include public key check
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
+{
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  /* Will contain key, coins */
+  MLK_ALIGN uint8_t kr[2 * MLKEM_SYMBYTES];
+
+  /* Specification: Implements [FIPS 203, Section 7.2, Modulus check] */
+  if (mlk_check_pk(pk))
+  {
+    return -1;
+  }
+
+  memcpy(buf, coins, MLKEM_SYMBYTES);
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  mlk_hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  mlk_hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
+
+  /* coins are in kr+MLKEM_SYMBYTES */
+  mlk_indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES);
+
+  memcpy(ss, kr, MLKEM_SYMBYTES);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(kr, sizeof(kr));
+
+  return 0;
+}
+
+/* Reference: `crypto_kem_enc()` in the reference implementation
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+{
+  int res;
+  MLK_ALIGN uint8_t coins[MLKEM_SYMBYTES];
+
+  mlk_randombytes(coins, MLKEM_SYMBYTES);
+  MLK_CT_TESTING_SECRET(coins, sizeof(coins));
+
+  res = crypto_kem_enc_derand(ct, ss, pk, coins);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(coins, sizeof(coins));
+  return res;
+}
+
+/* Reference: `crypto_kem_dec()` in the reference implementation
+ *            - We include secret key check
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+{
+  uint8_t fail;
+  MLK_ALIGN uint8_t buf[2 * MLKEM_SYMBYTES];
+  /* Will contain key, coins */
+  MLK_ALIGN uint8_t kr[2 * MLKEM_SYMBYTES];
+  MLK_ALIGN uint8_t tmp[MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES];
+
+  const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES;
+
+  /* Specification: Implements [FIPS 203, Section 7.3, Hash check] */
+  if (mlk_check_sk(sk))
+  {
+    return -1;
+  }
+
+  mlk_indcpa_dec(buf, ct, sk);
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  memcpy(buf + MLKEM_SYMBYTES,
+         sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+  mlk_hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
+
+  /* Recompute and compare ciphertext */
+  /* coins are in kr+MLKEM_SYMBYTES */
+  mlk_indcpa_enc(tmp, buf, pk, kr + MLKEM_SYMBYTES);
+  fail = mlk_ct_memcmp(ct, tmp, MLKEM_INDCCA_CIPHERTEXTBYTES);
+
+  /* Compute rejection key */
+  memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+         MLKEM_SYMBYTES);
+  memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
+  mlk_hash_j(ss, tmp, sizeof(tmp));
+
+  /* Copy true key to return buffer if fail is 0 */
+  mlk_ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(kr, sizeof(kr));
+  mlk_zeroize(tmp, sizeof(tmp));
+
+  return 0;
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef mlk_check_pk
+#undef mlk_check_sk
+#undef mlk_check_pct
diff --git a/crypto/fipsmodule/ml_kem/mlkem/kem.h b/crypto/fipsmodule/ml_kem/mlkem/kem.h
new file mode 100644
index 00000000000..14890a0ce2b
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/kem.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_KEM_H
+#define MLK_KEM_H
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "sys.h"
+
+#if defined(MLK_CHECK_APIS)
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. */
+#define MLK_CONFIG_API_NO_SUPERCOP
+#include "mlkem_native.h"
+#undef MLK_CONFIG_API_NO_SUPERCOP
+
+#if MLKEM_INDCCA_SECRETKEYBYTES != MLKEM_SECRETKEYBYTES(MLKEM_LVL)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != MLKEM_PUBLICKEYBYTES(MLKEM_LVL)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != MLKEM_CIPHERTEXTBYTES(MLKEM_LVL)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
+#endif
+
+#endif /* MLK_CHECK_APIS */
+
+#define crypto_kem_keypair_derand MLK_NAMESPACE_K(keypair_derand)
+#define crypto_kem_keypair MLK_NAMESPACE_K(keypair)
+#define crypto_kem_enc_derand MLK_NAMESPACE_K(enc_derand)
+#define crypto_kem_enc MLK_NAMESPACE_K(enc)
+#define crypto_kem_dec MLK_NAMESPACE_K(dec)
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                (an already allocated array filled with 2*MLKEM_SYMBYTES
+ *                 random bytes)
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_CONFIG_KEYGEN_PCT is enabled).
+ *
+ * Specification: Implements [FIPS 203, Algorithm 16, ML-KEM.KeyGen_Internal]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                              const uint8_t coins[2 * MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_CONFIG_KEYGEN_PCT is enabled).
+ *
+ * Specification: Implements [FIPS 203, Algorithm 19, ML-KEM.KeyGen]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  assigns(object_whole(pk))
+  assigns(object_whole(sk))
+);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                (an already allocated array filled with MLKEM_SYMBYTES random
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 17, ML-KEM.Encaps_Internal]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                          uint8_t ss[MLKEM_SSBYTES],
+                          const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                          const uint8_t coins[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(object_whole(ct))
+  assigns(object_whole(ss))
+);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 20, ML-KEM.Encaps]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES])
+__contract__(
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  assigns(object_whole(ct))
+  assigns(object_whole(ss))
+);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret
+ *                (an already allocated array of MLKEM_SSBYTES bytes)
+ *              - const uint8_t *ct: pointer to input cipher text
+ *                (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES
+ *                 bytes)
+ *              - const uint8_t *sk: pointer to input private key
+ *                (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES
+ *                 bytes)
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'hash check' [FIPS 203, Section 7.3]
+ *            for the secret key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 21, ML-KEM.Decaps]
+ *
+ **************************************************/
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int crypto_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                   const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                   const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES])
+__contract__(
+  requires(memory_no_alias(ss, MLKEM_SSBYTES))
+  requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  assigns(object_whole(ss))
+);
+
+#endif /* !MLK_KEM_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h
new file mode 100644
index 00000000000..7fd910c3e4d
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native.h
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef MLK_H
+#define MLK_H
+
+/******************************************************************************
+ *
+ * Public API for mlkem-native
+ *
+ * This header defines the public API of a single build of mlkem-native.
+ *
+ * # Usage
+ *
+ * To use this header, configure the following options:
+ *
+ * - MLK_CONFIG_API_PARAMETER_SET [required]
+ *
+ *   The parameter set used for the build; 512, 768, or 1024.
+ *
+ * - MLK_CONFIG_API_NAMESPACE_PREFIX [required]
+ *
+ *   The namespace prefix used for the build.
+ *
+ *   NOTE:
+ *   For a multilevel build, you must include the 512/768/1024 suffixes
+ *   in MLK_CONFIG_API_NAMESPACE_PREFIX.
+ *
+ * - MLK_CONFIG_API_NO_SUPERCOP [optional]
+ *
+ *   By default, this header will also expose the mlkem-native API in the
+ *   SUPERCOP naming convention crypto_kem_xxx. If you don't want/need this,
+ *   set MLK_CONFIG_API_NO_SUPERCOP. You must set this for a multilevel build.
+ *
+ * - MLK_CONFIG_API_CONSTANTS_ONLY [optional]
+ *
+ *   If you don't want this header to expose any function declarations,
+ *   but only constants for the sizes of key material, set
+ *   MLK_CONFIG_API_CONSTANTS_ONLY. In this case, you don't need to set
+ *   MLK_CONFIG_API_PARAMETER_SET or MLK_CONFIG_API_NAMESPACE_PREFIX,
+ *   nor include a configuration.
+ *
+ * # Multilevel builds
+ *
+ * This header specifies a build of mlkem-native for a fixed security level.
+ * If you need multiple builds, e.g. to build a library offering multiple
+ * security levels, you need multiple instances of this header. In this case,
+ * make sure to rename or #undef the header guard.
+ *
+ ******************************************************************************/
+
+/******************************* Key sizes ************************************/
+
+/* Sizes of cryptographic material, per level */
+/* See mlkem/common.h for the arithmetic expressions giving rise to these */
+/* check-magic: off */
+#define MLKEM512_SECRETKEYBYTES 1632
+#define MLKEM512_PUBLICKEYBYTES 800
+#define MLKEM512_CIPHERTEXTBYTES 768
+
+#define MLKEM768_SECRETKEYBYTES 2400
+#define MLKEM768_PUBLICKEYBYTES 1184
+#define MLKEM768_CIPHERTEXTBYTES 1088
+
+#define MLKEM1024_SECRETKEYBYTES 3168
+#define MLKEM1024_PUBLICKEYBYTES 1568
+#define MLKEM1024_CIPHERTEXTBYTES 1568
+/* check-magic: on */
+
+/* Size of randomness coins in bytes (level-independent) */
+#define MLKEM_SYMBYTES 32
+#define MLKEM512_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM768_SYMBYTES MLKEM_SYMBYTES
+#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES
+/* Size of shared secret in bytes (level-independent) */
+#define MLKEM_BYTES 32
+#define MLKEM512_BYTES MLKEM_BYTES
+#define MLKEM768_BYTES MLKEM_BYTES
+#define MLKEM1024_BYTES MLKEM_BYTES
+
+/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */
+#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES
+#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES
+#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES
+#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL)
+#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL)
+#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL)
+
+/****************************** Function API **********************************/
+
+#if !defined(MLK_CONFIG_API_CONSTANTS_ONLY)
+
+#if !defined(MLK_CONFIG_API_PARAMETER_SET)
+#error MLK_CONFIG_API_PARAMETER_SET not defined
+#endif
+#if !defined(MLK_CONFIG_API_NAMESPACE_PREFIX)
+#error MLK_CONFIG_API_NAMESPACE_PREFIX not defined
+#endif
+
+/* Derive namespacing macro */
+#define MLK_API_CONCAT_(x, y) x##y
+#define MLK_API_CONCAT(x, y) MLK_API_CONCAT_(x, y)
+#define MLK_API_CONCAT_UNDERSCORE(x, y) MLK_API_CONCAT(MLK_API_CONCAT(x, _), y)
+#define MLK_API_NAMESPACE(sym) \
+  MLK_API_CONCAT_UNDERSCORE(MLK_CONFIG_API_NAMESPACE_PREFIX, sym)
+
+#if defined(__GNUC__) || defined(__clang__)
+#define MLK_API_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result))
+#else /* Unknown compiler: no unused-result attribute, expand to nothing */
+#define MLK_API_MUST_CHECK_RETURN_VALUE
+#endif /* __GNUC__ || __clang__ */
+
+#include <stdint.h>
+
+/*************************************************
+ * Name:        crypto_kem_keypair_derand
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t pk[]: pointer to output public key, an array of
+ *                 length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t sk[]: pointer to output private key, an array of
+ *                  MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                  2*MLKEM_SYMBYTES uniformly random bytes.
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_CONFIG_KEYGEN_PCT is enabled).
+ *
+ * Specification: Implements [FIPS 203, Algorithm 16, ML-KEM.KeyGen_Internal]
+ *
+ **************************************************/
+MLK_API_MUST_CHECK_RETURN_VALUE
+int MLK_API_NAMESPACE(keypair_derand)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    const uint8_t coins[2 * MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_keypair
+ *
+ * Description: Generates public and private key
+ *              for CCA-secure ML-KEM key encapsulation mechanism
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - uint8_t *sk: pointer to output private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns:     - 0: On success
+ *              - -1: On PCT failure (if MLK_CONFIG_KEYGEN_PCT is enabled).
+ *
+ * Specification: Implements [FIPS 203, Algorithm 19, ML-KEM.KeyGen]
+ *
+ **************************************************/
+MLK_API_MUST_CHECK_RETURN_VALUE
+int MLK_API_NAMESPACE(keypair)(
+    uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)]);
+
+/*************************************************
+ * Name:        crypto_kem_enc_derand
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *              - const uint8_t *coins: pointer to input randomness, an array of
+ *                 MLKEM_SYMBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 17, ML-KEM.Encaps_Internal]
+ *
+ **************************************************/
+MLK_API_MUST_CHECK_RETURN_VALUE
+int MLK_API_NAMESPACE(enc_derand)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    const uint8_t coins[MLKEM_SYMBYTES]);
+
+/*************************************************
+ * Name:        crypto_kem_enc
+ *
+ * Description: Generates cipher text and shared
+ *              secret for given public key
+ *
+ * Arguments:   - uint8_t *ct: pointer to output cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *pk: pointer to input public key, an array of
+ *                 MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'modulus check' [FIPS 203, Section 7.2]
+ *            for the public key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 20, ML-KEM.Encaps]
+ *
+ **************************************************/
+MLK_API_MUST_CHECK_RETURN_VALUE
+int MLK_API_NAMESPACE(enc)(
+    uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)]);
+
+/*************************************************
+ * Name:        crypto_kem_dec
+ *
+ * Description: Generates shared secret for given
+ *              cipher text and private key
+ *
+ * Arguments:   - uint8_t *ss: pointer to output shared secret, an array of
+ *                 MLKEM_BYTES bytes.
+ *              - const uint8_t *ct: pointer to input cipher text, an array of
+ *                 MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes.
+ *              - const uint8_t *sk: pointer to input private key, an array of
+ *                 MLKEM{512,768,1024}_SECRETKEYBYTES bytes.
+ *
+ * Returns: - 0 on success
+ *          - -1 if the 'hash check' [FIPS 203, Section 7.3]
+ *            for the secret key fails.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 21, ML-KEM.Decaps]
+ *
+ **************************************************/
+MLK_API_MUST_CHECK_RETURN_VALUE
+int MLK_API_NAMESPACE(dec)(
+    uint8_t ss[MLKEM_BYTES],
+    const uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)],
+    const uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)]);
+
+/****************************** SUPERCOP API *********************************/
+
+#if !defined(MLK_CONFIG_API_NO_SUPERCOP)
+/* Export API in SUPERCOP naming scheme CRYPTO_xxx / crypto_kem_xxx */
+#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)
+#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)
+#define CRYPTO_CIPHERTEXTBYTES \
+  MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)
+#define CRYPTO_SYMBYTES MLKEM_SYMBYTES
+#define CRYPTO_BYTES MLKEM_BYTES
+
+#define crypto_kem_keypair_derand MLK_API_NAMESPACE(keypair_derand)
+#define crypto_kem_keypair MLK_API_NAMESPACE(keypair)
+#define crypto_kem_enc_derand MLK_API_NAMESPACE(enc_derand)
+#define crypto_kem_enc MLK_API_NAMESPACE(enc)
+#define crypto_kem_dec MLK_API_NAMESPACE(dec)
+
+#else /* !MLK_CONFIG_API_NO_SUPERCOP */
+
+/* If the SUPERCOP API is not needed, we can undefine the various helper macros
+ * above. Otherwise, they are needed for lazy evaluation of crypto_kem_xxx. */
+#undef MLK_API_CONCAT
+#undef MLK_API_CONCAT_
+#undef MLK_API_CONCAT_UNDERSCORE
+#undef MLK_API_NAMESPACE
+#undef MLK_API_MUST_CHECK_RETURN_VALUE
+
+#endif /* MLK_CONFIG_API_NO_SUPERCOP */
+#endif /* !MLK_CONFIG_API_CONSTANTS_ONLY */
+
+#endif /* !MLK_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/mlkem_native_bcm.c b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native_bcm.c
new file mode 100644
index 00000000000..a86c5bd0d4a
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/mlkem_native_bcm.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogen
+ *          Do not modify it directly.
+ */
+
+/*
+ * Monolithic compilation unit bundling all compilation units within
+ * mlkem-native
+ */
+
+/******************************************************************************
+ *
+ * Single compilation unit (SCU) for fixed-level build of mlkem-native
+ *
+ * This compilation unit bundles together all source files for a build
+ * of mlkem-native for a fixed security level (MLKEM-512/768/1024).
+ *
+ * # API
+ *
+ * The API exposed by this file is described in mlkem_native.h.
+ *
+ * # Multilevel build
+ *
+ * If you want an SCU build of mlkem-native with support for multiple security
+ * levels, you need to include this file multiple times, and set
+ * MLK_CONFIG_MULTILEVEL_WITH_SHARED and MLK_CONFIG_MULTILEVEL_NO_SHARED
+ * appropriately. This is exemplified in examples/monolithic_build_multilevel.
+ *
+ * # Configuration
+ *
+ * - MLK_CONFIG_MONOBUILD_CUSTOM_FIPS202
+ *   Set this option if you use a custom FIPS202 implementation.
+ *
+ * - MLK_CONFIG_MONOBUILD_WITH_NATIVE_ARITH
+ *   Set this option if you want to include the native arithmetic backends
+ *   in your build.
+ *
+ * - MLK_CONFIG_MONOBUILD_WITH_NATIVE_FIPS202
+ *   Set this option if you want to include the native FIPS202 backends
+ *   in your build.
+ *
+ * - MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS
+ *   Set this option if you want to keep the directives defined in
+ *   level-independent headers. This is needed for a multilevel build.
+ */
+
+/* If parts of the mlkem-native source tree are not used,
+ * consider reducing this header via `unifdef`.
+ *
+ * Example:
+ * ```bash
+ * unifdef -UMLK_CONFIG_MONOBUILD_WITH_NATIVE_ARITH mlkem_native_monobuild.c
+ * ```
+ */
+
+#include "sys.h"
+
+#include "compress.c"
+#include "debug.c"
+#include "indcpa.c"
+#include "kem.c"
+#include "poly.c"
+#include "poly_k.c"
+#include "sampling.c"
+#include "verify.c"
+
+
+
+
+/*
+ * Undefine macros from MLK_CONFIG_PARAMETER_SET-specific files
+ */
+/* mlkem/common.h */
+#undef MLK_ADD_LEVEL
+#undef MLK_ASM_FN_SYMBOL
+#undef MLK_ASM_NAMESPACE
+#undef MLK_COMMON_H
+#undef MLK_CONCAT
+#undef MLK_CONCAT_
+#undef MLK_CONFIG_API_NAMESPACE_PREFIX
+#undef MLK_CONFIG_API_PARAMETER_SET
+#undef MLK_EMPTY_CU
+#undef MLK_EXTERNAL_API
+#undef MLK_FIPS202X4_HEADER_FILE
+#undef MLK_FIPS202_HEADER_FILE
+#undef MLK_INTERNAL_API
+#undef MLK_MULTILEVEL_BUILD
+#undef MLK_NAMESPACE
+#undef MLK_NAMESPACE_K
+/* mlkem/indcpa.h */
+#undef MLK_INDCPA_H
+#undef mlk_gen_matrix
+#undef mlk_indcpa_dec
+#undef mlk_indcpa_enc
+#undef mlk_indcpa_keypair_derand
+/* mlkem/kem.h */
+#undef MLK_CONFIG_API_NO_SUPERCOP
+#undef MLK_KEM_H
+#undef crypto_kem_dec
+#undef crypto_kem_enc
+#undef crypto_kem_enc_derand
+#undef crypto_kem_keypair
+#undef crypto_kem_keypair_derand
+/* mlkem/mlkem_native.h */
+#undef CRYPTO_BYTES
+#undef CRYPTO_CIPHERTEXTBYTES
+#undef CRYPTO_PUBLICKEYBYTES
+#undef CRYPTO_SECRETKEYBYTES
+#undef CRYPTO_SYMBYTES
+#undef MLKEM1024_BYTES
+#undef MLKEM1024_CIPHERTEXTBYTES
+#undef MLKEM1024_PUBLICKEYBYTES
+#undef MLKEM1024_SECRETKEYBYTES
+#undef MLKEM1024_SYMBYTES
+#undef MLKEM512_BYTES
+#undef MLKEM512_CIPHERTEXTBYTES
+#undef MLKEM512_PUBLICKEYBYTES
+#undef MLKEM512_SECRETKEYBYTES
+#undef MLKEM512_SYMBYTES
+#undef MLKEM768_BYTES
+#undef MLKEM768_CIPHERTEXTBYTES
+#undef MLKEM768_PUBLICKEYBYTES
+#undef MLKEM768_SECRETKEYBYTES
+#undef MLKEM768_SYMBYTES
+#undef MLKEM_BYTES
+#undef MLKEM_CIPHERTEXTBYTES
+#undef MLKEM_CIPHERTEXTBYTES_
+#undef MLKEM_PUBLICKEYBYTES
+#undef MLKEM_PUBLICKEYBYTES_
+#undef MLKEM_SECRETKEYBYTES
+#undef MLKEM_SECRETKEYBYTES_
+#undef MLKEM_SYMBYTES
+#undef MLK_API_CONCAT
+#undef MLK_API_CONCAT_
+#undef MLK_API_CONCAT_UNDERSCORE
+#undef MLK_API_MUST_CHECK_RETURN_VALUE
+#undef MLK_API_NAMESPACE
+#undef MLK_H
+#undef crypto_kem_dec
+#undef crypto_kem_enc
+#undef crypto_kem_enc_derand
+#undef crypto_kem_keypair
+#undef crypto_kem_keypair_derand
+/* mlkem/params.h */
+#undef MLKEM_DU
+#undef MLKEM_DV
+#undef MLKEM_ETA1
+#undef MLKEM_ETA2
+#undef MLKEM_INDCCA_CIPHERTEXTBYTES
+#undef MLKEM_INDCCA_PUBLICKEYBYTES
+#undef MLKEM_INDCCA_SECRETKEYBYTES
+#undef MLKEM_INDCPA_BYTES
+#undef MLKEM_INDCPA_MSGBYTES
+#undef MLKEM_INDCPA_PUBLICKEYBYTES
+#undef MLKEM_INDCPA_SECRETKEYBYTES
+#undef MLKEM_K
+#undef MLKEM_LVL
+#undef MLKEM_N
+#undef MLKEM_POLYBYTES
+#undef MLKEM_POLYCOMPRESSEDBYTES_D10
+#undef MLKEM_POLYCOMPRESSEDBYTES_D11
+#undef MLKEM_POLYCOMPRESSEDBYTES_D4
+#undef MLKEM_POLYCOMPRESSEDBYTES_D5
+#undef MLKEM_POLYCOMPRESSEDBYTES_DU
+#undef MLKEM_POLYCOMPRESSEDBYTES_DV
+#undef MLKEM_POLYVECBYTES
+#undef MLKEM_POLYVECCOMPRESSEDBYTES_DU
+#undef MLKEM_Q
+#undef MLKEM_Q_HALF
+#undef MLKEM_SSBYTES
+#undef MLKEM_SYMBYTES
+#undef MLKEM_UINT12_LIMIT
+#undef MLK_PARAMS_H
+/* mlkem/poly_k.h */
+#undef MLK_POLY_K_H
+#undef mlk_poly_compress_du
+#undef mlk_poly_compress_dv
+#undef mlk_poly_decompress_du
+#undef mlk_poly_decompress_dv
+#undef mlk_poly_getnoise_eta1122_4x
+#undef mlk_poly_getnoise_eta1_4x
+#undef mlk_poly_getnoise_eta2
+#undef mlk_poly_getnoise_eta2_4x
+#undef mlk_polymat
+#undef mlk_polyvec
+#undef mlk_polyvec_add
+#undef mlk_polyvec_basemul_acc_montgomery_cached
+#undef mlk_polyvec_compress_du
+#undef mlk_polyvec_decompress_du
+#undef mlk_polyvec_frombytes
+#undef mlk_polyvec_invntt_tomont
+#undef mlk_polyvec_mulcache
+#undef mlk_polyvec_mulcache_compute
+#undef mlk_polyvec_ntt
+#undef mlk_polyvec_reduce
+#undef mlk_polyvec_tobytes
+#undef mlk_polyvec_tomont
+/* mlkem/sys.h */
+#undef MLK_ALIGN
+#undef MLK_ALIGN_UP
+#undef MLK_ALWAYS_INLINE
+#undef MLK_CET_ENDBR
+#undef MLK_CT_TESTING_DECLASSIFY
+#undef MLK_CT_TESTING_SECRET
+#undef MLK_DEFAULT_ALIGN
+#undef MLK_HAVE_INLINE_ASM
+#undef MLK_INLINE
+#undef MLK_MUST_CHECK_RETURN_VALUE
+#undef MLK_RESTRICT
+#undef MLK_SYS_AARCH64
+#undef MLK_SYS_AARCH64_EB
+#undef MLK_SYS_BIG_ENDIAN
+#undef MLK_SYS_H
+#undef MLK_SYS_LITTLE_ENDIAN
+#undef MLK_SYS_WINDOWS
+#undef MLK_SYS_X86_64
+#undef MLK_SYS_X86_64_AVX2
+
+#if !defined(MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS)
+/*
+ * Undefine macros from MLK_CONFIG_PARAMETER_SET-generic files
+ */
+/* mlkem/compress.h */
+#undef MLK_COMPRESS_H
+#undef mlk_poly_compress_d10
+#undef mlk_poly_compress_d11
+#undef mlk_poly_compress_d4
+#undef mlk_poly_compress_d5
+#undef mlk_poly_decompress_d10
+#undef mlk_poly_decompress_d11
+#undef mlk_poly_decompress_d4
+#undef mlk_poly_decompress_d5
+#undef mlk_poly_frombytes
+#undef mlk_poly_frommsg
+#undef mlk_poly_tobytes
+#undef mlk_poly_tomsg
+/* mlkem/debug.h */
+#undef MLK_DEBUG_H
+#undef mlk_assert
+#undef mlk_assert_abs_bound
+#undef mlk_assert_abs_bound_2d
+#undef mlk_assert_bound
+#undef mlk_assert_bound_2d
+#undef mlk_debug_check_assert
+#undef mlk_debug_check_bounds
+/* mlkem/poly.h */
+#undef MLK_INVNTT_BOUND
+#undef MLK_NTT_BOUND
+#undef MLK_POLY_H
+#undef mlk_poly_add
+#undef mlk_poly_invntt_tomont
+#undef mlk_poly_mulcache_compute
+#undef mlk_poly_ntt
+#undef mlk_poly_reduce
+#undef mlk_poly_sub
+#undef mlk_poly_tomont
+/* mlkem/randombytes.h */
+#undef MLK_RANDOMBYTES_H
+/* mlkem/sampling.h */
+#undef MLK_SAMPLING_H
+#undef mlk_poly_cbd2
+#undef mlk_poly_cbd3
+#undef mlk_poly_rej_uniform
+#undef mlk_poly_rej_uniform_x4
+/* mlkem/symmetric.h */
+#undef MLK_SYMMETRIC_H
+#undef MLK_XOF_RATE
+#undef mlk_hash_g
+#undef mlk_hash_h
+#undef mlk_hash_j
+#undef mlk_prf_eta
+#undef mlk_prf_eta1
+#undef mlk_prf_eta1_x4
+#undef mlk_prf_eta2
+#undef mlk_xof_absorb
+#undef mlk_xof_ctx
+#undef mlk_xof_init
+#undef mlk_xof_release
+#undef mlk_xof_squeezeblocks
+#undef mlk_xof_x4_absorb
+#undef mlk_xof_x4_ctx
+#undef mlk_xof_x4_init
+#undef mlk_xof_x4_release
+#undef mlk_xof_x4_squeezeblocks
+/* mlkem/verify.h */
+#undef MLK_USE_ASM_VALUE_BARRIER
+#undef MLK_VERIFY_H
+#undef mlk_ct_opt_blocker_u64
+/* mlkem/cbmc.h */
+#undef MLK_CBMC_H
+#undef __contract__
+#undef __loop__
+
+
+#endif /* !MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/params.h b/crypto/fipsmodule/ml_kem/mlkem/params.h
new file mode 100644
index 00000000000..32f311c1e9a
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/params.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_PARAMS_H
+#define MLK_PARAMS_H
+
+#if defined(MLK_CONFIG_FILE)
+#include MLK_CONFIG_FILE
+#else
+#include "config.h"
+#endif
+
+#if !defined(MLK_CONFIG_PARAMETER_SET)
+#error MLK_CONFIG_PARAMETER_SET is not defined
+#endif
+
+#if MLK_CONFIG_PARAMETER_SET == 512
+#define MLKEM_K 2
+#elif MLK_CONFIG_PARAMETER_SET == 768
+#define MLKEM_K 3
+#elif MLK_CONFIG_PARAMETER_SET == 1024
+#define MLKEM_K 4
+#else
+#error Invalid value for MLK_CONFIG_PARAMETER_SET. Must be 512, 768, or 1024.
+#endif
+
+#define MLKEM_N 256
+#define MLKEM_Q 3329
+#define MLKEM_Q_HALF ((MLKEM_Q + 1) / 2) /* 1665 */
+#define MLKEM_UINT12_LIMIT 4096
+
+#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */
+#define MLKEM_SSBYTES 32  /* size in bytes of shared key */
+
+#define MLKEM_POLYBYTES 384
+#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES)
+
+#define MLKEM_POLYCOMPRESSEDBYTES_D4 128
+#define MLKEM_POLYCOMPRESSEDBYTES_D5 160
+#define MLKEM_POLYCOMPRESSEDBYTES_D10 320
+#define MLKEM_POLYCOMPRESSEDBYTES_D11 352
+
+#if MLKEM_K == 2
+#define MLKEM_LVL 512
+#define MLKEM_ETA1 3
+#define MLKEM_DU 10
+#define MLKEM_DV 4
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#elif MLKEM_K == 3
+#define MLKEM_LVL 768
+#define MLKEM_ETA1 2
+#define MLKEM_DU 10
+#define MLKEM_DV 4
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#elif MLKEM_K == 4
+#define MLKEM_LVL 1024
+#define MLKEM_ETA1 2
+#define MLKEM_DU 11
+#define MLKEM_DV 5
+#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5
+#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11
+#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU)
+#endif /* MLKEM_K == 4 */
+
+#define MLKEM_ETA2 2
+
+#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES)
+#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES)
+#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES)
+#define MLKEM_INDCPA_BYTES \
+  (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV)
+
+#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES)
+/* 64 bytes of additional space to save H(pk) and the rejection seed z */
+#define MLKEM_INDCCA_SECRETKEYBYTES                            \
+  (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \
+   2 * MLKEM_SYMBYTES)
+#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES)
+
+#endif /* !MLK_PARAMS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly.c b/crypto/fipsmodule/ml_kem/mlkem/poly.c
new file mode 100644
index 00000000000..3439d78d279
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+
+#include <stdint.h>
+#include <string.h>
+#include "cbmc.h"
+#include "debug.h"
+#include "poly.h"
+#include "sampling.h"
+#include "symmetric.h"
+#include "verify.h"
+
+#if !defined(MLK_USE_NATIVE_POLY_TOMONT) ||           \
+    !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \
+    !defined(MLK_USE_NATIVE_NTT) || !defined(MLK_USE_NATIVE_INTT)
+/*************************************************
+ * Name:        mlk_fqmul
+ *
+ * Description: Montgomery multiplication modulo MLKEM_Q
+ *
+ * Arguments:   - int16_t a: first factor
+ *                  Can be any int16_t.
+ *              - int16_t b: second factor.
+ *                  Must be signed canonical (abs value <(MLKEM_Q+1)/2)
+ *
+ * Returns 16-bit integer congruent to a*b*R^{-1} mod MLKEM_Q, and
+ * smaller than MLKEM_Q in absolute value.
+ *
+ **************************************************/
+
+/* Reference: `fqmul()` in reference implementation. */
+static MLK_INLINE int16_t mlk_fqmul(int16_t a, int16_t b)
+__contract__(
+  requires(b > -MLKEM_Q_HALF && b < MLKEM_Q_HALF)
+  ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q)
+)
+{
+  int16_t res;
+  mlk_assert_abs_bound(&b, 1, MLKEM_Q_HALF);
+
+  res = mlk_montgomery_reduce((int32_t)a * (int32_t)b);
+  /* Bounds:
+   * |res| <= ceil(|a| * |b| / 2^16) + (MLKEM_Q + 1) / 2
+   *       <= ceil(2^15 * ((MLKEM_Q - 1)/2) / 2^16) + (MLKEM_Q + 1) / 2
+   *       <= ceil((MLKEM_Q - 1) / 4) + (MLKEM_Q + 1) / 2
+   *        < MLKEM_Q
+   */
+
+  mlk_assert_abs_bound(&res, 1, MLKEM_Q);
+  return res;
+}
+#endif /* !MLK_USE_NATIVE_POLY_TOMONT || !MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE \
+          || !MLK_USE_NATIVE_NTT || !MLK_USE_NATIVE_INTT */
+
+#if !defined(MLK_USE_NATIVE_POLY_REDUCE) || !defined(MLK_USE_NATIVE_INTT)
+/*************************************************
+ * Name:        mlk_barrett_reduce
+ *
+ * Description: Barrett reduction; given a 16-bit integer a, computes
+ *              centered representative congruent to a mod q in
+ *              {-(q-1)/2,...,(q-1)/2}
+ *
+ * Arguments:   - int16_t a: input integer to be reduced
+ *
+ * Returns:     integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q.
+ *
+ **************************************************/
+
+/* Reference: `barrett_reduce()` in reference implementation. */
+static MLK_INLINE int16_t mlk_barrett_reduce(int16_t a)
+__contract__(
+  ensures(return_value > -MLKEM_Q_HALF && return_value < MLKEM_Q_HALF)
+)
+{
+  /* Barrett reduction approximates
+   * ```
+   *     round(a/MLKEM_Q)
+   *   = round(a*(2^N/MLKEM_Q)/2^N)
+   *  ~= round(a*round(2^N/MLKEM_Q)/2^N)
+   * ```
+   * Here, we pick N=26.
+   */
+  const int32_t magic = 20159; /* check-magic: 20159 == round(2^26 / MLKEM_Q) */
+
+  /*
+   * PORTABILITY: Right-shift on a signed integer is
+   * implementation-defined for negative left argument.
+   * Here, we assume it's sign-preserving "arithmetic" shift right.
+   * See (C99 6.5.7 (5))
+   */
+  const int32_t t = (magic * a + (1 << 25)) >> 26;
+
+  /*
+   * t is in -10 .. +10, so we need 32-bit math to
+   * evaluate t * MLKEM_Q and the subsequent subtraction
+   */
+  int16_t res = (int16_t)(a - t * MLKEM_Q);
+
+  mlk_assert_abs_bound(&res, 1, MLKEM_Q_HALF);
+  return res;
+}
+#endif /* !MLK_USE_NATIVE_POLY_REDUCE || !MLK_USE_NATIVE_INTT */
+
+#if !defined(MLK_USE_NATIVE_POLY_TOMONT)
+/* Reference: `poly_tomont()` in reference implementation. */
+MLK_INTERNAL_API
+void mlk_poly_tomont(mlk_poly *r)
+{
+  unsigned i;
+  const int16_t f = 1353; /* check-magic: 1353 == signed_mod(2^32, MLKEM_Q) */
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q)))
+  {
+    r->coeffs[i] = mlk_fqmul(r->coeffs[i], f);
+  }
+
+  mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_TOMONT */
+MLK_INTERNAL_API
+void mlk_poly_tomont(mlk_poly *r)
+{
+  mlk_poly_tomont_native(r->coeffs);
+  mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_TOMONT */
+
+#if !defined(MLK_USE_NATIVE_POLY_REDUCE)
+/************************************************************
+ * Name: mlk_scalar_signed_to_unsigned_q
+ *
+ * Description: Constant-time conversion of signed representatives
+ *              modulo MLKEM_Q within range (-(MLKEM_Q-1) .. (MLKEM_Q-1))
+ *              into unsigned representatives within range (0..(MLKEM_Q-1)).
+ *
+ * Arguments: c: signed coefficient to be converted
+ *
+ ************************************************************/
+
+/* Reference: Not present in reference implementation.
+ *            - Used here to implement different semantics of `poly_reduce()`;
+ *              see below. In the reference implementation, this logic is
+ *              part of all compression functions (see `compress.c`). */
+static MLK_INLINE uint16_t mlk_scalar_signed_to_unsigned_q(int16_t c)
+__contract__(
+  requires(c > -MLKEM_Q && c < MLKEM_Q)
+  ensures(return_value >= 0 && return_value < MLKEM_Q)
+  ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q)))
+{
+  mlk_assert_abs_bound(&c, 1, MLKEM_Q);
+
+  /* Add Q if c is negative, but in constant time */
+  c = mlk_ct_sel_int16(c + MLKEM_Q, c, mlk_ct_cmask_neg_i16(c));
+
+  /* and therefore cast to uint16_t is safe. */
+  mlk_assert_bound(&c, 1, 0, MLKEM_Q);
+  return (uint16_t)c;
+}
+
+/* Reference: `poly_reduce()` in reference implementation
+ *            - We use _unsigned_ canonical outputs, while the reference
+ *              implementation uses _signed_ canonical outputs.
+ *              Accordingly, we need a conditional addition of MLKEM_Q
+ *              here to go from signed to unsigned representatives.
+ *              This conditional addition is then dropped from all
+ *              polynomial compression functions instead (see `compress.c`). */
+MLK_INTERNAL_API
+void mlk_poly_reduce(mlk_poly *r)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q)))
+  {
+    /* Barrett reduction, giving signed canonical representative */
+    int16_t t = mlk_barrett_reduce(r->coeffs[i]);
+    /* Conditional addition to get unsigned canonical representative */
+    r->coeffs[i] = mlk_scalar_signed_to_unsigned_q(t);
+  }
+
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_REDUCE */
+MLK_INTERNAL_API
+void mlk_poly_reduce(mlk_poly *r)
+{
+  mlk_poly_reduce_native(r->coeffs);
+  mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q);
+}
+#endif /* MLK_USE_NATIVE_POLY_REDUCE */
+
+/* Reference: `poly_add()` in the reference implementation.
+ *            - We use destructive version (output=first input) to avoid
+ *              reasoning about aliasing in the CBMC specification */
+MLK_INTERNAL_API
+void mlk_poly_add(mlk_poly *r, const mlk_poly *b)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1])))
+  {
+    r->coeffs[i] = r->coeffs[i] + b->coeffs[i];
+  }
+}
+
+/* Reference: `poly_sub()` in the reference implementation.
+ *            - We use destructive version (output=first input) to avoid
+ *              reasoning about aliasing in the CBMC specification */
+MLK_INTERNAL_API
+void mlk_poly_sub(mlk_poly *r, const mlk_poly *b)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N; i++)
+  __loop__(
+    invariant(i <= MLKEM_N)
+    invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0]))
+    invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1])))
+  {
+    r->coeffs[i] = r->coeffs[i] - b->coeffs[i];
+  }
+}
+
+/* Include zeta table unless NTT, invNTT and mulcache computation
+ * have been replaced by native implementations. */
+#if !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) || \
+    !defined(MLK_USE_NATIVE_NTT) || !defined(MLK_USE_NATIVE_INTT)
+#include "zetas.inc"
+#endif
+
+#if !defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE)
+/* Reference: Does not exist in the reference implementation.
+ *            - The reference implementation does not use a
+ *              multiplication cache ('mulcache'). This is an idea
+ *              originally taken from https://ia.cr/2021/986
+ *              and used at the C level here. */
+MLK_INTERNAL_API
+void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a)
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 4; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 4)
+    invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q)))
+  {
+    x->coeffs[2 * i + 0] = mlk_fqmul(a->coeffs[4 * i + 1], zetas[64 + i]);
+    x->coeffs[2 * i + 1] = mlk_fqmul(a->coeffs[4 * i + 3], -zetas[64 + i]);
+  }
+
+  /*
+   * This bound is true for the C implementation, but not needed
+   * in the higher level bounds reasoning. It is thus omitted
+   * from the spec to not unnecessarily constrain native
+   * implementations, but checked here nonetheless.
+   */
+  mlk_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q);
+}
+#else  /* !MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+MLK_INTERNAL_API
+void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a)
+{
+  mlk_poly_mulcache_compute_native(x->coeffs, a->coeffs);
+  /* Omitting bounds assertion since native implementations may
+   * decide not to use a mulcache. Note that the C backend implementation
+   * of poly_basemul_montgomery_cached() does still include the check. */
+}
+#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */
+
+#if !defined(MLK_USE_NATIVE_NTT)
+/*
+ * Computes a block of CT butterflies with a fixed twiddle factor,
+ * using Montgomery multiplication.
+ * Parameters:
+ * - r: Pointer to base of polynomial (_not_ the base of butterfly block)
+ * - zeta: Twiddle factor to use for the butterfly. This must be in
+ *         Montgomery form and signed canonical.
+ * - start: Offset to the beginning of the butterfly block
+ * - len: Index difference between coefficients subject to a butterfly
+ * - bound: Ghost variable describing coefficient bound: Prior to `start`,
+ *          coefficients must be bound by `bound + MLKEM_Q`. Post `start`,
+ *          they must be bound by `bound`.
+ * When this function returns, output coefficients in the index range
+ * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`.
+ * Example:
+ * - start=8, len=4
+ *   This would compute the following four butterflies
+ *          8     --    12
+ *             9    --     13
+ *                10   --     14
+ *                   11   --     15
+ * - start=4, len=2
+ *   This would compute the following two butterflies
+ *          4 -- 6
+ *             5 -- 7
+ */
+
+/* Reference: Embedded in `ntt()` in the reference implementation. */
+static void mlk_ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta,
+                                    unsigned start, unsigned len, int bound)
+__contract__(
+  requires(start < MLKEM_N)
+  requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N)
+  requires(0 <= bound && bound < INT16_MAX - MLKEM_Q)
+  requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF)
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(array_abs_bound(r, 0, start, bound + MLKEM_Q))
+  requires(array_abs_bound(r, start, MLKEM_N, bound))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q))
+  ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound)))
+{
+  /* `bound` is a ghost variable only needed in the CBMC specification */
+  unsigned j;
+  ((void)bound);
+  for (j = start; j < start + len; j++)
+  __loop__(
+    invariant(start <= j && j <= start + len)
+    /*
+     * Coefficients are updated in strided pairs, so the bounds for the
+     * intermediate states alternate twice between the old and new bound
+     */
+    invariant(array_abs_bound(r, 0,           j,           bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j,           start + len, bound))
+    invariant(array_abs_bound(r, start + len, j + len,     bound + MLKEM_Q))
+    invariant(array_abs_bound(r, j + len,     MLKEM_N,     bound)))
+  {
+    int16_t t;
+    t = mlk_fqmul(r[j + len], zeta);
+    r[j + len] = r[j] - t;
+    r[j] = r[j] + t;
+  }
+}
+
+/*
+ * Compute one layer of forward NTT
+ * Parameters:
+ * - r: Pointer to base of polynomial
+ * - layer: Variable indicating which layer is being applied.
+ */
+
+/* Reference: Embedded in `ntt()` in the reference implementation. */
+static void mlk_ntt_layer(int16_t r[MLKEM_N], unsigned layer)
+__contract__(
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(1 <= layer && layer <= 7)
+  requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q)))
+{
+  unsigned start, k, len;
+  /* Twiddle factors for layer n are at indices 2^(n-1)..2^n-1. */
+  k = 1u << (layer - 1);
+  len = MLKEM_N >> layer;
+  for (start = 0; start < MLKEM_N; start += 2 * len)
+  __loop__(
+    invariant(start < MLKEM_N + 2 * len)
+    invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N)
+    invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q))
+    invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q)))
+  {
+    int16_t zeta = zetas[k++];
+    mlk_ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q);
+  }
+}
+
+/*
+ * Compute full forward NTT
+ * NOTE: This particular implementation satisfies a much tighter
+ * bound on the output coefficients (5*q) than the contractual one (8*q),
+ * but this is not needed in the calling code. Should we change the
+ * base multiplication strategy to require smaller NTT output bounds,
+ * the proof may need strengthening.
+ */
+
+/* Reference: `ntt()` in the reference implementation.
+ * - Iterate over `layer` instead of `len` in the outer loop
+ *   to simplify computation of zeta index. */
+MLK_INTERNAL_API
+void mlk_poly_ntt(mlk_poly *p)
+{
+  unsigned layer;
+  int16_t *r;
+  mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
+  r = p->coeffs;
+
+  for (layer = 1; layer <= 7; layer++)
+  __loop__(
+    invariant(1 <= layer && layer <= 8)
+    invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)))
+  {
+    mlk_ntt_layer(r, layer);
+  }
+
+  /* Check the stronger bound */
+  mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
+}
+#else  /* !MLK_USE_NATIVE_NTT */
+
+MLK_INTERNAL_API
+void mlk_poly_ntt(mlk_poly *p)
+{
+  mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q);
+  mlk_ntt_native(p->coeffs);
+  mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND);
+}
+#endif /* MLK_USE_NATIVE_NTT */
+
+#if !defined(MLK_USE_NATIVE_INTT)
+
+/* Compute one layer of inverse NTT */
+
+/* Reference: Embedded into `invntt()` in the reference implementation */
+static void mlk_invntt_layer(int16_t *r, unsigned layer)
+__contract__(
+  requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N))
+  requires(1 <= layer && layer <= 7)
+  requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N))
+  ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+{
+  unsigned start, k, len;
+  len = (MLKEM_N >> layer);
+  k = (1u << layer) - 1;
+  for (start = 0; start < MLKEM_N; start += 2 * len)
+  __loop__(
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))
+    invariant(start <= MLKEM_N && k <= 127)
+    /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */
+    invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len))
+  {
+    unsigned j;
+    int16_t zeta = zetas[k--];
+    for (j = start; j < start + len; j++)
+    __loop__(
+      invariant(start <= j && j <= start + len)
+      invariant(start <= MLKEM_N && k <= 127)
+      invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+    {
+      int16_t t = r[j];
+      r[j] = mlk_barrett_reduce(t + r[j + len]);
+      r[j + len] = r[j + len] - t;
+      r[j + len] = mlk_fqmul(r[j + len], zeta);
+    }
+  }
+}
+
+/* Reference: `invntt()` in the reference implementation
+ *            - We normalize at the beginning of the inverse NTT,
+ *              while the reference implementation normalizes at
+ *              the end. This allows us to drop a call to `poly_reduce()`
+ *              from the base multiplication. */
+MLK_INTERNAL_API
+void mlk_poly_invntt_tomont(mlk_poly *p)
+{
+  /*
+   * Scale input polynomial to account for Montgomery factor
+   * and NTT twist. This also brings coefficients down to
+   * absolute value < MLKEM_Q.
+   */
+  unsigned j, layer;
+  const int16_t f = 1441; /* check-magic: 1441 == pow(2,32 - 7,MLKEM_Q) */
+  int16_t *r = p->coeffs;
+
+  for (j = 0; j < MLKEM_N; j++)
+  __loop__(
+    invariant(j <= MLKEM_N)
+    invariant(array_abs_bound(r, 0, j, MLKEM_Q)))
+  {
+    r[j] = mlk_fqmul(r[j], f);
+  }
+
+  /* Run the invNTT layers */
+  for (layer = 7; layer > 0; layer--)
+  __loop__(
+    invariant(0 <= layer && layer < 8)
+    invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)))
+  {
+    mlk_invntt_layer(r, layer);
+  }
+
+  mlk_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND);
+}
+#else  /* !MLK_USE_NATIVE_INTT */
+
+MLK_INTERNAL_API
+void mlk_poly_invntt_tomont(mlk_poly *p)
+{
+  mlk_intt_native(p->coeffs);
+  mlk_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND);
+}
+#endif /* MLK_USE_NATIVE_INTT */
+
+#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */
+
+MLK_EMPTY_CU(mlk_poly)
+
+#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly.h b/crypto/fipsmodule/ml_kem/mlkem/poly.h
new file mode 100644
index 00000000000..bffe3474492
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_POLY_H
+#define MLK_POLY_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include "debug.h"
+#include "verify.h"
+
+/* Absolute exclusive upper bound for the output of the inverse NTT */
+#define MLK_INVNTT_BOUND (8 * MLKEM_Q)
+
+/* Absolute exclusive upper bound for the output of the forward NTT */
+#define MLK_NTT_BOUND (8 * MLKEM_Q)
+
+/*
+ * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial
+ * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... + X^{n-1}*coeffs[n-1]
+ */
+typedef struct
+{
+  int16_t coeffs[MLKEM_N];
+} MLK_ALIGN mlk_poly;
+
+/*
+ * INTERNAL representation of precomputed data speeding up
+ * the base multiplication of two polynomials in NTT domain.
+ */
+typedef struct
+{
+  int16_t coeffs[MLKEM_N >> 1];
+} mlk_poly_mulcache;
+
+/*************************************************
+ * Name:        mlk_cast_uint16_to_int16
+ *
+ * Description: Cast uint16 value to int16
+ *
+ * Returns:
+ *   input x in     0 .. 32767: returns value unchanged
+ *   input x in 32768 .. 65535: returns (x - 65536)
+ **************************************************/
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+static MLK_ALWAYS_INLINE int16_t mlk_cast_uint16_to_int16(uint16_t x)
+{
+  /*
+   * PORTABILITY: This relies on uint16_t -> int16_t
+   * being implemented as the inverse of int16_t -> uint16_t,
+   * which is implementation-defined (C99 6.3.1.3 (3)).
+   * CBMC (correctly) fails to prove this conversion is OK,
+   * so the "conversion" check is disabled around this function.
+   */
+  return (int16_t)x;
+}
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*************************************************
+ * Name:        mlk_montgomery_reduce
+ *
+ * Description: Generic Montgomery reduction; given a 32-bit integer a, computes
+ *              16-bit integer congruent to a * R^-1 mod q, where R=2^16
+ *
+ * Arguments:   - int32_t a: input integer to be reduced, of absolute value
+ *                strictly smaller than INT32_MAX - 2^15 * MLKEM_Q.
+ *
+ * Returns:     integer congruent to a * R^-1 modulo q, with absolute value
+ *                <= ceil(|a| / 2^16) + (MLKEM_Q + 1)/2
+ *
+ **************************************************/
+static MLK_ALWAYS_INLINE int16_t mlk_montgomery_reduce(int32_t a)
+__contract__(
+    requires(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) &&
+	     a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)))
+    /* We don't attempt to express an input-dependent output bound
+     * as the post-condition here. There are two call-sites for this
+     * function:
+     * - The base multiplication: Here, we need no output bound.
+     * - mlk_fqmul: Here, we inline this function and prove another spec
+     *          for mlk_fqmul which does have a post-condition bound. */
+)
+{
+  /* check-magic: 62209 == unsigned_mod(pow(MLKEM_Q, -1, 2^16), 2^16) */
+  const uint32_t QINV = 62209;
+
+  /*  Compute a*q^{-1} mod 2^16 in unsigned representatives */
+  const uint16_t a_reduced = a & UINT16_MAX;
+  const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX;
+
+  /* Lift to signed canonical representative mod 2^16. */
+  const int16_t t = mlk_cast_uint16_to_int16(a_inverted);
+
+  int32_t r;
+
+  mlk_assert(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) &&
+             a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)));
+
+  r = a - ((int32_t)t * MLKEM_Q);
+
+  /*
+   * PORTABILITY: Right-shift on a signed integer is, strictly-speaking,
+   * implementation-defined for negative left argument. Here,
+   * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5))
+   */
+  r = r >> 16;
+  /* Bounds: |r >> 16| <= ceil(|r| / 2^16)
+   *                   <= ceil(|a| / 2^16 + MLKEM_Q / 2)
+   *                   <= ceil(|a| / 2^16) + (MLKEM_Q + 1) / 2
+   *
+   * (Note that |a >> n| = ceil(|a| / 2^n) for negative a)
+   */
+  return (int16_t)r;
+}
+
+#define mlk_poly_tomont MLK_NAMESPACE(poly_tomont)
+/*************************************************
+ * Name:        mlk_poly_tomont
+ *
+ * Description: Inplace conversion of all coefficients of a polynomial
+ *              from normal domain to Montgomery domain
+ *
+ *              Bounds: Output < q in absolute value.
+ *
+ * Arguments:   - mlk_poly *r: pointer to input/output polynomial
+ *
+ * Specification: Internal normalization required in `mlk_indcpa_keypair_derand`
+ *                as part of matrix-vector multiplication
+ *                [FIPS 203, Algorithm 13, K-PKE.KeyGen, L18].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_tomont(mlk_poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q))
+);
+
+#define mlk_poly_mulcache_compute MLK_NAMESPACE(poly_mulcache_compute)
+/************************************************************
+ * Name: mlk_poly_mulcache_compute
+ *
+ * Description: Computes the mulcache for a polynomial in NTT domain
+ *
+ *              The mulcache of a degree-2 polynomial b := b0 + b1*X
+ *              in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when
+ *              computing products of b in Fq[X]/(X^2-zeta).
+ *
+ *              The mulcache of a polynomial in NTT domain -- which is
+ *              a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta),
+ *              for varying zeta, is the 128-tuple of mulcaches of those
+ *              polynomials.
+ *
+ * Arguments: - x: Pointer to mulcache to be populated
+ *            - a: Pointer to input polynomial
+ *
+ * Specification:
+ * - Caches `b_1 * \gamma` in [FIPS 203, Algorithm 12, BaseCaseMultiply, L1]
+ *
+ ************************************************************/
+/*
+ * NOTE: The default C implementation of this function populates
+ * the mulcache with values in (-q,q), but this is not needed for the
+ * higher level safety proofs, and thus not part of the spec.
+ */
+MLK_INTERNAL_API
+void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a)
+__contract__(
+  requires(memory_no_alias(x, sizeof(mlk_poly_mulcache)))
+  requires(memory_no_alias(a, sizeof(mlk_poly)))
+  assigns(object_whole(x))
+);
+
+#define mlk_poly_reduce MLK_NAMESPACE(poly_reduce)
+/*************************************************
+ * Name:        mlk_poly_reduce
+ *
+ * Description: Converts polynomial to _unsigned canonical_ representatives.
+ *
+ *              The input coefficients can be arbitrary integers in int16_t.
+ *              The output coefficients are in [0,1,...,MLKEM_Q-1].
+ *
+ * Arguments:   - mlk_poly *r: pointer to input/output polynomial
+ *
+ * Specification: Normalizes on unsigned canonical representatives
+ *                ahead of calling [FIPS 203, Compress_d, Eq (4.7)].
+ *                This is not made explicit in FIPS 203.
+ *
+ **************************************************/
+/*
+ * NOTE: The semantics of mlk_poly_reduce() is different in
+ * the reference implementation, which requires
+ * signed canonical output data. Unsigned canonical
+ * outputs are better suited to the only remaining
+ * use of mlk_poly_reduce() in the context of (de)serialization.
+ */
+MLK_INTERNAL_API
+void mlk_poly_reduce(mlk_poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+);
+
+#define mlk_poly_add MLK_NAMESPACE(poly_add)
+/************************************************************
+ * Name: mlk_poly_add
+ *
+ * Description: Adds two polynomials in place
+ *
+ * Arguments: - r: Pointer to input-output polynomial to be added to.
+ *            - b: Pointer to input polynomial that should be added
+ *                 to r. Must be disjoint from r.
+ *
+ * The coefficients of r and b must be so that the addition does
+ * not overflow. Otherwise, the behaviour of this function is undefined.
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L21]
+ *
+ ************************************************************/
+/*
+ * NOTE: The reference implementation uses a 3-argument mlk_poly_add.
+ * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */
+MLK_INTERNAL_API
+void mlk_poly_add(mlk_poly *r, const mlk_poly *b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(b, sizeof(mlk_poly)))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k]))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+);
+
+#define mlk_poly_sub MLK_NAMESPACE(poly_sub)
+/*************************************************
+ * Name:        mlk_poly_sub
+ *
+ * Description: Subtract two polynomials; no modular reduction is performed
+ *
+ * Arguments: - mlk_poly *r: Pointer to in/output polynomial to subtract from.
+ *            - const mlk_poly *b: Pointer to second input polynomial
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 15, K-PKE.Decrypt, L6]
+ *
+ **************************************************/
+/*
+ * NOTE: The reference implementation uses a 3-argument mlk_poly_sub.
+ * We specialize to the accumulator form to avoid reasoning about aliasing.
+ */
+MLK_INTERNAL_API
+void mlk_poly_sub(mlk_poly *r, const mlk_poly *b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(b, sizeof(mlk_poly)))
+  requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX))
+  requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN))
+  ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k]))
+  assigns(object_whole(r))
+);
+
+#define mlk_poly_ntt MLK_NAMESPACE(poly_ntt)
+/*************************************************
+ * Name:        mlk_poly_ntt
+ *
+ * Description: Computes negacyclic number-theoretic transform (NTT) of
+ *              a polynomial in place.
+ *
+ *              The input is assumed to be in normal order and
+ *              coefficient-wise bound by MLKEM_Q in absolute value.
+ *
+ *              The output polynomial is in bitreversed order, and
+ *              coefficient-wise bound by MLK_NTT_BOUND in absolute value.
+ *
+ *              (NOTE: Sometimes the input to the NTT is actually smaller,
+ *               which gives better bounds.)
+ *
+ * Arguments:   - mlk_poly *r: pointer to in/output polynomial
+ *
+ * Specification: Implements [FIPS 203, Algorithm 9, NTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_ntt(mlk_poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_NTT_BOUND))
+);
+
+#define mlk_poly_invntt_tomont MLK_NAMESPACE(poly_invntt_tomont)
+/*************************************************
+ * Name:        mlk_poly_invntt_tomont
+ *
+ * Description: Computes inverse of negacyclic number-theoretic transform (NTT)
+ *              of a polynomial in place;
+ *              inputs assumed to be in bitreversed order, output in normal
+ *              order
+ *
+ *              The input is assumed to be in bitreversed order, and can
+ *              have arbitrary coefficients in int16_t.
+ *
+ *              The output polynomial is in normal order, and
+ *              coefficient-wise bound by MLK_INVNTT_BOUND in absolute value.
+ *
+ * Arguments:   - mlk_poly *r: pointer to in/output polynomial
+ *
+ * Specification: Implements composition of [FIPS 203, Algorithm 10, NTT^{-1}]
+ *                and elementwise modular multiplication with a suitable
+ *                Montgomery factor introduced during the base multiplication.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_invntt_tomont(mlk_poly *r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND))
+);
+
+#endif /* !MLK_POLY_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly_k.c b/crypto/fipsmodule/ml_kem/mlkem/poly_k.c
new file mode 100644
index 00000000000..b89faffcf4b
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly_k.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include <stdint.h>
+#include <string.h>
+
+#include "compress.h"
+#include "debug.h"
+#include "poly_k.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Level namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define mlk_poly_cbd_eta1 MLK_ADD_LEVEL(mlk_poly_cbd_eta1)
+#define mlk_poly_cbd_eta2 MLK_ADD_LEVEL(mlk_poly_cbd_eta2)
+/* End of level namespacing */
+
+/* Reference: `polyvec_compress()` in the reference implementation
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
+                             const mlk_polyvec a)
+{
+  unsigned idx;
+  mlk_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  /* Compress the MLKEM_K polynomials back to back into r. */
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_compress_du(&r[idx * MLKEM_POLYCOMPRESSEDBYTES_DU], a + idx);
+  }
+}
+
+/* Reference: `polyvec_decompress()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_polyvec_decompress_du(mlk_polyvec r,
+                               const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_decompress_du(r + idx, &a[idx * MLKEM_POLYCOMPRESSEDBYTES_DU]);
+  }
+  /* Sanity assertion on the coefficient range of the decompressed vector. */
+  mlk_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+}
+
+/* Reference: `polyvec_tobytes()` in the reference implementation.
+ *            - In contrast to the reference implementation, we assume
+ *              unsigned canonical coefficients here.
+ *              The reference implementation works with coefficients
+ *              in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */
+MLK_INTERNAL_API
+void mlk_polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const mlk_polyvec a)
+{
+  unsigned idx;
+  mlk_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+  /* Each polynomial is written to its own MLKEM_POLYBYTES-sized slot of r. */
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_tobytes(&r[idx * MLKEM_POLYBYTES], a + idx);
+  }
+}
+
+/* Reference: `polyvec_frombytes()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_polyvec_frombytes(mlk_polyvec r, const uint8_t a[MLKEM_POLYVECBYTES])
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_frombytes(r + idx, &a[idx * MLKEM_POLYBYTES]);
+  }
+  /* Sanity assertion on the coefficient range of the deserialized vector. */
+  mlk_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+}
+
+/* Reference: `polyvec_ntt()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_polyvec_ntt(mlk_polyvec r)
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_ntt(r + idx);
+  }
+  /* Every entry must now respect the forward-NTT output bound. */
+  mlk_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLK_NTT_BOUND);
+}
+
+/* Reference: `polyvec_invntt_tomont()` in the reference implementation.
+ *            - We normalize at the beginning of the inverse NTT,
+ *              while the reference implementation normalizes at
+ *              the end. This allows us to drop a call to `poly_reduce()`
+ *              from the base multiplication. */
+MLK_INTERNAL_API
+void mlk_polyvec_invntt_tomont(mlk_polyvec r)
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_invntt_tomont(r + idx);
+  }
+  /* Every entry must now respect the inverse-NTT output bound. */
+  mlk_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLK_INVNTT_BOUND);
+}
+
+#if !defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED)
+/* Reference: `polyvec_basemul_acc_montgomery()` in the
+ *            reference implementation.
+ *            - We use a multiplication cache ('mulcache') here
+ *              which is not present in the reference implementation.
+ *              This is an idea originally taken from https://ia.cr/2021/986
+ *              and used at the C level here.
+ *            - We compute the coefficients of the scalar product in 32-bit
+ *              coefficients and perform only a single modular reduction
+ *              at the end. The reference implementation uses 2 * MLKEM_K
+ *              more modular reductions since it reduces after every modular
+ *              multiplication. */
+MLK_INTERNAL_API
+void mlk_polyvec_basemul_acc_montgomery_cached(
+    mlk_poly *r, const mlk_polyvec a, const mlk_polyvec b,
+    const mlk_polyvec_mulcache b_cache)
+{
+  unsigned i; /* indexes the coefficient pair (2i, 2i+1) */
+  mlk_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+  for (i = 0; i < MLKEM_N / 2; i++)
+  __loop__(invariant(i <= MLKEM_N / 2))
+  {
+    unsigned k; /* indexes the entries of a, b and b_cache */
+    int32_t t[2] = {0}; /* 32-bit accumulators for r[2i] and r[2i+1] */
+    for (k = 0; k < MLKEM_K; k++)
+    __loop__(
+      invariant(k <= MLKEM_K &&
+         t[0] <=    (int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768  &&
+         t[0] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) &&
+         t[1] <=   ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) &&
+         t[1] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768)))
+    {
+      t[0] += (int32_t)a[k].coeffs[2 * i + 1] * b_cache[k].coeffs[i];
+      t[0] += (int32_t)a[k].coeffs[2 * i] * b[k].coeffs[2 * i];
+      t[1] += (int32_t)a[k].coeffs[2 * i] * b[k].coeffs[2 * i + 1];
+      t[1] += (int32_t)a[k].coeffs[2 * i + 1] * b[k].coeffs[2 * i];
+    }
+    r->coeffs[2 * i + 0] = mlk_montgomery_reduce(t[0]);
+    r->coeffs[2 * i + 1] = mlk_montgomery_reduce(t[1]);
+  }
+}
+
+#else /* !MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+MLK_INTERNAL_API
+void mlk_polyvec_basemul_acc_montgomery_cached(
+    mlk_poly *r, const mlk_polyvec a, const mlk_polyvec b,
+    const mlk_polyvec_mulcache b_cache)
+{
+  mlk_assert_bound_2d(a, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT);
+  /* Omitting bounds assertion for cache since native implementations may
+   * decide not to use a mulcache. Note that the C backend implementation
+   * of poly_basemul_montgomery_cached() does still include the check. */
+#if MLKEM_K == 2
+  mlk_polyvec_basemul_acc_montgomery_cached_k2_native(
+      r->coeffs, (const int16_t *)a, (const int16_t *)b,
+      (const int16_t *)b_cache);
+#elif MLKEM_K == 3
+  mlk_polyvec_basemul_acc_montgomery_cached_k3_native(
+      r->coeffs, (const int16_t *)a, (const int16_t *)b,
+      (const int16_t *)b_cache);
+#elif MLKEM_K == 4
+  mlk_polyvec_basemul_acc_montgomery_cached_k4_native(
+      r->coeffs, (const int16_t *)a, (const int16_t *)b,
+      (const int16_t *)b_cache);
+#endif
+}
+#endif /* MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */
+
+/* Reference: Does not exist in the reference implementation.
+ *            - The reference implementation does not use a
+ *              multiplication cache ('mulcache'). This is an idea
+ *              originally taken from https://ia.cr/2021/986
+ *              and used at the C level here. */
+MLK_INTERNAL_API
+void mlk_polyvec_mulcache_compute(mlk_polyvec_mulcache x, const mlk_polyvec a)
+{
+  unsigned idx; /* x[idx] receives the mulcache of a[idx] */
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_mulcache_compute(x + idx, a + idx);
+  }
+}
+
+/* Reference: `polyvec_reduce()` in the reference implementation.
+ *            - We use _unsigned_ canonical outputs, while the reference
+ *              implementation uses _signed_ canonical outputs.
+ *              Accordingly, we need a conditional addition of MLKEM_Q
+ *              here to go from signed to unsigned representatives.
+ *              This conditional addition is then dropped from all
+ *              polynomial compression functions instead (see `compress.c`). */
+MLK_INTERNAL_API
+void mlk_polyvec_reduce(mlk_polyvec r)
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_reduce(r + idx);
+  }
+  /* Sanity assertion on the coefficient range of the reduced vector. */
+  mlk_assert_bound_2d(r, MLKEM_K, MLKEM_N, 0, MLKEM_Q);
+}
+
+/* Reference: `polyvec_add()` in the reference implementation.
+ *            - We use destructive version (output=first input) to avoid
+ *              reasoning about aliasing in the CBMC specification */
+MLK_INTERNAL_API
+void mlk_polyvec_add(mlk_polyvec r, const mlk_polyvec b)
+{
+  unsigned idx; /* b[idx] is added into r[idx], one polynomial at a time */
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_add(r + idx, b + idx);
+  }
+}
+
+/* Reference: `polyvec_tomont()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_polyvec_tomont(mlk_polyvec r)
+{
+  unsigned idx;
+  for (idx = 0; idx != MLKEM_K; ++idx)
+  {
+    mlk_poly_tomont(r + idx);
+  }
+  /* Every entry must now be bounded by MLKEM_Q in absolute value. */
+  mlk_assert_abs_bound_2d(r, MLKEM_K, MLKEM_N, MLKEM_Q);
+}
+
+
+/*************************************************
+ * Name:        mlk_poly_cbd_eta1
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter MLKEM_ETA1.
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_eta1], where
+ *                eta1 is specified per level in [FIPS 203, Table 2]
+ *                and represented as MLKEM_ETA1 here.
+ *
+ **************************************************/
+
+/* Reference: `poly_cbd_eta1` in the reference implementation. */
+static MLK_INLINE void mlk_poly_cbd_eta1(
+    mlk_poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4])
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1))
+)
+{
+#if MLKEM_ETA1 == 2
+  mlk_poly_cbd2(r, buf);
+#elif MLKEM_ETA1 == 3
+  mlk_poly_cbd3(r, buf);
+#else /* only MLKEM_ETA1 in {2, 3} is supported; reject at compile time */
+#error "Invalid value of MLKEM_ETA1"
+#endif /* MLKEM_ETA1 */
+}
+
+/* Reference: Does not exist in the reference implementation.
+ *            - This implements a x4-batched version of `poly_getnoise_eta1()`
+ *              from the reference implementation, to leverage
+ *              batched Keccak-f1600.*/
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta1_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2,
+                               mlk_poly *r3, const uint8_t seed[MLKEM_SYMBYTES],
+                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
+                               uint8_t nonce3)
+{
+  MLK_ALIGN uint8_t buf[4][MLK_ALIGN_UP(MLKEM_ETA1 * MLKEM_N / 4)];
+  MLK_ALIGN uint8_t extkey[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 1)];
+  memcpy(extkey[0], seed, MLKEM_SYMBYTES); /* extkey[i] = seed || nonce<i> */
+  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
+  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
+  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
+  extkey[0][MLKEM_SYMBYTES] = nonce0;
+  extkey[1][MLKEM_SYMBYTES] = nonce1;
+  extkey[2][MLKEM_SYMBYTES] = nonce2;
+  extkey[3][MLKEM_SYMBYTES] = nonce3;
+  mlk_prf_eta1_x4(buf, extkey); /* one x4 PRF call covering all four inputs */
+  mlk_poly_cbd_eta1(r0, buf[0]);
+  mlk_poly_cbd_eta1(r1, buf[1]);
+  mlk_poly_cbd_eta1(r2, buf[2]);
+  mlk_poly_cbd_eta1(r3, buf[3]);
+  /* Each output must respect the bound ensured by mlk_poly_cbd_eta1(). */
+  mlk_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1);
+  mlk_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1);
+  mlk_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1);
+  mlk_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA1 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(extkey, sizeof(extkey));
+}
+
+#if MLKEM_K == 2 || MLKEM_K == 4
+/*************************************************
+ * Name:        mlk_poly_cbd_eta2
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter MLKEM_ETA2.
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_eta2], where
+ *                eta2 is specified per level in [FIPS 203, Table 2]
+ *                and represented as MLKEM_ETA2 here.
+ *
+ **************************************************/
+
+/* Reference: `poly_cbd_eta2` in the reference implementation. */
+static MLK_INLINE void mlk_poly_cbd_eta2(
+    mlk_poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4])
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)))
+{
+#if MLKEM_ETA2 == 2
+  mlk_poly_cbd2(r, buf);
+#else /* only MLKEM_ETA2 == 2 is supported; reject at compile time */
+#error "Invalid value of MLKEM_ETA2"
+#endif /* MLKEM_ETA2 */
+}
+
+/* Reference: `poly_getnoise_eta2()` in the reference implementation.
+ *            - We include buffer zeroization. */
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta2(mlk_poly *r, const uint8_t seed[MLKEM_SYMBYTES],
+                            uint8_t nonce)
+{
+  MLK_ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4];
+  MLK_ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1];
+  /* PRF input is seed || nonce. */
+  memcpy(extkey, seed, MLKEM_SYMBYTES);
+  extkey[MLKEM_SYMBYTES] = nonce;
+  mlk_prf_eta2(buf, extkey);
+
+  mlk_poly_cbd_eta2(r, buf);
+  /* mlk_poly_cbd_eta2() ensures the ETA2 bound, so assert exactly that. */
+  mlk_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(extkey, sizeof(extkey));
+}
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
+
+#if MLKEM_K == 2
+/* Reference: Does not exist in the reference implementation.
+ *            - This implements a x4-batched version of `poly_getnoise_eta1()`
+ *              and `poly_getnoise_eta2()` from the reference implementation,
+ *              leveraging batched Keccak-f1600.
+ *            - If a x4-batched Keccak-f1600 is available, we squeeze
+ *              more random data than needed for the eta2 calls, to be
+ *              able to use a x4-batched Keccak-f1600. */
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta1122_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2,
+                                  mlk_poly *r3,
+                                  const uint8_t seed[MLKEM_SYMBYTES],
+                                  uint8_t nonce0, uint8_t nonce1,
+                                  uint8_t nonce2, uint8_t nonce3)
+{
+#if MLKEM_ETA2 >= MLKEM_ETA1
+#error mlk_poly_getnoise_eta1122_4x assumes MLKEM_ETA1 > MLKEM_ETA2
+#endif
+  MLK_ALIGN uint8_t buf[4][MLK_ALIGN_UP(MLKEM_ETA1 * MLKEM_N / 4)];
+  MLK_ALIGN uint8_t extkey[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 1)];
+
+  memcpy(extkey[0], seed, MLKEM_SYMBYTES); /* extkey[i] = seed || nonce<i> */
+  memcpy(extkey[1], seed, MLKEM_SYMBYTES);
+  memcpy(extkey[2], seed, MLKEM_SYMBYTES);
+  memcpy(extkey[3], seed, MLKEM_SYMBYTES);
+  extkey[0][MLKEM_SYMBYTES] = nonce0;
+  extkey[1][MLKEM_SYMBYTES] = nonce1;
+  extkey[2][MLKEM_SYMBYTES] = nonce2;
+  extkey[3][MLKEM_SYMBYTES] = nonce3;
+
+  /* On systems with fast batched Keccak, we use 4-fold batched PRF,
+   * even though that means generating more random data in buf[2] and buf[3]
+   * than necessary. */
+#if !defined(FIPS202_X4_DEFAULT_IMPLEMENTATION)
+  mlk_prf_eta1_x4(buf, extkey);
+#else
+  mlk_prf_eta1(buf[0], extkey[0]);
+  mlk_prf_eta1(buf[1], extkey[1]);
+  mlk_prf_eta2(buf[2], extkey[2]);
+  mlk_prf_eta2(buf[3], extkey[3]);
+#endif /* FIPS202_X4_DEFAULT_IMPLEMENTATION */
+
+  mlk_poly_cbd_eta1(r0, buf[0]); /* r0, r1 are sampled with eta1 */
+  mlk_poly_cbd_eta1(r1, buf[1]);
+  mlk_poly_cbd_eta2(r2, buf[2]); /* r2, r3 are sampled with eta2 */
+  mlk_poly_cbd_eta2(r3, buf[3]);
+
+  mlk_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1);
+  mlk_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1);
+  mlk_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1);
+  mlk_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+  mlk_zeroize(extkey, sizeof(extkey));
+}
+#endif /* MLKEM_K == 2 */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef mlk_poly_cbd_eta1
+#undef mlk_poly_cbd_eta2
diff --git a/crypto/fipsmodule/ml_kem/mlkem/poly_k.h b/crypto/fipsmodule/ml_kem/mlkem/poly_k.h
new file mode 100644
index 00000000000..48f3d12a60c
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/poly_k.h
@@ -0,0 +1,645 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_POLY_K_H
+#define MLK_POLY_K_H
+
+#include <stdint.h>
+#include "common.h"
+#include "compress.h"
+#include "poly.h"
+
+/* Level namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit. */
+#define mlk_polyvec MLK_ADD_LEVEL(mlk_polyvec)
+#define mlk_polymat MLK_ADD_LEVEL(mlk_polymat)
+#define mlk_polyvec_mulcache MLK_ADD_LEVEL(mlk_polyvec_mulcache)
+/* End of level namespacing */
+
+typedef mlk_poly mlk_polyvec[MLKEM_K];
+typedef mlk_poly mlk_polymat[MLKEM_K * MLKEM_K];
+typedef mlk_poly_mulcache mlk_polyvec_mulcache[MLKEM_K];
+
+#define mlk_poly_compress_du MLK_NAMESPACE_K(poly_compress_du)
+/*************************************************
+ * Name:        mlk_poly_compress_du
+ *
+ * Description: Compression (du bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22],
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+static MLK_INLINE void mlk_poly_compress_du(
+    uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const mlk_poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(a, sizeof(mlk_poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU)))
+{
+#if MLKEM_DU == 10
+  mlk_poly_compress_d10(r, a);
+#elif MLKEM_DU == 11
+  mlk_poly_compress_d11(r, a);
+#else
+#error "Invalid value of MLKEM_DU"
+#endif
+}
+
+#define mlk_poly_decompress_du MLK_NAMESPACE_K(poly_decompress_du)
+/*************************************************
+ * Name:        mlk_poly_decompress_du
+ *
+ * Description: De-serialization and subsequent decompression (du bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_du
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                   (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+static MLK_INLINE void mlk_poly_decompress_du(
+    mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(memory_slice(r, sizeof(mlk_poly)))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+{
+#if MLKEM_DU == 10
+  mlk_poly_decompress_d10(r, a);
+#elif MLKEM_DU == 11
+  mlk_poly_decompress_d11(r, a);
+#else
+#error "Invalid value of MLKEM_DU"
+#endif
+}
+
+#define mlk_poly_compress_dv MLK_NAMESPACE_K(poly_compress_dv)
+/*************************************************
+ * Name:        mlk_poly_compress_dv
+ *
+ * Description: Compression (dv bits) and subsequent serialization of a
+ *              polynomial
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes)
+ *              - const mlk_poly *a: pointer to input polynomial
+ *                  Coefficients must be unsigned canonical,
+ *                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_v} (Compress_{d_v} (v))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L23].
+ *                with level-specific d_v defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DV here.
+ *
+ **************************************************/
+static MLK_INLINE void mlk_poly_compress_dv(
+    uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const mlk_poly *a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV))
+  requires(memory_no_alias(a, sizeof(mlk_poly)))
+  requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  assigns(object_whole(r)))
+{
+#if MLKEM_DV == 4
+  mlk_poly_compress_d4(r, a);
+#elif MLKEM_DV == 5
+  mlk_poly_compress_d5(r, a);
+#else
+#error "Invalid value of MLKEM_DV"
+#endif
+}
+
+
+#define mlk_poly_decompress_dv MLK_NAMESPACE_K(poly_decompress_dv)
+/*************************************************
+ * Name:        mlk_poly_decompress_dv
+ *
+ * Description: De-serialization and subsequent decompression (dv bits) of a
+ *              polynomial; approximate inverse of mlk_poly_compress_dv
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *a: pointer to input byte array
+ *                  (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes)
+ *
+ * Upon return, the coefficients of the output polynomial are unsigned-canonical
+ * (non-negative and smaller than MLKEM_Q).
+ *
+ * Specification: Implements `Decompress_{d_v} (ByteDecode_{d_v} (v))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L4].
+ *                with level-specific d_v defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DV here.
+ *
+ **************************************************/
+static MLK_INLINE void mlk_poly_decompress_dv(
+    mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV))
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  assigns(object_whole(r))
+  ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+{
+#if MLKEM_DV == 4
+  mlk_poly_decompress_d4(r, a);
+#elif MLKEM_DV == 5
+  mlk_poly_decompress_d5(r, a);
+#else
+#error "Invalid value of MLKEM_DV"
+#endif
+}
+
+#define mlk_polyvec_compress_du MLK_NAMESPACE_K(polyvec_compress_du)
+/*************************************************
+ * Name:        mlk_polyvec_compress_du
+ *
+ * Description: Compress and serialize vector of polynomials
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                            (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU)
+ *              - const mlk_polyvec a: pointer to input vector of polynomials.
+ *                                  Coefficients must be unsigned canonical,
+ *                                  i.e. in [0,1,..,MLKEM_Q-1].
+ *
+ * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L22].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU],
+                             const mlk_polyvec a)
+__contract__(
+  requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(a, sizeof(mlk_polyvec)))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+  assigns(object_whole(r))
+);
+
+#define mlk_polyvec_decompress_du MLK_NAMESPACE_K(polyvec_decompress_du)
+/*************************************************
+ * Name:        mlk_polyvec_decompress_du
+ *
+ * Description: De-serialize and decompress vector of polynomials;
+ *              approximate inverse of mlk_polyvec_compress_du
+ *
+ * Arguments:   - mlk_polyvec r:       pointer to output vector of polynomials.
+ *                Output will have coefficients normalized to [0,..,q-1].
+ *              - const uint8_t *a: pointer to input byte array
+ *                                  (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU)
+ *
+ * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))`
+ *                in [FIPS 203, Algorithm 15 (K-PKE.Decrypt), L3].
+ *                with level-specific d_u defined in [FIPS 203, Table 2],
+ *                and given by MLKEM_DU here.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_decompress_du(mlk_polyvec r,
+                               const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU])
+__contract__(
+  requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU))
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+         array_bound(r[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+);
+
+#define mlk_polyvec_tobytes MLK_NAMESPACE_K(polyvec_tobytes)
+/*************************************************
+ * Name:        mlk_polyvec_tobytes
+ *
+ * Description: Serialize vector of polynomials
+ *
+ * Arguments:   - uint8_t *r: pointer to output byte array
+ *                            (needs space for MLKEM_POLYVECBYTES)
+ *              - const mlk_polyvec a: pointer to input vector of polynomials
+ *                  Each polynomial must have coefficients in [0,..,q-1].
+ *
+ * Specification: Implements ByteEncode_12 [FIPS 203, Algorithm 5].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                and [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const mlk_polyvec a)
+__contract__(
+  requires(memory_no_alias(a, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(r, MLKEM_POLYVECBYTES))
+  requires(forall(k0, 0, MLKEM_K,
+         array_bound(a[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+  assigns(object_whole(r))
+);
+
+#define mlk_polyvec_frombytes MLK_NAMESPACE_K(polyvec_frombytes)
+/*************************************************
+ * Name:        mlk_polyvec_frombytes
+ *
+ * Description: De-serialize vector of polynomials;
+ *              inverse of mlk_polyvec_tobytes
+ *
+ * Arguments:   - mlk_polyvec r: pointer to output vector of polynomials.
+ *                 Output will have coefficients normalized in [0..4095].
+ *              - const uint8_t *a: pointer to input byte array
+ *                 (of length MLKEM_POLYVECBYTES)
+ *
+ * Specification: Implements ByteDecode_12 [FIPS 203, Algorithm 6].
+ *                Extended to vectors as per
+ *                [FIPS 203, 2.4.8 Applying Algorithms to Arrays]
+ *                and [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_frombytes(mlk_polyvec r, const uint8_t a[MLKEM_POLYVECBYTES])
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(a, MLKEM_POLYVECBYTES))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+        array_bound(r[k0].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)))
+);
+
+#define mlk_polyvec_ntt MLK_NAMESPACE_K(polyvec_ntt)
+/*************************************************
+ * Name:        mlk_polyvec_ntt
+ *
+ * Description: Apply forward NTT to all elements of a vector of polynomials.
+ *
+ *              The input is assumed to be in normal order and
+ *              coefficient-wise bound by MLKEM_Q in absolute value.
+ *
+ *              The output polynomial is in bitreversed order, and
+ *              coefficient-wise bound by MLK_NTT_BOUND in absolute value.
+ *
+ * Arguments:   - mlk_polyvec r: pointer to in/output vector of polynomials
+ *
+ * Specification:
+ * - Implements [FIPS 203, Algorithm 9, NTT]
+ * - Extended to vectors as per [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_ntt(mlk_polyvec r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  requires(forall(j, 0, MLKEM_K,
+  array_abs_bound(r[j].coeffs, 0, MLKEM_N, MLKEM_Q)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r[j].coeffs, 0, MLKEM_N, MLK_NTT_BOUND)))
+);
+
+#define mlk_polyvec_invntt_tomont MLK_NAMESPACE_K(polyvec_invntt_tomont)
+/*************************************************
+ * Name:        mlk_polyvec_invntt_tomont
+ *
+ * Description: Apply inverse NTT to all elements of a vector of polynomials
+ *              and multiply by Montgomery factor 2^16
+ *
+ *              The input is assumed to be in bitreversed order, and can
+ *              have arbitrary coefficients in int16_t.
+ *
+ *              The output polynomial is in normal order, and
+ *              coefficient-wise bound by MLK_INVNTT_BOUND in absolute value.
+ *
+ * Arguments:   - mlk_polyvec r: pointer to in/output vector of polynomials
+ *
+ * Specification:
+ * - Implements [FIPS 203, Algorithm 10, NTT^{-1}]
+ * - Extended to vectors as per [FIPS 203, 2.4.6, Matrices and Vectors]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_invntt_tomont(mlk_polyvec r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+  array_abs_bound(r[j].coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND)))
+);
+
+#define mlk_polyvec_basemul_acc_montgomery_cached \
+  MLK_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached)
+/*************************************************
+ * Name:        mlk_polyvec_basemul_acc_montgomery_cached
+ *
+ * Description: Scalar product of two vectors of polynomials in NTT domain,
+ *              using mulcache for second operand.
+ *
+ *              Bounds:
+ *              - Every coefficient of a is assumed to be in [0..4095]
+ *              - No bounds guarantees for the coefficients in the result.
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const mlk_polyvec a: pointer to first input polynomial vector
+ *              - const mlk_polyvec b: pointer to second input polynomial
+ *                vector
+ *              - const mlk_polyvec_mulcache b_cache: pointer to mulcache
+ *                  for second input polynomial vector. Can be computed
+ *                  via mlk_polyvec_mulcache_compute().
+ *
+ * Specification: Implements
+ *                - [FIPS 203, Section 2.4.7, Eq (2.14)]
+ *                - [FIPS 203, Algorithm 11, MultiplyNTTs]
+ *                - [FIPS 203, Algorithm 12, BaseCaseMultiply]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_basemul_acc_montgomery_cached(
+    mlk_poly *r, const mlk_polyvec a, const mlk_polyvec b,
+    const mlk_polyvec_mulcache b_cache)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(a, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(b, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(b_cache, sizeof(mlk_polyvec_mulcache)))
+  requires(forall(k1, 0, MLKEM_K,
+     array_bound(a[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)))
+  assigns(object_whole(r))
+);
+
+#define mlk_polyvec_mulcache_compute MLK_NAMESPACE_K(polyvec_mulcache_compute)
+/************************************************************
+ * Name: mlk_polyvec_mulcache_compute
+ *
+ * Description: Computes the mulcache for a vector of polynomials in NTT domain
+ *
+ *              The mulcache of a degree-2 polynomial b := b0 + b1*X
+ *              in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when
+ *              computing products of b in Fq[X]/(X^2-zeta).
+ *
+ *              The mulcache of a polynomial in NTT domain -- which is
+ *              a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta),
+ *              for varying zeta, is the 128-tuple of mulcaches of those
+ *              polynomials.
+ *
+ *              The mulcache of a vector of polynomials is the vector
+ *              of mulcaches of its entries.
+ *
+ * Arguments: - x: Pointer to mulcache to be populated
+ *            - a: Pointer to input polynomial vector
+ *
+ * Specification:
+ * - Caches `b_1 * \gamma` in [FIPS 203, Algorithm 12, BaseCaseMultiply, L1]
+ *
+ ************************************************************/
+/*
+ * NOTE: The default C implementation of this function populates
+ * the mulcache with values in (-q,q), but this is not needed for the
+ * higher level safety proofs, and thus not part of the spec.
+ */
+MLK_INTERNAL_API
+void mlk_polyvec_mulcache_compute(mlk_polyvec_mulcache x, const mlk_polyvec a)
+__contract__(
+  requires(memory_no_alias(x, sizeof(mlk_polyvec_mulcache)))
+  requires(memory_no_alias(a, sizeof(mlk_polyvec)))
+  assigns(object_whole(x))
+);
+
+#define mlk_polyvec_reduce MLK_NAMESPACE_K(polyvec_reduce)
+/*************************************************
+ * Name:        mlk_polyvec_reduce
+ *
+ * Description: Applies Barrett reduction to each coefficient
+ *              of each element of a vector of polynomials;
+ *              for details of the Barrett reduction see comments in reduce.c
+ *
+ * Arguments:   - mlk_polyvec r: pointer to input/output polynomial
+ *
+ * Specification: Normalizes on unsigned canonical representatives
+ *                ahead of calling [FIPS 203, Compress_d, Eq (4.7)].
+ *                This is not made explicit in FIPS 203.
+ *
+ **************************************************/
+/*
+ * NOTE: The semantics of mlk_polyvec_reduce() is different in
+ *       the reference implementation, which requires
+ *       signed canonical output data. Unsigned canonical
+ *       outputs are better suited to the only remaining
+ *       use of mlk_poly_reduce() in the context of (de)serialization.
+ */
+MLK_INTERNAL_API
+void mlk_polyvec_reduce(mlk_polyvec r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(k0, 0, MLKEM_K,
+    array_bound(r[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))
+);
+
+#define mlk_polyvec_add MLK_NAMESPACE_K(polyvec_add)
+/*************************************************
+ * Name:        mlk_polyvec_add
+ *
+ * Description: Add vectors of polynomials
+ *
+ * Arguments: - mlk_polyvec r: pointer to input-output vector of polynomials to
+ *              be added to
+ *            - const mlk_polyvec b: pointer to second input vector of
+ *              polynomials
+ *
+ * The coefficients of r and b must be so that the addition does
+ * not overflow. Otherwise, the behaviour of this function is undefined.
+ *
+ * The coefficients returned in *r are in int16_t which is sufficient
+ * to prove type-safety of calling units. Therefore, no stronger
+ * ensures clause is required on this function.
+ *
+ * Specification:
+ * - [FIPS 203, 2.4.5, Arithmetic With Polynomials and NTT Representations]
+ * - Used in [FIPS 203, Algorithm 14 (K-PKE.Encrypt), L19]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_add(mlk_polyvec r, const mlk_polyvec b)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(b, sizeof(mlk_polyvec)))
+  requires(forall(j0, 0, MLKEM_K,
+          forall(k0, 0, MLKEM_N,
+            (int32_t)r[j0].coeffs[k0] + b[j0].coeffs[k0] <= INT16_MAX)))
+  requires(forall(j1, 0, MLKEM_K,
+          forall(k1, 0, MLKEM_N,
+            (int32_t)r[j1].coeffs[k1] + b[j1].coeffs[k1] >= INT16_MIN)))
+  assigns(object_whole(r))
+);
+
+#define mlk_polyvec_tomont MLK_NAMESPACE_K(polyvec_tomont)
+/*************************************************
+ * Name:        mlk_polyvec_tomont
+ *
+ * Description: Inplace conversion of all coefficients of a polynomial
+ *              vector from normal domain to Montgomery domain
+ *
+ *              Bounds: Output < q in absolute value.
+ *
+ *
+ * Specification: Internal normalization required in `mlk_indcpa_keypair_derand`
+ *                as part of matrix-vector multiplication
+ *                [FIPS 203, Algorithm 13, K-PKE.KeyGen, L18].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_polyvec_tomont(mlk_polyvec r)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_polyvec)))
+  assigns(memory_slice(r, sizeof(mlk_polyvec)))
+  assigns(object_whole(r))
+  ensures(forall(j, 0, MLKEM_K,
+    array_abs_bound(r[j].coeffs, 0, MLKEM_N, MLKEM_Q)))
+);
+
+#define mlk_poly_getnoise_eta1_4x MLK_NAMESPACE_K(poly_getnoise_eta1_4x)
+/*************************************************
+ * Name:        mlk_poly_getnoise_eta1_4x
+ *
+ * Description: Batch sample four polynomials deterministically from a seed
+ *              and nonces, with output polynomials close to centered binomial
+ *              distribution with parameter MLKEM_ETA1.
+ *
+ * Arguments:   - mlk_poly *r{0,1,2,3}: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+ *
+ * Specification:
+ * Implements 4x `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 13, K-PKE.KeyGen, L{9, 13}]
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L10]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta1_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2,
+                               mlk_poly *r3, const uint8_t seed[MLKEM_SYMBYTES],
+                               uint8_t nonce0, uint8_t nonce1, uint8_t nonce2,
+                               uint8_t nonce3)
+__contract__(
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires(memory_no_alias(r0, sizeof(mlk_poly)))
+  requires(memory_no_alias(r1, sizeof(mlk_poly)))
+  requires(memory_no_alias(r2, sizeof(mlk_poly)))
+  requires(memory_no_alias(r3, sizeof(mlk_poly)))
+  assigns(memory_slice(r0, sizeof(mlk_poly)))
+  assigns(memory_slice(r1, sizeof(mlk_poly)))
+  assigns(memory_slice(r2, sizeof(mlk_poly)))
+  assigns(memory_slice(r3, sizeof(mlk_poly)))
+  ensures(
+    array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+    && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1))
+);
+
+#if MLKEM_ETA1 == MLKEM_ETA2
+/*
+ * We only require mlk_poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024
+ * where MLKEM_ETA2 = MLKEM_ETA1 = 2.
+ * For ml-kem-512, mlk_poly_getnoise_eta1122_4x is used instead.
+ */
+#define mlk_poly_getnoise_eta2_4x mlk_poly_getnoise_eta1_4x
+#endif /* MLKEM_ETA1 == MLKEM_ETA2 */
+
+#if MLKEM_K == 2 || MLKEM_K == 4
+#define mlk_poly_getnoise_eta2 MLK_NAMESPACE_K(poly_getnoise_eta2)
+/*************************************************
+ * Name:        mlk_poly_getnoise_eta2
+ *
+ * Description: Sample a polynomial deterministically from a seed and a nonce,
+ *              with output polynomial close to centered binomial distribution
+ *              with parameter MLKEM_ETA2
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce: one-byte input nonce
+ *
+ * Specification:
+ * Implements `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L14]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta2(mlk_poly *r, const uint8_t seed[MLKEM_SYMBYTES],
+                            uint8_t nonce)
+__contract__(
+  requires(memory_no_alias(r, sizeof(mlk_poly)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  assigns(object_whole(r))
+  ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))
+);
+#endif /* MLKEM_K == 2 || MLKEM_K == 4 */
+
+#if MLKEM_K == 2
+#define mlk_poly_getnoise_eta1122_4x MLK_NAMESPACE_K(poly_getnoise_eta1122_4x)
+/*************************************************
+ * Name:        mlk_poly_getnoise_eta1122_4x
+ *
+ * Description: Batch sample four polynomials deterministically from a seed
+ *              and nonces: r0, r1 are close to a centered binomial
+ *              distribution with parameter MLKEM_ETA1, r2, r3 with MLKEM_ETA2.
+ *
+ * Arguments:   - mlk_poly *r{0,1,2,3}: pointer to output polynomial
+ *              - const uint8_t *seed: pointer to input seed
+ *                                     (of length MLKEM_SYMBYTES bytes)
+ *              - uint8_t nonce{0,1,2,3}: one-byte input nonce
+ *
+ * Specification:
+ * Implements two instances each of
+ * `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` and
+ * `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`:
+ * - [FIPS 203, Algorithm 8, SamplePolyCBD_eta]
+ * - [FIPS 203, Eq (4.3), PRF_eta]
+ * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in
+ *   [FIPS 203, Algorithm 14, K-PKE.Encrypt, L14]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_getnoise_eta1122_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2,
+                                  mlk_poly *r3,
+                                  const uint8_t seed[MLKEM_SYMBYTES],
+                                  uint8_t nonce0, uint8_t nonce1,
+                                  uint8_t nonce2, uint8_t nonce3)
+__contract__(
+  requires( /* r0, r1 consecutive, r2, r3 consecutive */
+ (memory_no_alias(r0, 2 * sizeof(mlk_poly)) && memory_no_alias(r2, 2 * sizeof(mlk_poly)) &&
+   r1 == r0 + 1 && r3 == r2 + 1 && !same_object(r0, r2)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  assigns(object_whole(r0), object_whole(r1), object_whole(r2), object_whole(r3))
+  ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+     && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)
+     && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)
+     && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1))
+);
+#endif /* MLKEM_K == 2 */
+
+#endif /* !MLK_POLY_K_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/randombytes.h b/crypto/fipsmodule/ml_kem/mlkem/randombytes.h
new file mode 100644
index 00000000000..27909e3b53d
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/randombytes.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_RANDOMBYTES_H
+#define MLK_RANDOMBYTES_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "cbmc.h"
+#include "common.h"
+
+#if !defined(MLK_CONFIG_CUSTOM_RANDOMBYTES)
+void randombytes(uint8_t *out, size_t outlen);
+static MLK_INLINE void mlk_randombytes(uint8_t *out, size_t outlen)
+__contract__(
+  requires(memory_no_alias(out, outlen))
+  assigns(memory_slice(out, outlen))) { randombytes(out, outlen); }
+#endif /* !MLK_CONFIG_CUSTOM_RANDOMBYTES */
+
+#endif /* !MLK_RANDOMBYTES_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sampling.c b/crypto/fipsmodule/ml_kem/mlkem/sampling.c
new file mode 100644
index 00000000000..37cfc3c5c46
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sampling.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "common.h"
+#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+
+#include "debug.h"
+#include "sampling.h"
+#include "symmetric.h"
+
+/* Reference: `rej_uniform()` in the reference implementation.
+ *            - Our signature differs from the reference implementation
+ *              in that it adds the offset and always expects the base of the
+ *              target buffer. This avoids shifting the buffer base in the
+ *              caller, which appears tricky to reason about. */
+static unsigned mlk_rej_uniform_scalar(int16_t *r, unsigned target,
+                                       unsigned offset, const uint8_t *buf,
+                                       unsigned buflen)
+__contract__(
+  requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
+  requires(memory_no_alias(r, sizeof(int16_t) * target))
+  requires(memory_no_alias(buf, buflen))
+  requires(array_bound(r, 0, offset, 0, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * target))
+  ensures(offset <= return_value && return_value <= target)
+  ensures(array_bound(r, 0, return_value, 0, MLKEM_Q))
+)
+{
+  unsigned ctr, pos;
+  uint16_t val0, val1;
+
+  mlk_assert_bound(r, offset, 0, MLKEM_Q);
+
+  ctr = offset;
+  pos = 0;
+  /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */
+  while (ctr < target && pos + 3 <= buflen)
+  __loop__(
+    invariant(offset <= ctr && ctr <= target && pos <= buflen)
+    invariant(array_bound(r, 0, ctr, 0, MLKEM_Q)))
+  {
+    val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
+    val1 = ((buf[pos + 1] >> 4) | ((uint16_t)buf[pos + 2] << 4)) & 0xFFF;
+    pos += 3;
+
+    if (val0 < MLKEM_Q)
+    {
+      r[ctr++] = val0;
+    }
+    if (ctr < target && val1 < MLKEM_Q)
+    {
+      r[ctr++] = val1;
+    }
+  }
+
+  mlk_assert_bound(r, ctr, 0, MLKEM_Q);
+  return ctr;
+}
+
+/*************************************************
+ * Name:        mlk_rej_uniform
+ *
+ * Description: Run rejection sampling on uniform random bytes to generate
+ *              uniform random integers mod q
+ *
+ * Arguments:   - int16_t *r:          pointer to output buffer
+ *              - unsigned target:     requested number of 16-bit integers
+ *                                     (uniform mod q).
+ *                                     Must be <= 4096.
+ *              - unsigned offset:     number of 16-bit integers that have
+ *                                     already been sampled.
+ *                                     Must be <= target.
+ *              - const uint8_t *buf:  pointer to input buffer
+ *                                     (assumed to be uniform random bytes)
+ *              - unsigned buflen:     length of input buffer in bytes
+ *                                     Must be <= 4096.
+ *                                     Must be a multiple of 3.
+ *
+ * Note: Strictly speaking, only a few values of buflen near UINT_MAX need
+ * excluding. The limit of 4096 is somewhat arbitrary but sufficient for all
+ * uses of this function. Similarly, the actual limit for target is UINT_MAX/2.
+ *
+ * Returns the new offset of sampled 16-bit integers, at most target,
+ * and at least the initial offset.
+ * If the new offset is strictly less than target, all of the input buffer
+ * is guaranteed to have been consumed. If it is equal to target, no
+ * information is provided on how many bytes of the input buffer were consumed.
+ **************************************************/
+
+/* Reference: `rej_uniform()` in the reference implementation.
+ *            - Our signature differs from the reference implementation
+ *              in that it adds the offset and always expects the base of the
+ *              target buffer. This avoids shifting the buffer base in the
+ *              caller, which appears tricky to reason about.
+ *            - Optional fallback to native implementation. */
+static unsigned mlk_rej_uniform(int16_t *r, unsigned target, unsigned offset,
+                                const uint8_t *buf, unsigned buflen)
+__contract__(
+  requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0)
+  requires(memory_no_alias(r, sizeof(int16_t) * target))
+  requires(memory_no_alias(buf, buflen))
+  requires(array_bound(r, 0, offset, 0, MLKEM_Q))
+  assigns(memory_slice(r, sizeof(int16_t) * target))
+  ensures(offset <= return_value && return_value <= target)
+  ensures(array_bound(r, 0, return_value, 0, MLKEM_Q))
+)
+{
+#if defined(MLK_USE_NATIVE_REJ_UNIFORM)
+  if (offset == 0)
+  {
+    int ret = mlk_rej_uniform_native(r, target, buf, buflen);
+    if (ret != -1)
+    {
+      unsigned res = (unsigned)ret;
+      mlk_assert_bound(r, res, 0, MLKEM_Q);
+      return res;
+    }
+  }
+#endif /* MLK_USE_NATIVE_REJ_UNIFORM */
+
+  return mlk_rej_uniform_scalar(r, target, offset, buf, buflen);
+}
+
+#ifndef MLKEM_GEN_MATRIX_NBLOCKS
+#define MLKEM_GEN_MATRIX_NBLOCKS \
+  ((12 * MLKEM_N / 8 * (1 << 12) / MLKEM_Q + MLK_XOF_RATE) / MLK_XOF_RATE)
+#endif
+
+/* Reference: Does not exist in reference implementation.
+ *            - x4-batched version of `rej_uniform()` from the
+ *              reference implementation, leveraging x4-batched Keccak-f1600. */
+MLK_INTERNAL_API
+void mlk_poly_rej_uniform_x4(mlk_poly *vec,
+                             uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)])
+{
+  /* Temporary buffers for XOF output before rejection sampling */
+  MLK_ALIGN uint8_t
+      buf[4][MLK_ALIGN_UP(MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE)];
+
+  /* Tracks the number of coefficients we have already sampled */
+  unsigned ctr[4];
+  mlk_xof_x4_ctx statex;
+  unsigned buflen;
+
+  mlk_xof_x4_init(&statex);
+  mlk_xof_x4_absorb(&statex, seed, MLKEM_SYMBYTES + 2);
+
+  /*
+   * Initially, squeeze heuristic number of MLKEM_GEN_MATRIX_NBLOCKS.
+   * This should generate the matrix entries with high probability.
+   */
+  mlk_xof_x4_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &statex);
+  buflen = MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE;
+  ctr[0] = mlk_rej_uniform(vec[0].coeffs, MLKEM_N, 0, buf[0], buflen);
+  ctr[1] = mlk_rej_uniform(vec[1].coeffs, MLKEM_N, 0, buf[1], buflen);
+  ctr[2] = mlk_rej_uniform(vec[2].coeffs, MLKEM_N, 0, buf[2], buflen);
+  ctr[3] = mlk_rej_uniform(vec[3].coeffs, MLKEM_N, 0, buf[3], buflen);
+
+  /*
+   * So long as not all matrix entries have been generated, squeeze
+   * one more block at a time until we're done.
+   */
+  buflen = MLK_XOF_RATE;
+  while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N ||
+         ctr[3] < MLKEM_N)
+  __loop__(
+    assigns(ctr, statex, memory_slice(vec, sizeof(mlk_poly) * 4), object_whole(buf[0]),
+       object_whole(buf[1]), object_whole(buf[2]), object_whole(buf[3]))
+    invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N)
+    invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N)
+    invariant(array_bound(vec[0].coeffs, 0, ctr[0], 0, MLKEM_Q))
+    invariant(array_bound(vec[1].coeffs, 0, ctr[1], 0, MLKEM_Q))
+    invariant(array_bound(vec[2].coeffs, 0, ctr[2], 0, MLKEM_Q))
+    invariant(array_bound(vec[3].coeffs, 0, ctr[3], 0, MLKEM_Q)))
+  {
+    mlk_xof_x4_squeezeblocks(buf, 1, &statex);
+    ctr[0] = mlk_rej_uniform(vec[0].coeffs, MLKEM_N, ctr[0], buf[0], buflen);
+    ctr[1] = mlk_rej_uniform(vec[1].coeffs, MLKEM_N, ctr[1], buf[1], buflen);
+    ctr[2] = mlk_rej_uniform(vec[2].coeffs, MLKEM_N, ctr[2], buf[2], buflen);
+    ctr[3] = mlk_rej_uniform(vec[3].coeffs, MLKEM_N, ctr[3], buf[3], buflen);
+  }
+
+  mlk_xof_x4_release(&statex);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+}
+
+MLK_INTERNAL_API
+void mlk_poly_rej_uniform(mlk_poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+{
+  mlk_xof_ctx state;
+  MLK_ALIGN uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE];
+  unsigned ctr, buflen;
+
+  mlk_xof_init(&state);
+  mlk_xof_absorb(&state, seed, MLKEM_SYMBYTES + 2);
+
+  /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS.
+   */
+  /* This should generate the matrix entry with high probability. */
+  mlk_xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state);
+  buflen = MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE;
+  ctr = mlk_rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen);
+
+  /* Squeeze + sample one more block at a time until we're done */
+  buflen = MLK_XOF_RATE;
+  while (ctr < MLKEM_N)
+  __loop__(
+    assigns(ctr, state, memory_slice(entry, sizeof(mlk_poly)), object_whole(buf))
+    invariant(ctr <= MLKEM_N)
+    invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q)))
+  {
+    mlk_xof_squeezeblocks(buf, 1, &state);
+    ctr = mlk_rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen);
+  }
+
+  mlk_xof_release(&state);
+
+  /* Specification: Partially implements
+   * [FIPS 203, Section 3.3, Destruction of intermediate values] */
+  mlk_zeroize(buf, sizeof(buf));
+}
+
+/*************************************************
+ * Name:        mlk_load32_littleendian
+ *
+ * Description: load 4 bytes into a 32-bit integer
+ *              in little-endian order
+ *
+ * Arguments:   - const uint8_t *x: pointer to input byte array
+ *
+ * Returns 32-bit unsigned integer loaded from x
+ *
+ **************************************************/
+
+/* Reference: `load32_littleendian()` in the reference implementation. */
+static uint32_t mlk_load32_littleendian(const uint8_t x[4])
+{
+  uint32_t r;
+  r = (uint32_t)x[0];
+  r |= (uint32_t)x[1] << 8;
+  r |= (uint32_t)x[2] << 16;
+  r |= (uint32_t)x[3] << 24;
+  return r;
+}
+
+/* Reference: `cbd2()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_poly_cbd2(mlk_poly *r, const uint8_t buf[2 * MLKEM_N / 4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 8; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 8)
+    invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3)))
+  {
+    unsigned j;
+    uint32_t t = mlk_load32_littleendian(buf + 4 * i);
+    uint32_t d = t & 0x55555555;
+    d += (t >> 1) & 0x55555555;
+
+    for (j = 0; j < 8; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 8 && j <= 8)
+      invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3)))
+    {
+      const int16_t a = (d >> (4 * j + 0)) & 0x3;
+      const int16_t b = (d >> (4 * j + 2)) & 0x3;
+      r->coeffs[8 * i + j] = a - b;
+    }
+  }
+}
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_ETA1 == 3
+/*************************************************
+ * Name:        mlk_load24_littleendian
+ *
+ * Description: load 3 bytes into a 32-bit integer
+ *              in little-endian order.
+ *              This function is only needed for ML-KEM-512
+ *
+ * Arguments:   - const uint8_t *x: pointer to input byte array
+ *
+ * Returns 32-bit unsigned integer loaded from x (most significant byte is zero)
+ *
+ **************************************************/
+
+/* Reference: `load24_littleendian()` in the reference implementation. */
+static uint32_t mlk_load24_littleendian(const uint8_t x[3])
+{
+  uint32_t r;
+  r = (uint32_t)x[0];
+  r |= (uint32_t)x[1] << 8;
+  r |= (uint32_t)x[2] << 16;
+  return r;
+}
+
+/* Reference: `cbd3()` in the reference implementation. */
+MLK_INTERNAL_API
+void mlk_poly_cbd3(mlk_poly *r, const uint8_t buf[3 * MLKEM_N / 4])
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_N / 4; i++)
+  __loop__(
+    invariant(i <= MLKEM_N / 4)
+    invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4)))
+  {
+    unsigned j;
+    const uint32_t t = mlk_load24_littleendian(buf + 3 * i);
+    uint32_t d = t & 0x00249249;
+    d += (t >> 1) & 0x00249249;
+    d += (t >> 2) & 0x00249249;
+
+    for (j = 0; j < 4; j++)
+    __loop__(
+      invariant(i <= MLKEM_N / 4 && j <= 4)
+      invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4)))
+    {
+      const int16_t a = (d >> (6 * j + 0)) & 0x7;
+      const int16_t b = (d >> (6 * j + 3)) & 0x7;
+      r->coeffs[4 * i + j] = a - b;
+    }
+  }
+}
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_ETA1 == 3 */
+
+#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */
+
+MLK_EMPTY_CU(sampling)
+
+#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */
+#undef MLKEM_GEN_MATRIX_NBLOCKS
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sampling.h b/crypto/fipsmodule/ml_kem/mlkem/sampling.h
new file mode 100644
index 00000000000..48bd1fcbc54
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sampling.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SAMPLING_H
+#define MLK_SAMPLING_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "cbmc.h"
+#include "common.h"
+#include "poly.h"
+
+#define mlk_poly_cbd2 MLK_NAMESPACE(poly_cbd2)
+/*************************************************
+ * Name:        mlk_poly_cbd2
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter eta=2
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_2]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_cbd2(mlk_poly *r, const uint8_t buf[2 * MLKEM_N / 4]);
+
+#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_ETA1 == 3
+#define mlk_poly_cbd3 MLK_NAMESPACE(poly_cbd3)
+/*************************************************
+ * Name:        mlk_poly_cbd3
+ *
+ * Description: Given an array of uniformly random bytes, compute
+ *              polynomial with coefficients distributed according to
+ *              a centered binomial distribution with parameter eta=3.
+ *              This function is only needed for ML-KEM-512
+ *
+ * Arguments:   - mlk_poly *r: pointer to output polynomial
+ *              - const uint8_t *buf: pointer to input byte array
+ *
+ * Specification: Implements [FIPS 203, Algorithm 8, SamplePolyCBD_3]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_cbd3(mlk_poly *r, const uint8_t buf[3 * MLKEM_N / 4]);
+#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_ETA1 == 3 */
+
+#define mlk_poly_rej_uniform_x4 MLK_NAMESPACE(poly_rej_uniform_x4)
+/*************************************************
+ * Name:        mlk_poly_rej_uniform_x4
+ *
+ * Description: Generate four polynomials using rejection sampling
+ *              on (pseudo-)uniformly random bytes sampled from a seed.
+ *
+ * Arguments:   - mlk_poly *vec:
+ *                Pointer to an array of 4 polynomials to be sampled.
+ *              - uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]:
+ *                Pointer to consecutive array of seed buffers of size
+ *                MLKEM_SYMBYTES + 2 each, plus padding for alignment.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 7, SampleNTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_rej_uniform_x4(mlk_poly *vec,
+                             uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)])
+__contract__(
+  requires(memory_no_alias(vec, sizeof(mlk_poly) * 4))
+  requires(memory_no_alias(seed, 4 * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)))
+  assigns(memory_slice(vec, sizeof(mlk_poly) * 4))
+  ensures(array_bound(vec[0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[1].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[2].coeffs, 0, MLKEM_N, 0, MLKEM_Q))
+  ensures(array_bound(vec[3].coeffs, 0, MLKEM_N, 0, MLKEM_Q)));
+
+#define mlk_poly_rej_uniform MLK_NAMESPACE(poly_rej_uniform)
+/*************************************************
+ * Name:        mlk_poly_rej_uniform
+ *
+ * Description: Generate polynomial using rejection sampling
+ *              on (pseudo-)uniformly random bytes sampled from a seed.
+ *
+ * Arguments:   - mlk_poly *entry:     Pointer to polynomial to be sampled.
+ *              - uint8_t *seed:       Pointer to seed buffer of size
+ *                                     MLKEM_SYMBYTES + 2.
+ *
+ * Specification: Implements [FIPS 203, Algorithm 7, SampleNTT]
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_poly_rej_uniform(mlk_poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2])
+__contract__(
+  requires(memory_no_alias(entry, sizeof(mlk_poly)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2))
+  assigns(memory_slice(entry, sizeof(mlk_poly)))
+  ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q)));
+
+#endif /* !MLK_SAMPLING_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/symmetric.h b/crypto/fipsmodule/ml_kem/mlkem/symmetric.h
new file mode 100644
index 00000000000..54240c6054f
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/symmetric.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SYMMETRIC_H
+#define MLK_SYMMETRIC_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+#include MLK_FIPS202_HEADER_FILE
+#include MLK_FIPS202X4_HEADER_FILE
+
+/* Macros denoting FIPS 203 specific Hash functions */
+
+/* Hash function H, [FIPS 203, Section 4.1, Eq (4.4)] */
+#define mlk_hash_h(OUT, IN, INBYTES) mlk_sha3_256(OUT, IN, INBYTES)
+
+/* Hash function G, [FIPS 203, Section 4.1, Eq (4.5)] */
+#define mlk_hash_g(OUT, IN, INBYTES) mlk_sha3_512(OUT, IN, INBYTES)
+
+/* Hash function J, [FIPS 203, Section 4.1, Eq (4.4)] */
+#define mlk_hash_j(OUT, IN, INBYTES) \
+  mlk_shake256(OUT, MLKEM_SYMBYTES, IN, INBYTES)
+
+/* PRF function, [FIPS 203, Section 4.1, Eq (4.3)]
+ * Referring to (eq 4.3), `IN` is assumed to contain `s || b`. */
+#define mlk_prf_eta(ETA, OUT, IN) \
+  mlk_shake256(OUT, (ETA) * MLKEM_N / 4, IN, MLKEM_SYMBYTES + 1)
+#define mlk_prf_eta1(OUT, IN) mlk_prf_eta(MLKEM_ETA1, OUT, IN)
+#define mlk_prf_eta2(OUT, IN) mlk_prf_eta(MLKEM_ETA2, OUT, IN)
+#define mlk_prf_eta1_x4(OUT, IN)                                        \
+  mlk_shake256x4((OUT)[0], (OUT)[1], (OUT)[2], (OUT)[3],                \
+                 (MLKEM_ETA1 * MLKEM_N / 4), (IN)[0], (IN)[1], (IN)[2], \
+                 (IN)[3], MLKEM_SYMBYTES + 1)
+
+/* XOF function, FIPS 203 4.1 */
+#define mlk_xof_ctx mlk_shake128ctx
+#define mlk_xof_x4_ctx mlk_shake128x4ctx
+#define mlk_xof_init(CTX) mlk_shake128_init((CTX))
+#define mlk_xof_absorb(CTX, IN, INBYTES) \
+  mlk_shake128_absorb_once((CTX), (IN), (INBYTES))
+#define mlk_xof_squeezeblocks(BUF, NBLOCKS, CTX) \
+  mlk_shake128_squeezeblocks((BUF), (NBLOCKS), (CTX))
+#define mlk_xof_release(CTX) mlk_shake128_release((CTX))
+
+#define mlk_xof_x4_init(CTX) mlk_shake128x4_init((CTX))
+#define mlk_xof_x4_absorb(CTX, IN, INBYTES)                             \
+  mlk_shake128x4_absorb_once((CTX), (IN)[0], (IN)[1], (IN)[2], (IN)[3], \
+                             (INBYTES))
+#define mlk_xof_x4_squeezeblocks(BUF, NBLOCKS, CTX)                    \
+  mlk_shake128x4_squeezeblocks((BUF)[0], (BUF)[1], (BUF)[2], (BUF)[3], \
+                               (NBLOCKS), (CTX))
+#define mlk_xof_x4_release(CTX) mlk_shake128x4_release((CTX))
+
+#define MLK_XOF_RATE SHAKE128_RATE
+
+#endif /* !MLK_SYMMETRIC_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/sys.h b/crypto/fipsmodule/ml_kem/mlkem/sys.h
new file mode 100644
index 00000000000..ec05066021b
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/sys.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_SYS_H
+#define MLK_SYS_H
+
+/* Check if we're running on an AArch64 little endian system. _M_ARM64 is set by
+ * MSVC. */
+#if defined(__AARCH64EL__) || defined(_M_ARM64)
+#define MLK_SYS_AARCH64
+#endif
+
+/* Check if we're running on an AArch64 big endian system. */
+#if defined(__AARCH64EB__)
+#define MLK_SYS_AARCH64_EB
+#endif
+
+#if defined(__x86_64__)
+#define MLK_SYS_X86_64
+#if defined(__AVX2__)
+#define MLK_SYS_X86_64_AVX2
+#endif
+#endif /* __x86_64__ */
+
+#if defined(_WIN32)
+#define MLK_SYS_WINDOWS
+#endif
+
+#if !defined(MLK_CONFIG_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
+#define MLK_HAVE_INLINE_ASM
+#endif
+
+/* Try to find endianness, if not forced through CFLAGS already */
+#if !defined(MLK_SYS_LITTLE_ENDIAN) && !defined(MLK_SYS_BIG_ENDIAN)
+#if defined(__BYTE_ORDER__)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define MLK_SYS_LITTLE_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MLK_SYS_BIG_ENDIAN
+#else
+#error "__BYTE_ORDER__ defined, but don't recognize value."
+#endif
+#endif /* __BYTE_ORDER__ */
+#endif /* !MLK_SYS_LITTLE_ENDIAN && !MLK_SYS_BIG_ENDIAN */
+
+/* If MLK_FORCE_AARCH64 is set, assert that we're indeed on an AArch64 system.
+ */
+#if defined(MLK_FORCE_AARCH64) && !defined(MLK_SYS_AARCH64)
+#error "MLK_FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system."
+#endif
+
+/* If MLK_FORCE_AARCH64_EB is set, assert that we're indeed on a big endian
+ * AArch64 system. */
+#if defined(MLK_FORCE_AARCH64_EB) && !defined(MLK_SYS_AARCH64_EB)
+#error \
+    "MLK_FORCE_AARCH64_EB is set, but we're not on a big endian AArch64 system."
+#endif
+
+/* If MLK_FORCE_X86_64 is set, assert that we're indeed on an X86_64 system. */
+#if defined(MLK_FORCE_X86_64) && !defined(MLK_SYS_X86_64)
+#error "MLK_FORCE_X86_64 is set, but we don't seem to be on an X86_64 system."
+#endif
+
+/*
+ * C90 does not have the inline compiler directive yet.
+ * We don't use it in C90 builds.
+ * However, in that case the compiler warns about some inline functions in
+ * header files not being used in every compilation unit that includes that
+ * header. To work around it we silence that warning in that case using
+ * __attribute__((unused)).
+ */
+
+/* Do not use inline for C90 builds*/
+#if !defined(MLK_INLINE)
+#if !defined(inline)
+#if defined(_MSC_VER)
+#define MLK_INLINE __inline
+/* Don't combine __inline and __forceinline */
+#define MLK_ALWAYS_INLINE __forceinline
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define MLK_INLINE inline
+#define MLK_ALWAYS_INLINE MLK_INLINE __attribute__((always_inline))
+#else
+#define MLK_INLINE __attribute__((unused))
+#define MLK_ALWAYS_INLINE MLK_INLINE
+#endif
+
+#else /* !inline */
+#define MLK_INLINE inline
+#define MLK_ALWAYS_INLINE MLK_INLINE __attribute__((always_inline))
+#endif /* inline */
+#endif /* !MLK_INLINE */
+
+/*
+ * C90 does not have the restrict compiler directive yet.
+ * We don't use it in C90 builds.
+ */
+#if !defined(restrict)
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define MLK_RESTRICT restrict
+#else
+#define MLK_RESTRICT
+#endif
+
+#else /* !restrict */
+
+#define MLK_RESTRICT restrict
+#endif /* restrict */
+
+#define MLK_DEFAULT_ALIGN 32
+#define MLK_ALIGN_UP(N) \
+  ((((N) + (MLK_DEFAULT_ALIGN - 1)) / MLK_DEFAULT_ALIGN) * MLK_DEFAULT_ALIGN)
+#if defined(__GNUC__)
+#define MLK_ALIGN __attribute__((aligned(MLK_DEFAULT_ALIGN)))
+#elif defined(_MSC_VER)
+#define MLK_ALIGN __declspec(align(MLK_DEFAULT_ALIGN))
+#else
+#define MLK_ALIGN /* No known support for alignment constraints */
+#endif
+
+
+/* New X86_64 CPUs support Control-flow protection using the CET instructions.
+ * When enabled (through -fcf-protection=), all compilation units (including
+ * empty ones) need to support CET for this to work.
+ * For assembly, this means that source files need to signal support for
+ * CET by setting the appropriate note.gnu.property section.
+ * This can be achieved by including the <cet.h> header in all assembly files.
+ * This file also provides the _CET_ENDBR macro which needs to be placed at
+ * every potential target of an indirect branch.
+ * If CET is enabled _CET_ENDBR maps to the endbr64 instruction, otherwise
+ * it is empty.
+ * In case the compiler does not support CET (e.g., <gcc8, <clang11),
+ * the __CET__ macro is not set and we default to nothing.
+ * Note that we only issue _CET_ENDBR instructions through the MLK_ASM_FN_SYMBOL
+ * macro as the global symbols are the only possible targets of indirect
+ * branches in our code.
+ */
+#if defined(MLK_SYS_X86_64)
+#if defined(__CET__)
+#include <cet.h>
+#define MLK_CET_ENDBR _CET_ENDBR
+#else
+#define MLK_CET_ENDBR
+#endif
+#endif /* MLK_SYS_X86_64 */
+
+#if defined(MLK_CONFIG_CT_TESTING_ENABLED) && !defined(__ASSEMBLER__)
+#include <valgrind/memcheck.h>
+#define MLK_CT_TESTING_SECRET(ptr, len) \
+  VALGRIND_MAKE_MEM_UNDEFINED((ptr), (len))
+#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \
+  VALGRIND_MAKE_MEM_DEFINED((ptr), (len))
+#else /* MLK_CONFIG_CT_TESTING_ENABLED && !__ASSEMBLER__ */
+#define MLK_CT_TESTING_SECRET(ptr, len) \
+  do                                    \
+  {                                     \
+  } while (0)
+#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \
+  do                                        \
+  {                                         \
+  } while (0)
+#endif /* !(MLK_CONFIG_CT_TESTING_ENABLED && !__ASSEMBLER__) */
+
+#if defined(__GNUC__) || defined(__clang__)
+#define MLK_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result))
+#else
+#define MLK_MUST_CHECK_RETURN_VALUE
+#endif
+
+#endif /* !MLK_SYS_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/verify.c b/crypto/fipsmodule/ml_kem/mlkem/verify.c
new file mode 100644
index 00000000000..981c568a871
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/verify.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#include "verify.h"
+
+#if !defined(MLK_USE_ASM_VALUE_BARRIER) && \
+    !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+/*
+ * Masking value used in constant-time functions from
+ * verify.h to block the compiler's range analysis and
+ * thereby reduce the risk of compiler-introduced branches.
+ */
+volatile uint64_t mlk_ct_opt_blocker_u64 = 0;
+
+#else /* !MLK_USE_ASM_VALUE_BARRIER && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
+
+MLK_EMPTY_CU(verify)
+
+#endif /* !(!MLK_USE_ASM_VALUE_BARRIER && !MLK_CONFIG_MULTILEVEL_NO_SHARED) */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/verify.h b/crypto/fipsmodule/ml_kem/mlkem/verify.h
new file mode 100644
index 00000000000..47b5547c7a3
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/verify.h
@@ -0,0 +1,423 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+#ifndef MLK_VERIFY_H
+#define MLK_VERIFY_H
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "cbmc.h"
+#include "common.h"
+
+/* Constant-time comparisons and conditional operations
+
+   We reduce the risk for compilation into variable-time code
+   through the use of 'value barriers'.
+
+   Functionally, a value barrier is a no-op. To the compiler, however,
+   it constitutes an arbitrary modification of its input, and therefore
+   hinders value propagation and range analysis.
+
+   We consider two approaches to implement a value barrier:
+   - An empty inline asm block which marks the target value as clobbered.
+   - XOR'ing with the value of a volatile global that's set to 0;
+     for a discussion / implementation of this idea, see e.g.
+     * https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/hqbtIGFKIpU/m/H14H0wOlBgAJ
+     * https://lib.mceliece.org/libmceliece-20240513/inttypes/crypto_intN.h.html
+
+   The first approach is cheap because it only prevents the compiler
+   from reasoning about the value of the variable past the barrier,
+   but does not directly generate additional instructions.
+
+   The second approach generates redundant loads and XOR operations
+   and therefore comes at a higher runtime cost. However, it appears
+   more robust towards optimization, as compilers should never drop
+   a volatile load.
+
+   We use the empty-ASM value barrier for GCC and clang, and fall
+   back to the global volatile barrier otherwise.
+
+   The global value barrier can be forced by setting
+   MLK_CONFIG_NO_ASM_VALUE_BARRIER.
+
+*/
+
+#if defined(MLK_HAVE_INLINE_ASM) && !defined(MLK_CONFIG_NO_ASM_VALUE_BARRIER)
+#define MLK_USE_ASM_VALUE_BARRIER
+#endif
+
+#if !defined(MLK_USE_ASM_VALUE_BARRIER)
+
+/*
+ * Declaration of global volatile that the global value barrier
+ * is loading from and masking with.
+ */
+#define mlk_ct_opt_blocker_u64 MLK_NAMESPACE(ct_opt_blocker_u64)
+extern volatile uint64_t mlk_ct_opt_blocker_u64;
+
+/* Helper functions for obtaining global masks of various sizes */
+
+/* This contract is not proved but treated as an axiom.
+ *
+ * Its validity relies on the assumption that the global opt-blocker
+ * constant mlk_ct_opt_blocker_u64 is not modified.
+ */
+static MLK_INLINE uint64_t mlk_ct_get_optblocker_u64(void)
+__contract__(ensures(return_value == 0)) { return mlk_ct_opt_blocker_u64; }
+
+static MLK_INLINE uint8_t mlk_ct_get_optblocker_u8(void)
+__contract__(ensures(return_value == 0)) { return (uint8_t)mlk_ct_get_optblocker_u64(); }
+
+static MLK_INLINE uint32_t mlk_ct_get_optblocker_u32(void)
+__contract__(ensures(return_value == 0)) { return (uint32_t)mlk_ct_get_optblocker_u64(); }
+
+static MLK_INLINE int32_t mlk_ct_get_optblocker_i32(void)
+__contract__(ensures(return_value == 0)) { return (int32_t)mlk_ct_get_optblocker_u64(); }
+
+/* Opt-blocker based implementation of value barriers */
+static MLK_INLINE uint32_t mlk_value_barrier_u32(uint32_t b)
+__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_u32()); }
+
+static MLK_INLINE int32_t mlk_value_barrier_i32(int32_t b)
+__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_i32()); }
+
+static MLK_INLINE uint8_t mlk_value_barrier_u8(uint8_t b)
+__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_u8()); }
+
+#else /* !MLK_USE_ASM_VALUE_BARRIER */
+
+static MLK_INLINE uint32_t mlk_value_barrier_u32(uint32_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+static MLK_INLINE int32_t mlk_value_barrier_i32(int32_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+static MLK_INLINE uint8_t mlk_value_barrier_u8(uint8_t b)
+__contract__(ensures(return_value == b))
+{
+  __asm__("" : "+r"(b));
+  return b;
+}
+
+#endif /* MLK_USE_ASM_VALUE_BARRIER */
+
+/*
+ * The ct_cmask_nonzero_xxx functions below make deliberate use of unsigned
+ * overflow, which is fully defined behaviour in C. It is thus safe to disable
+ * this warning.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "unsigned-overflow"
+#endif
+
+/*************************************************
+ * Name:        mlk_ct_cmask_nonzero_u16
+ *
+ * Description: Return 0 if input is zero, and -1 otherwise.
+ *
+ * Arguments:   uint16_t x: Value to be converted into a mask
+ *
+ **************************************************/
+
+/* Reference: Embedded in `cmov_int16()` in the reference implementation.
+ *            - Use value barrier and shift instead of `b = -b` to
+ *              convert condition into mask. */
+static MLK_INLINE uint16_t mlk_ct_cmask_nonzero_u16(uint16_t x)
+__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF)))
+{
+  uint32_t tmp = mlk_value_barrier_u32(-((uint32_t)x));
+  tmp >>= 16;
+  return tmp;
+}
+
+/*************************************************
+ * Name:        mlk_ct_cmask_nonzero_u8
+ *
+ * Description: Return 0 if input is zero, and -1 otherwise.
+ *
+ * Arguments:   uint8_t x: Value to be converted into a mask
+ *
+ **************************************************/
+
+/* Reference: Embedded in `verify()` and `cmov()` in the
+ *            reference implementation.
+ *            - We include a value barrier not present in the
+ *              reference implementation, to prevent the compiler
+ *              from realizing that this function returns a mask. */
+static MLK_INLINE uint8_t mlk_ct_cmask_nonzero_u8(uint8_t x)
+__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF)))
+{
+  uint32_t tmp = mlk_value_barrier_u32(-((uint32_t)x));
+  tmp >>= 24;
+  return tmp;
+}
+
+/* Put unsigned overflow warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*
+ * The mlk_ct_cmask_neg_i16 function below makes deliberate use of
+ * signed to unsigned integer conversion, which is fully defined
+ * behaviour in C. It is thus safe to disable this warning.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+
+/*************************************************
+ * Name:        mlk_ct_cmask_neg_i16
+ *
+ * Description: Return 0 if input is non-negative, and -1 otherwise.
+ *
+ * Arguments:   int16_t x: Value to be converted into a mask
+ *
+ **************************************************/
+
+/* Reference: Embedded in polynomial compression function in the
+ *            reference implementation.
+ *            - Used as part of signed->unsigned conversion for modular
+ *              representatives to detect whether the input is negative.
+ *              This happens in `mlk_poly_reduce()` here, and as part of
+ *              polynomial compression functions in the reference
+ *              implementation. See `mlk_poly_reduce()`.
+ *            - We use value barriers to reduce the risk of
+ *              compiler-introduced branches. */
+static MLK_INLINE uint16_t mlk_ct_cmask_neg_i16(int16_t x)
+__contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0)))
+{
+  int32_t tmp = mlk_value_barrier_i32((int32_t)x);
+  tmp >>= 16; /* relies on arithmetic (sign-replicating) >> of negatives */
+  return (int16_t)tmp;
+}
+
+/* Put conversion warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*
+ * The mlk_ct_sel_xxx functions below make deliberate use of unsigned
+ * to signed integer conversion, which is implementation-defined
+ * behaviour. Here, we assume that uint16_t -> int16_t is inverse
+ * to int16_t -> uint16_t.
+ */
+#ifdef CBMC
+#pragma CPROVER check push
+#pragma CPROVER check disable "conversion"
+#endif
+
+/*************************************************
+ * Name:        mlk_ct_sel_int16
+ *
+ * Description: Functionally equivalent to cond ? a : b,
+ *              but implemented with guards against
+ *              compiler-introduced branches.
+ *
+ * Arguments:   int16_t a:       First alternative
+ *              int16_t b:       Second alternative
+ *              uint16_t cond:   Condition variable.
+ *
+ * Specification:
+ * - With `a = MLKEM_Q_HALF` and `b=0`, this essentially
+ *   implements `Decompress_1` [FIPS 203, Eq (4.8)] in `mlk_poly_frommsg()`.
+ * - With `a = x + MLKEM_Q`, `b = x`, and `cond` indicating whether `x`
+ *   is negative, implements signed->unsigned conversion of modular
+ *   representatives. Questions of representation are not considered
+ *   in the specification [FIPS 203, Section 2.4.1, "The pseudocode is
+ *   agnostic regarding how an integer modulo 𝑚 is represented in
+ *   actual implementations"].
+ *
+ **************************************************/
+
+/* Reference: Embedded in polynomial compression function in the
+ *            reference implementation.
+ *            - Used as part of signed->unsigned conversion for modular
+ *              representatives. This happens in `mlk_poly_reduce()` here,
+ *              and as part of polynomial compression functions in the
+ *              reference implementation. See `mlk_poly_reduce()`.
+ *            - Barrier to reduce the risk of compiler-introduced branches.
+ *            For `a = MLKEM_Q_HALF` and `b=0`, also embedded in
+ *            `poly_frommsg()` from the reference implementation, which uses
+ *            `cmov_int16()` instead. */
+static MLK_INLINE int16_t mlk_ct_sel_int16(int16_t a, int16_t b, uint16_t cond)
+__contract__(ensures(return_value == (cond ? a : b)))
+{
+  uint16_t au = a, bu = b;
+  uint16_t res = bu ^ (mlk_ct_cmask_nonzero_u16(cond) & (au ^ bu));
+  return (int16_t)res;
+}
+
+/* Put unsigned-to-signed warnings in CBMC back into scope */
+#ifdef CBMC
+#pragma CPROVER check pop
+#endif
+
+/*************************************************
+ * Name:        mlk_ct_sel_uint8
+ *
+ * Description: Functionally equivalent to cond ? a : b,
+ *              but implemented with guards against
+ *              compiler-introduced branches.
+ *
+ * Arguments:   uint8_t a:       First alternative
+ *              uint8_t b:       Second alternative
+ *              uint8_t cond:    Condition variable.
+ *
+ **************************************************/
+
+/* Reference: Embedded into `cmov()` in the reference implementation.
+ *            - Use value barrier to get mask from condition value. */
+static MLK_INLINE uint8_t mlk_ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond)
+__contract__(ensures(return_value == (cond ? a : b)))
+{
+  return b ^ (mlk_ct_cmask_nonzero_u8(cond) & (a ^ b));
+}
+
+/*************************************************
+ * Name:        mlk_ct_memcmp
+ *
+ * Description: Compare two arrays for equality in constant time.
+ *
+ * Arguments:   const uint8_t *a: pointer to first byte array
+ *              const uint8_t *b: pointer to second byte array
+ *              size_t len:       length of the byte arrays
+ *
+ * Returns 0 if the byte arrays are equal, a non-zero value otherwise
+ *
+ * Specification:
+ * - Used to securely compute conditional move in
+ *   [FIPS 203, Algorithm 18 (ML-KEM.Decaps_Internal), L9-11]
+ *
+ **************************************************/
+
+/* Reference: `verify()` in the reference implementation
+ *            - We return `uint8_t`, not `int`.
+ *            - We use an additional XOR-accumulator in the comparison loop
+ *              which prevents early abort if the OR-accumulator is 0xFF.
+ *            - We use a value barrier to convert the OR-accumulator into
+ *              a mask. The reference implementation uses a shift which the
+ *              compiler can argue to result in either 0 or 0xFF..FF. */
+static MLK_INLINE uint8_t mlk_ct_memcmp(const uint8_t *a, const uint8_t *b,
+                                        const size_t len)
+__contract__(
+  requires(memory_no_alias(a, len))
+  requires(memory_no_alias(b, len))
+  requires(len <= INT_MAX)
+  ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i]))))
+{
+  uint8_t r = 0, s = 0;
+  unsigned i; /* cannot wrap: the contract bounds len by INT_MAX */
+
+  for (i = 0; i < len; i++)
+  __loop__(
+    invariant(i <= len)
+    invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k])))))
+  {
+    r |= a[i] ^ b[i];
+    /* s is useless, but prevents the loop from being aborted once r=0xff. */
+    s ^= a[i] ^ b[i];
+  }
+
+  /*
+   * - Convert r into a mask; this may not be necessary, but is an additional
+   *   safeguard
+   *   against leaking information about a and b.
+   * - XOR twice with s, separated by a value barrier, to prevent the compiler
+   *   from dropping the s computation in the loop.
+   */
+  return (mlk_value_barrier_u8(mlk_ct_cmask_nonzero_u8(r) ^ s) ^ s);
+}
+
+/*************************************************
+ * Name:        mlk_ct_cmov_zero
+ *
+ * Description: Copy len bytes from x to r if b is zero;
+ *              don't modify r if b is non-zero.
+ *              assumes two's complement representation of negative integers.
+ *              Runs in constant time.
+ *
+ * Arguments:   uint8_t *r:       pointer to output byte array
+ *              const uint8_t *x: pointer to input byte array
+ *              size_t len:       Amount of bytes to be copied
+ *              uint8_t b:        Condition value.
+ *
+ * Specification:
+ * - Used to securely compute conditional move in
+ *   [FIPS 203, Algorithm 18 (ML-KEM.Decaps_Internal), L9-11]
+ *
+ **************************************************/
+
+/* Reference: `cmov()` in the reference implementation.
+ *            - We move if condition value is `0`, not `1`.
+ *            - We use `mlk_ct_sel_uint8` for constant-time selection. */
+static MLK_INLINE void mlk_ct_cmov_zero(uint8_t *r, const uint8_t *x,
+                                        size_t len, uint8_t b)
+__contract__(
+  requires(memory_no_alias(r, len))
+  requires(memory_no_alias(x, len))
+  assigns(memory_slice(r, len)))
+{
+  size_t i;
+  for (i = 0; i < len; i++)
+  __loop__(invariant(i <= len))
+  {
+    r[i] = mlk_ct_sel_uint8(r[i], x[i], b); /* b ? r[i] : x[i] */
+  }
+}
+
+/*************************************************
+ * Name:        mlk_zeroize
+ *
+ * Description: Force-zeroize a buffer.
+ *
+ * Arguments:   void *ptr:        pointer to byte array to be zeroed
+ *              size_t len:       Amount of bytes to be zeroed
+ *
+ * Specification: Used to implement
+ * [FIPS 203, Section 3.3, Destruction of intermediate values]
+ *
+ **************************************************/
+
+/* Reference: Not present in the reference implementation. */
+#if !defined(MLK_CONFIG_CUSTOM_ZEROIZE)
+#if defined(MLK_SYS_WINDOWS)
+#include <windows.h>
+static MLK_INLINE void mlk_zeroize(void *ptr, size_t len)
+__contract__(
+  requires(memory_no_alias(ptr, len))
+  assigns(memory_slice(ptr, len))) { SecureZeroMemory(ptr, len); }
+#elif defined(MLK_HAVE_INLINE_ASM)
+#include <string.h>
+static MLK_INLINE void mlk_zeroize(void *ptr, size_t len)
+__contract__(
+  requires(memory_no_alias(ptr, len))
+  assigns(memory_slice(ptr, len)))
+{
+  memset(ptr, 0, len);
+  /* This follows OpenSSL and seems sufficient to prevent the compiler
+   * from optimizing away the memset.
+   *
+   * If there was a reliable way to detect availability of memset_s(),
+   * that would be preferred. */
+  __asm__ __volatile__("" : : "r"(ptr) : "memory");
+}
+#else /* !MLK_SYS_WINDOWS && MLK_HAVE_INLINE_ASM */
+#error No plausibly-secure implementation of mlk_zeroize available. Please provide your own using MLK_CONFIG_CUSTOM_ZEROIZE.
+#endif /* !MLK_SYS_WINDOWS && !MLK_HAVE_INLINE_ASM */
+#endif /* !MLK_CONFIG_CUSTOM_ZEROIZE */
+
+#endif /* !MLK_VERIFY_H */
diff --git a/crypto/fipsmodule/ml_kem/mlkem/zetas.inc b/crypto/fipsmodule/ml_kem/mlkem/zetas.inc
new file mode 100644
index 00000000000..66cb7d15185
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem/zetas.inc
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2024-2025 The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * WARNING: This file is auto-generated from scripts/autogen
+ *          Do not modify it directly.
+ */
+
+#include <stdint.h>
+
+/*
+ * Table of zeta values used in the reference NTT and inverse NTT.
+ * See autogen for details.
+ */
+static MLK_ALIGN const int16_t zetas[128] = {
+    -1044, -758,  -359,  -1517, 1493,  1422,  287,   202,  -171,  622,   1577,
+    182,   962,   -1202, -1474, 1468,  573,   -1325, 264,  383,   -829,  1458,
+    -1602, -130,  -681,  1017,  732,   608,   -1542, 411,  -205,  -1571, 1223,
+    652,   -552,  1015,  -1293, 1491,  -282,  -1544, 516,  -8,    -320,  -666,
+    -1618, -1162, 126,   1469,  -853,  -90,   -271,  830,  107,   -1421, -247,
+    -951,  -398,  961,   -1508, -725,  448,   -1065, 677,  -1275, -1103, 430,
+    555,   843,   -1251, 871,   1550,  105,   422,   587,  177,   -235,  -291,
+    -460,  1574,  1653,  -246,  778,   1159,  -147,  -777, 1483,  -602,  1119,
+    -1590, 644,   -872,  349,   418,   329,   -156,  -75,  817,   1097,  603,
+    610,   1322,  -1285, -1465, 384,   -1215, -136,  1218, -1335, -874,  220,
+    -1187, -1659, -1185, -1530, -1278, 794,   -1510, -854, -870,  478,   -108,
+    -308,  996,   991,   958,   -1460, 1522,  1628,
+};
diff --git a/crypto/fipsmodule/ml_kem/mlkem_native_config.h b/crypto/fipsmodule/ml_kem/mlkem_native_config.h
new file mode 100644
index 00000000000..643b8a83317
--- /dev/null
+++ b/crypto/fipsmodule/ml_kem/mlkem_native_config.h
@@ -0,0 +1,66 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR ISC
+
+#ifndef MLK_CONFIG_H
+#define MLK_CONFIG_H
+
+#include "../../internal.h"
+
+// Namespacing: All symbols are of the form mlkem*. Level-specific
+// symbols are further prefixed with their security level, e.g.
+// mlkem512*, mlkem768*, mlkem1024*.
+#define MLK_CONFIG_NAMESPACE_PREFIX mlkem
+
+// Everything is built in a single CU, so both internal and external
+// mlkem-native API can have internal linkage.
+#define MLK_CONFIG_INTERNAL_API_QUALIFIER static
+#define MLK_CONFIG_EXTERNAL_API_QUALIFIER static
+
+// Enable PCT if and only if AWS-LC is built in FIPS-mode.
+#if defined(AWSLC_FIPS)
+#define MLK_CONFIG_KEYGEN_PCT
+#endif
+
+#if defined(BORINGSSL_FIPS_BREAK_TESTS)
+#define MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST
+#if !defined(__ASSEMBLER__) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+#include "mlkem/sys.h"
+static MLK_INLINE int mlk_break_pct(void) {
+  return boringssl_fips_break_test("MLKEM_PWCT");
+}
+#endif // !__ASSEMBLER__ && !MLK_CONFIG_MULTILEVEL_NO_SHARED
+#endif // BORINGSSL_FIPS_BREAK_TESTS
+
+// Enable valgrind-based assertions in mlkem-native through macro
+// from AWS-LC/BoringSSL.
+#if defined(BORINGSSL_CONSTANT_TIME_VALIDATION)
+#define MLK_CONFIG_CT_TESTING_ENABLED
+#endif
+
+// Map zeroization function to the one used by AWS-LC
+#define MLK_CONFIG_CUSTOM_ZEROIZE
+#if !defined(__ASSEMBLER__) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+#include <stdint.h>
+#include "mlkem/sys.h"
+#include <openssl/base.h>
+static MLK_INLINE void mlk_zeroize(void *ptr, size_t len) {
+    OPENSSL_cleanse(ptr, len);
+}
+#endif // !__ASSEMBLER__ && !MLK_CONFIG_MULTILEVEL_NO_SHARED
+
+// Map randombytes function to the one used by AWS-LC
+#define MLK_CONFIG_CUSTOM_RANDOMBYTES
+#if !defined(__ASSEMBLER__) && !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)
+#include <stdint.h>
+#include "mlkem/sys.h"
+#include <openssl/rand.h>
+static MLK_INLINE void mlk_randombytes(void *ptr, size_t len) {
+    RAND_bytes(ptr, len); // NOTE(review): return value discarded - confirm
+}
+#endif // !__ASSEMBLER__ && !MLK_CONFIG_MULTILEVEL_NO_SHARED
+
+#if defined(OPENSSL_NO_ASM)
+#define MLK_CONFIG_NO_ASM
+#endif
+
+#endif // MLK_CONFIG_H

From 705bb1b9b67bbffb8dfe53e4a38a4af927649844 Mon Sep 17 00:00:00 2001
From: Hanno Becker <beckphan@amazon.co.uk>
Date: Thu, 20 Feb 2025 05:33:22 +0000
Subject: [PATCH 3/3] Update third party licensing note

This commit updates the LICENSE file to indicate that
mlkem-native is distributed under Apache 2.0.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
---
 LICENSE | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/LICENSE b/LICENSE
index 0d2effbe974..18d3cd7176e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -215,6 +215,9 @@ For Keccak and AES we are using public-domain
 code from sources and by authors listed in
 comments on top of the respective files.
 
+The code in crypto/fipsmodule/ml_kem/mlkem is imported from mlkem-native
+(https://github.com/pq-code-package/mlkem-native) and carries the
+Apache 2.0 license. This license is reproduced at the bottom of this file.
 
 Licenses for support code
 -------------------------
@@ -286,10 +289,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
  
-Apache 2.0 license for AWS-LC content
--------------------------------------
  
  
+Apache 2.0 license for AWS-LC content and mlkem-native
+------------------------------------------------------
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/