From ff8eca86e43441e71a15f1ff47d01a064cc62125 Mon Sep 17 00:00:00 2001
From: "Matthias J. Kannwischer" <matthias@kannwischer.eu>
Date: Sat, 14 Feb 2026 17:42:32 +0100
Subject: [PATCH 1/2] HOL-Light: Add HOL Light proof for aarch64
 polyz_unpack_{17,19}

Add functional correctness proofs for polyz_unpack_17 and polyz_unpack_19.
Closely following the decompress proofs from mlkem-native:
https://github.com/pq-code-package/mlkem-native/pull/1543

Signed-off-by: Matthias J. Kannwischer <matthias@kannwischer.eu>
---
 .github/workflows/hol_light.yml               |   6 +
 nix/s2n_bignum/default.nix                    |   4 +-
 proofs/hol_light/aarch64/Makefile             |   4 +-
 .../aarch64/mldsa/mldsa_polyz_unpack_17.S     |  68 ++++
 .../aarch64/mldsa/mldsa_polyz_unpack_19.S     |  65 ++++
 .../hol_light/aarch64/proofs/aarch64_utils.ml | 234 +++++++++++-
 .../hol_light/aarch64/proofs/dump_bytecode.ml |   8 +
 .../aarch64/proofs/mldsa_polyz_unpack_17.ml   | 338 ++++++++++++++++++
 .../aarch64/proofs/mldsa_polyz_unpack_19.ml   | 255 +++++++++++++
 .../proofs/mldsa_polyz_unpack_consts.ml       |  39 ++
 proofs/hol_light/common/mldsa_specs.ml        | 312 ++++++++++++++++
 scripts/autogen                               |  39 ++
 12 files changed, 1368 insertions(+), 4 deletions(-)
 create mode 100644 proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S
 create mode 100644 proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S
 create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml
 create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml
 create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml

diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml
index 5d9356dc8..d15d55067 100644
--- a/.github/workflows/hol_light.yml
+++ b/.github/workflows/hol_light.yml
@@ -13,6 +13,7 @@ on:
      - 'proofs/hol_light/aarch64/Makefile'
      - 'proofs/hol_light/aarch64/**/*.S'
      - 'proofs/hol_light/aarch64/**/*.ml'
+     - 'proofs/hol_light/common/**/*.ml'
      - 'proofs/hol_light/x86_64/Makefile'
      - 'proofs/hol_light/x86_64/**/*.S'
      - 'proofs/hol_light/x86_64/**/*.ml'
@@ -28,6 +29,7 @@ on:
       - 'proofs/hol_light/aarch64/Makefile'
       - 'proofs/hol_light/aarch64/**/*.S'
       - 'proofs/hol_light/aarch64/**/*.ml'
+      - 'proofs/hol_light/common/**/*.ml'
       - 'proofs/hol_light/x86_64/Makefile'
       - 'proofs/hol_light/x86_64/**/*.S'
       - 'proofs/hol_light/x86_64/**/*.ml'
@@ -87,6 +89,10 @@ jobs:
             needs: ["aarch64_utils.ml"]
           - name: mldsa_poly_chknorm
             needs: ["aarch64_utils.ml"]
+          - name: mldsa_polyz_unpack_17
+            needs: ["aarch64_utils.ml", "mldsa_polyz_unpack_consts.ml", "mldsa_specs.ml"]
+          - name: mldsa_polyz_unpack_19
+            needs: ["aarch64_utils.ml", "mldsa_polyz_unpack_consts.ml", "mldsa_specs.ml"]
     name: AArch64 HOL Light proof for ${{ matrix.proof.name }}.S
     runs-on: pqcp-arm64
     if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
diff --git a/nix/s2n_bignum/default.nix b/nix/s2n_bignum/default.nix
index 6857fe64e..04e2339b0 100644
--- a/nix/s2n_bignum/default.nix
+++ b/nix/s2n_bignum/default.nix
@@ -4,12 +4,12 @@
 { stdenv, fetchFromGitHub, writeText, ... }:
 stdenv.mkDerivation rec {
   pname = "s2n_bignum";
-  version = "3bfe68deb9fbdaf23be0a927c362a87a799adc28";
+  version = "cba3956c7a20d22f08ef6f49fe162e9d7c07867c";
   src = fetchFromGitHub {
     owner = "awslabs";
     repo = "s2n-bignum";
     rev = "${version}";
-    hash = "sha256-C3SdYZ/PhfTmHefz3klO3l/lbEgtA8Hq9wjDmjd+Fek=";
+    hash = "sha256-5UOu1aUqPGiDPhpBlUDjkM/GqP4SILT/p4pKL9DBFjg=";
   };
   setupHook = writeText "setup-hook.sh" ''
     export S2N_BIGNUM_DIR="$1"
diff --git a/proofs/hol_light/aarch64/Makefile b/proofs/hol_light/aarch64/Makefile
index 0d87b2555..306d657a4 100644
--- a/proofs/hol_light/aarch64/Makefile
+++ b/proofs/hol_light/aarch64/Makefile
@@ -55,7 +55,9 @@ SPLIT=tr ';' '\n'
 
 OBJ = mldsa/mldsa_ntt.o \
       mldsa/mldsa_poly_caddq.o \
-      mldsa/mldsa_poly_chknorm.o
+      mldsa/mldsa_poly_chknorm.o \
+      mldsa/mldsa_polyz_unpack_17.o \
+      mldsa/mldsa_polyz_unpack_19.o
 
 # According to
 # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms,
diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S
new file mode 100644
index 000000000..25bfc8e88
--- /dev/null
+++ b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) The mldsa-native project authors
+ * Copyright (c) The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+ */
+
+
+/*
+ * WARNING: This file is auto-derived from the mldsa-native source file
+ *   dev/aarch64_opt/src/polyz_unpack_17_asm.S using scripts/simpasm. Do not modify it directly.
+ */
+
+#if defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
+
+.text
+.balign 4
+#ifdef __APPLE__
+.global _PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm
+_PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm:
+#else
+.global PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm
+PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm:
+#endif
+
+        .cfi_startproc
+        ldr	q24, [x2]
+        ldr	q25, [x2, #0x10]
+        ldr	q26, [x2, #0x20]
+        ldr	q27, [x2, #0x30]
+        mov	x3, #0xfe00000000       // =1090921693184
+        mov	v28.d[0], x3
+        mov	x3, #0xfc               // =252
+        movk	x3, #0xfa, lsl #32
+        mov	v28.d[1], x3
+        movi	v29.4s, #0x3, msl #16
+        movi	v30.4s, #0x2, lsl #16
+        mov	x9, #0x10               // =16
+
+Lpolyz_unpack_17_loop:
+        ld1	{ v0.16b, v1.16b }, [x1]
+        add	x1, x1, #0x14
+        ld1	{ v2.16b }, [x1], #16
+        tbl	v4.16b, { v0.16b }, v24.16b
+        tbl	v5.16b, { v0.16b, v1.16b }, v25.16b
+        tbl	v6.16b, { v1.16b }, v26.16b
+        tbl	v7.16b, { v1.16b, v2.16b }, v27.16b
+        ushl	v4.4s, v4.4s, v28.4s
+        and	v4.16b, v4.16b, v29.16b
+        sub	v4.4s, v30.4s, v4.4s
+        ushl	v5.4s, v5.4s, v28.4s
+        and	v5.16b, v5.16b, v29.16b
+        sub	v5.4s, v30.4s, v5.4s
+        ushl	v6.4s, v6.4s, v28.4s
+        and	v6.16b, v6.16b, v29.16b
+        sub	v6.4s, v30.4s, v6.4s
+        ushl	v7.4s, v7.4s, v28.4s
+        and	v7.16b, v7.16b, v29.16b
+        sub	v7.4s, v30.4s, v7.4s
+        str	q5, [x0, #0x10]
+        str	q6, [x0, #0x20]
+        str	q7, [x0, #0x30]
+        str	q4, [x0], #0x40
+        subs	x9, x9, #0x1
+        b.ne	Lpolyz_unpack_17_loop
+        ret
+        .cfi_endproc
diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S
new file mode 100644
index 000000000..f921ed77c
--- /dev/null
+++ b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) The mldsa-native project authors
+ * Copyright (c) The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+ */
+
+
+/*
+ * WARNING: This file is auto-derived from the mldsa-native source file
+ *   dev/aarch64_opt/src/polyz_unpack_19_asm.S using scripts/simpasm. Do not modify it directly.
+ */
+
+#if defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
+
+.text
+.balign 4
+#ifdef __APPLE__
+.global _PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm
+_PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm:
+#else
+.global PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm
+PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm:
+#endif
+
+        .cfi_startproc
+        ldr	q24, [x2]
+        ldr	q25, [x2, #0x10]
+        ldr	q26, [x2, #0x20]
+        ldr	q27, [x2, #0x30]
+        mov	x3, #0xfc00000000       // =1082331758592
+        dup	v28.2d, x3
+        movi	v29.4s, #0xf, msl #16
+        movi	v30.4s, #0x8, lsl #16
+        mov	x9, #0x10               // =16
+
+Lpolyz_unpack_19_loop:
+        ld1	{ v0.16b, v1.16b }, [x1]
+        add	x1, x1, #0x18
+        ld1	{ v2.16b }, [x1], #16
+        tbl	v4.16b, { v0.16b }, v24.16b
+        tbl	v5.16b, { v0.16b, v1.16b }, v25.16b
+        tbl	v6.16b, { v1.16b }, v26.16b
+        tbl	v7.16b, { v1.16b, v2.16b }, v27.16b
+        ushl	v4.4s, v4.4s, v28.4s
+        and	v4.16b, v4.16b, v29.16b
+        sub	v4.4s, v30.4s, v4.4s
+        ushl	v5.4s, v5.4s, v28.4s
+        and	v5.16b, v5.16b, v29.16b
+        sub	v5.4s, v30.4s, v5.4s
+        ushl	v6.4s, v6.4s, v28.4s
+        and	v6.16b, v6.16b, v29.16b
+        sub	v6.4s, v30.4s, v6.4s
+        ushl	v7.4s, v7.4s, v28.4s
+        and	v7.16b, v7.16b, v29.16b
+        sub	v7.4s, v30.4s, v7.4s
+        str	q5, [x0, #0x10]
+        str	q6, [x0, #0x20]
+        str	q7, [x0, #0x30]
+        str	q4, [x0], #0x40
+        subs	x9, x9, #0x1
+        b.ne	Lpolyz_unpack_19_loop
+        ret
+        .cfi_endproc
diff --git a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml
index 1187b79aa..34d06c696 100644
--- a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml
+++ b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml
@@ -3,6 +3,8 @@
  * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
  *)
 
+needs "common/mldsa_specs.ml";;
+
 (* Merge 4 x bytes32 reads into bytes128 reads *)
 let MEMORY_128_FROM_32_TAC =
   let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64`
@@ -14,7 +16,9 @@ let MEMORY_128_FROM_32_TAC =
       READ_MEMORY_MERGE_CONV 2 (subst[itm,n_tm] pat') in
     MP_TAC(end_itlist CONJ (map f (0--(n-1))));;
 
-(* Utility for executing until target PC is reached *)
+(* ------------------------------------------------------------------------- *)
+(* Symbolic execution until target PC is reached.                            *)
+(* ------------------------------------------------------------------------- *)
 let MAP_UNTIL_TARGET_PC f n = fun (asl, w) ->
   let is_pc_condition = can (term_match [] `read PC some_state = some_value`) in
   let extract_target_pc_from_goal goal =
@@ -33,3 +37,231 @@ let MAP_UNTIL_TARGET_PC f n = fun (asl, w) ->
   let rec core n (asl, w) =
     (TARGET_PC_REACHED_TAC target_pc ORELSE (f n THEN core (n + 1))) (asl, w)
   in core n (asl, w);;
+
+(* ========================================================================= *)
+(* SIMD simplification: subword extraction + numeric reduction + folding.    *)
+(* ========================================================================= *)
+
+let SIMD_SIMPLIFY_CONV unfold_defs =
+  TOP_DEPTH_CONV
+   (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC
+  DEPTH_CONV WORD_NUM_RED_CONV THENC
+  REWRITE_CONV (map GSYM unfold_defs);;
+
+let SIMD_SIMPLIFY_TAC unfold_defs =
+  let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in
+  TRY(FIRST_X_ASSUM
+   (ASSUME_TAC o
+    CONV_RULE(RAND_CONV (SIMD_SIMPLIFY_CONV unfold_defs)) o
+    check (simdable o concl)));;
+
+(* ========================================================================= *)
+(* Parametric infrastructure for d-bit packed coefficients (SIMD).           *)
+(* Supports d=18 (GAMMA1=2^17) and d=20 (GAMMA1=2^19).                      *)
+(* ========================================================================= *)
+
+(* Convert MOD/DIV expressions to word_subword of (16*d)-bit word *)
+let mk_base_simps d =
+  let total = 16 * d in
+  let rem = total - 256 in
+  let total_ty = mk_finty (Num.num_of_int total) in
+  let rem_ty = mk_finty (Num.num_of_int rem) in
+  let mod_128 = CONV_RULE NUM_REDUCE_CONV (prove(
+    inst [total_ty, `:N`]
+      `word (t MOD 2 EXP 128) : 128 word =
+       word_subword (word t : N word) (0, 128)`,
+    REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD; DIMINDEX_128] THEN
+    REWRITE_TAC[EXP; DIV_1; MOD_MOD_REFL; MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN
+    CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN
+    MP_TAC (SPECL [`t:num`; `2`; mk_small_numeral total; `128`] MOD_MOD_EXP_MIN) THEN
+    CONV_TAC NUM_REDUCE_CONV THEN DISCH_THEN (SUBST1_TAC o SYM) THEN REFL_TAC)) in
+  let div_128_mod_128 = CONV_RULE NUM_REDUCE_CONV (prove(
+    inst [total_ty, `:N`]
+      `word ((t DIV 2 EXP 128) MOD 2 EXP 128) : 128 word =
+       word_subword (word t : N word) (128, 128)`,
+    REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD; DIMINDEX_128] THEN
+    CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN
+    REWRITE_TAC[ARITH_RULE `MIN 128 128 = 128`; MOD_MOD_REFL] THEN
+    REWRITE_TAC[DIV_MOD; GSYM EXP_ADD; MOD_MOD_EXP_MIN] THEN
+    CONV_TAC NUM_REDUCE_CONV)) in
+  let div_256 = CONV_RULE NUM_REDUCE_CONV (prove(
+    inst [total_ty, `:N`; rem_ty, `:M`]
+      `word (t DIV 2 EXP 256) : M word =
+       word_subword (word t : N word) (256, dimindex(:M))`,
+    REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD] THEN
+    CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+    REWRITE_TAC[DIV_MOD; GSYM EXP_ADD; MOD_MOD_EXP_MIN] THEN
+    CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL])) in
+  [mod_128; div_128_mod_128; div_256];;
+
+(* Split ncoeffs d-bit coefficients into chunks of chunk_size *)
+let mk_split_theorem d ncoeffs chunk_size =
+  let total = d * chunk_size in
+  let nchunks = ncoeffs / chunk_size in
+  let d_ty = mk_finty (Num.num_of_int d) in
+  let total_ty = mk_finty (Num.num_of_int total) in
+  prove(
+    subst [mk_small_numeral ncoeffs, `ncoeffs:num`;
+           mk_small_numeral chunk_size, `cs:num`;
+           mk_small_numeral nchunks, `nc:num`]
+    (inst [d_ty, `:D`; total_ty, `:T`]
+      `!(l: (D word) list). LENGTH l = ncoeffs ==>
+         num_of_wordlist l = num_of_wordlist (MAP ((word:num->T word) o num_of_wordlist)
+           (list_of_seq (\i. SUB_LIST (cs * i, cs) l) nc))`),
+    REPEAT STRIP_TAC THEN
+    UNDISCH_THEN (subst [mk_small_numeral ncoeffs, `n:num`]
+      (inst [d_ty, `:D`] `LENGTH (l : (D word) list) = n`)) (fun th ->
+       GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV)
+         [MATCH_MP (CONV_RULE NUM_REDUCE_CONV
+           (ISPECL [mk_small_numeral chunk_size; mk_small_numeral nchunks;
+                    `l:'a list`] SUBLIST_PARTITION)) th]
+       THEN ASSUME_TAC th) THEN
+    IMP_REWRITE_TAC [CONV_RULE (ONCE_DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV)
+      (ISPECL [inst [d_ty, `:D`] `ll: ((D word) list) list`;
+               mk_small_numeral chunk_size]
+        (INST_TYPE [d_ty, `:N`; total_ty, `:M`] NUM_OF_WORDLIST_FLATTEN))] THEN
+    CONV_TAC(ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN
+    ASM_REWRITE_TAC[ALL; LENGTH_SUB_LIST] THEN
+    ARITH_TAC);;
+
+(* Extract individual d-bit coefficients from (d*chunk_size)-bit word *)
+let mk_subword_cases d chunk_size =
+  let total = d * chunk_size in
+  let d_ty = mk_finty (Num.num_of_int d) in
+  let total_ty = mk_finty (Num.num_of_int total) in
+  let arith_simp =
+    let lhs = mk_eq(mk_small_numeral total,
+                mk_comb(mk_comb(`( * ):num->num->num`,
+                  mk_small_numeral d), `n:num`)) in
+    let rhs = mk_eq(`n:num`, mk_small_numeral chunk_size) in
+    ARITH_RULE (mk_eq(lhs, rhs)) in
+  let meson_simp =
+    let n_eq = mk_eq(`n:num`, mk_small_numeral chunk_size) in
+    let k_lt_n = mk_comb(mk_comb(`(<):num->num->bool`, `k:num`), `n:num`) in
+    let k_lt_cs = mk_comb(mk_comb(`(<):num->num->bool`, `k:num`),
+                    mk_small_numeral chunk_size) in
+    MESON[] (mk_eq(mk_conj(n_eq, k_lt_n), mk_conj(n_eq, k_lt_cs))) in
+  let base =
+    let th = INST_TYPE [total_ty, `:KL`; d_ty, `:L`] WORD_SUBWORD_NUM_OF_WORDLIST in
+    let th = CONV_RULE(DEPTH_CONV DIMINDEX_CONV) th in
+    REWRITE_RULE[arith_simp; meson_simp] th in
+  let mk k =
+    let th = SPEC (mk_small_numeral k)
+      (SPEC (inst [d_ty, `:L`] `ls:(L word)list`) base) in
+    CONV_RULE NUM_REDUCE_CONV (REWRITE_RULE[ARITH] th) in
+  map mk (0 -- (chunk_size - 1));;
+
+(* ========================================================================= *)
+(* zunpack lane conversion for TBL + USHL + AND + SUB pipeline.              *)
+(* ========================================================================= *)
+
+let ZUNPACK_LANE_CONV d i tm =
+  let gamma1 = 1 lsl (d - 1) in
+  let word_bits = 16 * d in
+  let t_ty = mk_finty (Num.num_of_int word_bits) in
+  let is_target t =
+    try fst(dest_type(type_of t)) = "word" &&
+        Num.int_of_num(dest_finty(hd(snd(dest_type(type_of t))))) = word_bits
+    with _ -> false in
+  let t_var_opt = try Some(find_term is_target tm) with _ -> None in
+  match t_var_opt with
+  | Some t_var ->
+      let d_ty = mk_finty (Num.num_of_int d) in
+      let goal = mk_eq(tm,
+        subst [mk_small_numeral (d*i), `pos:num`;
+               mk_small_numeral d, `bw:num`;
+               mk_small_numeral gamma1, `g:num`;
+               t_var, mk_var("t", mk_type("word",[t_ty]))]
+          (inst [d_ty, `:B`; t_ty, `:T`]
+            `word_sub (word g : 32 word)
+                      (word_zx (word_subword (t : T word) (pos,bw) : B word))`)) in
+      WORD_BLAST goal
+  | None -> failwith ("ZUNPACK_LANE_CONV: no " ^ string_of_int word_bits ^ "-bit word");;
+
+let ZUNPACK_128_CONV d tm =
+  tryfind (fun base_i ->
+    let f i = ZUNPACK_LANE_CONV d (base_i + i) in
+    RAND_CONV (
+      COMB2_CONV
+        (RAND_CONV (COMB2_CONV (RAND_CONV (f 3)) (f 2)))
+        (COMB2_CONV (RAND_CONV (f 1)) (f 0)))
+    tm) [0; 4; 8; 12];;
+
+let SIMP_ZUNPACK_TAC d zunpack_correct =
+  let zunpack_const =
+    fst(strip_comb(rhs(snd(strip_forall(concl zunpack_correct))))) in
+  let already_processed tm =
+    can (find_term ((=) zunpack_const)) tm in
+  RULE_ASSUM_TAC (fun th ->
+    if already_processed (concl th) then th
+    else CONV_RULE (TRY_CONV (ZUNPACK_128_CONV d) THENC
+                    TRY_CONV (ONCE_REWRITE_CONV [zunpack_correct])) th);;
+
+(* ------------------------------------------------------------------------- *)
+(* Overlapping memory read: derive bytes128 at an unaligned offset from      *)
+(* bytes128@16 and a tail read at offset 32.                                 *)
+(* For D=20: bytes128@16 + bytes64@32 -> bytes128@24                         *)
+(* For D=18: bytes128@16 + bytes32@32 -> bytes128@20                         *)
+(* ------------------------------------------------------------------------- *)
+
+let split_k_l_at base k l =
+  let a_tm = mk_comb(mk_comb(`word_add:int64->int64->int64`, `a:int64`),
+    mk_comb(`word:num->int64`, mk_small_numeral base)) in
+  CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC DEPTH_CONV NUM_MULT_CONV)
+    (INST [mk_small_numeral k,`k:num`; mk_small_numeral l,`l:num`;
+           a_tm, `a:int64`] READ_BYTES_SPLIT_ANY);;
+
+(* For D=20: bytes128@16 + bytes64@32 -> bytes128@24 *)
+let BYTES128_FROM_OVERLAP_64 = prove
+ (`read (memory :> bytes128 (word_add a (word 16))) (s:armstate) = (word m16 : int128) /\
+   read (memory :> bytes64 (word_add a (word 32))) (s:armstate) = (word m64 : int64)
+   ==> read (memory :> bytes128 (word_add a (word 24))) s =
+       (word_join (word m64 : int64) (word(m16 DIV 2 EXP 64) : int64) : int128)`,
+  REWRITE_TAC[BYTES128_WBYTES; BYTES64_WBYTES; READ_COMPONENT_COMPOSE;
+              GSYM VAL_EQ; VAL_READ_WBYTES] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+  ABBREV_TAC `m = read memory (s:armstate)` THEN
+  REWRITE_TAC[VAL_WORD_JOIN; DIMINDEX_64; VAL_WORD; DIMINDEX_128] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  ONCE_REWRITE_TAC[split_k_l_at 16 8 8] THEN
+  ONCE_REWRITE_TAC[split_k_l_at 24 8 8] THEN
+  REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN
+  CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN
+  CONV_TAC NUM_REDUCE_CONV THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN
+  let COMMON_SETUP =
+    SUBGOAL_THEN
+     `(m16 DIV 18446744073709551616) MOD 18446744073709551616 < 18446744073709551616 /\
+      m64 MOD 18446744073709551616 < 18446744073709551616`
+     STRIP_ASSUME_TAC
+      THENL [REWRITE_TAC[MOD_LT_EQ; EXP_EQ_0; ARITH_EQ] THEN ARITH_TAC; ALL_TAC] THEN
+    SUBGOAL_THEN
+     `(18446744073709551616 * m64 MOD 18446744073709551616 +
+       (m16 DIV 18446744073709551616) MOD 18446744073709551616) <
+      340282366920938463463374607431768211456`
+     ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN
+    ASM_SIMP_TAC[MOD_LT] in
+  CONJ_TAC THENL [
+    COMMON_SETUP THEN
+    REWRITE_TAC[MOD_MULT_ADD; MOD_MOD_EXP_MIN; GSYM EXP_ADD] THEN
+    CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL] THEN
+    REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV
+      (SPECL [`m16:num`; `18446744073709551616`; `18446744073709551616`] DIV_MOD))];
+    COMMON_SETUP THEN
+    SIMP_TAC[DIV_MULT_ADD; ARITH_EQ] THEN ASM_SIMP_TAC[DIV_LT] THEN ARITH_TAC]);;
+
+(* Instantiate overlap theorem for chunk i and ASSUME_TAC the result *)
+let DERIVE_OVERLAP_TAC overlap_thm chunk_size i =
+  let off16 = chunk_size*i + 16 and off32 = chunk_size*i + 32 in
+  let w16 = mk_small_numeral off16 and w32 = mk_small_numeral off32 in
+  let has t th = can (find_term ((=) t)) (concl th) in
+  let a_val = mk_comb(mk_comb(`word_add:int64->int64->int64`, `b:int64`),
+    mk_comb(`word:num->int64`, mk_small_numeral (chunk_size * i))) in
+  let inst = INST [a_val, `a:int64`; `s0:armstate`, `s:armstate`] overlap_thm in
+  let thm = CONV_RULE (ONCE_DEPTH_CONV(GEN_REWRITE_CONV I [WORD_ADD_ASSOC_CONSTS]) THENC
+             DEPTH_CONV NUM_ADD_CONV) inst in
+  FIRST_ASSUM(fun th128 ->
+    if not(has w16 th128 && has `bytes128` th128 && has `b:int64` th128) then failwith "" else
+    FIRST_ASSUM(fun thtail ->
+      if not(has w32 thtail && has `b:int64` thtail &&
+             not(has `bytes128` thtail)) then failwith "" else
+      ASSUME_TAC(MATCH_MP thm (CONJ th128 thtail))));;
diff --git a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml
index 358750c8e..52a0aef26 100644
--- a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml
+++ b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml
@@ -17,3 +17,11 @@ print_string "==== bytecode end =====================================\n\n";;
 print_string "=== bytecode start: aarch64/mldsa/mldsa_poly_chknorm.o ===\n";;
 print_literal_from_elf "aarch64/mldsa/mldsa_poly_chknorm.o";;
 print_string "==== bytecode end =====================================\n\n";;
+
+print_string "=== bytecode start: aarch64/mldsa/mldsa_polyz_unpack_17.o ===\n";;
+print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_17.o";;
+print_string "==== bytecode end =====================================\n\n";;
+
+print_string "=== bytecode start: aarch64/mldsa/mldsa_polyz_unpack_19.o ===\n";;
+print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_19.o";;
+print_string "==== bytecode end =====================================\n\n";;
diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml
new file mode 100644
index 000000000..38e0cff90
--- /dev/null
+++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml
@@ -0,0 +1,338 @@
+(*
+ * Copyright (c) The mldsa-native project authors
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
+ *)
+
+(* ========================================================================= *)
+(* Functional correctness of polyz_unpack_17:                                *)
+(* Unpack polynomial z with 18-bit packed coefficients (GAMMA1 = 2^17)       *)
+(* Maps packed [0, 2^18-1] to signed [-(2^17-1), 2^17] via GAMMA1 - x       *)
+(* ========================================================================= *)
+
+needs "arm/proofs/base.ml";;
+needs "aarch64/proofs/aarch64_utils.ml";;
+needs "aarch64/proofs/mldsa_polyz_unpack_consts.ml";;
+
+(**** print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_17.o";;
+ ****)
+
+let mldsa_polyz_unpack_17_mc = define_assert_from_elf
+  "mldsa_polyz_unpack_17_mc" "aarch64/mldsa/mldsa_polyz_unpack_17.o"
+(*** BYTECODE START ***)
+[
+  0x3dc00058;       (* arm_LDR Q24 X2 (Immediate_Offset (word 0)) *)
+  0x3dc00459;       (* arm_LDR Q25 X2 (Immediate_Offset (word 16)) *)
+  0x3dc0085a;       (* arm_LDR Q26 X2 (Immediate_Offset (word 32)) *)
+  0x3dc00c5b;       (* arm_LDR Q27 X2 (Immediate_Offset (word 48)) *)
+  0xd2c01fc3;       (* arm_MOVZ X3 (word 254) 32 *)
+  0x4e081c7c;       (* arm_INS_GEN Q28 X3 0 64 *)
+  0xd2801f83;       (* arm_MOV X3 (rvalue (word 252)) *)
+  0xf2c01f43;       (* arm_MOVK X3 (word 250) 32 *)
+  0x4e181c7c;       (* arm_INS_GEN Q28 X3 64 64 *)
+  0x4f00d47d;       (* arm_MOVI Q29 (word 1125895612137471) *)
+  0x4f00445e;       (* arm_MOVI Q30 (word 562949953552384) *)
+  0xd2800209;       (* arm_MOV X9 (rvalue (word 16)) *)
+  0x4c40a020;       (* arm_LDP Q0 Q1 X1 No_Offset *)
+  0x91005021;       (* arm_ADD X1 X1 (rvalue (word 20)) *)
+  0x4cdf7022;       (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *)
+  0x4e180004;       (* arm_TBL Q4 [Q0] Q24 128 *)
+  0x4e192005;       (* arm_TBL2 Q5 Q0 Q1 Q25 128 *)
+  0x4e1a0026;       (* arm_TBL Q6 [Q1] Q26 128 *)
+  0x4e1b2027;       (* arm_TBL2 Q7 Q1 Q2 Q27 128 *)
+  0x6ebc4484;       (* arm_USHL_VEC Q4 Q4 Q28 32 128 *)
+  0x4e3d1c84;       (* arm_AND_VEC Q4 Q4 Q29 128 *)
+  0x6ea487c4;       (* arm_SUB_VEC Q4 Q30 Q4 32 128 *)
+  0x6ebc44a5;       (* arm_USHL_VEC Q5 Q5 Q28 32 128 *)
+  0x4e3d1ca5;       (* arm_AND_VEC Q5 Q5 Q29 128 *)
+  0x6ea587c5;       (* arm_SUB_VEC Q5 Q30 Q5 32 128 *)
+  0x6ebc44c6;       (* arm_USHL_VEC Q6 Q6 Q28 32 128 *)
+  0x4e3d1cc6;       (* arm_AND_VEC Q6 Q6 Q29 128 *)
+  0x6ea687c6;       (* arm_SUB_VEC Q6 Q30 Q6 32 128 *)
+  0x6ebc44e7;       (* arm_USHL_VEC Q7 Q7 Q28 32 128 *)
+  0x4e3d1ce7;       (* arm_AND_VEC Q7 Q7 Q29 128 *)
+  0x6ea787c7;       (* arm_SUB_VEC Q7 Q30 Q7 32 128 *)
+  0x3d800405;       (* arm_STR Q5 X0 (Immediate_Offset (word 16)) *)
+  0x3d800806;       (* arm_STR Q6 X0 (Immediate_Offset (word 32)) *)
+  0x3d800c07;       (* arm_STR Q7 X0 (Immediate_Offset (word 48)) *)
+  0x3c840404;       (* arm_STR Q4 X0 (Postimmediate_Offset (word 64)) *)
+  0xf1000529;       (* arm_SUBS X9 X9 (rvalue (word 1)) *)
+  0x54fffd01;       (* arm_BNE (word 2097056) *)
+  0xd65f03c0        (* arm_RET X30 *)
+];;
+(*** BYTECODE END ***)
+
+let MLDSA_POLYZ_UNPACK_17_EXEC = ARM_MK_EXEC_RULE mldsa_polyz_unpack_17_mc;;
+
+(* ------------------------------------------------------------------------- *)
+(* Code length constants                                                     *)
+(* ------------------------------------------------------------------------- *)
+
+let LENGTH_MLDSA_POLYZ_UNPACK_17_MC =
+  REWRITE_CONV[mldsa_polyz_unpack_17_mc] `LENGTH mldsa_polyz_unpack_17_mc`
+  |> CONV_RULE (RAND_CONV LENGTH_CONV);;
+
+let MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH = new_definition
+  `MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH = 0`;;
+
+let MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH = new_definition
+  `MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH = 4`;;
+
+let MLDSA_POLYZ_UNPACK_17_CORE_START = new_definition
+  `MLDSA_POLYZ_UNPACK_17_CORE_START = MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH`;;
+
+let MLDSA_POLYZ_UNPACK_17_CORE_END = new_definition
+  `MLDSA_POLYZ_UNPACK_17_CORE_END =
+     LENGTH mldsa_polyz_unpack_17_mc - MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH`;;
+
+let LENGTH_SIMPLIFY_CONV =
+  REWRITE_CONV[LENGTH_MLDSA_POLYZ_UNPACK_17_MC;
+              MLDSA_POLYZ_UNPACK_17_CORE_START; MLDSA_POLYZ_UNPACK_17_CORE_END;
+              MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH;
+              MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH] THENC
+  NUM_REDUCE_CONV THENC REWRITE_CONV [ADD_0];;
+
+(* ------------------------------------------------------------------------- *)
+(* D=18 instantiations for SIMD infrastructure                               *)
+(* ------------------------------------------------------------------------- *)
+
+let BASE_SIMPS_D18 = mk_base_simps 18;;
+let NUM_OF_WORDLIST_SPLIT_18_256 = mk_split_theorem 18 256 16;;
+let READ_MEMORY_WBYTES_SPLIT_128_128_32 = prove
+ (`t < 2 EXP 288
+    ==> (read (memory :> wbytes a) (s:armstate) = (word t : 288 word) <=>
+         read (memory :> bytes128 a) s = (word (t MOD 2 EXP 128) : int128) /\
+         read (memory :> bytes128 (word_add a (word 16))) s =
+         (word ((t DIV 2 EXP 128) MOD 2 EXP 128) : int128) /\
+         read (memory :> bytes32 (word_add a (word 32))) s =
+         (word (t DIV 2 EXP 256) : int32))`,
+  let split_16_20 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC
+                                DEPTH_CONV NUM_MULT_CONV)
+    (INST [`16`,`k:num`; `20`,`l:num`] READ_BYTES_SPLIT_ANY) in
+  let split_16_4 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC
+                               DEPTH_CONV NUM_MULT_CONV)
+    (INST [`16`,`k:num`; `4`,`l:num`] READ_BYTES_SPLIT_ANY) in
+  STRIP_TAC THEN
+  REWRITE_TAC[BYTES128_WBYTES; BYTES32_WBYTES; GSYM VAL_EQ;
+              VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+  REWRITE_TAC[split_16_20] THEN REWRITE_TAC[split_16_4] THEN
+  REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC NUM_REDUCE_CONV THEN
+  REWRITE_TAC[DIV_DIV; GSYM EXP_ADD] THEN CONV_TAC NUM_REDUCE_CONV THEN
+  IMP_REWRITE_TAC[VAL_WORD_EXACT] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN ASM_ARITH_TAC);;
+let WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D18 = mk_subword_cases 18 16;;
+
+(* Rewrite overlap expression to word_subword of full 288-bit word *)
+let OVERLAP_SIMP_D18 = prove
+ (`word(((t DIV 2 EXP 128) MOD 2 EXP 128) DIV 2 EXP 32 +
+        (t DIV 2 EXP 256) * 2 EXP 96) : int128 =
+   word_subword (word t : 288 word) (160, 128)`,
+  REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+  REWRITE_TAC[DIV_MOD; MOD_MOD_EXP_MIN; GSYM EXP_ADD; DIV_DIV] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL] THEN
+  REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV
+    (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`;
+            `340282366920938463463374607431768211456`] DIV_MOD))] THEN
+  REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV
+    (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`;
+            `79228162514264337593543950336`] DIV_MOD))] THEN
+  REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV
+    (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`;
+            `79228162514264337593543950336`] DIV_DIV))] THEN
+  ONCE_REWRITE_TAC[MULT_SYM] THEN
+  REWRITE_TAC[ONCE_REWRITE_RULE[ADD_SYM] DIVISION_SIMP]);;
+
+(* For D=18: bytes128@16 + bytes32@32 -> bytes128@20 *)
+let BYTES128_FROM_OVERLAP_32 = prove
+ (`m16 < 2 EXP 128 /\
+   read (memory :> bytes128 (word_add a (word 16))) (s:armstate) = (word m16 : int128) /\
+   read (memory :> bytes32 (word_add a (word 32))) (s:armstate) = (word m32 : int32)
+   ==> read (memory :> bytes128 (word_add a (word 20))) s =
+       (word(m16 DIV 2 EXP 32 + m32 * 2 EXP 96) : int128)`,
+  REWRITE_TAC[GSYM VAL_EQ; BYTES128_WBYTES; BYTES32_WBYTES; READ_COMPONENT_COMPOSE;
+              VAL_READ_WBYTES; VAL_WORD] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+  ABBREV_TAC `m = read memory (s:armstate)` THEN
+  ONCE_REWRITE_TAC[split_k_l_at 16 4 12] THEN
+  ONCE_REWRITE_TAC[split_k_l_at 20 12 4] THEN
+  REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN
+  CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN
+  CONV_TAC NUM_REDUCE_CONV THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN
+  ASM_SIMP_TAC[MOD_LT] THEN
+  SUBGOAL_THEN `m16 DIV 4294967296 < 79228162514264337593543950336` ASSUME_TAC
+    THENL [ASM_SIMP_TAC[RDIV_LT_EQ; ARITH_EQ] THEN ASM_ARITH_TAC; ALL_TAC] THEN
+  SUBGOAL_THEN `m32 MOD 4294967296 < 4294967296` ASSUME_TAC
+    THENL [REWRITE_TAC[MOD_LT_EQ] THEN ARITH_TAC; ALL_TAC] THEN
+  SUBGOAL_THEN `m16 DIV 340282366920938463463374607431768211456 = 0` ASSUME_TAC
+    THENL [MATCH_MP_TAC DIV_LT THEN ASM_REWRITE_TAC[]; ALL_TAC] THEN
+  REWRITE_TAC[ARITH_RULE `340282366920938463463374607431768211456 =
+    79228162514264337593543950336 * 4294967296`] THEN
+  CONJ_TAC THENL [
+    REWRITE_TAC[MOD_MOD; MOD_MULT_ADD] THEN ASM_SIMP_TAC[MOD_LT];
+    REWRITE_TAC[GSYM DIV_MOD] THEN SIMP_TAC[DIV_MULT_ADD; ARITH_EQ] THEN
+    REWRITE_TAC[CONV_RULE NUM_REDUCE_CONV
+      (SPECL [`m16:num`; `4294967296`; `79228162514264337593543950336`] DIV_DIV)] THEN
+    ASM_REWRITE_TAC[ADD_CLAUSES; MOD_MULT_ADD] THEN ASM_SIMP_TAC[MOD_LT]]);;
+
+(* D=18 overlap tactic: derives bytes128@20, discharges bound, rewrites to word_subword *)
+let BOUND_REORDER = TAUT `(p /\ q ==> r) <=> (q ==> p ==> r)`;;
+
+let DERIVE_OVERLAP_TAC_D18 i =
+  let chunk = 36 in
+  let off16 = chunk*i + 16 and off32 = chunk*i + 32 in
+  let w16 = mk_small_numeral off16 and w32 = mk_small_numeral off32 in
+  let has t th = can (find_term ((=) t)) (concl th) in
+  let a_val = mk_comb(mk_comb(`word_add:int64->int64->int64`, `b:int64`),
+    mk_comb(`word:num->int64`, mk_small_numeral (chunk * i))) in
+  let inst = INST [a_val, `a:int64`; `s0:armstate`, `s:armstate`] BYTES128_FROM_OVERLAP_32 in
+  let thm = CONV_RULE (ONCE_DEPTH_CONV(GEN_REWRITE_CONV I [WORD_ADD_ASSOC_CONSTS]) THENC
+             DEPTH_CONV NUM_ADD_CONV) inst in
+  let thm_reord = CONV_RULE (REWR_CONV BOUND_REORDER) thm in
+  FIRST_ASSUM(fun th128 ->
+    if not(has w16 th128 && has `bytes128` th128 && has `b:int64` th128) then failwith "" else
+    FIRST_ASSUM(fun th32 ->
+      if not(has w32 th32 && has `bytes32` th32 && has `b:int64` th32) then failwith "" else
+      let matched = MATCH_MP thm_reord (CONJ th128 th32) in
+      MP_TAC matched THEN
+      ANTS_TAC THENL [REWRITE_TAC[MOD_LT_EQ; EXP_EQ_0; ARITH_EQ] THEN ARITH_TAC;
+                       DISCH_THEN(ASSUME_TAC o
+                         CONV_RULE(RAND_CONV(REWRITE_CONV [OVERLAP_SIMP_D18])))]));;
+
+(* ------------------------------------------------------------------------- *)
+(* Core correctness theorem                                                  *)
+(* ------------------------------------------------------------------------- *)
+
+let MLDSA_POLYZ_UNPACK_17_CORRECT = prove
+ (`!r b t l pc.
+        LENGTH l = 256 /\
+        nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_17_mc) (r,1024) /\
+        nonoverlapping (b,576) (r,1024)
+        ==> ensures arm
+             (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_17_mc /\
+                  read PC s = word (pc + MLDSA_POLYZ_UNPACK_17_CORE_START) /\
+                  C_ARGUMENTS [r; b; t] s /\
+                  read(memory :> bytes(t,64)) s =
+                    num_of_wordlist mldsa_polyz_unpack_17_indices /\
+                  read(memory :> bytes(b,576)) s = num_of_wordlist l)
+             (\s. read PC s = word(pc + MLDSA_POLYZ_UNPACK_17_CORE_END) /\
+                  read(memory :> bytes(r,1024)) s =
+                       num_of_wordlist (MAP zunpack17 l))
+             (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
+              MAYCHANGE [memory :> bytes(r,1024)])`,
+  CONV_TAC LENGTH_SIMPLIFY_CONV THEN
+  MAP_EVERY X_GEN_TAC [`r:int64`; `b:int64`; `t:int64`;
+                        `l:(18 word) list`; `pc:num`] THEN
+  REWRITE_TAC[C_ARGUMENTS; MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;
+              NONOVERLAPPING_CLAUSES] THEN
+  DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN
+
+  (*** Ghost Q28: constructed via two INS_GEN partial writes ***)
+  GHOST_INTRO_TAC `q28_init:int128` `read Q28` THEN
+
+  ENSURES_INIT_TAC "s0" THEN
+
+  (*** Expand table precondition into 4 x bytes128 reads ***)
+  FIRST_X_ASSUM(MP_TAC o check (can (term_match []
+    `read(memory :> bytes(t:int64,64)) s = x`) o concl)) THEN
+  REWRITE_TAC[mldsa_polyz_unpack_17_indices] THEN
+  REPLICATE_TAC 4
+   (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV)
+         [GSYM NUM_OF_PAIR_WORDLIST]) THEN
+  REWRITE_TAC[pair_wordlist] THEN
+  CONV_TAC WORD_REDUCE_CONV THEN
+  CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+  REWRITE_TAC[GSYM BYTES128_WBYTES] THEN
+  STRIP_TAC THEN
+
+  (*** Split 256 18-bit coefficients into 16 chunks of 16 as 288-bit words ***)
+  UNDISCH_TAC `read(memory :> bytes(b,576)) s0 = num_of_wordlist(l:(18 word) list)` THEN
+  IMP_REWRITE_TAC [NUM_OF_WORDLIST_SPLIT_18_256] THEN
+  CONV_TAC (ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN
+  REWRITE_TAC [MAP; o_DEF] THEN
+  CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+
+  (*** Split each 288-bit wbytes into bytes128 + bytes128 + bytes32 ***)
+  IMP_REWRITE_TAC [READ_MEMORY_WBYTES_SPLIT_128_128_32] THEN
+  MAP_EVERY (fun n -> SUBGOAL_THEN (subst[mk_small_numeral n,`k:num`]
+    `num_of_wordlist (SUB_LIST (16 * k,16) (l : (18 word) list)) < 2 EXP 288`)
+     (fun th -> REWRITE_TAC[th]) THENL [
+       TRANS_TAC LTE_TRANS (subst[mk_small_numeral n,`k:num`]
+                            `2 EXP (dimindex(:18) * LENGTH(SUB_LIST(16*k,16) (l : (18 word) list)))`) THEN
+       REWRITE_TAC[NUM_OF_WORDLIST_BOUND] THEN
+       REWRITE_TAC[LENGTH_SUB_LIST; DIMINDEX_CONV `dimindex (:18)`] THEN
+       ASM_SIMP_TAC [] THEN NUM_REDUCE_TAC;
+       ALL_TAC]) (0--15) THEN
+  REWRITE_TAC [WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC (TOP_SWEEP_CONV NUM_ADD_CONV) THEN
+  STRIP_TAC THEN
+
+  (*** Derive overlapping bytes128 reads at offset 20 for each chunk ***)
+  MAP_EVERY DERIVE_OVERLAP_TAC_D18 (0--15) THEN
+
+  (*** Gather LENGTH assumptions for sublists ***)
+  MAP_EVERY (fun i -> SUBGOAL_THEN
+    (subst [mk_small_numeral (16 * i), `i: num`]
+      `LENGTH (SUB_LIST (i, 16) (l : (18 word) list)) = 16`) ASSUME_TAC
+    THENL [ASM_REWRITE_TAC [LENGTH_SUB_LIST] THEN NUM_REDUCE_TAC; ALL_TAC])
+    (0 -- 15) THEN
+
+  (*** Symbolic execution with per-step simplification ***)
+  MAP_UNTIL_TARGET_PC (fun n ->
+    ARM_STEPS_TAC MLDSA_POLYZ_UNPACK_17_EXEC [n] THEN
+    SIMD_SIMPLIFY_TAC (map GSYM BASE_SIMPS_D18) THEN
+    SIMP_ZUNPACK_TAC 18 ZUNPACK17_CORRECT) 1 THEN
+
+  ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN
+
+  (*** Fold output back to MAP zunpack17 l ***)
+  REPEAT (FIRST_X_ASSUM(MP_TAC o check
+     (can (term_match [] `read (memory :> bytes128 r) s0 = xxx`) o concl))) THEN
+  TRY (IMP_REWRITE_TAC WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D18) THEN
+  UNDISCH_THEN `LENGTH (l : (18 word) list) = 256`
+    (fun th -> CONV_TAC (TOP_SWEEP_CONV (EL_SUB_LIST_CONV th)) THEN ASSUME_TAC th) THEN
+  REPEAT DISCH_TAC THEN
+  GEN_REWRITE_TAC (RAND_CONV o RAND_CONV o RAND_CONV) [GSYM LIST_OF_SEQ_EQ_SELF] THEN
+  ASM_REWRITE_TAC[LENGTH_MAP] THEN
+  CONV_TAC (TOP_SWEEP_CONV LIST_OF_SEQ_CONV) THEN
+  ASM_REWRITE_TAC [MAP] THEN
+  REPLICATE_TAC 2 (CONV_TAC (ONCE_REWRITE_CONV [GSYM NUM_OF_PAIR_WORDLIST])) THEN
+  REWRITE_TAC[pair_wordlist] THEN
+  CONV_TAC (ONCE_DEPTH_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+  ASM_REWRITE_TAC[GSYM BYTES128_WBYTES]);;
+
+(* ------------------------------------------------------------------------- *)
+(* Subroutine correctness                                                    *)
+(* ------------------------------------------------------------------------- *)
+
+(* NOTE: This must be kept in sync with the CBMC specification
+ * in mldsa/src/native/aarch64/src/arith_native_aarch64.h *)
+
+let MLDSA_POLYZ_UNPACK_17_SUBROUTINE_CORRECT = prove
+ (`!r b t l pc returnaddress.
+        LENGTH l = 256 /\
+        nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_17_mc) (r,1024) /\
+        nonoverlapping (b,576) (r,1024)
+        ==> ensures arm
+             (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_17_mc /\
+                  read PC s = word pc /\
+                  read X30 s = returnaddress /\
+                  C_ARGUMENTS [r; b; t] s /\
+                  read(memory :> bytes(t,64)) s =
+                    num_of_wordlist mldsa_polyz_unpack_17_indices /\
+                  read(memory :> bytes(b,576)) s = num_of_wordlist l)
+             (\s. read PC s = returnaddress /\
+                  read(memory :> bytes(r,1024)) s =
+                       num_of_wordlist (MAP zunpack17 l) /\
+                  (!i. i < 256 ==>
+                       --(&(2 EXP 17) - &1) <= ival(EL i (MAP zunpack17 l)) /\
+                       ival(EL i (MAP zunpack17 l)) <= &(2 EXP 17)))
+             (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
+              MAYCHANGE [memory :> bytes(r,1024)])`,
+  CONV_TAC LENGTH_SIMPLIFY_CONV THEN
+  ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POLYZ_UNPACK_17_EXEC
+   (CONV_RULE LENGTH_SIMPLIFY_CONV MLDSA_POLYZ_UNPACK_17_CORRECT) THEN
+  REPEAT STRIP_TAC THEN
+  MP_TAC(CONV_RULE NUM_REDUCE_CONV
+    (ISPECL [`l:(18 word) list`; `i:num`] ZUNPACK17_MAP_BOUND)) THEN
+  ASM_REWRITE_TAC[] THEN SIMP_TAC[]);;
diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml
new file mode 100644
index 000000000..afa25f82d
--- /dev/null
+++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml
@@ -0,0 +1,255 @@
+(*
+ * Copyright (c) The mldsa-native project authors
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
+ *)
+
+(* ========================================================================= *)
+(* Functional correctness of polyz_unpack_19:                                *)
+(* Unpack polynomial z with 20-bit packed coefficients (GAMMA1 = 2^19)       *)
+(* Maps packed [0, 2^20-1] to signed [-(2^19-1), 2^19] via GAMMA1 - x       *)
+(* ========================================================================= *)
+
+needs "arm/proofs/base.ml";;
+needs "aarch64/proofs/aarch64_utils.ml";;
+needs "aarch64/proofs/mldsa_polyz_unpack_consts.ml";;
+
+(**** print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_19.o";;
+ ****)
+
+let mldsa_polyz_unpack_19_mc = define_assert_from_elf
+  "mldsa_polyz_unpack_19_mc" "aarch64/mldsa/mldsa_polyz_unpack_19.o"
+(*** BYTECODE START ***)
+[
+  0x3dc00058;       (* arm_LDR Q24 X2 (Immediate_Offset (word 0)) *)
+  0x3dc00459;       (* arm_LDR Q25 X2 (Immediate_Offset (word 16)) *)
+  0x3dc0085a;       (* arm_LDR Q26 X2 (Immediate_Offset (word 32)) *)
+  0x3dc00c5b;       (* arm_LDR Q27 X2 (Immediate_Offset (word 48)) *)
+  0xd2c01f83;       (* arm_MOVZ X3 (word 252) 32 *)
+  0x4e080c7c;       (* arm_DUP_GEN Q28 X3 64 128 *)
+  0x4f00d5fd;       (* arm_MOVI Q29 (word 4503595333451775) *)
+  0x4f00451e;       (* arm_MOVI Q30 (word 2251799814209536) *)
+  0xd2800209;       (* arm_MOV X9 (rvalue (word 16)) *)
+  0x4c40a020;       (* arm_LDP Q0 Q1 X1 No_Offset *)
+  0x91006021;       (* arm_ADD X1 X1 (rvalue (word 24)) *)
+  0x4cdf7022;       (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *)
+  0x4e180004;       (* arm_TBL Q4 [Q0] Q24 128 *)
+  0x4e192005;       (* arm_TBL2 Q5 Q0 Q1 Q25 128 *)
+  0x4e1a0026;       (* arm_TBL Q6 [Q1] Q26 128 *)
+  0x4e1b2027;       (* arm_TBL2 Q7 Q1 Q2 Q27 128 *)
+  0x6ebc4484;       (* arm_USHL_VEC Q4 Q4 Q28 32 128 *)
+  0x4e3d1c84;       (* arm_AND_VEC Q4 Q4 Q29 128 *)
+  0x6ea487c4;       (* arm_SUB_VEC Q4 Q30 Q4 32 128 *)
+  0x6ebc44a5;       (* arm_USHL_VEC Q5 Q5 Q28 32 128 *)
+  0x4e3d1ca5;       (* arm_AND_VEC Q5 Q5 Q29 128 *)
+  0x6ea587c5;       (* arm_SUB_VEC Q5 Q30 Q5 32 128 *)
+  0x6ebc44c6;       (* arm_USHL_VEC Q6 Q6 Q28 32 128 *)
+  0x4e3d1cc6;       (* arm_AND_VEC Q6 Q6 Q29 128 *)
+  0x6ea687c6;       (* arm_SUB_VEC Q6 Q30 Q6 32 128 *)
+  0x6ebc44e7;       (* arm_USHL_VEC Q7 Q7 Q28 32 128 *)
+  0x4e3d1ce7;       (* arm_AND_VEC Q7 Q7 Q29 128 *)
+  0x6ea787c7;       (* arm_SUB_VEC Q7 Q30 Q7 32 128 *)
+  0x3d800405;       (* arm_STR Q5 X0 (Immediate_Offset (word 16)) *)
+  0x3d800806;       (* arm_STR Q6 X0 (Immediate_Offset (word 32)) *)
+  0x3d800c07;       (* arm_STR Q7 X0 (Immediate_Offset (word 48)) *)
+  0x3c840404;       (* arm_STR Q4 X0 (Postimmediate_Offset (word 64)) *)
+  0xf1000529;       (* arm_SUBS X9 X9 (rvalue (word 1)) *)
+  0x54fffd01;       (* arm_BNE (word 2097056) *)
+  0xd65f03c0        (* arm_RET X30 *)
+];;
+(*** BYTECODE END ***)
+
+let MLDSA_POLYZ_UNPACK_19_EXEC = ARM_MK_EXEC_RULE mldsa_polyz_unpack_19_mc;;
+
+(* ------------------------------------------------------------------------- *)
+(* Code length constants                                                     *)
+(* ------------------------------------------------------------------------- *)
+
+let LENGTH_MLDSA_POLYZ_UNPACK_19_MC =
+  REWRITE_CONV[mldsa_polyz_unpack_19_mc] `LENGTH mldsa_polyz_unpack_19_mc`
+  |> CONV_RULE (RAND_CONV LENGTH_CONV);;
+
+let MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH = new_definition
+  `MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH = 0`;;
+
+let MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH = new_definition
+  `MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH = 4`;;
+
+let MLDSA_POLYZ_UNPACK_19_CORE_START = new_definition
+  `MLDSA_POLYZ_UNPACK_19_CORE_START = MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH`;;
+
+let MLDSA_POLYZ_UNPACK_19_CORE_END = new_definition
+  `MLDSA_POLYZ_UNPACK_19_CORE_END =
+     LENGTH mldsa_polyz_unpack_19_mc - MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH`;;
+
+let LENGTH_SIMPLIFY_CONV_19 =
+  REWRITE_CONV[LENGTH_MLDSA_POLYZ_UNPACK_19_MC;
+              MLDSA_POLYZ_UNPACK_19_CORE_START; MLDSA_POLYZ_UNPACK_19_CORE_END;
+              MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH;
+              MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH] THENC
+  NUM_REDUCE_CONV THENC REWRITE_CONV [ADD_0];;
+
+(* ------------------------------------------------------------------------- *)
+(* D=20 instantiations for SIMD infrastructure                               *)
+(* ------------------------------------------------------------------------- *)
+
+let BASE_SIMPS_D20 = mk_base_simps 20;;
+let NUM_OF_WORDLIST_SPLIT_20_256 = mk_split_theorem 20 256 16;;
+let READ_MEMORY_WBYTES_SPLIT_128_128_64 = prove
+ (`t < 2 EXP 320
+    ==> (read (memory :> wbytes a) (s:armstate) = (word t : 320 word) <=>
+         read (memory :> bytes128 a) s = (word (t MOD 2 EXP 128) : int128) /\
+         read (memory :> bytes128 (word_add a (word 16))) s =
+         (word ((t DIV 2 EXP 128) MOD 2 EXP 128) : int128) /\
+         read (memory :> bytes64 (word_add a (word 32))) s =
+         (word (t DIV 2 EXP 256) : int64))`,
+  let split_16_24 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC
+                                DEPTH_CONV NUM_MULT_CONV)
+    (INST [`16`,`k:num`; `24`,`l:num`] READ_BYTES_SPLIT_ANY) in
+  let split_16_8 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC
+                               DEPTH_CONV NUM_MULT_CONV)
+    (INST [`16`,`k:num`; `8`,`l:num`] READ_BYTES_SPLIT_ANY) in
+  STRIP_TAC THEN
+  REWRITE_TAC[BYTES128_WBYTES; BYTES64_WBYTES; GSYM VAL_EQ;
+              VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+  REWRITE_TAC[split_16_24] THEN REWRITE_TAC[split_16_8] THEN
+  REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC NUM_REDUCE_CONV THEN
+  REWRITE_TAC[DIV_DIV; GSYM EXP_ADD] THEN CONV_TAC NUM_REDUCE_CONV THEN
+  IMP_REWRITE_TAC[VAL_WORD_EXACT] THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN ASM_ARITH_TAC);;
+let WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D20 = mk_subword_cases 20 16;;
+
+(* ------------------------------------------------------------------------- *)
+(* Core correctness theorem                                                  *)
+(* ------------------------------------------------------------------------- *)
+
+let MLDSA_POLYZ_UNPACK_19_CORRECT = prove
+ (`!r b t l pc.
+        LENGTH l = 256 /\
+        nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_19_mc) (r,1024) /\
+        nonoverlapping (b,640) (r,1024)
+        ==> ensures arm
+             (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_19_mc /\
+                  read PC s = word (pc + MLDSA_POLYZ_UNPACK_19_CORE_START) /\
+                  C_ARGUMENTS [r; b; t] s /\
+                  read(memory :> bytes(t,64)) s =
+                    num_of_wordlist mldsa_polyz_unpack_19_indices /\
+                  read(memory :> bytes(b,640)) s = num_of_wordlist l)
+             (\s. read PC s = word(pc + MLDSA_POLYZ_UNPACK_19_CORE_END) /\
+                  read(memory :> bytes(r,1024)) s =
+                       num_of_wordlist (MAP zunpack19 l))
+             (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
+              MAYCHANGE [memory :> bytes(r,1024)])`,
+  CONV_TAC LENGTH_SIMPLIFY_CONV_19 THEN
+  MAP_EVERY X_GEN_TAC [`r:int64`; `b:int64`; `t:int64`;
+                        `l:(20 word) list`; `pc:num`] THEN
+  REWRITE_TAC[C_ARGUMENTS; MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI;
+              NONOVERLAPPING_CLAUSES] THEN
+  DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN
+
+  ENSURES_INIT_TAC "s0" THEN
+
+  (*** Expand table precondition into 4 x bytes128 reads ***)
+  FIRST_X_ASSUM(MP_TAC o check (can (term_match []
+    `read(memory :> bytes(t:int64,64)) s = x`) o concl)) THEN
+  REWRITE_TAC[mldsa_polyz_unpack_19_indices] THEN
+  REPLICATE_TAC 4
+   (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV)
+         [GSYM NUM_OF_PAIR_WORDLIST]) THEN
+  REWRITE_TAC[pair_wordlist] THEN
+  CONV_TAC WORD_REDUCE_CONV THEN
+  CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+  REWRITE_TAC[GSYM BYTES128_WBYTES] THEN
+  STRIP_TAC THEN
+
+  (*** Split 256 20-bit coefficients into 16 chunks of 16 as 320-bit words ***)
+  UNDISCH_TAC `read(memory :> bytes(b,640)) s0 = num_of_wordlist(l:(20 word) list)` THEN
+  IMP_REWRITE_TAC [NUM_OF_WORDLIST_SPLIT_20_256] THEN
+  CONV_TAC (ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN
+  REWRITE_TAC [MAP; o_DEF] THEN
+  CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+
+  (*** Split each 320-bit wbytes into bytes128 + bytes128 + bytes64 ***)
+  IMP_REWRITE_TAC [READ_MEMORY_WBYTES_SPLIT_128_128_64] THEN
+  MAP_EVERY (fun n -> SUBGOAL_THEN (subst[mk_small_numeral n,`k:num`]
+    `num_of_wordlist (SUB_LIST (16 * k,16) (l : (20 word) list)) < 2 EXP 320`)
+     (fun th -> REWRITE_TAC[th]) THENL [
+       TRANS_TAC LTE_TRANS (subst[mk_small_numeral n,`k:num`]
+                            `2 EXP (dimindex(:20) * LENGTH(SUB_LIST(16*k,16) (l : (20 word) list)))`) THEN
+       REWRITE_TAC[NUM_OF_WORDLIST_BOUND] THEN
+       REWRITE_TAC[LENGTH_SUB_LIST; DIMINDEX_CONV `dimindex (:20)`] THEN
+       ASM_SIMP_TAC [] THEN NUM_REDUCE_TAC;
+       ALL_TAC]) (0--15) THEN
+  REWRITE_TAC [WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC (TOP_SWEEP_CONV NUM_ADD_CONV) THEN
+  STRIP_TAC THEN
+
+  (*** Derive overlapping bytes128 reads at offset 24 for each chunk ***)
+  MAP_EVERY (DERIVE_OVERLAP_TAC BYTES128_FROM_OVERLAP_64 40) (0--15) THEN
+
+  (*** Gather LENGTH assumptions for sublists ***)
+  MAP_EVERY (fun i -> SUBGOAL_THEN
+    (subst [mk_small_numeral (16 * i), `i: num`]
+      `LENGTH (SUB_LIST (i, 16) (l : (20 word) list)) = 16`) ASSUME_TAC
+    THENL [ASM_REWRITE_TAC [LENGTH_SUB_LIST] THEN NUM_REDUCE_TAC; ALL_TAC])
+    (0 -- 15) THEN
+
+  (*** Symbolic execution with per-step simplification ***)
+  MAP_UNTIL_TARGET_PC (fun n ->
+    ARM_STEPS_TAC MLDSA_POLYZ_UNPACK_19_EXEC [n] THEN
+    SIMD_SIMPLIFY_TAC (map GSYM BASE_SIMPS_D20) THEN
+    SIMP_ZUNPACK_TAC 20 ZUNPACK19_CORRECT) 1 THEN
+
+  ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN
+
+  (*** Fold output back to MAP zunpack19 l ***)
+  REPEAT (FIRST_X_ASSUM(MP_TAC o check
+     (can (term_match [] `read (memory :> bytes128 r) s0 = xxx`) o concl))) THEN
+  TRY (IMP_REWRITE_TAC WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D20) THEN
+  UNDISCH_THEN `LENGTH (l : (20 word) list) = 256`
+    (fun th -> CONV_TAC (TOP_SWEEP_CONV (EL_SUB_LIST_CONV th)) THEN ASSUME_TAC th) THEN
+  REPEAT DISCH_TAC THEN
+  GEN_REWRITE_TAC (RAND_CONV o RAND_CONV o RAND_CONV) [GSYM LIST_OF_SEQ_EQ_SELF] THEN
+  ASM_REWRITE_TAC[LENGTH_MAP] THEN
+  CONV_TAC (TOP_SWEEP_CONV LIST_OF_SEQ_CONV) THEN
+  ASM_REWRITE_TAC [MAP] THEN
+  REPLICATE_TAC 2 (CONV_TAC (ONCE_REWRITE_CONV [GSYM NUM_OF_PAIR_WORDLIST])) THEN
+  REWRITE_TAC[pair_wordlist] THEN
+  CONV_TAC (ONCE_DEPTH_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN
+  ASM_REWRITE_TAC[GSYM BYTES128_WBYTES]);;
+
+(* ------------------------------------------------------------------------- *)
+(* Subroutine correctness                                                    *)
+(* ------------------------------------------------------------------------- *)
+
+(* NOTE: This must be kept in sync with the CBMC specification
+ * in mldsa/src/native/aarch64/src/arith_native_aarch64.h *)
+
+let MLDSA_POLYZ_UNPACK_19_SUBROUTINE_CORRECT = prove
+ (`!r b t l pc returnaddress.
+        LENGTH l = 256 /\
+        nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_19_mc) (r,1024) /\
+        nonoverlapping (b,640) (r,1024)
+        ==> ensures arm
+             (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_19_mc /\
+                  read PC s = word pc /\
+                  read X30 s = returnaddress /\
+                  C_ARGUMENTS [r; b; t] s /\
+                  read(memory :> bytes(t,64)) s =
+                    num_of_wordlist mldsa_polyz_unpack_19_indices /\
+                  read(memory :> bytes(b,640)) s = num_of_wordlist l)
+             (\s. read PC s = returnaddress /\
+                  read(memory :> bytes(r,1024)) s =
+                       num_of_wordlist (MAP zunpack19 l) /\
+                  (!i. i < 256 ==>
+                       --(&(2 EXP 19) - &1) <= ival(EL i (MAP zunpack19 l)) /\
+                       ival(EL i (MAP zunpack19 l)) <= &(2 EXP 19)))
+             (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,,
+              MAYCHANGE [memory :> bytes(r,1024)])`,
+  CONV_TAC LENGTH_SIMPLIFY_CONV_19 THEN
+  ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POLYZ_UNPACK_19_EXEC
+   (CONV_RULE LENGTH_SIMPLIFY_CONV_19 MLDSA_POLYZ_UNPACK_19_CORRECT) THEN
+  REPEAT STRIP_TAC THEN
+  MP_TAC(CONV_RULE NUM_REDUCE_CONV
+    (ISPECL [`l:(20 word) list`; `i:num`] ZUNPACK19_MAP_BOUND)) THEN
+  ASM_REWRITE_TAC[] THEN SIMP_TAC[]);;
diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml
new file mode 100644
index 000000000..a5a02e2c7
--- /dev/null
+++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml
@@ -0,0 +1,39 @@
+(*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
+ *)
+
+(*
+ * WARNING: This file is auto-generated from scripts/autogen
+ *          in the mldsa-native repository.
+ *          Do not modify it directly.
+ *)
+
+(*
+ * TBL index tables for polyz_unpack_{17,19}
+ * See autogen for details.
+ *)
+
+let mldsa_polyz_unpack_17_indices = (REWRITE_RULE[MAP] o define)
+  `mldsa_polyz_unpack_17_indices:byte list = MAP word [
+    0;   1;   2; 255;   2;   3;   4; 255;
+    4;   5;   6; 255;   6;   7;   8; 255;
+    9;  10;  11; 255;  11;  12;  13; 255;
+   13;  14;  15; 255;  15;  16;  17; 255;
+    2;   3;   4; 255;   4;   5;   6; 255;
+    6;   7;   8; 255;   8;   9;  10; 255;
+   11;  12;  13; 255;  13;  14;  15; 255;
+   15;  28;  29; 255;  29;  30;  31; 255
+]`;;
+
+let mldsa_polyz_unpack_19_indices = (REWRITE_RULE[MAP] o define)
+  `mldsa_polyz_unpack_19_indices:byte list = MAP word [
+    0;   1;   2; 255;   2;   3;   4; 255;
+    5;   6;   7; 255;   7;   8;   9; 255;
+   10;  11;  12; 255;  12;  13;  14; 255;
+   15;  16;  17; 255;  17;  18;  19; 255;
+    4;   5;   6; 255;   6;   7;   8; 255;
+    9;  10;  11; 255;  11;  12;  13; 255;
+   14;  15;  24; 255;  24;  25;  26; 255;
+   27;  28;  29; 255;  29;  30;  31; 255
+]`;;
diff --git a/proofs/hol_light/common/mldsa_specs.ml b/proofs/hol_light/common/mldsa_specs.ml
index 661c42f33..e39a17a6b 100644
--- a/proofs/hol_light/common/mldsa_specs.ml
+++ b/proofs/hol_light/common/mldsa_specs.ml
@@ -843,3 +843,315 @@ let SIMD_SIMPLIFY_ABBREV_TAC =
       let tms = sort free_in (find_terms pam (rand(concl th''))) in
       (MP_TAC th'' THEN MAP_EVERY AUTO_ABBREV_TAC tms THEN DISCH_TAC) (asl,w) in
   TRY(FIRST_X_ASSUM(ttac o check (simdable o concl)));;
+
+
+(* ========================================================================= *)
+(* zunpack: gamma1 - x unpacking for polyz                                   *)
+(*                                                                           *)
+(* zunpack_d maps a d-bit packed coefficient x in [0, 2^d - 1] to           *)
+(* gamma1 - x in [-(gamma1-1), gamma1], where gamma1 = 2^(d-1).             *)
+(* ========================================================================= *)
+
+(* --- zunpack17: GAMMA1 = 2^17, 18-bit packed coefficients --- *)
+
+let zunpack17 = new_definition
+ `zunpack17 (x:(18)word) : (32)word =
+  word_sub (word(2 EXP 17)) (word_zx x)`;;
+
+let ZUNPACK17_CORRECT = prove(
+  `!x:(18)word.
+    word_sub (word 131072 : 32 word)
+             (word_zx (x : 18 word) : 32 word) = zunpack17 x`,
+  REWRITE_TAC[zunpack17] THEN CONV_TAC NUM_REDUCE_CONV);;
+
+let ZUNPACK17_IVAL = prove(
+ `!x:(18)word. ival(zunpack17 x) = &(2 EXP 17) - &(val x)`,
+  GEN_TAC THEN REWRITE_TAC[zunpack17] THEN
+  SUBGOAL_THEN `word_zx (x:18 word) : 32 word = word(val x)` SUBST1_TAC THENL
+  [REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_ZX_GEN; VAL_WORD] THEN
+   CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+   REWRITE_TAC[MOD_MOD_EXP_MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN
+   MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN
+   CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN SIMP_TAC[MOD_LT];
+   ALL_TAC] THEN
+  ONCE_REWRITE_TAC[WORD_IWORD] THEN
+  REWRITE_TAC[GSYM IWORD_INT_SUB] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MATCH_MP_TAC IVAL_IWORD THEN REWRITE_TAC[DIMINDEX_32] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN
+  REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);;
+
+let ZUNPACK17_BOUND = prove(
+ `!x:(18)word. --(&(2 EXP 17) - &1) <= ival(zunpack17 x) /\
+               ival(zunpack17 x) <= &(2 EXP 17)`,
+  GEN_TAC THEN REWRITE_TAC[ZUNPACK17_IVAL] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN
+  REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);;
+
+let ZUNPACK17_MAP_BOUND = prove(
+ `!l:(18 word) list. !i. i < LENGTH l ==>
+    --(&(2 EXP 17) - &1) <= ival(EL i (MAP zunpack17 l)) /\
+    ival(EL i (MAP zunpack17 l)) <= &(2 EXP 17)`,
+  REPEAT STRIP_TAC THEN ASM_SIMP_TAC[EL_MAP] THEN
+  REWRITE_TAC[ZUNPACK17_BOUND]);;
+
+(* --- zunpack19: GAMMA1 = 2^19, 20-bit packed coefficients --- *)
+
+let zunpack19 = new_definition
+ `zunpack19 (x:(20)word) : (32)word =
+  word_sub (word(2 EXP 19)) (word_zx x)`;;
+
+let ZUNPACK19_CORRECT = prove(
+  `!x:(20)word.
+    word_sub (word 524288 : 32 word)
+             (word_zx (x : 20 word) : 32 word) = zunpack19 x`,
+  REWRITE_TAC[zunpack19] THEN CONV_TAC NUM_REDUCE_CONV);;
+
+let ZUNPACK19_IVAL = prove(
+ `!x:(20)word. ival(zunpack19 x) = &(2 EXP 19) - &(val x)`,
+  GEN_TAC THEN REWRITE_TAC[zunpack19] THEN
+  SUBGOAL_THEN `word_zx (x:20 word) : 32 word = word(val x)` SUBST1_TAC THENL
+  [REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_ZX_GEN; VAL_WORD] THEN
+   CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN
+   REWRITE_TAC[MOD_MOD_EXP_MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN
+   MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN
+   CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN SIMP_TAC[MOD_LT];
+   ALL_TAC] THEN
+  ONCE_REWRITE_TAC[WORD_IWORD] THEN
+  REWRITE_TAC[GSYM IWORD_INT_SUB] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MATCH_MP_TAC IVAL_IWORD THEN REWRITE_TAC[DIMINDEX_32] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN
+  REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);;
+
+let ZUNPACK19_BOUND = prove(
+ `!x:(20)word. --(&(2 EXP 19) - &1) <= ival(zunpack19 x) /\
+               ival(zunpack19 x) <= &(2 EXP 19)`,
+  GEN_TAC THEN REWRITE_TAC[ZUNPACK19_IVAL] THEN
+  CONV_TAC NUM_REDUCE_CONV THEN
+  MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN
+  CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN
+  REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);;
+
+let ZUNPACK19_MAP_BOUND = prove(
+ `!l:(20 word) list. !i. i < LENGTH l ==>
+    --(&(2 EXP 19) - &1) <= ival(EL i (MAP zunpack19 l)) /\
+    ival(EL i (MAP zunpack19 l)) <= &(2 EXP 19)`,
+  REPEAT STRIP_TAC THEN ASM_SIMP_TAC[EL_MAP] THEN
+  REWRITE_TAC[ZUNPACK19_BOUND]);;
+
+(* ========================================================================= *)
+(* Helper lemmas: list operations                                            *)
+(* ========================================================================= *)
+
+let EL_SUB_LIST = prove(
+ `!l:'a list. !i k n. i < n /\ k + n <= LENGTH l
+   ==> EL i (SUB_LIST (k, n) l) = EL (k + i) l`,
+  LIST_INDUCT_TAC THENL [
+    REWRITE_TAC[LENGTH; LE; ADD_EQ_0] THEN ARITH_TAC;
+    REWRITE_TAC[LENGTH] THEN REPEAT GEN_TAC THEN
+    STRUCT_CASES_TAC (SPEC `k:num` num_CASES) THEN
+    STRUCT_CASES_TAC (SPEC `n:num` num_CASES) THEN
+    REWRITE_TAC[LT; SUB_LIST_CLAUSES; ADD_CLAUSES] THENL [
+      STRUCT_CASES_TAC (SPEC `i:num` num_CASES) THEN
+      REWRITE_TAC[EL; HD; TL; ADD_CLAUSES] THEN STRIP_TAC THEN
+      FIRST_X_ASSUM (MP_TAC o SPECL [`n:num`; `0`; `n':num`]) THEN
+      REWRITE_TAC[ADD_CLAUSES] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC;
+      REWRITE_TAC[EL; TL] THEN STRIP_TAC THEN
+      FIRST_X_ASSUM (MP_TAC o SPECL [`i:num`; `n':num`; `SUC n''`]) THEN
+      ASM_REWRITE_TAC[LT_SUC] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC]]);;
+
+let EL_SUB_LIST_CONV len_thm tm =
+  let i_tm,sublist_tm = dest_comb tm in
+  let el_const,i = dest_comb i_tm in
+  let sublist_pair,ls = dest_comb sublist_tm in
+  let sublist_const,pair_tm = dest_comb sublist_pair in
+  let base,len = dest_pair pair_tm in
+  let i_num = dest_numeral i and
+      len_num = dest_numeral len in
+  if i_num >= len_num then failwith "EL_SUB_LIST_CONV: index out of bounds" else
+  let th1 = ISPECL [ls; i; base; len] EL_SUB_LIST in
+  let th2 = REWRITE_RULE[len_thm] th1 in
+  let th3 = MP th2 (EQT_ELIM(NUM_REDUCE_CONV (fst(dest_imp(concl th2))))) in
+  CONV_RULE (RAND_CONV (LAND_CONV NUM_ADD_CONV)) th3;;
+
+let LENGTH_SUB_LIST_0 = prove
+ (`!n (l:'a list). n <= LENGTH l ==> LENGTH (SUB_LIST (0, n) l) = n`,
+  REPEAT STRIP_TAC THEN REWRITE_TAC[LENGTH_SUB_LIST; SUB_0] THEN ASM_ARITH_TAC);;
+
+let SUB_LIST_SUB_LIST_0 = prove(
+ `!k n m (l:'a list). k + n <= m /\ m <= LENGTH l
+   ==> SUB_LIST (k, n) (SUB_LIST (0, m) l) = SUB_LIST (k, n) l`,
+  REPEAT STRIP_TAC THEN REWRITE_TAC[LIST_EQ; LENGTH_SUB_LIST; SUB_0] THEN
+  CONJ_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN REPEAT STRIP_TAC THEN
+  SUBGOAL_THEN `n' < n` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN
+  MP_TAC (ISPECL [`SUB_LIST (0,m) l:'a list`; `n':num`; `k:num`; `n:num`] EL_SUB_LIST) THEN
+  MP_TAC (ISPECL [`l:'a list`; `n':num`; `k:num`; `n:num`] EL_SUB_LIST) THEN
+  ASM_REWRITE_TAC[LENGTH_SUB_LIST; SUB_0] THEN
+  SUBGOAL_THEN `k + n <= LENGTH (l:'a list)` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN
+  SUBGOAL_THEN `k + n <= MIN m (LENGTH (l:'a list))` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN
+  ASM_SIMP_TAC[] THEN REPEAT DISCH_TAC THEN
+  MP_TAC (ISPECL [`l:'a list`; `k + n':num`; `0`; `m:num`] EL_SUB_LIST) THEN
+  ASM_REWRITE_TAC[ADD_CLAUSES] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC);;
+
+let SUB_LIST_SPLIT_EQ = prove
+ (`!n r (l:'a list). n + r = LENGTH l
+   ==> APPEND (SUB_LIST (0, n) l) (SUB_LIST (n, r) l) = l`,
+  REPEAT STRIP_TAC THEN
+  MP_TAC (ISPECL [`l:'a list`; `n:num`] SUB_LIST_TOPSPLIT) THEN
+  FIRST_X_ASSUM (SUBST1_TAC o SYM) THEN REWRITE_TAC[ADD_SUB2]);;
+
+let APPEND_ITLIST_APPEND_NIL = prove
+ (`!(l:('a list) list) (x:'a list). APPEND (ITLIST APPEND l []) x = ITLIST APPEND l x`,
+  LIST_INDUCT_TAC THEN REWRITE_TAC[ITLIST; APPEND] THEN
+  GEN_TAC THEN REWRITE_TAC[GSYM APPEND_ASSOC] THEN ASM_REWRITE_TAC[]);;
+
+let LIST_OF_SEQ_EQ = prove
+ (`!(f:num->'a) g n. (!i. i < n ==> f i = g i) ==> list_of_seq f n = list_of_seq g n`,
+  GEN_TAC THEN GEN_TAC THEN INDUCT_TAC THEN REWRITE_TAC[list_of_seq] THEN
+  DISCH_TAC THEN BINOP_TAC THENL [
+    FIRST_X_ASSUM MATCH_MP_TAC THEN GEN_TAC THEN DISCH_TAC THEN
+    FIRST_X_ASSUM MATCH_MP_TAC THEN ASM_ARITH_TAC;
+    REWRITE_TAC[CONS_11] THEN FIRST_X_ASSUM MATCH_MP_TAC THEN ARITH_TAC
+  ]);;
+
+let SUBLIST_PARTITION = prove
+ (`!r s (l:'a list). LENGTH l = r * s ==>
+       l = ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) l) s) []`,
+  GEN_TAC THEN INDUCT_TAC THENL [
+    REWRITE_TAC[MULT_CLAUSES; list_of_seq; ITLIST; LENGTH_EQ_NIL];
+    REWRITE_TAC[list_of_seq; ITLIST_EXTRA; APPEND_NIL] THEN
+    GEN_TAC THEN DISCH_TAC THEN
+    SUBGOAL_THEN
+      `SUB_LIST (0, r * s) l =
+       ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) (SUB_LIST (0, r * s) l)) s) []:'a list`
+      ASSUME_TAC THENL [
+      FIRST_X_ASSUM MATCH_MP_TAC THEN
+      MATCH_MP_TAC LENGTH_SUB_LIST_0 THEN ASM_ARITH_TAC;
+      ALL_TAC
+    ] THEN
+    SUBGOAL_THEN
+      `list_of_seq (\i. SUB_LIST (r * i, r) (SUB_LIST (0, r * s) l):'a list) s =
+       list_of_seq (\i. SUB_LIST (r * i, r) l) s`
+      ASSUME_TAC THENL [
+      MATCH_MP_TAC LIST_OF_SEQ_EQ THEN REPEAT STRIP_TAC THEN REWRITE_TAC[] THEN
+      MATCH_MP_TAC SUB_LIST_SUB_LIST_0 THEN CONJ_TAC THENL [
+        REWRITE_TAC[ARITH_RULE `r * i + r = r * (i + 1)`] THEN
+        REWRITE_TAC[LE_MULT_LCANCEL] THEN ASM_ARITH_TAC;
+        ASM_ARITH_TAC
+      ];
+      ALL_TAC
+    ] THEN
+    SUBGOAL_THEN
+      `APPEND (SUB_LIST (0, r * s) l) (SUB_LIST (r * s, r) l) = l:'a list`
+      ASSUME_TAC THENL [
+      MATCH_MP_TAC SUB_LIST_SPLIT_EQ THEN ASM_REWRITE_TAC[MULT_SUC] THEN ARITH_TAC;
+      ALL_TAC
+    ] THEN
+    SUBGOAL_THEN
+      `SUB_LIST (0, r * s) l =
+       ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) l) s) []:'a list`
+      ASSUME_TAC THENL [ASM_MESON_TAC[]; ALL_TAC] THEN
+    UNDISCH_TAC `APPEND (SUB_LIST (0,r * s) l) (SUB_LIST (r * s,r) l) = l:'a list` THEN
+    UNDISCH_TAC `SUB_LIST (0,r * s) l = ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i,r) l) s) []:'a list` THEN
+    SIMP_TAC[APPEND_ITLIST_APPEND_NIL]
+  ]);;
+
+(* ========================================================================= *)
+(* Helper lemmas: word arithmetic                                            *)
+(* ========================================================================= *)
+
+let VAL_WORD_EXACT = prove(
+  `!n. n < 2 EXP dimindex(:N) ==> val(word n : N word) = n`,
+  REWRITE_TAC[VAL_WORD] THEN SIMP_TAC[MOD_LT]);;
+
+let WORD_PACKED_EQ = prove(
+ `!(x:N word) (y:N word).
+    dimindex(:N) = l * k /\ 0 < l /\ l <= dimindex(:M)
+    ==> (x = y <=>
+         !i. i < k
+             ==> word_subword x (l*i, l) : (M) word =
+                 word_subword y (l*i, l))`,
+  REPEAT GEN_TAC THEN STRIP_TAC THEN EQ_TAC THENL
+  [DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[];
+   DISCH_TAC THEN
+   GEN_REWRITE_TAC I [WORD_EQ_BITS_ALT] THEN
+   X_GEN_TAC `j:num` THEN DISCH_TAC THEN
+   FIRST_X_ASSUM(MP_TAC o SPEC `j DIV l`) THEN
+   ANTS_TAC THENL
+   [UNDISCH_TAC `j < dimindex(:N)` THEN ASM_REWRITE_TAC[] THEN
+    ASM_SIMP_TAC[RDIV_LT_EQ; ARITH_RULE `0 < l ==> ~(l = 0)`; MULT_SYM];
+    DISCH_THEN(fun th ->
+      MP_TAC(AP_TERM `\(w:M word). bit (j MOD l) w` th)) THEN
+    REWRITE_TAC[BIT_WORD_SUBWORD] THEN
+    SUBGOAL_THEN `j MOD l < MIN l (dimindex(:M))`
+      (fun th -> REWRITE_TAC[th]) THENL
+    [ASM_SIMP_TAC[ARITH_RULE `l <= m ==> MIN l m = l`;
+                   MOD_LT_EQ; ARITH_RULE `0 < l ==> ~(l = 0)`];
+     ASM_SIMP_TAC[DIVISION_SIMP; ARITH_RULE `0 < l ==> ~(l = 0)`]]]]);;
+
+let WORD_SUBWORD_NUM_OF_WORDLIST = prove
+ (`!(ls:(L word)list) k.
+    dimindex(:KL) = dimindex(:L) * LENGTH ls /\
+    k < LENGTH ls
+    ==> word_subword (word (num_of_wordlist ls) : KL word) (dimindex(:L)*k, dimindex(:L)) : L word = EL k ls`,
+  REPEAT STRIP_TAC THEN REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD] THEN
+  REWRITE_TAC[ARITH_RULE `MIN n n = n`] THEN
+  SUBGOAL_THEN `val (word (num_of_wordlist (ls:(L word)list)) : KL word) = num_of_wordlist ls` SUBST1_TAC THENL
+  [W(MP_TAC o PART_MATCH (lhand o rand) VAL_WORD_EQ o lhand o snd) THEN
+   ANTS_TAC THENL
+   [TRANS_TAC LTE_TRANS `2 EXP (dimindex(:L) * LENGTH (ls:(L word)list))` THEN
+    REWRITE_TAC[NUM_OF_WORDLIST_BOUND; LE_EXP; LE_REFL] THEN ASM_ARITH_TAC;
+    SIMP_TAC[]];
+   MP_TAC(ISPECL [`ls:(L word)list`; `k:num`] NUM_OF_WORDLIST_EL) THEN
+   ASM_REWRITE_TAC[]]);;
+
+let NUM_OF_WORDLIST_FLATTEN = prove
+ (`!(ll:((N word) list) list) k.
+     ALL (\l. LENGTH l = k) ll /\
+     dimindex(:N) * k = dimindex(:M)
+     ==> num_of_wordlist (ITLIST APPEND ll []) =
+         num_of_wordlist (MAP ((word:num->M word) o num_of_wordlist) ll)`,
+  LIST_INDUCT_TAC THEN REWRITE_TAC[ITLIST; MAP; num_of_wordlist; ALL] THEN
+  X_GEN_TAC `k:num` THEN STRIP_TAC THEN
+  FIRST_X_ASSUM(MP_TAC o SPEC `k:num`) THEN
+  ASM_REWRITE_TAC[] THEN DISCH_TAC THEN
+  REWRITE_TAC[NUM_OF_WORDLIST_APPEND; num_of_wordlist; o_THM] THEN
+  ASM_REWRITE_TAC[] THEN
+  AP_THM_TAC THEN AP_TERM_TAC THEN
+  IMP_REWRITE_TAC [VAL_WORD_EXACT] THEN
+  TRANS_TAC LTE_TRANS `2 EXP (dimindex(:N) * LENGTH(h:(N word)list))` THEN
+  REWRITE_TAC[NUM_OF_WORDLIST_BOUND_LENGTH] THEN
+  ASM_REWRITE_TAC[LE_REFL]);;
+
+(* ========================================================================= *)
+(* Helper lemmas: byte splitting                                             *)
+(* ========================================================================= *)
+
+let NUM_BIT_DECOMPOSE_UNIQ = prove(
+  `!a b t k. a < 2 EXP k
+    ==> (a + 2 EXP k * b = t <=> (a = t MOD 2 EXP k /\ b = t DIV 2 EXP k))`,
+  REPEAT STRIP_TAC THEN EQ_TAC THENL [
+    DISCH_THEN (SUBST1_TAC o SYM) THEN
+    SIMP_TAC[MOD_MULT_ADD; DIV_MULT_ADD; EXP_EQ_0; ARITH_EQ] THEN
+    ASM_SIMP_TAC[MOD_LT; DIV_LT; ADD_CLAUSES];
+    STRIP_TAC THEN
+    MP_TAC (SPECL [`t:num`; `2 EXP k`] DIVISION) THEN
+    SIMP_TAC[EXP_EQ_0; ARITH_EQ] THEN ASM_REWRITE_TAC[] THEN ARITH_TAC]);;
+
+let READ_BYTES_SPLIT_ANY = prove(
+  `read (bytes(a : int64,k+l)) s = t <=>
+   read (bytes(a,k)) s = t MOD 2 EXP (8*k) /\
+   read (bytes(word_add a (word k), l)) s = t DIV 2 EXP (8*k)`,
+  let bound = prove(`read (bytes (a : int64,k)) s < 2 EXP (8*k)`,
+    REWRITE_TAC[READ_BYTES_BOUND]) in
+  REWRITE_TAC[GSYM VAL_EQ; VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN
+  REWRITE_TAC[READ_BYTES_COMBINE] THEN
+  REWRITE_TAC[MATCH_MP NUM_BIT_DECOMPOSE_UNIQ bound]);;
+
diff --git a/scripts/autogen b/scripts/autogen
index 08cd36219..baac7e7dd 100755
--- a/scripts/autogen
+++ b/scripts/autogen
@@ -1133,6 +1133,30 @@ def gen_aarch64_polyz_unpack_table():
     update_file("dev/aarch64_clean/src/polyz_unpack_table.c", "\n".join(gen()))
 
 
+def gen_hol_light_polyz_unpack_table():
+    def gen():
+        yield from gen_hol_light_header()
+        yield "(*"
+        yield " * TBL index tables for polyz_unpack_{17,19}"
+        yield " * See autogen for details."
+        yield " *)"
+        yield ""
+        for gamma1_bits in [17, 19]:
+            bit_width = gamma1_bits + 1
+            name = f"mldsa_polyz_unpack_{gamma1_bits}_indices"
+            indices = list(gen_aarch64_polyz_unpack_indices(bit_width))
+            yield f"let {name} = (REWRITE_RULE[MAP] o define)"
+            yield f"  `{name}:byte list = MAP word ["
+            yield from print_hol_light_array(indices, as_int=False, pad=3)
+            yield "]`;;"
+            yield ""
+
+    update_file(
+        "proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml",
+        "\n".join(gen()),
+    )
+
+
 def gen_avx2_rej_uniform_table_rows():
     # The index into the lookup table is an 8-bit bitmap, i.e. a number 0..255.
     # Conceptually, the table entry at index i is a vector of 8 16-bit values, of
@@ -2308,6 +2332,20 @@ def gen_hol_light_asm():
             f"-Imldsa/src/native/aarch64/src {aarch64_flags}",
             "aarch64",
         ),
+        (
+            "polyz_unpack_17_asm.S",
+            "mldsa_polyz_unpack_17.S",
+            "dev/aarch64_opt/src",
+            f"-Imldsa/src/native/aarch64/src {aarch64_flags}",
+            "aarch64",
+        ),
+        (
+            "polyz_unpack_19_asm.S",
+            "mldsa_polyz_unpack_19.S",
+            "dev/aarch64_opt/src",
+            f"-Imldsa/src/native/aarch64/src {aarch64_flags}",
+            "aarch64",
+        ),
     ]
 
     x86_64_flags = "-mavx2 -mbmi2 -msse4 -fcf-protection=full"
@@ -3342,6 +3380,7 @@ def _main():
         gen_aarch64_rej_uniform_table()
         gen_aarch64_rej_uniform_eta_table()
         gen_aarch64_polyz_unpack_table()
+        gen_hol_light_polyz_unpack_table()
         gen_avx2_hol_light_zeta_file()
         gen_avx2_zeta_file()
         gen_avx2_rej_uniform_table()

From 83bf55d54868985e08b699993ae79df26d0bbc11 Mon Sep 17 00:00:00 2001
From: "Matthias J. Kannwischer" <matthias@kannwischer.eu>
Date: Sat, 14 Feb 2026 17:43:21 +0100
Subject: [PATCH 2/2] CBMC: Add CBMC proof for aarch64 polyz_unpack

Add CBMC contracts for native AArch64 polyz_unpack_17 and polyz_unpack_19
following corresponding HOL-Light specs.

Signed-off-by: Matthias J. Kannwischer <matthias@kannwischer.eu>
---
 dev/aarch64_clean/src/arith_native_aarch64.h  | 22 ++++++-
 dev/aarch64_opt/src/arith_native_aarch64.h    | 22 ++++++-
 .../native/aarch64/src/arith_native_aarch64.h | 22 ++++++-
 .../polyz_unpack_17_native_aarch64/Makefile   | 63 ++++++++++++++++++
 .../polyz_unpack_17_native_aarch64_harness.c  | 19 ++++++
 .../polyz_unpack_19_native_aarch64/Makefile   | 66 +++++++++++++++++++
 .../polyz_unpack_19_native_aarch64_harness.c  | 19 ++++++
 7 files changed, 227 insertions(+), 6 deletions(-)
 create mode 100644 proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile
 create mode 100644 proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c
 create mode 100644 proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile
 create mode 100644 proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c

diff --git a/dev/aarch64_clean/src/arith_native_aarch64.h b/dev/aarch64_clean/src/arith_native_aarch64.h
index 48c7d98c5..6a87db0ba 100644
--- a/dev/aarch64_clean/src/arith_native_aarch64.h
+++ b/dev/aarch64_clean/src/arith_native_aarch64.h
@@ -121,11 +121,29 @@ __contract__(
 
 #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm)
 void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 576))
+  requires(indices == mld_polyz_unpack_17_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1))
+);
 
 #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm)
 void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 640))
+  requires(indices == mld_polyz_unpack_19_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1))
+);
 
 #define mld_poly_pointwise_montgomery_asm \
   MLD_NAMESPACE(poly_pointwise_montgomery_asm)
diff --git a/dev/aarch64_opt/src/arith_native_aarch64.h b/dev/aarch64_opt/src/arith_native_aarch64.h
index 48c7d98c5..6a87db0ba 100644
--- a/dev/aarch64_opt/src/arith_native_aarch64.h
+++ b/dev/aarch64_opt/src/arith_native_aarch64.h
@@ -121,11 +121,29 @@ __contract__(
 
 #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm)
 void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 576))
+  requires(indices == mld_polyz_unpack_17_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1))
+);
 
 #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm)
 void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 640))
+  requires(indices == mld_polyz_unpack_19_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1))
+);
 
 #define mld_poly_pointwise_montgomery_asm \
   MLD_NAMESPACE(poly_pointwise_montgomery_asm)
diff --git a/mldsa/src/native/aarch64/src/arith_native_aarch64.h b/mldsa/src/native/aarch64/src/arith_native_aarch64.h
index 48c7d98c5..6a87db0ba 100644
--- a/mldsa/src/native/aarch64/src/arith_native_aarch64.h
+++ b/mldsa/src/native/aarch64/src/arith_native_aarch64.h
@@ -121,11 +121,29 @@ __contract__(
 
 #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm)
 void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 576))
+  requires(indices == mld_polyz_unpack_17_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1))
+);
 
 #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm)
 void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf,
-                             const uint8_t *indices);
+                             const uint8_t *indices)
+/* This must be kept in sync with the HOL-Light specification
+ * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */
+__contract__(
+  requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N))
+  requires(memory_no_alias(buf, 640))
+  requires(indices == mld_polyz_unpack_19_indices)
+  assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N))
+  ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1))
+);
 
 #define mld_poly_pointwise_montgomery_asm \
   MLD_NAMESPACE(poly_pointwise_montgomery_asm)
diff --git a/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile b/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile
new file mode 100644
index 000000000..f6ac6ca51
--- /dev/null
+++ b/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile
@@ -0,0 +1,63 @@
+# Copyright (c) The mldsa-native project authors
+# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = polyz_unpack_17_native_aarch64_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = polyz_unpack_17_native_aarch64
+
+# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be
+# included, which contains the CBMC specifications.
+DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET +=
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly_kl.c $(SRCDIR)/mldsa/src/native/aarch64/src/polyz_unpack_table.c
+
+# polyz_unpack_17 is only used with ML-DSA-44
+ifeq ($(MLD_CONFIG_PARAMETER_SET),44)
+CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_17_native
+USE_FUNCTION_CONTRACTS = mld_polyz_unpack_17_asm
+else
+CHECK_FUNCTION_CONTRACTS=
+USE_FUNCTION_CONTRACTS =
+endif
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = polyz_unpack_17_native_aarch64
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 8
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mldsa/poly_kl.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly_kl.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c b/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c
new file mode 100644
index 000000000..ff2983c44
--- /dev/null
+++ b/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c
@@ -0,0 +1,19 @@
+// Copyright (c) The mldsa-native project authors
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "params.h"
+
+int mld_polyz_unpack_17_native(int32_t *r, const uint8_t *a);
+
+void harness(void)
+{
+  /* mld_polyz_unpack_17_native is only defined for ML-DSA-44 */
+#if MLD_CONFIG_PARAMETER_SET == 44
+  int32_t *r;
+  const uint8_t *a;
+  int t;
+  t = mld_polyz_unpack_17_native(r, a);
+#endif /* MLD_CONFIG_PARAMETER_SET == 44 */
+}
diff --git a/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile b/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile
new file mode 100644
index 000000000..1a5e73d64
--- /dev/null
+++ b/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile
@@ -0,0 +1,66 @@
+# Copyright (c) The mldsa-native project authors
+# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = polyz_unpack_19_native_aarch64_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = polyz_unpack_19_native_aarch64
+
+# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be
+# included, which contains the CBMC specifications.
+DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET +=
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly_kl.c $(SRCDIR)/mldsa/src/native/aarch64/src/polyz_unpack_table.c
+
+# polyz_unpack_19 is only used with ML-DSA-65 and ML-DSA-87
+ifeq ($(MLD_CONFIG_PARAMETER_SET),65)
+CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_19_native
+USE_FUNCTION_CONTRACTS = mld_polyz_unpack_19_asm
+else ifeq ($(MLD_CONFIG_PARAMETER_SET),87)
+CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_19_native
+USE_FUNCTION_CONTRACTS = mld_polyz_unpack_19_asm
+else
+CHECK_FUNCTION_CONTRACTS=
+USE_FUNCTION_CONTRACTS =
+endif
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--smt2
+
+FUNCTION_NAME = polyz_unpack_19_native_aarch64
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 8
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mldsa/poly_kl.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly_kl.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c b/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c
new file mode 100644
index 000000000..f4cfecced
--- /dev/null
+++ b/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c
@@ -0,0 +1,19 @@
+// Copyright (c) The mldsa-native project authors
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+
+#include <stdint.h>
+#include "cbmc.h"
+#include "params.h"
+
+int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *a);
+
+void harness(void)
+{
+  /* mld_polyz_unpack_19_native is only defined for ML-DSA-65 and ML-DSA-87 */
+#if MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87
+  int32_t *r;
+  const uint8_t *a;
+  int t;
+  t = mld_polyz_unpack_19_native(r, a);
+#endif /* MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87 */
+}