From ff8eca86e43441e71a15f1ff47d01a064cc62125 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 14 Feb 2026 17:42:32 +0100 Subject: [PATCH 1/2] HOL-Light: Add HOL Light proof for aarch64 polyz_unpack_{17,19} Add functional correctness proofs for polyz_unpack_17 and polyz_unpack_19. Closely following the decompress proofs from mlkem-native: https://github.com/pq-code-package/mlkem-native/pull/1543 Signed-off-by: Matthias J. Kannwischer --- .github/workflows/hol_light.yml | 6 + nix/s2n_bignum/default.nix | 4 +- proofs/hol_light/aarch64/Makefile | 4 +- .../aarch64/mldsa/mldsa_polyz_unpack_17.S | 68 ++++ .../aarch64/mldsa/mldsa_polyz_unpack_19.S | 65 ++++ .../hol_light/aarch64/proofs/aarch64_utils.ml | 234 +++++++++++- .../hol_light/aarch64/proofs/dump_bytecode.ml | 8 + .../aarch64/proofs/mldsa_polyz_unpack_17.ml | 338 ++++++++++++++++++ .../aarch64/proofs/mldsa_polyz_unpack_19.ml | 255 +++++++++++++ .../proofs/mldsa_polyz_unpack_consts.ml | 39 ++ proofs/hol_light/common/mldsa_specs.ml | 312 ++++++++++++++++ scripts/autogen | 39 ++ 12 files changed, 1368 insertions(+), 4 deletions(-) create mode 100644 proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S create mode 100644 proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml create mode 100644 proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml index 5d9356dc8..d15d55067 100644 --- a/.github/workflows/hol_light.yml +++ b/.github/workflows/hol_light.yml @@ -13,6 +13,7 @@ on: - 'proofs/hol_light/aarch64/Makefile' - 'proofs/hol_light/aarch64/**/*.S' - 'proofs/hol_light/aarch64/**/*.ml' + - 'proofs/hol_light/common/**/*.ml' - 'proofs/hol_light/x86_64/Makefile' - 'proofs/hol_light/x86_64/**/*.S' - 'proofs/hol_light/x86_64/**/*.ml' @@ -28,6 +29,7 @@ on: - 'proofs/hol_light/aarch64/Makefile' - 'proofs/hol_light/aarch64/**/*.S' - 'proofs/hol_light/aarch64/**/*.ml' + - 'proofs/hol_light/common/**/*.ml' - 'proofs/hol_light/x86_64/Makefile' - 'proofs/hol_light/x86_64/**/*.S' - 'proofs/hol_light/x86_64/**/*.ml' @@ -87,6 +89,10 @@ jobs: needs: ["aarch64_utils.ml"] - name: mldsa_poly_chknorm needs: ["aarch64_utils.ml"] + - name: mldsa_polyz_unpack_17 + needs: ["aarch64_utils.ml", "mldsa_polyz_unpack_consts.ml", "mldsa_specs.ml"] + - name: mldsa_polyz_unpack_19 + needs: ["aarch64_utils.ml", "mldsa_polyz_unpack_consts.ml", "mldsa_specs.ml"] name: AArch64 HOL Light proof for ${{ matrix.proof.name }}.S runs-on: pqcp-arm64 if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork diff --git a/nix/s2n_bignum/default.nix b/nix/s2n_bignum/default.nix index 6857fe64e..04e2339b0 100644 --- a/nix/s2n_bignum/default.nix +++ b/nix/s2n_bignum/default.nix @@ -4,12 +4,12 @@ { stdenv, fetchFromGitHub, writeText, ... }: stdenv.mkDerivation rec { pname = "s2n_bignum"; - version = "3bfe68deb9fbdaf23be0a927c362a87a799adc28"; + version = "cba3956c7a20d22f08ef6f49fe162e9d7c07867c"; src = fetchFromGitHub { owner = "awslabs"; repo = "s2n-bignum"; rev = "${version}"; - hash = "sha256-C3SdYZ/PhfTmHefz3klO3l/lbEgtA8Hq9wjDmjd+Fek="; + hash = "sha256-5UOu1aUqPGiDPhpBlUDjkM/GqP4SILT/p4pKL9DBFjg="; }; setupHook = writeText "setup-hook.sh" '' export S2N_BIGNUM_DIR="$1" diff --git a/proofs/hol_light/aarch64/Makefile b/proofs/hol_light/aarch64/Makefile index 0d87b2555..306d657a4 100644 --- a/proofs/hol_light/aarch64/Makefile +++ b/proofs/hol_light/aarch64/Makefile @@ -55,7 +55,9 @@ SPLIT=tr ';' '\n' OBJ = mldsa/mldsa_ntt.o \ mldsa/mldsa_poly_caddq.o \ - mldsa/mldsa_poly_chknorm.o + mldsa/mldsa_poly_chknorm.o \ + mldsa/mldsa_polyz_unpack_17.o \ + mldsa/mldsa_polyz_unpack_19.o # According to # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S new file mode 100644 index 000000000..25bfc8e88 --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_17.S @@ -0,0 +1,68 @@ +/* + * Copyright (c) The mldsa-native project authors + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/polyz_unpack_17_asm.S using scripts/simpasm. Do not modify it directly. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm +_PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm +PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm: +#endif + + .cfi_startproc + ldr q24, [x2] + ldr q25, [x2, #0x10] + ldr q26, [x2, #0x20] + ldr q27, [x2, #0x30] + mov x3, #0xfe00000000 // =1090921693184 + mov v28.d[0], x3 + mov x3, #0xfc // =252 + movk x3, #0xfa, lsl #32 + mov v28.d[1], x3 + movi v29.4s, #0x3, msl #16 + movi v30.4s, #0x2, lsl #16 + mov x9, #0x10 // =16 + +Lpolyz_unpack_17_loop: + ld1 { v0.16b, v1.16b }, [x1] + add x1, x1, #0x14 + ld1 { v2.16b }, [x1], #16 + tbl v4.16b, { v0.16b }, v24.16b + tbl v5.16b, { v0.16b, v1.16b }, v25.16b + tbl v6.16b, { v1.16b }, v26.16b + tbl v7.16b, { v1.16b, v2.16b }, v27.16b + ushl v4.4s, v4.4s, v28.4s + and v4.16b, v4.16b, v29.16b + sub v4.4s, v30.4s, v4.4s + ushl v5.4s, v5.4s, v28.4s + and v5.16b, v5.16b, v29.16b + sub v5.4s, v30.4s, v5.4s + ushl v6.4s, v6.4s, v28.4s + and v6.16b, v6.16b, v29.16b + sub v6.4s, v30.4s, v6.4s + ushl v7.4s, v7.4s, v28.4s + and v7.16b, v7.16b, v29.16b + sub v7.4s, v30.4s, v7.4s + str q5, [x0, #0x10] + str q6, [x0, #0x20] + str q7, [x0, #0x30] + str q4, [x0], #0x40 + subs x9, x9, #0x1 + b.ne Lpolyz_unpack_17_loop + ret + .cfi_endproc diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S new file mode 100644 index 000000000..f921ed77c --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_polyz_unpack_19.S @@ -0,0 +1,65 @@ +/* + * Copyright (c) The mldsa-native project authors + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/polyz_unpack_19_asm.S using scripts/simpasm. Do not modify it directly. + */ + +#if defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm +_PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm +PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_19_asm: +#endif + + .cfi_startproc + ldr q24, [x2] + ldr q25, [x2, #0x10] + ldr q26, [x2, #0x20] + ldr q27, [x2, #0x30] + mov x3, #0xfc00000000 // =1082331758592 + dup v28.2d, x3 + movi v29.4s, #0xf, msl #16 + movi v30.4s, #0x8, lsl #16 + mov x9, #0x10 // =16 + +Lpolyz_unpack_19_loop: + ld1 { v0.16b, v1.16b }, [x1] + add x1, x1, #0x18 + ld1 { v2.16b }, [x1], #16 + tbl v4.16b, { v0.16b }, v24.16b + tbl v5.16b, { v0.16b, v1.16b }, v25.16b + tbl v6.16b, { v1.16b }, v26.16b + tbl v7.16b, { v1.16b, v2.16b }, v27.16b + ushl v4.4s, v4.4s, v28.4s + and v4.16b, v4.16b, v29.16b + sub v4.4s, v30.4s, v4.4s + ushl v5.4s, v5.4s, v28.4s + and v5.16b, v5.16b, v29.16b + sub v5.4s, v30.4s, v5.4s + ushl v6.4s, v6.4s, v28.4s + and v6.16b, v6.16b, v29.16b + sub v6.4s, v30.4s, v6.4s + ushl v7.4s, v7.4s, v28.4s + and v7.16b, v7.16b, v29.16b + sub v7.4s, v30.4s, v7.4s + str q5, [x0, #0x10] + str q6, [x0, #0x20] + str q7, [x0, #0x30] + str q4, [x0], #0x40 + subs x9, x9, #0x1 + b.ne Lpolyz_unpack_19_loop + ret + .cfi_endproc diff --git a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml index 1187b79aa..34d06c696 100644 --- a/proofs/hol_light/aarch64/proofs/aarch64_utils.ml +++ b/proofs/hol_light/aarch64/proofs/aarch64_utils.ml @@ -3,6 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT *) +needs "common/mldsa_specs.ml";; + (* Merge 4 x bytes32 reads into bytes128 reads *) let MEMORY_128_FROM_32_TAC = let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` @@ -14,7 +16,9 @@ let MEMORY_128_FROM_32_TAC = READ_MEMORY_MERGE_CONV 2 (subst[itm,n_tm] pat') in MP_TAC(end_itlist CONJ (map f (0--(n-1))));; -(* Utility for executing until target PC is reached *) +(* ------------------------------------------------------------------------- *) +(* Symbolic execution until target PC is reached. *) +(* ------------------------------------------------------------------------- *) let MAP_UNTIL_TARGET_PC f n = fun (asl, w) -> let is_pc_condition = can (term_match [] `read PC some_state = some_value`) in let extract_target_pc_from_goal goal = @@ -33,3 +37,231 @@ let MAP_UNTIL_TARGET_PC f n = fun (asl, w) -> let rec core n (asl, w) = (TARGET_PC_REACHED_TAC target_pc ORELSE (f n THEN core (n + 1))) (asl, w) in core n (asl, w);; + +(* ========================================================================= *) +(* SIMD simplification: subword extraction + numeric reduction + folding. *) +(* ========================================================================= *) + +let SIMD_SIMPLIFY_CONV unfold_defs = + TOP_DEPTH_CONV + (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC + DEPTH_CONV WORD_NUM_RED_CONV THENC + REWRITE_CONV (map GSYM unfold_defs);; + +let SIMD_SIMPLIFY_TAC unfold_defs = + let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in + TRY(FIRST_X_ASSUM + (ASSUME_TAC o + CONV_RULE(RAND_CONV (SIMD_SIMPLIFY_CONV unfold_defs)) o + check (simdable o concl)));; + +(* ========================================================================= *) +(* Parametric infrastructure for d-bit packed coefficients (SIMD). *) +(* Supports d=18 (GAMMA1=2^17) and d=20 (GAMMA1=2^19). *) +(* ========================================================================= *) + +(* Convert MOD/DIV expressions to word_subword of (16*d)-bit word *) +let mk_base_simps d = + let total = 16 * d in + let rem = total - 256 in + let total_ty = mk_finty (Num.num_of_int total) in + let rem_ty = mk_finty (Num.num_of_int rem) in + let mod_128 = CONV_RULE NUM_REDUCE_CONV (prove( + inst [total_ty, `:N`] + `word (t MOD 2 EXP 128) : 128 word = + word_subword (word t : N word) (0, 128)`, + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD; DIMINDEX_128] THEN + REWRITE_TAC[EXP; DIV_1; MOD_MOD_REFL; MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN + MP_TAC (SPECL [`t:num`; `2`; mk_small_numeral total; `128`] MOD_MOD_EXP_MIN) THEN + CONV_TAC NUM_REDUCE_CONV THEN DISCH_THEN (SUBST1_TAC o SYM) THEN REFL_TAC)) in + let div_128_mod_128 = CONV_RULE NUM_REDUCE_CONV (prove( + inst [total_ty, `:N`] + `word ((t DIV 2 EXP 128) MOD 2 EXP 128) : 128 word = + word_subword (word t : N word) (128, 128)`, + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD; DIMINDEX_128] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN + REWRITE_TAC[ARITH_RULE `MIN 128 128 = 128`; MOD_MOD_REFL] THEN + REWRITE_TAC[DIV_MOD; GSYM EXP_ADD; MOD_MOD_EXP_MIN] THEN + CONV_TAC NUM_REDUCE_CONV)) in + let div_256 = CONV_RULE NUM_REDUCE_CONV (prove( + inst [total_ty, `:N`; rem_ty, `:M`] + `word (t DIV 2 EXP 256) : M word = + word_subword (word t : N word) (256, dimindex(:M))`, + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[DIV_MOD; GSYM EXP_ADD; MOD_MOD_EXP_MIN] THEN + CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL])) in + [mod_128; div_128_mod_128; div_256];; + +(* Split ncoeffs d-bit coefficients into chunks of chunk_size *) +let mk_split_theorem d ncoeffs chunk_size = + let total = d * chunk_size in + let nchunks = ncoeffs / chunk_size in + let d_ty = mk_finty (Num.num_of_int d) in + let total_ty = mk_finty (Num.num_of_int total) in + prove( + subst [mk_small_numeral ncoeffs, `ncoeffs:num`; + mk_small_numeral chunk_size, `cs:num`; + mk_small_numeral nchunks, `nc:num`] + (inst [d_ty, `:D`; total_ty, `:T`] + `!(l: (D word) list). LENGTH l = ncoeffs ==> + num_of_wordlist l = num_of_wordlist (MAP ((word:num->T word) o num_of_wordlist) + (list_of_seq (\i. SUB_LIST (cs * i, cs) l) nc))`), + REPEAT STRIP_TAC THEN + UNDISCH_THEN (subst [mk_small_numeral ncoeffs, `n:num`] + (inst [d_ty, `:D`] `LENGTH (l : (D word) list) = n`)) (fun th -> + GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) + [MATCH_MP (CONV_RULE NUM_REDUCE_CONV + (ISPECL [mk_small_numeral chunk_size; mk_small_numeral nchunks; + `l:'a list`] SUBLIST_PARTITION)) th] + THEN ASSUME_TAC th) THEN + IMP_REWRITE_TAC [CONV_RULE (ONCE_DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) + (ISPECL [inst [d_ty, `:D`] `ll: ((D word) list) list`; + mk_small_numeral chunk_size] + (INST_TYPE [d_ty, `:N`; total_ty, `:M`] NUM_OF_WORDLIST_FLATTEN))] THEN + CONV_TAC(ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN + ASM_REWRITE_TAC[ALL; LENGTH_SUB_LIST] THEN + ARITH_TAC);; + +(* Extract individual d-bit coefficients from (d*chunk_size)-bit word *) +let mk_subword_cases d chunk_size = + let total = d * chunk_size in + let d_ty = mk_finty (Num.num_of_int d) in + let total_ty = mk_finty (Num.num_of_int total) in + let arith_simp = + let lhs = mk_eq(mk_small_numeral total, + mk_comb(mk_comb(`( * ):num->num->num`, + mk_small_numeral d), `n:num`)) in + let rhs = mk_eq(`n:num`, mk_small_numeral chunk_size) in + ARITH_RULE (mk_eq(lhs, rhs)) in + let meson_simp = + let n_eq = mk_eq(`n:num`, mk_small_numeral chunk_size) in + let k_lt_n = mk_comb(mk_comb(`(<):num->num->bool`, `k:num`), `n:num`) in + let k_lt_cs = mk_comb(mk_comb(`(<):num->num->bool`, `k:num`), + mk_small_numeral chunk_size) in + MESON[] (mk_eq(mk_conj(n_eq, k_lt_n), mk_conj(n_eq, k_lt_cs))) in + let base = + let th = INST_TYPE [total_ty, `:KL`; d_ty, `:L`] WORD_SUBWORD_NUM_OF_WORDLIST in + let th = CONV_RULE(DEPTH_CONV DIMINDEX_CONV) th in + REWRITE_RULE[arith_simp; meson_simp] th in + let mk k = + let th = SPEC (mk_small_numeral k) + (SPEC (inst [d_ty, `:L`] `ls:(L word)list`) base) in + CONV_RULE NUM_REDUCE_CONV (REWRITE_RULE[ARITH] th) in + map mk (0 -- (chunk_size - 1));; + +(* ========================================================================= *) +(* zunpack lane conversion for TBL + USHL + AND + SUB pipeline. *) +(* ========================================================================= *) + +let ZUNPACK_LANE_CONV d i tm = + let gamma1 = 1 lsl (d - 1) in + let word_bits = 16 * d in + let t_ty = mk_finty (Num.num_of_int word_bits) in + let is_target t = + try fst(dest_type(type_of t)) = "word" && + Num.int_of_num(dest_finty(hd(snd(dest_type(type_of t))))) = word_bits + with _ -> false in + let t_var_opt = try Some(find_term is_target tm) with _ -> None in + match t_var_opt with + | Some t_var -> + let d_ty = mk_finty (Num.num_of_int d) in + let goal = mk_eq(tm, + subst [mk_small_numeral (d*i), `pos:num`; + mk_small_numeral d, `bw:num`; + mk_small_numeral gamma1, `g:num`; + t_var, mk_var("t", mk_type("word",[t_ty]))] + (inst [d_ty, `:B`; t_ty, `:T`] + `word_sub (word g : 32 word) + (word_zx (word_subword (t : T word) (pos,bw) : B word))`)) in + WORD_BLAST goal + | None -> failwith ("ZUNPACK_LANE_CONV: no " ^ string_of_int word_bits ^ "-bit word");; + +let ZUNPACK_128_CONV d tm = + tryfind (fun base_i -> + let f i = ZUNPACK_LANE_CONV d (base_i + i) in + RAND_CONV ( + COMB2_CONV + (RAND_CONV (COMB2_CONV (RAND_CONV (f 3)) (f 2))) + (COMB2_CONV (RAND_CONV (f 1)) (f 0))) + tm) [0; 4; 8; 12];; + +let SIMP_ZUNPACK_TAC d zunpack_correct = + let zunpack_const = + fst(strip_comb(rhs(snd(strip_forall(concl zunpack_correct))))) in + let already_processed tm = + can (find_term ((=) zunpack_const)) tm in + RULE_ASSUM_TAC (fun th -> + if already_processed (concl th) then th + else CONV_RULE (TRY_CONV (ZUNPACK_128_CONV d) THENC + TRY_CONV (ONCE_REWRITE_CONV [zunpack_correct])) th);; + +(* ------------------------------------------------------------------------- *) +(* Overlapping memory read: derive bytes128 at an unaligned offset from *) +(* bytes128@16 and a tail read at offset 32. *) +(* For D=20: bytes128@16 + bytes64@32 -> bytes128@24 *) +(* For D=18: bytes128@16 + bytes32@32 -> bytes128@20 *) +(* ------------------------------------------------------------------------- *) + +let split_k_l_at base k l = + let a_tm = mk_comb(mk_comb(`word_add:int64->int64->int64`, `a:int64`), + mk_comb(`word:num->int64`, mk_small_numeral base)) in + CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC DEPTH_CONV NUM_MULT_CONV) + (INST [mk_small_numeral k,`k:num`; mk_small_numeral l,`l:num`; + a_tm, `a:int64`] READ_BYTES_SPLIT_ANY);; + +(* For D=20: bytes128@16 + bytes64@32 -> bytes128@24 *) +let BYTES128_FROM_OVERLAP_64 = prove + (`read (memory :> bytes128 (word_add a (word 16))) (s:armstate) = (word m16 : int128) /\ + read (memory :> bytes64 (word_add a (word 32))) (s:armstate) = (word m64 : int64) + ==> read (memory :> bytes128 (word_add a (word 24))) s = + (word_join (word m64 : int64) (word(m16 DIV 2 EXP 64) : int64) : int128)`, + REWRITE_TAC[BYTES128_WBYTES; BYTES64_WBYTES; READ_COMPONENT_COMPOSE; + GSYM VAL_EQ; VAL_READ_WBYTES] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + ABBREV_TAC `m = read memory (s:armstate)` THEN + REWRITE_TAC[VAL_WORD_JOIN; DIMINDEX_64; VAL_WORD; DIMINDEX_128] THEN + CONV_TAC NUM_REDUCE_CONV THEN + ONCE_REWRITE_TAC[split_k_l_at 16 8 8] THEN + ONCE_REWRITE_TAC[split_k_l_at 24 8 8] THEN + REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + CONV_TAC NUM_REDUCE_CONV THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + let COMMON_SETUP = + SUBGOAL_THEN + `(m16 DIV 18446744073709551616) MOD 18446744073709551616 < 18446744073709551616 /\ + m64 MOD 18446744073709551616 < 18446744073709551616` + STRIP_ASSUME_TAC + THENL [REWRITE_TAC[MOD_LT_EQ; EXP_EQ_0; ARITH_EQ] THEN ARITH_TAC; ALL_TAC] THEN + SUBGOAL_THEN + `(18446744073709551616 * m64 MOD 18446744073709551616 + + (m16 DIV 18446744073709551616) MOD 18446744073709551616) < + 340282366920938463463374607431768211456` + ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN + ASM_SIMP_TAC[MOD_LT] in + CONJ_TAC THENL [ + COMMON_SETUP THEN + REWRITE_TAC[MOD_MULT_ADD; MOD_MOD_EXP_MIN; GSYM EXP_ADD] THEN + CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL] THEN + REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV + (SPECL [`m16:num`; `18446744073709551616`; `18446744073709551616`] DIV_MOD))]; + COMMON_SETUP THEN + SIMP_TAC[DIV_MULT_ADD; ARITH_EQ] THEN ASM_SIMP_TAC[DIV_LT] THEN ARITH_TAC]);; + +(* Instantiate overlap theorem for chunk i and ASSUME_TAC the result *) +let DERIVE_OVERLAP_TAC overlap_thm chunk_size i = + let off16 = chunk_size*i + 16 and off32 = chunk_size*i + 32 in + let w16 = mk_small_numeral off16 and w32 = mk_small_numeral off32 in + let has t th = can (find_term ((=) t)) (concl th) in + let a_val = mk_comb(mk_comb(`word_add:int64->int64->int64`, `b:int64`), + mk_comb(`word:num->int64`, mk_small_numeral (chunk_size * i))) in + let inst = INST [a_val, `a:int64`; `s0:armstate`, `s:armstate`] overlap_thm in + let thm = CONV_RULE (ONCE_DEPTH_CONV(GEN_REWRITE_CONV I [WORD_ADD_ASSOC_CONSTS]) THENC + DEPTH_CONV NUM_ADD_CONV) inst in + FIRST_ASSUM(fun th128 -> + if not(has w16 th128 && has `bytes128` th128 && has `b:int64` th128) then failwith "" else + FIRST_ASSUM(fun thtail -> + if not(has w32 thtail && has `b:int64` thtail && + not(has `bytes128` thtail)) then failwith "" else + ASSUME_TAC(MATCH_MP thm (CONJ th128 thtail))));; diff --git a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml index 358750c8e..52a0aef26 100644 --- a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml +++ b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml @@ -17,3 +17,11 @@ print_string "==== bytecode end =====================================\n\n";; print_string "=== bytecode start: aarch64/mldsa/mldsa_poly_chknorm.o ===\n";; print_literal_from_elf "aarch64/mldsa/mldsa_poly_chknorm.o";; print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: aarch64/mldsa/mldsa_polyz_unpack_17.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_17.o";; +print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: aarch64/mldsa/mldsa_polyz_unpack_19.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_19.o";; +print_string "==== bytecode end =====================================\n\n";; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml new file mode 100644 index 000000000..38e0cff90 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml @@ -0,0 +1,338 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Functional correctness of polyz_unpack_17: *) +(* Unpack polynomial z with 18-bit packed coefficients (GAMMA1 = 2^17) *) +(* Maps packed [0, 2^18-1] to signed [-(2^17-1), 2^17] via GAMMA1 - x *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; +needs "aarch64/proofs/mldsa_polyz_unpack_consts.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_17.o";; + ****) + +let mldsa_polyz_unpack_17_mc = define_assert_from_elf + "mldsa_polyz_unpack_17_mc" "aarch64/mldsa/mldsa_polyz_unpack_17.o" +(*** BYTECODE START ***) +[ + 0x3dc00058; (* arm_LDR Q24 X2 (Immediate_Offset (word 0)) *) + 0x3dc00459; (* arm_LDR Q25 X2 (Immediate_Offset (word 16)) *) + 0x3dc0085a; (* arm_LDR Q26 X2 (Immediate_Offset (word 32)) *) + 0x3dc00c5b; (* arm_LDR Q27 X2 (Immediate_Offset (word 48)) *) + 0xd2c01fc3; (* arm_MOVZ X3 (word 254) 32 *) + 0x4e081c7c; (* arm_INS_GEN Q28 X3 0 64 *) + 0xd2801f83; (* arm_MOV X3 (rvalue (word 252)) *) + 0xf2c01f43; (* arm_MOVK X3 (word 250) 32 *) + 0x4e181c7c; (* arm_INS_GEN Q28 X3 64 64 *) + 0x4f00d47d; (* arm_MOVI Q29 (word 1125895612137471) *) + 0x4f00445e; (* arm_MOVI Q30 (word 562949953552384) *) + 0xd2800209; (* arm_MOV X9 (rvalue (word 16)) *) + 0x4c40a020; (* arm_LDP Q0 Q1 X1 No_Offset *) + 0x91005021; (* arm_ADD X1 X1 (rvalue (word 20)) *) + 0x4cdf7022; (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *) + 0x4e180004; (* arm_TBL Q4 [Q0] Q24 128 *) + 0x4e192005; (* arm_TBL2 Q5 Q0 Q1 Q25 128 *) + 0x4e1a0026; (* arm_TBL Q6 [Q1] Q26 128 *) + 0x4e1b2027; (* arm_TBL2 Q7 Q1 Q2 Q27 128 *) + 0x6ebc4484; (* arm_USHL_VEC Q4 Q4 Q28 32 128 *) + 0x4e3d1c84; (* arm_AND_VEC Q4 Q4 Q29 128 *) + 0x6ea487c4; (* arm_SUB_VEC Q4 Q30 Q4 32 128 *) + 0x6ebc44a5; (* arm_USHL_VEC Q5 Q5 Q28 32 128 *) + 0x4e3d1ca5; (* arm_AND_VEC Q5 Q5 Q29 128 *) + 0x6ea587c5; (* arm_SUB_VEC Q5 Q30 Q5 32 128 *) + 0x6ebc44c6; (* arm_USHL_VEC Q6 Q6 Q28 32 128 *) + 0x4e3d1cc6; (* arm_AND_VEC Q6 Q6 Q29 128 *) + 0x6ea687c6; (* arm_SUB_VEC Q6 Q30 Q6 32 128 *) + 0x6ebc44e7; (* arm_USHL_VEC Q7 Q7 Q28 32 128 *) + 0x4e3d1ce7; (* arm_AND_VEC Q7 Q7 Q29 128 *) + 0x6ea787c7; (* arm_SUB_VEC Q7 Q30 Q7 32 128 *) + 0x3d800405; (* arm_STR Q5 X0 (Immediate_Offset (word 16)) *) + 0x3d800806; (* arm_STR Q6 X0 (Immediate_Offset (word 32)) *) + 0x3d800c07; (* arm_STR Q7 X0 (Immediate_Offset (word 48)) *) + 0x3c840404; (* arm_STR Q4 X0 (Postimmediate_Offset (word 64)) *) + 0xf1000529; (* arm_SUBS X9 X9 (rvalue (word 1)) *) + 0x54fffd01; (* arm_BNE (word 2097056) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_POLYZ_UNPACK_17_EXEC = ARM_MK_EXEC_RULE mldsa_polyz_unpack_17_mc;; + +(* ------------------------------------------------------------------------- *) +(* Code length constants *) +(* ------------------------------------------------------------------------- *) + +let LENGTH_MLDSA_POLYZ_UNPACK_17_MC = + REWRITE_CONV[mldsa_polyz_unpack_17_mc] `LENGTH mldsa_polyz_unpack_17_mc` + |> CONV_RULE (RAND_CONV LENGTH_CONV);; + +let MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH = new_definition + `MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH = 0`;; + +let MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH = new_definition + `MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH = 4`;; + +let MLDSA_POLYZ_UNPACK_17_CORE_START = new_definition + `MLDSA_POLYZ_UNPACK_17_CORE_START = MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH`;; + +let MLDSA_POLYZ_UNPACK_17_CORE_END = new_definition + `MLDSA_POLYZ_UNPACK_17_CORE_END = + LENGTH mldsa_polyz_unpack_17_mc - MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH`;; + +let LENGTH_SIMPLIFY_CONV = + REWRITE_CONV[LENGTH_MLDSA_POLYZ_UNPACK_17_MC; + MLDSA_POLYZ_UNPACK_17_CORE_START; MLDSA_POLYZ_UNPACK_17_CORE_END; + MLDSA_POLYZ_UNPACK_17_PREAMBLE_LENGTH; + MLDSA_POLYZ_UNPACK_17_POSTAMBLE_LENGTH] THENC + NUM_REDUCE_CONV THENC REWRITE_CONV [ADD_0];; + +(* ------------------------------------------------------------------------- *) +(* D=18 instantiations for SIMD infrastructure *) +(* ------------------------------------------------------------------------- *) + +let BASE_SIMPS_D18 = mk_base_simps 18;; +let NUM_OF_WORDLIST_SPLIT_18_256 = mk_split_theorem 18 256 16;; +let READ_MEMORY_WBYTES_SPLIT_128_128_32 = prove + (`t < 2 EXP 288 + ==> (read (memory :> wbytes a) (s:armstate) = (word t : 288 word) <=> + read (memory :> bytes128 a) s = (word (t MOD 2 EXP 128) : int128) /\ + read (memory :> bytes128 (word_add a (word 16))) s = + (word ((t DIV 2 EXP 128) MOD 2 EXP 128) : int128) /\ + read (memory :> bytes32 (word_add a (word 32))) s = + (word (t DIV 2 EXP 256) : int32))`, + let split_16_20 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC + DEPTH_CONV NUM_MULT_CONV) + (INST [`16`,`k:num`; `20`,`l:num`] READ_BYTES_SPLIT_ANY) in + let split_16_4 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC + DEPTH_CONV NUM_MULT_CONV) + (INST [`16`,`k:num`; `4`,`l:num`] READ_BYTES_SPLIT_ANY) in + STRIP_TAC THEN + REWRITE_TAC[BYTES128_WBYTES; BYTES32_WBYTES; GSYM VAL_EQ; + VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[split_16_20] THEN REWRITE_TAC[split_16_4] THEN + REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[DIV_DIV; GSYM EXP_ADD] THEN CONV_TAC NUM_REDUCE_CONV THEN + IMP_REWRITE_TAC[VAL_WORD_EXACT] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN ASM_ARITH_TAC);; +let WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D18 = mk_subword_cases 18 16;; + +(* Rewrite overlap expression to word_subword of full 288-bit word *) +let OVERLAP_SIMP_D18 = prove + (`word(((t DIV 2 EXP 128) MOD 2 EXP 128) DIV 2 EXP 32 + + (t DIV 2 EXP 256) * 2 EXP 96) : int128 = + word_subword (word t : 288 word) (160, 128)`, + REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD; VAL_WORD] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[DIV_MOD; MOD_MOD_EXP_MIN; GSYM EXP_ADD; DIV_DIV] THEN + CONV_TAC NUM_REDUCE_CONV THEN REWRITE_TAC[MOD_MOD_REFL] THEN + REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV + (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`; + `340282366920938463463374607431768211456`] DIV_MOD))] THEN + REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV + (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`; + `79228162514264337593543950336`] DIV_MOD))] THEN + REWRITE_TAC[GSYM(CONV_RULE NUM_REDUCE_CONV + (SPECL [`t:num`; `1461501637330902918203684832716283019655932542976`; + `79228162514264337593543950336`] DIV_DIV))] THEN + ONCE_REWRITE_TAC[MULT_SYM] THEN + REWRITE_TAC[ONCE_REWRITE_RULE[ADD_SYM] DIVISION_SIMP]);; + +(* For D=18: bytes128@16 + bytes32@32 -> bytes128@20 *) +let BYTES128_FROM_OVERLAP_32 = prove + (`m16 < 2 EXP 128 /\ + read (memory :> bytes128 (word_add a (word 16))) (s:armstate) = (word m16 : int128) /\ + read (memory :> bytes32 (word_add a (word 32))) (s:armstate) = (word m32 : int32) + ==> read (memory :> bytes128 (word_add a (word 20))) s = + (word(m16 DIV 2 EXP 32 + m32 * 2 EXP 96) : int128)`, + REWRITE_TAC[GSYM VAL_EQ; BYTES128_WBYTES; BYTES32_WBYTES; READ_COMPONENT_COMPOSE; + VAL_READ_WBYTES; VAL_WORD] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + ABBREV_TAC `m = read memory (s:armstate)` THEN + ONCE_REWRITE_TAC[split_k_l_at 16 4 12] THEN + ONCE_REWRITE_TAC[split_k_l_at 20 12 4] THEN + REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + CONV_TAC NUM_REDUCE_CONV THEN STRIP_TAC THEN ASM_REWRITE_TAC[] THEN + ASM_SIMP_TAC[MOD_LT] THEN + SUBGOAL_THEN `m16 DIV 4294967296 < 79228162514264337593543950336` ASSUME_TAC + THENL [ASM_SIMP_TAC[RDIV_LT_EQ; ARITH_EQ] THEN ASM_ARITH_TAC; ALL_TAC] THEN + SUBGOAL_THEN `m32 MOD 4294967296 < 4294967296` ASSUME_TAC + THENL [REWRITE_TAC[MOD_LT_EQ] THEN ARITH_TAC; ALL_TAC] THEN + SUBGOAL_THEN `m16 DIV 340282366920938463463374607431768211456 = 0` ASSUME_TAC + THENL [MATCH_MP_TAC DIV_LT THEN ASM_REWRITE_TAC[]; ALL_TAC] THEN + REWRITE_TAC[ARITH_RULE `340282366920938463463374607431768211456 = + 79228162514264337593543950336 * 4294967296`] THEN + CONJ_TAC THENL [ + REWRITE_TAC[MOD_MOD; MOD_MULT_ADD] THEN ASM_SIMP_TAC[MOD_LT]; + REWRITE_TAC[GSYM DIV_MOD] THEN SIMP_TAC[DIV_MULT_ADD; ARITH_EQ] THEN + REWRITE_TAC[CONV_RULE NUM_REDUCE_CONV + (SPECL [`m16:num`; `4294967296`; `79228162514264337593543950336`] DIV_DIV)] THEN + ASM_REWRITE_TAC[ADD_CLAUSES; MOD_MULT_ADD] THEN ASM_SIMP_TAC[MOD_LT]]);; + +(* D=18 overlap tactic: derives bytes128@20, discharges bound, rewrites to word_subword *) +let BOUND_REORDER = TAUT `(p /\ q ==> r) <=> (q ==> p ==> r)`;; + +let DERIVE_OVERLAP_TAC_D18 i = + let chunk = 36 in + let off16 = chunk*i + 16 and off32 = chunk*i + 32 in + let w16 = mk_small_numeral off16 and w32 = mk_small_numeral off32 in + let has t th = can (find_term ((=) t)) (concl th) in + let a_val = mk_comb(mk_comb(`word_add:int64->int64->int64`, `b:int64`), + mk_comb(`word:num->int64`, mk_small_numeral (chunk * i))) in + let inst = INST [a_val, `a:int64`; `s0:armstate`, `s:armstate`] BYTES128_FROM_OVERLAP_32 in + let thm = CONV_RULE (ONCE_DEPTH_CONV(GEN_REWRITE_CONV I [WORD_ADD_ASSOC_CONSTS]) THENC + DEPTH_CONV NUM_ADD_CONV) inst in + let thm_reord = CONV_RULE (REWR_CONV BOUND_REORDER) thm in + FIRST_ASSUM(fun th128 -> + if not(has w16 th128 && has `bytes128` th128 && has `b:int64` th128) then failwith "" else + FIRST_ASSUM(fun th32 -> + if not(has w32 th32 && has `bytes32` th32 && has `b:int64` th32) then failwith "" else + let matched = MATCH_MP thm_reord (CONJ th128 th32) in + MP_TAC matched THEN + ANTS_TAC THENL [REWRITE_TAC[MOD_LT_EQ; EXP_EQ_0; ARITH_EQ] THEN ARITH_TAC; + DISCH_THEN(ASSUME_TAC o + CONV_RULE(RAND_CONV(REWRITE_CONV [OVERLAP_SIMP_D18])))]));; + +(* ------------------------------------------------------------------------- *) +(* Core correctness theorem *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_POLYZ_UNPACK_17_CORRECT = prove + (`!r b t l pc. + LENGTH l = 256 /\ + nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_17_mc) (r,1024) /\ + nonoverlapping (b,576) (r,1024) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_17_mc /\ + read PC s = word (pc + MLDSA_POLYZ_UNPACK_17_CORE_START) /\ + C_ARGUMENTS [r; b; t] s /\ + read(memory :> bytes(t,64)) s = + num_of_wordlist mldsa_polyz_unpack_17_indices /\ + read(memory :> bytes(b,576)) s = num_of_wordlist l) + (\s. read PC s = word(pc + MLDSA_POLYZ_UNPACK_17_CORE_END) /\ + read(memory :> bytes(r,1024)) s = + num_of_wordlist (MAP zunpack17 l)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r,1024)])`, + CONV_TAC LENGTH_SIMPLIFY_CONV THEN + MAP_EVERY X_GEN_TAC [`r:int64`; `b:int64`; `t:int64`; + `l:(18 word) list`; `pc:num`] THEN + REWRITE_TAC[C_ARGUMENTS; MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; + NONOVERLAPPING_CLAUSES] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Ghost Q28: constructed via two INS_GEN partial writes ***) + GHOST_INTRO_TAC `q28_init:int128` `read Q28` THEN + + ENSURES_INIT_TAC "s0" THEN + + (*** Expand table precondition into 4 x bytes128 reads ***) + FIRST_X_ASSUM(MP_TAC o check (can (term_match [] + `read(memory :> bytes(t:int64,64)) s = x`) o concl)) THEN + REWRITE_TAC[mldsa_polyz_unpack_17_indices] THEN + REPLICATE_TAC 4 + (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) + [GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[pair_wordlist] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + REWRITE_TAC[GSYM BYTES128_WBYTES] THEN + STRIP_TAC THEN + + (*** Split 256 18-bit coefficients into 16 chunks of 16 as 288-bit words ***) + UNDISCH_TAC `read(memory :> bytes(b,576)) s0 = num_of_wordlist(l:(18 word) list)` THEN + IMP_REWRITE_TAC [NUM_OF_WORDLIST_SPLIT_18_256] THEN + CONV_TAC (ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN + REWRITE_TAC [MAP; o_DEF] THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + + (*** Split each 288-bit wbytes into bytes128 + bytes128 + bytes32 ***) + IMP_REWRITE_TAC [READ_MEMORY_WBYTES_SPLIT_128_128_32] THEN + MAP_EVERY (fun n -> SUBGOAL_THEN (subst[mk_small_numeral n,`k:num`] + `num_of_wordlist (SUB_LIST (16 * k,16) (l : (18 word) list)) < 2 EXP 288`) + (fun th -> REWRITE_TAC[th]) THENL [ + TRANS_TAC LTE_TRANS (subst[mk_small_numeral n,`k:num`] + `2 EXP (dimindex(:18) * LENGTH(SUB_LIST(16*k,16) (l : (18 word) list)))`) THEN + REWRITE_TAC[NUM_OF_WORDLIST_BOUND] THEN + REWRITE_TAC[LENGTH_SUB_LIST; DIMINDEX_CONV `dimindex (:18)`] THEN + ASM_SIMP_TAC [] THEN NUM_REDUCE_TAC; + ALL_TAC]) (0--15) THEN + REWRITE_TAC [WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC (TOP_SWEEP_CONV NUM_ADD_CONV) THEN + STRIP_TAC THEN + + (*** Derive overlapping bytes128 reads at offset 20 for each chunk ***) + MAP_EVERY DERIVE_OVERLAP_TAC_D18 (0--15) THEN + + (*** Gather LENGTH assumptions for sublists ***) + MAP_EVERY (fun i -> SUBGOAL_THEN + (subst [mk_small_numeral (16 * i), `i: num`] + `LENGTH (SUB_LIST (i, 16) (l : (18 word) list)) = 16`) ASSUME_TAC + THENL [ASM_REWRITE_TAC [LENGTH_SUB_LIST] THEN NUM_REDUCE_TAC; ALL_TAC]) + (0 -- 15) THEN + + (*** Symbolic execution with per-step simplification ***) + MAP_UNTIL_TARGET_PC (fun n -> + ARM_STEPS_TAC MLDSA_POLYZ_UNPACK_17_EXEC [n] THEN + SIMD_SIMPLIFY_TAC (map GSYM BASE_SIMPS_D18) THEN + SIMP_ZUNPACK_TAC 18 ZUNPACK17_CORRECT) 1 THEN + + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Fold output back to MAP zunpack17 l ***) + REPEAT (FIRST_X_ASSUM(MP_TAC o check + (can (term_match [] `read (memory :> bytes128 r) s0 = xxx`) o concl))) THEN + TRY (IMP_REWRITE_TAC WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D18) THEN + UNDISCH_THEN `LENGTH (l : (18 word) list) = 256` + (fun th -> CONV_TAC (TOP_SWEEP_CONV (EL_SUB_LIST_CONV th)) THEN ASSUME_TAC th) THEN + REPEAT DISCH_TAC THEN + GEN_REWRITE_TAC (RAND_CONV o RAND_CONV o RAND_CONV) [GSYM LIST_OF_SEQ_EQ_SELF] THEN + ASM_REWRITE_TAC[LENGTH_MAP] THEN + CONV_TAC (TOP_SWEEP_CONV LIST_OF_SEQ_CONV) THEN + ASM_REWRITE_TAC [MAP] THEN + REPLICATE_TAC 2 (CONV_TAC (ONCE_REWRITE_CONV [GSYM NUM_OF_PAIR_WORDLIST])) THEN + REWRITE_TAC[pair_wordlist] THEN + CONV_TAC (ONCE_DEPTH_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + ASM_REWRITE_TAC[GSYM BYTES128_WBYTES]);; + +(* ------------------------------------------------------------------------- *) +(* Subroutine correctness *) +(* ------------------------------------------------------------------------- *) + +(* NOTE: This must be kept in sync with the CBMC specification + * in mldsa/src/native/aarch64/src/arith_native_aarch64.h *) + +let MLDSA_POLYZ_UNPACK_17_SUBROUTINE_CORRECT = prove + (`!r b t l pc returnaddress. + LENGTH l = 256 /\ + nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_17_mc) (r,1024) /\ + nonoverlapping (b,576) (r,1024) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_17_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; b; t] s /\ + read(memory :> bytes(t,64)) s = + num_of_wordlist mldsa_polyz_unpack_17_indices /\ + read(memory :> bytes(b,576)) s = num_of_wordlist l) + (\s. read PC s = returnaddress /\ + read(memory :> bytes(r,1024)) s = + num_of_wordlist (MAP zunpack17 l) /\ + (!i. i < 256 ==> + --(&(2 EXP 17) - &1) <= ival(EL i (MAP zunpack17 l)) /\ + ival(EL i (MAP zunpack17 l)) <= &(2 EXP 17))) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r,1024)])`, + CONV_TAC LENGTH_SIMPLIFY_CONV THEN + ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POLYZ_UNPACK_17_EXEC + (CONV_RULE LENGTH_SIMPLIFY_CONV MLDSA_POLYZ_UNPACK_17_CORRECT) THEN + REPEAT STRIP_TAC THEN + MP_TAC(CONV_RULE NUM_REDUCE_CONV + (ISPECL [`l:(18 word) list`; `i:num`] ZUNPACK17_MAP_BOUND)) THEN + ASM_REWRITE_TAC[] THEN SIMP_TAC[]);; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml new file mode 100644 index 000000000..afa25f82d --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml @@ -0,0 +1,255 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Functional correctness of polyz_unpack_19: *) +(* Unpack polynomial z with 20-bit packed coefficients (GAMMA1 = 2^19) *) +(* Maps packed [0, 2^20-1] to signed [-(2^19-1), 2^19] via GAMMA1 - x *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; +needs "aarch64/proofs/mldsa_polyz_unpack_consts.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_polyz_unpack_19.o";; + ****) + +let mldsa_polyz_unpack_19_mc = define_assert_from_elf + "mldsa_polyz_unpack_19_mc" "aarch64/mldsa/mldsa_polyz_unpack_19.o" +(*** BYTECODE START ***) +[ + 0x3dc00058; (* arm_LDR Q24 X2 (Immediate_Offset (word 0)) *) + 0x3dc00459; (* arm_LDR Q25 X2 (Immediate_Offset (word 16)) *) + 0x3dc0085a; (* arm_LDR Q26 X2 (Immediate_Offset (word 32)) *) + 0x3dc00c5b; (* arm_LDR Q27 X2 (Immediate_Offset (word 48)) *) + 0xd2c01f83; (* arm_MOVZ X3 (word 252) 32 *) + 0x4e080c7c; (* arm_DUP_GEN Q28 X3 64 128 *) + 0x4f00d5fd; (* arm_MOVI Q29 (word 4503595333451775) *) + 0x4f00451e; (* arm_MOVI Q30 (word 2251799814209536) *) + 0xd2800209; (* arm_MOV X9 (rvalue (word 16)) *) + 0x4c40a020; (* arm_LDP Q0 Q1 X1 No_Offset *) + 0x91006021; (* arm_ADD X1 X1 (rvalue (word 24)) *) + 0x4cdf7022; (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *) + 0x4e180004; (* arm_TBL Q4 [Q0] Q24 128 *) + 0x4e192005; (* arm_TBL2 Q5 Q0 Q1 Q25 128 *) + 0x4e1a0026; (* arm_TBL Q6 [Q1] Q26 128 *) + 0x4e1b2027; (* arm_TBL2 Q7 Q1 Q2 Q27 128 *) + 0x6ebc4484; (* arm_USHL_VEC Q4 Q4 Q28 32 128 *) + 0x4e3d1c84; (* arm_AND_VEC Q4 Q4 Q29 128 *) + 0x6ea487c4; (* arm_SUB_VEC Q4 Q30 Q4 32 128 *) + 0x6ebc44a5; (* arm_USHL_VEC Q5 Q5 Q28 32 128 *) + 0x4e3d1ca5; (* arm_AND_VEC Q5 Q5 Q29 128 *) + 0x6ea587c5; (* arm_SUB_VEC Q5 Q30 Q5 32 128 *) + 0x6ebc44c6; (* arm_USHL_VEC Q6 Q6 Q28 32 128 *) + 0x4e3d1cc6; (* arm_AND_VEC Q6 Q6 Q29 128 *) + 0x6ea687c6; (* arm_SUB_VEC Q6 Q30 Q6 32 128 *) + 0x6ebc44e7; (* arm_USHL_VEC Q7 Q7 Q28 32 128 *) + 0x4e3d1ce7; (* arm_AND_VEC Q7 Q7 Q29 128 *) + 0x6ea787c7; (* arm_SUB_VEC Q7 Q30 Q7 32 128 *) + 0x3d800405; (* arm_STR Q5 X0 (Immediate_Offset (word 16)) *) + 0x3d800806; (* arm_STR Q6 X0 (Immediate_Offset (word 32)) *) + 0x3d800c07; (* arm_STR Q7 X0 (Immediate_Offset (word 48)) *) + 0x3c840404; (* arm_STR Q4 X0 (Postimmediate_Offset (word 64)) *) + 0xf1000529; (* arm_SUBS X9 X9 (rvalue (word 1)) *) + 0x54fffd01; (* arm_BNE (word 2097056) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_POLYZ_UNPACK_19_EXEC = ARM_MK_EXEC_RULE mldsa_polyz_unpack_19_mc;; + +(* ------------------------------------------------------------------------- *) +(* Code length constants *) +(* ------------------------------------------------------------------------- *) + +let LENGTH_MLDSA_POLYZ_UNPACK_19_MC = + REWRITE_CONV[mldsa_polyz_unpack_19_mc] `LENGTH mldsa_polyz_unpack_19_mc` + |> CONV_RULE (RAND_CONV LENGTH_CONV);; + +let MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH = new_definition + `MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH = 0`;; + +let MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH = new_definition + `MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH = 4`;; + +let MLDSA_POLYZ_UNPACK_19_CORE_START = new_definition + `MLDSA_POLYZ_UNPACK_19_CORE_START = MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH`;; + +let MLDSA_POLYZ_UNPACK_19_CORE_END = new_definition + `MLDSA_POLYZ_UNPACK_19_CORE_END = + LENGTH mldsa_polyz_unpack_19_mc - MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH`;; + +let LENGTH_SIMPLIFY_CONV_19 = + REWRITE_CONV[LENGTH_MLDSA_POLYZ_UNPACK_19_MC; + MLDSA_POLYZ_UNPACK_19_CORE_START; MLDSA_POLYZ_UNPACK_19_CORE_END; + MLDSA_POLYZ_UNPACK_19_PREAMBLE_LENGTH; + MLDSA_POLYZ_UNPACK_19_POSTAMBLE_LENGTH] THENC + NUM_REDUCE_CONV THENC REWRITE_CONV [ADD_0];; + +(* ------------------------------------------------------------------------- *) +(* D=20 instantiations for SIMD infrastructure *) +(* ------------------------------------------------------------------------- *) + +let BASE_SIMPS_D20 = mk_base_simps 20;; +let NUM_OF_WORDLIST_SPLIT_20_256 = mk_split_theorem 20 256 16;; +let READ_MEMORY_WBYTES_SPLIT_128_128_64 = prove + (`t < 2 EXP 320 + ==> (read (memory :> wbytes a) (s:armstate) = (word t : 320 word) <=> + read (memory :> bytes128 a) s = (word (t MOD 2 EXP 128) : int128) /\ + read (memory :> bytes128 (word_add a (word 16))) s = + (word ((t DIV 2 EXP 128) MOD 2 EXP 128) : int128) /\ + read (memory :> bytes64 (word_add a (word 32))) s = + (word (t DIV 2 EXP 256) : int64))`, + let split_16_24 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC + DEPTH_CONV NUM_MULT_CONV) + (INST [`16`,`k:num`; `24`,`l:num`] READ_BYTES_SPLIT_ANY) in + let split_16_8 = CONV_RULE (ONCE_DEPTH_CONV NUM_ADD_CONV THENC + DEPTH_CONV NUM_MULT_CONV) + (INST [`16`,`k:num`; `8`,`l:num`] READ_BYTES_SPLIT_ANY) in + STRIP_TAC THEN + REWRITE_TAC[BYTES128_WBYTES; BYTES64_WBYTES; GSYM VAL_EQ; + VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[split_16_24] THEN REWRITE_TAC[split_16_8] THEN + REWRITE_TAC[WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[DIV_DIV; GSYM EXP_ADD] THEN CONV_TAC NUM_REDUCE_CONV THEN + IMP_REWRITE_TAC[VAL_WORD_EXACT] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN ASM_ARITH_TAC);; +let WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D20 = mk_subword_cases 20 16;; + +(* ------------------------------------------------------------------------- *) +(* Core correctness theorem *) +(* ------------------------------------------------------------------------- *) + +let MLDSA_POLYZ_UNPACK_19_CORRECT = prove + (`!r b t l pc. + LENGTH l = 256 /\ + nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_19_mc) (r,1024) /\ + nonoverlapping (b,640) (r,1024) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_19_mc /\ + read PC s = word (pc + MLDSA_POLYZ_UNPACK_19_CORE_START) /\ + C_ARGUMENTS [r; b; t] s /\ + read(memory :> bytes(t,64)) s = + num_of_wordlist mldsa_polyz_unpack_19_indices /\ + read(memory :> bytes(b,640)) s = num_of_wordlist l) + (\s. read PC s = word(pc + MLDSA_POLYZ_UNPACK_19_CORE_END) /\ + read(memory :> bytes(r,1024)) s = + num_of_wordlist (MAP zunpack19 l)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r,1024)])`, + CONV_TAC LENGTH_SIMPLIFY_CONV_19 THEN + MAP_EVERY X_GEN_TAC [`r:int64`; `b:int64`; `t:int64`; + `l:(20 word) list`; `pc:num`] THEN + REWRITE_TAC[C_ARGUMENTS; MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; + NONOVERLAPPING_CLAUSES] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + ENSURES_INIT_TAC "s0" THEN + + (*** Expand table precondition into 4 x bytes128 reads ***) + FIRST_X_ASSUM(MP_TAC o check (can (term_match [] + `read(memory :> bytes(t:int64,64)) s = x`) o concl)) THEN + REWRITE_TAC[mldsa_polyz_unpack_19_indices] THEN + REPLICATE_TAC 4 + (GEN_REWRITE_TAC (LAND_CONV o ONCE_DEPTH_CONV) + [GSYM NUM_OF_PAIR_WORDLIST]) THEN + REWRITE_TAC[pair_wordlist] THEN + CONV_TAC WORD_REDUCE_CONV THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + REWRITE_TAC[GSYM BYTES128_WBYTES] THEN + STRIP_TAC THEN + + (*** Split 256 20-bit coefficients into 16 chunks of 16 as 320-bit words ***) + UNDISCH_TAC `read(memory :> bytes(b,640)) s0 = num_of_wordlist(l:(20 word) list)` THEN + IMP_REWRITE_TAC [NUM_OF_WORDLIST_SPLIT_20_256] THEN + CONV_TAC (ONCE_DEPTH_CONV LIST_OF_SEQ_CONV) THEN + REWRITE_TAC [MAP; o_DEF] THEN + CONV_TAC(LAND_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + + (*** Split each 320-bit wbytes into bytes128 + bytes128 + bytes64 ***) + IMP_REWRITE_TAC [READ_MEMORY_WBYTES_SPLIT_128_128_64] THEN + MAP_EVERY (fun n -> SUBGOAL_THEN (subst[mk_small_numeral n,`k:num`] + `num_of_wordlist (SUB_LIST (16 * k,16) (l : (20 word) list)) < 2 EXP 320`) + (fun th -> REWRITE_TAC[th]) THENL [ + TRANS_TAC LTE_TRANS (subst[mk_small_numeral n,`k:num`] + `2 EXP (dimindex(:20) * LENGTH(SUB_LIST(16*k,16) (l : (20 word) list)))`) THEN + REWRITE_TAC[NUM_OF_WORDLIST_BOUND] THEN + REWRITE_TAC[LENGTH_SUB_LIST; DIMINDEX_CONV `dimindex (:20)`] THEN + ASM_SIMP_TAC [] THEN NUM_REDUCE_TAC; + ALL_TAC]) (0--15) THEN + REWRITE_TAC [WORD_ADD_ASSOC_CONSTS] THEN CONV_TAC (TOP_SWEEP_CONV NUM_ADD_CONV) THEN + STRIP_TAC THEN + + (*** Derive overlapping bytes128 reads at offset 24 for each chunk ***) + MAP_EVERY (DERIVE_OVERLAP_TAC BYTES128_FROM_OVERLAP_64 40) (0--15) THEN + + (*** Gather LENGTH assumptions for sublists ***) + MAP_EVERY (fun i -> SUBGOAL_THEN + (subst [mk_small_numeral (16 * i), `i: num`] + `LENGTH (SUB_LIST (i, 16) (l : (20 word) list)) = 16`) ASSUME_TAC + THENL [ASM_REWRITE_TAC [LENGTH_SUB_LIST] THEN NUM_REDUCE_TAC; ALL_TAC]) + (0 -- 15) THEN + + (*** Symbolic execution with per-step simplification ***) + MAP_UNTIL_TARGET_PC (fun n -> + ARM_STEPS_TAC MLDSA_POLYZ_UNPACK_19_EXEC [n] THEN + SIMD_SIMPLIFY_TAC (map GSYM BASE_SIMPS_D20) THEN + SIMP_ZUNPACK_TAC 20 ZUNPACK19_CORRECT) 1 THEN + + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Fold output back to MAP zunpack19 l ***) + REPEAT (FIRST_X_ASSUM(MP_TAC o check + (can (term_match [] `read (memory :> bytes128 r) s0 = xxx`) o concl))) THEN + TRY (IMP_REWRITE_TAC WORD_SUBWORD_NUM_OF_WORDLIST_CASES_D20) THEN + UNDISCH_THEN `LENGTH (l : (20 word) list) = 256` + (fun th -> CONV_TAC (TOP_SWEEP_CONV (EL_SUB_LIST_CONV th)) THEN ASSUME_TAC th) THEN + REPEAT DISCH_TAC THEN + GEN_REWRITE_TAC (RAND_CONV o RAND_CONV o RAND_CONV) [GSYM LIST_OF_SEQ_EQ_SELF] THEN + ASM_REWRITE_TAC[LENGTH_MAP] THEN + CONV_TAC (TOP_SWEEP_CONV LIST_OF_SEQ_CONV) THEN + ASM_REWRITE_TAC [MAP] THEN + REPLICATE_TAC 2 (CONV_TAC (ONCE_REWRITE_CONV [GSYM NUM_OF_PAIR_WORDLIST])) THEN + REWRITE_TAC[pair_wordlist] THEN + CONV_TAC (ONCE_DEPTH_CONV BYTES_EQ_NUM_OF_WORDLIST_EXPAND_CONV) THEN + ASM_REWRITE_TAC[GSYM BYTES128_WBYTES]);; + +(* ------------------------------------------------------------------------- *) +(* Subroutine correctness *) +(* ------------------------------------------------------------------------- *) + +(* NOTE: This must be kept in sync with the CBMC specification + * in mldsa/src/native/aarch64/src/arith_native_aarch64.h *) + +let MLDSA_POLYZ_UNPACK_19_SUBROUTINE_CORRECT = prove + (`!r b t l pc returnaddress. + LENGTH l = 256 /\ + nonoverlapping (word pc,LENGTH mldsa_polyz_unpack_19_mc) (r,1024) /\ + nonoverlapping (b,640) (r,1024) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_polyz_unpack_19_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; b; t] s /\ + read(memory :> bytes(t,64)) s = + num_of_wordlist mldsa_polyz_unpack_19_indices /\ + read(memory :> bytes(b,640)) s = num_of_wordlist l) + (\s. read PC s = returnaddress /\ + read(memory :> bytes(r,1024)) s = + num_of_wordlist (MAP zunpack19 l) /\ + (!i. i < 256 ==> + --(&(2 EXP 19) - &1) <= ival(EL i (MAP zunpack19 l)) /\ + ival(EL i (MAP zunpack19 l)) <= &(2 EXP 19))) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r,1024)])`, + CONV_TAC LENGTH_SIMPLIFY_CONV_19 THEN + ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POLYZ_UNPACK_19_EXEC + (CONV_RULE LENGTH_SIMPLIFY_CONV_19 MLDSA_POLYZ_UNPACK_19_CORRECT) THEN + REPEAT STRIP_TAC THEN + MP_TAC(CONV_RULE NUM_REDUCE_CONV + (ISPECL [`l:(20 word) list`; `i:num`] ZUNPACK19_MAP_BOUND)) THEN + ASM_REWRITE_TAC[] THEN SIMP_TAC[]);; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml new file mode 100644 index 000000000..a5a02e2c7 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml @@ -0,0 +1,39 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* + * WARNING: This file is auto-generated from scripts/autogen + * in the mldsa-native repository. + * Do not modify it directly. + *) + +(* + * TBL index tables for polyz_unpack_{17,19} + * See autogen for details. + *) + +let mldsa_polyz_unpack_17_indices = (REWRITE_RULE[MAP] o define) + `mldsa_polyz_unpack_17_indices:byte list = MAP word [ + 0; 1; 2; 255; 2; 3; 4; 255; + 4; 5; 6; 255; 6; 7; 8; 255; + 9; 10; 11; 255; 11; 12; 13; 255; + 13; 14; 15; 255; 15; 16; 17; 255; + 2; 3; 4; 255; 4; 5; 6; 255; + 6; 7; 8; 255; 8; 9; 10; 255; + 11; 12; 13; 255; 13; 14; 15; 255; + 15; 28; 29; 255; 29; 30; 31; 255 +]`;; + +let mldsa_polyz_unpack_19_indices = (REWRITE_RULE[MAP] o define) + `mldsa_polyz_unpack_19_indices:byte list = MAP word [ + 0; 1; 2; 255; 2; 3; 4; 255; + 5; 6; 7; 255; 7; 8; 9; 255; + 10; 11; 12; 255; 12; 13; 14; 255; + 15; 16; 17; 255; 17; 18; 19; 255; + 4; 5; 6; 255; 6; 7; 8; 255; + 9; 10; 11; 255; 11; 12; 13; 255; + 14; 15; 24; 255; 24; 25; 26; 255; + 27; 28; 29; 255; 29; 30; 31; 255 +]`;; diff --git a/proofs/hol_light/common/mldsa_specs.ml b/proofs/hol_light/common/mldsa_specs.ml index 661c42f33..e39a17a6b 100644 --- a/proofs/hol_light/common/mldsa_specs.ml +++ b/proofs/hol_light/common/mldsa_specs.ml @@ -843,3 +843,315 @@ let SIMD_SIMPLIFY_ABBREV_TAC = let tms = sort free_in (find_terms pam (rand(concl th''))) in (MP_TAC th'' THEN MAP_EVERY AUTO_ABBREV_TAC tms THEN DISCH_TAC) (asl,w) in TRY(FIRST_X_ASSUM(ttac o check (simdable o concl)));; + + +(* ========================================================================= *) +(* zunpack: gamma1 - x unpacking for polyz *) +(* *) +(* zunpack_d maps a d-bit packed coefficient x in [0, 2^d - 1] to *) +(* gamma1 - x in [-(gamma1-1), gamma1], where gamma1 = 2^(d-1). *) +(* ========================================================================= *) + +(* --- zunpack17: GAMMA1 = 2^17, 18-bit packed coefficients --- *) + +let zunpack17 = new_definition + `zunpack17 (x:(18)word) : (32)word = + word_sub (word(2 EXP 17)) (word_zx x)`;; + +let ZUNPACK17_CORRECT = prove( + `!x:(18)word. + word_sub (word 131072 : 32 word) + (word_zx (x : 18 word) : 32 word) = zunpack17 x`, + REWRITE_TAC[zunpack17] THEN CONV_TAC NUM_REDUCE_CONV);; + +let ZUNPACK17_IVAL = prove( + `!x:(18)word. ival(zunpack17 x) = &(2 EXP 17) - &(val x)`, + GEN_TAC THEN REWRITE_TAC[zunpack17] THEN + SUBGOAL_THEN `word_zx (x:18 word) : 32 word = word(val x)` SUBST1_TAC THENL + [REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_ZX_GEN; VAL_WORD] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[MOD_MOD_EXP_MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN SIMP_TAC[MOD_LT]; + ALL_TAC] THEN + ONCE_REWRITE_TAC[WORD_IWORD] THEN + REWRITE_TAC[GSYM IWORD_INT_SUB] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MATCH_MP_TAC IVAL_IWORD THEN REWRITE_TAC[DIMINDEX_32] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN + REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);; + +let ZUNPACK17_BOUND = prove( + `!x:(18)word. --(&(2 EXP 17) - &1) <= ival(zunpack17 x) /\ + ival(zunpack17 x) <= &(2 EXP 17)`, + GEN_TAC THEN REWRITE_TAC[ZUNPACK17_IVAL] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:18 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN + REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);; + +let ZUNPACK17_MAP_BOUND = prove( + `!l:(18 word) list. !i. i < LENGTH l ==> + --(&(2 EXP 17) - &1) <= ival(EL i (MAP zunpack17 l)) /\ + ival(EL i (MAP zunpack17 l)) <= &(2 EXP 17)`, + REPEAT STRIP_TAC THEN ASM_SIMP_TAC[EL_MAP] THEN + REWRITE_TAC[ZUNPACK17_BOUND]);; + +(* --- zunpack19: GAMMA1 = 2^19, 20-bit packed coefficients --- *) + +let zunpack19 = new_definition + `zunpack19 (x:(20)word) : (32)word = + word_sub (word(2 EXP 19)) (word_zx x)`;; + +let ZUNPACK19_CORRECT = prove( + `!x:(20)word. + word_sub (word 524288 : 32 word) + (word_zx (x : 20 word) : 32 word) = zunpack19 x`, + REWRITE_TAC[zunpack19] THEN CONV_TAC NUM_REDUCE_CONV);; + +let ZUNPACK19_IVAL = prove( + `!x:(20)word. ival(zunpack19 x) = &(2 EXP 19) - &(val x)`, + GEN_TAC THEN REWRITE_TAC[zunpack19] THEN + SUBGOAL_THEN `word_zx (x:20 word) : 32 word = word(val x)` SUBST1_TAC THENL + [REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_ZX_GEN; VAL_WORD] THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN CONV_TAC NUM_REDUCE_CONV THEN + REWRITE_TAC[MOD_MOD_EXP_MIN] THEN CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV) THEN SIMP_TAC[MOD_LT]; + ALL_TAC] THEN + ONCE_REWRITE_TAC[WORD_IWORD] THEN + REWRITE_TAC[GSYM IWORD_INT_SUB] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MATCH_MP_TAC IVAL_IWORD THEN REWRITE_TAC[DIMINDEX_32] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN + REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);; + +let ZUNPACK19_BOUND = prove( + `!x:(20)word. --(&(2 EXP 19) - &1) <= ival(zunpack19 x) /\ + ival(zunpack19 x) <= &(2 EXP 19)`, + GEN_TAC THEN REWRITE_TAC[ZUNPACK19_IVAL] THEN + CONV_TAC NUM_REDUCE_CONV THEN + MP_TAC(ISPEC `x:20 word` VAL_BOUND) THEN + CONV_TAC(DEPTH_CONV DIMINDEX_CONV THENC NUM_REDUCE_CONV) THEN + REWRITE_TAC[GSYM INT_OF_NUM_LT] THEN INT_ARITH_TAC);; + +let ZUNPACK19_MAP_BOUND = prove( + `!l:(20 word) list. !i. i < LENGTH l ==> + --(&(2 EXP 19) - &1) <= ival(EL i (MAP zunpack19 l)) /\ + ival(EL i (MAP zunpack19 l)) <= &(2 EXP 19)`, + REPEAT STRIP_TAC THEN ASM_SIMP_TAC[EL_MAP] THEN + REWRITE_TAC[ZUNPACK19_BOUND]);; + +(* ========================================================================= *) +(* Helper lemmas: list operations *) +(* ========================================================================= *) + +let EL_SUB_LIST = prove( + `!l:'a list. !i k n. i < n /\ k + n <= LENGTH l + ==> EL i (SUB_LIST (k, n) l) = EL (k + i) l`, + LIST_INDUCT_TAC THENL [ + REWRITE_TAC[LENGTH; LE; ADD_EQ_0] THEN ARITH_TAC; + REWRITE_TAC[LENGTH] THEN REPEAT GEN_TAC THEN + STRUCT_CASES_TAC (SPEC `k:num` num_CASES) THEN + STRUCT_CASES_TAC (SPEC `n:num` num_CASES) THEN + REWRITE_TAC[LT; SUB_LIST_CLAUSES; ADD_CLAUSES] THENL [ + STRUCT_CASES_TAC (SPEC `i:num` num_CASES) THEN + REWRITE_TAC[EL; HD; TL; ADD_CLAUSES] THEN STRIP_TAC THEN + FIRST_X_ASSUM (MP_TAC o SPECL [`n:num`; `0`; `n':num`]) THEN + REWRITE_TAC[ADD_CLAUSES] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC; + REWRITE_TAC[EL; TL] THEN STRIP_TAC THEN + FIRST_X_ASSUM (MP_TAC o SPECL [`i:num`; `n':num`; `SUC n''`]) THEN + ASM_REWRITE_TAC[LT_SUC] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC]]);; + +let EL_SUB_LIST_CONV len_thm tm = + let i_tm,sublist_tm = dest_comb tm in + let el_const,i = dest_comb i_tm in + let sublist_pair,ls = dest_comb sublist_tm in + let sublist_const,pair_tm = dest_comb sublist_pair in + let base,len = dest_pair pair_tm in + let i_num = dest_numeral i and + len_num = dest_numeral len in + if i_num >= len_num then failwith "EL_SUB_LIST_CONV: index out of bounds" else + let th1 = ISPECL [ls; i; base; len] EL_SUB_LIST in + let th2 = REWRITE_RULE[len_thm] th1 in + let th3 = MP th2 (EQT_ELIM(NUM_REDUCE_CONV (fst(dest_imp(concl th2))))) in + CONV_RULE (RAND_CONV (LAND_CONV NUM_ADD_CONV)) th3;; + +let LENGTH_SUB_LIST_0 = prove + (`!n (l:'a list). n <= LENGTH l ==> LENGTH (SUB_LIST (0, n) l) = n`, + REPEAT STRIP_TAC THEN REWRITE_TAC[LENGTH_SUB_LIST; SUB_0] THEN ASM_ARITH_TAC);; + +let SUB_LIST_SUB_LIST_0 = prove( + `!k n m (l:'a list). k + n <= m /\ m <= LENGTH l + ==> SUB_LIST (k, n) (SUB_LIST (0, m) l) = SUB_LIST (k, n) l`, + REPEAT STRIP_TAC THEN REWRITE_TAC[LIST_EQ; LENGTH_SUB_LIST; SUB_0] THEN + CONJ_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN REPEAT STRIP_TAC THEN + SUBGOAL_THEN `n' < n` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN + MP_TAC (ISPECL [`SUB_LIST (0,m) l:'a list`; `n':num`; `k:num`; `n:num`] EL_SUB_LIST) THEN + MP_TAC (ISPECL [`l:'a list`; `n':num`; `k:num`; `n:num`] EL_SUB_LIST) THEN + ASM_REWRITE_TAC[LENGTH_SUB_LIST; SUB_0] THEN + SUBGOAL_THEN `k + n <= LENGTH (l:'a list)` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN + SUBGOAL_THEN `k + n <= MIN m (LENGTH (l:'a list))` ASSUME_TAC THENL [ASM_ARITH_TAC; ALL_TAC] THEN + ASM_SIMP_TAC[] THEN REPEAT DISCH_TAC THEN + MP_TAC (ISPECL [`l:'a list`; `k + n':num`; `0`; `m:num`] EL_SUB_LIST) THEN + ASM_REWRITE_TAC[ADD_CLAUSES] THEN DISCH_THEN MATCH_MP_TAC THEN ASM_ARITH_TAC);; + +let SUB_LIST_SPLIT_EQ = prove + (`!n r (l:'a list). n + r = LENGTH l + ==> APPEND (SUB_LIST (0, n) l) (SUB_LIST (n, r) l) = l`, + REPEAT STRIP_TAC THEN + MP_TAC (ISPECL [`l:'a list`; `n:num`] SUB_LIST_TOPSPLIT) THEN + FIRST_X_ASSUM (SUBST1_TAC o SYM) THEN REWRITE_TAC[ADD_SUB2]);; + +let APPEND_ITLIST_APPEND_NIL = prove + (`!(l:('a list) list) (x:'a list). APPEND (ITLIST APPEND l []) x = ITLIST APPEND l x`, + LIST_INDUCT_TAC THEN REWRITE_TAC[ITLIST; APPEND] THEN + GEN_TAC THEN REWRITE_TAC[GSYM APPEND_ASSOC] THEN ASM_REWRITE_TAC[]);; + +let LIST_OF_SEQ_EQ = prove + (`!(f:num->'a) g n. (!i. i < n ==> f i = g i) ==> list_of_seq f n = list_of_seq g n`, + GEN_TAC THEN GEN_TAC THEN INDUCT_TAC THEN REWRITE_TAC[list_of_seq] THEN + DISCH_TAC THEN BINOP_TAC THENL [ + FIRST_X_ASSUM MATCH_MP_TAC THEN GEN_TAC THEN DISCH_TAC THEN + FIRST_X_ASSUM MATCH_MP_TAC THEN ASM_ARITH_TAC; + REWRITE_TAC[CONS_11] THEN FIRST_X_ASSUM MATCH_MP_TAC THEN ARITH_TAC + ]);; + +let SUBLIST_PARTITION = prove + (`!r s (l:'a list). LENGTH l = r * s ==> + l = ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) l) s) []`, + GEN_TAC THEN INDUCT_TAC THENL [ + REWRITE_TAC[MULT_CLAUSES; list_of_seq; ITLIST; LENGTH_EQ_NIL]; + REWRITE_TAC[list_of_seq; ITLIST_EXTRA; APPEND_NIL] THEN + GEN_TAC THEN DISCH_TAC THEN + SUBGOAL_THEN + `SUB_LIST (0, r * s) l = + ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) (SUB_LIST (0, r * s) l)) s) []:'a list` + ASSUME_TAC THENL [ + FIRST_X_ASSUM MATCH_MP_TAC THEN + MATCH_MP_TAC LENGTH_SUB_LIST_0 THEN ASM_ARITH_TAC; + ALL_TAC + ] THEN + SUBGOAL_THEN + `list_of_seq (\i. SUB_LIST (r * i, r) (SUB_LIST (0, r * s) l):'a list) s = + list_of_seq (\i. SUB_LIST (r * i, r) l) s` + ASSUME_TAC THENL [ + MATCH_MP_TAC LIST_OF_SEQ_EQ THEN REPEAT STRIP_TAC THEN REWRITE_TAC[] THEN + MATCH_MP_TAC SUB_LIST_SUB_LIST_0 THEN CONJ_TAC THENL [ + REWRITE_TAC[ARITH_RULE `r * i + r = r * (i + 1)`] THEN + REWRITE_TAC[LE_MULT_LCANCEL] THEN ASM_ARITH_TAC; + ASM_ARITH_TAC + ]; + ALL_TAC + ] THEN + SUBGOAL_THEN + `APPEND (SUB_LIST (0, r * s) l) (SUB_LIST (r * s, r) l) = l:'a list` + ASSUME_TAC THENL [ + MATCH_MP_TAC SUB_LIST_SPLIT_EQ THEN ASM_REWRITE_TAC[MULT_SUC] THEN ARITH_TAC; + ALL_TAC + ] THEN + SUBGOAL_THEN + `SUB_LIST (0, r * s) l = + ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i, r) l) s) []:'a list` + ASSUME_TAC THENL [ASM_MESON_TAC[]; ALL_TAC] THEN + UNDISCH_TAC `APPEND (SUB_LIST (0,r * s) l) (SUB_LIST (r * s,r) l) = l:'a list` THEN + UNDISCH_TAC `SUB_LIST (0,r * s) l = ITLIST APPEND (list_of_seq (\i. SUB_LIST (r * i,r) l) s) []:'a list` THEN + SIMP_TAC[APPEND_ITLIST_APPEND_NIL] + ]);; + +(* ========================================================================= *) +(* Helper lemmas: word arithmetic *) +(* ========================================================================= *) + +let VAL_WORD_EXACT = prove( + `!n. n < 2 EXP dimindex(:N) ==> val(word n : N word) = n`, + REWRITE_TAC[VAL_WORD] THEN SIMP_TAC[MOD_LT]);; + +let WORD_PACKED_EQ = prove( + `!(x:N word) (y:N word). + dimindex(:N) = l * k /\ 0 < l /\ l <= dimindex(:M) + ==> (x = y <=> + !i. i < k + ==> word_subword x (l*i, l) : (M) word = + word_subword y (l*i, l))`, + REPEAT GEN_TAC THEN STRIP_TAC THEN EQ_TAC THENL + [DISCH_THEN SUBST1_TAC THEN REWRITE_TAC[]; + DISCH_TAC THEN + GEN_REWRITE_TAC I [WORD_EQ_BITS_ALT] THEN + X_GEN_TAC `j:num` THEN DISCH_TAC THEN + FIRST_X_ASSUM(MP_TAC o SPEC `j DIV l`) THEN + ANTS_TAC THENL + [UNDISCH_TAC `j < dimindex(:N)` THEN ASM_REWRITE_TAC[] THEN + ASM_SIMP_TAC[RDIV_LT_EQ; ARITH_RULE `0 < l ==> ~(l = 0)`; MULT_SYM]; + DISCH_THEN(fun th -> + MP_TAC(AP_TERM `\(w:M word). bit (j MOD l) w` th)) THEN + REWRITE_TAC[BIT_WORD_SUBWORD] THEN + SUBGOAL_THEN `j MOD l < MIN l (dimindex(:M))` + (fun th -> REWRITE_TAC[th]) THENL + [ASM_SIMP_TAC[ARITH_RULE `l <= m ==> MIN l m = l`; + MOD_LT_EQ; ARITH_RULE `0 < l ==> ~(l = 0)`]; + ASM_SIMP_TAC[DIVISION_SIMP; ARITH_RULE `0 < l ==> ~(l = 0)`]]]]);; + +let WORD_SUBWORD_NUM_OF_WORDLIST = prove + (`!(ls:(L word)list) k. + dimindex(:KL) = dimindex(:L) * LENGTH ls /\ + k < LENGTH ls + ==> word_subword (word (num_of_wordlist ls) : KL word) (dimindex(:L)*k, dimindex(:L)) : L word = EL k ls`, + REPEAT STRIP_TAC THEN REWRITE_TAC[GSYM VAL_EQ; VAL_WORD_SUBWORD] THEN + REWRITE_TAC[ARITH_RULE `MIN n n = n`] THEN + SUBGOAL_THEN `val (word (num_of_wordlist (ls:(L word)list)) : KL word) = num_of_wordlist ls` SUBST1_TAC THENL + [W(MP_TAC o PART_MATCH (lhand o rand) VAL_WORD_EQ o lhand o snd) THEN + ANTS_TAC THENL + [TRANS_TAC LTE_TRANS `2 EXP (dimindex(:L) * LENGTH (ls:(L word)list))` THEN + REWRITE_TAC[NUM_OF_WORDLIST_BOUND; LE_EXP; LE_REFL] THEN ASM_ARITH_TAC; + SIMP_TAC[]]; + MP_TAC(ISPECL [`ls:(L word)list`; `k:num`] NUM_OF_WORDLIST_EL) THEN + ASM_REWRITE_TAC[]]);; + +let NUM_OF_WORDLIST_FLATTEN = prove + (`!(ll:((N word) list) list) k. + ALL (\l. LENGTH l = k) ll /\ + dimindex(:N) * k = dimindex(:M) + ==> num_of_wordlist (ITLIST APPEND ll []) = + num_of_wordlist (MAP ((word:num->M word) o num_of_wordlist) ll)`, + LIST_INDUCT_TAC THEN REWRITE_TAC[ITLIST; MAP; num_of_wordlist; ALL] THEN + X_GEN_TAC `k:num` THEN STRIP_TAC THEN + FIRST_X_ASSUM(MP_TAC o SPEC `k:num`) THEN + ASM_REWRITE_TAC[] THEN DISCH_TAC THEN + REWRITE_TAC[NUM_OF_WORDLIST_APPEND; num_of_wordlist; o_THM] THEN + ASM_REWRITE_TAC[] THEN + AP_THM_TAC THEN AP_TERM_TAC THEN + IMP_REWRITE_TAC [VAL_WORD_EXACT] THEN + TRANS_TAC LTE_TRANS `2 EXP (dimindex(:N) * LENGTH(h:(N word)list))` THEN + REWRITE_TAC[NUM_OF_WORDLIST_BOUND_LENGTH] THEN + ASM_REWRITE_TAC[LE_REFL]);; + +(* ========================================================================= *) +(* Helper lemmas: byte splitting *) +(* ========================================================================= *) + +let NUM_BIT_DECOMPOSE_UNIQ = prove( + `!a b t k. a < 2 EXP k + ==> (a + 2 EXP k * b = t <=> (a = t MOD 2 EXP k /\ b = t DIV 2 EXP k))`, + REPEAT STRIP_TAC THEN EQ_TAC THENL [ + DISCH_THEN (SUBST1_TAC o SYM) THEN + SIMP_TAC[MOD_MULT_ADD; DIV_MULT_ADD; EXP_EQ_0; ARITH_EQ] THEN + ASM_SIMP_TAC[MOD_LT; DIV_LT; ADD_CLAUSES]; + STRIP_TAC THEN + MP_TAC (SPECL [`t:num`; `2 EXP k`] DIVISION) THEN + SIMP_TAC[EXP_EQ_0; ARITH_EQ] THEN ASM_REWRITE_TAC[] THEN ARITH_TAC]);; + +let READ_BYTES_SPLIT_ANY = prove( + `read (bytes(a : int64,k+l)) s = t <=> + read (bytes(a,k)) s = t MOD 2 EXP (8*k) /\ + read (bytes(word_add a (word k), l)) s = t DIV 2 EXP (8*k)`, + let bound = prove(`read (bytes (a : int64,k)) s < 2 EXP (8*k)`, + REWRITE_TAC[READ_BYTES_BOUND]) in + REWRITE_TAC[GSYM VAL_EQ; VAL_READ_WBYTES; READ_COMPONENT_COMPOSE] THEN + REWRITE_TAC[READ_BYTES_COMBINE] THEN + REWRITE_TAC[MATCH_MP NUM_BIT_DECOMPOSE_UNIQ bound]);; + diff --git a/scripts/autogen b/scripts/autogen index 08cd36219..baac7e7dd 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -1133,6 +1133,30 @@ def gen_aarch64_polyz_unpack_table(): update_file("dev/aarch64_clean/src/polyz_unpack_table.c", "\n".join(gen())) +def gen_hol_light_polyz_unpack_table(): + def gen(): + yield from gen_hol_light_header() + yield "(*" + yield " * TBL index tables for polyz_unpack_{17,19}" + yield " * See autogen for details." + yield " *)" + yield "" + for gamma1_bits in [17, 19]: + bit_width = gamma1_bits + 1 + name = f"mldsa_polyz_unpack_{gamma1_bits}_indices" + indices = list(gen_aarch64_polyz_unpack_indices(bit_width)) + yield f"let {name} = (REWRITE_RULE[MAP] o define)" + yield f" `{name}:byte list = MAP word [" + yield from print_hol_light_array(indices, as_int=False, pad=3) + yield "]`;;" + yield "" + + update_file( + "proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_consts.ml", + "\n".join(gen()), + ) + + def gen_avx2_rej_uniform_table_rows(): # The index into the lookup table is an 8-bit bitmap, i.e. a number 0..255. # Conceptually, the table entry at index i is a vector of 8 16-bit values, of @@ -2308,6 +2332,20 @@ def gen_hol_light_asm(): f"-Imldsa/src/native/aarch64/src {aarch64_flags}", "aarch64", ), + ( + "polyz_unpack_17_asm.S", + "mldsa_polyz_unpack_17.S", + "dev/aarch64_opt/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), + ( + "polyz_unpack_19_asm.S", + "mldsa_polyz_unpack_19.S", + "dev/aarch64_opt/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), ] x86_64_flags = "-mavx2 -mbmi2 -msse4 -fcf-protection=full" @@ -3342,6 +3380,7 @@ def _main(): gen_aarch64_rej_uniform_table() gen_aarch64_rej_uniform_eta_table() gen_aarch64_polyz_unpack_table() + gen_hol_light_polyz_unpack_table() gen_avx2_hol_light_zeta_file() gen_avx2_zeta_file() gen_avx2_rej_uniform_table() From 83bf55d54868985e08b699993ae79df26d0bbc11 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Sat, 14 Feb 2026 17:43:21 +0100 Subject: [PATCH 2/2] CBMC: Add CBMC proof for aarch64 polyz_unpack Add CBMC contracts for native AArch64 polyz_unpack_17 and polyz_unpack_19 following corresponding HOL-Light specs. Signed-off-by: Matthias J. Kannwischer --- dev/aarch64_clean/src/arith_native_aarch64.h | 22 ++++++- dev/aarch64_opt/src/arith_native_aarch64.h | 22 ++++++- .../native/aarch64/src/arith_native_aarch64.h | 22 ++++++- .../polyz_unpack_17_native_aarch64/Makefile | 63 ++++++++++++++++++ .../polyz_unpack_17_native_aarch64_harness.c | 19 ++++++ .../polyz_unpack_19_native_aarch64/Makefile | 66 +++++++++++++++++++ .../polyz_unpack_19_native_aarch64_harness.c | 19 ++++++ 7 files changed, 227 insertions(+), 6 deletions(-) create mode 100644 proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile create mode 100644 proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c create mode 100644 proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile create mode 100644 proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c diff --git a/dev/aarch64_clean/src/arith_native_aarch64.h b/dev/aarch64_clean/src/arith_native_aarch64.h index 48c7d98c5..6a87db0ba 100644 --- a/dev/aarch64_clean/src/arith_native_aarch64.h +++ b/dev/aarch64_clean/src/arith_native_aarch64.h @@ -121,11 +121,29 @@ __contract__( #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm) void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 576)) + requires(indices == mld_polyz_unpack_17_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1)) +); #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm) void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 640)) + requires(indices == mld_polyz_unpack_19_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1)) +); #define mld_poly_pointwise_montgomery_asm \ MLD_NAMESPACE(poly_pointwise_montgomery_asm) diff --git a/dev/aarch64_opt/src/arith_native_aarch64.h b/dev/aarch64_opt/src/arith_native_aarch64.h index 48c7d98c5..6a87db0ba 100644 --- a/dev/aarch64_opt/src/arith_native_aarch64.h +++ b/dev/aarch64_opt/src/arith_native_aarch64.h @@ -121,11 +121,29 @@ __contract__( #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm) void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 576)) + requires(indices == mld_polyz_unpack_17_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1)) +); #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm) void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 640)) + requires(indices == mld_polyz_unpack_19_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1)) +); #define mld_poly_pointwise_montgomery_asm \ MLD_NAMESPACE(poly_pointwise_montgomery_asm) diff --git a/mldsa/src/native/aarch64/src/arith_native_aarch64.h b/mldsa/src/native/aarch64/src/arith_native_aarch64.h index 48c7d98c5..6a87db0ba 100644 --- a/mldsa/src/native/aarch64/src/arith_native_aarch64.h +++ b/mldsa/src/native/aarch64/src/arith_native_aarch64.h @@ -121,11 +121,29 @@ __contract__( #define mld_polyz_unpack_17_asm MLD_NAMESPACE(polyz_unpack_17_asm) void mld_polyz_unpack_17_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_17.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 576)) + requires(indices == mld_polyz_unpack_17_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 17) - 1), (1 << 17) + 1)) +); #define mld_polyz_unpack_19_asm MLD_NAMESPACE(polyz_unpack_19_asm) void mld_polyz_unpack_19_asm(int32_t *r, const uint8_t *buf, - const uint8_t *indices); + const uint8_t *indices) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_polyz_unpack_19.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(buf, 640)) + requires(indices == mld_polyz_unpack_19_indices) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_bound(r, 0, MLDSA_N, -((1 << 19) - 1), (1 << 19) + 1)) +); #define mld_poly_pointwise_montgomery_asm \ MLD_NAMESPACE(poly_pointwise_montgomery_asm) diff --git a/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile b/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile new file mode 100644 index 000000000..f6ac6ca51 --- /dev/null +++ b/proofs/cbmc/polyz_unpack_17_native_aarch64/Makefile @@ -0,0 +1,63 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = polyz_unpack_17_native_aarch64_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = polyz_unpack_17_native_aarch64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. +DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly_kl.c $(SRCDIR)/mldsa/src/native/aarch64/src/polyz_unpack_table.c + +# polyz_unpack_17 is only used with ML-DSA-44 +ifeq ($(MLD_CONFIG_PARAMETER_SET),44) +CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_17_native +USE_FUNCTION_CONTRACTS = mld_polyz_unpack_17_asm +else +CHECK_FUNCTION_CONTRACTS= +USE_FUNCTION_CONTRACTS = +endif +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = polyz_unpack_17_native_aarch64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mldsa/poly_kl.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly_kl.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c b/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c new file mode 100644 index 000000000..ff2983c44 --- /dev/null +++ b/proofs/cbmc/polyz_unpack_17_native_aarch64/polyz_unpack_17_native_aarch64_harness.c @@ -0,0 +1,19 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include +#include "cbmc.h" +#include "params.h" + +int mld_polyz_unpack_17_native(int32_t *r, const uint8_t *a); + +void harness(void) +{ + /* mld_polyz_unpack_17_native is only defined for ML-DSA-44 */ +#if MLD_CONFIG_PARAMETER_SET == 44 + int32_t *r; + const uint8_t *a; + int t; + t = mld_polyz_unpack_17_native(r, a); +#endif /* MLD_CONFIG_PARAMETER_SET == 44 */ +} diff --git a/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile b/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile new file mode 100644 index 000000000..1a5e73d64 --- /dev/null +++ b/proofs/cbmc/polyz_unpack_19_native_aarch64/Makefile @@ -0,0 +1,66 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = polyz_unpack_19_native_aarch64_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = polyz_unpack_19_native_aarch64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. +DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly_kl.c $(SRCDIR)/mldsa/src/native/aarch64/src/polyz_unpack_table.c + +# polyz_unpack_19 is only used with ML-DSA-65 and ML-DSA-87 +ifeq ($(MLD_CONFIG_PARAMETER_SET),65) +CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_19_native +USE_FUNCTION_CONTRACTS = mld_polyz_unpack_19_asm +else ifeq ($(MLD_CONFIG_PARAMETER_SET),87) +CHECK_FUNCTION_CONTRACTS=mld_polyz_unpack_19_native +USE_FUNCTION_CONTRACTS = mld_polyz_unpack_19_asm +else +CHECK_FUNCTION_CONTRACTS= +USE_FUNCTION_CONTRACTS = +endif +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--smt2 + +FUNCTION_NAME = polyz_unpack_19_native_aarch64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... +CBMC_OBJECT_BITS = 8 + +# If you require access to a file-local ("static") function or object to conduct +# your proof, set the following (and do not include the original source file +# ("mldsa/poly_kl.c") in PROJECT_SOURCES). +# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i +# include ../Makefile.common +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mldsa/src/poly_kl.c +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar +# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz +# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must +# be set before including Makefile.common, but any use of variables on the +# left-hand side requires those variables to be defined. Hence, _SOURCE, +# _FUNCTIONS, _OBJECTS is set after including Makefile.common. + +include ../Makefile.common diff --git a/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c b/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c new file mode 100644 index 000000000..f4cfecced --- /dev/null +++ b/proofs/cbmc/polyz_unpack_19_native_aarch64/polyz_unpack_19_native_aarch64_harness.c @@ -0,0 +1,19 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include +#include "cbmc.h" +#include "params.h" + +int mld_polyz_unpack_19_native(int32_t *r, const uint8_t *a); + +void harness(void) +{ + /* mld_polyz_unpack_19_native is only defined for ML-DSA-65 and ML-DSA-87 */ +#if MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87 + int32_t *r; + const uint8_t *a; + int t; + t = mld_polyz_unpack_19_native(r, a); +#endif /* MLD_CONFIG_PARAMETER_SET == 65 || MLD_CONFIG_PARAMETER_SET == 87 */ +}