diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml index cdbc883f4..431dfab46 100644 --- a/.github/workflows/hol_light.yml +++ b/.github/workflows/hol_light.yml @@ -85,6 +85,12 @@ jobs: needs: ["mldsa_specs.ml", "mldsa_zetas.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] - name: mldsa_pointwise needs: ["mldsa_specs.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] + - name: mldsa_pointwise_acc_l4 + needs: ["mldsa_specs.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] + - name: mldsa_pointwise_acc_l5 + needs: ["mldsa_specs.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] + - name: mldsa_pointwise_acc_l7 + needs: ["mldsa_specs.ml", "aarch64_utils.ml", "subroutine_signatures.ml"] - name: mldsa_poly_caddq needs: ["aarch64_utils.ml"] - name: mldsa_poly_chknorm @@ -185,6 +191,12 @@ jobs: needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml"] - name: mldsa_pointwise needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml"] + - name: mldsa_pointwise_acc_l4 + needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml", "subroutine_signatures.ml"] + - name: mldsa_pointwise_acc_l5 + needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml", "subroutine_signatures.ml"] + - name: mldsa_pointwise_acc_l7 + needs: ["mldsa_specs.ml", "mldsa_utils.ml", "mldsa_zetas.ml", "subroutine_signatures.ml"] name: x86_64 HOL Light proof for ${{ matrix.proof.name }}.S runs-on: pqcp-x64 if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork diff --git a/BIBLIOGRAPHY.md b/BIBLIOGRAPHY.md index 70b4799ab..38a2ce6cb 100644 --- a/BIBLIOGRAPHY.md +++ b/BIBLIOGRAPHY.md @@ -254,6 +254,9 @@ source code and documentation. 
- [proofs/hol_light/x86_64/mldsa/mldsa_intt.S](proofs/hol_light/x86_64/mldsa/mldsa_intt.S) - [proofs/hol_light/x86_64/mldsa/mldsa_ntt.S](proofs/hol_light/x86_64/mldsa/mldsa_ntt.S) - [proofs/hol_light/x86_64/mldsa/mldsa_pointwise.S](proofs/hol_light/x86_64/mldsa/mldsa_pointwise.S) + - [proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l4.S](proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l4.S) + - [proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l5.S](proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l5.S) + - [proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l7.S](proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l7.S) ### `Round3_Spec` diff --git a/dev/aarch64_clean/meta.h b/dev/aarch64_clean/meta.h index 2923b8c55..ed726d502 100644 --- a/dev/aarch64_clean/meta.h +++ b/dev/aarch64_clean/meta.h @@ -213,8 +213,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l4_native( int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N], const int32_t v[4][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4 */ @@ -225,8 +224,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l5_native( int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N], const int32_t v[5][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5 */ @@ -237,8 +235,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l7_native( int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N], const int32_t v[7][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; 
} #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7 */ diff --git a/dev/aarch64_clean/src/arith_native_aarch64.h b/dev/aarch64_clean/src/arith_native_aarch64.h index ca0a23d83..04e3f0a9a 100644 --- a/dev/aarch64_clean/src/arith_native_aarch64.h +++ b/dev/aarch64_clean/src/arith_native_aarch64.h @@ -152,17 +152,59 @@ __contract__( #define mld_polyvecl_pointwise_acc_montgomery_l4_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_asm) -void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *r, + const int32_t a[4][MLDSA_N], + const int32_t b[4][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 4 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 4 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 4, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 4, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_polyvecl_pointwise_acc_montgomery_l5_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_asm) -void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *r, + const int32_t a[5][MLDSA_N], + const int32_t b[5][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 5 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 5 * MLDSA_N)) + /* check-magic: off */ 
+ requires(forall(l0, 0, 5, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 5, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_polyvecl_pointwise_acc_montgomery_l7_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_asm) -void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *r, + const int32_t a[7][MLDSA_N], + const int32_t b[7][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 7 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 7 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 7, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 7, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #endif /* !MLD_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H */ diff --git a/dev/aarch64_opt/meta.h b/dev/aarch64_opt/meta.h index 2923b8c55..ed726d502 100644 --- a/dev/aarch64_opt/meta.h +++ b/dev/aarch64_opt/meta.h @@ -213,8 +213,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l4_native( int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N], const int32_t v[4][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4 */ @@ -225,8 +224,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l5_native( int32_t w[MLDSA_N], const 
int32_t u[5][MLDSA_N], const int32_t v[5][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 5 */ @@ -237,8 +235,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l7_native( int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N], const int32_t v[7][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7 */ diff --git a/dev/aarch64_opt/src/arith_native_aarch64.h b/dev/aarch64_opt/src/arith_native_aarch64.h index ca0a23d83..04e3f0a9a 100644 --- a/dev/aarch64_opt/src/arith_native_aarch64.h +++ b/dev/aarch64_opt/src/arith_native_aarch64.h @@ -152,17 +152,59 @@ __contract__( #define mld_polyvecl_pointwise_acc_montgomery_l4_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_asm) -void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *r, + const int32_t a[4][MLDSA_N], + const int32_t b[4][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 4 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 4 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 4, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 4, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define 
mld_polyvecl_pointwise_acc_montgomery_l5_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_asm) -void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *r, + const int32_t a[5][MLDSA_N], + const int32_t b[5][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 5 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 5 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 5, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 5, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_polyvecl_pointwise_acc_montgomery_l7_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_asm) -void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *r, + const int32_t a[7][MLDSA_N], + const int32_t b[7][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 7 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 7 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 7, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 7, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #endif /* !MLD_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H */ diff 
--git a/dev/x86_64/src/arith_native_x86_64.h b/dev/x86_64/src/arith_native_x86_64.h index a098ac9cf..39c3beed9 100644 --- a/dev/x86_64/src/arith_native_x86_64.h +++ b/dev/x86_64/src/arith_native_x86_64.h @@ -123,16 +123,58 @@ __contract__( #define mld_pointwise_acc_l4_avx2 MLD_NAMESPACE(pointwise_acc_l4_avx2) void mld_pointwise_acc_l4_avx2(int32_t c[MLDSA_N], const int32_t a[4][MLDSA_N], const int32_t b[4][MLDSA_N], - const int32_t *qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l4.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 4 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 4 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 4, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 4, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_pointwise_acc_l5_avx2 MLD_NAMESPACE(pointwise_acc_l5_avx2) void mld_pointwise_acc_l5_avx2(int32_t c[MLDSA_N], const int32_t a[5][MLDSA_N], const int32_t b[5][MLDSA_N], - const int32_t *qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l5.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 5 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 5 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 5, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 5, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) 
+ /* check-magic: on */ +); #define mld_pointwise_acc_l7_avx2 MLD_NAMESPACE(pointwise_acc_l7_avx2) void mld_pointwise_acc_l7_avx2(int32_t c[MLDSA_N], const int32_t a[7][MLDSA_N], const int32_t b[7][MLDSA_N], - const int32_t *qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l7.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 7 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 7 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 7, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 7, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #endif /* !MLD_NATIVE_X86_64_SRC_ARITH_NATIVE_X86_64_H */ diff --git a/mldsa/src/native/aarch64/meta.h b/mldsa/src/native/aarch64/meta.h index 2923b8c55..ed726d502 100644 --- a/mldsa/src/native/aarch64/meta.h +++ b/mldsa/src/native/aarch64/meta.h @@ -213,8 +213,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l4_native( int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N], const int32_t v[4][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l4_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 4 */ @@ -225,8 +224,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l5_native( int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N], const int32_t v[5][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l5_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 
5 */ @@ -237,8 +235,7 @@ static MLD_INLINE int mld_polyvecl_pointwise_acc_montgomery_l7_native( int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N], const int32_t v[7][MLDSA_N]) { - mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, (const int32_t *)u, - (const int32_t *)v); + mld_polyvecl_pointwise_acc_montgomery_l7_asm(w, u, v); return MLD_NATIVE_FUNC_SUCCESS; } #endif /* MLD_CONFIG_MULTILEVEL_WITH_SHARED || MLDSA_L == 7 */ diff --git a/mldsa/src/native/aarch64/src/arith_native_aarch64.h b/mldsa/src/native/aarch64/src/arith_native_aarch64.h index ca0a23d83..04e3f0a9a 100644 --- a/mldsa/src/native/aarch64/src/arith_native_aarch64.h +++ b/mldsa/src/native/aarch64/src/arith_native_aarch64.h @@ -152,17 +152,59 @@ __contract__( #define mld_polyvecl_pointwise_acc_montgomery_l4_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l4_asm) -void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l4_asm(int32_t *r, + const int32_t a[4][MLDSA_N], + const int32_t b[4][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 4 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 4 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 4, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 4, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_polyvecl_pointwise_acc_montgomery_l5_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l5_asm) -void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l5_asm(int32_t *r, + const int32_t 
a[5][MLDSA_N], + const int32_t b[5][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 5 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 5 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 5, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 5, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_polyvecl_pointwise_acc_montgomery_l7_asm \ MLD_NAMESPACE(polyvecl_pointwise_acc_montgomery_l7_asm) -void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *, const int32_t *, - const int32_t *); +void mld_polyvecl_pointwise_acc_montgomery_l7_asm(int32_t *r, + const int32_t a[7][MLDSA_N], + const int32_t b[7][MLDSA_N]) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml */ +__contract__( + requires(memory_no_alias(r, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 7 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 7 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 7, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 7, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + assigns(memory_slice(r, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(r, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #endif /* !MLD_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H */ diff --git a/mldsa/src/native/x86_64/src/arith_native_x86_64.h b/mldsa/src/native/x86_64/src/arith_native_x86_64.h index a098ac9cf..39c3beed9 100644 --- a/mldsa/src/native/x86_64/src/arith_native_x86_64.h +++ b/mldsa/src/native/x86_64/src/arith_native_x86_64.h @@ -123,16 +123,58 @@ 
__contract__( #define mld_pointwise_acc_l4_avx2 MLD_NAMESPACE(pointwise_acc_l4_avx2) void mld_pointwise_acc_l4_avx2(int32_t c[MLDSA_N], const int32_t a[4][MLDSA_N], const int32_t b[4][MLDSA_N], - const int32_t *qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l4.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 4 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 4 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 4, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 4, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_pointwise_acc_l5_avx2 MLD_NAMESPACE(pointwise_acc_l5_avx2) void mld_pointwise_acc_l5_avx2(int32_t c[MLDSA_N], const int32_t a[5][MLDSA_N], const int32_t b[5][MLDSA_N], - const int32_t *qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l5.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 5 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 5 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 5, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 5, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #define mld_pointwise_acc_l7_avx2 MLD_NAMESPACE(pointwise_acc_l7_avx2) void mld_pointwise_acc_l7_avx2(int32_t c[MLDSA_N], const int32_t a[7][MLDSA_N], const int32_t b[7][MLDSA_N], - const int32_t 
*qdata); + const int32_t *qdata) +/* This must be kept in sync with the HOL-Light specification + * in proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l7.ml */ +__contract__( + requires(memory_no_alias(c, sizeof(int32_t) * MLDSA_N)) + requires(memory_no_alias(a, sizeof(int32_t) * 7 * MLDSA_N)) + requires(memory_no_alias(b, sizeof(int32_t) * 7 * MLDSA_N)) + /* check-magic: off */ + requires(forall(l0, 0, 7, array_abs_bound(a[l0], 0, MLDSA_N, 8380417))) + requires(forall(l1, 0, 7, array_abs_bound(b[l1], 0, MLDSA_N, 75423753))) + requires(qdata == mld_qdata) + assigns(memory_slice(c, sizeof(int32_t) * MLDSA_N)) + ensures(array_abs_bound(c, 0, MLDSA_N, 8380417)) + /* check-magic: on */ +); #endif /* !MLD_NATIVE_X86_64_SRC_ARITH_NATIVE_X86_64_H */ diff --git a/proofs/cbmc/pointwise_acc_native_aarch64/Makefile b/proofs/cbmc/pointwise_acc_native_aarch64/Makefile new file mode 100644 index 000000000..2a087a7b1 --- /dev/null +++ b/proofs/cbmc/pointwise_acc_native_aarch64/Makefile @@ -0,0 +1,53 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = pointwise_acc_native_aarch64_harness + +# This should be a unique identifier for this proof, and will appear on the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = pointwise_acc_native_aarch64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. 
+DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/aarch64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/polyvec.c + +ifeq ($(MLD_CONFIG_PARAMETER_SET),44) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l4_native + USE_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l4_asm +else ifeq ($(MLD_CONFIG_PARAMETER_SET),65) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l5_native + USE_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l5_asm +else ifeq ($(MLD_CONFIG_PARAMETER_SET),87) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l7_native + USE_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l7_asm +endif +USE_FUNCTION_CONTRACTS+=mld_sys_check_capability +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--external-smt2-solver $(PROOF_ROOT)/lib/z3_smt_only --z3 + +FUNCTION_NAME = pointwise_acc_native_aarch64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... 
+CBMC_OBJECT_BITS = 8 + +include ../Makefile.common diff --git a/proofs/cbmc/pointwise_acc_native_aarch64/pointwise_acc_native_aarch64_harness.c b/proofs/cbmc/pointwise_acc_native_aarch64/pointwise_acc_native_aarch64_harness.c new file mode 100644 index 000000000..791d7f97b --- /dev/null +++ b/proofs/cbmc/pointwise_acc_native_aarch64/pointwise_acc_native_aarch64_harness.c @@ -0,0 +1,37 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include <stdint.h> +#include "cbmc.h" +#include "params.h" + +#if MLDSA_L == 4 +int mld_polyvecl_pointwise_acc_montgomery_l4_native( + int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N], + const int32_t v[4][MLDSA_N]); +#elif MLDSA_L == 5 +int mld_polyvecl_pointwise_acc_montgomery_l5_native( + int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N], + const int32_t v[5][MLDSA_N]); +#elif MLDSA_L == 7 +int mld_polyvecl_pointwise_acc_montgomery_l7_native( + int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N], + const int32_t v[7][MLDSA_N]); +#endif + +void harness(void) +{ + int32_t *w; + int t; + +#if MLDSA_L == 4 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l4_native(w, u, v); +#elif MLDSA_L == 5 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l5_native(w, u, v); +#elif MLDSA_L == 7 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l7_native(w, u, v); +#endif +} diff --git a/proofs/cbmc/pointwise_acc_native_x86_64/Makefile b/proofs/cbmc/pointwise_acc_native_x86_64/Makefile new file mode 100644 index 000000000..928011224 --- /dev/null +++ b/proofs/cbmc/pointwise_acc_native_x86_64/Makefile @@ -0,0 +1,53 @@ +# Copyright (c) The mldsa-native project authors +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +include ../Makefile_params.common + +HARNESS_ENTRY = harness +HARNESS_FILE = pointwise_acc_native_x86_64_harness + +# This should be a unique identifier for this proof, and will appear on 
the +# Litani dashboard. It can be human-readable and contain spaces if you wish. +PROOF_UID = pointwise_acc_native_x86_64 + +# We need to set MLD_CHECK_APIS as otherwise mldsa/src/native/api.h won't be +# included, which contains the CBMC specifications. +DEFINES += -DMLD_CONFIG_USE_NATIVE_BACKEND_ARITH -DMLD_CONFIG_ARITH_BACKEND_FILE="\"$(SRCDIR)/mldsa/src/native/x86_64/meta.h\"" -DMLD_CHECK_APIS +INCLUDES += + +REMOVE_FUNCTION_BODY += +UNWINDSET += + +PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c +PROJECT_SOURCES += $(SRCDIR)/mldsa/src/polyvec.c $(SRCDIR)/mldsa/src/native/x86_64/src/consts.c + +ifeq ($(MLD_CONFIG_PARAMETER_SET),44) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l4_native + USE_FUNCTION_CONTRACTS=mld_pointwise_acc_l4_avx2 +else ifeq ($(MLD_CONFIG_PARAMETER_SET),65) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l5_native + USE_FUNCTION_CONTRACTS=mld_pointwise_acc_l5_avx2 +else ifeq ($(MLD_CONFIG_PARAMETER_SET),87) + CHECK_FUNCTION_CONTRACTS=mld_polyvecl_pointwise_acc_montgomery_l7_native + USE_FUNCTION_CONTRACTS=mld_pointwise_acc_l7_avx2 +endif +USE_FUNCTION_CONTRACTS+=mld_sys_check_capability +APPLY_LOOP_CONTRACTS=on +USE_DYNAMIC_FRAMES=1 + +# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead +EXTERNAL_SAT_SOLVER= +CBMCFLAGS=--external-smt2-solver $(PROOF_ROOT)/lib/z3_smt_only --z3 + +FUNCTION_NAME = pointwise_acc_native_x86_64 + +# If this proof is found to consume huge amounts of RAM, you can set the +# EXPENSIVE variable. With new enough versions of the proof tools, this will +# restrict the number of EXPENSIVE CBMC jobs running at once. See the +# documentation in Makefile.common under the "Job Pools" heading for details. +# EXPENSIVE = true + +# This function is large enough to need... 
+CBMC_OBJECT_BITS = 8 + +include ../Makefile.common diff --git a/proofs/cbmc/pointwise_acc_native_x86_64/pointwise_acc_native_x86_64_harness.c b/proofs/cbmc/pointwise_acc_native_x86_64/pointwise_acc_native_x86_64_harness.c new file mode 100644 index 000000000..791d7f97b --- /dev/null +++ b/proofs/cbmc/pointwise_acc_native_x86_64/pointwise_acc_native_x86_64_harness.c @@ -0,0 +1,37 @@ +// Copyright (c) The mldsa-native project authors +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + +#include <stdint.h> +#include "cbmc.h" +#include "params.h" + +#if MLDSA_L == 4 +int mld_polyvecl_pointwise_acc_montgomery_l4_native( + int32_t w[MLDSA_N], const int32_t u[4][MLDSA_N], + const int32_t v[4][MLDSA_N]); +#elif MLDSA_L == 5 +int mld_polyvecl_pointwise_acc_montgomery_l5_native( + int32_t w[MLDSA_N], const int32_t u[5][MLDSA_N], + const int32_t v[5][MLDSA_N]); +#elif MLDSA_L == 7 +int mld_polyvecl_pointwise_acc_montgomery_l7_native( + int32_t w[MLDSA_N], const int32_t u[7][MLDSA_N], + const int32_t v[7][MLDSA_N]); +#endif + +void harness(void) +{ + int32_t *w; + int t; + +#if MLDSA_L == 4 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l4_native(w, u, v); +#elif MLDSA_L == 5 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l5_native(w, u, v); +#elif MLDSA_L == 7 + int32_t (*u)[MLDSA_N], (*v)[MLDSA_N]; + t = mld_polyvecl_pointwise_acc_montgomery_l7_native(w, u, v); +#endif +} diff --git a/proofs/hol_light/README.md b/proofs/hol_light/README.md index f5ffd396a..9426f6baf 100644 --- a/proofs/hol_light/README.md +++ b/proofs/hol_light/README.md @@ -55,9 +55,15 @@ echo '1+1;;' | nc -w 5 127.0.0.1 2012 * AArch64 poly_caddq: [mldsa_poly_caddq.S](aarch64/mldsa/mldsa_poly_caddq.S) * AArch64 poly_chknorm: [mldsa_poly_chknorm.S](aarch64/mldsa/mldsa_poly_chknorm.S) * AArch64 pointwise multiplication: [mldsa_pointwise.S](aarch64/mldsa/mldsa_pointwise.S) + * AArch64 pointwise multiplication-accumulation (l=4): 
[mldsa_pointwise_acc_l4.S](aarch64/mldsa/mldsa_pointwise_acc_l4.S) + * AArch64 pointwise multiplication-accumulation (l=5): [mldsa_pointwise_acc_l5.S](aarch64/mldsa/mldsa_pointwise_acc_l5.S) + * AArch64 pointwise multiplication-accumulation (l=7): [mldsa_pointwise_acc_l7.S](aarch64/mldsa/mldsa_pointwise_acc_l7.S) * x86_64 forward NTT: [mldsa_ntt.S](x86_64/mldsa/mldsa_ntt.S) * x86_64 inverse NTT: [mldsa_intt.S](x86_64/mldsa/mldsa_intt.S) * x86_64 pointwise multiplication: [mldsa_pointwise.S](x86_64/mldsa/mldsa_pointwise.S) + * x86_64 pointwise multiplication-accumulation (l=4): [mldsa_pointwise_acc_l4.S](x86_64/mldsa/mldsa_pointwise_acc_l4.S) + * x86_64 pointwise multiplication-accumulation (l=5): [mldsa_pointwise_acc_l5.S](x86_64/mldsa/mldsa_pointwise_acc_l5.S) + * x86_64 pointwise multiplication-accumulation (l=7): [mldsa_pointwise_acc_l7.S](x86_64/mldsa/mldsa_pointwise_acc_l7.S) - FIPS202: * Keccak-F1600 using lazy rotations[^HYBRID]: [keccak_f1600_x1_scalar.S](aarch64/mldsa/keccak_f1600_x1_scalar.S) * Keccak-F1600 using v8.4-A SHA3 instructions: [keccak_f1600_x1_v84a.S](aarch64/mldsa/keccak_f1600_x1_v84a.S) diff --git a/proofs/hol_light/aarch64/Makefile b/proofs/hol_light/aarch64/Makefile index 06941b4aa..b2f104c56 100644 --- a/proofs/hol_light/aarch64/Makefile +++ b/proofs/hol_light/aarch64/Makefile @@ -57,6 +57,9 @@ OBJ = mldsa/mldsa_ntt.o \ mldsa/mldsa_pointwise.o \ mldsa/mldsa_poly_caddq.o \ mldsa/mldsa_poly_chknorm.o \ + mldsa/mldsa_pointwise_acc_l4.o \ + mldsa/mldsa_pointwise_acc_l5.o \ + mldsa/mldsa_pointwise_acc_l7.o \ mldsa/keccak_f1600_x1_scalar.o \ mldsa/keccak_f1600_x1_v84a.o \ mldsa/keccak_f1600_x2_v84a.o \ diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l4.S b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l4.S new file mode 100644 index 000000000..bf12cf520 --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l4.S @@ -0,0 +1,126 @@ +/* Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: 
Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l4.S using scripts/simpasm. Do not modify it directly. + */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l4_asm +_PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l4_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l4_asm +PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l4_asm: +#endif + + .cfi_startproc + mov w3, #0xe001 // =57345 + movk w3, #0x7f, lsl #16 + dup v0.4s, w3 + mov w3, #0x2001 // =8193 + movk w3, #0x380, lsl #16 + dup v1.4s, w3 + mov x3, #0x40 // =64 + +Lpolyvecl_pointwise_acc_montgomery_l4_loop_start: + ldr q17, [x1, #0x10] + ldr q18, [x1, #0x20] + ldr q19, [x1, #0x30] + ldr q16, [x1], #0x40 + ldr q21, [x2, #0x10] + ldr q22, [x2, #0x20] + ldr q23, [x2, #0x30] + ldr q20, [x2], #0x40 + smull v24.2d, v16.2s, v20.2s + smull2 v25.2d, v16.4s, v20.4s + smull v26.2d, v17.2s, v21.2s + smull2 v27.2d, v17.4s, v21.4s + smull v28.2d, v18.2s, v22.2s + smull2 v29.2d, v18.4s, v22.4s + smull v30.2d, v19.2s, v23.2s + smull2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x3c0] + ldr q17, [x1, #0x3d0] + ldr q18, [x1, #0x3e0] + ldr q19, [x1, #0x3f0] + ldr q20, [x2, #0x3c0] + ldr q21, [x2, #0x3d0] + ldr q22, [x2, #0x3e0] + ldr q23, [x2, #0x3f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x7c0] + ldr q17, [x1, #0x7d0] + ldr q18, [x1, #0x7e0] + ldr q19, [x1, #0x7f0] + ldr q20, [x2, #0x7c0] + ldr q21, [x2, #0x7d0] + ldr q22, [x2, #0x7e0] + ldr q23, [x2, #0x7f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, 
v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0xbc0] + ldr q17, [x1, #0xbd0] + ldr q18, [x1, #0xbe0] + ldr q19, [x1, #0xbf0] + ldr q20, [x2, #0xbc0] + ldr q21, [x2, #0xbd0] + ldr q22, [x2, #0xbe0] + ldr q23, [x2, #0xbf0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + uzp1 v16.4s, v24.4s, v25.4s + mul v16.4s, v16.4s, v1.4s + smlsl v24.2d, v16.2s, v0.2s + smlsl2 v25.2d, v16.4s, v0.4s + uzp2 v16.4s, v24.4s, v25.4s + uzp1 v17.4s, v26.4s, v27.4s + mul v17.4s, v17.4s, v1.4s + smlsl v26.2d, v17.2s, v0.2s + smlsl2 v27.2d, v17.4s, v0.4s + uzp2 v17.4s, v26.4s, v27.4s + uzp1 v18.4s, v28.4s, v29.4s + mul v18.4s, v18.4s, v1.4s + smlsl v28.2d, v18.2s, v0.2s + smlsl2 v29.2d, v18.4s, v0.4s + uzp2 v18.4s, v28.4s, v29.4s + uzp1 v19.4s, v30.4s, v31.4s + mul v19.4s, v19.4s, v1.4s + smlsl v30.2d, v19.2s, v0.2s + smlsl2 v31.2d, v19.4s, v0.4s + uzp2 v19.4s, v30.4s, v31.4s + str q17, [x0, #0x10] + str q18, [x0, #0x20] + str q19, [x0, #0x30] + str q16, [x0], #0x40 + subs x3, x3, #0x4 + cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l4_loop_start + ret + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l5.S b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l5.S new file mode 100644 index 000000000..c0af5cc1b --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l5.S @@ -0,0 +1,142 @@ +/* Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l5.S using scripts/simpasm. 
Do not modify it directly. + */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l5_asm +_PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l5_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l5_asm +PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l5_asm: +#endif + + .cfi_startproc + mov w3, #0xe001 // =57345 + movk w3, #0x7f, lsl #16 + dup v0.4s, w3 + mov w3, #0x2001 // =8193 + movk w3, #0x380, lsl #16 + dup v1.4s, w3 + mov x3, #0x40 // =64 + +Lpolyvecl_pointwise_acc_montgomery_l5_loop_start: + ldr q17, [x1, #0x10] + ldr q18, [x1, #0x20] + ldr q19, [x1, #0x30] + ldr q16, [x1], #0x40 + ldr q21, [x2, #0x10] + ldr q22, [x2, #0x20] + ldr q23, [x2, #0x30] + ldr q20, [x2], #0x40 + smull v24.2d, v16.2s, v20.2s + smull2 v25.2d, v16.4s, v20.4s + smull v26.2d, v17.2s, v21.2s + smull2 v27.2d, v17.4s, v21.4s + smull v28.2d, v18.2s, v22.2s + smull2 v29.2d, v18.4s, v22.4s + smull v30.2d, v19.2s, v23.2s + smull2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x3c0] + ldr q17, [x1, #0x3d0] + ldr q18, [x1, #0x3e0] + ldr q19, [x1, #0x3f0] + ldr q20, [x2, #0x3c0] + ldr q21, [x2, #0x3d0] + ldr q22, [x2, #0x3e0] + ldr q23, [x2, #0x3f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x7c0] + ldr q17, [x1, #0x7d0] + ldr q18, [x1, #0x7e0] + ldr q19, [x1, #0x7f0] + ldr q20, [x2, #0x7c0] + ldr q21, [x2, #0x7d0] + ldr q22, [x2, #0x7e0] + ldr q23, [x2, #0x7f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0xbc0] + ldr q17, [x1, #0xbd0] + ldr q18, [x1, 
#0xbe0] + ldr q19, [x1, #0xbf0] + ldr q20, [x2, #0xbc0] + ldr q21, [x2, #0xbd0] + ldr q22, [x2, #0xbe0] + ldr q23, [x2, #0xbf0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0xfc0] + ldr q17, [x1, #0xfd0] + ldr q18, [x1, #0xfe0] + ldr q19, [x1, #0xff0] + ldr q20, [x2, #0xfc0] + ldr q21, [x2, #0xfd0] + ldr q22, [x2, #0xfe0] + ldr q23, [x2, #0xff0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + uzp1 v16.4s, v24.4s, v25.4s + mul v16.4s, v16.4s, v1.4s + smlsl v24.2d, v16.2s, v0.2s + smlsl2 v25.2d, v16.4s, v0.4s + uzp2 v16.4s, v24.4s, v25.4s + uzp1 v17.4s, v26.4s, v27.4s + mul v17.4s, v17.4s, v1.4s + smlsl v26.2d, v17.2s, v0.2s + smlsl2 v27.2d, v17.4s, v0.4s + uzp2 v17.4s, v26.4s, v27.4s + uzp1 v18.4s, v28.4s, v29.4s + mul v18.4s, v18.4s, v1.4s + smlsl v28.2d, v18.2s, v0.2s + smlsl2 v29.2d, v18.4s, v0.4s + uzp2 v18.4s, v28.4s, v29.4s + uzp1 v19.4s, v30.4s, v31.4s + mul v19.4s, v19.4s, v1.4s + smlsl v30.2d, v19.2s, v0.2s + smlsl2 v31.2d, v19.4s, v0.4s + uzp2 v19.4s, v30.4s, v31.4s + str q17, [x0, #0x10] + str q18, [x0, #0x20] + str q19, [x0, #0x30] + str q16, [x0], #0x40 + subs x3, x3, #0x4 + cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l5_loop_start + ret + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l7.S b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l7.S new file mode 100644 index 000000000..c1fa119b2 --- /dev/null +++ b/proofs/hol_light/aarch64/mldsa/mldsa_pointwise_acc_l7.S @@ -0,0 +1,174 @@ +/* Copyright (c) The mldsa-native project 
authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/aarch64_opt/src/mld_polyvecl_pointwise_acc_montgomery_l7.S using scripts/simpasm. Do not modify it directly. + */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l7_asm +_PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l7_asm: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l7_asm +PQCP_MLDSA_NATIVE_MLDSA44_polyvecl_pointwise_acc_montgomery_l7_asm: +#endif + + .cfi_startproc + mov w3, #0xe001 // =57345 + movk w3, #0x7f, lsl #16 + dup v0.4s, w3 + mov w3, #0x2001 // =8193 + movk w3, #0x380, lsl #16 + dup v1.4s, w3 + mov x3, #0x40 // =64 + +Lpolyvecl_pointwise_acc_montgomery_l7_loop_start: + ldr q17, [x1, #0x10] + ldr q18, [x1, #0x20] + ldr q19, [x1, #0x30] + ldr q16, [x1], #0x40 + ldr q21, [x2, #0x10] + ldr q22, [x2, #0x20] + ldr q23, [x2, #0x30] + ldr q20, [x2], #0x40 + smull v24.2d, v16.2s, v20.2s + smull2 v25.2d, v16.4s, v20.4s + smull v26.2d, v17.2s, v21.2s + smull2 v27.2d, v17.4s, v21.4s + smull v28.2d, v18.2s, v22.2s + smull2 v29.2d, v18.4s, v22.4s + smull v30.2d, v19.2s, v23.2s + smull2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x3c0] + ldr q17, [x1, #0x3d0] + ldr q18, [x1, #0x3e0] + ldr q19, [x1, #0x3f0] + ldr q20, [x2, #0x3c0] + ldr q21, [x2, #0x3d0] + ldr q22, [x2, #0x3e0] + ldr q23, [x2, #0x3f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x7c0] + ldr q17, [x1, #0x7d0] + ldr q18, [x1, #0x7e0] + ldr q19, [x1, #0x7f0] + ldr q20, [x2, #0x7c0] + ldr q21, [x2, #0x7d0] + ldr q22, [x2, #0x7e0] + ldr q23, [x2, #0x7f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal 
v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0xbc0] + ldr q17, [x1, #0xbd0] + ldr q18, [x1, #0xbe0] + ldr q19, [x1, #0xbf0] + ldr q20, [x2, #0xbc0] + ldr q21, [x2, #0xbd0] + ldr q22, [x2, #0xbe0] + ldr q23, [x2, #0xbf0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0xfc0] + ldr q17, [x1, #0xfd0] + ldr q18, [x1, #0xfe0] + ldr q19, [x1, #0xff0] + ldr q20, [x2, #0xfc0] + ldr q21, [x2, #0xfd0] + ldr q22, [x2, #0xfe0] + ldr q23, [x2, #0xff0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x13c0] + ldr q17, [x1, #0x13d0] + ldr q18, [x1, #0x13e0] + ldr q19, [x1, #0x13f0] + ldr q20, [x2, #0x13c0] + ldr q21, [x2, #0x13d0] + ldr q22, [x2, #0x13e0] + ldr q23, [x2, #0x13f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + ldr q16, [x1, #0x17c0] + ldr q17, [x1, #0x17d0] + ldr q18, [x1, #0x17e0] + ldr q19, [x1, #0x17f0] + ldr q20, [x2, #0x17c0] + ldr q21, [x2, #0x17d0] + ldr q22, [x2, #0x17e0] + ldr q23, [x2, #0x17f0] + smlal v24.2d, v16.2s, v20.2s + smlal2 v25.2d, v16.4s, v20.4s + smlal v26.2d, v17.2s, v21.2s + smlal2 v27.2d, v17.4s, v21.4s + smlal v28.2d, v18.2s, v22.2s + smlal2 v29.2d, v18.4s, v22.4s + smlal v30.2d, v19.2s, v23.2s + smlal2 v31.2d, v19.4s, v23.4s + uzp1 v16.4s, v24.4s, 
v25.4s + mul v16.4s, v16.4s, v1.4s + smlsl v24.2d, v16.2s, v0.2s + smlsl2 v25.2d, v16.4s, v0.4s + uzp2 v16.4s, v24.4s, v25.4s + uzp1 v17.4s, v26.4s, v27.4s + mul v17.4s, v17.4s, v1.4s + smlsl v26.2d, v17.2s, v0.2s + smlsl2 v27.2d, v17.4s, v0.4s + uzp2 v17.4s, v26.4s, v27.4s + uzp1 v18.4s, v28.4s, v29.4s + mul v18.4s, v18.4s, v1.4s + smlsl v28.2d, v18.2s, v0.2s + smlsl2 v29.2d, v18.4s, v0.4s + uzp2 v18.4s, v28.4s, v29.4s + uzp1 v19.4s, v30.4s, v31.4s + mul v19.4s, v19.4s, v1.4s + smlsl v30.2d, v19.2s, v0.2s + smlsl2 v31.2d, v19.4s, v0.4s + uzp2 v19.4s, v30.4s, v31.4s + str q17, [x0, #0x10] + str q18, [x0, #0x20] + str q19, [x0, #0x30] + str q16, [x0], #0x40 + subs x3, x3, #0x4 + cbnz x3, Lpolyvecl_pointwise_acc_montgomery_l7_loop_start + ret + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml index 0df8d7529..32b418cb9 100644 --- a/proofs/hol_light/aarch64/proofs/dump_bytecode.ml +++ b/proofs/hol_light/aarch64/proofs/dump_bytecode.ml @@ -14,6 +14,18 @@ print_string "=== bytecode start: aarch64/mldsa/mldsa_pointwise.o ===\n";; print_literal_from_elf "aarch64/mldsa/mldsa_pointwise.o";; print_string "==== bytecode end =====================================\n\n";; +print_string "=== bytecode start: aarch64/mldsa/mldsa_pointwise_acc_l4.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l4.o";; +print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: aarch64/mldsa/mldsa_pointwise_acc_l5.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l5.o";; +print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: aarch64/mldsa/mldsa_pointwise_acc_l7.o ===\n";; +print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l7.o";; +print_string "==== bytecode end 
=====================================\n\n";; + print_string "=== bytecode start: aarch64/mldsa/mldsa_poly_caddq.o ===\n";; print_literal_from_elf "aarch64/mldsa/mldsa_poly_caddq.o";; print_string "==== bytecode end =====================================\n\n";; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml new file mode 100644 index 000000000..7d548b974 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l4.ml @@ -0,0 +1,363 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l4.o";; + ****) + +let mldsa_pointwise_acc_l4_mc = define_assert_from_elf + "mldsa_pointwise_acc_l4_mc" "aarch64/mldsa/mldsa_pointwise_acc_l4.o" +(*** BYTECODE START ***) +[ + 0x529c0023; (* arm_MOV W3 (rvalue (word 57345)) *) + 0x72a00fe3; (* arm_MOVK W3 (word 127) 16 *) + 0x4e040c60; (* arm_DUP_GEN Q0 X3 32 128 *) + 0x52840023; (* arm_MOV W3 (rvalue (word 8193)) *) + 0x72a07003; (* arm_MOVK W3 (word 896) 16 *) + 0x4e040c61; (* arm_DUP_GEN Q1 X3 32 128 *) + 0xd2800803; (* arm_MOV X3 (rvalue (word 64)) *) + 0x3dc00431; (* arm_LDR Q17 X1 (Immediate_Offset (word 16)) *) + 0x3dc00832; (* arm_LDR Q18 X1 (Immediate_Offset (word 32)) *) + 0x3dc00c33; (* arm_LDR Q19 X1 (Immediate_Offset (word 48)) *) + 0x3cc40430; (* arm_LDR Q16 X1 (Postimmediate_Offset (word 64)) *) + 0x3dc00455; (* arm_LDR Q21 X2 (Immediate_Offset (word 16)) *) + 0x3dc00856; (* arm_LDR 
Q22 X2 (Immediate_Offset (word 32)) *) + 0x3dc00c57; (* arm_LDR Q23 X2 (Immediate_Offset (word 48)) *) + 0x3cc40454; (* arm_LDR Q20 X2 (Postimmediate_Offset (word 64)) *) + 0x0eb4c218; (* arm_SMULL_VEC Q24 Q16 Q20 32 *) + 0x4eb4c219; (* arm_SMULL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5c23a; (* arm_SMULL_VEC Q26 Q17 Q21 32 *) + 0x4eb5c23b; (* arm_SMULL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6c25c; (* arm_SMULL_VEC Q28 Q18 Q22 32 *) + 0x4eb6c25d; (* arm_SMULL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7c27e; (* arm_SMULL_VEC Q30 Q19 Q23 32 *) + 0x4eb7c27f; (* arm_SMULL2_VEC Q31 Q19 Q23 32 *) + 0x3dc0f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 960)) *) + 0x3dc0f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 976)) *) + 0x3dc0f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 992)) *) + 0x3dc0fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 1008)) *) + 0x3dc0f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 960)) *) + 0x3dc0f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 976)) *) + 0x3dc0f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 992)) *) + 0x3dc0fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 1008)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc1f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 1984)) *) + 0x3dc1f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 2000)) *) + 0x3dc1f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 2016)) *) + 0x3dc1fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 2032)) *) + 0x3dc1f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 1984)) *) + 0x3dc1f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 2000)) *) + 0x3dc1f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 2016)) *) + 0x3dc1fc57; (* arm_LDR Q23 X2 (Immediate_Offset 
(word 2032)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc2f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 3008)) *) + 0x3dc2f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 3024)) *) + 0x3dc2f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 3040)) *) + 0x3dc2fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 3056)) *) + 0x3dc2f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 3008)) *) + 0x3dc2f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 3024)) *) + 0x3dc2f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 3040)) *) + 0x3dc2fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 3056)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x4e991b10; (* arm_UZP1 Q16 Q24 Q25 32 *) + 0x4ea19e10; (* arm_MUL_VEC Q16 Q16 Q1 32 128 *) + 0x0ea0a218; (* arm_SMLSL_VEC Q24 Q16 Q0 32 *) + 0x4ea0a219; (* arm_SMLSL2_VEC Q25 Q16 Q0 32 *) + 0x4e995b10; (* arm_UZP2 Q16 Q24 Q25 32 *) + 0x4e9b1b51; (* arm_UZP1 Q17 Q26 Q27 32 *) + 0x4ea19e31; (* arm_MUL_VEC Q17 Q17 Q1 32 128 *) + 0x0ea0a23a; (* arm_SMLSL_VEC Q26 Q17 Q0 32 *) + 0x4ea0a23b; (* arm_SMLSL2_VEC Q27 Q17 Q0 32 *) + 0x4e9b5b51; (* arm_UZP2 Q17 Q26 Q27 32 *) + 0x4e9d1b92; (* arm_UZP1 Q18 Q28 Q29 32 *) + 0x4ea19e52; (* arm_MUL_VEC Q18 Q18 Q1 32 128 *) + 0x0ea0a25c; (* arm_SMLSL_VEC Q28 Q18 Q0 32 *) + 0x4ea0a25d; (* arm_SMLSL2_VEC Q29 Q18 Q0 32 *) + 
0x4e9d5b92; (* arm_UZP2 Q18 Q28 Q29 32 *) + 0x4e9f1bd3; (* arm_UZP1 Q19 Q30 Q31 32 *) + 0x4ea19e73; (* arm_MUL_VEC Q19 Q19 Q1 32 128 *) + 0x0ea0a27e; (* arm_SMLSL_VEC Q30 Q19 Q0 32 *) + 0x4ea0a27f; (* arm_SMLSL2_VEC Q31 Q19 Q0 32 *) + 0x4e9f5bd3; (* arm_UZP2 Q19 Q30 Q31 32 *) + 0x3d800411; (* arm_STR Q17 X0 (Immediate_Offset (word 16)) *) + 0x3d800812; (* arm_STR Q18 X0 (Immediate_Offset (word 32)) *) + 0x3d800c13; (* arm_STR Q19 X0 (Immediate_Offset (word 48)) *) + 0x3c840410; (* arm_STR Q16 X0 (Postimmediate_Offset (word 64)) *) + 0xf1001063; (* arm_SUBS X3 X3 (rvalue (word 4)) *) + 0xb5fff4e3; (* arm_CBNZ X3 (word 2096796) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_POINTWISE_ACC_L4_EXEC = ARM_MK_EXEC_RULE mldsa_pointwise_acc_l4_mc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L4_CORRECT = prove + (`!r a b x y pc. + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (a, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (b, 4096) /\ + nonoverlapping (r, 1024) (a, 4096) /\ + nonoverlapping (r, 1024) (b, 4096) /\ + nonoverlapping (a, 4096) (b, 4096) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l4_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1024 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1024 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read PC s = word(pc + 0x184) /\ + (!i. 
i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l4 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + + (* Setup *) + MAP_EVERY X_GEN_TAC + [`r:int64`; `a:int64`; `b:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL; + fst MLDSA_POINTWISE_ACC_L4_EXEC] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + + (* Lift x bound to match y bound for product lemma *) + SUBGOAL_THEN + `!i. i < 1024 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC[SOME_FLAGS; MODIFIABLE_SIMD_REGS] THEN + + (* Initialize and merge memory *) + ENSURES_INIT_TAC "s0" THEN + + (* Merge a: 4096 bytes = 256 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "a" 0 256 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + + (* Merge b: 4096 bytes = 256 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "b" 0 256 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + (* Simulate all 1447 instructions (7 setup + 16 loop iterations x 90) with SIMD simplification *) + MAP_EVERY (fun n -> ARM_STEPS_TAC MLDSA_POINTWISE_ACC_L4_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[arm_mldsa_pointwise_montred']) + (1--1447) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (* Split bytes128 -> bytes32 for output memory *) + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + 
CONV_RULE (SIMD_SIMPLIFY_CONV []) o + CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (* Expand output cases, substitute, simplify subwords *) + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + + (* Rewrite ARM montred to standard montred for CONGBOUND *) + REWRITE_TAC[ARM_MLDSA_MONTRED_EQ] THEN + + (* Product bounds (tight: 8380416 * 75423752 = 632082418040832) *) + SUBGOAL_THEN + `!i. i < 1024 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + (* Prove postcondition - congruence + bounds for each coefficient *) + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1024 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1024 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1024`)) in + let prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' 
ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let pairs = pair_up (conjuncts w) in + (* Prove each pair independently *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [(* Congruence branch *) + REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l4; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN INT_ARITH_TAC; + (* Bounds branch *) + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L4_SUBROUTINE_CORRECT = prove + (`!r a b x y pc returnaddress. 
+ nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (a, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (b, 4096) /\ + nonoverlapping (r, 1024) (a, 4096) /\ + nonoverlapping (r, 1024) (b, 4096) /\ + nonoverlapping (a, 4096) (b, 4096) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l4_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1024 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1024 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read PC s = returnaddress /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l4 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + REWRITE_TAC[fst MLDSA_POINTWISE_ACC_L4_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POINTWISE_ACC_L4_EXEC + (REWRITE_RULE[fst MLDSA_POINTWISE_ACC_L4_EXEC] + MLDSA_POINTWISE_ACC_L4_CORRECT));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "arm/proofs/consttime.ml";; +needs "aarch64/proofs/subroutine_signatures.ml";; + +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:false + (assoc "mldsa_pointwise_acc_l4" subroutine_signatures) + MLDSA_POINTWISE_ACC_L4_SUBROUTINE_CORRECT + MLDSA_POINTWISE_ACC_L4_EXEC;; + +let MLDSA_POINTWISE_ACC_L4_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e r a b pc returnaddress. 
+ nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (r,1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (a,4096) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (b,4096) /\ + nonoverlapping (r,1024) (a,4096) /\ + nonoverlapping (r,1024) (b,4096) /\ + nonoverlapping (a,4096) (b,4096) + ==> ensures arm + (\s. + aligned_bytes_loaded s (word pc) + mldsa_pointwise_acc_l4_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + read events s = e) + (\s. + read PC s = returnaddress /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events a b r pc returnaddress /\ + memaccess_inbounds e2 [a,4096; b,4096; r,1024] + [r,1024])) + (\s s'. true)`, + ASSERT_CONCL_TAC full_spec THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars MLDSA_POINTWISE_ACC_L4_EXEC);; diff --git a/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml new file mode 100644 index 000000000..efa4a3254 --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l5.ml @@ -0,0 +1,390 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l5.o";; + ****) + +let mldsa_pointwise_acc_l5_mc = define_assert_from_elf + "mldsa_pointwise_acc_l5_mc" "aarch64/mldsa/mldsa_pointwise_acc_l5.o" +(*** BYTECODE START ***) +[ + 0x529c0023; (* arm_MOV W3 (rvalue (word 57345)) *) + 0x72a00fe3; (* arm_MOVK W3 (word 127) 16 *) + 0x4e040c60; (* arm_DUP_GEN Q0 X3 32 128 *) + 0x52840023; (* arm_MOV W3 (rvalue (word 8193)) *) + 0x72a07003; (* arm_MOVK W3 (word 896) 16 *) + 0x4e040c61; (* arm_DUP_GEN Q1 X3 32 128 *) + 0xd2800803; (* arm_MOV X3 (rvalue (word 64)) *) + 0x3dc00431; (* arm_LDR Q17 X1 (Immediate_Offset (word 16)) *) + 0x3dc00832; (* arm_LDR Q18 X1 (Immediate_Offset (word 32)) *) + 0x3dc00c33; (* arm_LDR Q19 X1 (Immediate_Offset (word 48)) *) + 0x3cc40430; (* arm_LDR Q16 X1 (Postimmediate_Offset (word 64)) *) + 0x3dc00455; (* arm_LDR Q21 X2 (Immediate_Offset (word 16)) *) + 0x3dc00856; (* arm_LDR Q22 X2 (Immediate_Offset (word 32)) *) + 0x3dc00c57; (* arm_LDR Q23 X2 (Immediate_Offset (word 48)) *) + 0x3cc40454; (* arm_LDR Q20 X2 (Postimmediate_Offset (word 64)) *) + 0x0eb4c218; (* arm_SMULL_VEC Q24 Q16 Q20 32 *) + 0x4eb4c219; (* arm_SMULL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5c23a; (* arm_SMULL_VEC Q26 Q17 Q21 32 *) + 0x4eb5c23b; (* arm_SMULL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6c25c; (* arm_SMULL_VEC Q28 Q18 Q22 32 *) + 0x4eb6c25d; (* arm_SMULL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7c27e; (* arm_SMULL_VEC Q30 Q19 Q23 32 *) + 0x4eb7c27f; (* arm_SMULL2_VEC Q31 Q19 Q23 32 *) + 0x3dc0f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 960)) *) + 0x3dc0f431; (* arm_LDR Q17 
X1 (Immediate_Offset (word 976)) *) + 0x3dc0f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 992)) *) + 0x3dc0fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 1008)) *) + 0x3dc0f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 960)) *) + 0x3dc0f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 976)) *) + 0x3dc0f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 992)) *) + 0x3dc0fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 1008)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc1f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 1984)) *) + 0x3dc1f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 2000)) *) + 0x3dc1f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 2016)) *) + 0x3dc1fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 2032)) *) + 0x3dc1f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 1984)) *) + 0x3dc1f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 2000)) *) + 0x3dc1f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 2016)) *) + 0x3dc1fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 2032)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc2f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 3008)) *) + 0x3dc2f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 3024)) *) + 0x3dc2f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 3040)) *) + 0x3dc2fc33; (* arm_LDR Q19 X1 (Immediate_Offset 
(word 3056)) *) + 0x3dc2f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 3008)) *) + 0x3dc2f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 3024)) *) + 0x3dc2f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 3040)) *) + 0x3dc2fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 3056)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc3f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 4032)) *) + 0x3dc3f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 4048)) *) + 0x3dc3f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 4064)) *) + 0x3dc3fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 4080)) *) + 0x3dc3f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 4032)) *) + 0x3dc3f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 4048)) *) + 0x3dc3f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 4064)) *) + 0x3dc3fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 4080)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x4e991b10; (* arm_UZP1 Q16 Q24 Q25 32 *) + 0x4ea19e10; (* arm_MUL_VEC Q16 Q16 Q1 32 128 *) + 0x0ea0a218; (* arm_SMLSL_VEC Q24 Q16 Q0 32 *) + 0x4ea0a219; (* arm_SMLSL2_VEC Q25 Q16 Q0 32 *) + 0x4e995b10; (* arm_UZP2 Q16 Q24 Q25 32 *) + 0x4e9b1b51; (* arm_UZP1 Q17 Q26 Q27 32 *) + 0x4ea19e31; (* arm_MUL_VEC Q17 Q17 Q1 32 128 *) + 0x0ea0a23a; (* arm_SMLSL_VEC Q26 Q17 Q0 32 *) + 0x4ea0a23b; (* 
arm_SMLSL2_VEC Q27 Q17 Q0 32 *) + 0x4e9b5b51; (* arm_UZP2 Q17 Q26 Q27 32 *) + 0x4e9d1b92; (* arm_UZP1 Q18 Q28 Q29 32 *) + 0x4ea19e52; (* arm_MUL_VEC Q18 Q18 Q1 32 128 *) + 0x0ea0a25c; (* arm_SMLSL_VEC Q28 Q18 Q0 32 *) + 0x4ea0a25d; (* arm_SMLSL2_VEC Q29 Q18 Q0 32 *) + 0x4e9d5b92; (* arm_UZP2 Q18 Q28 Q29 32 *) + 0x4e9f1bd3; (* arm_UZP1 Q19 Q30 Q31 32 *) + 0x4ea19e73; (* arm_MUL_VEC Q19 Q19 Q1 32 128 *) + 0x0ea0a27e; (* arm_SMLSL_VEC Q30 Q19 Q0 32 *) + 0x4ea0a27f; (* arm_SMLSL2_VEC Q31 Q19 Q0 32 *) + 0x4e9f5bd3; (* arm_UZP2 Q19 Q30 Q31 32 *) + 0x3d800411; (* arm_STR Q17 X0 (Immediate_Offset (word 16)) *) + 0x3d800812; (* arm_STR Q18 X0 (Immediate_Offset (word 32)) *) + 0x3d800c13; (* arm_STR Q19 X0 (Immediate_Offset (word 48)) *) + 0x3c840410; (* arm_STR Q16 X0 (Postimmediate_Offset (word 64)) *) + 0xf1001063; (* arm_SUBS X3 X3 (rvalue (word 4)) *) + 0xb5fff2e3; (* arm_CBNZ X3 (word 2096732) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_POINTWISE_ACC_L5_EXEC = ARM_MK_EXEC_RULE mldsa_pointwise_acc_l5_mc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L5_CORRECT = prove + (`!r a b x y pc. + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (a, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (b, 5120) /\ + nonoverlapping (r, 1024) (a, 5120) /\ + nonoverlapping (r, 1024) (b, 5120) /\ + nonoverlapping (a, 5120) (b, 5120) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l5_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1280 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1280 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. 
i < 1280 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read PC s = word(pc + 0x1C4) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + + (* Setup *) + MAP_EVERY X_GEN_TAC + [`r:int64`; `a:int64`; `b:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL; + fst MLDSA_POINTWISE_ACC_L5_EXEC] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + + (* Lift x bound to match y bound for product lemma *) + SUBGOAL_THEN + `!i. i < 1280 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC[SOME_FLAGS; MODIFIABLE_SIMD_REGS] THEN + + (* Initialize and merge memory *) + ENSURES_INIT_TAC "s0" THEN + + (* Merge a: 5120 bytes = 320 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "a" 0 320 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + + (* Merge b: 5120 bytes = 320 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "b" 0 320 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + (* Simulate all 1703 instructions with SIMD simplification *) + MAP_EVERY (fun n -> ARM_STEPS_TAC MLDSA_POINTWISE_ACC_L5_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[arm_mldsa_pointwise_montred']) + (1--1703) THEN + ENSURES_FINAL_STATE_TAC THEN 
ASM_REWRITE_TAC[] THEN + + (* Discard unused input memory (a, b) bytes128 reads *) + REPEAT(FIRST_X_ASSUM(K ALL_TAC o + check (fun th -> + let t = concl th in + is_eq t && + let lhs = fst(dest_eq t) in + can (find_term (fun t -> is_const t && fst(dest_const t) = "memory")) lhs && + (can (find_term (fun t -> t = `a:int64`)) lhs || + can (find_term (fun t -> t = `b:int64`)) lhs)))) THEN + + (* Split remaining bytes128 -> bytes32 for output memory *) + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE (SIMD_SIMPLIFY_CONV []) o + CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (* Expand output cases, substitute, simplify subwords *) + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + + (* Rewrite ARM montred to standard montred for CONGBOUND *) + REWRITE_TAC[ARM_MLDSA_MONTRED_EQ] THEN + + (* Product bounds (tight: 8380416 * 75423752 = 632082418040832) *) + SUBGOAL_THEN + `!i. 
i < 1280 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + (* Prove postcondition - congruence + bounds for each coefficient *) + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1280 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1280 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1280`)) in + let prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let pairs = pair_up (conjuncts w) in + (* Prove each pair independently *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3;4] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC 
MONO_AND THEN CONJ_TAC THENL + [(* Congruence branch *) + REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l5; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN INT_ARITH_TAC; + (* Bounds branch *) + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L5_SUBROUTINE_CORRECT = prove + (`!r a b x y pc returnaddress. + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (a, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (b, 5120) /\ + nonoverlapping (r, 1024) (a, 5120) /\ + nonoverlapping (r, 1024) (b, 5120) /\ + nonoverlapping (a, 5120) (b, 5120) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l5_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1280 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1280 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. 
read PC s = returnaddress /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + REWRITE_TAC[fst MLDSA_POINTWISE_ACC_L5_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POINTWISE_ACC_L5_EXEC + (REWRITE_RULE[fst MLDSA_POINTWISE_ACC_L5_EXEC] + MLDSA_POINTWISE_ACC_L5_CORRECT));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "arm/proofs/consttime.ml";; +needs "aarch64/proofs/subroutine_signatures.ml";; + +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:false + (assoc "mldsa_pointwise_acc_l5" subroutine_signatures) + MLDSA_POINTWISE_ACC_L5_SUBROUTINE_CORRECT + MLDSA_POINTWISE_ACC_L5_EXEC;; + +let MLDSA_POINTWISE_ACC_L5_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e r a b pc returnaddress. + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (r,1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (a,5120) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (b,5120) /\ + nonoverlapping (r,1024) (a,5120) /\ + nonoverlapping (r,1024) (b,5120) /\ + nonoverlapping (a,5120) (b,5120) + ==> ensures arm + (\s. + aligned_bytes_loaded s (word pc) + mldsa_pointwise_acc_l5_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + read events s = e) + (\s. + read PC s = returnaddress /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events a b r pc returnaddress /\ + memaccess_inbounds e2 [a,5120; b,5120; r,1024] + [r,1024])) + (\s s'. 
true)`, + ASSERT_CONCL_TAC full_spec THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars MLDSA_POINTWISE_ACC_L5_EXEC);; + diff --git a/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml new file mode 100644 index 000000000..60ee8860e --- /dev/null +++ b/proofs/hol_light/aarch64/proofs/mldsa_pointwise_acc_l7.ml @@ -0,0 +1,422 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "aarch64/proofs/aarch64_utils.ml";; + +(**** print_literal_from_elf "aarch64/mldsa/mldsa_pointwise_acc_l7.o";; + ****) + +let mldsa_pointwise_acc_l7_mc = define_assert_from_elf + "mldsa_pointwise_acc_l7_mc" "aarch64/mldsa/mldsa_pointwise_acc_l7.o" +(*** BYTECODE START ***) +[ + 0x529c0023; (* arm_MOV W3 (rvalue (word 57345)) *) + 0x72a00fe3; (* arm_MOVK W3 (word 127) 16 *) + 0x4e040c60; (* arm_DUP_GEN Q0 X3 32 128 *) + 0x52840023; (* arm_MOV W3 (rvalue (word 8193)) *) + 0x72a07003; (* arm_MOVK W3 (word 896) 16 *) + 0x4e040c61; (* arm_DUP_GEN Q1 X3 32 128 *) + 0xd2800803; (* arm_MOV X3 (rvalue (word 64)) *) + 0x3dc00431; (* arm_LDR Q17 X1 (Immediate_Offset (word 16)) *) + 0x3dc00832; (* arm_LDR Q18 X1 (Immediate_Offset (word 32)) *) + 0x3dc00c33; (* arm_LDR Q19 X1 (Immediate_Offset (word 48)) *) + 0x3cc40430; (* arm_LDR Q16 X1 (Postimmediate_Offset (word 64)) *) + 0x3dc00455; (* arm_LDR Q21 X2 (Immediate_Offset (word 16)) *) + 0x3dc00856; (* arm_LDR Q22 X2 (Immediate_Offset (word 32)) *) + 0x3dc00c57; (* arm_LDR Q23 X2 (Immediate_Offset (word 48)) *) + 0x3cc40454; (* arm_LDR Q20 X2 
(Postimmediate_Offset (word 64)) *) + 0x0eb4c218; (* arm_SMULL_VEC Q24 Q16 Q20 32 *) + 0x4eb4c219; (* arm_SMULL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5c23a; (* arm_SMULL_VEC Q26 Q17 Q21 32 *) + 0x4eb5c23b; (* arm_SMULL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6c25c; (* arm_SMULL_VEC Q28 Q18 Q22 32 *) + 0x4eb6c25d; (* arm_SMULL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7c27e; (* arm_SMULL_VEC Q30 Q19 Q23 32 *) + 0x4eb7c27f; (* arm_SMULL2_VEC Q31 Q19 Q23 32 *) + 0x3dc0f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 960)) *) + 0x3dc0f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 976)) *) + 0x3dc0f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 992)) *) + 0x3dc0fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 1008)) *) + 0x3dc0f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 960)) *) + 0x3dc0f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 976)) *) + 0x3dc0f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 992)) *) + 0x3dc0fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 1008)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc1f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 1984)) *) + 0x3dc1f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 2000)) *) + 0x3dc1f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 2016)) *) + 0x3dc1fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 2032)) *) + 0x3dc1f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 1984)) *) + 0x3dc1f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 2000)) *) + 0x3dc1f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 2016)) *) + 0x3dc1fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 2032)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* 
arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc2f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 3008)) *) + 0x3dc2f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 3024)) *) + 0x3dc2f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 3040)) *) + 0x3dc2fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 3056)) *) + 0x3dc2f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 3008)) *) + 0x3dc2f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 3024)) *) + 0x3dc2f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 3040)) *) + 0x3dc2fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 3056)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc3f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 4032)) *) + 0x3dc3f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 4048)) *) + 0x3dc3f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 4064)) *) + 0x3dc3fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 4080)) *) + 0x3dc3f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 4032)) *) + 0x3dc3f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 4048)) *) + 0x3dc3f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 4064)) *) + 0x3dc3fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 4080)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* 
arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc4f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 5056)) *) + 0x3dc4f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 5072)) *) + 0x3dc4f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 5088)) *) + 0x3dc4fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 5104)) *) + 0x3dc4f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 5056)) *) + 0x3dc4f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 5072)) *) + 0x3dc4f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 5088)) *) + 0x3dc4fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 5104)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x3dc5f030; (* arm_LDR Q16 X1 (Immediate_Offset (word 6080)) *) + 0x3dc5f431; (* arm_LDR Q17 X1 (Immediate_Offset (word 6096)) *) + 0x3dc5f832; (* arm_LDR Q18 X1 (Immediate_Offset (word 6112)) *) + 0x3dc5fc33; (* arm_LDR Q19 X1 (Immediate_Offset (word 6128)) *) + 0x3dc5f054; (* arm_LDR Q20 X2 (Immediate_Offset (word 6080)) *) + 0x3dc5f455; (* arm_LDR Q21 X2 (Immediate_Offset (word 6096)) *) + 0x3dc5f856; (* arm_LDR Q22 X2 (Immediate_Offset (word 6112)) *) + 0x3dc5fc57; (* arm_LDR Q23 X2 (Immediate_Offset (word 6128)) *) + 0x0eb48218; (* arm_SMLAL_VEC Q24 Q16 Q20 32 *) + 0x4eb48219; (* arm_SMLAL2_VEC Q25 Q16 Q20 32 *) + 0x0eb5823a; (* arm_SMLAL_VEC Q26 Q17 Q21 32 *) + 0x4eb5823b; (* arm_SMLAL2_VEC Q27 Q17 Q21 32 *) + 0x0eb6825c; (* arm_SMLAL_VEC Q28 Q18 Q22 32 *) + 0x4eb6825d; (* arm_SMLAL2_VEC Q29 Q18 Q22 32 *) + 0x0eb7827e; (* arm_SMLAL_VEC Q30 Q19 Q23 32 *) + 0x4eb7827f; (* arm_SMLAL2_VEC Q31 Q19 Q23 32 *) + 0x4e991b10; (* 
arm_UZP1 Q16 Q24 Q25 32 *) + 0x4ea19e10; (* arm_MUL_VEC Q16 Q16 Q1 32 128 *) + 0x0ea0a218; (* arm_SMLSL_VEC Q24 Q16 Q0 32 *) + 0x4ea0a219; (* arm_SMLSL2_VEC Q25 Q16 Q0 32 *) + 0x4e995b10; (* arm_UZP2 Q16 Q24 Q25 32 *) + 0x4e9b1b51; (* arm_UZP1 Q17 Q26 Q27 32 *) + 0x4ea19e31; (* arm_MUL_VEC Q17 Q17 Q1 32 128 *) + 0x0ea0a23a; (* arm_SMLSL_VEC Q26 Q17 Q0 32 *) + 0x4ea0a23b; (* arm_SMLSL2_VEC Q27 Q17 Q0 32 *) + 0x4e9b5b51; (* arm_UZP2 Q17 Q26 Q27 32 *) + 0x4e9d1b92; (* arm_UZP1 Q18 Q28 Q29 32 *) + 0x4ea19e52; (* arm_MUL_VEC Q18 Q18 Q1 32 128 *) + 0x0ea0a25c; (* arm_SMLSL_VEC Q28 Q18 Q0 32 *) + 0x4ea0a25d; (* arm_SMLSL2_VEC Q29 Q18 Q0 32 *) + 0x4e9d5b92; (* arm_UZP2 Q18 Q28 Q29 32 *) + 0x4e9f1bd3; (* arm_UZP1 Q19 Q30 Q31 32 *) + 0x4ea19e73; (* arm_MUL_VEC Q19 Q19 Q1 32 128 *) + 0x0ea0a27e; (* arm_SMLSL_VEC Q30 Q19 Q0 32 *) + 0x4ea0a27f; (* arm_SMLSL2_VEC Q31 Q19 Q0 32 *) + 0x4e9f5bd3; (* arm_UZP2 Q19 Q30 Q31 32 *) + 0x3d800411; (* arm_STR Q17 X0 (Immediate_Offset (word 16)) *) + 0x3d800812; (* arm_STR Q18 X0 (Immediate_Offset (word 32)) *) + 0x3d800c13; (* arm_STR Q19 X0 (Immediate_Offset (word 48)) *) + 0x3c840410; (* arm_STR Q16 X0 (Postimmediate_Offset (word 64)) *) + 0xf1001063; (* arm_SUBS X3 X3 (rvalue (word 4)) *) + 0xb5ffeee3; (* arm_CBNZ X3 (word 2096604) *) + 0xd65f03c0 (* arm_RET X30 *) +];; +(*** BYTECODE END ***) + +let MLDSA_POINTWISE_ACC_L7_EXEC = ARM_MK_EXEC_RULE mldsa_pointwise_acc_l7_mc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L7_CORRECT = prove + (`!r a b x y pc. 
+ nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (a, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (b, 7168) /\ + nonoverlapping (r, 1024) (a, 7168) /\ + nonoverlapping (r, 1024) (b, 7168) /\ + nonoverlapping (a, 7168) (b, 7168) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l7_mc /\ + read PC s = word pc /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1792 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1792 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read PC s = word(pc + 0x244) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l7 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + + (* Setup *) + MAP_EVERY X_GEN_TAC + [`r:int64`; `a:int64`; `b:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL; + fst MLDSA_POINTWISE_ACC_L7_EXEC] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + + (* Lift x bound to match y bound for product lemma *) + SUBGOAL_THEN + `!i. 
i < 1792 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC[SOME_FLAGS; MODIFIABLE_SIMD_REGS] THEN + + (* Initialize and merge memory *) + ENSURES_INIT_TAC "s0" THEN + + (* Merge a: 7168 bytes = 448 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "a" 0 448 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + + (* Merge b: 7168 bytes = 448 x 128-bit blocks *) + MEMORY_128_FROM_32_TAC "b" 0 448 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + STRIP_TAC THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + (* Simulate all 2215 instructions with SIMD simplification *) + MAP_EVERY (fun n -> ARM_STEPS_TAC MLDSA_POINTWISE_ACC_L7_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[arm_mldsa_pointwise_montred']) + (1--2215) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (* Discard unused input memory (a, b) bytes128 reads *) + REPEAT(FIRST_X_ASSUM(K ALL_TAC o + check (fun th -> + let t = concl th in + is_eq t && + let lhs = fst(dest_eq t) in + can (find_term (fun t -> is_const t && fst(dest_const t) = "memory")) lhs && + (can (find_term (fun t -> t = `a:int64`)) lhs || + can (find_term (fun t -> t = `b:int64`)) lhs)))) THEN + + (* Split remaining bytes128 -> bytes32 for output memory *) + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE (SIMD_SIMPLIFY_CONV []) o + CONV_RULE(READ_MEMORY_SPLIT_CONV 2) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (* Expand output cases, substitute, simplify subwords *) + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + 
REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + + (* Rewrite ARM montred to standard montred for CONGBOUND *) + REWRITE_TAC[ARM_MLDSA_MONTRED_EQ] THEN + + (* Product bounds (tight: 8380416 * 75423752 = 632082418040832) *) + SUBGOAL_THEN + `!i. i < 1792 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + (* Prove postcondition - congruence + bounds for each coefficient *) + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1792 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1792 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1792`)) in + let prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let 
pairs = pair_up (conjuncts w) in + (* Prove each pair independently *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3;4;5;6] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [(* Congruence branch *) + REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l7; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN INT_ARITH_TAC; + (* Bounds branch *) + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L7_SUBROUTINE_CORRECT = prove + (`!r a b x y pc returnaddress. + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (r, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (a, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (b, 7168) /\ + nonoverlapping (r, 1024) (a, 7168) /\ + nonoverlapping (r, 1024) (b, 7168) /\ + nonoverlapping (a, 7168) (b, 7168) + ==> ensures arm + (\s. 
aligned_bytes_loaded s (word pc) mldsa_pointwise_acc_l7_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + (!i. i < 1792 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1792 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read PC s = returnaddress /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add r (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l7 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(r, 1024)])`, + REWRITE_TAC[fst MLDSA_POINTWISE_ACC_L7_EXEC] THEN + ARM_ADD_RETURN_NOSTACK_TAC MLDSA_POINTWISE_ACC_L7_EXEC + (REWRITE_RULE[fst MLDSA_POINTWISE_ACC_L7_EXEC] + MLDSA_POINTWISE_ACC_L7_CORRECT));; + + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "arm/proofs/consttime.ml";; +needs "aarch64/proofs/subroutine_signatures.ml";; + +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:false + (assoc "mldsa_pointwise_acc_l7" subroutine_signatures) + MLDSA_POINTWISE_ACC_L7_SUBROUTINE_CORRECT + MLDSA_POINTWISE_ACC_L7_EXEC;; + +let MLDSA_POINTWISE_ACC_L7_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e r a b pc returnaddress. + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (r,1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (a,7168) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (b,7168) /\ + nonoverlapping (r,1024) (a,7168) /\ + nonoverlapping (r,1024) (b,7168) /\ + nonoverlapping (a,7168) (b,7168) + ==> ensures arm + (\s. 
+ aligned_bytes_loaded s (word pc) + mldsa_pointwise_acc_l7_mc /\ + read PC s = word pc /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [r; a; b] s /\ + read events s = e) + (\s. + read PC s = returnaddress /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events a b r pc returnaddress /\ + memaccess_inbounds e2 [a,7168; b,7168; r,1024] + [r,1024])) + (\s s'. true)`, + ASSERT_CONCL_TAC full_spec THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars MLDSA_POINTWISE_ACC_L7_EXEC);; diff --git a/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml b/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml index 01ba29e17..125613025 100644 --- a/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml +++ b/proofs/hol_light/aarch64/proofs/subroutine_signatures.ml @@ -46,4 +46,58 @@ let subroutine_signatures = [ [(* temporary buffers *) ]) ); + +("mldsa_pointwise_acc_l4", + ([(*args*) + ("r", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1024]", (*is const?*)"true"); + ("b", "int32_t[static 1024]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1024"(* num elems *), 4(* elem bytesize *)); + ("b", "1024"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("r", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + +("mldsa_pointwise_acc_l5", + ([(*args*) + ("r", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1280]", (*is const?*)"true"); + ("b", "int32_t[static 1280]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1280"(* num elems *), 4(* elem bytesize *)); + ("b", "1280"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("r", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + +("mldsa_pointwise_acc_l7", + ([(*args*) + ("r", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1792]", (*is const?*)"true"); + ("b", "int32_t[static 1792]", (*is 
const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1792"(* num elems *), 4(* elem bytesize *)); + ("b", "1792"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("r", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); ];; diff --git a/proofs/hol_light/common/mldsa_specs.ml b/proofs/hol_light/common/mldsa_specs.ml index 6c6ab2b74..aedba80bc 100644 --- a/proofs/hol_light/common/mldsa_specs.ml +++ b/proofs/hol_light/common/mldsa_specs.ml @@ -193,6 +193,34 @@ let ARM_MLDSA_FORWARD_NTT_CONV = GEN_REWRITE_CONV DEPTH_CONV [INT_OF_NUM_POW; INT_OF_NUM_REM] THENC ONCE_DEPTH_CONV EXP_MOD_CONV THENC INT_REDUCE_CONV;; +let mldsa_pointwise_acc_l4 = define + `mldsa_pointwise_acc_l4 (f:num->int) (g:num->int) i = + ((f i * g i + + f (i + 256) * g (i + 256) + + f (i + 512) * g (i + 512) + + f (i + 768) * g (i + 768)) * + &(inverse_mod 8380417 4294967296)) rem &8380417`;; + +let mldsa_pointwise_acc_l5 = define + `mldsa_pointwise_acc_l5 (f:num->int) (g:num->int) i = + ((f i * g i + + f (i + 256) * g (i + 256) + + f (i + 512) * g (i + 512) + + f (i + 768) * g (i + 768) + + f (i + 1024) * g (i + 1024)) * + &(inverse_mod 8380417 4294967296)) rem &8380417`;; + +let mldsa_pointwise_acc_l7 = define + `mldsa_pointwise_acc_l7 (f:num->int) (g:num->int) i = + ((f i * g i + + f (i + 256) * g (i + 256) + + f (i + 512) * g (i + 512) + + f (i + 768) * g (i + 768) + + f (i + 1024) * g (i + 1024) + + f (i + 1280) * g (i + 1280) + + f (i + 1536) * g (i + 1536)) * + &(inverse_mod 8380417 4294967296)) rem &8380417`;; + (* ------------------------------------------------------------------------- *) (* Abbreviate the Barrett reduction and multiplication and Montgomery *) (* reduction patterns in the x86 code. 
*) diff --git a/proofs/hol_light/x86_64/Makefile b/proofs/hol_light/x86_64/Makefile index 6ade9d75e..6feeb89ad 100644 --- a/proofs/hol_light/x86_64/Makefile +++ b/proofs/hol_light/x86_64/Makefile @@ -50,7 +50,10 @@ endif SPLIT=tr ';' '\n' -OBJ = mldsa/mldsa_ntt.o mldsa/mldsa_intt.o mldsa/mldsa_pointwise.o +OBJ = mldsa/mldsa_ntt.o mldsa/mldsa_intt.o mldsa/mldsa_pointwise.o \ + mldsa/mldsa_pointwise_acc_l4.o \ + mldsa/mldsa_pointwise_acc_l5.o \ + mldsa/mldsa_pointwise_acc_l7.o # Build object files from assembly sources $(OBJ): %.o : %.S diff --git a/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l4.S b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l4.S new file mode 100644 index 000000000..9f4fc35a1 --- /dev/null +++ b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l4.S @@ -0,0 +1,136 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [REF_AVX2] + * CRYSTALS-Dilithium optimized AVX2 implementation + * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/dilithium/tree/master/avx2 + */ + +/* + * This file is derived from the public domain + * AVX2 Dilithium implementation @[REF_AVX2]. + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/x86_64/src/pointwise_acc_l4.S using scripts/simpasm. Do not modify it directly. 
+ */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l4_avx2 +_PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l4_avx2: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l4_avx2 +PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l4_avx2: +#endif + + .cfi_startproc + endbr64 + vmovdqa 0x20(%rcx), %ymm0 + vmovdqa (%rcx), %ymm1 + xorl %eax, %eax + +Lpointwise_acc_l4_avx2_looptop2: + vmovdqa (%rsi), %ymm6 + vmovdqa 0x20(%rsi), %ymm8 + vmovdqa (%rdx), %ymm10 + vmovdqa 0x20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vmovdqa %ymm6, %ymm2 + vmovdqa %ymm7, %ymm3 + vmovdqa %ymm8, %ymm4 + vmovdqa %ymm9, %ymm5 + vmovdqa 0x400(%rsi), %ymm6 + vmovdqa 0x420(%rsi), %ymm8 + vmovdqa 0x400(%rdx), %ymm10 + vmovdqa 0x420(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x800(%rsi), %ymm6 + vmovdqa 0x820(%rsi), %ymm8 + vmovdqa 0x800(%rdx), %ymm10 + vmovdqa 0x820(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 
0xc00(%rsi), %ymm6 + vmovdqa 0xc20(%rsi), %ymm8 + vmovdqa 0xc00(%rdx), %ymm10 + vmovdqa 0xc20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vpmuldq %ymm2, %ymm0, %ymm6 + vpmuldq %ymm3, %ymm0, %ymm7 + vpmuldq %ymm4, %ymm0, %ymm8 + vpmuldq %ymm5, %ymm0, %ymm9 + vpmuldq %ymm6, %ymm1, %ymm6 + vpmuldq %ymm7, %ymm1, %ymm7 + vpmuldq %ymm8, %ymm1, %ymm8 + vpmuldq %ymm9, %ymm1, %ymm9 + vpsubq %ymm6, %ymm2, %ymm2 + vpsubq %ymm7, %ymm3, %ymm3 + vpsubq %ymm8, %ymm4, %ymm4 + vpsubq %ymm9, %ymm5, %ymm5 + vpsrlq $0x20, %ymm2, %ymm2 + vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7] + vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7] + vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7] + vmovdqa %ymm2, (%rdi) + vmovdqa %ymm4, 0x20(%rdi) + addq $0x40, %rsi + addq $0x40, %rdx + addq $0x40, %rdi + addl $0x1, %eax + cmpl $0x10, %eax + jb Lpointwise_acc_l4_avx2_looptop2 + retq + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l5.S b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l5.S new file mode 100644 index 000000000..0882ffa6c --- /dev/null +++ b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l5.S @@ -0,0 +1,152 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [REF_AVX2] + * CRYSTALS-Dilithium optimized AVX2 implementation + * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, 
Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/dilithium/tree/master/avx2 + */ + +/* + * This file is derived from the public domain + * AVX2 Dilithium implementation @[REF_AVX2]. + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/x86_64/src/pointwise_acc_l5.S using scripts/simpasm. Do not modify it directly. + */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l5_avx2 +_PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l5_avx2: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l5_avx2 +PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l5_avx2: +#endif + + .cfi_startproc + endbr64 + vmovdqa 0x20(%rcx), %ymm0 + vmovdqa (%rcx), %ymm1 + xorl %eax, %eax + +Lpointwise_acc_l5_avx2_looptop2: + vmovdqa (%rsi), %ymm6 + vmovdqa 0x20(%rsi), %ymm8 + vmovdqa (%rdx), %ymm10 + vmovdqa 0x20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vmovdqa %ymm6, %ymm2 + vmovdqa %ymm7, %ymm3 + vmovdqa %ymm8, %ymm4 + vmovdqa %ymm9, %ymm5 + vmovdqa 0x400(%rsi), %ymm6 + vmovdqa 0x420(%rsi), %ymm8 + vmovdqa 0x400(%rdx), %ymm10 + vmovdqa 0x420(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x800(%rsi), %ymm6 + vmovdqa 0x820(%rsi), %ymm8 + vmovdqa 0x800(%rdx), %ymm10 + vmovdqa 0x820(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup 
%ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0xc00(%rsi), %ymm6 + vmovdqa 0xc20(%rsi), %ymm8 + vmovdqa 0xc00(%rdx), %ymm10 + vmovdqa 0xc20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x1000(%rsi), %ymm6 + vmovdqa 0x1020(%rsi), %ymm8 + vmovdqa 0x1000(%rdx), %ymm10 + vmovdqa 0x1020(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vpmuldq %ymm2, %ymm0, %ymm6 + vpmuldq %ymm3, %ymm0, %ymm7 + vpmuldq %ymm4, %ymm0, %ymm8 + vpmuldq %ymm5, %ymm0, %ymm9 + vpmuldq %ymm6, %ymm1, %ymm6 + vpmuldq %ymm7, %ymm1, %ymm7 + vpmuldq %ymm8, %ymm1, %ymm8 + vpmuldq %ymm9, %ymm1, %ymm9 + vpsubq %ymm6, %ymm2, %ymm2 + vpsubq %ymm7, %ymm3, %ymm3 + vpsubq %ymm8, %ymm4, %ymm4 + vpsubq %ymm9, %ymm5, %ymm5 + vpsrlq $0x20, %ymm2, %ymm2 + vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7] + vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7] + vpblendd $0xaa, 
%ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7] + vmovdqa %ymm2, (%rdi) + vmovdqa %ymm4, 0x20(%rdi) + addq $0x40, %rsi + addq $0x40, %rdx + addq $0x40, %rdi + addl $0x1, %eax + cmpl $0x10, %eax + jb Lpointwise_acc_l5_avx2_looptop2 + retq + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l7.S b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l7.S new file mode 100644 index 000000000..6066ccc75 --- /dev/null +++ b/proofs/hol_light/x86_64/mldsa/mldsa_pointwise_acc_l7.S @@ -0,0 +1,184 @@ +/* + * Copyright (c) The mldsa-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [REF_AVX2] + * CRYSTALS-Dilithium optimized AVX2 implementation + * Bai, Ducas, Kiltz, Lepoint, Lyubashevsky, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/dilithium/tree/master/avx2 + */ + +/* + * This file is derived from the public domain + * AVX2 Dilithium implementation @[REF_AVX2]. + */ + + +/* + * WARNING: This file is auto-derived from the mldsa-native source file + * dev/x86_64/src/pointwise_acc_l7.S using scripts/simpasm. Do not modify it directly. 
+ */ + +.text +.balign 4 +#ifdef __APPLE__ +.global _PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l7_avx2 +_PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l7_avx2: +#else +.global PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l7_avx2 +PQCP_MLDSA_NATIVE_MLDSA44_pointwise_acc_l7_avx2: +#endif + + .cfi_startproc + endbr64 + vmovdqa 0x20(%rcx), %ymm0 + vmovdqa (%rcx), %ymm1 + xorl %eax, %eax + +Lpointwise_acc_l7_avx2_looptop2: + vmovdqa (%rsi), %ymm6 + vmovdqa 0x20(%rsi), %ymm8 + vmovdqa (%rdx), %ymm10 + vmovdqa 0x20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vmovdqa %ymm6, %ymm2 + vmovdqa %ymm7, %ymm3 + vmovdqa %ymm8, %ymm4 + vmovdqa %ymm9, %ymm5 + vmovdqa 0x400(%rsi), %ymm6 + vmovdqa 0x420(%rsi), %ymm8 + vmovdqa 0x400(%rdx), %ymm10 + vmovdqa 0x420(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x800(%rsi), %ymm6 + vmovdqa 0x820(%rsi), %ymm8 + vmovdqa 0x800(%rdx), %ymm10 + vmovdqa 0x820(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 
0xc00(%rsi), %ymm6 + vmovdqa 0xc20(%rsi), %ymm8 + vmovdqa 0xc00(%rdx), %ymm10 + vmovdqa 0xc20(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x1000(%rsi), %ymm6 + vmovdqa 0x1020(%rsi), %ymm8 + vmovdqa 0x1000(%rdx), %ymm10 + vmovdqa 0x1020(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x1400(%rsi), %ymm6 + vmovdqa 0x1420(%rsi), %ymm8 + vmovdqa 0x1400(%rdx), %ymm10 + vmovdqa 0x1420(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 + vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vmovdqa 0x1800(%rsi), %ymm6 + vmovdqa 0x1820(%rsi), %ymm8 + vmovdqa 0x1800(%rdx), %ymm10 + vmovdqa 0x1820(%rdx), %ymm12 + vpsrlq $0x20, %ymm6, %ymm7 + vpsrlq $0x20, %ymm8, %ymm9 + vmovshdup %ymm10, %ymm11 # ymm11 = ymm10[1,1,3,3,5,5,7,7] + vmovshdup %ymm12, %ymm13 # ymm13 = ymm12[1,1,3,3,5,5,7,7] + vpmuldq %ymm10, %ymm6, %ymm6 + vpmuldq %ymm11, %ymm7, %ymm7 + vpmuldq %ymm12, %ymm8, %ymm8 
+ vpmuldq %ymm13, %ymm9, %ymm9 + vpaddq %ymm2, %ymm6, %ymm2 + vpaddq %ymm3, %ymm7, %ymm3 + vpaddq %ymm4, %ymm8, %ymm4 + vpaddq %ymm5, %ymm9, %ymm5 + vpmuldq %ymm2, %ymm0, %ymm6 + vpmuldq %ymm3, %ymm0, %ymm7 + vpmuldq %ymm4, %ymm0, %ymm8 + vpmuldq %ymm5, %ymm0, %ymm9 + vpmuldq %ymm6, %ymm1, %ymm6 + vpmuldq %ymm7, %ymm1, %ymm7 + vpmuldq %ymm8, %ymm1, %ymm8 + vpmuldq %ymm9, %ymm1, %ymm9 + vpsubq %ymm6, %ymm2, %ymm2 + vpsubq %ymm7, %ymm3, %ymm3 + vpsubq %ymm8, %ymm4, %ymm4 + vpsubq %ymm9, %ymm5, %ymm5 + vpsrlq $0x20, %ymm2, %ymm2 + vmovshdup %ymm4, %ymm4 # ymm4 = ymm4[1,1,3,3,5,5,7,7] + vpblendd $0xaa, %ymm3, %ymm2, %ymm2 # ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7] + vpblendd $0xaa, %ymm5, %ymm4, %ymm4 # ymm4 = ymm4[0],ymm5[1],ymm4[2],ymm5[3],ymm4[4],ymm5[5],ymm4[6],ymm5[7] + vmovdqa %ymm2, (%rdi) + vmovdqa %ymm4, 0x20(%rdi) + addq $0x40, %rsi + addq $0x40, %rdx + addq $0x40, %rdi + addl $0x1, %eax + cmpl $0x10, %eax + jb Lpointwise_acc_l7_avx2_looptop2 + retq + .cfi_endproc + +#if defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/proofs/hol_light/x86_64/proofs/dump_bytecode.ml b/proofs/hol_light/x86_64/proofs/dump_bytecode.ml index e62d31461..5b5e5522d 100644 --- a/proofs/hol_light/x86_64/proofs/dump_bytecode.ml +++ b/proofs/hol_light/x86_64/proofs/dump_bytecode.ml @@ -16,3 +16,15 @@ print_string "==== bytecode end =====================================\n\n";; print_string "=== bytecode start: x86_64/mldsa/mldsa_pointwise.o ================\n";; print_literal_from_elf "x86_64/mldsa/mldsa_pointwise.o";; print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: x86_64/mldsa/mldsa_pointwise_acc_l4.o ================\n";; +print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l4.o";; +print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: x86_64/mldsa/mldsa_pointwise_acc_l5.o ================\n";; 
+print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l5.o";; +print_string "==== bytecode end =====================================\n\n";; + +print_string "=== bytecode start: x86_64/mldsa/mldsa_pointwise_acc_l7.o ================\n";; +print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l7.o";; +print_string "==== bytecode end =====================================\n\n";; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l4.ml b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l4.ml new file mode 100644 index 000000000..5304dacc4 --- /dev/null +++ b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l4.ml @@ -0,0 +1,567 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "x86/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "x86_64/proofs/mldsa_zetas.ml";; +needs "x86_64/proofs/mldsa_utils.ml";; + +(*** print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l4.o";; + ***) + +let mldsa_pointwise_acc_l4_mc = define_assert_from_elf "mldsa_pointwise_acc_l4_mc" "x86_64/mldsa/mldsa_pointwise_acc_l4.o" +(*** BYTECODE START ***) +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0xc5; 0xfd; 0x6f; 0x41; 0x20; + (* VMOVDQA (%_% ymm0) (Memop Word256 (%% (rcx,32))) *) + 0xc5; 0xfd; 0x6f; 0x09; (* VMOVDQA (%_% ymm1) (Memop Word256 (%% (rcx,0))) *) + 0x31; 0xc0; (* XOR (% eax) (% eax) *) + 0xc5; 0xfd; 0x6f; 0x36; (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,0))) *) + 0xc5; 0x7d; 0x6f; 0x46; 0x20; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,32))) *) + 0xc5; 0x7d; 0x6f; 0x12; (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,0))) *) + 0xc5; 0x7d; 0x6f; 0x62; 0x20; + 
(* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,32))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xfd; 0x6f; 0xd6; (* VMOVDQA (%_% ymm2) (%_% ymm6) *) + 0xc5; 0xfd; 0x6f; 0xdf; (* VMOVDQA (%_% ymm3) (%_% ymm7) *) + 0xc5; 0x7d; 0x7f; 0xc4; (* VMOVDQA (%_% ymm4) (%_% ymm8) *) + 0xc5; 0x7d; 0x7f; 0xcd; (* VMOVDQA (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,1024))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,1056))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,1024))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,1056))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; 
+ (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,2048))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,2080))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,2048))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,2080))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,3072))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,3104))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA 
(%_% ymm10) (Memop Word256 (%% (rdx,3072))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,3104))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm0) (%_% ymm2) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm0) (%_% ymm3) *) + 0xc4; 0x62; 0x7d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm0) (%_% ymm4) *) + 0xc4; 0x62; 0x7d; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm0) (%_% ymm5) *) + 0xc4; 0xe2; 0x75; 0x28; 0xf6; + (* VPMULDQ (%_% ymm6) (%_% ymm1) (%_% ymm6) *) + 0xc4; 0xe2; 0x75; 0x28; 0xff; + (* VPMULDQ (%_% ymm7) (%_% ymm1) (%_% ymm7) *) + 0xc4; 0x42; 0x75; 0x28; 0xc0; + (* VPMULDQ (%_% ymm8) (%_% ymm1) (%_% ymm8) *) + 0xc4; 0x42; 0x75; 0x28; 0xc9; + (* VPMULDQ (%_% ymm9) (%_% ymm1) (%_% ymm9) *) + 0xc5; 0xed; 0xfb; 0xd6; (* VPSUBQ (%_% ymm2) (%_% ymm2) (%_% ymm6) *) + 0xc5; 0xe5; 0xfb; 0xdf; (* VPSUBQ (%_% ymm3) (%_% ymm3) (%_% ymm7) *) + 0xc4; 0xc1; 0x5d; 0xfb; 0xe0; + (* VPSUBQ (%_% ymm4) (%_% ymm4) (%_% ymm8) *) + 0xc4; 0xc1; 0x55; 0xfb; 
0xe9; + (* VPSUBQ (%_% ymm5) (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xed; 0x73; 0xd2; 0x20; + (* VPSRLQ (%_% ymm2) (%_% ymm2) (Imm8 (word 32)) *) + 0xc5; 0xfe; 0x16; 0xe4; (* VMOVSHDUP (%_% ymm4) (%_% ymm4) *) + 0xc4; 0xe3; 0x6d; 0x02; 0xd3; 0xaa; + (* VPBLENDD (%_% ymm2) (%_% ymm2) (%_% ymm3) (Imm8 (word 170)) *) + 0xc4; 0xe3; 0x5d; 0x02; 0xe5; 0xaa; + (* VPBLENDD (%_% ymm4) (%_% ymm4) (%_% ymm5) (Imm8 (word 170)) *) + 0xc5; 0xfd; 0x7f; 0x17; (* VMOVDQA (Memop Word256 (%% (rdi,0))) (%_% ymm2) *) + 0xc5; 0xfd; 0x7f; 0x67; 0x20; + (* VMOVDQA (Memop Word256 (%% (rdi,32))) (%_% ymm4) *) + 0x48; 0x83; 0xc6; 0x40; (* ADD (% rsi) (Imm8 (word 64)) *) + 0x48; 0x83; 0xc2; 0x40; (* ADD (% rdx) (Imm8 (word 64)) *) + 0x48; 0x83; 0xc7; 0x40; (* ADD (% rdi) (Imm8 (word 64)) *) + 0x83; 0xc0; 0x01; (* ADD (% eax) (Imm8 (word 1)) *) + 0x83; 0xf8; 0x10; (* CMP (% eax) (Imm8 (word 16)) *) + 0x0f; 0x82; 0x3a; 0xfe; 0xff; 0xff; + (* JB (Imm32 (word 4294966842)) *) + 0xc3 (* RET *) +];; +(*** BYTECODE END ***) + +let mldsa_pointwise_acc_l4_tmc = define_trimmed "mldsa_pointwise_acc_l4_tmc" mldsa_pointwise_acc_l4_mc;; +let MLDSA_POINTWISE_ACC_L4_TMC_EXEC = X86_MK_CORE_EXEC_RULE mldsa_pointwise_acc_l4_tmc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) + +let MLDSA_POINTWISE_ACC_L4_CORRECT = prove + (`!c a b consts x y pc. 
+ aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc, 0x1D2) (c, 1024) /\ + nonoverlapping (word pc, 0x1D2) (a, 4096) /\ + nonoverlapping (word pc, 0x1D2) (b, 4096) /\ + nonoverlapping (word pc, 0x1D2) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 4096) /\ + nonoverlapping (c, 1024) (b, 4096) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 4096) (b, 4096) /\ + nonoverlapping (a, 4096) (consts, 2496) /\ + nonoverlapping (b, 4096) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) (BUTLAST mldsa_pointwise_acc_l4_tmc) /\ + read RIP s = word pc /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1024 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1024 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = word(pc + 0x1D1) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l4 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [ZMM0; ZMM1; ZMM2; ZMM3; ZMM4; ZMM5; ZMM6; ZMM7; + ZMM8; ZMM9; ZMM10; ZMM11; ZMM12; ZMM13; ZMM14; ZMM15] ,, + MAYCHANGE [RAX] ,, MAYCHANGE SOME_FLAGS ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + + MAP_EVERY X_GEN_TAC + [`c:int64`; `a:int64`; `b:int64`; `consts:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + SUBGOAL_THEN + `!i. 
i < 1024 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC [SOME_FLAGS; fst MLDSA_POINTWISE_ACC_L4_TMC_EXEC] THEN + + GHOST_INTRO_TAC `init_ymm0:int256` `read YMM0` THEN + GHOST_INTRO_TAC `init_ymm1:int256` `read YMM1` THEN + GHOST_INTRO_TAC `init_ymm2:int256` `read YMM2` THEN + GHOST_INTRO_TAC `init_ymm3:int256` `read YMM3` THEN + GHOST_INTRO_TAC `init_ymm4:int256` `read YMM4` THEN + GHOST_INTRO_TAC `init_ymm5:int256` `read YMM5` THEN + GHOST_INTRO_TAC `init_ymm6:int256` `read YMM6` THEN + GHOST_INTRO_TAC `init_ymm7:int256` `read YMM7` THEN + GHOST_INTRO_TAC `init_ymm8:int256` `read YMM8` THEN + GHOST_INTRO_TAC `init_ymm9:int256` `read YMM9` THEN + GHOST_INTRO_TAC `init_ymm10:int256` `read YMM10` THEN + GHOST_INTRO_TAC `init_ymm11:int256` `read YMM11` THEN + GHOST_INTRO_TAC `init_ymm12:int256` `read YMM12` THEN + GHOST_INTRO_TAC `init_ymm13:int256` `read YMM13` THEN + GHOST_INTRO_TAC `init_ymm14:int256` `read YMM14` THEN + GHOST_INTRO_TAC `init_ymm15:int256` `read YMM15` THEN + + MAP_EVERY (fun n -> + let vname = "init_c" ^ string_of_int n in + GHOST_INTRO_TAC (mk_var(vname, `:int256`)) + (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add c (word n)))`)) + (0--31) THEN + ENSURES_INIT_TAC "s0" THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add a (word n))) s0`)) (0--127))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add b 
(word n))) s0`)) (0--127))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + FIRST_X_ASSUM(MP_TAC o CONV_RULE (LAND_CONV WORDLIST_FROM_MEMORY_CONV)) THEN + REWRITE_TAC[mldsa_complete_qdata; MAP; CONS_11] THEN + STRIP_TAC THEN + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add consts (word n))) s0`)) (0--1))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 consts) s = z`] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + (* Product bounds (tight: 8380416 * 75423752 = 632082418040832) *) + SUBGOAL_THEN + `!i. i < 1024 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + MAP_EVERY (fun n -> X86_STEPS_TAC MLDSA_POINTWISE_ACC_L4_TMC_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[mldsa_pointwise_montred]) + (1--1411) THEN + ENSURES_FINAL_STATE_TAC THEN + ASM_REWRITE_TAC[] THEN + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s1411:int256 = xxx`) o concl))) THEN + + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) 
THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + REWRITE_TAC[USHR32_SUBWORD; DUP32_SUBWORD] THEN + REWRITE_TAC[Q_MUL_COMM; GSYM mldsa_pointwise_montred] THEN + REWRITE_TAC[WORD_JOIN_SUBWORD] THEN + + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1024 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1024 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1024`)) in + let prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let pairs = pair_up (conjuncts w) in + (* Prove each pair independently *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l4; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN 
AP_TERM_TAC THEN INT_ARITH_TAC; + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) +(* Subroutine-level correctness for the trimmed code mldsa_pointwise_acc_l4_tmc. It keeps the input bounds and the congruence/bound result on c of MLDSA_POINTWISE_ACC_L4_CORRECT, but is stated from entry (RSP = stackpointer, the 8 bytes at stackpointer hold returnaddress) to return (RIP = returnaddress, RSP = stackpointer + 8), and additionally assumes the 8-byte stack slot does not overlap c, a, b or consts. The proof rewrites both the goal and the core theorem with WORDLIST_FROM_MEMORY_CONV (TWEAK_CONV) and then applies X86_PROMOTE_RETURN_NOSTACK_TAC to the core theorem. *) +let MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_tmc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_tmc) (a, 4096) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_tmc) (b, 4096) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 4096) /\ + nonoverlapping (c, 1024) (b, 4096) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 4096) (b, 4096) /\ + nonoverlapping (a, 4096) (consts, 2496) /\ + nonoverlapping (b, 4096) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 4096) /\ + nonoverlapping (stackpointer, 8) (b, 4096) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l4_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1024 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1024 ==> abs(ival(y i)) <= &75423752) /\ + (!i.
i < 1024 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l4 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l4_tmc + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L4_CORRECT));; +(* The same subroutine-level statement with mldsa_pointwise_acc_l4_mc in place of mldsa_pointwise_acc_l4_tmc (bytes_loaded and the code-region length both use the _mc constant). It is obtained by applying ADD_IBT_RULE to MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_CORRECT, with WORDLIST_FROM_MEMORY_CONV applied to both the goal and that theorem so that the two sides match. *) +let MLDSA_POINTWISE_ACC_L4_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (a, 4096) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (b, 4096) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l4_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 4096) /\ + nonoverlapping (c, 1024) (b, 4096) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 4096) (b, 4096) /\ + nonoverlapping (a, 4096) (consts, 2496) /\ + nonoverlapping (b, 4096) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 4096) /\ + nonoverlapping (stackpointer, 8) (b, 4096) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s.
bytes_loaded s (word pc) mldsa_pointwise_acc_l4_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1024 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1024 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1024 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l4 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + MATCH_ACCEPT_TAC(ADD_IBT_RULE + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_CORRECT)));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. 
*) +(* ========================================================================= *) + +needs "x86/proofs/consttime.ml";; +needs "x86_64/proofs/subroutine_signatures.ml";; +(* Build the safety goal (full_spec) and its list of public variables (public_vars) with mk_safety_spec, from the subroutine_signatures entry for mldsa_pointwise_acc_l4_x86, the core correctness theorem and the TMC exec rule; keep_maychanges is set to true. *) +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:true + (assoc "mldsa_pointwise_acc_l4_x86" subroutine_signatures) + MLDSA_POINTWISE_ACC_L4_CORRECT + MLDSA_POINTWISE_ACC_L4_TMC_EXEC;; +(* Core safety theorem: full_spec is proved by PROVE_SAFETY_SPEC_TAC. MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI and SOME_FLAGS are unfolded in the goal before the proof and again in the resulting theorem. *) +let MLDSA_POINTWISE_ACC_L4_SAFE = + REWRITE_RULE [MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] + (time prove + (full_spec, + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars + MLDSA_POINTWISE_ACC_L4_TMC_EXEC));; +(* Subroutine-level safety for mldsa_pointwise_acc_l4_tmc: there is a function f_events of c, a, b, consts, pc, stackpointer and returnaddress only, such that the events e2 prepended to the initial event list e by a call equal f_events applied to those values, and e2 satisfies memaccess_inbounds for the two region lists given. NOTE(review): the two lists are presumably the readable and the writable regions - confirm against consttime.ml. Proved from MLDSA_POINTWISE_ACC_L4_SAFE with X86_PROMOTE_RETURN_NOSTACK_TAC followed by DISCHARGE_SAFETY_PROPERTY_TAC. *) +let MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress. + aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_tmc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_tmc) (a, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_tmc) (b, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 4096) /\ nonoverlapping (c, 1024) (b, 4096) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 4096) (b, 4096) /\ + nonoverlapping (a, 4096) (consts, 2496) /\ nonoverlapping (b, 4096) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 4096) /\ + nonoverlapping (stackpointer, 8) (b, 4096) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l4_tmc /\ + read RIP s = word pc /\ read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2.
read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,4096; b,4096; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'. true)`, + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l4_tmc + MLDSA_POINTWISE_ACC_L4_SAFE THEN DISCHARGE_SAFETY_PROPERTY_TAC);; +(* The same safety statement with mldsa_pointwise_acc_l4_mc in place of mldsa_pointwise_acc_l4_tmc; it is accepted directly from MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_SAFE via ADD_IBT_RULE. *) +let MLDSA_POINTWISE_ACC_L4_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress. + aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (a, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (b, 4096) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l4_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 4096) /\ nonoverlapping (c, 1024) (b, 4096) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 4096) (b, 4096) /\ + nonoverlapping (a, 4096) (consts, 2496) /\ nonoverlapping (b, 4096) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 4096) /\ + nonoverlapping (stackpointer, 8) (b, 4096) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l4_mc /\ + read RIP s = word pc /\ read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,4096; b,4096; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'.
true)`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE MLDSA_POINTWISE_ACC_L4_NOIBT_SUBROUTINE_SAFE));; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l5.ml b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l5.ml new file mode 100644 index 000000000..bd80bf8d7 --- /dev/null +++ b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l5.ml @@ -0,0 +1,594 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "x86/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "x86_64/proofs/mldsa_zetas.ml";; +needs "x86_64/proofs/mldsa_utils.ml";; + +(*** print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l5.o";; + ***) +(* Machine code of the l5 routine. define_assert_from_elf is given the constant name, the object file path and the expected byte list; every encoding in the list is annotated with its decoded instruction. The code begins with ENDBR64, loads two 256-bit constants from rcx, clears eax, and then runs a loop that ends with CMP eax 16 / JB followed by RET. *) +let mldsa_pointwise_acc_l5_mc = define_assert_from_elf "mldsa_pointwise_acc_l5_mc" "x86_64/mldsa/mldsa_pointwise_acc_l5.o" +(*** BYTECODE START ***) +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0xc5; 0xfd; 0x6f; 0x41; 0x20; + (* VMOVDQA (%_% ymm0) (Memop Word256 (%% (rcx,32))) *) + 0xc5; 0xfd; 0x6f; 0x09; (* VMOVDQA (%_% ymm1) (Memop Word256 (%% (rcx,0))) *) + 0x31; 0xc0; (* XOR (% eax) (% eax) *) + 0xc5; 0xfd; 0x6f; 0x36; (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,0))) *) + 0xc5; 0x7d; 0x6f; 0x46; 0x20; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,32))) *) + 0xc5; 0x7d; 0x6f; 0x12; (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,0))) *) + 0xc5; 0x7d; 0x6f; 0x62; 0x20; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,32))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11)
(%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xfd; 0x6f; 0xd6; (* VMOVDQA (%_% ymm2) (%_% ymm6) *) + 0xc5; 0xfd; 0x6f; 0xdf; (* VMOVDQA (%_% ymm3) (%_% ymm7) *) + 0xc5; 0x7d; 0x7f; 0xc4; (* VMOVDQA (%_% ymm4) (%_% ymm8) *) + 0xc5; 0x7d; 0x7f; 0xcd; (* VMOVDQA (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,1024))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,1056))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,1024))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,1056))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; 
(* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,2048))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,2080))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,2048))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,2080))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,3072))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,3104))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,3072))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,3104))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ 
(%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,4096))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,4128))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,4096))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,4128))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* 
VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm0) (%_% ymm2) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm0) (%_% ymm3) *) + 0xc4; 0x62; 0x7d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm0) (%_% ymm4) *) + 0xc4; 0x62; 0x7d; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm0) (%_% ymm5) *) + 0xc4; 0xe2; 0x75; 0x28; 0xf6; + (* VPMULDQ (%_% ymm6) (%_% ymm1) (%_% ymm6) *) + 0xc4; 0xe2; 0x75; 0x28; 0xff; + (* VPMULDQ (%_% ymm7) (%_% ymm1) (%_% ymm7) *) + 0xc4; 0x42; 0x75; 0x28; 0xc0; + (* VPMULDQ (%_% ymm8) (%_% ymm1) (%_% ymm8) *) + 0xc4; 0x42; 0x75; 0x28; 0xc9; + (* VPMULDQ (%_% ymm9) (%_% ymm1) (%_% ymm9) *) + 0xc5; 0xed; 0xfb; 0xd6; (* VPSUBQ (%_% ymm2) (%_% ymm2) (%_% ymm6) *) + 0xc5; 0xe5; 0xfb; 0xdf; (* VPSUBQ (%_% ymm3) (%_% ymm3) (%_% ymm7) *) + 0xc4; 0xc1; 0x5d; 0xfb; 0xe0; + (* VPSUBQ (%_% ymm4) (%_% ymm4) (%_% ymm8) *) + 0xc4; 0xc1; 0x55; 0xfb; 0xe9; + (* VPSUBQ (%_% ymm5) (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xed; 0x73; 0xd2; 0x20; + (* VPSRLQ (%_% ymm2) (%_% ymm2) (Imm8 (word 32)) *) + 0xc5; 0xfe; 0x16; 0xe4; (* VMOVSHDUP (%_% ymm4) (%_% ymm4) *) + 0xc4; 0xe3; 0x6d; 0x02; 0xd3; 0xaa; + (* VPBLENDD (%_% ymm2) (%_% ymm2) (%_% ymm3) (Imm8 (word 170)) *) + 0xc4; 0xe3; 0x5d; 0x02; 0xe5; 0xaa; + (* VPBLENDD (%_% ymm4) (%_% ymm4) (%_% ymm5) (Imm8 (word 170)) *) + 0xc5; 0xfd; 0x7f; 0x17; (* VMOVDQA (Memop Word256 (%% (rdi,0))) (%_% ymm2) *) + 0xc5; 0xfd; 0x7f; 0x67; 0x20; + (* VMOVDQA (Memop Word256 (%% (rdi,32))) (%_% ymm4) *) + 0x48; 0x83; 0xc6; 0x40; (* ADD (% rsi) (Imm8 (word 64)) *) + 0x48; 0x83; 0xc2; 0x40; (* ADD (% rdx) (Imm8 (word 64)) *) + 0x48; 0x83; 0xc7; 0x40; (* ADD (% rdi) (Imm8 (word 64)) *) + 0x83; 0xc0; 0x01; (* ADD (% eax) (Imm8 (word 1)) *) + 0x83; 0xf8; 0x10; (* CMP (% eax) (Imm8 (word 16)) *) + 0x0f; 0x82; 0xe1; 0xfd; 0xff; 0xff; 
+ (* JB (Imm32 (word 4294966753)) *) + 0xc3 (* RET *) +];; +(*** BYTECODE END ***) +(* mldsa_pointwise_acc_l5_tmc is the define_trimmed form of the machine code (NOTE(review): presumably the code without its leading ENDBR64 - confirm); MLDSA_POINTWISE_ACC_L5_TMC_EXEC is its X86_MK_CORE_EXEC_RULE, which the proofs below pass to X86_STEPS_TAC and PROVE_SAFETY_SPEC_TAC. *) +let mldsa_pointwise_acc_l5_tmc = define_trimmed "mldsa_pointwise_acc_l5_tmc" mldsa_pointwise_acc_l5_mc;; +let MLDSA_POINTWISE_ACC_L5_TMC_EXEC = X86_MK_CORE_EXEC_RULE mldsa_pointwise_acc_l5_tmc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) +(* Core correctness of the l5 routine, stated for BUTLAST of the trimmed code (so the final RET is excluded). Hypotheses: c, a, b, consts are 32-byte aligned; the code (0x22B bytes), c (1024 bytes), a and b (5120 bytes each) and consts (2496 bytes) are pairwise non-overlapping; consts holds mldsa_complete_qdata; for every i < 1280 the 32-bit words at a + 4i and b + 4i are x i and y i, with abs(ival(x i)) <= 8380416 and abs(ival(y i)) <= 75423752. Conclusion: execution reaches pc + 0x22A, and for every i < 256 the word zi at c + 4i satisfies ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i (mod 8380417) and abs(ival zi) <= 8380416. The proof steps through 1667 instructions, which matches 3 instructions before the loop, 16 iterations (CMP eax 16) and 104 instructions per iteration in the listing above. *) +let MLDSA_POINTWISE_ACC_L5_CORRECT = prove + (`!c a b consts x y pc. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc, 0x22B) (c, 1024) /\ + nonoverlapping (word pc, 0x22B) (a, 5120) /\ + nonoverlapping (word pc, 0x22B) (b, 5120) /\ + nonoverlapping (word pc, 0x22B) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 5120) /\ + nonoverlapping (c, 1024) (b, 5120) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 5120) (b, 5120) /\ + nonoverlapping (a, 5120) (consts, 2496) /\ + nonoverlapping (b, 5120) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) (BUTLAST mldsa_pointwise_acc_l5_tmc) /\ + read RIP s = word pc /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1280 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1280 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = word(pc + 0x22A) /\ + (!i.
i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [ZMM0; ZMM1; ZMM2; ZMM3; ZMM4; ZMM5; ZMM6; ZMM7; + ZMM8; ZMM9; ZMM10; ZMM11; ZMM12; ZMM13; ZMM14; ZMM15] ,, + MAYCHANGE [RAX] ,, MAYCHANGE SOME_FLAGS ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + + MAP_EVERY X_GEN_TAC + [`c:int64`; `a:int64`; `b:int64`; `consts:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + SUBGOAL_THEN + `!i. i < 1280 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC [SOME_FLAGS; fst MLDSA_POINTWISE_ACC_L5_TMC_EXEC] THEN + + GHOST_INTRO_TAC `init_ymm0:int256` `read YMM0` THEN + GHOST_INTRO_TAC `init_ymm1:int256` `read YMM1` THEN + GHOST_INTRO_TAC `init_ymm2:int256` `read YMM2` THEN + GHOST_INTRO_TAC `init_ymm3:int256` `read YMM3` THEN + GHOST_INTRO_TAC `init_ymm4:int256` `read YMM4` THEN + GHOST_INTRO_TAC `init_ymm5:int256` `read YMM5` THEN + GHOST_INTRO_TAC `init_ymm6:int256` `read YMM6` THEN + GHOST_INTRO_TAC `init_ymm7:int256` `read YMM7` THEN + GHOST_INTRO_TAC `init_ymm8:int256` `read YMM8` THEN + GHOST_INTRO_TAC `init_ymm9:int256` `read YMM9` THEN + GHOST_INTRO_TAC `init_ymm10:int256` `read YMM10` THEN + GHOST_INTRO_TAC `init_ymm11:int256` `read YMM11` THEN + GHOST_INTRO_TAC `init_ymm12:int256` `read YMM12` THEN + GHOST_INTRO_TAC `init_ymm13:int256` `read YMM13` THEN 
+ GHOST_INTRO_TAC `init_ymm14:int256` `read YMM14` THEN + GHOST_INTRO_TAC `init_ymm15:int256` `read YMM15` THEN + + MAP_EVERY (fun n -> + let vname = "init_c" ^ string_of_int n in + GHOST_INTRO_TAC (mk_var(vname, `:int256`)) + (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add c (word n)))`)) + (0--31) THEN + ENSURES_INIT_TAC "s0" THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add a (word n))) s0`)) (0--159))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add b (word n))) s0`)) (0--159))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + FIRST_X_ASSUM(MP_TAC o CONV_RULE (LAND_CONV WORDLIST_FROM_MEMORY_CONV)) THEN + REWRITE_TAC[mldsa_complete_qdata; MAP; CONS_11] THEN + STRIP_TAC THEN + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add consts (word n))) s0`)) (0--1))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 consts) s = z`] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + SUBGOAL_THEN + `!i. 
i < 1280 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + MAP_EVERY (fun n -> X86_STEPS_TAC MLDSA_POINTWISE_ACC_L5_TMC_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[mldsa_pointwise_montred]) + (1--1667) THEN + ENSURES_FINAL_STATE_TAC THEN + ASM_REWRITE_TAC[] THEN + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s1667:int256 = xxx`) o concl))) THEN + + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + REWRITE_TAC[USHR32_SUBWORD; DUP32_SUBWORD] THEN + REWRITE_TAC[Q_MUL_COMM; GSYM mldsa_pointwise_montred] THEN + REWRITE_TAC[WORD_JOIN_SUBWORD] THEN + + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1280 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1280 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1280`)) in + let 
prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let pairs = pair_up (conjuncts w) in + (* Prove each pair independently *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3;4] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l5; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN INT_ARITH_TAC; + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) +(* Subroutine-level correctness for the trimmed code mldsa_pointwise_acc_l5_tmc. It keeps the input bounds and the congruence/bound result on c of MLDSA_POINTWISE_ACC_L5_CORRECT, but is stated from entry (RSP = stackpointer, the 8 bytes at stackpointer hold returnaddress) to return (RIP = returnaddress, RSP = stackpointer + 8), and additionally assumes the 8-byte stack slot does not overlap c, a, b or consts. The proof rewrites both the goal and the core theorem with WORDLIST_FROM_MEMORY_CONV (TWEAK_CONV) and then applies X86_PROMOTE_RETURN_NOSTACK_TAC to the core theorem. *) +let MLDSA_POINTWISE_ACC_L5_NOIBT_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress.
+ aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_tmc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_tmc) (a, 5120) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_tmc) (b, 5120) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 5120) /\ + nonoverlapping (c, 1024) (b, 5120) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 5120) (b, 5120) /\ + nonoverlapping (a, 5120) (consts, 2496) /\ + nonoverlapping (b, 5120) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 5120) /\ + nonoverlapping (stackpointer, 8) (b, 5120) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l5_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1280 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1280 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. 
i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l5_tmc + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L5_CORRECT));; +(* Same call/return statement with the code bytes taken from mldsa_pointwise_acc_l5_mc instead of the tmc list; obtained by applying ADD_IBT_RULE to the NOIBT theorem after the same WORDLIST_FROM_MEMORY_CONV rewrite on both sides. *) +let MLDSA_POINTWISE_ACC_L5_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (a, 5120) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (b, 5120) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l5_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 5120) /\ + nonoverlapping (c, 1024) (b, 5120) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 5120) (b, 5120) /\ + nonoverlapping (a, 5120) (consts, 2496) /\ + nonoverlapping (b, 5120) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 5120) /\ + nonoverlapping (stackpointer, 8) (b, 5120) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l5_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1280 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1280 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1280 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. 
i < 1280 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l5 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + MATCH_ACCEPT_TAC(ADD_IBT_RULE + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L5_NOIBT_SUBROUTINE_CORRECT)));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "x86/proofs/consttime.ml";; +needs "x86_64/proofs/subroutine_signatures.ml";; +(* Safety goal (full_spec) and its public variable list, generated by mk_safety_spec from the mldsa_pointwise_acc_l5_x86 entry of subroutine_signatures, the core correctness theorem and the execution rule. *) +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:true + (assoc "mldsa_pointwise_acc_l5_x86" subroutine_signatures) + MLDSA_POINTWISE_ACC_L5_CORRECT + MLDSA_POINTWISE_ACC_L5_TMC_EXEC;; +(* Core safety theorem: full_spec proved by PROVE_SAFETY_SPEC_TAC. The ABI register abbreviation and SOME_FLAGS are unfolded in the goal before the proof and again in the resulting theorem. *) +let MLDSA_POINTWISE_ACC_L5_SAFE = + REWRITE_RULE [MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] + (time prove + (full_spec, + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars + MLDSA_POINTWISE_ACC_L5_TMC_EXEC));; +(* Call/return safety statement for mldsa_pointwise_acc_l5_tmc: a single function f_events of c, a, b, consts, pc, stackpointer and returnaddress gives the events e2 appended during the call, and e2 satisfies memaccess_inbounds for the two buffer lists in the postcondition. *) +let MLDSA_POINTWISE_ACC_L5_NOIBT_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress. 
+ aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_tmc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_tmc) (a, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_tmc) (b, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 5120) /\ nonoverlapping (c, 1024) (b, 5120) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 5120) (b, 5120) /\ + nonoverlapping (a, 5120) (consts, 2496) /\ nonoverlapping (b, 5120) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 5120) /\ + nonoverlapping (stackpointer, 8) (b, 5120) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l5_tmc /\ + read RIP s = word pc /\ read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,5120; b,5120; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'. true)`, + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l5_tmc + MLDSA_POINTWISE_ACC_L5_SAFE THEN DISCHARGE_SAFETY_PROPERTY_TAC);; +(* Same safety statement with the code bytes taken from mldsa_pointwise_acc_l5_mc; obtained from the NOIBT safety theorem by ADD_IBT_RULE. *) +let MLDSA_POINTWISE_ACC_L5_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress. 
+ aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (a, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (b, 5120) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l5_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 5120) /\ nonoverlapping (c, 1024) (b, 5120) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 5120) (b, 5120) /\ + nonoverlapping (a, 5120) (consts, 2496) /\ nonoverlapping (b, 5120) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 5120) /\ + nonoverlapping (stackpointer, 8) (b, 5120) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l5_mc /\ + read RIP s = word pc /\ read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,5120; b,5120; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'. true)`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE MLDSA_POINTWISE_ACC_L5_NOIBT_SUBROUTINE_SAFE));; diff --git a/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l7.ml b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l7.ml new file mode 100644 index 000000000..730e2b798 --- /dev/null +++ b/proofs/hol_light/x86_64/proofs/mldsa_pointwise_acc_l7.ml @@ -0,0 +1,657 @@ +(* + * Copyright (c) The mldsa-native project authors + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Pointwise multiplication and accumulation of polynomials in ML-DSA NTT *) +(* ========================================================================= *) + +needs "x86/proofs/base.ml";; +needs "common/mldsa_specs.ml";; +needs "x86_64/proofs/mldsa_zetas.ml";; +needs "x86_64/proofs/mldsa_utils.ml";; + +(*** print_literal_from_elf "x86_64/mldsa/mldsa_pointwise_acc_l7.o";; + ***) +(* Byte list of the routine, defined through define_assert_from_elf against x86_64/mldsa/mldsa_pointwise_acc_l7.o. The comment after each byte group gives its decoded instruction. *) +let mldsa_pointwise_acc_l7_mc = define_assert_from_elf "mldsa_pointwise_acc_l7_mc" "x86_64/mldsa/mldsa_pointwise_acc_l7.o" +(*** BYTECODE START ***) +[ + 0xf3; 0x0f; 0x1e; 0xfa; (* ENDBR64 *) + 0xc5; 0xfd; 0x6f; 0x41; 0x20; + (* VMOVDQA (%_% ymm0) (Memop Word256 (%% (rcx,32))) *) + 0xc5; 0xfd; 0x6f; 0x09; (* VMOVDQA (%_% ymm1) (Memop Word256 (%% (rcx,0))) *) + 0x31; 0xc0; (* XOR (% eax) (% eax) *) + 0xc5; 0xfd; 0x6f; 0x36; (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,0))) *) + 0xc5; 0x7d; 0x6f; 0x46; 0x20; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,32))) *) + 0xc5; 0x7d; 0x6f; 0x12; (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,0))) *) + 0xc5; 0x7d; 0x6f; 0x62; 0x20; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,32))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xfd; 0x6f; 0xd6; (* VMOVDQA (%_% ymm2) (%_% ymm6) *) + 0xc5; 
0xfd; 0x6f; 0xdf; (* VMOVDQA (%_% ymm3) (%_% ymm7) *) + 0xc5; 0x7d; 0x7f; 0xc4; (* VMOVDQA (%_% ymm4) (%_% ymm8) *) + 0xc5; 0x7d; 0x7f; 0xcd; (* VMOVDQA (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,1024))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,1056))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,1024))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x04; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,1056))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,2048))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,2080))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,2048))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x08; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,2080))) *) + 
0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,3072))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,3104))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,3072))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x0c; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,3104))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% 
ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,4096))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,4128))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,4096))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x10; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,4128))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x14; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,5120))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x14; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,5152))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x14; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop 
Word256 (%% (rdx,5120))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x14; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,5152))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc5; 0xfd; 0x6f; 0xb6; 0x00; 0x18; 0x00; 0x00; + (* VMOVDQA (%_% ymm6) (Memop Word256 (%% (rsi,6144))) *) + 0xc5; 0x7d; 0x6f; 0x86; 0x20; 0x18; 0x00; 0x00; + (* VMOVDQA (%_% ymm8) (Memop Word256 (%% (rsi,6176))) *) + 0xc5; 0x7d; 0x6f; 0x92; 0x00; 0x18; 0x00; 0x00; + (* VMOVDQA (%_% ymm10) (Memop Word256 (%% (rdx,6144))) *) + 0xc5; 0x7d; 0x6f; 0xa2; 0x20; 0x18; 0x00; 0x00; + (* VMOVDQA (%_% ymm12) (Memop Word256 (%% (rdx,6176))) *) + 0xc5; 0xc5; 0x73; 0xd6; 0x20; + (* VPSRLQ (%_% ymm7) (%_% ymm6) (Imm8 (word 32)) *) + 0xc4; 0xc1; 0x35; 0x73; 0xd0; 0x20; + (* VPSRLQ (%_% ymm9) (%_% ymm8) (Imm8 (word 32)) *) + 0xc4; 0x41; 0x7e; 0x16; 0xda; + (* VMOVSHDUP (%_% ymm11) (%_% ymm10) *) + 0xc4; 0x41; 0x7e; 0x16; 0xec; + (* VMOVSHDUP (%_% ymm13) (%_% ymm12) *) + 0xc4; 0xc2; 0x4d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm6) (%_% ymm10) *) + 0xc4; 0xc2; 0x45; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm7) (%_% 
ymm11) *) + 0xc4; 0x42; 0x3d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm8) (%_% ymm12) *) + 0xc4; 0x42; 0x35; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm9) (%_% ymm13) *) + 0xc5; 0xcd; 0xd4; 0xd2; (* VPADDQ (%_% ymm2) (%_% ymm6) (%_% ymm2) *) + 0xc5; 0xc5; 0xd4; 0xdb; (* VPADDQ (%_% ymm3) (%_% ymm7) (%_% ymm3) *) + 0xc5; 0xbd; 0xd4; 0xe4; (* VPADDQ (%_% ymm4) (%_% ymm8) (%_% ymm4) *) + 0xc5; 0xb5; 0xd4; 0xed; (* VPADDQ (%_% ymm5) (%_% ymm9) (%_% ymm5) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xf2; + (* VPMULDQ (%_% ymm6) (%_% ymm0) (%_% ymm2) *) + 0xc4; 0xe2; 0x7d; 0x28; 0xfb; + (* VPMULDQ (%_% ymm7) (%_% ymm0) (%_% ymm3) *) + 0xc4; 0x62; 0x7d; 0x28; 0xc4; + (* VPMULDQ (%_% ymm8) (%_% ymm0) (%_% ymm4) *) + 0xc4; 0x62; 0x7d; 0x28; 0xcd; + (* VPMULDQ (%_% ymm9) (%_% ymm0) (%_% ymm5) *) + 0xc4; 0xe2; 0x75; 0x28; 0xf6; + (* VPMULDQ (%_% ymm6) (%_% ymm1) (%_% ymm6) *) + 0xc4; 0xe2; 0x75; 0x28; 0xff; + (* VPMULDQ (%_% ymm7) (%_% ymm1) (%_% ymm7) *) + 0xc4; 0x42; 0x75; 0x28; 0xc0; + (* VPMULDQ (%_% ymm8) (%_% ymm1) (%_% ymm8) *) + 0xc4; 0x42; 0x75; 0x28; 0xc9; + (* VPMULDQ (%_% ymm9) (%_% ymm1) (%_% ymm9) *) + 0xc5; 0xed; 0xfb; 0xd6; (* VPSUBQ (%_% ymm2) (%_% ymm2) (%_% ymm6) *) + 0xc5; 0xe5; 0xfb; 0xdf; (* VPSUBQ (%_% ymm3) (%_% ymm3) (%_% ymm7) *) + 0xc4; 0xc1; 0x5d; 0xfb; 0xe0; + (* VPSUBQ (%_% ymm4) (%_% ymm4) (%_% ymm8) *) + 0xc4; 0xc1; 0x55; 0xfb; 0xe9; + (* VPSUBQ (%_% ymm5) (%_% ymm5) (%_% ymm9) *) + 0xc5; 0xed; 0x73; 0xd2; 0x20; + (* VPSRLQ (%_% ymm2) (%_% ymm2) (Imm8 (word 32)) *) + 0xc5; 0xfe; 0x16; 0xe4; (* VMOVSHDUP (%_% ymm4) (%_% ymm4) *) + 0xc4; 0xe3; 0x6d; 0x02; 0xd3; 0xaa; + (* VPBLENDD (%_% ymm2) (%_% ymm2) (%_% ymm3) (Imm8 (word 170)) *) + 0xc4; 0xe3; 0x5d; 0x02; 0xe5; 0xaa; + (* VPBLENDD (%_% ymm4) (%_% ymm4) (%_% ymm5) (Imm8 (word 170)) *) + 0xc5; 0xfd; 0x7f; 0x17; (* VMOVDQA (Memop Word256 (%% (rdi,0))) (%_% ymm2) *) + 0xc5; 0xfd; 0x7f; 0x67; 0x20; + (* VMOVDQA (Memop Word256 (%% (rdi,32))) (%_% ymm4) *) + 0x48; 0x83; 0xc6; 0x40; (* ADD (% rsi) (Imm8 (word 
64)) *) + 0x48; 0x83; 0xc2; 0x40; (* ADD (% rdx) (Imm8 (word 64)) *) + 0x48; 0x83; 0xc7; 0x40; (* ADD (% rdi) (Imm8 (word 64)) *) + 0x83; 0xc0; 0x01; (* ADD (% eax) (Imm8 (word 1)) *) + 0x83; 0xf8; 0x10; (* CMP (% eax) (Imm8 (word 16)) *) + 0x0f; 0x82; 0x2f; 0xfd; 0xff; 0xff; + (* JB (Imm32 (word 4294966575)) *) + 0xc3 (* RET *) +];; +(*** BYTECODE END ***) +(* Trimmed code list derived from the byte list above by define_trimmed, and the core execution rule built from it that the proofs below use to step through the code. NOTE(review): what define_trimmed removes is not visible here; confirm against its definition. *) +let mldsa_pointwise_acc_l7_tmc = define_trimmed "mldsa_pointwise_acc_l7_tmc" mldsa_pointwise_acc_l7_mc;; +let MLDSA_POINTWISE_ACC_L7_TMC_EXEC = X86_MK_CORE_EXEC_RULE mldsa_pointwise_acc_l7_tmc;; + +(* ========================================================================= *) +(* Correctness proof *) +(* ========================================================================= *) +(* Core theorem for the trimmed code without its last byte (BUTLAST drops the final RET), run from pc to pc plus 0x2DC. Inputs: 1792 words at a with absolute value at most 8380416, 1792 words at b with absolute value at most 75423752, and the constant table at consts. Result: each of the 256 words at c is congruent modulo 8380417 to mldsa_pointwise_acc_l7 (ival o x) (ival o y) i and has absolute value at most 8380416. *) +let MLDSA_POINTWISE_ACC_L7_CORRECT = prove + (`!c a b consts x y pc. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc, 0x2DD) (c, 1024) /\ + nonoverlapping (word pc, 0x2DD) (a, 7168) /\ + nonoverlapping (word pc, 0x2DD) (b, 7168) /\ + nonoverlapping (word pc, 0x2DD) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 7168) /\ + nonoverlapping (c, 1024) (b, 7168) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 7168) (b, 7168) /\ + nonoverlapping (a, 7168) (consts, 2496) /\ + nonoverlapping (b, 7168) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) (BUTLAST mldsa_pointwise_acc_l7_tmc) /\ + read RIP s = word pc /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1792 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1792 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = word(pc + 0x2DC) /\ + (!i. 
i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l7 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [ZMM0; ZMM1; ZMM2; ZMM3; ZMM4; ZMM5; ZMM6; ZMM7; + ZMM8; ZMM9; ZMM10; ZMM11; ZMM12; ZMM13; ZMM14; ZMM15] ,, + MAYCHANGE [RAX] ,, MAYCHANGE SOME_FLAGS ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + + MAP_EVERY X_GEN_TAC + [`c:int64`; `a:int64`; `b:int64`; `consts:int64`; + `x:num->int32`; `y:num->int32`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + GLOBALIZE_PRECONDITION_TAC THEN + SUBGOAL_THEN + `!i. i < 1792 ==> abs(ival((x:num->int32) i)) <= &75423752` + ASSUME_TAC THENL + [GEN_TAC THEN DISCH_TAC THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416:int` THEN + CONJ_TAC THENL [ASM_MESON_TAC[]; CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV EXPAND_CASES_CONV))) THEN + CONV_TAC NUM_REDUCE_CONV THEN + REPEAT STRIP_TAC THEN + REWRITE_TAC [SOME_FLAGS; fst MLDSA_POINTWISE_ACC_L7_TMC_EXEC] THEN + + GHOST_INTRO_TAC `init_ymm0:int256` `read YMM0` THEN + GHOST_INTRO_TAC `init_ymm1:int256` `read YMM1` THEN + GHOST_INTRO_TAC `init_ymm2:int256` `read YMM2` THEN + GHOST_INTRO_TAC `init_ymm3:int256` `read YMM3` THEN + GHOST_INTRO_TAC `init_ymm4:int256` `read YMM4` THEN + GHOST_INTRO_TAC `init_ymm5:int256` `read YMM5` THEN + GHOST_INTRO_TAC `init_ymm6:int256` `read YMM6` THEN + GHOST_INTRO_TAC `init_ymm7:int256` `read YMM7` THEN + GHOST_INTRO_TAC `init_ymm8:int256` `read YMM8` THEN + GHOST_INTRO_TAC `init_ymm9:int256` `read YMM9` THEN + GHOST_INTRO_TAC `init_ymm10:int256` `read YMM10` THEN + GHOST_INTRO_TAC `init_ymm11:int256` `read YMM11` THEN + GHOST_INTRO_TAC `init_ymm12:int256` `read YMM12` THEN + GHOST_INTRO_TAC `init_ymm13:int256` `read YMM13` THEN 
+ GHOST_INTRO_TAC `init_ymm14:int256` `read YMM14` THEN + GHOST_INTRO_TAC `init_ymm15:int256` `read YMM15` THEN + + MAP_EVERY (fun n -> + let vname = "init_c" ^ string_of_int n in + GHOST_INTRO_TAC (mk_var(vname, `:int256`)) + (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add c (word n)))`)) + (0--31) THEN + ENSURES_INIT_TAC "s0" THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add a (word n))) s0`)) (0--223))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add b (word n))) s0`)) (0--223))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 a) s = x`] THEN + + FIRST_X_ASSUM(MP_TAC o CONV_RULE (LAND_CONV WORDLIST_FROM_MEMORY_CONV)) THEN + REWRITE_TAC[mldsa_complete_qdata; MAP; CONS_11] THEN + STRIP_TAC THEN + MP_TAC(end_itlist CONJ (map (fun n -> + READ_MEMORY_MERGE_CONV 3 (subst[mk_small_numeral(32*n),`n:num`] + `read (memory :> bytes256(word_add consts (word n))) s0`)) (0--1))) THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes32 consts) s = z`] THEN + CONV_TAC(LAND_CONV WORD_REDUCE_CONV) THEN + STRIP_TAC THEN + + SUBGOAL_THEN + `!i. 
i < 1792 ==> + abs(ival(word_mul (word_sx ((x:num->int32) i):int64) + (word_sx ((y:num->int32) i):int64))) <= &632082418040832` + ASSUME_TAC THENL + [REPEAT STRIP_TAC THEN + MP_TAC(ISPECL [`(x:num->int32) i`; `(y:num->int32) i`] IVAL_WORD_MUL_SX32_64) THEN + ANTS_TAC THENL + [ASM_MESON_TAC[]; DISCH_THEN(fun th -> REWRITE_TAC[th])] THEN + REWRITE_TAC[INT_ABS_MUL] THEN + MATCH_MP_TAC INT_LE_TRANS THEN EXISTS_TAC `&8380416 * &75423752:int` THEN + CONJ_TAC THENL + [MATCH_MP_TAC INT_LE_MUL2 THEN REWRITE_TAC[INT_ABS_POS] THEN ASM_MESON_TAC[]; + CONV_TAC INT_REDUCE_CONV]; + ALL_TAC] THEN + + MAP_EVERY (fun n -> X86_STEPS_TAC MLDSA_POINTWISE_ACC_L7_TMC_EXEC [n] THEN + SIMD_SIMPLIFY_TAC[mldsa_pointwise_montred]) + (1--2179) THEN + ENSURES_FINAL_STATE_TAC THEN + ASM_REWRITE_TAC[] THEN + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s2179:int256 = xxx`) o concl))) THEN + + CONV_TAC(TOP_DEPTH_CONV EXPAND_CASES_CONV) THEN + CONV_TAC(DEPTH_CONV NUM_MULT_CONV THENC DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC[WORD_ADD_0] THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN ASM_REWRITE_TAC[] THEN + CONV_TAC(TOP_DEPTH_CONV let_CONV) THEN + CONV_TAC(TOP_DEPTH_CONV WORD_SIMPLE_SUBWORD_CONV) THEN + REWRITE_TAC[USHR32_SUBWORD; DUP32_SUBWORD] THEN + REWRITE_TAC[Q_MUL_COMM; GSYM mldsa_pointwise_montred] THEN + REWRITE_TAC[WORD_JOIN_SUBWORD] THEN + + W(fun (asl,w) -> + let lfn = PROCESS_BOUND_ASSUMPTIONS + (CONJUNCTS(tryfind (CONV_RULE EXPAND_CASES_CONV o snd) asl)) + in + (* Pre-compute 1792 ival_mul theorems via ISPECL + assumption lookup *) + let ival_mul_thms = Array.init 1792 (fun i -> + let iterm = mk_small_numeral i in + let xi = mk_comb(`x:num->int32`, iterm) in + let yi = mk_comb(`y:num->int32`, iterm) in + let th = ISPECL [xi; yi] IVAL_WORD_MUL_SX32_64 in + let ante = lhand(concl th) in + let ante_x, ante_y = dest_conj ante in + let ilt = ARITH_RULE(mk_comb(mk_comb(`(<):num->num->bool`, iterm), `1792`)) in + let 
prove_bound bt = + tryfind (fun (_,ath) -> + try let a' = SPEC iterm ath in + let a'' = MP a' ilt in + if aconv (concl a'') bt then a'' else failwith "" + with _ -> failwith "") asl in + MP th (CONJ (prove_bound ante_x) (prove_bound ante_y))) in + (* Extract 256 coefficient pairs from the goal conjunction: pair_up regroups the flat conjunct list two at a time, so each pair is the congruence and the bound for one output coefficient *) + let rec pair_up = function + | a :: b :: rest -> mk_conj(a,b) :: pair_up rest + | [x] -> [x] | [] -> [] in + let pairs = pair_up (conjuncts w) in + (* Prove each pair independently; relevant_ival picks the seven product theorems for output index idx, one per input polynomial at stride 256, and any leftover subgoal is reported as a failure naming idx *) + let prove_pair idx pair = + let mr = rand(lhand(rator(lhand pair))) in + let cb_th = ASM_CONGBOUND_RULE lfn mr in + let relevant_ival = map (fun k -> ival_mul_thms.(idx + 256 * k)) [0;1;2;3;4;5;6] in + let (_,sgs,just) = ( + MP_TAC cb_th THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [REWRITE_TAC[INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM; mldsa_pointwise_acc_l7; + INVERSE_MOD_CONV `inverse_mod 8380417 4294967296`] THEN + CONV_TAC INT_REM_DOWN_CONV THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + REWRITE_TAC relevant_ival THEN + CONV_TAC(DEPTH_CONV NUM_ADD_CONV) THEN + AP_THM_TAC THEN AP_TERM_TAC THEN INT_ARITH_TAC; + REWRITE_TAC[INT_ABS_BOUNDS] THEN + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]) (asl, pair) in + if sgs <> [] then failwith ("prove_pair " ^ string_of_int idx) + else just null_inst [] in + let all_thms = List.map2 prove_pair (0--255) pairs in + ACCEPT_TAC(end_itlist CONJ all_thms)));; + +(* ========================================================================= *) +(* Subroutine form *) +(* ========================================================================= *) +(* Call/return form of the core theorem for mldsa_pointwise_acc_l7_tmc: the return address is read from the 8 bytes at stackpointer on entry, and on exit RIP equals it and RSP is stackpointer advanced by 8. Derived from MLDSA_POINTWISE_ACC_L7_CORRECT via X86_PROMOTE_RETURN_NOSTACK_TAC. *) +let MLDSA_POINTWISE_ACC_L7_NOIBT_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress. 
+ aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_tmc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_tmc) (a, 7168) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_tmc) (b, 7168) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 7168) /\ + nonoverlapping (c, 1024) (b, 7168) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 7168) (b, 7168) /\ + nonoverlapping (a, 7168) (consts, 2496) /\ + nonoverlapping (b, 7168) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 7168) /\ + nonoverlapping (stackpointer, 8) (b, 7168) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l7_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1792 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1792 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. 
i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l7 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l7_tmc + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L7_CORRECT));; +(* Same call/return statement with the code bytes taken from mldsa_pointwise_acc_l7_mc instead of the tmc list; obtained by applying ADD_IBT_RULE to the NOIBT theorem after the same WORDLIST_FROM_MEMORY_CONV rewrite on both sides. *) +let MLDSA_POINTWISE_ACC_L7_SUBROUTINE_CORRECT = prove + (`!c a b consts x y pc stackpointer returnaddress. + aligned 32 c /\ + aligned 32 a /\ + aligned 32 b /\ + aligned 32 consts /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (c, 1024) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (a, 7168) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (b, 7168) /\ + nonoverlapping (word pc,LENGTH mldsa_pointwise_acc_l7_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 7168) /\ + nonoverlapping (c, 1024) (b, 7168) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ + nonoverlapping (a, 7168) (b, 7168) /\ + nonoverlapping (a, 7168) (consts, 2496) /\ + nonoverlapping (b, 7168) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 7168) /\ + nonoverlapping (stackpointer, 8) (b, 7168) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l7_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + wordlist_from_memory(consts,624) s = + MAP (iword: int -> 32 word) mldsa_complete_qdata /\ + (!i. i < 1792 ==> abs(ival(x i)) <= &8380416) /\ + (!i. i < 1792 ==> abs(ival(y i)) <= &75423752) /\ + (!i. i < 1792 ==> + read(memory :> bytes32(word_add a (word(4 * i)))) s = x i) /\ + (!i. 
i < 1792 ==> + read(memory :> bytes32(word_add b (word(4 * i)))) s = y i)) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (!i. i < 256 ==> + let zi = read(memory :> bytes32(word_add c (word(4 * i)))) s in + (ival zi == mldsa_pointwise_acc_l7 (ival o x) (ival o y) i) + (mod &8380417) /\ + abs(ival zi) <= &8380416)) + (MAYCHANGE [RSP] ,, MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(c, 1024)])`, + let TWEAK_CONV = ONCE_DEPTH_CONV WORDLIST_FROM_MEMORY_CONV in + CONV_TAC TWEAK_CONV THEN + MATCH_ACCEPT_TAC(ADD_IBT_RULE + (CONV_RULE TWEAK_CONV MLDSA_POINTWISE_ACC_L7_NOIBT_SUBROUTINE_CORRECT)));; + +(* ========================================================================= *) +(* Constant-time and memory safety proof. *) +(* ========================================================================= *) + +needs "x86/proofs/consttime.ml";; +needs "x86_64/proofs/subroutine_signatures.ml";; +(* Safety goal (full_spec) and its public variable list, generated by mk_safety_spec from the mldsa_pointwise_acc_l7_x86 entry of subroutine_signatures, the core correctness theorem and the execution rule. *) +let full_spec,public_vars = mk_safety_spec + ~keep_maychanges:true + (assoc "mldsa_pointwise_acc_l7_x86" subroutine_signatures) + MLDSA_POINTWISE_ACC_L7_CORRECT + MLDSA_POINTWISE_ACC_L7_TMC_EXEC;; +(* Core safety theorem: full_spec proved by PROVE_SAFETY_SPEC_TAC. The ABI register abbreviation and SOME_FLAGS are unfolded in the goal before the proof and again in the resulting theorem. *) +let MLDSA_POINTWISE_ACC_L7_SAFE = + REWRITE_RULE [MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] + (time prove + (full_spec, + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; SOME_FLAGS] THEN + PROVE_SAFETY_SPEC_TAC ~public_vars:public_vars + MLDSA_POINTWISE_ACC_L7_TMC_EXEC));; +(* Call/return safety statement for mldsa_pointwise_acc_l7_tmc: a single function f_events of c, a, b, consts, pc, stackpointer and returnaddress gives the events e2 appended during the call, and e2 satisfies memaccess_inbounds for the two buffer lists in the postcondition. *) +let MLDSA_POINTWISE_ACC_L7_NOIBT_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress. 
+ aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_tmc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_tmc) (a, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_tmc) (b, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_tmc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 7168) /\ nonoverlapping (c, 1024) (b, 7168) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 7168) (b, 7168) /\ + nonoverlapping (a, 7168) (consts, 2496) /\ nonoverlapping (b, 7168) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 7168) /\ + nonoverlapping (stackpointer, 8) (b, 7168) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l7_tmc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,7168; b,7168; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'. true)`, + X86_PROMOTE_RETURN_NOSTACK_TAC mldsa_pointwise_acc_l7_tmc + MLDSA_POINTWISE_ACC_L7_SAFE THEN + DISCHARGE_SAFETY_PROPERTY_TAC);; +(* Same statement as MLDSA_POINTWISE_ACC_L7_NOIBT_SUBROUTINE_SAFE with mldsa_pointwise_acc_l7_mc in place of mldsa_pointwise_acc_l7_tmc; proved by applying ADD_IBT_RULE to that theorem. *) +let MLDSA_POINTWISE_ACC_L7_SUBROUTINE_SAFE = time prove + (`exists f_events. + forall e c a b consts pc stackpointer returnaddress.
+ aligned 32 c /\ aligned 32 a /\ aligned 32 b /\ aligned 32 consts /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (c, 1024) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (a, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (b, 7168) /\ + nonoverlapping (word pc, LENGTH mldsa_pointwise_acc_l7_mc) (consts, 2496) /\ + nonoverlapping (c, 1024) (a, 7168) /\ nonoverlapping (c, 1024) (b, 7168) /\ + nonoverlapping (c, 1024) (consts, 2496) /\ nonoverlapping (a, 7168) (b, 7168) /\ + nonoverlapping (a, 7168) (consts, 2496) /\ nonoverlapping (b, 7168) (consts, 2496) /\ + nonoverlapping (stackpointer, 8) (c, 1024) /\ + nonoverlapping (stackpointer, 8) (a, 7168) /\ + nonoverlapping (stackpointer, 8) (b, 7168) /\ + nonoverlapping (stackpointer, 8) (consts, 2496) + ==> ensures x86 + (\s. bytes_loaded s (word pc) mldsa_pointwise_acc_l7_mc /\ + read RIP s = word pc /\ + read RSP s = stackpointer /\ + read (memory :> bytes64 stackpointer) s = returnaddress /\ + C_ARGUMENTS [c; a; b; consts] s /\ + read events s = e) + (\s. read RIP s = returnaddress /\ + read RSP s = word_add stackpointer (word 8) /\ + (exists e2. + read events s = APPEND e2 e /\ + e2 = f_events c a b consts pc stackpointer returnaddress /\ + memaccess_inbounds e2 + [a,7168; b,7168; consts,2496; c,1024; stackpointer,8] + [c,1024; stackpointer,8])) + (\s s'. 
true)`, + MATCH_ACCEPT_TAC(ADD_IBT_RULE MLDSA_POINTWISE_ACC_L7_NOIBT_SUBROUTINE_SAFE));; diff --git a/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml b/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml index 5ea5867e4..c790d6e66 100644 --- a/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml +++ b/proofs/hol_light/x86_64/proofs/subroutine_signatures.ml @@ -29,4 +29,64 @@ let subroutine_signatures = [ [(* temporary buffers *) ]) ); +(* x86_64 pointwise-accumulate variants. Each entry declares c as the single non-const argument and output buffer (256 elements) and a, b, qdata as const inputs; elements are 4 bytes. The l4 entry sizes a and b at 1024 elements, the l5 entry at 1280. *) +("mldsa_pointwise_acc_l4_x86", + ([(*args*) + ("c", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1024]", (*is const?*)"true"); + ("b", "int32_t[static 1024]", (*is const?*)"true"); + ("qdata", "int32_t[static 624]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1024"(* num elems *), 4(* elem bytesize *)); + ("b", "1024"(* num elems *), 4(* elem bytesize *)); + ("qdata", "624"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("c", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + +("mldsa_pointwise_acc_l5_x86", + ([(*args*) + ("c", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1280]", (*is const?*)"true"); + ("b", "int32_t[static 1280]", (*is const?*)"true"); + ("qdata", "int32_t[static 624]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1280"(* num elems *), 4(* elem bytesize *)); + ("b", "1280"(* num elems *), 4(* elem bytesize *)); + ("qdata", "624"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("c", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); + +("mldsa_pointwise_acc_l7_x86", + ([(*args*) + ("c", "int32_t[static 256]", (*is const?*)"false"); + ("a", "int32_t[static 1792]", (*is const?*)"true"); + ("b", "int32_t[static 1792]", (*is const?*)"true"); + ("qdata", "int32_t[static 624]", (*is const?*)"true"); + ], + "void", + [(* input buffers *) + ("a", "1792"(* num elems *), 4(* elem bytesize *)); +
("b", "1792"(* num elems *), 4(* elem bytesize *)); + ("qdata", "624"(* num elems *), 4(* elem bytesize *)); + ], + [(* output buffers *) + ("c", "256"(* num elems *), 4(* elem bytesize *)); + ], + [(* temporary buffers *) + ]) +); ];; diff --git a/scripts/autogen b/scripts/autogen index 8beb9150e..6d96724ae 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -2320,6 +2320,27 @@ def gen_hol_light_asm(): f"-Imldsa/src/native/aarch64/src {aarch64_flags}", "aarch64", ), + ( + "mld_polyvecl_pointwise_acc_montgomery_l4.S", + "mldsa_pointwise_acc_l4.S", + "dev/aarch64_opt/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), + ( + "mld_polyvecl_pointwise_acc_montgomery_l5.S", + "mldsa_pointwise_acc_l5.S", + "dev/aarch64_opt/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), + ( + "mld_polyvecl_pointwise_acc_montgomery_l7.S", + "mldsa_pointwise_acc_l7.S", + "dev/aarch64_opt/src", + f"-Imldsa/src/native/aarch64/src {aarch64_flags}", + "aarch64", + ), ( "keccak_f1600_x1_scalar_asm.S", "keccak_f1600_x1_scalar.S", @@ -2387,6 +2408,27 @@ def gen_hol_light_asm(): f"-Imldsa/src/native/x86_64/src -Imldsa/src/common.h {x86_64_flags}", "x86_64", ), + ( + "pointwise_acc_l4.S", + "mldsa_pointwise_acc_l4.S", + "dev/x86_64/src", + f"-Imldsa/src/native/x86_64/src -Imldsa/src/common.h {x86_64_flags}", + "x86_64", + ), + ( + "pointwise_acc_l5.S", + "mldsa_pointwise_acc_l5.S", + "dev/x86_64/src", + f"-Imldsa/src/native/x86_64/src -Imldsa/src/common.h {x86_64_flags}", + "x86_64", + ), + ( + "pointwise_acc_l7.S", + "mldsa_pointwise_acc_l7.S", + "dev/x86_64/src", + f"-Imldsa/src/native/x86_64/src -Imldsa/src/common.h {x86_64_flags}", + "x86_64", + ), ] joblist = joblist_aarch64 + joblist_x86_64