diff --git a/dev/fips202/x86_64/src/keccak_f1600_x4_avx2.S b/dev/fips202/x86_64/src/keccak_f1600_x4_avx2.S index b993ee7a98..becee0a0e0 100644 --- a/dev/fips202/x86_64/src/keccak_f1600_x4_avx2.S +++ b/dev/fips202/x86_64/src/keccak_f1600_x4_avx2.S @@ -40,6 +40,8 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_avx2) // 0x1e0(%rsp) A23 (state0[23), state1[23], state2[23], state3[23]] Input (%rdi) offsets: 0xB8, 0x180, 0x248, 0x310 // %ymm2 A24 (state0[24), state1[24], state2[24], state3[24]] Input (%rdi) offsets: 0xC0, 0x188, 0x250, 0x318 + movq %rsp, %r11 + andq $0xffffffffffffffe0, %rsp subq $0x300, %rsp // Load 32 bytes from each of the 4 states (A(0-3)) @@ -615,7 +617,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_avx2) vmovhpd %xmm3, 0x188(%rdi) vmovq %xmm15, 0x250(%rdi) vmovhpd %xmm15, 0x318(%rdi) - addq $0x300, %rsp + movq %r11, %rsp ret /* simpasm: footer-start */ diff --git a/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S b/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S index a903951bd8..7a010254ae 100644 --- a/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S +++ b/mlkem/src/fips202/native/x86_64/src/keccak_f1600_x4_avx2.S @@ -22,6 +22,8 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_avx2) .cfi_startproc + movq %rsp, %r11 + andq $-0x20, %rsp subq $0x300, %rsp # imm = 0x300 .cfi_adjust_cfa_offset 0x300 vmovdqu (%rdi), %ymm0 @@ -441,8 +443,7 @@ LLkeccak_f1600_x4_avx2: vmovhpd %xmm3, 0x188(%rdi) vmovq %xmm15, 0x250(%rdi) vmovhpd %xmm15, 0x318(%rdi) - addq $0x300, %rsp # imm = 0x300 - .cfi_adjust_cfa_offset -0x300 + movq %r11, %rsp retq .cfi_endproc diff --git a/proofs/hol_light/x86_64/mlkem/keccak_f1600_x4_avx2.S b/proofs/hol_light/x86_64/mlkem/keccak_f1600_x4_avx2.S index 4079ca6775..79280b2c2c 100644 --- a/proofs/hol_light/x86_64/mlkem/keccak_f1600_x4_avx2.S +++ b/proofs/hol_light/x86_64/mlkem/keccak_f1600_x4_avx2.S @@ -25,6 +25,8 @@ PQCP_MLKEM_NATIVE_MLKEM768_keccak_f1600_x4_avx2: .cfi_startproc endbr64 + movq %rsp, %r11 + andq $-0x20, %rsp subq $0x300, %rsp # imm = 0x300 .cfi_adjust_cfa_offset 0x300 vmovdqu (%rdi), %ymm0 @@ -444,7 +446,6 @@ LLkeccak_f1600_x4_avx2: vmovhpd %xmm3, 0x188(%rdi) vmovq %xmm15, 0x250(%rdi) vmovhpd %xmm15, 0x318(%rdi) - addq $0x300, %rsp # imm = 0x300 - .cfi_adjust_cfa_offset -0x300 + movq %r11, %rsp retq .cfi_endproc