|
| 1 | +/* |
| 2 | + * Copyright (c) The mldsa-native project authors |
| 3 | + * Copyright (c) The mlkem-native project authors |
| 4 | + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT |
| 5 | + */ |
| 6 | + |
| 7 | + |
| 8 | +/* |
| 9 | + * WARNING: This file is auto-derived from the mldsa-native source file |
| 10 | + * dev/aarch64_opt/src/polyz_unpack_17_asm.S using scripts/simpasm. Do not modify it directly. |
| 11 | + */ |
| 12 | + |
| 13 | +#if defined(__ELF__) |
| 14 | +.section .note.GNU-stack,"",@progbits |
| 15 | +#endif |
| 16 | + |
| 17 | +.text |
| 18 | +.balign 4 |
// ---------------------------------------------------------------------------
// polyz_unpack_17_asm
//
// Expands a packed stream of 18-bit fields into 32-bit lanes and stores
// (0x20000 - field) for each of them.
//
// Register roles, as used by the code below:
//   x0  write pointer; 64 bytes are stored per loop iteration and x0 is
//       post-incremented by 0x40 (16 iterations -> 1024 bytes written).
//   x1  read pointer; advanced by 0x14 + 16 = 36 bytes per loop iteration
//       (16 iterations -> 576 bytes consumed).
//   x2  pointer to 64 bytes that are loaded once into v24..v27 and used as
//       TBL byte-index vectors.
//
// Clobbers: x0, x1, x3, x9, v0-v2, v4-v7, v24-v30, NZCV flags.
// The stack is not touched.
//
// NOTE(review): judging by the symbol name this is presumably the ML-DSA
// "z" polynomial unpack for GAMMA1 = 2^17 (256 coefficients, 18 bits each);
// confirm against the C reference implementation.
// ---------------------------------------------------------------------------
| 19 | +#ifdef __APPLE__ |
| 20 | +.global _PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm |
| 21 | +_PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm: |
| 22 | +#else |
| 23 | +.global PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm |
| 24 | +PQCP_MLDSA_NATIVE_MLDSA44_polyz_unpack_17_asm: |
| 25 | +#endif |
| 26 | + |
| 27 | + .cfi_startproc |
// Load the four 16-byte TBL index vectors from [x2 .. x2+63].
// NOTE(review): presumably each group of four indices selects the bytes that
// hold one 18-bit field; the table contents are not visible here.
| 28 | + ldr q24, [x2] |
| 29 | + ldr q25, [x2, #0x10] |
| 30 | + ldr q26, [x2, #0x20] |
| 31 | + ldr q27, [x2, #0x30] |
// Build v28 = per-lane shift counts for USHL. The 32-bit lanes become
// 0x00, 0xfe, 0xfc, 0xfa; USHL interprets the low byte of each lane as a
// signed count, so lanes 0..3 are shifted right by 0, 2, 4 and 6 bits.
| 32 | + mov x3, #0xfe00000000 // =1090921693184 |
| 33 | + mov v28.d[0], x3 |
| 34 | + mov x3, #0xfc // =252 |
| 35 | + movk x3, #0xfa, lsl #32 |
| 36 | + mov v28.d[1], x3 |
// v29 = 0x0003ffff in every 32-bit lane (MSL shifts ones in): 18-bit mask.
| 37 | + movi v29.4s, #0x3, msl #16 |
// v30 = 0x00020000 in every 32-bit lane: the minuend of the final subtract.
| 38 | + movi v30.4s, #0x2, lsl #16 |
// x9 = loop counter, 16 iterations.
| 39 | + mov x9, #0x10 // =16 |
| 40 | + |
// Each iteration consumes 36 input bytes and produces 16 32-bit results.
| 41 | +Lpolyz_unpack_17_loop: |
// v0 = input bytes 0..15, v1 = input bytes 16..31 of the current chunk.
| 42 | + ld1 { v0.16b, v1.16b }, [x1] |
// Step to byte 20 so that the next 16-byte load ends exactly at byte 36;
// v2 = input bytes 20..35. The post-increment leaves x1 at the next chunk.
| 43 | + add x1, x1, #0x14 |
| 44 | + ld1 { v2.16b }, [x1], #16 |
// Gather source bytes for the four output vectors. v4 draws from v0 only,
// v5 from v0:v1, v6 from v1 only, v7 from v1:v2. Out-of-range TBL indices
// yield zero bytes.
| 45 | + tbl v4.16b, { v0.16b }, v24.16b |
| 46 | + tbl v5.16b, { v0.16b, v1.16b }, v25.16b |
| 47 | + tbl v6.16b, { v1.16b }, v26.16b |
| 48 | + tbl v7.16b, { v1.16b, v2.16b }, v27.16b |
// For each of v4..v7: shift each lane right by its count, keep the low
// 18 bits, then compute 0x20000 - value.
| 49 | + ushl v4.4s, v4.4s, v28.4s |
| 50 | + and v4.16b, v4.16b, v29.16b |
| 51 | + sub v4.4s, v30.4s, v4.4s |
| 52 | + ushl v5.4s, v5.4s, v28.4s |
| 53 | + and v5.16b, v5.16b, v29.16b |
| 54 | + sub v5.4s, v30.4s, v5.4s |
| 55 | + ushl v6.4s, v6.4s, v28.4s |
| 56 | + and v6.16b, v6.16b, v29.16b |
| 57 | + sub v6.4s, v30.4s, v6.4s |
| 58 | + ushl v7.4s, v7.4s, v28.4s |
| 59 | + and v7.16b, v7.16b, v29.16b |
| 60 | + sub v7.4s, v30.4s, v7.4s |
// Store the 64 result bytes in the order v4, v5, v6, v7. The store of v4
// at offset 0 comes last because it carries the post-increment of x0.
| 61 | + str q5, [x0, #0x10] |
| 62 | + str q6, [x0, #0x20] |
| 63 | + str q7, [x0, #0x30] |
| 64 | + str q4, [x0], #0x40 |
// Decrement the iteration counter and loop while it is non-zero.
| 65 | + subs x9, x9, #0x1 |
| 66 | + b.ne Lpolyz_unpack_17_loop |
| 67 | + ret |
| 68 | + .cfi_endproc |
0 commit comments