diff --git a/.github/workflows/test_basic.yaml b/.github/workflows/test_basic.yaml index fdc4e758..4bb3af9e 100644 --- a/.github/workflows/test_basic.yaml +++ b/.github/workflows/test_basic.yaml @@ -91,6 +91,14 @@ jobs: - name: Run examples run: | python3 example.py --examples ntt_kyber_123_4567_a55,ntt_dilithium_123_45678_a55 --timeout=300 + examples_x25519_a55: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: ./.github/actions/setup-ubuntu + - name: Run examples + run: | + python3 example.py --examples x25519_scalarmult_ci_a55 sqmag: runs-on: ubuntu-latest steps: diff --git a/example.py b/example.py index afc09e07..e8d02a7c 100644 --- a/example.py +++ b/example.py @@ -52,6 +52,9 @@ from examples.naive.aarch64.keccak._example import ( example_instances as example_instances_aarch64_keccak, ) +from examples.naive.aarch64.x25519._example import ( + example_instances as example_instances_aarch64_x25519, +) from examples.naive.armv7m.dilithium._example import ( example_instances as example_instances_armv7m_dilithium, @@ -115,6 +118,7 @@ def main(): + example_instances_aarch64_dilithium + example_instances_aarch64_kyber + example_instances_aarch64_keccak + + example_instances_aarch64_x25519 + example_instances_armv8m_kyber + example_instances_armv8m_dilithium + example_instances_armv8m_flt_r4_fft diff --git a/examples/naive/aarch64/x25519/_example.py b/examples/naive/aarch64/x25519/_example.py index e69de29b..879dc4d5 100644 --- a/examples/naive/aarch64/x25519/_example.py +++ b/examples/naive/aarch64/x25519/_example.py @@ -0,0 +1,369 @@ +# +# Copyright (c) 2022 Arm Limited +# Copyright (c) 2022 Hanno Becker +# Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Author: Amin Abdulrahman +# + +import os + +from common.OptimizationRunner import OptimizationRunner +import slothy.targets.aarch64.aarch64_neon as AArch64_Neon +import slothy.targets.aarch64.cortex_a55 as Target_CortexA55 + +SUBFOLDER = os.path.basename(os.path.dirname(__file__)) + "/" + + +class x25519_scalarmult(OptimizationRunner): + """Multi-pass optimization of X25519 scalar multiplication for Cortex-A55. + + Implements the optimization pipeline from paper/scripts/slothy_x25519.sh: + + Step 0: Resolve symbolic registers (functional-only, no reordering). + Step 1: Preprocessing pass using naive interleaving heuristic. + Steps 2-5: Stepwise optimization sweeps without latency modeling. + Steps 6-8: Final sweeps with full latency modeling and seam optimization. 
+ """ + + def __init__(self, arch=AArch64_Neon, target=Target_CortexA55, timeout=None): + name = "x25519_scalarmult" + infile = "X25519-AArch64-simple" + + super().__init__( + infile, + name, + funcname="x25519_scalarmult_alt_orig", + rename=True, + arch=arch, + target=target, + timeout=timeout, + subfolder=SUBFOLDER, + ) + + def core(self, slothy): + # Detect whether the framework put us in dry-run / functional-only mode + # so we can skip the expensive optimization passes. + dry_run = slothy.config.constraints.functional_only + + # ------------------------------------------------------------------ + # Step 0: Resolve symbolic registers. + # Use functional-only mode to just resolve register allocation without + # any scheduling, preserving the original instruction order. + # ------------------------------------------------------------------ + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.constraints.functional_only = True + slothy.config.constraints.allow_reordering = False + slothy.optimize(start="mainloop", end="end_label") + + if dry_run: + return + + slothy.config.constraints.functional_only = False + slothy.config.constraints.allow_reordering = True + + # Save the base config so each subsequent pass can start clean. + conf = slothy.config.copy() + + # ------------------------------------------------------------------ + # Step 1: Preprocessing. + # Use the naive interleaving heuristic to get an initial interleaved + # code layout without caring about performance estimation yet. 
+ # ------------------------------------------------------------------ + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.split_heuristic = True + slothy.config.split_heuristic_repeat = 0 + slothy.config.split_heuristic_estimate_performance = False + slothy.config.split_heuristic_preprocess_naive_interleaving = True + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # ------------------------------------------------------------------ + # Steps 2-5: Stepwise optimization sweeps without latency modeling. + # The goal is to build up good interleaving by combing stalls toward + # the middle repeatedly. Latency constraints are intentionally omitted + # here to allow more freedom in scheduling. + # ------------------------------------------------------------------ + + # Step 2: Sweep full region [0, 1], factor 6. + slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 300 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.objective_precision = 0.1 + slothy.config.split_heuristic_stepsize = 0.1 + slothy.config.split_heuristic_factor = 6 + slothy.config.constraints.model_latencies = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 3: Sweep first 60% [0, 0.6], push stalls toward the bottom. 
+ slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 180 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 0.6] + slothy.config.objective_precision = 0.1 + slothy.config.constraints.move_stalls_to_bottom = True + slothy.config.split_heuristic_stepsize = 0.1 + slothy.config.split_heuristic_factor = 4 + slothy.config.constraints.model_latencies = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 4: Sweep last 70% [0.3, 1], bottom-to-top, push stalls up, + # repeat once. + slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 240 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0.3, 1] + slothy.config.objective_precision = 0.1 + slothy.config.constraints.move_stalls_to_top = True + slothy.config.split_heuristic_bottom_to_top = True + slothy.config.split_heuristic_stepsize = 0.2 + slothy.config.split_heuristic_factor = 6 + slothy.config.split_heuristic_repeat = 1 + slothy.config.constraints.model_latencies = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 5: Sweep last 70% [0.3, 1] again, push stalls up, repeat once. 
+ slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 240 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0.3, 1] + slothy.config.objective_precision = 0.1 + slothy.config.constraints.move_stalls_to_top = True + slothy.config.split_heuristic_stepsize = 0.2 + slothy.config.split_heuristic_factor = 6 + slothy.config.split_heuristic_repeat = 1 + slothy.config.constraints.model_latencies = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # ------------------------------------------------------------------ + # Steps 6-8: Final optimization passes with full latency modeling. + # These refine the schedule to account for actual CPU latencies and + # use seam optimization across split boundaries. + # ------------------------------------------------------------------ + + # Step 6: Full sweep [0, 1] with latencies, seam optimization, repeat once. + slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 300 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.objective_precision = 0.1 + slothy.config.split_heuristic_stepsize = 0.05 + slothy.config.split_heuristic_optimize_seam = 10 + slothy.config.split_heuristic_factor = 8 + slothy.config.split_heuristic_repeat = 1 + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 7: Full sweep [0, 1] bottom-to-top, push stalls up, + # seam optimization, repeat twice. 
+ slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 300 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.split_heuristic_bottom_to_top = True + slothy.config.objective_precision = 0.1 + slothy.config.split_heuristic_stepsize = 0.05 + slothy.config.split_heuristic_optimize_seam = 10 + slothy.config.constraints.move_stalls_to_top = True + slothy.config.split_heuristic_factor = 8 + slothy.config.split_heuristic_repeat = 2 + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 8: Final full sweep [0, 1], push stalls up, seam optimization. + slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 512 + slothy.config.timeout = 300 + slothy.config.constraints.stalls_first_attempt = 32 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.objective_precision = 0.1 + slothy.config.split_heuristic_stepsize = 0.05 + slothy.config.split_heuristic_optimize_seam = 10 + slothy.config.constraints.move_stalls_to_top = True + slothy.config.split_heuristic_factor = 8 + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + +class x25519_scalarmult_ci(OptimizationRunner): + """Reduced x25519 optimization pipeline for CI. + + Covers each distinct feature used in x25519_scalarmult at least once, but + with aggressive parameters (high stall budgets, coarse step sizes, low + timeouts, no repeats) and with redundant passes removed: + + Step 0: Symbolic register resolution (functional-only, same as full). + Step 1: Naive interleaving preprocessing. + Step 2: No-latency sweep, move_stalls_to_bottom. 
+ Step 3: No-latency sweep, move_stalls_to_top + bottom_to_top. + Step 4: Latency-aware sweep with seam optimization. + """ + + def __init__(self, arch=AArch64_Neon, target=Target_CortexA55, timeout=None): + name = "x25519_scalarmult_ci" + infile = "X25519-AArch64-simple" + + super().__init__( + infile, + name, + funcname="x25519_scalarmult_alt_orig", + rename=True, + arch=arch, + target=target, + timeout=timeout, + subfolder=SUBFOLDER, + ) + + def core(self, slothy): + dry_run = slothy.config.constraints.functional_only + + # Step 0: Resolve symbolic registers (fast, always runs). + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.constraints.functional_only = True + slothy.config.constraints.allow_reordering = False + slothy.optimize(start="mainloop", end="end_label") + + if dry_run: + return + + slothy.config.constraints.functional_only = False + slothy.config.constraints.allow_reordering = True + + conf = slothy.config.copy() + + # Step 1: Naive interleaving preprocessing. + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.split_heuristic = True + slothy.config.split_heuristic_repeat = 0 + slothy.config.split_heuristic_estimate_performance = False + slothy.config.split_heuristic_preprocess_naive_interleaving = True + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 2: No-latency sweep, move stalls to bottom. + # Covers: model_latencies=False, move_stalls_to_bottom. 
+ slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 2 + slothy.config.timeout = 30 + slothy.config.constraints.stalls_first_attempt = 256 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0.1, 0.9] + slothy.config.objective_precision = 0.5 + slothy.config.split_heuristic_stepsize = 0.5 + slothy.config.split_heuristic_factor = 20 + slothy.config.constraints.move_stalls_to_bottom = True + slothy.config.constraints.model_latencies = False + slothy.config.split_heuristic_estimate_performance = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 3: No-latency sweep, move stalls to top, bottom-to-top direction. + # Covers: move_stalls_to_top, split_heuristic_bottom_to_top. + slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 2 + slothy.config.timeout = 30 + slothy.config.constraints.stalls_first_attempt = 256 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.objective_precision = 0.5 + slothy.config.constraints.move_stalls_to_top = True + slothy.config.split_heuristic_bottom_to_top = True + slothy.config.split_heuristic_stepsize = 0.5 + slothy.config.split_heuristic_factor = 20 + slothy.config.constraints.model_latencies = False + slothy.config.split_heuristic_estimate_performance = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + # Step 4: Latency-aware sweep with seam optimization. + # Covers: full latency modeling, split_heuristic_optimize_seam. 
+ slothy.config = conf.copy() + slothy.config.inputs_are_outputs = True + slothy.config.outputs = ["x0"] + slothy.config.variable_size = True + slothy.config.max_solutions = 2 + slothy.config.timeout = 30 + slothy.config.constraints.stalls_first_attempt = 256 + slothy.config.split_heuristic = True + slothy.config.split_heuristic_region = [0, 1] + slothy.config.objective_precision = 0.5 + slothy.config.split_heuristic_stepsize = 0.5 + slothy.config.split_heuristic_optimize_seam = 2 + slothy.config.split_heuristic_factor = 20 + slothy.config.split_heuristic_estimate_performance = False + slothy.config.selftest = False + slothy.optimize(start="mainloop", end="end_label") + + +example_instances = [ + x25519_scalarmult(), + x25519_scalarmult_ci(), +] diff --git a/examples/opt/aarch64/x25519/X25519-AArch64-simple_opt_a55.s b/examples/opt/aarch64/x25519/X25519-AArch64-simple_opt_a55.s new file mode 100644 index 00000000..86f4d9c1 --- /dev/null +++ b/examples/opt/aarch64/x25519/X25519-AArch64-simple_opt_a55.s @@ -0,0 +1,3701 @@ + /* X25519-AArch64 by Emil Lenngren (2018) + * + * To the extent possible under law, the person who associated CC0 with + * X25519-AArch64 has waived all copyright and related or neighboring rights + * to X25519-AArch64. + * + * You should have received a copy of the CC0 legalcode along with this + * work. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. + */ + +/* + * This is an AArch64 implementation of X25519. + * It follows the reference implementation where the representation of + * a field element [0..2^255-19) is represented by a 256-bit little endian integer, + * reduced modulo 2^256-38, and may possibly be in the range [2^256-38..2^256). + * The scalar is a 256-bit integer where certain bits are hardcoded per specification. + * + * The implementation runs in constant time (~145k cycles on Cortex-A53), + * and no conditional branches or memory access pattern depend on secret data. + */ + +/* + * Implementation manually de-interleaved and modularized for use with SLOTHY.
See + * + * Fast and Clean: Auditable High Performance Assembly via Constraint Solving + * (Abdulrahman, Becker, Kannwischer, Klein) + */ + +#include +#include "instruction_wrappers.i" + +#define STACK_MASK1 0 +#define STACK_MASK2 8 +#define STACK_A_0 16 +#define STACK_A_8 (STACK_A_0+ 8) +#define STACK_A_16 (STACK_A_0+16) +#define STACK_A_24 (STACK_A_0+24) +#define STACK_A_32 (STACK_A_0+32) +#define STACK_B_0 64 +#define STACK_B_8 (STACK_B_0+ 8) +#define STACK_B_16 (STACK_B_0+16) +#define STACK_B_24 (STACK_B_0+24) +#define STACK_B_32 (STACK_B_0+32) +#define STACK_CTR 104 +#define STACK_LASTBIT 108 +#define STACK_SCALAR 112 +#define STACK_X_0 168 +#define STACK_X_8 (STACK_X_0+ 8) +#define STACK_X_16 (STACK_X_0+16) +#define STACK_X_24 (STACK_X_0+24) +#define STACK_X_32 (STACK_X_0+32) +#define STACK_OUT_PTR (STACK_X_0+48) + + .cpu generic+fp+simd + .text + .align 2 + + // in: x0: pointer + // out: x0: loaded value + // .type load64unaligned, %function +load64unaligned: + ldrb w1, [x0] + ldrb w2, [x0, #1] + ldrb w3, [x0, #2] + ldrb w4, [x0, #3] + ldrb w5, [x0, #4] + ldrb w6, [x0, #5] + ldrb w7, [x0, #6] + ldrb w8, [x0, #7] + + orr w1, w1, w2, lsl #8 + orr w3, w3, w4, lsl #8 + orr w5, w5, w6, lsl #8 + orr w7, w7, w8, lsl #8 + + orr w1, w1, w3, lsl #16 + orr w5, w5, w7, lsl #16 + + orr x0, x1, x5, lsl #32 + + ret + // .size load64unaligned, .-load64unaligned + + // in: x0: pointer + // out: x0-x3: loaded value + // .type load256unaligned, %function +load256unaligned: + stp x29, x30, [sp, #-64]! 
+ mov x29, sp + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + + mov x19, x0 + bl load64unaligned + mov x20, x0 + add x0, x19, #8 + bl load64unaligned + mov x21, x0 + add x0, x19, #16 + bl load64unaligned + mov x22, x0 + add x0, x19, #24 + bl load64unaligned + mov x3, x0 + + mov x0, x20 + mov x1, x21 + mov x2, x22 + + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x29, x30, [sp], #64 + ret + // .size load256unaligned, .-load256unaligned + +vAB0 .req v0 +vAB1 .req v1 +vAB2 .req v2 +vAB3 .req v3 +vAB4 .req v4 +vAB5 .req v5 +vAB6 .req v6 +vAB7 .req v7 +vAB8 .req v8 +vAB9 .req v9 + +vT0 .req vAB0 +vT1 .req vAB1 +vT2 .req vAB2 +vT3 .req vAB3 +vT4 .req vAB4 +vT5 .req vAB5 +vT6 .req vAB6 +vT7 .req vAB7 +vT8 .req vAB8 +vT9 .req vAB9 + +vTA0 .req vAB0 +vTA1 .req vAB1 +vTA2 .req vAB2 +vTA3 .req vAB3 +vTA4 .req vAB4 +vTA5 .req vAB5 +vTA6 .req vAB6 +vTA7 .req vAB7 +vTA8 .req vAB8 +vTA9 .req vAB9 + +vBX0 .req v10 +vBX1 .req v11 +vBX2 .req v12 +vBX3 .req v13 +vBX4 .req v14 +vBX5 .req v15 +vBX6 .req v16 +vBX7 .req v17 +vBX8 .req v18 +vBX9 .req v19 + +vDC0 .req vBX0 +vDC1 .req vBX1 +vDC2 .req vBX2 +vDC3 .req vBX3 +vDC4 .req vBX4 +vDC5 .req vBX5 +vDC6 .req vBX6 +vDC7 .req vBX7 +vDC8 .req vBX8 +vDC9 .req vBX9 + +vADBC0 .req v20 +vADBC1 .req v21 +vADBC2 .req v22 +vADBC3 .req v23 +vADBC4 .req v24 +vADBC5 .req v25 +vADBC6 .req v26 +vADBC7 .req v27 +vADBC8 .req v28 +vADBC9 .req v29 + +vX4Z50 .req vADBC0 +vX4Z51 .req vADBC1 +vX4Z52 .req vADBC2 +vX4Z53 .req vADBC3 +vX4Z54 .req vADBC4 +vX4Z55 .req vADBC5 +vX4Z56 .req vADBC6 +vX4Z57 .req vADBC7 +vX4Z58 .req vADBC8 +vX4Z59 .req vADBC9 + +vMaskA .req v30 +vMaskB .req v15 + +vZ20 .req v1 +vZ22 .req v3 +vZ24 .req v5 +vZ26 .req v7 +vZ28 .req v9 + +vZ30 .req v11 +vZ32 .req v13 +vZ34 .req v15 +vZ36 .req v17 +vZ38 .req v19 + +vX20 .req v0 +vX22 .req v2 +vX24 .req v4 +vX26 .req v6 +vX28 .req v8 + +vX30 .req v10 +vX32 .req v12 +vX34 .req v14 +vX36 .req v16 +vX38 .req v18 + +vB0 .req v20 +vB2 .req v21 +vB4 .req v22 +vB6 .req v23 +vB8 
.req v24 + +vA0 .req v0 +vA2 .req v2 +vA4 .req v4 +vA6 .req v6 +vA8 .req v8 + +vC0 .req v10 +vC2 .req v12 +vC4 .req v14 +vC6 .req v16 +vC8 .req v18 + +vD0 .req v25 +vD2 .req v26 +vD4 .req v27 +vD6 .req v28 +vD8 .req v29 + +vF0 .req v1 +vF2 .req v3 +vF4 .req v5 +vF6 .req v7 +vF8 .req v9 + +vG0 .req v20 +vG2 .req v21 +vG4 .req v22 +vG6 .req v23 +vG8 .req v24 + +// F +sF0 .req x0 +sF1 .req x1 +sF2 .req x2 +sF3 .req x3 +sF4 .req x4 +sF5 .req x5 +sF6 .req x6 +sF7 .req x7 +sF8 .req x8 +sF9 .req x9 + +sAA0 .req x20 +sAA1 .req x21 +sAA2 .req x22 +sAA3 .req x23 +sAA4 .req x24 +sAA5 .req x25 +sAA6 .req x26 +sAA7 .req x27 +sAA8 .req x28 +sAA9 .req x19 + +stmp .req x2 + +// G +sG0 .req x0 +sG1 .req x1 +sG2 .req x2 +sG3 .req x3 +sG4 .req x4 +sG5 .req x5 +sG6 .req x6 +sG7 .req x7 +sG8 .req x8 +sG9 .req x9 + +sBB0 .req x0 +sBB1 .req x1 +sBB2 .req x2 +sBB3 .req x3 +sBB4 .req x4 +sBB5 .req x5 +sBB6 .req x6 +sBB7 .req x7 +sBB8 .req x8 +sBB9 .req x9 + +// E +sE0 .req x10 +sE1 .req x11 +sE2 .req x12 +sE3 .req x13 +sE4 .req x14 +sE5 .req x15 +sE6 .req x16 +sE7 .req x17 +sE8 .req x19 +sE9 .req x20 + +sZ40 .req x23 +sZ41 .req x3 +sZ42 .req x21 +sZ44 .req x7 +sZ45 .req x6 +sZ46 .req x24 +sZ48 .req x22 + +// D-form aliases for fcsel with named vector registers +dform_vA0 .req d0 +dform_vA2 .req d2 +dform_vA4 .req d4 +dform_vA6 .req d6 +dform_vA8 .req d8 + +dform_vB0 .req d20 +dform_vB2 .req d21 +dform_vB4 .req d22 +dform_vB6 .req d23 +dform_vB8 .req d24 + +dform_vC0 .req d10 +dform_vC2 .req d12 +dform_vC4 .req d14 +dform_vC6 .req d16 +dform_vC8 .req d18 + +dform_vD0 .req d25 +dform_vD2 .req d26 +dform_vD4 .req d27 +dform_vD6 .req d28 +dform_vD8 .req d29 + +dform_vF0 .req d1 +dform_vF2 .req d3 +dform_vF4 .req d5 +dform_vF6 .req d7 +dform_vF8 .req d9 + +dform_vG0 .req d20 +dform_vG2 .req d21 +dform_vG4 .req d22 +dform_vG6 .req d23 +dform_vG8 .req d24 + +START: + + +.macro scalar_stack_ldr sA, offset, name + ldr \sA\()0, [sp, #\offset\()_0] // @slothy:reads=['\\name\\()0'] + ldr \sA\()2, [sp, 
#\offset\()_8] // @slothy:reads=['\\name\\()8'] + ldr \sA\()4, [sp, #\offset\()_16] // @slothy:reads=['\\name\\()16'] + ldr \sA\()6, [sp, #\offset\()_24] // @slothy:reads=['\\name\\()24'] + ldr \sA\()8, [sp, #\offset\()_32] // @slothy:reads=['\\name\\()32'] +.endm + +.macro scalar_stack_str offset, sA, name + stp \sA\()0, \sA\()2, [sp, #\offset\()_0] // @slothy:writes=['\\name\\()0', '\\name\\()8'] + stp \sA\()4, \sA\()6, [sp, #\offset\()_16] // @slothy:writes=['\\name\\()16', '\\name\\()24'] + str \sA\()8, [sp, #\offset\()_32] // @slothy:writes=['\\name\\()32'] +.endm + +.macro vector_stack_str offset, vA, name + stp D<\vA\()0>, D<\vA\()2>, [sp, #\offset\()_0] // @slothy:writes=['\\name\\()0', '\\name\\()8'] + stp D<\vA\()4>, D<\vA\()6>, [sp, #\offset\()_16] // @slothy:writes=['\\name\\()16', '\\name\\()24'] + str D<\vA\()8>, [sp, #\offset\()_32] // @slothy:writes=['\\name\\()32'] +.endm + + // TODO: eliminate this explicit register assignment by converting stack_vld2_lane to AArch64Instruction + xvector_load_lane_tmp .req x26 + +.macro vector_load_lane vA, offset, lane, name + add xvector_load_lane_tmp, sp, #\offset\()_0 + ld2 { \vA\()0.s, \vA\()1.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=['\\name\\()0'] + ld2 { \vA\()2.s, \vA\()3.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=['\\name\\()8'] + ld2 { \vA\()4.s, \vA\()5.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=['\\name\\()16'] + ld2 { \vA\()6.s, \vA\()7.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=['\\name\\()24'] + ld2 { \vA\()8.s, \vA\()9.s }[\lane\()], [xvector_load_lane_tmp], #8 // @slothy:reads=['\\name\\()32'] +.endm + +.macro vector_sub_inner vC0, vC2, vC4, vC6, vC8, vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + // (2^255-19)*4 - vB + sub \vC0\().2s, v28.2s, \vB0\().2s + sub \vC2\().2s, v29.2s, \vB2\().2s + sub \vC4\().2s, v29.2s, \vB4\().2s + sub \vC6\().2s, v29.2s, \vB6\().2s + sub \vC8\().2s, v29.2s, \vB8\().2s + + // ... 
+ vA + add \vC0\().2s, \vA0\().2s, \vC0\().2s + add \vC2\().2s, \vA2\().2s, \vC2\().2s + add \vC4\().2s, \vA4\().2s, \vC4\().2s + add \vC6\().2s, \vA6\().2s, \vC6\().2s + add \vC8\().2s, \vA8\().2s, \vC8\().2s +.endm + +.macro vector_sub vC, vA, vB + vector_sub_inner \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + + +.macro vector_add_inner vC0, vC2, vC4, vC6, vC8, vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + add \vC0\().2s, \vA0\().2s, \vB0\().2s + add \vC2\().2s, \vA2\().2s, \vB2\().2s + add \vC4\().2s, \vA4\().2s, \vB4\().2s + add \vC6\().2s, \vA6\().2s, \vB6\().2s + add \vC8\().2s, \vA8\().2s, \vB8\().2s +.endm + +.macro vector_add vC, vA, vB + vector_add_inner \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro vector_cmov_inner vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8, vC0, vC2, vC4, vC6, vC8 + fcsel dform_\vA0, dform_\vB0, dform_\vC0, eq + fcsel dform_\vA2, dform_\vB2, dform_\vC2, eq + fcsel dform_\vA4, dform_\vB4, dform_\vC4, eq + fcsel dform_\vA6, dform_\vB6, dform_\vC6, eq + fcsel dform_\vA8, dform_\vB8, dform_\vC8, eq +.endm + +.macro vector_cmov vA, vB, vC + vector_cmov_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8, \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, +.endm + +.macro vector_transpose_inner vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vB0, vB2, vB4, vB6, vB8, vC0, vC2, vC4, vC6, vC8 + trn2 \vA1\().2s, \vB0\().2s, \vC0\().2s + trn1 \vA0\().2s, \vB0\().2s, \vC0\().2s + trn2 \vA3\().2s, \vB2\().2s, \vC2\().2s + trn1 \vA2\().2s, \vB2\().2s, \vC2\().2s + trn2 \vA5\().2s, \vB4\().2s, \vC4\().2s + trn1 \vA4\().2s, \vB4\().2s, \vC4\().2s + trn2 \vA7\().2s, \vB6\().2s, \vC6\().2s + trn1 \vA6\().2s, \vB6\().2s, \vC6\().2s + trn2 \vA9\().2s, \vB8\().2s, \vC8\().2s + trn1 \vA8\().2s, \vB8\().2s, 
\vC8\().2s +.endm + +.macro vector_transpose vA, vB, vC + vector_transpose_inner \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8, \vC\()0, \vC\()2, \vC\()4, \vC\()6, \vC\()8, +.endm + +.macro vector_to_scalar_inner sA0, sA2, sA4, sA6, sA8, vB0, vB2, vB4, vB6, vB8 + mov \sA0, \vB0\().d[0] + mov \sA2, \vB2\().d[0] + mov \sA4, \vB4\().d[0] + mov \sA6, \vB6\().d[0] + mov \sA8, \vB8\().d[0] +.endm + +.macro vector_to_scalar sA, vB + vector_to_scalar_inner \sA\()0, \sA\()2, \sA\()4, \sA\()6, \sA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro scalar_to_vector_inner vA0, vA2, vA4, vA6, vA8, sB0, sB2, sB4, sB6, sB8 + mov \vA0\().d[0], \sB0 + mov \vA2\().d[0], \sB2 + mov \vA4\().d[0], \sB4 + mov \vA6\().d[0], \sB6 + mov \vA8\().d[0], \sB8 +.endm + +.macro scalar_to_vector vA, sB + scalar_to_vector_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \sB\()0, \sB\()2, \sB\()4, \sB\()6, \sB\()8 +.endm + + +.macro vector_extract_upper_inner vA0, vA2, vA4, vA6, vA8, vB0, vB2, vB4, vB6, vB8 + mov \vA0\().d[0], \vB0\().d[1] + mov \vA2\().d[0], \vB2\().d[1] + mov \vA4\().d[0], \vB4\().d[1] + mov \vA6\().d[0], \vB6\().d[1] + mov \vA8\().d[0], \vB8\().d[1] +.endm + +.macro vector_extract_upper vA, vB + vector_extract_upper_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()2, \vB\()4, \vB\()6, \vB\()8 +.endm + +.macro vector_compress_inner vA0, vA2, vA4, vA6, vA8, vB0, vB1, vB2, vB3, vB4, vB5, vB6, vB7, vB8, vB9 + trn1 \vA0\().4s, \vB0\().4s, \vB1\().4s + trn1 \vA2\().4s, \vB2\().4s, \vB3\().4s + trn1 \vA4\().4s, \vB4\().4s, \vB5\().4s + trn1 \vA6\().4s, \vB6\().4s, \vB7\().4s + trn1 \vA8\().4s, \vB8\().4s, \vB9\().4s +.endm + +.macro vector_compress vA, vB + vector_compress_inner \vA\()0, \vA\()2, \vA\()4, \vA\()6, \vA\()8, \vB\()0, \vB\()1, \vB\()2, \vB\()3, \vB\()4, \vB\()5, \vB\()6, \vB\()7, \vB\()8, \vB\()9, +.endm + +.macro scalar_clear_carries_inner sA0, sA1, 
sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + and \sA1, \sA1, #0x1ffffff + and \sA3, \sA3, #0x1ffffff + and \sA5, \sA5, #0x1ffffff + and \sA7, \sA7, #0x1ffffff + mov W<\sA0>, W<\sA0> + mov W<\sA2>, W<\sA2> + mov W<\sA4>, W<\sA4> + mov W<\sA6>, W<\sA6> + mov W<\sA8>, W<\sA8> +.endm + +.macro scalar_clear_carries sA + scalar_clear_carries_inner \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + +.macro scalar_decompress_inner sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + lsr \sA1, \sA0, #32 + lsr \sA3, \sA2, #32 + lsr \sA5, \sA4, #32 + lsr \sA7, \sA6, #32 + lsr \sA9, \sA8, #32 +.endm + +.macro scalar_decompress sA + scalar_decompress_inner \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + + // TODO: eliminate those. should be easy + vR_l4h4l5h5 .req vADBC4 + vR_l6h6l7h7 .req vADBC5 + + vR_l0h0l1h1 .req vADBC0 + vR_l2h2l3h3 .req vADBC1 + + vR_l0123 .req vADBC4 + vR_l4567 .req vADBC6 + vR_h0123 .req vADBC5 + vR_h4567 .req vADBC7 + vR_l89h89 .req vADBC8 + + vR_h89xx .req vADBC9 + + vSum0123 .req vADBC0 + vSum4567 .req vADBC1 + vSum89xx .req vADBC2 + + vDiff0123 .req v10 + vDiff4567 .req v11 + vDiff89xx .req v12 + + // TODO: eliminate those explicit register assignments by converting stack_vld1r and stack_vldr_bform to AArch64Instruction + vrepack_inner_tmp .req v19 + vrepack_inner_tmp2 .req v0 + +.macro vector_addsub_repack_inner vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vC0, vC1, vC2, vC3, vC4, vC5, vC6, vC7, vC8, vC9 + uzp1 vR_l4h4l5h5.4s, \vC4\().4s, \vC5\().4s + uzp1 vR_l6h6l7h7.4s, \vC6\().4s, \vC7\().4s + ld1r {vrepack_inner_tmp.2d}, [sp] // @slothy:reads=mask1 + uzp1 vR_l4567.4s, vR_l4h4l5h5.4s, vR_l6h6l7h7.4s + uzp2 vR_h4567.4s, vR_l4h4l5h5.4s, vR_l6h6l7h7.4s + trn1 vR_l89h89.4s, \vC8\().4s, \vC9\().4s + ldr B, [sp, #STACK_MASK2] // @slothy:reads=mask2 + uzp1 vR_l0h0l1h1.4s, \vC0\().4s, \vC1\().4s + uzp1 vR_l2h2l3h3.4s, \vC2\().4s, \vC3\().4s + mov 
vR_h89xx.d[0], vR_l89h89.d[1] + uzp1 vR_l0123.4s, vR_l0h0l1h1.4s, vR_l2h2l3h3.4s + uzp2 vR_h0123.4s, vR_l0h0l1h1.4s, vR_l2h2l3h3.4s + add vDiff4567.4s, vR_l4567.4s, vrepack_inner_tmp.4s + add vDiff89xx.2s, vR_l89h89.2s, vrepack_inner_tmp.2s + mov vrepack_inner_tmp.b[0], vrepack_inner_tmp2.b[0] + add vSum0123.4s, vR_l0123.4s, vR_h0123.4s + add vSum4567.4s, vR_l4567.4s, vR_h4567.4s + add vSum89xx.2s, vR_l89h89.2s, vR_h89xx.2s + add vDiff0123.4s, vR_l0123.4s, vrepack_inner_tmp.4s + sub vDiff4567.4s, vDiff4567.4s, vR_h4567.4s + sub vDiff0123.4s, vDiff0123.4s, vR_h0123.4s + sub vDiff89xx.2s, vDiff89xx.2s, vR_h89xx.2s + zip1 \vA0\().4s, vDiff0123.4s, vSum0123.4s + zip2 \vA2\().4s, vDiff0123.4s, vSum0123.4s + zip1 \vA4\().4s, vDiff4567.4s, vSum4567.4s + zip2 \vA6\().4s, vDiff4567.4s, vSum4567.4s + zip1 \vA8\().2s, vDiff89xx.2s, vSum89xx.2s + zip2 \vA9\().2s, vDiff89xx.2s, vSum89xx.2s + mov \vA1\().d[0], \vA0\().d[1] + mov \vA3\().d[0], \vA2\().d[1] + mov \vA5\().d[0], \vA4\().d[1] + mov \vA7\().d[0], \vA6\().d[1] +.endm + +.macro vector_addsub_repack vA, vC +vector_addsub_repack_inner \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vC\()0, \vC\()1, \vC\()2, \vC\()3, \vC\()4, \vC\()5, \vC\()6, \vC\()7, \vC\()8, \vC\()9 +.endm + +// sAA0 .. sAA9 output AA = A^2 +// sA0 .. 
sA9 input A +// TODO: simplify (this is still the same instruction order as before; we can make it simpler and leave the re-ordering to Slothy) +.macro scalar_sqr_inner sAA0, sAA1, sAA2, sAA3, sAA4, sAA5, sAA6, sAA7, sAA8, sAA9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9 + lsr \sA1, \sA0, #32 + lsr \sA3, \sA2, #32 + lsr \sA5, \sA4, #32 + lsr \sA7, \sA6, #32 + lsr \sA9, \sA8, #32 + add X, \sA9, \sA9 + add X, \sA8, \sA8 + add X, \sA7, \sA7 + add X, \sA6, \sA6 + add X, \sA5, \sA5 + add X, \sA4, \sA4 + add X, \sA3, \sA3 + add X, \sA2, \sA2 + add X, \sA1, \sA1 + umull X, W<\sA4>, W<\sA4> + umull X, W<\sA4>, W + mul W<\sA9>, W<\sA9>, W + mul W<\sA7>, W<\sA7>, W + mul W<\sA5>, W<\sA5>, W + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA0>, W, X + umull X, W<\sA0>, W<\sA0> + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umull X, W<\sA0>, W + umaddl X, W<\sA0>, W, X + mul W, W<\sA6>, W + umaddl X, W<\sA1>, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + umaddl X, W, W, X + umaddl X, W<\sA1>, W, X + mul W, W<\sA8>, W + umaddl X, W<\sA2>, W<\sA2>, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA2>, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W, W, X + umaddl X, W<\sA3>, W, X + umaddl X, W<\sA8>, W, X + umaddl X, W<\sA6>, W, X + add X, X, X, lsr #26 + umaddl X, W<\sA5>, W, X + add X, X, X, lsr #25 + bic X, X, #0x1ffffff + add X, X, X, lsr #24 + and X, X, #0x1ffffff + add X, X, X, lsr #21 + umaddl X, W<\sA7>, W, X + add X, X, X + add X, X, X + add X, X, X + add X, X, X + umaddl X, W, W, X + umaddl X, W, W, X + and X, X, #0x3ffffff + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W<\sA7>, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl X, W, W, X + umaddl
X, W, W, X + umaddl X, W, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + umaddl X, W<\sA9>, W, X + add \sAA1, X, X, lsr #26 + and \sAA0, X, #0x3ffffff + add \sAA2, X, \sAA1, lsr #25 + bfi \sAA0, \sAA1, #32, #25 + add \sAA3, X, \sAA2, lsr #26 + and \sAA2, \sAA2, #0x3ffffff + add \sAA4, X, \sAA3, lsr #25 + bfi \sAA2, \sAA3, #32, #25 + add \sAA5, X, \sAA4, lsr #26 + and \sAA4, \sAA4, #0x3ffffff + add \sAA6, X, \sAA5, lsr #25 + bfi \sAA4, \sAA5, #32, #25 + add \sAA7, X, \sAA6, lsr #26 + and \sAA6, \sAA6, #0x3ffffff + add \sAA8, X, \sAA7, lsr #25 + bfi \sAA6, \sAA7, #32, #25 + add \sAA9, X, \sAA8, lsr #26 + and \sAA8, \sAA8, #0x3ffffff + bfi \sAA8, \sAA9, #32, #26 +.endm + +.macro scalar_sqr sAA, sA +scalar_sqr_inner \sAA\()0, \sAA\()1, \sAA\()2, \sAA\()3, \sAA\()4, \sAA\()5, \sAA\()6, \sAA\()7, \sAA\()8, \sAA\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9 +.endm + +// sC0 .. sC9 output C = A*B +// sA0 .. sA9 input A +// sB0 .. 
sB9 input B +.macro scalar_mul_inner sC0, sC1, sC2, sC3, sC4, sC5, sC6, sC7, sC8, sC9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9, sB0, sB1, sB2, sB3, sB4, sB5, sB6, sB7, sB8, sB9 + + + mul W, W<\sA1>, W + mul W, W<\sA2>, W + mul W, W<\sA3>, W + mul W, W<\sA5>, W + mul W, W<\sA6>, W + mul W, W<\sA7>, W + mul W, W<\sA8>, W + mul W, W<\sA9>, W + + umull X, W<\sA1>, W<\sB8> + umaddl X, W<\sA3>, W<\sB6>, X + umaddl X, W<\sA5>, W<\sB4>, X + umaddl X, W<\sA7>, W<\sB2>, X + umaddl X, W<\sA9>, W<\sB0>, X + umaddl X, W<\sA0>, W<\sB9>, X + umaddl X, W<\sA2>, W<\sB7>, X + umaddl X, W<\sA4>, W<\sB5>, X + umaddl X, W<\sA6>, W<\sB3>, X + umaddl X, W<\sA8>, W<\sB1>, X + + umull X, W<\sA1>, W<\sB7> + umaddl X, W<\sA3>, W<\sB5>, X + umaddl X, W<\sA5>, W<\sB3>, X + umaddl X, W<\sA7>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + add X, X, X + umaddl X, W<\sA0>, W<\sB8>, X + umaddl X, W<\sA2>, W<\sB6>, X + umaddl X, W<\sA4>, W<\sB4>, X + umaddl X, W<\sA6>, W<\sB2>, X + umaddl X, W<\sA8>, W<\sB0>, X + + + umull X, W<\sA1>, W<\sB6> + umaddl X, W<\sA3>, W<\sB4>, X + umaddl X, W<\sA5>, W<\sB2>, X + umaddl X, W<\sA7>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA0>, W<\sB7>, X + umaddl X, W<\sA2>, W<\sB5>, X + umaddl X, W<\sA4>, W<\sB3>, X + umaddl X, W<\sA6>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + + umull X, W<\sA1>, W<\sB5> + umaddl X, W<\sA3>, W<\sB3>, X + umaddl X, W<\sA5>, W<\sB1>, X + umaddl X, W, W<\sB9>, X + umaddl X, W, W<\sB7>, X + add X, X, X + umaddl X, W<\sA0>, W<\sB6>, X + umaddl X, W<\sA2>, W<\sB4>, X + umaddl X, W<\sA4>, W<\sB2>, X + umaddl X, W<\sA6>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + + umull X, W, W<\sB6> + umaddl X, W<\sA5>, W<\sB0>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA3>, W<\sB2>, X + umaddl X, W<\sA1>, W<\sB4>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA4>, W<\sB1>, X + umaddl X, W<\sA2>, W<\sB3>, X + umaddl X, W<\sA0>, W<\sB5>, X + + umull X, W, W<\sB5> + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, 
W<\sA3>, W<\sB1>, X + umaddl X, W<\sA1>, W<\sB3>, X + add X, X, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA4>, W<\sB0>, X + umaddl X, W<\sA2>, W<\sB2>, X + umaddl X, W<\sA0>, W<\sB4>, X + + umull X, W, W<\sB4> + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA3>, W<\sB0>, X + umaddl X, W<\sA1>, W<\sB2>, X + mul W, W<\sA4>, W + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA2>, W<\sB1>, X + umaddl X, W<\sA0>, W<\sB3>, X + + add X, X, X, lsr #26 + and \sC4, X, #0x3ffffff + add X, X, X, lsr #25 + and \sC5, X, #0x1ffffff + add X, X, X, lsr #26 + and \sC6, X, #0x3ffffff + add X, X, X, lsr #25 + bfi \sC6, X, #32, #25 + add X, X, X, lsr #26 + and \sC8, X, #0x3ffffff + bic X, X, #0x3ffffff + lsr X, X, #26 + bfi \sC8, X, #32, #26 + add X, X, X, lsr #25 + add X, X, X, lsr #22 + + umaddl X, W, W<\sB1>, X + umaddl X, W, W<\sB3>, X + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + add X, X, X + umaddl X, W, W<\sB2>, X + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA0>, W<\sB0>, X + + umull X, W, W<\sB2> + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA1>, W<\sB0>, X + umaddl X, W, W<\sB3>, X + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA0>, W<\sB1>, X + + umull X, W, W<\sB3> + umaddl X, W, W<\sB5>, X + umaddl X, W, W<\sB7>, X + umaddl X, W, W<\sB9>, X + umaddl X, W<\sA1>, W<\sB1>, X + add X, X, X + umaddl X, W, W<\sB4>, X + umaddl X, W, W<\sB6>, X + umaddl X, W, W<\sB8>, X + umaddl X, W<\sA2>, W<\sB0>, X + umaddl X, W<\sA0>, W<\sB2>, X + + add \sC1, X, X, lsr #26 + and \sC0, X, #0x3ffffff + add \sC2, X, \sC1, lsr #25 + bfi \sC0, \sC1, #32, #25 + add X, X, \sC2, lsr #26 + and \sC2, \sC2, #0x3ffffff + add \sC4, \sC4, X, lsr #25 + bfi \sC2, X, #32, #25 + add \sC5, \sC5, \sC4, lsr #26 + and \sC4, \sC4, #0x3ffffff + bfi 
\sC4, \sC5, #32, #26 +.endm + +.macro scalar_mul sC, sA, sB +scalar_mul_inner \sC\()0, \sC\()1, \sC\()2, \sC\()3, \sC\()4, \sC\()5, \sC\()6, \sC\()7, \sC\()8, \sC\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9, \sB\()0, \sB\()1, \sB\()2, \sB\()3, \sB\()4, \sB\()5, \sB\()6, \sB\()7, \sB\()8, \sB\()9 +.endm + +xtmp_scalar_sub_0 .req x21 + +// sC0 .. sC4 output C = A + 4p - B (registers may be the same as A) +// sA0 .. sA4 first operand A +// sB0 .. sB4 second operand B +.macro scalar_sub_inner sC0, sC1, sC2, sC3, sC4, sA0, sA1, sA2, sA3, sA4, sB0, sB1, sB2, sB3, sB4 + + ldr xtmp_scalar_sub_0, =0x07fffffe07fffffc + add \sC1, \sA1, xtmp_scalar_sub_0 + add \sC2, \sA2, xtmp_scalar_sub_0 + add \sC3, \sA3, xtmp_scalar_sub_0 + add \sC4, \sA4, xtmp_scalar_sub_0 + movk xtmp_scalar_sub_0, #0xffb4 + add \sC0, \sA0, xtmp_scalar_sub_0 + sub \sC0, \sC0, \sB0 + sub \sC1, \sC1, \sB1 + sub \sC2, \sC2, \sB2 + sub \sC3, \sC3, \sB3 + sub \sC4, \sC4, \sB4 +.endm + +.macro scalar_sub sC, sA, sB +scalar_sub_inner \sC\()0, \sC\()2, \sC\()4, \sC\()6, \sC\()8, \sA\()0, \sA\()2, \sA\()4, \sA\()6, \sA\()8, \sB\()0, \sB\()2, \sB\()4, \sB\()6, \sB\()8 +.endm + + +.macro scalar_addm_inner sC0, sC1, sC2, sC3, sC4, sC5, sC6, sC7, sC8, sC9, sA0, sA1, sA2, sA3, sA4, sA5, sA6, sA7, sA8, sA9, sB0, sB1, sB2, sB3, sB4, sB5, sB6, sB7, sB8, sB9, multconst + + ldr X, =\multconst + umaddl \sC9, W<\sB9>, W, \sA9 + umaddl \sC0, W<\sB0>, W, \sA0 + umaddl \sC1, W<\sB1>, W, \sA1 + umaddl \sC2, W<\sB2>, W, \sA2 + lsr X, \sC9, #25 + umaddl \sC3, W<\sB3>, W, \sA3 + and \sC9, \sC9, #0x1ffffff + umaddl \sC4, W<\sB4>, W, \sA4 + add \sC0, \sC0, X + umaddl \sC5, W<\sB5>, W, \sA5 + add \sC0, \sC0, X, lsl #1 + umaddl \sC6, W<\sB6>, W, \sA6 + add \sC0, \sC0, X, lsl #4 + umaddl \sC7, W<\sB7>, W, \sA7 + umaddl \sC8, W<\sB8>, W, \sA8 + + add \sC1, \sC1, \sC0, lsr #26 + and \sC0, \sC0, #0x3ffffff + add \sC2, \sC2, \sC1, lsr #25 + and \sC1, \sC1, #0x1ffffff + add \sC3, \sC3, \sC2, 
lsr #26 + and \sC2, \sC2, #0x3ffffff + add \sC4, \sC4, \sC3, lsr #25 + and \sC3, \sC3, #0x1ffffff + add \sC5, \sC5, \sC4, lsr #26 + and \sC4, \sC4, #0x3ffffff + add \sC6, \sC6, \sC5, lsr #25 + and \sC5, \sC5, #0x1ffffff + add \sC7, \sC7, \sC6, lsr #26 + and \sC6, \sC6, #0x3ffffff + add \sC8, \sC8, \sC7, lsr #25 + and \sC7, \sC7, #0x1ffffff + add \sC9, \sC9, \sC8, lsr #26 + and \sC8, \sC8, #0x3ffffff +.endm + +.macro scalar_addm sC, sA, sB, multconst +scalar_addm_inner \sC\()0, \sC\()1, \sC\()2, \sC\()3, \sC\()4, \sC\()5, \sC\()6, \sC\()7, \sC\()8, \sC\()9, \sA\()0, \sA\()1, \sA\()2, \sA\()3, \sA\()4, \sA\()5, \sA\()6, \sA\()7, \sA\()8, \sA\()9, \sB\()0, \sB\()1, \sB\()2, \sB\()3, \sB\()4, \sB\()5, \sB\()6, \sB\()7, \sB\()8, \sB\()9, \multconst +.endm + +// vAA0 .. vAA9 output AA = A^2 +// vA0 .. vA9 input A +.macro vector_sqr_inner vAA0, vAA1, vAA2, vAA3, vAA4, vAA5, vAA6, vAA7, vAA8, vAA9, vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9 + shl V.2s, \vA9\().2s, #1 + shl V.2s, \vA8\().2s, #1 + shl V.2s, \vA7\().2s, #1 + shl V.2s, \vA6\().2s, #1 + shl V.2s, \vA5\().2s, #1 + shl V.2s, \vA4\().2s, #1 + shl V.2s, \vA3\().2s, #1 + shl V.2s, \vA2\().2s, #1 + shl V.2s, \vA1\().2s, #1 + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, \vA3\().2s, V.2s + umlal V.2d, \vA4\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA4\().2s, \vA4\().2s + mul V.2s, \vA9\().2s, vconst19.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, \vA3\().2s, V.2s + umlal V.2d, V.2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umlal V.2d, V.2s, \vA3\().2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umlal V.2d, \vA2\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, \vA2\().2s, \vA2\().2s + umull 
V.2d, \vA0\().2s, V.2s + umlal V.2d, \vA1\().2s, V.2s + umull V.2d, \vA0\().2s, V.2s + umlal V.2d, V.2s, \vA1\().2s + umull V.2d, \vA0\().2s, V.2s + umull V.2d, \vA0\().2s, \vA0\().2s + usra V.2d, V.2d, #26 + and V.16b, V.16b, vMaskA.16b + mul V.2s, \vA8\().2s, vconst19.2s + bic V.16b, V.16b, vMaskB.16b + and \vA9\().16b, V.16b, vMaskB.16b + usra V.2d, V.2d, #25 + mul V.2s, \vA7\().2s, vconst19.2s + usra V.2d, V.2d, #24 + mul V.2s, \vA6\().2s, vconst19.2s + usra V.2d, V.2d, #21 + mul V.2s, \vA5\().2s, vconst19.2s + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + shl V.2s, V.2s, #1 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, \vA6\().2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + umlal V.2d, V.2s, \vA8\().2s + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #26 + umlal V.2d, V.2s, V.2s + usra V.2d, V.2d, #25 + usra V.2d, V.2d, #26 + usra V.2d, V.2d, #25 + usra \vAA9\().2d, V.2d, #26 + and \vAA4\().16b, V.16b, vMaskA.16b + and \vAA5\().16b, V.16b, vMaskB.16b + and \vAA0\().16b, V.16b, vMaskA.16b + and \vAA6\().16b, V.16b, vMaskA.16b + and \vAA1\().16b, V.16b, vMaskB.16b + and \vAA7\().16b, V.16b, vMaskB.16b + and \vAA2\().16b, V.16b, vMaskA.16b + and \vAA8\().16b, V.16b, vMaskA.16b + and \vAA3\().16b, V.16b, vMaskB.16b +.endm + +.macro vector_sqr vAA, vA +vector_sqr_inner \vAA\()0, \vAA\()1, \vAA\()2, \vAA\()3, \vAA\()4, \vAA\()5, \vAA\()6, \vAA\()7, \vAA\()8, \vAA\()9, \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, 
\vA\()8, \vA\()9 +.endm + +// vC0 .. vC9 output C = A*B +// vA0 .. vA9 first operand A +// vB0 .. vB9 second operand B +.macro vector_mul_inner vC0, vC1, vC2, vC3, vC4, vC5, vC6, vC7, vC8, vC9, vA0, vA1, vA2, vA3, vA4, vA5, vA6, vA7, vA8, vA9, vB0, vB1, vB2, vB3, vB4, vB5, vB6, vB7, vB8, vB9 + umull \vC9\().2d, \vA0\().2s, \vB9\().2s + umlal \vC9\().2d, \vA2\().2s, \vB7\().2s + umlal \vC9\().2d, \vA4\().2s, \vB5\().2s + umlal \vC9\().2d, \vA6\().2s, \vB3\().2s + umlal \vC9\().2d, \vA8\().2s, \vB1\().2s + mul \vB9\().2s, \vB9\().2s, vconst19.2s + umull \vC8\().2d, \vA1\().2s, \vB7\().2s + umlal \vC8\().2d, \vA3\().2s, \vB5\().2s + umlal \vC8\().2d, \vA5\().2s, \vB3\().2s + umlal \vC8\().2d, \vA7\().2s, \vB1\().2s + umlal \vC8\().2d, \vA9\().2s, \vB9\().2s + umlal \vC9\().2d, \vA1\().2s, \vB8\().2s + umlal \vC9\().2d, \vA3\().2s, \vB6\().2s + umlal \vC9\().2d, \vA5\().2s, \vB4\().2s + umlal \vC9\().2d, \vA7\().2s, \vB2\().2s + umlal \vC9\().2d, \vA9\().2s, \vB0\().2s + shl \vC8\().2d, \vC8\().2d, #1 + umull \vC7\().2d, \vA0\().2s, \vB7\().2s + umlal \vC7\().2d, \vA2\().2s, \vB5\().2s + umlal \vC7\().2d, \vA4\().2s, \vB3\().2s + umlal \vC7\().2d, \vA6\().2s, \vB1\().2s + umlal \vC7\().2d, \vA8\().2s, \vB9\().2s + mul \vB7\().2s, \vB7\().2s, vconst19.2s + umlal \vC8\().2d, \vA0\().2s, \vB8\().2s + umlal \vC8\().2d, \vA2\().2s, \vB6\().2s + umlal \vC8\().2d, \vA4\().2s, \vB4\().2s + umlal \vC8\().2d, \vA6\().2s, \vB2\().2s + umlal \vC8\().2d, \vA8\().2s, \vB0\().2s + mul \vB8\().2s, \vB8\().2s, vconst19.2s + umull \vC6\().2d, \vA1\().2s, \vB5\().2s + umlal \vC6\().2d, \vA3\().2s, \vB3\().2s + umlal \vC6\().2d, \vA5\().2s, \vB1\().2s + umlal \vC6\().2d, \vA7\().2s, \vB9\().2s + umlal \vC6\().2d, \vA9\().2s, \vB7\().2s + umlal \vC7\().2d, \vA1\().2s, \vB6\().2s + umlal \vC7\().2d, \vA3\().2s, \vB4\().2s + umlal \vC7\().2d, \vA5\().2s, \vB2\().2s + umlal \vC7\().2d, \vA7\().2s, \vB0\().2s + umlal \vC7\().2d, \vA9\().2s, \vB8\().2s + shl \vC6\().2d, \vC6\().2d, #1 + umull 
\vC5\().2d, \vA0\().2s, \vB5\().2s + umlal \vC5\().2d, \vA2\().2s, \vB3\().2s + umlal \vC5\().2d, \vA4\().2s, \vB1\().2s + umlal \vC5\().2d, \vA6\().2s, \vB9\().2s + umlal \vC5\().2d, \vA8\().2s, \vB7\().2s + mul \vB5\().2s, \vB5\().2s, vconst19.2s + umlal \vC6\().2d, \vA0\().2s, \vB6\().2s + umlal \vC6\().2d, \vA2\().2s, \vB4\().2s + umlal \vC6\().2d, \vA4\().2s, \vB2\().2s + umlal \vC6\().2d, \vA6\().2s, \vB0\().2s + umlal \vC6\().2d, \vA8\().2s, \vB8\().2s + mul \vB6\().2s, \vB6\().2s, vconst19.2s + umull \vC4\().2d, \vA1\().2s, \vB3\().2s + umlal \vC4\().2d, \vA3\().2s, \vB1\().2s + umlal \vC4\().2d, \vA5\().2s, \vB9\().2s + umlal \vC4\().2d, \vA7\().2s, \vB7\().2s + umlal \vC4\().2d, \vA9\().2s, \vB5\().2s + umlal \vC5\().2d, \vA1\().2s, \vB4\().2s + umlal \vC5\().2d, \vA3\().2s, \vB2\().2s + umlal \vC5\().2d, \vA5\().2s, \vB0\().2s + umlal \vC5\().2d, \vA7\().2s, \vB8\().2s + umlal \vC5\().2d, \vA9\().2s, \vB6\().2s + shl \vC4\().2d, \vC4\().2d, #1 + umull \vC3\().2d, \vA0\().2s, \vB3\().2s + umlal \vC3\().2d, \vA2\().2s, \vB1\().2s + umlal \vC3\().2d, \vA4\().2s, \vB9\().2s + umlal \vC3\().2d, \vA6\().2s, \vB7\().2s + umlal \vC3\().2d, \vA8\().2s, \vB5\().2s + mul \vB3\().2s, \vB3\().2s, vconst19.2s + umlal \vC4\().2d, \vA0\().2s, \vB4\().2s + umlal \vC4\().2d, \vA2\().2s, \vB2\().2s + umlal \vC4\().2d, \vA4\().2s, \vB0\().2s + umlal \vC4\().2d, \vA6\().2s, \vB8\().2s + umlal \vC4\().2d, \vA8\().2s, \vB6\().2s + mul \vB4\().2s, \vB4\().2s, vconst19.2s + umull \vC2\().2d, \vA1\().2s, \vB1\().2s + umlal \vC2\().2d, \vA3\().2s, \vB9\().2s + umlal \vC2\().2d, \vA5\().2s, \vB7\().2s + umlal \vC2\().2d, \vA7\().2s, \vB5\().2s + umlal \vC2\().2d, \vA9\().2s, \vB3\().2s + umlal \vC3\().2d, \vA1\().2s, \vB2\().2s + umlal \vC3\().2d, \vA3\().2s, \vB0\().2s + umlal \vC3\().2d, \vA5\().2s, \vB8\().2s + umlal \vC3\().2d, \vA7\().2s, \vB6\().2s + umlal \vC3\().2d, \vA9\().2s, \vB4\().2s + shl \vC2\().2d, \vC2\().2d, #1 + umull \vC1\().2d, \vA0\().2s, \vB1\().2s + umlal 
\vC1\().2d, \vA2\().2s, \vB9\().2s + umlal \vC1\().2d, \vA4\().2s, \vB7\().2s + umlal \vC1\().2d, \vA6\().2s, \vB5\().2s + umlal \vC1\().2d, \vA8\().2s, \vB3\().2s + mul \vB1\().2s, \vB1\().2s, vconst19.2s + umlal \vC2\().2d, \vA0\().2s, \vB2\().2s + umlal \vC2\().2d, \vA2\().2s, \vB0\().2s + umlal \vC2\().2d, \vA4\().2s, \vB8\().2s + umlal \vC2\().2d, \vA6\().2s, \vB6\().2s + umlal \vC2\().2d, \vA8\().2s, \vB4\().2s + mul \vB2\().2s, \vB2\().2s, vconst19.2s + umull \vC0\().2d, \vA1\().2s, \vB9\().2s + umlal \vC0\().2d, \vA3\().2s, \vB7\().2s + umlal \vC0\().2d, \vA5\().2s, \vB5\().2s + ushr vMaskB.2d, vMaskA.2d, #1 + usra \vC3\().2d, \vC2\().2d, #26 + and \vC2\().16b, \vC2\().16b, vMaskA.16b + umlal \vC1\().2d, \vA1\().2s, \vB0\().2s + usra \vC4\().2d, \vC3\().2d, #25 + and \vC3\().16b, \vC3\().16b, vMaskB.16b + umlal \vC0\().2d, \vA7\().2s, \vB3\().2s + usra \vC5\().2d, \vC4\().2d, #26 + and \vC4\().16b, \vC4\().16b, vMaskA.16b + umlal \vC1\().2d, \vA3\().2s, \vB8\().2s + usra \vC6\().2d, \vC5\().2d, #25 + and \vC5\().16b, \vC5\().16b, vMaskB.16b + umlal \vC0\().2d, \vA9\().2s, \vB1\().2s + usra \vC7\().2d, \vC6\().2d, #26 + and \vC6\().16b, \vC6\().16b, vMaskA.16b + umlal \vC1\().2d, \vA5\().2s, \vB6\().2s + umlal \vC1\().2d, \vA7\().2s, \vB4\().2s + umlal \vC1\().2d, \vA9\().2s, \vB2\().2s + usra \vC8\().2d, \vC7\().2d, #25 + and \vC7\().16b, \vC7\().16b, vMaskB.16b + shl \vC0\().2d, \vC0\().2d, #1 + usra \vC9\().2d, \vC8\().2d, #26 + and \vC8\().16b, \vC8\().16b, vMaskA.16b + umlal \vC0\().2d, \vA0\().2s, \vB0\().2s + umlal \vC0\().2d, \vA2\().2s, \vB8\().2s + umlal \vC0\().2d, \vA4\().2s, \vB6\().2s + umlal \vC0\().2d, \vA6\().2s, \vB4\().2s + umlal \vC0\().2d, \vA8\().2s, \vB2\().2s + bic \vB9\().16b, \vC9\().16b, vMaskB.16b + and \vC9\().16b, \vC9\().16b, vMaskB.16b + usra \vC0\().2d, \vB9\().2d, #25 + usra \vC0\().2d, \vB9\().2d, #24 + usra \vC0\().2d, \vB9\().2d, #21 + usra \vC1\().2d, \vC0\().2d, #26 + and \vC0\().16b, \vC0\().16b, vMaskA.16b + usra 
\vC2\().2d, \vC1\().2d, #25 + and \vC1\().16b, \vC1\().16b, vMaskB.16b + usra \vC3\().2d, \vC2\().2d, #26 + and \vC2\().16b, \vC2\().16b, vMaskA.16b +.endm + +.macro vector_mul vC, vA, vB +vector_mul_inner \vC\()0, \vC\()1, \vC\()2, \vC\()3, \vC\()4, \vC\()5, \vC\()6, \vC\()7, \vC\()8, \vC\()9, \vA\()0, \vA\()1, \vA\()2, \vA\()3, \vA\()4, \vA\()5, \vA\()6, \vA\()7, \vA\()8, \vA\()9, \vB\()0, \vB\()1, \vB\()2, \vB\()3, \vB\()4, \vB\()5, \vB\()6, \vB\()7, \vB\()8, \vB\()9 +.endm + + // in: x1: scalar pointer, x2: base point pointer + // out: x0: result pointer + .global x25519_scalarmult_alt_orig_opt_a55 + .global _x25519_scalarmult_alt_orig_opt_a55 + // .type x25519_scalarmult, %function +x25519_scalarmult_alt_orig_opt_a55: +_x25519_scalarmult_alt_orig_opt_a55: + stp x29, x30, [sp, #-160]! + mov x29, sp + stp x19, x20, [sp, #16] + stp x21, x22, [sp, #32] + stp x23, x24, [sp, #48] + stp x25, x26, [sp, #64] + stp x27, x28, [sp, #80] + stp d8, d9, [sp, #96] + stp d10, d11, [sp, #112] + stp d12, d13, [sp, #128] + stp d14, d15, [sp, #144] + sub sp, sp, STACK_OUT_PTR+8 + + // 0: mask1, 8: mask2, 16: AA, 56: B/BB, 96: counter, 100: lastbit, 104: scalar, 136: X1, 176: outptr, 184: padding, 192: fp, 200: lr + + str x0, [sp, STACK_OUT_PTR] // outptr + mov x19, x2 // point + + mov x0, x1 // scalar + bl load256unaligned + + and x3, x3, #0x7fffffffffffffff + and x0, x0, #0xfffffffffffffff8 + orr x3, x3, #0x4000000000000000 + + stp x0, x1, [sp, STACK_SCALAR] + stp x2, x3, [sp, STACK_SCALAR+16] + + mov x0, x19 // point + bl load256unaligned + + // Unpack point (discard most significant bit) + lsr x12, x0, #51 + lsr x17, x2, #51 + orr w12, w12, w1, lsl #13 + orr w17, w17, w3, lsl #13 + ubfx x8, x3, #12, #26 + ubfx x9, x3, #38, #25 + ubfx x11, x0, #26, #25 + ubfx x13, x1, #13, #25 + lsr x14, x1, #38 + ubfx x16, x2, #25, #26 + and w10, w0, #0x3ffffff + and w12, w12, #0x3ffffff + and w15, w2, #0x1ffffff + and w17, w17, #0x1ffffff + stp w10, w11, [sp, STACK_X_0] + stp w12, w13, [sp, 
STACK_X_8] + stp w14, w15, [sp, STACK_X_16] + stp w16, w17, [sp, STACK_X_24] + stp w8, w9, [sp, STACK_X_32] + + // X2 (initially set to 1) + mov x1, #1 + mov v0.d[0], x1 + mov v2.d[0], xzr + mov v4.d[0], xzr + mov v6.d[0], xzr + mov v8.d[0], xzr + + // Z2 (initially set to 0) + mov v1.d[0], xzr + mov v3.d[0], xzr + mov v5.d[0], xzr + mov v7.d[0], xzr + mov v9.d[0], xzr + + // X3 (initially set to X1) + mov v10.s[0], w10 + mov v10.s[1], w11 + mov v12.s[0], w12 + mov v12.s[1], w13 + mov v14.s[0], w14 + mov v14.s[1], w15 + mov v16.s[0], w16 + mov v16.s[1], w17 + mov v18.s[0], w8 + mov v18.s[1], w9 + + // Z3 (initially set to 1) + mov v11.d[0], x1 + mov v13.d[0], xzr + mov v15.d[0], xzr + mov v17.d[0], xzr + mov v19.d[0], xzr + + mov x0, #255-1 // 255 iterations + str W0, [sp, #STACK_CTR] // @slothy:writes=ctr + + const19 .req x30 + vconst19 .req v31 + + mov w30, #19 + dup vconst19.2s, w30 + mov x0, #(1<<26)-1 + dup v30.2d, x0 + ldr x0, =0x07fffffe07fffffc + // TODO: I do not quite understand what the two stps are doing + // First seems to write bytes 0-15 (mask1+mask2); second seems to write bytes 16-31 (mask2+A) + // stp x0, x0, [sp, #STACK_MASK1] // @slothy:writes=mask1 + + sub x1, x0, #0xfc-0xb4 + str x0, [sp, #STACK_MASK1] // @slothy:writes=mask1 + str x1, [sp, #STACK_MASK2] // @slothy:writes=mask2 + + ldr d28, [sp, #STACK_MASK2] // @slothy:reads=mask2 + ldr d29, [sp, #STACK_MASK1] // @slothy:reads=mask1 + + ldrb w1, [sp, #STACK_SCALAR+31] + lsr w1, w1, #6 + str w1, [sp, #STACK_LASTBIT] // @slothy:writes=lastbit + mainloop: + // Instructions: 958 + // Expected cycles: 484 + // Expected IPC: 1.98 + // + // ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- cycle (expected) 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------> + // 0 25 50 75 100 125 150 175 200 225 250 275 300 325 350 375 400 425 450 475 + // |------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|-------- + add v23.2S, v14.2S, v15.2S // *................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v24.2S, v4.2S, v5.2S // *................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v27.2S, v10.2S, v11.2S // .*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + tst w1, #1 // .*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + sub v21.2S, v28.2S, v11.2S // ..*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v25.2S, v0.2S, v1.2S // ..*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + fcsel d11, d24, d23, eq // ...*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + sub v26.2S, v29.2S, v3.2S // ...*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ add v21.2S, v10.2S, v21.2S // ....*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub v22.2S, v29.2S, v7.2S // ....*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel d20, d25, d27, eq // .....*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ mov x24, v11.d[0] // .....*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + sub v10.2S, v28.2S, v1.2S // ......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v26.2S, v2.2S, v26.2S // ......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ mov x12, v20.d[0] // .......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v7.2S, v6.2S, v7.2S // .......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v20.2S, v12.2S, v13.2S // ........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub v11.2S, v29.2S, v9.2S // ........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v2.2S, v2.2S, v3.2S // .........*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x28, x24, #32 // .........*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v3.2S, v8.2S, v11.2S // ..........*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v1.2S, v6.2S, v22.2S // ..........*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v11.2S, v8.2S, v9.2S // ...........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ sub v6.2S, v29.2S, v13.2S // ...........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + fcsel d9, d2, d20, eq // ............*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x29, x12, #32 // ............*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add v28.2S, v16.2S, v17.2S // .............*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x9, w12, w12 // .............*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mov x0, v9.d[0] // ..............*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ sub v13.2S, v29.2S, v15.2S // ..............*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v15.2S, v0.2S, v10.2S // ...............*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub v9.2S, v29.2S, v19.2S // ...............*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ fcsel d10, d7, d28, eq // ................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + sub v8.2S, v29.2S, v17.2S // ................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x14, x28, x28 // .................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v17.2S, v18.2S, v19.2S // .................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + sub v29.2S, v29.2S, v5.2S // ..................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v19.2S, v18.2S, v9.2S // ..................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ add v0.2S, v14.2S, v13.2S // ...................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + mov x17, v10.d[0] // ...................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add v18.2S, v4.2S, v29.2S // ....................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn2 v4.2S, v11.2S, v3.2S // ....................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add v22.2S, v16.2S, v8.2S // .....................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add v10.2S, v12.2S, v6.2S // .....................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ fcsel d12, d11, d17, eq // ......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + trn2 v14.2S, v25.2S, v15.2S // ......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + fcsel d9, d15, d21, eq // .......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ trn2 v13.2S, v2.2S, v26.2S // .......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + fcsel d6, d3, d19, eq // ........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn1 v5.2S, v11.2S, v3.2S // ........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v29.2S, v19.2S, v17.2S // .........................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn1 v3.2S, v10.2S, v20.2S // .........................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v8.2S, v0.2S, v23.2S // ..........................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ str d6, [sp, #STACK_B_32] // ..........................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=['B32'] + fcsel d6, d26, d10, eq // ...........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + trn1 v11.2S, v21.2S, v27.2S // ...........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ trn2 v21.2S, v21.2S, v27.2S // ............................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v27.2S, v22.2S, v28.2S // ............................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn1 v2.2S, v2.2S, v26.2S // .............................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v26.2S, v22.2S, v28.2S // .............................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + fcsel d16, d1, d22, eq // ..............................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + trn2 v22.2S, v19.2S, v17.2S // ..............................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ fcsel d17, d18, d0, eq // ...............................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + stp d9, d6, [sp, #STACK_B_0] // ...............................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=['B0', 'B8'] + trn2 v6.2S, v7.2S, v1.2S // ................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v23.2S, v0.2S, v23.2S // ................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + stp d17, d16, [sp, #STACK_B_16] // .................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. // @slothy:writes=['B16', 'B24'] + trn1 v16.2S, v7.2S, v1.2S // .................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ trn1 v0.2S, v24.2S, v18.2S // ..................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + trn2 v7.2S, v24.2S, v18.2S // ..................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x11, x29, x29 // ...................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ mul v18.2S, v27.2S, v31.2S // ...................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + trn2 v20.2S, v10.2S, v20.2S // ....................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mov x3, v12.d[0] // ....................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ trn1 v19.2S, v25.2S, v15.2S // .....................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull x19, w12, w11 // .....................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x23, x17, x17 // ......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ lsr x20, x3, #32 // ......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umull v25.2D, v14.2S, v27.2S // .......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x1, w24, w14 // .......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull v28.2D, v19.2S, v8.2S // ........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x16, w24, w24 // ........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umull v17.2D, v14.2S, v8.2S // .........................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w22, w20, w30 // .........................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + mul v15.2S, v22.2S, v31.2S // ..........................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x27, x20, x20 // ..........................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull v10.2D, v19.2S, v27.2S // ...........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ + mul w18, w17, w30 // ...........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull v9.2D, v19.2S, v20.2S // ............................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w4, w28, w30 // ............................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x25, w22, w27, x16 // .............................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... + lsr x2, x17, #32 // .............................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v17.2D, v13.2S, v20.2S // ..............................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x21, w12, w27, x1 // ..............................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v17.2D, v7.2S, v21.2S // ...............................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x10, x3, x3 // ...............................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v17.2D, v6.2S, v15.2S // ................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x6, x2, x2 // ................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v17.2D, v4.2S, v18.2S // .................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x1, w29, w10, x21 // .................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v10.2D, v2.2S, v8.2S // ..................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x28, w12, w6 // ..................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v9.2D, v2.2S, v21.2S // ...................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ + add x16, x24, x24 // ...................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull v1.2D, v19.2S, v22.2S // ....................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x8, w4, w14, x9 // ....................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... + shl v24.2D, v17.2D, #1 // .....................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ lsr x21, x0, #32 // .....................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. + add x26, x0, x0 // ......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x15, w29, w23, x28 // ......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull v12.2D, v14.2S, v15.2S // .......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull x24, w12, w26 // .......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v10.2D, v0.2S, v20.2S // ........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x4, w12, w10, x25 // ........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v10.2D, v16.2S, v21.2S // .........................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x15, w0, w14, x15 // .........................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v1.2D, v2.2S, v27.2S // ..........................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x9, w29, w11, x24 // ..........................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v1.2D, v0.2S, v8.2S // ...........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x13, w0, w6, x1 // ...........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v25.2D, v13.2S, v8.2S // ............................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x5, w11, w6, x4 // ............................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v25.2D, v7.2S, v20.2S // .............................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... + add x7, x21, x21 // .............................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v25.2D, v6.2S, v21.2S // ..............................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umull x1, w12, w23 // ..............................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v25.2D, v4.2S, v15.2S // ...............................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x27, w17, w18, x9 // ...............................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v1.2D, v16.2S, v20.2S // ................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x20, w21, w23, x13 // ................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v1.2D, v5.2S, v21.2S // .................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x5, w0, w23, x5 // .................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v28.2D, v2.2S, v20.2S // ..................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. + mul w13, w2, w30 // ..................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umull x9, w12, w14 // ...................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................ + umull v27.2D, v14.2S, v20.2S // ....................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... + add x24, x14, x14 // ....................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ shl v17.2D, v25.2D, #1 // .....................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x4, w11, w14, x1 // .....................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v28.2D, v0.2S, v21.2S // ......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x2, w29, w16, x9 // ......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x17, w7, w14, x5 // .......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ + mul v25.2S, v8.2S, v31.2S // .......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umull v8.2D, v14.2S, v21.2S // ........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x9, w13, w24, x27 // ........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v10.2D, v5.2S, v15.2S // .........................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x5, w18, w14, x19 // .........................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v27.2D, v13.2S, v21.2S // ..........................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x4, w0, w16, x4 // ..........................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w19, w3, w30 // ...........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + mul v22.2S, v26.2S, v31.2S // ...........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x28, w13, w16, x5 // ............................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x1, x20, x17, lsr #26 // ............................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v13.2S, v15.2S // .............................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... + umull x20, w12, w16 // .............................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x27, x8, x1, lsr #25 // ..............................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x25, w21, w7, x4 // ..............................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... + bic x5, x1, #0x1ffffff // ...............................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x4, w19, w7, x28 // ...............................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... + add x8, x27, x5, lsr #24 // ................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x27, w11, w7, x20 // ................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v28.2D, v16.2S, v15.2S // .................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x28, w19, w16, x9 // .................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v17.2D, v19.2S, v29.2S // ..................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x4, w22, w26, x4 // ..................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v17.2D, v2.2S, v26.2S // ...................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x9, w0, w7, x2 // ...................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v10.2D, v14.2S, v26.2S // ....................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... + ldr x20, [sp, #STACK_B_8] // ....................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=['B8'] + umaddl x2, w0, w0, x27 // .....................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. 
+ mul v20.2S, v20.2S, v31.2S // .....................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v1.2D, v14.2S, v29.2S // ......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. + add x0, x8, x5, lsr #21 // ......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v1.2D, v13.2S, v26.2S // .......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x9, w19, w6, x9 // .......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v24.2D, v19.2S, v26.2S // ........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... 
+ add x8, x7, x7 // ........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... + umull v26.2D, v19.2S, v21.2S // .........................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x5, w3, w19, x25 // .........................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v9.2D, v0.2S, v15.2S // ..........................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x2, w13, w6, x2 // ..........................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v28.2D, v5.2S, v18.2S // ...........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ 
+ umaddl x27, w22, w8, x28 // ...........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v28.2D, v14.2S, v23.2S // ............................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... + add x3, x11, x11 // ............................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v26.2D, v2.2S, v15.2S // .............................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + umull x28, w12, w7 // .............................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v12.2D, v13.2S, v18.2S // ..............................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x25, w21, w16, x15 // ..............................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v1.2D, v7.2S, v23.2S // ...............................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x15, w22, w23, x9 // ...............................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v17.2D, v0.2S, v23.2S // ................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... + add x7, x6, x6 // ................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... + umlal v17.2D, v16.2S, v3.2S // .................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x29, w29, w26, x28 // .................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. + umlal v24.2D, v2.2S, v23.2S // ..................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x10, w22, w10, x25 // ..................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v1.2D, v6.2S, v3.2S // ...................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ + umaddl x21, w19, w23, x2 // ...................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ + umlal v27.2D, v7.2S, v15.2S // ....................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x25, w13, w23, x29 // ....................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v7.2S, v18.2S // .....................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. + ldr x28, [sp, #STACK_B_32] // .....................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. 
// @slothy:reads=['B32'] + umlal v24.2D, v0.2S, v3.2S // ......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. + ldr x6, [sp, #STACK_B_16] // ......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. // @slothy:reads=['B16'] + ldr x2, [sp, #STACK_B_0] // .......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ 
// @slothy:reads=['B0'] + umaddl x29, w18, w16, x0 // .......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ + umlal v27.2D, v6.2S, v18.2S // ........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x18, w19, w14, x25 // ........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v8.2D, v6.2S, v25.2S // .........................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + lsr x0, x28, #32 // .........................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + umlal v9.2D, v16.2S, v18.2S // ..........................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x25, w13, w8, x29 // ..........................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... + mul v21.2S, v21.2S, v31.2S // ...........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ + lsr x9, x20, #32 // ...........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ 
+ add x23, x6, x6 // ............................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x11, w22, w16, x18 // ............................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v4.2S, v20.2S // .............................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... 
+ lsr x16, x2, #32 // .............................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... + umlal v9.2D, v5.2S, v25.2S // ..............................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... + umaddl x25, w19, w26, x25 // ..............................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v26.2D, v0.2S, v18.2S // ...............................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + umull x14, w6, w6 // ...............................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + umlal v1.2D, v4.2S, v11.2S // ................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x18, w22, w24, x21 // ................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... + shl v8.2D, v8.2D, #1 // .................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x24, w22, w3, x25 // .................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v27.2D, v4.2S, v25.2S // ..................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + umaddl x13, w22, w7, x5 // ..................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + umlal v8.2D, v19.2S, v3.2S // ...................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ 
+ and x29, x17, #0x3ffffff // ...................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ + umlal v26.2D, v16.2S, v25.2S // ....................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... + add x21, x4, x24, lsr #26 // ....................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v26.2D, v5.2S, v20.2S // .....................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. + lsr x12, x6, #32 // .....................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. + umlal v12.2D, v7.2S, v25.2S // ......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. 
+ add x25, x27, x21, lsr #25 // ......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. + shl v15.2D, v27.2D, #1 // .......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ + add x26, x12, x12 // .......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ 
+ mul v27.2S, v29.2S, v31.2S // ........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... + add x4, x28, x28 // ........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... + umlal v28.2D, v13.2S, v3.2S // .........................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... 
+ and x8, x1, #0x1ffffff // .........................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... + umlal v24.2D, v16.2S, v11.2S // ..........................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... + add x11, x11, x25, lsr #26 // ..........................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v10.2D, v13.2S, v23.2S // ...........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ + umull x3, w6, w26 // ...........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ + umlal v26.2D, v14.2S, v11.2S // ............................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... 
+ add x7, x18, x11, lsr #25 // ............................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... + umlal v9.2D, v14.2S, v3.2S // .............................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... + add x6, x0, x0 // .............................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v24.2D, v5.2S, v27.2S // ..............................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + add x18, x15, x7, lsr #26 // ..............................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + umlal v12.2D, v6.2S, v20.2S // ...............................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... 
+ mul w17, w0, w30 // ...............................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... + umlal v10.2D, v7.2S, v3.2S // ................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... + add x0, x13, x18, lsr #25 // ................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v9.2D, v13.2S, v11.2S // .................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. + and x5, x7, #0x3ffffff // .................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. + umlal v9.2D, v7.2S, v27.2S // ..................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. 
+ add x13, x20, x20 // ..................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. + umlal v17.2D, v5.2S, v11.2S // ...................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ + and x19, x24, #0x3ffffff // ...................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ 
+ umlal v10.2D, v6.2S, v11.2S // ....................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... + umaddl x27, w17, w6, x14 // ....................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... + umlal v12.2D, v4.2S, v21.2S // .....................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. 
+ add x1, x9, x9 // .....................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. + umlal v15.2D, v19.2S, v23.2S // ......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. + and x22, x0, #0x3ffffff // ......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v15.2D, v2.2S, v3.2S // .......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ + and x15, x25, #0x3ffffff // .......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ + umlal v15.2D, v0.2S, v11.2S // ........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... 
+ mul w14, w28, w30 // ........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... + umlal v9.2D, v6.2S, v22.2S // .........................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... + add x25, x10, x0, lsr #26 // .........................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... 
+ bfi x5, x18, #32, #25 // ..........................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... + mul v18.2S, v23.2S, v31.2S // ..........................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... + umlal v10.2D, v4.2S, v27.2S // ...........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ 
+ bfi x19, x21, #32, #25 // ...........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ + umlal v28.2D, v7.2S, v11.2S // ............................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... + bfi x15, x11, #32, #25 // ............................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v26.2D, v13.2S, v27.2S // .............................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... + bfi x22, x25, #32, #25 // .............................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... + umlal v15.2D, v16.2S, v27.2S // ..............................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... 
+ ldr x21, [sp, #STACK_B_24] // ..............................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... // @slothy:reads=['B24'] + shl v12.2D, v12.2D, #1 // ...............................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... + umaddl x11, w2, w4, x27 // ...............................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... 
+ umlal v28.2D, v6.2S, v27.2S // ................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + mul w10, w12, w30 // ................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + umlal v28.2D, v4.2S, v22.2S // .................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. 
+ stp x5, x22, [sp, #STACK_A_16] // .................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. // @slothy:writes=['A16', 'A24'] + lsr x27, x21, #32 // ..................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. + mul v21.2S, v3.2S, v31.2S // ..................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. 
+ umlal v15.2D, v5.2S, v22.2S // ...................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ + mul w22, w21, w30 // ...................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ + umlal v9.2D, v4.2S, v18.2S // ....................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... 
+ stp x19, x15, [sp, #STACK_A_0] // ....................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... // @slothy:writes=['A0', 'A8'] + umlal v12.2D, v19.2S, v11.2S // .....................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. + add x7, x27, x27 // .....................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. 
+ umlal v26.2D, v7.2S, v22.2S // ......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. + mul w19, w27, w30 // ......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. + umlal v8.2D, v2.2S, v11.2S // .......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ 
+ umull x12, w2, w2 // .......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ + umlal v8.2D, v0.2S, v27.2S // ........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... + umull x0, w2, w26 // ........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... 
+ umlal v8.2D, v16.2S, v22.2S // .........................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... + add x24, x21, x21 // .........................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... + umlal v8.2D, v5.2S, v18.2S // ..........................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x18, w10, w26, x12 // ..........................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... + umlal v26.2D, v6.2S, v18.2S // ...........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ + umull x5, w2, w24 // ...........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ 
+ umlal v12.2D, v2.2S, v27.2S // ............................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... + umaddl x10, w2, w6, x3 // ............................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... + umlal v12.2D, v0.2S, v22.2S // .............................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... 
+ add x3, x16, x16 // .............................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... + usra v9.2D, v8.2D, #26 // ..............................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... + umaddl x5, w3, w26, x5 // ..............................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... 
+ and v25.16B, v8.16B, v30.16B // ...............................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... + umull x12, w2, w13 // ...............................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... + umlal v26.2D, v4.2S, v21.2S // ................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... 
+ umaddl x27, w16, w23, x0 // ................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... + usra v15.2D, v9.2D, #25 // .................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. + umaddl x5, w20, w23, x5 // .................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. 
+ umaddl x15, w9, w1, x5 // ....................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... + ldr b20, [sp, #STACK_MASK2] // ....................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... // @slothy:reads=mask2 + ld1r {v8.2D}, [sp] // .....................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. 
// @slothy:reads=mask1 + umaddl x5, w20, w1, x27 // .....................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. + usra v28.2D, v15.2D, #26 // ......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. + umaddl x6, w16, w3, x12 // ......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. 
+ umlal v12.2D, v16.2S, v18.2S // .......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ + umaddl x27, w28, w14, x15 // .......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ + umlal v12.2D, v5.2S, v21.2S // ........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... 
+ umaddl x28, w14, w7, x5 // ........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... + usra v24.2D, v28.2D, #25 // .........................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... + add x0, x29, x25, lsr #25 // .........................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... 
+ and v29.16B, v15.16B, v30.16B // ..........................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... + add x12, x26, x26 // ..........................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... + ushr v15.2D, v30.2D, #1 // ...........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ 
+ add x25, x8, x0, lsr #26 // ...........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ + usra v10.2D, v24.2D, #26 // ............................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... + umull x8, w2, w7 // ............................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... 
+ and v27.16B, v24.16B, v30.16B // .............................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... + umull x29, w2, w1 // .............................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... + and v2.16B, v28.16B, v15.16B // ..............................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... 
+ umaddl x6, w21, w22, x6 // ..............................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... + usra v17.2D, v10.2D, #25 // ...............................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... + umaddl x5, w16, w24, x8 // ...............................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... 
+ and v23.16B, v10.16B, v15.16B // ................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... + and x15, x0, #0x3ffffff // ................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... + uzp1 v11.4S, v29.4S, v2.4S // .................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. 
+ umaddl x0, w16, w4, x10 // .................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. + usra v1.2D, v17.2D, #26 // ..................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. + umaddl x10, w20, w26, x5 // ..................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. 
+ uzp1 v21.4S, v27.4S, v23.4S // ...................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ + umaddl x11, w3, w7, x11 // ...................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ + and v2.16B, v17.16B, v30.16B // ....................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... 
+ bfi x15, x25, #32, #26 // ....................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... + and v4.16B, v1.16B, v15.16B // .....................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. + umaddl x25, w20, w7, x0 // .....................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. 
+ bic v28.16B, v1.16B, v15.16B // ......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. + str x15, [sp, #STACK_A_32] // ......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. // @slothy:writes=['A32'] + trn1 v29.4S, v2.4S, v4.4S // .......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ 
+ umaddl x6, w19, w12, x6 // .......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ + usra v12.2D, v28.2D, #25 // ........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... + umull x8, w2, w23 // ........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... 
+ and v0.16B, v9.16B, v15.16B // .........................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + add x0, x3, x3 // .........................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + uzp1 v3.4S, v11.4S, v21.4S // ..........................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... 
+ umull x5, w2, w3 // ..........................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... + usra v12.2D, v28.2D, #24 // ...........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ + umaddl x8, w3, w1, x8 // ...........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ 
+ add v17.4S, v3.4S, v8.4S // ............................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... + umaddl x6, w14, w23, x6 // ............................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... + uzp2 v16.4S, v11.4S, v21.4S // .............................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... 
+ add x3, x7, x7 // .............................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... + usra v12.2D, v28.2D, #21 // ..............................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... + umaddl x5, w22, w26, x5 // ..............................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... 
+ add v1.2S, v29.2S, v8.2S // ...............................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... + mov v4.d[0], v29.d[1] // ...............................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... + sub v27.4S, v17.4S, v16.4S // ................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... 
+ umaddl x2, w20, w20, x8 // ................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... + usra v26.2D, v12.2D, #26 // .................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. + umaddl x21, w19, w23, x5 // .................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. 
+ add v5.2S, v29.2S, v4.2S // ..................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. + sub v10.2S, v1.2S, v4.2S // ..................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. + and v21.16B, v12.16B, v30.16B // ...................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ 
+ umaddl x7, w19, w7, x2 // ...................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ + usra v25.2D, v26.2D, #25 // ....................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... + umaddl x16, w16, w13, x29 // ....................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... 
+ mov v8.b[0], v20.b[0] // .....................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. + zip2 v19.2S, v10.2S, v5.2S // .....................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. + and v11.16B, v26.16B, v15.16B // ......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. 
+ umaddl x25, w9, w24, x25 // ......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. + usra v0.2D, v25.2D, #26 // .......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ + umaddl x2, w20, w24, x11 // .......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ 
+ and v25.16B, v25.16B, v30.16B // ........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... + umaddl x16, w19, w24, x16 // ........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... + uzp1 v21.4S, v21.4S, v11.4S // .........................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... 
+ umaddl x11, w14, w1, x21 // .........................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... + uzp1 v17.4S, v25.4S, v0.4S // ..........................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... + umaddl x2, w1, w26, x2 // ..........................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... 
+ add v0.4S, v3.4S, v16.4S // ...........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ + umaddl x21, w14, w26, x16 // ...........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ + zip1 v22.2S, v10.2S, v5.2S // ............................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... 
+ umaddl x29, w14, w24, x7 // ............................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... + uzp1 v9.4S, v21.4S, v17.4S // .............................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... + ldr x15, =0x07fffffe07fffffc // .............................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... 
+ uzp2 v17.4S, v21.4S, v17.4S // ..............................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... + ldr x16, [sp, #STACK_A_32] // ..............................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... // @slothy:reads=['A32'] + add v21.4S, v9.4S, v8.4S // ...............................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... 
+ ldr x20, [sp, #STACK_A_8] // ...............................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... // @slothy:reads=['A8'] + zip2 v26.4S, v27.4S, v0.4S // ................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... + umaddl x29, w17, w12, x29 // ................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... 
+ add v25.4S, v9.4S, v17.4S // .................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. + ldr x8, [sp, #STACK_A_16] // .................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. // @slothy:reads=['A16'] + sub v16.4S, v21.4S, v17.4S // ..................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. 
+ add x12, x16, x15 // ..................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. + zip1 v3.4S, v27.4S, v0.4S // ...................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ + ldr x5, [sp, #STACK_A_24] // ...................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ 
// @slothy:reads=['A24'] + add x26, x20, x15 // ....................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... + mul v12.2S, v19.2S, v31.2S // ....................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... + mov v6.d[0], v26.d[1] // .....................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. 
+ shl v29.2S, v19.2S, #1 // .....................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. + zip2 v2.4S, v16.4S, v25.4S // ......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. + add x20, x8, x15 // ......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. 
+ zip1 v17.4S, v16.4S, v25.4S // .......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ + umaddl x16, w17, w23, x21 // .......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ + add x8, x25, x2, lsr #26 // ........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... 
+ shl v27.2S, v6.2S, #1 // ........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... + shl v4.2S, v2.2S, #1 // .........................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... + shl v1.2S, v26.2S, #1 // .........................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... 
+ umull v24.2D, v17.2S, v27.2S // ..........................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... + add x18, x18, x8, lsr #25 // ..........................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... + mov v14.d[0], v17.d[1] // ...........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ 
+ mov v7.d[0], v3.d[1] // ...........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ + add x5, x5, x15 // ............................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... + mul v5.2S, v22.2S, v31.2S // ............................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... 
+ shl v20.2S, v3.2S, #1 // .............................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... + shl v23.2S, v22.2S, #1 // .............................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... + bic x25, x8, #0x1ffffff // ..............................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... 
+ mul v8.2S, v6.2S, v31.2S // ..............................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... + umull v9.2D, v17.2S, v29.2S // ...............................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... + movk x15, #0xffb4 // ...............................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... 
+ umlal v24.2D, v14.2S, v1.2S // ................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... + add x21, x18, x25, lsr #24 // ................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... + add x1, x1, x1 // .................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. 
+ mul v25.2S, v7.2S, v31.2S // .................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. + umull v6.2D, v17.2S, v23.2S // ..................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. + add x25, x21, x25, lsr #21 // ..................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. 
+ umlal v9.2D, v14.2S, v23.2S // ...................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ + ldr x7, [sp, #STACK_A_0] // ...................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ // @slothy:reads=['A0'] + shl v0.2S, v7.2S, #1 // ....................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... 
+ mov v13.d[0], v2.d[1] // ....................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... + umull v21.2D, v17.2S, v1.2S // .....................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. + umaddl x22, w22, w23, x25 // .....................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. 
+ shl v16.2S, v13.2S, #1 // ......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. + shl v28.2S, v14.2S, #1 // ......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. + umlal v9.2D, v2.2S, v27.2S // .......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ 
+ add x7, x7, x15 // .......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ + umlal v6.2D, v28.2S, v27.2S // ........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... + and x18, x2, #0x3ffffff // ........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... 
+ umlal v6.2D, v2.2S, v1.2S // .........................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... + umaddl x15, w19, w1, x22 // .........................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... + umlal v6.2D, v16.2S, v0.2S // ..........................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... 
+ umaddl x22, w9, w23, x10 // ..........................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... + umlal v6.2D, v3.2S, v3.2S // ...........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ + umaddl x23, w17, w3, x27 // ...........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ 
+ umlal v6.2D, v12.2S, v29.2S // ............................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... + umaddl x27, w14, w13, x15 // ............................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... + umull v10.2D, v17.2S, v4.2S // .............................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... 
+ umaddl x22, w17, w4, x22 // .............................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... + umull v11.2D, v17.2S, v16.2S // ..............................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... + umaddl x25, w17, w24, x28 // ..............................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... 
+ umull v19.2D, v17.2S, v28.2S // ...............................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... + umaddl x9, w17, w13, x11 // ...............................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... + umlal v9.2D, v13.2S, v1.2S // ................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... 
+ umaddl x11, w17, w0, x27 // ................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... + umlal v21.2D, v28.2S, v0.2S // .................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. + umaddl x10, w17, w1, x6 // .................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. 
+ umlal v21.2D, v2.2S, v20.2S // ..................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. + and x14, x8, #0x1ffffff // ..................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. + umlal v21.2D, v16.2S, v13.2S // ...................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ 
+ add x9, x9, x11, lsr #26 // ...................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ + umlal v24.2D, v2.2S, v0.2S // ....................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... + and x2, x11, #0x3ffffff // ....................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... 
+ umlal v11.2D, v14.2S, v4.2S // .....................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. + add x3, x10, x9, lsr #25 // .....................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. + umull v29.2D, v17.2S, v0.2S // ......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. 
+ bfi x2, x9, #32, #25 // ......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. + umlal v9.2D, v3.2S, v0.2S // .......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ + add x8, x16, x3, lsr #26 // .......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ 
+ umlal v24.2D, v13.2S, v20.2S // ........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... + ldr x4, =121666 // ........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... + umull v13.2D, v17.2S, v20.2S // .........................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... 
+ add x19, x29, x8, lsr #25 // .........................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... + umlal v29.2D, v14.2S, v20.2S // ..........................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... + and x13, x9, #0x1ffffff // ..........................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... 
+ umlal v10.2D, v28.2S, v14.2S // ...........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ + add x27, x25, x19, lsr #26 // ...........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ + mul v18.2S, v26.2S, v31.2S // ............................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... 
+ and x9, x19, #0x3ffffff // ............................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... + umlal v11.2D, v12.2S, v20.2S // .............................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... + add x28, x23, x27, lsr #25 // .............................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... 
+ shl v14.2S, v28.2S, #1 // ..............................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... + shl v3.2S, v16.2S, #1 // ..............................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... + umlal v21.2D, v5.2S, v22.2S // ...............................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... 
+ add x25, x22, x28, lsr #26 // ...............................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... + umlal v13.2D, v28.2S, v16.2S // ................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... + and x16, x28, #0x3ffffff // ................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... 
+ umull v17.2D, v17.2S, v17.2S // .................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. + add x29, x18, x25, lsr #25 // .................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. + usra v9.2D, v6.2D, #26 // ..................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. 
+ and x22, x27, #0x1ffffff // ..................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. + umlal v24.2D, v12.2S, v23.2S // ...................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ + add x19, x14, x29, lsr #26 // ...................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ 
+ umlal v13.2D, v2.2S, v2.2S // ....................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + and x14, x29, #0x3ffffff // ....................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + umlal v10.2D, v18.2S, v26.2S // .....................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. 
+ bfi x14, x19, #32, #26 // .....................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. + bic v28.16B, v9.16B, v15.16B // ......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. + bfi x16, x25, #32, #25 // ......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. 
+ umlal v11.2D, v5.2S, v0.2S // .......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ + sub x24, x12, x14 // .......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ + usra v17.2D, v28.2D, #25 // ........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... 
+ lsr x10, x24, #32 // ........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... + umlal v13.2D, v8.2S, v27.2S // .........................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... + bfi x9, x27, #32, #25 // .........................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... 
+ umlal v10.2D, v12.2S, v3.2S // ..........................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... + sub x27, x5, x16 // ..........................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... + usra v17.2D, v28.2D, #24 // ...........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ 
+ umaddl x21, w10, w4, x19 // ...........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ + umlal v19.2D, v12.2S, v4.2S // ............................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... + and x6, x25, #0x1ffffff // ............................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... 
+ umlal v19.2D, v5.2S, v16.2S // .............................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... + sub x29, x20, x9 // .............................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... + umlal v19.2D, v8.2S, v20.2S // ..............................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... 
+ add x5, sp, #STACK_X_0 // ..............................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... + umlal v19.2D, v18.2S, v0.2S // ...............................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... + and x0, x3, #0x3ffffff // ...............................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... 
+ umlal v11.2D, v8.2S, v1.2S // ................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... + lsr x20, x29, #32 // ................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... + usra v17.2D, v28.2D, #21 // .................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. 
+ mov w28, w9 // .................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. + umlal v29.2D, v2.2S, v16.2S // ..................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. + stp x9, x16, [sp, #STACK_B_16] // ..................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. 
// @slothy:writes=['B16', 'B24'] + and v16.16B, v6.16B, v30.16B // ...................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ + lsr x23, x21, #25 // ...................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ + umlal v17.2D, v25.2S, v0.2S // ....................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... 
+ and x15, x21, #0x1ffffff // ....................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... + umlal v17.2D, v12.2S, v14.2S // .....................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. + umaddl x18, w29, w4, x28 // .....................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. 
+ umlal v17.2D, v5.2S, v4.2S // ......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. + bfi x0, x8, #32, #25 // ......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. + umlal v17.2D, v8.2S, v3.2S // .......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ 
+ umaddl x22, w20, w4, x22 // .......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ + umlal v17.2D, v18.2S, v20.2S // ........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... + sub x25, x7, x2 // ........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... 
+ shl v25.2S, v0.2S, #1 // .........................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... + shl v14.2S, v27.2S, #1 // .........................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... + umlal v10.2D, v5.2S, v20.2S // ..........................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... 
+ mov w9, w2 // ..........................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... + umlal v10.2D, v8.2S, v25.2S // ...........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ + lsr x3, x25, #32 // ...........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ 
+ usra v19.2D, v17.2D, #26 // ............................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... + stp x2, x0, [sp, #STACK_B_0] // ............................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... // @slothy:writes=['B0', 'B8'] + umlal v13.2D, v12.2S, v25.2S // .............................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... 
+ umaddl x9, w25, w4, x9 // .............................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... + umlal v29.2D, v12.2S, v1.2S // ..............................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... + umaddl x13, w3, w4, x13 // ..............................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... 
+ usra v10.2D, v19.2D, #25 // ...............................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... + sub x19, x26, x0 // ...............................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... + umlal v21.2D, v12.2S, v14.2S // ................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... 
+ add x9, x9, x23 // ................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... + umlal v13.2D, v5.2S, v1.2S // .................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. + and x17, x8, #0x1ffffff // .................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. 
+ usra v11.2D, v10.2D, #26 // ..................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. + lsr x1, x19, #32 // ..................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. + and v2.16B, v10.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ 
+ mov w8, w0 // ...................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ + umlal v29.2D, v5.2S, v27.2S // ....................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... + umaddl x11, w19, w4, x8 // ....................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... 
+ usra v13.2D, v11.2D, #25 // .....................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. + add x21, sp, #STACK_B_0 // .....................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. + and v3.16B, v11.16B, v15.16B // ......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. 
+ umaddl x8, w1, w4, x17 // ......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. + ld2 { v10.S, v11.S }[1], [x21], #8 // .......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ // @slothy:reads=['B0'] + add x9, x9, x23, lsl #1 // .......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ 
+ usra v29.2D, v13.2D, #26 // ........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... + lsr x7, x27, #32 // ........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... + and v1.16B, v19.16B, v15.16B // .........................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... 
+ add x12, x9, x23, lsl #4 // .........................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... + ld2 { v25.S, v26.S }[1], [x21], #8 // ..........................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... // @slothy:reads=['B8'] + mov w23, w16 // ..........................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... 
+ ld2 { v10.S, v11.S }[0], [x5], #8 // ...........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ // @slothy:reads=['X0'] + add x9, x13, x12, lsr #26 // ...........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ + and v4.16B, v13.16B, v30.16B // ............................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... 
+ umaddl x2, w7, w4, x6 // ............................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... + ld2 { v13.S, v14.S }[1], [x21], #8 // .............................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... // @slothy:reads=['B16'] + add x26, x11, x9, lsr #25 // .............................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... 
+ ld2 { v25.S, v26.S }[0], [x5], #8 // ..............................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... // @slothy:reads=['X8'] + add x0, sp, #STACK_A_0 // ..............................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... + and v0.16B, v17.16B, v30.16B // ...............................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... 
+ add x17, x8, x26, lsr #26 // ...............................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... + ld2 { v0.S, v1.S }[1], [x0], #8 // ................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... // @slothy:reads=['A0'] + and x6, x9, #0x1ffffff // ................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... 
+ and v5.16B, v29.16B, v15.16B // .................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. + add x18, x18, x17, lsr #25 // .................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. + ld2 { v13.S, v14.S }[0], [x5], #8 // ..................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. 
// @slothy:reads=['X16'] + and x11, x17, #0x1ffffff // ..................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. + add x13, x22, x18, lsr #26 // ...................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ + ld2 { v2.S, v3.S }[1], [x0], #8 // ...................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ 
// @slothy:reads=['A8'] + umull v18.2D, v1.2S, v11.2S // ....................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... + umaddl x9, w27, w4, x23 // ....................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... + usra v21.2D, v29.2D, #25 // .....................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. 
+ str x14, [sp, #STACK_B_32] // .....................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. // @slothy:writes=['B32'] + umull x8, w6, w7 // ......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. + ld2 { v4.S, v5.S }[1], [x0], #8 // ......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. 
// @slothy:reads=['A16'] + mov w23, w14 // .......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ + add x16, x9, x13, lsr #25 // .......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ + ld2 { v19.S, v20.S }[1], [x21], #8 // ........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................... 
// @slothy:reads=['B24'] + and v9.16B, v9.16B, v15.16B // .........................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... + umull x28, w6, w24 // .........................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... + and x17, x13, #0x1ffffff // ..........................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... 
+ mul v27.2S, v14.2S, v31.2S // ..........................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... + ld2 { v22.S, v23.S }[1], [x21], #8 // ...........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ // @slothy:reads=['B32'] + and x21, x16, #0x3ffffff // ...........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ 
+ umaddl x22, w11, w27, x28 // ............................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... + ld2 { v19.S, v20.S }[0], [x5], #8 // ............................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... // @slothy:reads=['X24'] + usra v24.2D, v21.2D, #26 // .............................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... 
+ umull x28, w6, w20 // .............................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... + umull v29.2D, v0.2S, v14.2S // ..............................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... + umull x9, w6, w27 // ..............................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... 
+ ld2 { v22.S, v23.S }[0], [x5], #8 // ...............................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... // @slothy:reads=['X32'] + and x5, x12, #0x3ffffff // ...............................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... + usra v16.2D, v24.2D, #25 // ................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... 
+ umaddl x13, w11, w1, x28 // ................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... + umull v12.2D, v1.2S, v20.2S // .................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. + umaddl x28, w11, w29, x9 // .................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. 
+ mul v28.2S, v23.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + umaddl x12, w11, w20, x8 // ..................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + usra v9.2D, v16.2D, #26 // ...................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ 
+ umaddl x8, w17, w29, x22 // ...................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ + umlal v29.2D, v2.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... + umaddl x22, w17, w19, x28 // ....................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... 
+ and v8.16B, v16.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. + umaddl x12, w17, w1, x12 // .....................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. + umlal v18.2D, v3.2S, v28.2S // ......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. 
+ and x14, x26, #0x3ffffff // ......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. + and v7.16B, v24.16B, v15.16B // .......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ + umaddl x26, w17, w3, x13 // .......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ 
+ mul v15.2S, v20.2S, v31.2S // ........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... + umaddl x9, w24, w4, x23 // ........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... + and v6.16B, v21.16B, v30.16B // .........................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... 
+ add x23, x2, x16, lsr #26 // .........................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... + umull v21.2D, v0.2S, v26.2S // ..........................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... + and x13, x18, #0x3ffffff // ..........................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... 
+ umlal v29.2D, v4.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ + add x16, x9, x23, lsr #25 // ...........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ + umlal v18.2D, v5.2S, v15.2S // ............................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... 
+ and x9, x23, #0x1ffffff // ............................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... + umaddl x4, w9, w19, x8 // .............................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... + ld2 { v6.S, v7.S }[1], [x0], #8 // .............................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... 
// @slothy:reads=['A24'] + umlal v21.2D, v2.2S, v11.2S // ..............................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... + add x28, x15, x16, lsr #26 // ..............................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... + umull v17.2D, v0.2S, v20.2S // ...............................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... 
+ umaddl x12, w9, w3, x12 // ...............................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... + umaddl x15, w28, w25, x4 // ................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... + ld2 { v8.S, v9.S }[1], [x0], #8 // ................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... 
// @slothy:reads=['A32'] + umlal v18.2D, v7.2S, v27.2S // .................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + mul w0, w28, w30 // .................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + umull v23.2D, v0.2S, v23.2S // ..................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. 
+ mul w8, w9, w30 // ..................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. + umlal v17.2D, v2.2S, v14.2S // ...................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ + umaddl x18, w5, w10, x15 // ...................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ 
+ umlal v12.2D, v3.2S, v14.2S // ....................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... + umull x15, w0, w19 // ....................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... + umlal v29.2D, v6.2S, v28.2S // .....................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. 
+ umaddl x26, w8, w10, x26 // .....................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. + umlal v23.2D, v2.2S, v20.2S // ......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. + umaddl x28, w14, w7, x18 // ......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. 
+ umull v16.2D, v1.2S, v14.2S // .......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ + umaddl x2, w9, w25, x22 // .......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ + umlal v12.2D, v5.2S, v26.2S // ........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... 
+ umaddl x22, w0, w7, x26 // ........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... + umlal v12.2D, v7.2S, v11.2S // .........................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... + mul w23, w17, w30 // .........................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... 
+ umlal v12.2D, v9.2S, v28.2S // ..........................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... + umaddl x26, w13, w20, x28 // ..........................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... + umlal v16.2D, v3.2S, v26.2S // ...........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ 
+ add x28, x22, x22 // ...........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ + umlal v16.2D, v5.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... + umaddl x22, w5, w27, x28 // ............................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... 
+ umlal v16.2D, v7.2S, v28.2S // .............................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + umull x28, w0, w20 // .............................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + umlal v16.2D, v9.2S, v15.2S // ..............................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... 
+ umaddl x18, w21, w1, x26 // ..............................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... + umlal v23.2D, v4.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... + umaddl x26, w0, w24, x2 // ...............................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... 
+ umlal v29.2D, v8.2S, v15.2S // ................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... + umaddl x9, w14, w29, x22 // ................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... + shl v24.2D, v12.2D, #1 // .................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. 
+ umaddl x2, w0, w10, x12 // .................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. + shl v12.2D, v16.2D, #1 // ..................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. + umaddl x26, w5, w7, x26 // ..................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. 
+ umull v14.2D, v1.2S, v26.2S // ...................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ + umaddl x9, w13, w19, x9 // ...................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ + umlal v17.2D, v4.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... 
+ umaddl x12, w8, w29, x15 // ....................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... + umlal v17.2D, v6.2S, v11.2S // .....................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................. + umlal v12.2D, v0.2S, v19.2S // ......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. 
+ umaddl x22, w14, w20, x26 // ......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. + umlal v23.2D, v6.2S, v26.2S // .......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ + umull x15, w0, w29 // .......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ 
+ umlal v23.2D, v8.2S, v11.2S // ........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... + umaddl x26, w8, w7, x28 // ........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... + umlal v23.2D, v1.2S, v22.2S // .........................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... 
+ umull x4, w0, w27 // .........................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... + umlal v23.2D, v3.2S, v19.2S // ..........................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... + mul w28, w21, w30 // ..........................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... 
+ umlal v14.2D, v3.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + umaddl x26, w23, w10, x26 // ...........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + umlal v14.2D, v5.2S, v28.2S // ............................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... 
+ umaddl x17, w17, w25, x4 // ............................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... + umlal v14.2D, v7.2S, v15.2S // .............................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... + and x4, x16, #0x3ffffff // .............................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... 
+ umlal v14.2D, v9.2S, v27.2S // ..............................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... + add x2, x2, x2 // ..............................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... + umlal v23.2D, v5.2S, v13.2S // ...............................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... 
+ umaddl x16, w8, w24, x17 // ...............................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... + umlal v17.2D, v8.2S, v28.2S // ................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... + umaddl x17, w5, w24, x2 // ................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... 
+ umlal v24.2D, v0.2S, v22.2S // .................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. + umaddl x22, w13, w1, x22 // .................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. + umlal v21.2D, v4.2S, v28.2S // ..................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. 
+ umaddl x2, w11, w19, x16 // ..................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. + shl v20.2D, v14.2D, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ + mul w16, w4, w30 // ...................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ 
+ umlal v23.2D, v7.2S, v25.2S // ....................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... + umaddl x26, w11, w3, x26 // ....................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... + umlal v20.2D, v0.2S, v13.2S // .....................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. 
+ umaddl x17, w14, w27, x17 // .....................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. + umlal v17.2D, v1.2S, v19.2S // ......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. + umaddl x2, w6, w29, x2 // ......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. 
+ mul v14.2S, v13.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................ + umlal v21.2D, v6.2S, v15.2S // ........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... + umaddl x26, w6, w1, x26 // ........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... 
+ umlal v21.2D, v8.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... + umaddl x2, w16, w7, x2 // .........................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... + umlal v29.2D, v1.2S, v13.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... 
+ umaddl x12, w23, w27, x12 // ..........................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... + umlal v17.2D, v3.2S, v13.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ + add x26, x26, x26 // ...........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ 
+ umlal v24.2D, v2.2S, v19.2S // ............................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... + umaddl x26, w16, w27, x26 // ............................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... + umlal v24.2D, v4.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... 
+ umaddl x9, w21, w25, x9 // .............................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... + umlal v29.2D, v3.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... + umaddl x15, w8, w27, x15 // ..............................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... 
+ umaddl x9, w16, w24, x9 // ................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... + mul v16.2S, v11.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... + umull v11.2D, v0.2S, v11.2S // .................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. 
+ umaddl x2, w28, w10, x2 // .................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. + umlal v12.2D, v2.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. + umaddl x17, w13, w29, x17 // ..................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. 
+ mul v13.2S, v26.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + umaddl x26, w28, w24, x26 // ...................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + umull v26.2D, v1.2S, v28.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... 
+ umaddl x2, w13, w3, x2 // ....................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... + umlal v20.2D, v2.2S, v25.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. + umaddl x17, w21, w19, x17 // .....................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. 
+ umlal v20.2D, v4.2S, v10.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. + umaddl x21, w21, w3, x22 // ......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. + umlal v18.2D, v9.2S, v13.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ 
+ mul w22, w13, w30 // .......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ + umlal v26.2D, v3.2S, v15.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... + umaddl x18, w4, w3, x18 // ........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... 
+ umlal v26.2D, v5.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... + umaddl x26, w13, w25, x26 // .........................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... + umlal v11.2D, v2.2S, v28.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... 
+ umaddl x17, w4, w25, x17 // ..........................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... + shl v28.2D, v18.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ + umaddl x2, w14, w1, x2 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ 
+ umaddl x26, w14, w19, x26 // ............................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... + mul v18.2S, v22.2S, v31.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... + umlal v26.2D, v7.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... 
+ umaddl x4, w16, w10, x21 // .............................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... + umlal v28.2D, v0.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... + umaddl x2, w5, w20, x2 // ..............................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... 
+ umlal v21.2D, v1.2S, v25.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... + umaddl x21, w5, w29, x26 // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... + umlal v21.2D, v3.2S, v10.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... 
+ mul w13, w11, w30 // ................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... + umlal v21.2D, v5.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. + umaddl x26, w23, w24, x15 // .................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. 
+ umlal v28.2D, v2.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. + add x2, x2, x21, lsr #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. + umlal v20.2D, v6.2S, v18.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ 
+ umaddl x12, w13, w24, x12 // ...................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ + umlal v26.2D, v9.2S, v16.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... + add x9, x9, x2, lsr #25 // ....................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... 
+ umaddl x11, w11, w25, x26 // .....................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. + mul v16.2S, v19.2S, v31.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. + umlal v11.2D, v4.2S, v15.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. 
+ add x4, x4, x9, lsr #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. + umlal v28.2D, v4.2S, v18.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ + and x26, x9, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ 
+ umlal v17.2D, v5.2S, v25.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... + add x17, x17, x4, lsr #25 // ........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... + umlal v21.2D, v7.2S, v16.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... 
+ bfi x26, x4, #32, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... + umlal v21.2D, v9.2S, v14.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... + add x9, x18, x17, lsr #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... 
+ umlal v28.2D, v6.2S, v16.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ + umaddl x4, w6, w19, x11 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ + umlal v28.2D, v8.2S, v14.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... 
+ umull x15, w0, w1 // ............................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... + umlal v29.2D, v5.2S, v10.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... + bic x18, x9, #0x3ffffff // .............................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... 
+ umlal v20.2D, v8.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... + umaddl x11, w16, w20, x4 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... + umlal v12.2D, v4.2S, v25.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... 
+ umaddl x4, w8, w20, x15 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... + usra v21.2D, v28.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... + lsr x15, x18, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... 
+ umlal v29.2D, v7.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + umaddl x11, w28, w7, x11 // .................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + umlal v29.2D, v9.2S, v16.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. 
+ umaddl x4, w23, w7, x4 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. + usra v20.2D, v21.2D, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ + add x15, x15, x18, lsr #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ 
+ umlal v12.2D, v6.2S, v10.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... + umaddl x11, w22, w10, x11 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... + umlal v12.2D, v8.2S, v18.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. 
+ umaddl x4, w13, w10, x4 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. + usra v29.2D, v20.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. + umaddl x12, w6, w25, x12 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. 
+ umlal v17.2D, v7.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ + add x15, x15, x18, lsr #22 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ + umlal v17.2D, v9.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... 
+ umaddl x4, w6, w3, x4 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... + usra v12.2D, v29.2D, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... + umaddl x12, w16, w1, x12 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... 
+ umlal v23.2D, v9.2S, v10.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... + umaddl x15, w0, w3, x15 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... + umlal v24.2D, v6.2S, v25.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ 
+ add x4, x4, x4 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ + usra v17.2D, v12.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... + umaddl x0, w28, w20, x12 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... 
+ umlal v11.2D, v6.2S, v27.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... + umaddl x15, w8, w1, x15 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... + ushr v19.2D, v30.2D, #1 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... 
+ mul w12, w14, w30 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... + and v22.16B, v12.16B, v30.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... + umaddl x18, w22, w7, x0 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... 
+ and v21.16B, v21.16B, v19.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... + umaddl x8, w23, w20, x15 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... + umlal v24.2D, v8.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. 
+ ldr x15, [sp, #STACK_CTR] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. // @slothy:reads=['ctr', 'lastbit'] + umlal v11.2D, v8.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. + umaddl x18, w12, w10, x18 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. 
+ and v27.16B, v20.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ + umaddl x7, w13, w7, x8 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ + and v12.16B, v29.16B, v19.16B // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... 
+ and x8, x17, #0x3ffffff // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... + usra v24.2D, v17.2D, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. + subs w0, w15, #1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. 
+ trn1 v15.4S, v27.4S, v12.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. + and w17, w0, #0x1f // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. + and v27.16B, v28.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ 
+ umaddl x20, w14, w3, x11 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ + usra v23.2D, v24.2D, #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... + umaddl x13, w16, w29, x4 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... 
+ umlal v11.2D, v1.2S, v10.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... + mul w4, w6, w30 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... + shl v26.2D, v26.2D, #1 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... 
+ umaddl x11, w5, w1, x20 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... + umaddl x13, w28, w27, x13 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ + mul v12.2S, v25.2S, v31.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ 
+ umlal v26.2D, v0.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... + umaddl x4, w4, w10, x7 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... + umlal v11.2D, v3.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... 
+ umaddl x6, w5, w3, x18 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... + umlal v11.2D, v5.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... + umaddl x20, w22, w24, x13 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... 
+ umlal v11.2D, v7.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... + add x4, x4, x4 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... + umlal v26.2D, v2.2S, v18.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... 
+ umaddl x13, w16, w19, x4 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... + umlal v26.2D, v4.2S, v16.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. + umaddl x4, w14, w25, x20 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. 
+ umlal v26.2D, v6.2S, v14.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. + bfi x8, x9, #32, #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. + umlal v26.2D, v8.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ 
+ umaddl x9, w28, w29, x13 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ + umlal v11.2D, v9.2S, v12.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... + umaddl x23, w5, w19, x4 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... 
+ bic v25.16B, v23.16B, v19.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. + and x29, x2, #0x1ffffff // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. + trn1 v2.4S, v2.4S, v3.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. 
+ umaddl x9, w22, w27, x9 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. + usra v26.2D, v25.2D, #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ + and x22, x21, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ 
+ ldr d29, [sp, #STACK_MASK1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... // @slothy:reads=mask1 + mov v12.d[0], v2.d[1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... + trn1 v6.4S, v6.4S, v7.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... 
+ umaddl x9, w12, w24, x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... + usra v26.2D, v25.2D, #24 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... + add x4, sp, #STACK_SCALAR // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... 
+ trn1 v8.4S, v8.4S, v9.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ + asr w28, w0, #5 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ + trn1 v4.4S, v4.4S, v5.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... 
+ umaddl x9, w5, w25, x9 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... + usra v26.2D, v25.2D, #21 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... + ldr w20, [x4, w28, SXTW #2] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... 
+ mov v14.d[0], v4.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... + ldr d28, [sp, #STACK_MASK2] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... // @slothy:reads=mask2 + and v20.16B, v17.16B, v19.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... 
+ add x13, x6, x9, lsr #26 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... + usra v11.2D, v26.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... + lsr x27, x15, #32 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... 
+ mov v7.d[0], x26 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. + mov v18.d[0], v8.d[1] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. + and v23.16B, v23.16B, v19.16B // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. 
+ add x28, x23, x13, lsr #25 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. + usra v27.2D, v11.2D, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ + and x25, x9, #0x3ffffff // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ 
+ and v17.16B, v11.16B, v19.16B // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... + add x9, x11, x28, lsr #26 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... + and v13.16B, v24.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. 
+ and x4, x28, #0x3ffffff // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. + usra v21.2D, v27.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. + add x28, x22, x9, lsr #25 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. 
+ and v25.16B, v27.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ + bfi x4, x9, #32, #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ + trn1 v19.4S, v13.4S, v23.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... 
+ add x9, x29, x28, lsr #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... + trn1 v13.4S, v25.4S, v21.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... + and x28, x28, #0x3ffffff // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... 
+ and v21.16B, v26.16B, v30.16B // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + bfi x28, x9, #32, #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + mov v3.d[0], x4 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ 
+ mov v2.d[0], v13.d[1] // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ + trn1 v11.4S, v21.4S, v17.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... + lsr w9, w20, w17 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... 
+ trn1 v0.4S, v0.4S, v1.4S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... + bfi x25, x13, #32, #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... + mov v9.d[0], x8 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... 
+ mov v4.d[0], v15.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... + trn1 v17.4S, v22.4S, v20.4S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... + stp w0, w9, [sp, #STACK_CTR] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... 
// @slothy:writes=['ctr', 'lastbit'] + mov v8.d[0], v19.d[1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... + eor w1, w9, w27 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... + mov v1.d[0], x25 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. 
+ mov v5.d[0], x28 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. + mov v16.d[0], v6.d[1] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. + mov v6.d[0], v17.d[1] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. 
+ mov v10.d[0], v0.d[1] // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + mov v0.d[0], v11.d[1] // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + + // ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- cycle (expected) -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------> + // 0 25 50 75 100 125 150 175 200 225 250 275 300 325 350 375 400 425 450 475 + // 
|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|------------------------|-------- + // sub v21.2S, v28.2S, v11.2S // ..*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v23.2S, v14.2S, v15.2S // *................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v11.2S, v10.2S, v11.2S // .*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // sub v25.2S, v29.2S, v3.2S // ...*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // add v20.2S, v10.2S, v21.2S // ....*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v22.2S, v4.2S, v5.2S // *................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v28.2S, v28.2S, v1.2S // ......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // tst w1, #1 // .*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add v10.2S, v0.2S, v1.2S // ..*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v1.2S, v2.2S, v25.2S // ......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // fcsel d27, d22, d23, eq // ...*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn2 v21.2S, v20.2S, v11.2S // ............................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v25.2S, v2.2S, v3.2S // .........*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v3.2S, v29.2S, v9.2S // ........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel d2, d10, d11, eq // .....*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v26.2S, v16.2S, v17.2S // .............*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v0.2S, v0.2S, v28.2S // ...............*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v3.2S, v8.2S, v3.2S // ..........*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mov x12, v2.d[0] // .......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // mov x23, v27.d[0] // .....*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // sub v24.2S, v29.2S, v17.2S // ................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v2.2S, v8.2S, v9.2S // ...........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // sub v8.2S, v29.2S, v5.2S // ..................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // sub v28.2S, v29.2S, v19.2S // ...............*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v19.2S, v18.2S, v19.2S // .................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v27.2S, v6.2S, v7.2S // .......*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add v28.2S, v18.2S, v28.2S // ..................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v18.2S, v4.2S, v8.2S // ....................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v9.2S, v29.2S, v7.2S // ....*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add v7.2S, v12.2S, v13.2S // ........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // fcsel d8, d27, d26, eq // ................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v15.2S, v29.2S, v15.2S // ..............*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v5.2S, v2.2S, v3.2S // ........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // sub v13.2S, v29.2S, v13.2S // ...........*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v4.2S, v2.2S, v3.2S // ....................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mov x17, v8.d[0] // ...................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // fcsel d29, d25, d7, eq // ............*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v17.2S, v12.2S, v13.2S // .....................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // fcsel d8, d3, d28, eq // ........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x20, w23, w23 // ........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mov x0, v29.d[0] // ..............*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v11.2S, v20.2S, v11.2S // ...........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // fcsel d13, d0, d20, eq // .......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn1 v3.2S, v17.2S, v7.2S // .........................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel d12, d1, d17, eq // ...........................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v20.2S, v17.2S, v7.2S // ....................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // fcsel d2, d2, d19, eq // ......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // str d8, [sp, #STACK_B_32] // ..........................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x16, x23, x23 // ...................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v8.2S, v28.2S, v19.2S // ..............................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v29.2S, v28.2S, v19.2S // .........................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v28.2S, v16.2S, v24.2S // .....................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // stp d13, d12, [sp, #STACK_B_0] // ...............................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mov x3, v2.d[0] // ....................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add v24.2S, v14.2S, v15.2S // ...................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // trn2 v13.2S, v25.2S, v1.2S // .......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn2 v14.2S, v10.2S, v0.2S // ......................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // trn1 v19.2S, v10.2S, v0.2S // .....................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add v12.2S, v6.2S, v9.2S // ..........*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v2.2S, v25.2S, v1.2S // .............................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v25.2D, v14.2S, v21.2S // ........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x29, x12, #32 // ............*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x28, x23, #32 // .........*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v15.2S, v8.2S, v31.2S // ..........................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x11, x29, x29 // ...................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn2 v7.2S, v22.2S, v18.2S // ..................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull v1.2D, v19.2S, v8.2S // ....................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x21, w12, w16 // .............................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // trn2 v8.2S, v28.2S, v26.2S // ............................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn1 v0.2S, v22.2S, v18.2S // ..................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // mul w13, w28, w30 // ............................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // lsr x15, x3, #32 // ......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // fcsel d6, d12, d28, eq // ..............................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x10, x3, x3 // ...............................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // fcsel d22, d18, d24, eq // ...............................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x25, x15, x15 // ..........................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x14, x28, x28 // .................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // mul w22, w15, w30 // .........................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v10.2D, v19.2S, v8.2S // ...........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // lsr x5, x0, #32 // .....................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // trn2 v9.2S, v24.2S, v23.2S // ..........................*......................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn1 v26.2S, v28.2S, v26.2S // .............................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // trn1 v23.2S, v24.2S, v23.2S // ................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // stp d22, d6, [sp, #STACK_B_16] // .................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull v17.2D, v14.2S, v8.2S // .......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umull x2, w12, w11 // .....................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v25.2D, v13.2S, v15.2S // .............................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................... + // lsr x9, x17, #32 // .............................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v18.2S, v8.2S, v31.2S // ...................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x28, w22, w25, x20 // .............................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v28.2D, v19.2S, v9.2S // ........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x4, w12, w12 // .............*...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x1, x9, x9 // ................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w26, w9, w30 // ..................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v17.2D, v13.2S, v9.2S // ............................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x9, w23, w14 // .......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v17.2D, v7.2S, v20.2S // .............................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x18, w12, w14 // ...................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // trn1 v16.2S, v27.2S, v12.2S // .................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // trn2 v6.2S, v27.2S, v12.2S // ................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v1.2D, v2.2S, v8.2S // ..........................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x6, w13, w14, x4 // ....................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v10.2D, v2.2S, v9.2S // ..................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x23, x17, x17 // ......................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v17.2D, v6.2S, v21.2S // ..............................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w12, w10, x28 // ........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v28.2D, v2.2S, v20.2S // ..................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x9, w12, w25, x9 // ..............................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v28.2D, v0.2S, v21.2S // ......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x8, x5, x5 // .............................................................*...................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v28.2D, v16.2S, v15.2S // .................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w11, w1, x28 // ............................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v8.2S, v9.2S, v31.2S // .......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x27, w29, w10, x9 // .................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v1.2D, v0.2S, v9.2S // ...........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ + // mul w20, w17, w30 // ...........................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull v12.2D, v14.2S, v9.2S // .........................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w0, w23, x28 // .................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v28.2D, v5.2S, v18.2S // ...........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x24, w0, w1, x27 // ...........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v10.2D, v0.2S, v20.2S // ........................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w20, w14, x2 // .........................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v10.2D, v16.2S, v21.2S // .........................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x25, w8, w14, x28 // .......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v1.2D, v16.2S, v20.2S // ................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x9, w5, w23, x24 // ................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v1.2D, v5.2S, v21.2S // .................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w26, w16, x4 // ............................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v12.2D, v13.2S, v20.2S // ..............................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul w15, w3, w30 // ...........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v12.2D, v7.2S, v21.2S // ...............................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x27, x9, x25, lsr #26 // ............................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v12.2D, v6.2S, v15.2S // ................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................... + // add x2, x0, x0 // ......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v12.2D, v4.2S, v18.2S // .................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x6, x6, x27, lsr #25 // ..............................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v10.2D, v5.2S, v15.2S // .........................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // bic x4, x27, #0x1ffffff // ...............................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v1.2D, v14.2S, v29.2S // ......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. + // add x9, x6, x4, lsr #24 // ................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v17.2D, v4.2S, v15.2S // ...............................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w15, w8, x28 // ...............................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v9.2D, v19.2S, v20.2S // ............................................*....................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x9, x9, x4, lsr #21 // ......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v10.2D, v14.2S, v26.2S // ....................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x13, w29, w16, x18 // ......................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v10.2D, v13.2S, v23.2S // ...........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w20, w16, x9 // .......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v1.2D, v13.2S, v26.2S // .......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x19, w12, w2 // .......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ + // shl v17.2D, v17.2D, #1 // .....................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x7, x8, x8 // ........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v9.2D, v2.2S, v21.2S // ...................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x9, w26, w7, x9 // ..........................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v0.2S, v15.2S // ..........................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x4, w29, w11, x19 // ..........................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v16.2S, v18.2S // ..........................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................... + // and x24, x25, #0x3ffffff // ...................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v9.2D, v5.2S, v8.2S // ..............................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... + // umull x25, w12, w8 // .............................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v27.2D, v14.2S, v20.2S // ....................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x18, w17, w20, x4 // ...............................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v1.2D, v7.2S, v23.2S // ...............................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w15, w2, x9 // ..............................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................... 
+ // shl v24.2D, v12.2D, #1 // .....................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................................. + // add x9, x11, x11 // ............................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... + // umull v12.2D, v14.2S, v15.2S // .......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x25, w29, w2, x25 // .................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v27.2D, v13.2S, v21.2S // ..........................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x19, w22, w9, x4 // .................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v27.2D, v7.2S, v15.2S // ....................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w11, w8, x21 // ................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v27.2D, v6.2S, v18.2S // ........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umull x6, w12, w1 // ..................................................*................................................................................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v17.2D, v19.2S, v29.2S // ..................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull x9, w12, w23 // ..............................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v17.2D, v2.2S, v26.2S // ...................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ + // add x21, x14, x14 // ....................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x25, w26, w23, x25 // ....................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul v22.2S, v26.2S, v31.2S // ...........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v24.2D, v19.2S, v26.2S // ........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x6, w29, w23, x6 // ......................................................*............................................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v24.2D, v2.2S, v23.2S // ..................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x20, w11, w14, x9 // .....................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................................. + // umull v26.2D, v19.2S, v21.2S // .........................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x9, w15, w14, x25 // ........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v27.2D, v4.2S, v8.2S // ..................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x29, w22, w2, x28 // ..................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v17.2D, v0.2S, v23.2S // ................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x25, w0, w16, x20 // ..........................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v10.2D, v7.2S, v3.2S // ................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x20, w0, w8, x13 // ...................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v26.2D, v2.2S, v15.2S // .............................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w0, w0, x4 // .....................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // shl v15.2D, v27.2D, #1 // .......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x25, w5, w8, x25 // ..............................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v25.2D, v7.2S, v18.2S // .....................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. 
+ // and x8, x27, #0x1ffffff // .........................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... + // umlal v25.2D, v6.2S, v8.2S // .........................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + // ldr x17, [sp, #STACK_B_16] // ......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v26.2D, v0.2S, v18.2S // ...............................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x13, w0, w14, x6 // .........................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v26.2D, v16.2S, v8.2S // ....................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x28, w26, w21, x18 // ........................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v20.2S, v20.2S, v31.2S // .....................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................................. + // ldr x2, [sp, #STACK_B_0] // .......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v12.2D, v13.2S, v18.2S // ..............................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x3, w3, w15, x25 // .........................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v12.2D, v7.2S, v8.2S // ......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x18, w26, w1, x4 // ..........................................................................................*......................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v21.2S, v21.2S, v31.2S // ...........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x4, w15, w16, x28 // .................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v25.2D, v4.2S, v20.2S // .............................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x28, w5, w16, x13 // ..............................................................................................*..................................................................................................................................................................................................................................................................................................................................................................................................... + // mul v27.2S, v29.2S, v31.2S // ........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x27, x1, x1 // ................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v12.2D, v6.2S, v20.2S // ...............................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x11, w22, w7, x4 // ...........................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v24.2D, v0.2S, v3.2S // ......................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................. + // umaddl x7, w22, w10, x28 // ..................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................. + // shl v8.2D, v25.2D, #1 // .................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x0, w22, w27, x3 // ..................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................. + // add x14, x29, x19, lsr #26 // ....................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x6, w15, w1, x20 // .......................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v28.2D, v14.2S, v23.2S // ............................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................................... + // umull x25, w17, w17 // ...............................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x4, w15, w23, x18 // ...................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ 
+ // add x13, x11, x14, lsr #25 // ......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................. + // ldr x20, [sp, #STACK_B_8] // ....................................................................................*............................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v26.2D, v5.2S, v20.2S // .....................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. 
+ // ldr x28, [sp, #STACK_B_32] // .....................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................................. + // umlal v10.2D, v6.2S, v11.2S // ....................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... + // lsr x12, x17, #32 // .....................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v1.2D, v6.2S, v3.2S // ...................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................................ + // umaddl x10, w22, w16, x9 // ............................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v12.2D, v4.2S, v21.2S // .....................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. 
+ // lsr x9, x20, #32 // ...........................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................................ + // umlal v26.2D, v14.2S, v11.2S // ............................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... + // umaddl x18, w22, w21, x4 // ................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... 
+ // add x26, x12, x12 // .......................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................................ + // lsr x16, x2, #32 // .............................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v10.2D, v4.2S, v27.2S // ...........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ 
+ // umull x29, w17, w26 // ...........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................ + // umlal v1.2D, v4.2S, v11.2S // ................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................... + // and x5, x13, #0x3ffffff // .......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v15.2D, v19.2S, v23.2S // ......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. + // umaddl x15, w22, w23, x6 // ...............................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v17.2D, v16.2S, v3.2S // .................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................................................. 
+ // add x23, x17, x17 // ............................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v14.2S, v3.2S // .............................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... + // add x22, x10, x13, lsr #26 // ..........................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul v25.2S, v3.2S, v31.2S // ..................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. + // lsr x27, x28, #32 // .........................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................................... + // umlal v15.2D, v2.2S, v3.2S // .......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................ 
+ // add x10, x18, x22, lsr #25 // ............................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v19.2S, v3.2S // ...................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................................ + // add x3, x16, x16 // .............................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v28.2D, v13.2S, v3.2S // .........................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................................... + // add x6, x15, x10, lsr #26 // ..............................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + // add x11, x27, x27 // .............................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w17, w27, w30 // ...............................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................... + // add x15, x0, x6, lsr #25 // ................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................... + // mul v18.2S, v23.2S, v31.2S // ..........................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... 
+ // and x1, x10, #0x3ffffff // .................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. + // add x4, x28, x28 // ........................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................................... + // umlal v17.2D, v5.2S, v11.2S // ...................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ 
+ // umaddl x25, w17, w11, x25 // ....................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................... + // bfi x1, x6, #32, #25 // ..........................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................... + // add x27, x7, x15, lsr #26 // .........................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v24.2D, v16.2S, v11.2S // ..........................................................................................................................*......................................................................................................................................................................................................................................................................................................................................................................... + // and x18, x15, #0x3ffffff // ......................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................................. + // add x6, x24, x27, lsr #25 // .........................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... 
+ // umaddl x25, w2, w4, x25 // ...............................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v13.2S, v11.2S // .................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................................. + // and x24, x19, #0x3ffffff // ...................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v28.2D, v7.2S, v11.2S // ............................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... + // bfi x18, x27, #32, #25 // .............................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... + // umlal v8.2D, v2.2S, v11.2S // .......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ 
+ // add x13, x20, x20 // ..................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. + // umlal v8.2D, v0.2S, v27.2S // ........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... + // bfi x24, x14, #32, #25 // ...........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v9.2D, v7.2S, v27.2S // ..................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................................. + // bfi x5, x22, #32, #25 // ............................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v6.2S, v22.2S // .........................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................................... 
+ // ldr x27, [sp, #STACK_B_24] // ..............................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... + // umlal v9.2D, v4.2S, v18.2S // ....................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... + // umaddl x10, w2, w11, x29 // ............................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v8.2D, v16.2S, v22.2S // .........................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... + // stp x1, x18, [sp, #STACK_A_16] // .................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. + // umlal v8.2D, v5.2S, v18.2S // ..........................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... 
+ // stp x24, x5, [sp, #STACK_A_0] // ....................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................... + // umlal v15.2D, v0.2S, v11.2S // ........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... + // add x24, x27, x27 // .........................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v15.2D, v16.2S, v27.2S // ..............................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................... + // lsr x15, x27, #32 // ..................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................. + // umlal v15.2D, v5.2S, v22.2S // ...................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ 
+ // mul w14, w28, w30 // ........................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................................... + // usra v9.2D, v8.2D, #26 // ..............................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... + // add x7, x15, x15 // .....................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v28.2D, v6.2S, v27.2S // ................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + // umaddl x0, w3, w7, x25 // ...................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ + // umlal v28.2D, v4.2S, v22.2S // .................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................. 
+ // mul w19, w15, w30 // ......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. + // usra v15.2D, v9.2D, #25 // .................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. + // umull x25, w2, w24 // ...........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ 
+ // umlal v24.2D, v5.2S, v27.2S // ..............................................................................................................................*..................................................................................................................................................................................................................................................................................................................................................................... + // umull x18, w2, w7 // ............................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... + // umlal v26.2D, v13.2S, v27.2S // .............................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................................... 
+ // mul w22, w27, w30 // ...................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................ + // usra v28.2D, v15.2D, #26 // ......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. + // umaddl x25, w3, w26, x25 // ..............................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................................... 
+ // shl v23.2D, v12.2D, #1 // ...............................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................................... + // umull x1, w2, w13 // ...............................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... + // and v12.16B, v15.16B, v30.16B // ..........................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... 
+ // ldr x5, [sp, #STACK_A_24] // ...................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ + // usra v24.2D, v28.2D, #25 // .........................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................................... + // umaddl x29, w16, w24, x18 // ...............................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... 
+ // ushr v15.2D, v30.2D, #1 // ...........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ + // umaddl x11, w20, w23, x25 // .................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................. + // umlal v23.2D, v19.2S, v11.2S // .....................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x15, w16, w3, x1 // ......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................. + // umlal v23.2D, v2.2S, v27.2S // ............................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................................... + // add x1, x9, x9 // .....................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................................................. 
+ // umlal v23.2D, v0.2S, v22.2S // .............................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................................... + // umaddl x25, w9, w1, x11 // ....................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... + // umlal v23.2D, v16.2S, v18.2S // .......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ 
+ // mul w11, w12, w30 // ................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................................... + // and v16.16B, v8.16B, v30.16B // ...............................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................................... + // umaddl x21, w27, w22, x15 // ..............................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... 
+ // and v19.16B, v28.16B, v15.16B // ..............................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................................... + // umaddl x27, w28, w14, x25 // .......................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................ + // usra v10.2D, v24.2D, #26 // ............................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................................... 
+ // umull x15, w2, w2 // .......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................ + // uzp1 v12.4S, v12.4S, v19.4S // .................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. + // umaddl x10, w16, w4, x10 // .................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................. 
+ // and v24.16B, v24.16B, v30.16B // .............................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... + // umaddl x29, w20, w26, x29 // ..................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. + // and v21.16B, v10.16B, v15.16B // ................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... 
+ // umull x28, w2, w23 // ........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... + // usra v17.2D, v10.2D, #25 // ...............................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................................... + // umull x25, w2, w26 // ........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................... 
+ // uzp1 v11.4S, v24.4S, v21.4S // ...................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................ + // umaddl x18, w11, w26, x15 // ..........................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................................... + // umlal v26.2D, v7.2S, v22.2S // ......................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................................. 
+ // umaddl x11, w3, w1, x28 // ...........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ + // usra v1.2D, v17.2D, #26 // ..................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................. + // umaddl x28, w16, w23, x25 // ................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... 
+ // umlal v23.2D, v5.2S, v25.2S // ........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... + // umaddl x25, w20, w7, x10 // .....................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. + // uzp1 v10.4S, v12.4S, v11.4S // ..........................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... 
+ // umaddl x12, w20, w20, x11 // ................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... + // bic v21.16B, v1.16B, v15.16B // ......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. + // umull x10, w2, w1 // .............................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................................... 
+ // umlal v26.2D, v6.2S, v18.2S // ...........................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................................ + // umaddl x25, w9, w24, x25 // ......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. + // usra v23.2D, v21.2D, #25 // ........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................... 
+ // umaddl x11, w19, w7, x12 // ...................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ + // and v5.16B, v1.16B, v15.16B // .....................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................. + // umull x2, w2, w3 // ..........................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................... 
+ // and v24.16B, v17.16B, v30.16B // ....................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... + // umaddl x12, w16, w13, x10 // ....................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... + // usra v23.2D, v21.2D, #24 // ...........................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................ 
+ // umaddl x15, w20, w1, x28 // .....................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. + // uzp2 v20.4S, v12.4S, v11.4S // .............................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... + // umaddl x29, w9, w23, x29 // ..........................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... 
+ // umlal v26.2D, v4.2S, v25.2S // ................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................................... + // umaddl x10, w22, w26, x2 // ..............................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... + // usra v23.2D, v21.2D, #21 // ..............................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................................... 
+ // umaddl x16, w20, w24, x0 // .......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ + // and v19.16B, v9.16B, v15.16B // .........................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + // umaddl x28, w14, w7, x15 // ........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................... 
+ // trn1 v18.4S, v24.4S, v5.4S // .......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ + // umaddl x9, w19, w23, x10 // .................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. + // usra v26.2D, v23.2D, #26 // .................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................. 
+ // add x0, x8, x6, lsr #26 // ...........................................................................................................................................................................*........................................................................................................................................................................................................................................................................................................................ + // ld1r {v17.2D}, [sp] // .....................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................................................. + // add x8, x26, x26 // ..........................................................................................................................................................................*......................................................................................................................................................................................................................................................................................................................... 
+ // and v29.16B, v23.16B, v30.16B // ...................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................ + // umaddl x15, w19, w8, x21 // .......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................ + // usra v16.2D, v26.2D, #25 // ....................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................... 
+ // and x6, x6, #0x3ffffff // ................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................................... + // mov v5.d[0], v18.d[1] // ...............................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... + // add v27.2S, v18.2S, v17.2S // ...............................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................................... 
+ // and v4.16B, v26.16B, v15.16B // ......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................. + // umaddl x10, w14, w23, x15 // ............................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... + // add v22.2S, v18.2S, v5.2S // ..................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. 
+ // sub v27.2S, v27.2S, v5.2S // ..................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................. + // add v23.4S, v10.4S, v17.4S // ............................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................................... + // umaddl x20, w19, w24, x12 // ........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... 
+ // umaddl x15, w14, w24, x11 // ............................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... + // ldr b5, [sp, #STACK_MASK2] // ....................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................................... + // zip2 v11.2S, v27.2S, v22.2S // .....................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. 
+ // zip1 v26.2S, v27.2S, v22.2S // ............................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................................... + // usra v19.2D, v16.2D, #26 // .......................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................ + // umaddl x12, w1, w26, x16 // ..........................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... 
+ // mov v17.b[0], v5.b[0] // .....................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................................. + // shl v3.2S, v11.2S, #1 // .....................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. + // and v27.16B, v16.16B, v30.16B // ........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................... 
+ // umaddl x16, w14, w1, x9 // .........................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... + // uzp1 v4.4S, v29.4S, v4.4S // .........................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................... + // ldr x21, =0x07fffffe07fffffc // .............................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... 
+ // uzp1 v5.4S, v27.4S, v19.4S // ..........................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................................... + // bfi x6, x0, #32, #26 // ....................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................... + // add v27.4S, v10.4S, v20.4S // ...........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ 
+ // umaddl x11, w17, w8, x15 // ................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... + // sub v16.4S, v23.4S, v20.4S // ................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................................... + // str x6, [sp, #STACK_A_32] // ......................................................................................................................................................................................*............................................................................................................................................................................................................................................................................................................. 
+ // uzp1 v25.4S, v4.4S, v5.4S // .............................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................... + // add x8, x25, x12, lsr #26 // ........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... + // uzp2 v28.4S, v4.4S, v5.4S // ..............................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... 
+ // umaddl x2, w17, w13, x16 // ...............................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... + // zip2 v22.4S, v16.4S, v27.4S // ................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................... + // umaddl x9, w14, w26, x20 // ...........................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................................ 
+ // add v18.4S, v25.4S, v17.4S // ...............................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... + // add x20, x18, x8, lsr #25 // ..........................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... + // add v25.4S, v25.4S, v28.4S // .................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. 
+ // bic x18, x8, #0x1ffffff // ..............................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... + // zip1 v29.4S, v16.4S, v27.4S // ...................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................ + // add x16, x20, x18, lsr #24 // ................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... 
+ // sub v21.4S, v18.4S, v28.4S // ..................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. + // add x0, x3, x3 // .........................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................................................... + // add x15, x16, x18, lsr #21 // ..................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. 
+ // mul v12.2S, v11.2S, v31.2S // ....................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... + // mov v6.d[0], v22.d[1] // .....................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................................. + // mov v7.d[0], v29.d[1] // ...........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ 
+ // zip1 v17.4S, v21.4S, v25.4S // .......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ + // umaddl x25, w22, w23, x15 // .....................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. + // shl v20.2S, v29.2S, #1 // .............................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... 
+ // shl v8.2S, v7.2S, #1 // ....................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... + // mul v5.2S, v26.2S, v31.2S // ............................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... + // add x15, x1, x1 // .................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. 
+ // mov v14.d[0], v17.d[1] // ...........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................ + // shl v1.2S, v22.2S, #1 // .........................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... + // zip2 v16.4S, v21.4S, v25.4S // ......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. 
+ // umaddl x25, w19, w15, x25 // .........................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... + // shl v0.2S, v14.2S, #1 // ......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. + // shl v27.2S, v6.2S, #1 // ........................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................................... 
+ // mov v13.d[0], v16.d[1] // ....................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................... + // shl v23.2S, v26.2S, #1 // .............................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................................... + // ldr x19, [sp, #STACK_A_8] // ...............................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................... 
+ // mul v7.2S, v7.2S, v31.2S // .................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................. + // shl v4.2S, v16.2S, #1 // .........................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................................... + // shl v28.2S, v13.2S, #1 // ......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................. 
+ // mul v2.2S, v22.2S, v31.2S // ............................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... + // umaddl x18, w14, w13, x25 // ............................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... + // umull v21.2D, v17.2S, v1.2S // .....................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................................. 
+ // ldr x25, [sp, #STACK_A_16] // .................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................................. + // umull v9.2D, v17.2S, v3.2S // ...............................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... + // add x3, x7, x7 // .............................................................................................................................................................................................*...................................................................................................................................................................................................................................................................................................... 
+ // umull v10.2D, v17.2S, v4.2S // .............................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... + // umaddl x22, w17, w23, x9 // .......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................ + // umull v11.2D, v17.2S, v28.2S // ..............................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... 
+ // ldr x23, [sp, #STACK_A_32] // ..............................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................................... + // umull v24.2D, v17.2S, v27.2S // ..........................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................................... + // add x20, x25, x21 // ......................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................. 
+ // umull v19.2D, v17.2S, v0.2S // ...............................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................... + // umaddl x25, w17, w0, x18 // ................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... + // umull v18.2D, v17.2S, v17.2S // .................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. 
+ // ldr x9, [sp, #STACK_A_0] // ...................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ + // umull v25.2D, v17.2S, v23.2S // ..................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................. + // umaddl x6, w17, w15, x10 // .................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. 
+ // umlal v9.2D, v14.2S, v23.2S // ...................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................ + // umaddl x0, w17, w3, x27 // ...........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ + // umlal v9.2D, v16.2S, v27.2S // .......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ 
+ // and x14, x12, #0x3ffffff // ........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... + // umlal v9.2D, v13.2S, v1.2S // ................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................... + // and x7, x25, #0x3ffffff // ....................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... 
+ // umlal v9.2D, v29.2S, v8.2S // .......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ + // add x3, x2, x25, lsr #26 // ...................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ + // umlal v11.2D, v14.2S, v4.2S // .....................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. 
+ // add x13, x19, x21 // ....................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................................... + // umlal v24.2D, v14.2S, v1.2S // ................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................... + // add x25, x6, x3, lsr #25 // .....................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................................. 
+ // umlal v24.2D, v16.2S, v8.2S // ....................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................... + // bfi x7, x3, #32, #25 // ......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. + // umlal v19.2D, v12.2S, v4.2S // ............................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... 
+ // add x12, x23, x21 // ..................................................................................................................................................................................................................*................................................................................................................................................................................................................................................................................. + // umlal v21.2D, v0.2S, v8.2S // .................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................. + // mov w18, w7 // ..........................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... 
+ // umlal v25.2D, v0.2S, v27.2S // ........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................... + // umaddl x15, w17, w4, x29 // .............................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................................... + // umlal v25.2D, v16.2S, v1.2S // .........................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................................... 
+ // and x27, x25, #0x3ffffff // ...............................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... + // umlal v25.2D, v28.2S, v8.2S // ..........................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................................... + // add x5, x5, x21 // ............................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................................... 
+ // umlal v25.2D, v29.2S, v29.2S // ...........................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................................ + // add x6, x22, x25, lsr #26 // .......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................ + // umlal v25.2D, v12.2S, v3.2S // ............................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................................... 
+ // ldr x4, =121666 // ........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... + // umlal v21.2D, v16.2S, v20.2S // ..................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. + // bfi x27, x6, #32, #25 // ......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. 
+ // umlal v21.2D, v28.2S, v13.2S // ...................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................ + // movk x21, #0xffb4 // ...............................................................................................................................................................................................................................*.................................................................................................................................................................................................................................................................... + // umlal v24.2D, v13.2S, v20.2S // ........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................... 
+ // mov w10, w27 // ...................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ + // umull v13.2D, v17.2S, v20.2S // .........................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... + // add x22, x9, x21 // .......................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................................ 
+ // usra v9.2D, v25.2D, #26 // ..................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. + // sub x19, x13, x27 // ...............................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... + // umlal v10.2D, v0.2S, v14.2S // ...........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ 
+ // umaddl x26, w19, w4, x10 // ....................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... + // umlal v10.2D, v2.2S, v22.2S // .....................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. + // sub x25, x22, x7 // ........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... 
+ // umlal v13.2D, v0.2S, v28.2S // ................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... + // umaddl x21, w17, w24, x28 // ..............................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................... + // add x29, x11, x6, lsr #25 // .........................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................................... 
+ // mul v3.2S, v6.2S, v31.2S // ..............................................................................................................................................................................................................................*..................................................................................................................................................................................................................................................................... + // umlal v21.2D, v5.2S, v26.2S // ...............................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... + // umaddl x23, w25, w4, x18 // .............................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... 
+ // and v22.16B, v25.16B, v30.16B // ...................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ + // add x28, x21, x29, lsr #26 // ...........................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................................ + // shl v29.2S, v0.2S, #1 // ..............................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... 
+ // shl v0.2S, v28.2S, #1 // ..............................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................................... + // umlal v13.2D, v16.2S, v16.2S // ....................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + // and x22, x8, #0x1ffffff // ..................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................................. 
+ // umlal v13.2D, v3.2S, v27.2S // .........................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... + // add x9, x0, x28, lsr #25 // .............................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... + // bic v25.16B, v9.16B, v15.16B // ......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. 
+ // lsr x1, x19, #32 // ..................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. + // umull v17.2D, v17.2S, v8.2S // ......................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................................. + // and x13, x9, #0x3ffffff // ................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................... 
+ // usra v18.2D, v25.2D, #25 // ........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... + // add x0, x15, x9, lsr #26 // ...............................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................... + // umlal v11.2D, v12.2S, v20.2S // .............................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................................... 
+ // and x8, x29, #0x3ffffff // ............................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................................... + // umlal v19.2D, v5.2S, v28.2S // .............................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... + // add x14, x14, x0, lsr #25 // .................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................................. 
+ // usra v18.2D, v25.2D, #24 // ...........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ + // stp x7, x27, [sp, #STACK_B_0] // ............................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... + // umlal v17.2D, v14.2S, v20.2S // ..........................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... 
+ // and x21, x14, #0x3ffffff // ....................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................... + // umlal v11.2D, v5.2S, v8.2S // .......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ + // add x27, x22, x14, lsr #26 // ...................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ 
+ // umlal v11.2D, v3.2S, v1.2S // ................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... + // bfi x8, x28, #32, #25 // .........................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................................... + // usra v18.2D, v25.2D, #21 // .................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. 
+ // bfi x21, x27, #32, #26 // .....................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................................. + // umlal v19.2D, v3.2S, v20.2S // ..............................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... + // sub x29, x20, x8 // .............................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................................... 
+ // umlal v17.2D, v16.2S, v28.2S // ..................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. + // sub x24, x12, x21 // .......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................ + // shl v16.2S, v8.2S, #1 // .........................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... 
+ // shl v6.2S, v27.2S, #1 // .........................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................................... + // umlal v18.2D, v7.2S, v8.2S // ....................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... + // lsr x20, x29, #32 // ................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................... 
+ // umlal v18.2D, v12.2S, v29.2S // .....................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. + // mov w2, w8 // .................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................. + // umlal v18.2D, v5.2S, v4.2S // ......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................. 
+ // and x17, x6, #0x1ffffff // .................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. + // umlal v18.2D, v3.2S, v0.2S // .......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ + // umaddl x11, w29, w4, x2 // .....................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................................. 
+ // umlal v18.2D, v2.2S, v20.2S // ........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................... + // and x9, x28, #0x1ffffff // ..................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................. + // umlal v10.2D, v12.2S, v0.2S // ..........................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... 
+ // umaddl x22, w20, w4, x9 // .......................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................ + // umlal v10.2D, v5.2S, v20.2S // ..........................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................... + // add x12, sp, #STACK_A_0 // ..............................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... 
+ // umlal v10.2D, v3.2S, v16.2S // ...........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ + // add x18, sp, #STACK_X_0 // ..............................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................................... + // umlal v19.2D, v2.2S, v8.2S // ...............................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................................... 
+ // lsr x10, x24, #32 // ........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................... + // usra v19.2D, v18.2D, #26 // ............................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................... + // bfi x13, x0, #32, #25 // ......................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................................. 
+ // umlal v17.2D, v12.2S, v1.2S // ..............................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... + // add x15, sp, #STACK_B_0 // .....................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. + // umlal v17.2D, v5.2S, v27.2S // ....................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................... 
+ // mov w28, w13 // ..........................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... + // usra v10.2D, v19.2D, #25 // ...............................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................... + // and x6, x0, #0x1ffffff // ............................................................................................................................................................................................................................................................................*....................................................................................................................................................................................................................... 
+ // umlal v13.2D, v12.2S, v16.2S // .............................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................................... + // and x9, x3, #0x1ffffff // ..........................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................................... + // umlal v13.2D, v5.2S, v1.2S // .................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................................. 
+ // umaddl x2, w10, w4, x27 // ...........................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................................ + // usra v11.2D, v10.2D, #26 // ..................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................. + // sub x27, x5, x13 // ..........................................................................................................................................................................................................................................................................*......................................................................................................................................................................................................................... 
+ // umlal v24.2D, v12.2S, v23.2S // ...................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................................ + // lsr x7, x27, #32 // ........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... + // and v0.16B, v18.16B, v30.16B // ...............................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... 
+ // stp x8, x13, [sp, #STACK_B_16] // ..................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................. + // usra v13.2D, v11.2D, #25 // .....................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................................. + // lsr x13, x2, #25 // ...................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................................ 
+ // and v3.16B, v11.16B, v15.16B // ......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. + // lsr x3, x25, #32 // ...........................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................................ + // umlal v21.2D, v12.2S, v6.2S // ................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... 
+ // add x5, x23, x13 // ................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................... + // and v2.16B, v10.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................................ + // add x5, x5, x13, lsl #1 // .......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ 
+ // ld2 { v10.S, v11.S }[1], [x15], #8 // .......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................ + // umaddl x9, w3, w4, x9 // ..............................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................................... + // usra v17.2D, v13.2D, #26 // ........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................... 
+ // add x23, x5, x13, lsl #4 // .........................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... + // and v4.16B, v13.16B, v30.16B // ............................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... + // and x0, x2, #0x1ffffff // ....................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................................... 
+ // ld2 { v25.S, v26.S }[1], [x15], #8 // ..........................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................................... + // add x13, x9, x23, lsr #26 // ...........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ + // usra v21.2D, v17.2D, #25 // .....................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. 
+ // umaddl x9, w7, w4, x6 // ............................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................................... + // ld2 { v10.S, v11.S }[0], [x18], #8 // ...........................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................................ + // add x16, x26, x13, lsr #25 // .............................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... 
+ // ld2 { v13.S, v14.S }[1], [x15], #8 // .............................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................................... + // str x21, [sp, #STACK_B_32] // .....................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................................. + // usra v24.2D, v21.2D, #26 // .............................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... 
+ // mov w8, w21 // .......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ + // and x6, x13, #0x1ffffff // ................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... + // ld2 { v25.S, v26.S }[0], [x18], #8 // ..............................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................................... 
+ // and v5.16B, v17.16B, v15.16B // .................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. + // umaddl x17, w1, w4, x17 // ......................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................................. + // usra v22.2D, v24.2D, #25 // ................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... 
+ // umull x5, w6, w20 // .............................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................................... + // ld2 { v13.S, v14.S }[0], [x18], #8 // ..................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. + // and x14, x16, #0x3ffffff // ......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. 
+ // and v9.16B, v9.16B, v15.16B // .........................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... + // add x17, x17, x16, lsr #26 // ...............................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................... + // usra v9.2D, v22.2D, #26 // ...................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ 
+ // umull x2, w6, w7 // ......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. + // and v1.16B, v19.16B, v15.16B // .........................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................................... + // add x26, x11, x17, lsr #25 // .................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................................. 
+ // ld2 { v0.S, v1.S }[1], [x12], #8 // ................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................... + // umull x21, w6, w24 // .........................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................................... + // ld2 { v19.S, v20.S }[1], [x15], #8 // ........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................... 
+ // add x22, x22, x26, lsr #26 // ...................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ + // and v8.16B, v22.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. + // umaddl x11, w27, w4, x28 // ....................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... 
+ // and x13, x26, #0x3ffffff // ..........................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... + // ld2 { v2.S, v3.S }[1], [x12], #8 // ...................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................ + // ld2 { v22.S, v23.S }[1], [x15], #8 // ...........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ 
+ // and x28, x22, #0x1ffffff // ..........................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... + // ld2 { v19.S, v20.S }[0], [x18], #8 // ............................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... + // add x16, x11, x22, lsr #25 // .......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................ 
+ // ld2 { v4.S, v5.S }[1], [x12], #8 // ......................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................. + // and x11, x17, #0x1ffffff // ..................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................................. + // and v7.16B, v24.16B, v15.16B // .......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ 
+ // umaddl x22, w11, w27, x21 // ............................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................................... + // and v6.16B, v21.16B, v30.16B // .........................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... + // add x15, x9, x16, lsr #26 // .........................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................................... 
+ // umaddl x9, w11, w1, x5 // ................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................... + // ld2 { v6.S, v7.S }[1], [x12], #8 // .............................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... + // umull v21.2D, v0.2S, v26.2S // ..........................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................... 
+ // umaddl x26, w28, w29, x22 // ...................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................ + // ld2 { v22.S, v23.S }[0], [x18], #8 // ...............................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... + // umaddl x5, w24, w4, x8 // ........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... 
+ // ld2 { v8.S, v9.S }[1], [x12], #8 // ................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... + // umaddl x4, w11, w20, x2 // ..................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + // umull v24.2D, v1.2S, v20.2S // .................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. 
+ // and x21, x16, #0x3ffffff // ...........................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................................ + // add x12, x5, x15, lsr #25 // ...........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ + // mul v15.2S, v20.2S, v31.2S // ........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................... 
+ // umaddl x18, w28, w1, x4 // .....................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................................. + // mul v28.2S, v23.2S, v31.2S // ..................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................................. + // umull v23.2D, v0.2S, v23.2S // ..................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. 
+ // and x4, x15, #0x1ffffff // ............................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... + // mul v16.2S, v19.2S, v31.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. + // add x15, x0, x12, lsr #26 // ..............................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... 
+ // umull v18.2D, v1.2S, v11.2S // ....................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................................... + // umaddl x17, w4, w19, x26 // .............................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................................... + // umlal v24.2D, v3.2S, v14.2S // ....................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... 
+ // mul w0, w15, w30 // .................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + // umlal v21.2D, v2.2S, v11.2S // ..............................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................... + // umull x2, w0, w27 // .........................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... 
+ // umull v29.2D, v0.2S, v14.2S // ..............................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... + // umaddl x22, w28, w3, x9 // .......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................ + // umull v12.2D, v1.2S, v14.2S // .......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ 
+ // umaddl x15, w15, w25, x17 // ................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................... + // umlal v18.2D, v3.2S, v28.2S // ......................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................. + // and x5, x23, #0x3ffffff // ...............................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................... 
+ // umlal v18.2D, v5.2S, v15.2S // ............................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................................... + // mul w8, w4, w30 // ..................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................. + // umlal v29.2D, v2.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... 
+ // umull x9, w0, w20 // .............................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + // umlal v12.2D, v3.2S, v26.2S // ...........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ + // and x17, x12, #0x3ffffff // .............................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... 
+ // umull v17.2D, v0.2S, v20.2S // ...............................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... + // umaddl x2, w28, w25, x2 // ............................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... + // umlal v21.2D, v4.2S, v28.2S // ..................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. 
+ // umaddl x16, w4, w3, x18 // ...............................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................... + // umlal v29.2D, v4.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................................ + // umaddl x9, w8, w7, x9 // ........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... 
+ // umlal v29.2D, v6.2S, v28.2S // .....................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. + // mul w23, w28, w30 // .........................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... + // umlal v17.2D, v2.2S, v14.2S // ...................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ 
+ // umaddl x15, w5, w10, x15 // ...................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................................ + // umlal v17.2D, v4.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... + // umaddl x12, w0, w10, x16 // .................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. 
+ // umlal v17.2D, v6.2S, v11.2S // .....................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................. + // umaddl x9, w23, w10, x9 // ...........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + // umlal v12.2D, v5.2S, v11.2S // ............................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... 
+ // umaddl x15, w14, w7, x15 // ......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. + // umlal v12.2D, v7.2S, v28.2S // .............................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................................... + // umaddl x16, w8, w10, x22 // .....................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................................................. 
+ // umlal v12.2D, v9.2S, v15.2S // ..............................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... + // umull x22, w0, w29 // .......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ + // umaddl x15, w13, w20, x15 // ..........................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... 
+ // mul v27.2S, v14.2S, v31.2S // ..........................................................................................................................................................................................................................................................................................................................*......................................................................................................................................................................... + // umlal v23.2D, v2.2S, v20.2S // ......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................. + // umaddl x26, w0, w7, x16 // ........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... 
+ // umlal v23.2D, v4.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... + // add x12, x12, x12 // ..............................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... + // umlal v23.2D, v6.2S, v26.2S // .......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................ 
+ // umaddl x18, w5, w24, x12 // ................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... + // umlal v23.2D, v8.2S, v11.2S // ........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................... + // add x26, x26, x26 // ...........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................ 
+ // umull v20.2D, v1.2S, v26.2S // ...................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ + // umaddl x26, w5, w27, x26 // ............................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................................... + // umlal v29.2D, v8.2S, v15.2S // ................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... 
+ // mul w16, w17, w30 // ...................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ + // umull x12, w6, w27 // ..............................................................................................................................................................................................................................................................................................................................*..................................................................................................................................................................... + // umlal v24.2D, v5.2S, v26.2S // ........................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................................... 
+ // umaddl x18, w14, w27, x18 // .....................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. + // umlal v18.2D, v7.2S, v27.2S // .................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................. + // umaddl x26, w14, w29, x26 // ................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................... 
+ // umlal v20.2D, v3.2S, v11.2S // ...........................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................................ + // umaddl x12, w11, w29, x12 // .................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................................................. + // umlal v20.2D, v5.2S, v28.2S // ............................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................................... 
+ // umaddl x18, w13, w29, x18 // ..................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. + // umlal v24.2D, v7.2S, v11.2S // .........................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................................... + // umaddl x26, w13, w19, x26 // ...................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................ 
+ // umlal v24.2D, v9.2S, v28.2S // ..........................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................................... + // umaddl x28, w28, w19, x12 // ....................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................................... + // umlal v23.2D, v1.2S, v22.2S // .........................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................................... 
+ // umaddl x12, w21, w19, x18 // .....................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. + // umlal v23.2D, v3.2S, v19.2S // ..........................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... + // umaddl x26, w21, w25, x26 // .............................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... 
+ // umlal v23.2D, v5.2S, v13.2S // ...............................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... + // umull x18, w0, w19 // ....................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................... + // shl v24.2D, v24.2D, #1 // .................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................. 
+ // umaddl x9, w11, w3, x9 // ....................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... + // umlal v20.2D, v7.2S, v15.2S // .............................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................................... + // umaddl x4, w4, w25, x28 // .......................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................................ 
+ // umlal v20.2D, v9.2S, v27.2S // ..............................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................... + // umaddl x15, w21, w1, x15 // ..............................................................................................................................................................................................................................................................................................................................................................*..................................................................................................................................... + // umlal v24.2D, v0.2S, v22.2S // .................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. 
+ // umaddl x9, w6, w1, x9 // ........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... + // umlal v24.2D, v2.2S, v19.2S // ............................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... + // umaddl x4, w0, w24, x4 // ...............................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................................... 
+ // umlal v24.2D, v4.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................................... + // umaddl x28, w8, w24, x2 // ...............................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................... + // shl v12.2D, v12.2D, #1 // ..................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. 
+ // add x9, x9, x9 // ...........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ + // mul v14.2S, v13.2S, v31.2S // .......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................ + // umaddl x2, w5, w7, x4 // ..................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................................. 
+ // umlal v12.2D, v0.2S, v19.2S // ......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. + // umaddl x4, w11, w19, x28 // ..................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................. + // umlal v17.2D, v8.2S, v28.2S // ................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................... 
+ // mul w28, w21, w30 // ..........................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................................... + // shl v20.2D, v20.2D, #1 // ...................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................................ + // umaddl x2, w14, w20, x2 // ......................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................. 
+ // umlal v29.2D, v1.2S, v13.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... + // umaddl x9, w16, w27, x9 // ............................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................................... + // umlal v12.2D, v2.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................. 
+ // umaddl x4, w6, w29, x4 // ......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. + // umlal v20.2D, v0.2S, v13.2S // .....................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................................. + // umaddl x2, w13, w1, x2 // .................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................................. 
+ // umaddl x4, w16, w7, x4 // .........................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... + // umlal v17.2D, v1.2S, v19.2S // ......................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................. + // umaddl x2, w21, w3, x2 // ......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. 
+ // umlal v17.2D, v3.2S, v13.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................ + // umaddl x21, w17, w3, x15 // ........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... + // umaddl x4, w28, w10, x4 // .................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. 
+ // mul v13.2S, v26.2S, v31.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + // umlal v12.2D, v4.2S, v25.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... + // umaddl x15, w8, w29, x18 // ....................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................................... 
+ // umlal v23.2D, v7.2S, v25.2S // ....................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................................... + // mul w18, w11, w30 // ................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... + // umlal v23.2D, v9.2S, v10.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... 
+ // umaddl x4, w13, w3, x4 // ....................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... + // umlal v18.2D, v9.2S, v13.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ + // umaddl x15, w23, w27, x15 // ..........................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................................... 
+ // umlal v17.2D, v5.2S, v25.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... + // umaddl x9, w28, w24, x9 // ...................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................ + // umaddl x4, w14, w1, x4 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ 
+ // mul v26.2S, v11.2S, v31.2S // ................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... + // umull v11.2D, v0.2S, v11.2S // .................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................................. + // umaddl x17, w17, w25, x12 // ..........................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... 
+ // shl v19.2D, v18.2D, #1 // ...........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................ + // umaddl x22, w8, w27, x22 // ..............................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... + // umaddl x15, w18, w24, x15 // ...................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ 
+ // mul v18.2S, v22.2S, v31.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... + // umlal v19.2D, v0.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... + // umaddl x4, w5, w20, x4 // ..............................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................... 
+ // umull v22.2D, v1.2S, v28.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................... + // umaddl x12, w13, w25, x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... + // umlal v21.2D, v6.2S, v15.2S // ........................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................................... 
+ // umaddl x9, w6, w25, x15 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. + // umlal v21.2D, v8.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................................... + // umaddl x15, w23, w24, x22 // .................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. 
+ // umlal v12.2D, v6.2S, v10.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... + // umaddl x22, w14, w19, x12 // ............................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................................... + // umlal v22.2D, v3.2S, v15.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................................... 
+ // umaddl x12, w16, w1, x9 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... + // umlal v22.2D, v5.2S, v27.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................................... + // umaddl x11, w11, w25, x15 // .....................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................. 
+ // umlal v17.2D, v7.2S, v10.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ + // umaddl x22, w5, w29, x22 // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... + // umlal v19.2D, v2.2S, v10.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. 
+ // umlal v21.2D, v1.2S, v25.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................... + // umaddl x26, w16, w24, x26 // ................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................................... + // umlal v21.2D, v3.2S, v10.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................... 
+ // add x9, x4, x22, lsr #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................. + // umlal v11.2D, v2.2S, v28.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................................... + // umaddl x2, w16, w10, x2 // .............................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... 
+ // umlal v11.2D, v4.2S, v15.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. + // add x26, x26, x9, lsr #25 // ....................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... + // umlal v22.2D, v7.2S, v13.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................................... 
+ // mul w15, w13, w30 // .......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................ + // umlal v22.2D, v9.2S, v26.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................... + // add x2, x2, x26, lsr #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................. 
+ // umlal v21.2D, v5.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................................. + // umlal v29.2D, v3.2S, v25.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................*..................................................................................................... + // add x13, x17, x2, lsr #25 // ........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................... 
+ // umlal v29.2D, v5.2S, v10.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... + // shl v15.2D, v22.2D, #1 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... + // add x4, x21, x13, lsr #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... 
+ // umlal v21.2D, v7.2S, v16.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... + // and x13, x13, #0x3ffffff // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... + // umlal v21.2D, v9.2S, v14.2S // ..........................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................................... 
+ // bic x17, x4, #0x3ffffff // .............................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................................... + // umlal v29.2D, v7.2S, v18.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + // lsr x21, x17, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... 
+ // umlal v19.2D, v4.2S, v18.2S // .......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ + // umaddl x11, w6, w19, x11 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ + // umlal v19.2D, v6.2S, v16.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................................ 
+ // add x21, x21, x17, lsr #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ + // umlal v19.2D, v8.2S, v14.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... + // bfi x13, x4, #32, #26 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. 
+ // umlal v15.2D, v0.2S, v10.2S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... + // and x26, x26, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................ + // umlal v20.2D, v2.2S, v25.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................................................. 
+ // add x17, x21, x17, lsr #22 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................ + // umlal v20.2D, v4.2S, v10.2S // ......................................................................................................................................................................................................................................................................................................................................................................................................*............................................................................................. + // umull x21, w0, w1 // ............................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................................... 
+ // umlal v12.2D, v8.2S, v18.2S // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. + // umaddl x0, w0, w3, x17 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................................... + // umlal v15.2D, v2.2S, v18.2S // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... 
+ // umaddl x11, w16, w20, x11 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... + // umlal v15.2D, v4.2S, v16.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. + // umaddl x17, w28, w20, x12 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... 
+ // umlal v20.2D, v6.2S, v18.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................................ + // umaddl x0, w8, w1, x0 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... + // umlal v20.2D, v8.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................................... 
+ // umaddl x12, w8, w20, x21 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................... + // usra v21.2D, v19.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................... + // bfi x26, x2, #32, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................................... 
+ // umlal v11.2D, v6.2S, v27.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................................... + // umaddl x8, w28, w7, x11 // .................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................................. + // umlal v29.2D, v9.2S, v16.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. 
+ // umaddl x0, w23, w20, x0 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... + // usra v20.2D, v21.2D, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................ + // umaddl x23, w23, w7, x12 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................................. 
+ // umlal v24.2D, v6.2S, v25.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ + // mul w12, w6, w30 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... + // umlal v15.2D, v6.2S, v14.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................. 
+ // umaddl x2, w18, w7, x0 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ + // usra v29.2D, v20.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................. + // umaddl x20, w18, w10, x23 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................................. 
+ // umlal v17.2D, v9.2S, v18.2S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... + // umaddl x7, w15, w7, x17 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... + // umlal v24.2D, v8.2S, v10.2S // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. 
+ // umaddl x2, w12, w10, x2 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................... + // usra v12.2D, v29.2D, #25 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................................... + // umaddl x0, w6, w3, x20 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................... 
+ // umlal v11.2D, v8.2S, v13.2S // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. + // mul w11, w14, w30 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... + // umlal v11.2D, v1.2S, v10.2S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................................... 
+ // add x4, x2, x2 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... + // usra v17.2D, v12.2D, #26 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................*....................................................... + // umaddl x20, w16, w19, x4 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................... 
+ // and v22.16B, v12.16B, v30.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................... + // add x12, x0, x0 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................................ + // mul v12.2S, v25.2S, v31.2S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ 
+ // umaddl x12, w16, w29, x12 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... + // usra v24.2D, v17.2D, #25 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. + // umaddl x4, w28, w29, x20 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ 
+ // umlal v11.2D, v3.2S, v18.2S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... + // umaddl x7, w11, w10, x7 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................. + // ushr v25.2D, v30.2D, #1 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................................... 
+ // umaddl x18, w28, w27, x12 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................ + // usra v23.2D, v24.2D, #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................................... + // umaddl x12, w15, w27, x4 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. 
+ // umlal v15.2D, v8.2S, v12.2S // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................ + // umaddl x4, w5, w3, x7 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................................... + // and v26.16B, v19.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ 
+ // umaddl x27, w15, w24, x18 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... + // bic v19.16B, v23.16B, v25.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. + // umaddl x24, w11, w24, x12 // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... 
+ // umlal v11.2D, v5.2S, v16.2S // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................................... + // and x17, x9, #0x1ffffff // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................. + // usra v15.2D, v19.2D, #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ 
+ // umaddl x28, w15, w10, x8 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................................... + // and v27.16B, v20.16B, v30.16B // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................ + // umaddl x0, w5, w25, x24 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... 
+ // and v28.16B, v29.16B, v25.16B // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................... + // umaddl x20, w14, w25, x27 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................. + // usra v15.2D, v19.2D, #24 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... 
+ // umaddl x28, w14, w3, x28 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................ + // umlal v11.2D, v7.2S, v14.2S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................... + // and x9, x22, #0x3ffffff // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................ 
+ // umlal v11.2D, v9.2S, v12.2S // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... + // umaddl x25, w5, w19, x20 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................... + // usra v15.2D, v19.2D, #21 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... 
+ // umaddl x28, w5, w1, x28 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................................... + // and v13.16B, v24.16B, v30.16B // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. + // add x23, x4, x0, lsr #26 // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... 
+ // and v20.16B, v23.16B, v25.16B // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. + // ldr x4, [sp, #STACK_CTR] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................................................. + // usra v11.2D, v15.2D, #26 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... 
+ // add x29, x25, x23, lsr #25 // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................. + // and v24.16B, v15.16B, v30.16B // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + // and x11, x0, #0x3ffffff // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ 
+ // trn1 v19.4S, v13.4S, v20.4S // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... + // add x20, x28, x29, lsr #26 // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... + // trn1 v8.4S, v8.4S, v9.4S // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ 
+ // and x22, x29, #0x3ffffff // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............. + // and v13.16B, v21.16B, v25.16B // ................................................................................................................................................................................................................................................................................................................................................................................................................................................*................................................... + // bfi x22, x20, #32, #25 // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ 
+ // usra v26.2D, v11.2D, #25 // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................ + // add x25, x9, x20, lsr #25 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. + // ldr d29, [sp, #STACK_MASK1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... 
+ // mov v18.d[0], v8.d[1] // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. + // trn1 v4.4S, v4.4S, v5.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....................... + // add x28, x17, x25, lsr #26 // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........... 
+ // trn1 v2.4S, v2.4S, v3.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................. + // and x2, x25, #0x3ffffff // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... + // mov v14.d[0], v4.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... 
+ // mov v3.d[0], x22 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ + // usra v13.2D, v26.2D, #26 // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............. + // add x27, sp, #STACK_SCALAR // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......................... 
+ // and v21.16B, v26.16B, v30.16B // .......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............ + // bfi x2, x28, #32, #26 // ..........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*......... + // and v20.16B, v11.16B, v25.16B // ....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*............... 
+ // subs w0, w4, #1 // .....................................................................................................................................................................................................................................................................................................................................................................................................................................................*.............................................. + // trn1 v6.4S, v6.4S, v7.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......................... + // asr w16, w0, #5 // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................ 
+ // trn1 v13.4S, v21.4S, v13.4S // .........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.......... + // ldr w9, [x27, w16, SXTW #2] // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...................... + // trn1 v15.4S, v27.4S, v28.4S // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. 
+ // and w17, w0, #0x1f // ......................................................................................................................................................................................................................................................................................................................................................................................................................................................*............................................. + // mov v12.d[0], v2.d[1] // ........................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........................... + // ldr d28, [sp, #STACK_MASK2] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..................... 
+ // and v17.16B, v17.16B, v25.16B // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................... + // lsr x8, x4, #32 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*................... + // mov v7.d[0], x26 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.................. 
+ // mov v2.d[0], v13.d[1] // ...........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*........ + // trn1 v11.4S, v24.4S, v20.4S // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... + // lsr w27, w9, w17 // ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*....... 
+ // trn1 v0.4S, v0.4S, v1.4S // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... + // stp w0, w27, [sp, #STACK_CTR] // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... + // mov v9.d[0], x13 // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... 
+ // mov v4.d[0], v15.d[1] // ..............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*..... + // bfi x11, x23, #32, #25 // .............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*...... + // mov v8.d[0], v19.d[1] // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... 
+ // trn1 v17.4S, v22.4S, v17.4S // ...............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.... + // eor w1, w27, w8 // ................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*... + // mov v1.d[0], x11 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. 
+ // mov v5.d[0], x2 // .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*.. + // mov v16.d[0], v6.d[1] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. + // mov v6.d[0], v17.d[1] // ..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................*. 
+ // mov v10.d[0], v0.d[1] // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + // mov v0.d[0], v11.d[1] // ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................* + + end_label: + + + + + + + + + + subs w11, w0, #-1 + cbnz w11, mainloop + + + mov w0, v1.s[0] + mov w1, v1.s[1] + mov w2, v3.s[0] + mov w3, v3.s[1] + mov w4, v5.s[0] + mov w5, v5.s[1] + mov w6, v7.s[0] + mov w7, v7.s[1] + mov w8, v9.s[0] + mov w9, v9.s[1] + + stp w0, w1, [sp, #80] + stp w2, w3, [sp, #88] + stp w4, w5, [sp, #96] + stp w6, w7, [sp, #104] + stp w8, w9, [sp, #112] + + mov x10, v0.d[0] + mov x11, v2.d[0] + mov x12, v4.d[0] + mov x13, v6.d[0] + mov x14, v8.d[0] + + stp x10, x11, [sp] + stp x12, x13, [sp, #16] + str x14, [sp, #32] + + adr x10, invtable + str x10, [sp, #160] + +.Linvloopnext: + ldrh w11, [x10], #2 + mov v20.s[0], w11 + str x10, [sp, #160] + + and w12, w11, #0x7f + subs w30, w12, #1 // square times + bmi .Lskipsquare + + mov w23, w3 + mov w24, w4 + mov w25, w5 + mov w26, w6 + mov w27, w7 + mov w14, w8 + add w10, w0, w0 + add w11, w1, w1 + add w12, w2, w2 + +.Lsqrloop1: + umull x20, w0, w0 + add x4, x24, x23, lsr #25 + umull x21, 
w10, w1 + and x3, x23, #0x1ffffff + umull x22, w10, w2 + add w13, w3, w3 + umull x23, w10, w3 + add x5, x25, x4, lsr #26 + umull x24, w11, w13 + and x4, x4, #0x3ffffff + umull x28, w4, w4 + add x6, x26, x5, lsr #25 + umull x25, w12, w3 + and x5, x5, #0x1ffffff + umull x26, w13, w3 + add w15, w5, w5 + umaddl x28, w13, w15, x28 + add x7, x27, x6, lsr #26 + umull x19, w4, w15 + and x6, x6, #0x3ffffff + umull x27, w11, w6 + add x8, x14, x7, lsr #25 + umaddl x28, w12, w6, x28 + and x7, x7, #0x1ffffff + umaddl x19, w13, w6, x19 + add x9, x9, x8, lsr #26 + umaddl x27, w10, w7, x27 + add w17, w7, w7 + umaddl x28, w11, w17, x28 + and x8, x8, #0x3ffffff + umaddl x19, w10, w9, x19 + add w14, w9, w9 + umaddl x27, w12, w5, x27 + add w16, w14, w14, lsl #1 + umaddl x28, w10, w8, x28 + add w3, w15, w15, lsl #1 + umaddl x19, w12, w7, x19 + add w16, w16, w14, lsl #4 + umaddl x27, w13, w4, x27 + add w3, w3, w15, lsl #4 + umaddl x28, w16, w9, x28 + + umaddl x19, w11, w8, x19 + add w9, w6, w6, lsl #1 + umaddl x20, w3, w5, x20 + + umaddl x24, w10, w4, x24 + add w9, w9, w6, lsl #4 + umaddl x25, w10, w5, x25 + add x19, x19, x28, lsr #26 + umaddl x26, w10, w6, x26 + and x14, x28, #0x3ffffff + umaddl x22, w11, w1, x22 + add x20, x20, x19, lsr #25 + umaddl x23, w11, w2, x23 + bic x1, x19, #0x1ffffff + umaddl x26, w12, w4, x26 + add x20, x20, x1, lsr #24 + umaddl x24, w2, w2, x24 + add w0, w4, w4 + umaddl x25, w11, w4, x25 + add x20, x20, x1, lsr #21 + umaddl x26, w11, w15, x26 + add w1, w17, w17, lsl #1 + umaddl x20, w9, w0, x20 + + umaddl x21, w9, w15, x21 + add w1, w1, w17, lsl #4 + umaddl x22, w9, w6, x22 + add w10, w8, w8, lsl #1 + umaddl x20, w1, w13, x20 + and x9, x19, #0x1ffffff + umaddl x21, w1, w4, x21 + add w10, w10, w8, lsl #4 + umaddl x22, w1, w15, x22 + subs w30, w30, #1 + umaddl x20, w10, w12, x20 + + umaddl x21, w10, w13, x21 + + umaddl x22, w10, w0, x22 + + umaddl x20, w16, w11, x20 + + umaddl x21, w16, w2, x21 + + umaddl x22, w16, w13, x22 + add w11, w6, w6 + umaddl x23, w1, 
w6, x23 + + umaddl x24, w1, w7, x24 + add x21, x21, x20, lsr #26 + umaddl x26, w10, w8, x26 + and x0, x20, #0x3ffffff + umaddl x23, w10, w15, x23 + add x22, x22, x21, lsr #25 + umaddl x24, w10, w11, x24 + and x1, x21, #0x1ffffff + umaddl x25, w10, w17, x25 + and x2, x22, #0x3ffffff + umaddl x23, w16, w4, x23 + add w10, w0, w0 + umaddl x24, w16, w15, x24 + add w11, w1, w1 + umaddl x25, w16, w6, x25 + add w12, w2, w2 + umaddl x26, w16, w17, x26 + add x23, x23, x22, lsr #26 + umaddl x27, w16, w8, x27 + bpl .Lsqrloop1 + + mov w11, v20.s[0] + add x4, x24, x23, lsr #25 + and x3, x23, #0x1ffffff + add x5, x25, x4, lsr #26 + and x4, x4, #0x3ffffff + add x6, x26, x5, lsr #25 + and x5, x5, #0x1ffffff + add x7, x27, x6, lsr #26 + and x6, x6, #0x3ffffff + add x8, x14, x7, lsr #25 + and x7, x7, #0x1ffffff + add x9, x9, x8, lsr #26 + and x8, x8, #0x3ffffff +.Lskipsquare: + mov w12, #40 + tst w11, #1<<8 + ubfx w13, w11, #9, #2 + bne .Lskipmul + mul w20, w13, w12 + add x20, sp, x20 + + ldp w10, w11, [x20] + ldp w12, w13, [x20, #8] + ldp w14, w15, [x20, #16] + ldp w16, w17, [x20, #24] + ldp w19, w20, [x20, #32] + mov w30, #19 + + umull x21, w1, w19 + umull x22, w1, w17 + umull x23, w1, w16 + umull x24, w1, w15 + umaddl x21, w3, w16, x21 + umaddl x22, w3, w15, x22 + umaddl x23, w3, w14, x23 + umaddl x24, w3, w13, x24 + umaddl x21, w5, w14, x21 + umaddl x22, w5, w13, x22 + umaddl x23, w5, w12, x23 + umaddl x24, w5, w11, x24 + umaddl x21, w7, w12, x21 + umaddl x22, w7, w11, x22 + umaddl x23, w7, w10, x23 + mul w27, w7, w30 + mul w25, w9, w30 + mul w26, w8, w30 + mul w28, w6, w30 + umaddl x24, w27, w20, x24 + umaddl x21, w9, w10, x21 + umaddl x22, w25, w20, x22 + umaddl x23, w25, w19, x23 + umaddl x24, w25, w17, x24 + add x22, x22, x22 + umaddl x21, w0, w20, x21 + add x24, x24, x24 + umaddl x22, w0, w19, x22 + umaddl x23, w0, w17, x23 + umaddl x24, w0, w16, x24 + umaddl x21, w2, w17, x21 + umaddl x22, w2, w16, x22 + umaddl x23, w2, w15, x23 + umaddl x24, w2, w14, x24 + umaddl x21, w4, 
w15, x21 + umaddl x22, w4, w14, x22 + umaddl x23, w4, w13, x23 + umaddl x24, w4, w12, x24 + umaddl x21, w6, w13, x21 + umaddl x22, w6, w12, x22 + umaddl x23, w6, w11, x23 + umaddl x24, w6, w10, x24 + umaddl x21, w8, w11, x21 + umaddl x22, w8, w10, x22 + umaddl x23, w26, w20, x23 + umaddl x24, w26, w19, x24 + umull x6, w25, w16 + umull x7, w25, w15 + umull x8, w25, w14 + umaddl x6, w5, w10, x6 + mul w5, w5, w30 + umaddl x7, w27, w17, x7 + umaddl x8, w27, w16, x8 + umaddl x6, w27, w19, x6 + umaddl x7, w5, w20, x7 + umaddl x8, w5, w19, x8 + umaddl x6, w3, w12, x6 + umaddl x7, w3, w11, x7 + umaddl x8, w3, w10, x8 + umaddl x6, w1, w14, x6 + umaddl x7, w1, w13, x7 + umaddl x8, w1, w12, x8 + mul w9, w4, w30 + add x7, x7, x7 + umaddl x6, w26, w17, x6 + umaddl x7, w26, w16, x7 + umaddl x8, w26, w15, x8 + umaddl x6, w28, w20, x6 + umaddl x7, w28, w19, x7 + umaddl x8, w28, w17, x8 + umaddl x6, w4, w11, x6 + umaddl x7, w4, w10, x7 + umaddl x8, w9, w20, x8 + umaddl x6, w2, w13, x6 + umaddl x7, w2, w12, x7 + umaddl x8, w2, w11, x8 + umaddl x6, w0, w15, x6 + umaddl x7, w0, w14, x7 + umaddl x8, w0, w13, x8 + mul w4, w3, w30 + add x6, x6, x7, lsr #26 + and x7, x7, #0x3ffffff + add x24, x24, x6, lsr #25 + and x6, x6, #0x1ffffff + add x23, x23, x24, lsr #26 + and x24, x24, #0x3ffffff + add x22, x22, x23, lsr #25 + bfi x24, x23, #32, #25 + add x21, x21, x22, lsr #26 + and x22, x22, #0x3ffffff + bic x3, x21, #0x3ffffff + lsr x23, x3, #26 + bfi x22, x21, #32, #26 + add x23, x23, x3, lsr #25 + umull x21, w25, w13 + add x23, x23, x3, lsr #22 + umull x3, w25, w12 + umaddl x23, w25, w11, x23 + umaddl x21, w27, w15, x21 + umaddl x3, w27, w14, x3 + umaddl x23, w27, w13, x23 + mul w27, w1, w30 + umaddl x3, w5, w16, x3 + umaddl x23, w5, w15, x23 + umaddl x21, w5, w17, x21 + umaddl x3, w4, w19, x3 + umaddl x23, w4, w17, x23 + umaddl x21, w4, w20, x21 + umaddl x3, w1, w10, x3 + umaddl x23, w27, w20, x23 + umaddl x21, w1, w11, x21 + mul w25, w2, w30 + add x23, x23, x23 + add x21, x21, x21 + umaddl 
x23, w26, w12, x23 + umaddl x3, w26, w13, x3 + umaddl x21, w26, w14, x21 + umaddl x23, w28, w14, x23 + umaddl x3, w28, w15, x3 + umaddl x21, w28, w16, x21 + umaddl x23, w9, w16, x23 + umaddl x3, w9, w17, x3 + umaddl x21, w9, w19, x21 + umaddl x23, w25, w19, x23 + umaddl x3, w25, w20, x3 + umaddl x21, w2, w10, x21 + umaddl x23, w0, w10, x23 + umaddl x3, w0, w11, x3 + umaddl x21, w0, w12, x21 + add x1, x3, x23, lsr #26 + and x0, x23, #0x3ffffff + add x2, x21, x1, lsr #25 + and x1, x1, #0x1ffffff + add x3, x8, x2, lsr #26 + and x2, x2, #0x3ffffff + add x4, x7, x3, lsr #25 + and x3, x3, #0x1ffffff + add x5, x6, x4, lsr #26 + and x4, x4, #0x3ffffff + and x5, x5, #0x3ffffff + + mov w11, v20.s[0] + mov w6, w24 + lsr x7, x24, #32 + mov w8, w22 + lsr x9, x22, #32 +.Lskipmul: + ubfx w12, w11, #11, #2 + cbz w12, .Lskipstore + mov w13, #40 + mul w12, w12, w13 + add x12, sp, x12 + + stp w0, w1, [x12] + stp w2, w3, [x12, #8] + stp w4, w5, [x12, #16] + stp w6, w7, [x12, #24] + stp w8, w9, [x12, #32] +.Lskipstore: + + ldr x10, [sp, #160] + adr x11, invtable+13*2 + cmp x10, x11 + bne .Linvloopnext + + // Final reduce + // w5 and w9 are 26 bits instead of 25 + + orr x10, x0, x1, lsl #26 + orr x10, x10, x2, lsl #51 + + lsr x11, x2, #13 + orr x11, x11, x3, lsl #13 + orr x11, x11, x4, lsl #38 + + add x12, x5, x6, lsl #25 + adds x12, x12, x7, lsl #51 + + lsr x13, x7, #13 + orr x13, x13, x8, lsl #12 + orr x13, x13, x9, lsl #38 + + adcs x13, x13, xzr + adc x14, xzr, xzr + + extr x17, x14, x13, #63 + mov w19, #19 + mul w15, w17, w19 + add w15, w15, #19 + + adds x15, x10, x15 + adcs x15, x11, xzr + adcs x15, x12, xzr + adcs x15, x13, xzr + adc x16, x14, xzr + + extr x16, x16, x15, #63 + mul w16, w16, w19 + + adds x10, x10, x16 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + and x13, x13, 0x7fffffffffffffff + + ldr x17, [sp, STACK_OUT_PTR] + stp x10, x11, [x17] + stp x12, x13, [x17, #16] + + add sp, sp, STACK_OUT_PTR+8 + + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + 
ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp d8, d9, [sp, #96] + ldp d10, d11, [sp, #112] + ldp d12, d13, [sp, #128] + ldp d14, d15, [sp, #144] + ldp x29, x30, [sp], #160 + + ret + // .size x25519_scalarmult, .-x25519_scalarmult + // .type invtable, %object +invtable: + // bits 0-6: square times, i.e. number of squarings run first (0 = no squaring), + // bit 8: skip mul, i.e. when set the multiplication step is skipped, + // bits 9-10: mulsource, slot n of the multiplicand loaded from sp + 40*n, + // bits 11-12: dest, slot n the result is stored to at sp + 40*n (0 = no store) + .hword 1|(1<<8) |(1<<11) + .hword 2| (2<<9)|(2<<11) + .hword 0| (1<<9)|(1<<11) + .hword 1| (2<<9)|(2<<11) + .hword 5| (2<<9)|(2<<11) + .hword 10| (2<<9)|(3<<11) + .hword 20| (3<<9) + .hword 10| (2<<9)|(2<<11) + .hword 50| (2<<9)|(3<<11) + .hword 100| (3<<9) + .hword 50| (2<<9) + .hword 5| (1<<9) + .hword 0| (0<<9) + // .size invtable, .-invtable + +END: