diff --git a/slothy/targets/aarch64/aarch64_neon.py b/slothy/targets/aarch64/aarch64_neon.py
index 4e74f343c..cd0f464ac 100644
--- a/slothy/targets/aarch64/aarch64_neon.py
+++ b/slothy/targets/aarch64/aarch64_neon.py
@@ -2781,12 +2781,18 @@ class ror(AArch64Shift):
     outputs = ["Xd"]
 
 
-class asr(AArch64Shift):
+class asr_imm(AArch64Shift):  # asr form whose last operand is an immediate (<imm>)
     pattern = "asr <Xd>, <Xa>, <imm>"
     inputs = ["Xa"]
     outputs = ["Xd"]
 
 
+class asr(AArch64Shift):  # register-operand asr; the immediate form is asr_imm
+    pattern = "asr <Xd>, <Xa>, <Xb>"  # last operand is a register (<Xb>), not <imm>
+    inputs = ["Xa", "Xb"]  # both source registers are read
+    outputs = ["Xd"]
+
+
 class AArch64Logical(AArch64Instruction):
     pass
 
diff --git a/slothy/targets/aarch64/cortex_a55.py b/slothy/targets/aarch64/cortex_a55.py
index 8d46b0e5a..f3983cc2b 100644
--- a/slothy/targets/aarch64/cortex_a55.py
+++ b/slothy/targets/aarch64/cortex_a55.py
@@ -66,6 +66,7 @@
     vmul,
     Instruction,
     csel,
+    csel_xzr_ne,
     fcsel,
     Q_Ld2_Lane_Post_Inc,
     q_ld2_lane_s,
@@ -162,6 +163,8 @@
     ngc_zero,
     subs_wform,
     asr_wform,
+    asr_imm,
+    asr,
     and_imm_wform,
     eor_wform,
     eon_wform,
@@ -181,6 +184,7 @@
     fmov_s_form,  # from double/single to gen reg
     fmov_d_form,  # from double/single to gen reg (64-bit)
     cmp,
+    cmp_xzr,
     vdup_w,
     crc32b,
     crc32h,
@@ -439,12 +443,15 @@ def get_min_max_objective(slothy):
         sub,
         sub_imm,
         cmp,
+        cmp_xzr,
         sbcs_zero_to_zero,
         cmp_xzr2,
         mov,
         ngc_zero,
         subs_wform,
         asr_wform,
+        asr_imm,
+        asr,
         and_imm_wform,
         lsr_wform,
         lsr,
@@ -457,7 +464,7 @@ def get_min_max_objective(slothy):
     # NOTE: AESE/AESMC and AESD/AESIMC pairs can be dual-issued on A55 but this
     # is not modeled
     AESInstruction: [[ExecutionUnit.VEC0, ExecutionUnit.VEC1]],
-    csel: ExecutionUnit.SCALAR(),
+    (csel, csel_xzr_ne): ExecutionUnit.SCALAR(),
     (
         crc32b,
         crc32h,
@@ -486,7 +493,7 @@ def get_min_max_objective(slothy):
         umov_d,
         vuaddlv_sform,
     ): 1,
-    (sub_imm, cmp): 1,
+    (sub_imm, cmp, cmp_xzr): 1,
     (
         vmla,
         vmla_lane,
@@ -514,7 +521,7 @@ def get_min_max_objective(slothy):
     vshrn: 2,
     vtbl: 1,  # N cycles (N = number of registers in the table)
     (fcsel): 1,
-    csel: 1,
+    (csel, csel_xzr_ne): 1,
     (VecToGprMov, Mov_xtov_d, mov_wtov_s): 1,
     (
         movk_imm,
@@ -545,7 +552,17 @@ def get_min_max_objective(slothy):
         adcs_zero_r_to_zero,
         cmn,
     ): 1,
-    (cmp_xzr2, cmp_imm, sub, subs_wform, asr_wform, sbcs_zero_to_zero, ngc_zero): 1,
+    (
+        cmp_xzr2,
+        cmp_imm,
+        sub,
+        subs_wform,
+        asr_wform,
+        asr_imm,
+        sbcs_zero_to_zero,
+        ngc_zero,
+    ): 1,
+    asr: 2,
     (bfi, ubfx): 1,
     VShiftImmediateRounding: 1,
     AArch64NeonShiftInsert: 1,
@@ -592,7 +609,7 @@ def get_min_max_objective(slothy):
     ): 4,
     (Ldr_D): 3,
     (Ldr_Q, Str_Q): 4,
-    (sub_imm, cmp): 2,
+    (sub_imm, cmp, cmp_xzr): 2,
     AArch64NeonCount: 2,
     St4: 5,
     St3: 3,
@@ -612,7 +629,7 @@ def get_min_max_objective(slothy):
     (Vins, umov_d): 2,
     (tst_wform): 1,
     (fcsel): 2,
-    csel: 1,
+    (csel, csel_xzr_ne): 1,
     (VecToGprMov, Mov_xtov_d, mov_wtov_s): 2,
     (
         movk_imm,
@@ -643,12 +660,13 @@ def get_min_max_objective(slothy):
         cmn,
         sub,
         subs_wform,
-        asr_wform,
+        asr,
         sbcs_zero_to_zero,
         cmp_xzr2,
         ngc_zero,
         cmp_imm,
     ): 1,
+    (asr_wform, asr_imm): 2,
     (bfi, ubfx): 2,
     VShiftImmediateRounding: 3,
     VShiftImmediateBasic: 2,
diff --git a/slothy/targets/aarch64/cortex_a72_frontend.py b/slothy/targets/aarch64/cortex_a72_frontend.py
index f77bf2334..66495a79d 100644
--- a/slothy/targets/aarch64/cortex_a72_frontend.py
+++ b/slothy/targets/aarch64/cortex_a72_frontend.py
@@ -123,10 +123,14 @@
     q_ld2_lane_s,
     Ldp_W,
     cmp,
+    cmp_xzr,
     cmp_imm,
     csel,
+    csel_xzr_ne,
     q_ldp_with_inc,
     AArch64CRC32,
+    asr,
+    asr_imm,
 )
 
 # From the A72 SWOG, Section "4.1 Dispatch Constraints"
@@ -237,7 +241,7 @@ def get_min_max_objective(slothy):
     ],
     (AArch64NeonShiftInsert, vusra): [ExecutionUnit.ASIMD1],
     fcsel: ExecutionUnit.ASIMD(),
-    csel: ExecutionUnit.INT(),
+    (csel, csel_xzr_ne): ExecutionUnit.INT(),
     AArch64ConditionalCompare: ExecutionUnit.INT(),
     AArch64Logical: [ExecutionUnit.INT()],
     # 8B/8H occupies both F0, F1
@@ -262,11 +266,13 @@ def get_min_max_objective(slothy):
     lsr_imm: ExecutionUnit.INT(),
     lsr: ExecutionUnit.INT(),
     movk_imm_lsl: ExecutionUnit.INT(),
-    (sub_imm, cmp, cmp_imm): ExecutionUnit.INT(),
+    (sub_imm, cmp_imm): ExecutionUnit.INT(),
+    (cmp, cmp_xzr): ExecutionUnit.MINT(),
     Ldp_W: ExecutionUnit.LOAD(),
     q_ldp_with_inc: ExecutionUnit.LOAD(),
     Stp_W: ExecutionUnit.STORE(),
     AArch64CRC32: ExecutionUnit.MINT(),
+    (asr, asr_imm): ExecutionUnit.INT(),
 }
 
 inverse_throughput = {
@@ -291,7 +297,7 @@ def get_min_max_objective(slothy):
     AArch64NeonLogical: 1,
     (AArch64NeonShiftInsert, vusra): 1,
     fcsel: 1,
-    csel: 1,
+    (csel, csel_xzr_ne): 1,
     AArch64ConditionalCompare: 1,
     AArch64Logical: 1,
     Vins: 1,
@@ -312,7 +318,7 @@ def get_min_max_objective(slothy):
     q_ld2_lane_s: 1,
     vtbl: 1,  # SWOG contains a blank throughput (approximating from AArch32)
     AESInstruction: 1,
-    (sub_imm, cmp, cmp_imm): 1,
+    (sub_imm, cmp, cmp_xzr, cmp_imm): 1,
     vuaddlv_sform: 1,
     fmov_s_form: 1,  # from vec to gen reg
     fmov_d_form: 1,  # from vec to gen reg (64-bit)
@@ -327,6 +333,8 @@ def get_min_max_objective(slothy):
     Ldp_W: 1,
     Stp_W: 1,
     AArch64CRC32: 1,
+    asr: 1,
+    asr_imm: 1,
 }
 
 # REVISIT
@@ -358,7 +366,7 @@ def get_min_max_objective(slothy):
     AArch64NeonShiftInsert: 3,
     vusra: 4,
     fcsel: 3,
-    csel: 1,
+    (csel, csel_xzr_ne): 1,
     AArch64ConditionalCompare: 1,
     AArch64Logical: 1,
     (Ldr_D, Ldr_Q, Ldr_X, Str_Q, Str_X): 4,  # approx
@@ -381,7 +389,8 @@ def get_min_max_objective(slothy):
     q_ld2_lane_s: 8,
     vtbl: 6,  # q-form: 3*N+3 cycles (N = number of registers in the table)
     AESInstruction: 3,
-    (sub_imm, cmp, cmp_imm): 1,
+    (sub_imm, cmp_imm): 1,
+    (cmp, cmp_xzr): 2,
     vuaddlv_sform: 6,  # 8B/8H
     fmov_s_form: 5,  # from vec to gen reg
     fmov_d_form: 5,  # from vec to gen reg (64-bit)
@@ -396,6 +405,8 @@ def get_min_max_objective(slothy):
     Ldp_W: 4,
     Stp_W: 1,
     AArch64CRC32: 2,
+    asr: 1,
+    asr_imm: 1,
 }
 
 
diff --git a/tests/naive/aarch64/instructions.s b/tests/naive/aarch64/instructions.s
index 693de5b07..bdbd1db70 100644
--- a/tests/naive/aarch64/instructions.s
+++ b/tests/naive/aarch64/instructions.s
@@ -216,4 +216,13 @@ crc32ch w6, w6, w7
 crc32cw w6, w6, w7
 crc32cx w6, w6, x8
 fmov x5, d7
+
+asr x11, x12, x7
+asr x11, x12, #7
+csel x11, x10, xzr, eq
+csel x11, x10, xzr, ne
+csel x11, x10, xzr, lt
+csel x11, x10, xzr, gt
+cmp x3, xzr
+
 end: