Skip to content

Commit 86f8d4c

Browse files
committed
AArch64: Add support for csel_xzr_ne to the A55 and A72 uArch models
This commit adds support for the csel_xzr_ne instruction to the A55 and A72 uArch models. This pattern is a variant of csel that uses the zero register xzr, so the commit reuses the existing csel uArch model definitions for it:
- A55 SWOG, CSEL (page: 18/48)
  - latency: 1
  - Inverse throughput: 2/2 = 1
  - ExecutionUnit: SCALAR (ALU0, ALU1)
- A72 SWOG, CSEL (page: 8/42)
  - latency: 1
  - Inverse throughput: 2/2 = 1
  - ExecutionUnit: INT (INT0, INT1)
Signed-off-by: willieyz <willie.zhao@chelpis.com>
1 parent 27663d8 commit 86f8d4c

3 files changed

Lines changed: 12 additions & 6 deletions

File tree

slothy/targets/aarch64/cortex_a55.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
vmul,
6767
Instruction,
6868
csel,
69+
csel_xzr_ne,
6970
fcsel,
7071
Q_Ld2_Lane_Post_Inc,
7172
q_ld2_lane_s,
@@ -461,7 +462,7 @@ def get_min_max_objective(slothy):
461462
# NOTE: AESE/AESMC and AESD/AESIMC pairs can be dual-issued on A55 but this
462463
# is not modeled
463464
AESInstruction: [[ExecutionUnit.VEC0, ExecutionUnit.VEC1]],
464-
csel: ExecutionUnit.SCALAR(),
465+
(csel, csel_xzr_ne): ExecutionUnit.SCALAR(),
465466
(
466467
crc32b,
467468
crc32h,
@@ -518,7 +519,7 @@ def get_min_max_objective(slothy):
518519
vshrn: 2,
519520
vtbl: 1, # N cycles (N = number of registers in the table)
520521
(fcsel): 1,
521-
csel: 1,
522+
(csel, csel_xzr_ne): 1,
522523
(VecToGprMov, Mov_xtov_d, mov_wtov_s): 1,
523524
(
524525
movk_imm,
@@ -626,7 +627,7 @@ def get_min_max_objective(slothy):
626627
(Vins, umov_d): 2,
627628
(tst_wform): 1,
628629
(fcsel): 2,
629-
csel: 1,
630+
(csel, csel_xzr_ne): 1,
630631
(VecToGprMov, Mov_xtov_d, mov_wtov_s): 2,
631632
(
632633
movk_imm,

slothy/targets/aarch64/cortex_a72_frontend.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@
125125
cmp,
126126
cmp_imm,
127127
csel,
128+
csel_xzr_ne,
128129
q_ldp_with_inc,
129130
AArch64CRC32,
130131
asr,
@@ -239,7 +240,7 @@ def get_min_max_objective(slothy):
239240
],
240241
(AArch64NeonShiftInsert, vusra): [ExecutionUnit.ASIMD1],
241242
fcsel: ExecutionUnit.ASIMD(),
242-
csel: ExecutionUnit.INT(),
243+
(csel, csel_xzr_ne): ExecutionUnit.INT(),
243244
AArch64ConditionalCompare: ExecutionUnit.INT(),
244245
AArch64Logical: [ExecutionUnit.INT()],
245246
# 8B/8H occupies both F0, F1
@@ -294,7 +295,7 @@ def get_min_max_objective(slothy):
294295
AArch64NeonLogical: 1,
295296
(AArch64NeonShiftInsert, vusra): 1,
296297
fcsel: 1,
297-
csel: 1,
298+
(csel, csel_xzr_ne): 1,
298299
AArch64ConditionalCompare: 1,
299300
AArch64Logical: 1,
300301
Vins: 1,
@@ -363,7 +364,7 @@ def get_min_max_objective(slothy):
363364
AArch64NeonShiftInsert: 3,
364365
vusra: 4,
365366
fcsel: 3,
366-
csel: 1,
367+
(csel, csel_xzr_ne): 1,
367368
AArch64ConditionalCompare: 1,
368369
AArch64Logical: 1,
369370
(Ldr_D, Ldr_Q, Ldr_X, Str_Q, Str_X): 4, # approx

tests/naive/aarch64/instructions.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,5 +219,9 @@ fmov x5, d7
219219

220220
asr x11, x12, x7
221221
asr x11, x12, #7
222+
csel x11, x10, xzr, eq
223+
csel x11, x10, xzr, ne
224+
csel x11, x10, xzr, lt
225+
csel x11, x10, xzr, gt
222226

223227
end:

0 commit comments

Comments
 (0)