Skip to content

Commit 6a6aac3

Browse files
committed
AArch64: Add support for cmp_xzr to the A55 and A72 uArch models
This commit adds support for the `cmp_xzr` instruction to the A55 and A72 uArch models. This pattern is a variant of cmp that uses the zero register xzr. The commit reuses the existing cmp entries in each uArch model; since cmp is an alias of SUBS (according to page C6-1953 of the AArch64 Base Instruction Descriptions), we reference the SUBS entry of each SWOG to model this instruction. - a55 SWOG SUBS (page: 18/48) - latency: 2 - Inverse throughput: 2/2 = 1 - ExecutionUnit: SCALAR (ALU0, ALU1) - a72 SWOG SUBS (page: 8/48) - latency: 1 - Inverse throughput: 2/2 = 1 - ExecutionUnit: INT (INT0, INT1) Signed-off-by: willieyz <willie.zhao@chelpis.com>
1 parent 86f8d4c commit 6a6aac3

3 files changed

Lines changed: 9 additions & 5 deletions

File tree

slothy/targets/aarch64/cortex_a55.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@
184184
fmov_s_form, # from double/single to gen reg
185185
fmov_d_form, # from double/single to gen reg (64-bit)
186186
cmp,
187+
cmp_xzr,
187188
vdup_w,
188189
crc32b,
189190
crc32h,
@@ -442,6 +443,7 @@ def get_min_max_objective(slothy):
442443
sub,
443444
sub_imm,
444445
cmp,
446+
cmp_xzr,
445447
sbcs_zero_to_zero,
446448
cmp_xzr2,
447449
mov,
@@ -491,7 +493,7 @@ def get_min_max_objective(slothy):
491493
umov_d,
492494
vuaddlv_sform,
493495
): 1,
494-
(sub_imm, cmp): 1,
496+
(sub_imm, cmp, cmp_xzr): 1,
495497
(
496498
vmla,
497499
vmla_lane,
@@ -607,7 +609,7 @@ def get_min_max_objective(slothy):
607609
): 4,
608610
(Ldr_D): 3,
609611
(Ldr_Q, Str_Q): 4,
610-
(sub_imm, cmp): 2,
612+
(sub_imm, cmp, cmp_xzr): 2,
611613
AArch64NeonCount: 2,
612614
St4: 5,
613615
St3: 3,

slothy/targets/aarch64/cortex_a72_frontend.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
q_ld2_lane_s,
124124
Ldp_W,
125125
cmp,
126+
cmp_xzr,
126127
cmp_imm,
127128
csel,
128129
csel_xzr_ne,
@@ -265,7 +266,7 @@ def get_min_max_objective(slothy):
265266
lsr_imm: ExecutionUnit.INT(),
266267
lsr: ExecutionUnit.INT(),
267268
movk_imm_lsl: ExecutionUnit.INT(),
268-
(sub_imm, cmp, cmp_imm): ExecutionUnit.INT(),
269+
(sub_imm, cmp, cmp_xzr, cmp_imm): ExecutionUnit.INT(),
269270
Ldp_W: ExecutionUnit.LOAD(),
270271
q_ldp_with_inc: ExecutionUnit.LOAD(),
271272
Stp_W: ExecutionUnit.STORE(),
@@ -316,7 +317,7 @@ def get_min_max_objective(slothy):
316317
q_ld2_lane_s: 1,
317318
vtbl: 1, # SWOG contains a blank throughput (approximating from AArch32)
318319
AESInstruction: 1,
319-
(sub_imm, cmp, cmp_imm): 1,
320+
(sub_imm, cmp, cmp_xzr, cmp_imm): 1,
320321
vuaddlv_sform: 1,
321322
fmov_s_form: 1, # from vec to gen reg
322323
fmov_d_form: 1, # from vec to gen reg (64-bit)
@@ -387,7 +388,7 @@ def get_min_max_objective(slothy):
387388
q_ld2_lane_s: 8,
388389
vtbl: 6, # q-form: 3*N+3 cycles (N = number of registers in the table)
389390
AESInstruction: 3,
390-
(sub_imm, cmp, cmp_imm): 1,
391+
(sub_imm, cmp, cmp_xzr, cmp_imm): 1,
391392
vuaddlv_sform: 6, # 8B/8H
392393
fmov_s_form: 5, # from vec to gen reg
393394
fmov_d_form: 5, # from vec to gen reg (64-bit)

tests/naive/aarch64/instructions.s

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,5 +223,6 @@ csel x11, x10, xzr, eq
223223
csel x11, x10, xzr, ne
224224
csel x11, x10, xzr, lt
225225
csel x11, x10, xzr, gt
226+
cmp x3, xzr
226227

227228
end:

0 commit comments

Comments
 (0)