Skip to content

Commit 0c42e5b

Browse files
committed
[aarch64] Emit abs, min, and max instructions during lifting
1 parent fde1241 commit 0c42e5b

2 files changed

Lines changed: 65 additions & 13 deletions

File tree

arch/arm64/arm64test.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12725,7 +12725,56 @@
1272512725
(b'\x1F\x20\x03\xD5', 'LLIL_NOP()'), # nop, gets optimized from function
1272612726
]
1272712727

12728+
# FEAT_CSSC scalar integer min/max, abs, ctz, and cnt (lifted to dedicated LLIL ops), plus NEON cnt/abs forms that must not be lifted as scalar ops
12729+
tests_cssc = [
12730+
# smax w0, w1, w2
12731+
(b'\x20\x60\xc2\x1a', 'LLIL_SET_REG.d(w0,LLIL_MAXS.d(LLIL_REG.d(w1),LLIL_REG.d(w2)))'),
12732+
# smax x0, x1, x2
12733+
(b'\x20\x60\xc2\x9a', 'LLIL_SET_REG.q(x0,LLIL_MAXS.q(LLIL_REG.q(x1),LLIL_REG.q(x2)))'),
12734+
# umax w0, w1, w2
12735+
(b'\x20\x64\xc2\x1a', 'LLIL_SET_REG.d(w0,LLIL_MAXU.d(LLIL_REG.d(w1),LLIL_REG.d(w2)))'),
12736+
# umax x0, x1, x2
12737+
(b'\x20\x64\xc2\x9a', 'LLIL_SET_REG.q(x0,LLIL_MAXU.q(LLIL_REG.q(x1),LLIL_REG.q(x2)))'),
12738+
# smin w0, w1, w2
12739+
(b'\x20\x68\xc2\x1a', 'LLIL_SET_REG.d(w0,LLIL_MINS.d(LLIL_REG.d(w1),LLIL_REG.d(w2)))'),
12740+
# smin x0, x1, x2
12741+
(b'\x20\x68\xc2\x9a', 'LLIL_SET_REG.q(x0,LLIL_MINS.q(LLIL_REG.q(x1),LLIL_REG.q(x2)))'),
12742+
# umin w0, w1, w2
12743+
(b'\x20\x6c\xc2\x1a', 'LLIL_SET_REG.d(w0,LLIL_MINU.d(LLIL_REG.d(w1),LLIL_REG.d(w2)))'),
12744+
# umin x0, x1, x2
12745+
(b'\x20\x6c\xc2\x9a', 'LLIL_SET_REG.q(x0,LLIL_MINU.q(LLIL_REG.q(x1),LLIL_REG.q(x2)))'),
12746+
# abs w0, w1
12747+
(b'\x20\x20\xc0\x5a', 'LLIL_SET_REG.d(w0,LLIL_ABS.d(LLIL_REG.d(w1)))'),
12748+
# abs x0, x1
12749+
(b'\x20\x20\xc0\xda', 'LLIL_SET_REG.q(x0,LLIL_ABS.q(LLIL_REG.q(x1)))'),
12750+
# smax w0, w1, #0x5
12751+
(b'\x20\x14\xc0\x11', 'LLIL_SET_REG.d(w0,LLIL_MAXS.d(LLIL_REG.d(w1),LLIL_CONST.d(0x5)))'),
12752+
# umin w0, w1, #0x7
12753+
(b'\x20\x1c\xcc\x11', 'LLIL_SET_REG.d(w0,LLIL_MINU.d(LLIL_REG.d(w1),LLIL_CONST.d(0x7)))'),
12754+
# ctz w0, w1
12755+
(b'\x20\x18\xc0\x5a', 'LLIL_SET_REG.d(w0,LLIL_CTZ.d(LLIL_REG.d(w1)))'),
12756+
# ctz x0, x1
12757+
(b'\x20\x18\xc0\xda', 'LLIL_SET_REG.q(x0,LLIL_CTZ.q(LLIL_REG.q(x1)))'),
12758+
# cnt w0, w1 (FEAT_CSSC scalar population count)
12759+
(b'\x20\x1c\xc0\x5a', 'LLIL_SET_REG.d(w0,LLIL_POPCNT.d(LLIL_REG.d(w1)))'),
12760+
# cnt x0, x1 (FEAT_CSSC scalar population count)
12761+
(b'\x20\x1c\xc0\xda', 'LLIL_SET_REG.q(x0,LLIL_POPCNT.q(LLIL_REG.q(x1)))'),
12762+
12763+
# Vector/SVE forms of these mnemonics are not FEAT_CSSC scalar ops and must not be lifted
12764+
# as whole-register scalar operations. The NEON cnt has a per-element intrinsic; the others
12765+
# have no native scalar representation and are left unimplemented.
12766+
# cnt v0.8b, v1.8b
12767+
(b'\x20\x58\x20\x0e', 'LLIL_INTRINSIC([v0],_PopulationCount,[LLIL_REG.o(v1)])'),
12768+
# cnt v0.16b, v1.16b
12769+
(b'\x20\x58\x20\x4e', 'LLIL_INTRINSIC([v0],_PopulationCount,[LLIL_REG.o(v1)])'),
12770+
# abs v0.8b, v1.8b
12771+
(b'\x20\xb8\x20\x0e', 'LLIL_UNIMPL()'),
12772+
# abs v0.2d, v1.2d
12773+
(b'\x20\xb8\xe0\x4e', 'LLIL_UNIMPL()'),
12774+
]
12775+
1272812776
test_cases = \
12777+
tests_cssc + \
1272912778
tests_shll + \
1273012779
tests_udf + \
1273112780
tests_pac + \

arch/arm64/il.cpp

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,12 +1329,19 @@ bool GetLowLevelILForInstruction(
13291329
switch (instr.operation)
13301330
{
13311331
case ARM64_ABS:
1332-
{
1333-
ExprId src = ILREG_O(operand2);
1334-
GenIfElse(il, il.CompareSignedLessThan(REGSZ_O(operand2), src, il.Const(REGSZ_O(operand2), 0)),
1335-
ILSETREG_O(operand1, il.Neg(REGSZ_O(operand2), src)), ILSETREG_O(operand1, src));
1332+
switch (instr.encoding)
1333+
{
1334+
case ENC_ABS_32_DP_1SRC:
1335+
case ENC_ABS_64_DP_1SRC:
1336+
// FEAT_CSSC scalar absolute value on a general-purpose register
1337+
il.AddInstruction(ILSETREG_O(operand1, il.AbsoluteValue(REGSZ_O(operand2), ILREG_O(operand2))));
1338+
break;
1339+
default:
1340+
// The NEON and SVE forms are per-element absolute values, which have no native scalar
1341+
// representation in LLIL, so they are left unimplemented instead of being lifted as a whole-register op
1342+
il.AddInstruction(il.Unimplemented());
1343+
}
13361344
break;
1337-
}
13381345
case ARM64_ADD:
13391346
switch (instr.encoding)
13401347
{
@@ -4004,8 +4011,7 @@ bool GetLowLevelILForInstruction(
40044011
return true;
40054012
}
40064013

4007-
GenIfElse(il, il.CompareSignedGreaterThan(REGSZ_O(operand2), op2, op3), ILSETREG_O(operand1, op2),
4008-
ILSETREG_O(operand1, op3));
4014+
il.AddInstruction(ILSETREG_O(operand1, il.MaxSigned(REGSZ_O(operand2), op2, op3)));
40094015
break;
40104016
}
40114017
case ARM64_SMIN:
@@ -4028,8 +4034,7 @@ bool GetLowLevelILForInstruction(
40284034
return true;
40294035
}
40304036

4031-
GenIfElse(il, il.CompareSignedLessThan(REGSZ_O(operand2), op2, op3), ILSETREG_O(operand1, op2),
4032-
ILSETREG_O(operand1, op3));
4037+
il.AddInstruction(ILSETREG_O(operand1, il.MinSigned(REGSZ_O(operand2), op2, op3)));
40334038
break;
40344039
}
40354040
case ARM64_UDIV:
@@ -4064,8 +4069,7 @@ bool GetLowLevelILForInstruction(
40644069
return true;
40654070
}
40664071

4067-
GenIfElse(il, il.CompareUnsignedGreaterThan(REGSZ_O(operand2), op2, op3), ILSETREG_O(operand1, op2),
4068-
ILSETREG_O(operand1, op3));
4072+
il.AddInstruction(ILSETREG_O(operand1, il.MaxUnsigned(REGSZ_O(operand2), op2, op3)));
40694073
break;
40704074
}
40714075
case ARM64_UMIN:
@@ -4088,8 +4092,7 @@ bool GetLowLevelILForInstruction(
40884092
return true;
40894093
}
40904094

4091-
GenIfElse(il, il.CompareUnsignedLessThan(REGSZ_O(operand2), op2, op3), ILSETREG_O(operand1, op2),
4092-
ILSETREG_O(operand1, op3));
4095+
il.AddInstruction(ILSETREG_O(operand1, il.MinUnsigned(REGSZ_O(operand2), op2, op3)));
40934096
break;
40944097
}
40954098
case ARM64_UBFIZ:

0 commit comments

Comments
 (0)