Skip to content

Commit 7669b5a

Browse files
authored
cdmemset() for AArch64 (dlang#21508)
1 parent 6273cf7 commit 7669b5a

5 files changed

Lines changed: 327 additions & 7 deletions

File tree

compiler/src/dmd/backend/arm/cod2.d

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,293 @@ void cdind(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
11631163
fixresult(cdb,e,retregs,pretregs);
11641164
}
11651165

1166+
/*********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
 *      (s OPmemset (numbytes OPparam value))
 *
 * Fill values wider than one byte are handed off to cdmemsetn().
 * When numbytes is a compile time constant, an AArch64 sequence is emitted:
 * a loop of 8 byte post-indexed stores, followed by at most one 4 byte store
 * and at most three 1 byte stores for the tail, so that exactly numbytes
 * bytes are written.
 *
 * Params:
 *      cg = code generator state
 *      cdb = code sink the generated instructions are appended to
 *      e = the OPmemset elem; e.E1 is the destination, e.E2 the OPparam node
 *      pretregs = registers the result is wanted in, 0 if the result is not used.
 *                 The result is a copy of the destination pointer taken before
 *                 any store is done.
 */

@trusted
void cdmemset(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
{
    //printf("cdmemset(pretregs = %s)\n", regm_str(pretregs));
    elem* e2 = e.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.E2;
    elem* enumbytes = e2.E1;

    const sz = tysize(evalue.Ety);
    if (sz > 1)
    {
        cdmemsetn(cg, cdb, e, pretregs);
        return;
    }

    bool valueIsConst = false;
    targ_size_t value;
    if (evalue.Eoper == OPconst)
    {
        // Replicate the fill byte into every byte of a 64 bit value
        value = el_tolong(evalue) & 0xFF;
        value |= value << 8;
        value |= value << 16;
        value |= value << 32;
        valueIsConst = true;
    }
    else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
    {
        value = 0;
        valueIsConst = true;
    }
    else
        value = 0xDEADBEEF;     // stop annoying false positives that value is not inited

    // Get nbytes into a register (only needed when it is not a constant)
    regm_t nbytesregs = 0;
    if (enumbytes.Eoper != OPconst)
    {
        nbytesregs = cgstate.allregs & ~pretregs;
        if (!nbytesregs)
            nbytesregs = cgstate.allregs;
        codelem(cgstate,cdb,enumbytes,nbytesregs,false);
    }

    // Get value into valuereg
    regm_t valueregs;
    reg_t valuereg;
    if (valueIsConst)
    {
        if (value == 0)
        {
            // Storing zero needs no allocated register
            valueregs = 0;
            valuereg = 0x1F;    // xzr
        }
        else
        {
            valueregs = cgstate.allregs & ~(pretregs | nbytesregs);
            if (!valueregs)
                valueregs = cgstate.allregs & ~nbytesregs;
            regwithvalue(cdb, valueregs, value, 64);
            getregs(cdb, valueregs);
            valuereg = findreg(valueregs);
            cgstate.regimmed_set(valuereg, value);
        }
        freenode(evalue);
    }
    else
    {
        /* NOTE(review): this branch does not look ported to AArch64 yet.
         * valueregs is still default-initialized when handed to scodelem(),
         * and the multiply below is emitted with the x86 IMUL opcode (0x0FAF).
         * Left unchanged here; confirm and port before relying on it.
         */
        scodelem(cgstate,cdb,evalue,valueregs,nbytesregs,false);

        valuereg = findreg(valueregs);
        getregs(cdb,valueregs);

        regm_t regm = cgstate.allregs & ~(valueregs | nbytesregs);
        const r = regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,64); // MOV r,0x01010101_01010101
        cdb.gen2(0x0FAF,modregrmx(3,valuereg,r));               // IMUL valuereg,r
    }
    freenode(e2);

    // Get destination into dstreg
    regm_t dstregs = cgstate.allregs & ~(nbytesregs | valueregs);
    scodelem(cgstate,cdb,e.E1,dstregs,nbytesregs | valueregs,false);
    reg_t dstreg = findreg(dstregs);

    regm_t retregs;
    if (pretregs)                               // if need return value
    {
        // dstreg is advanced by the stores, so save the original pointer first
        retregs = pretregs & ~(nbytesregs | valueregs | dstregs);
        if (!retregs)
            retregs = cgstate.allregs & ~(nbytesregs | valueregs | dstregs);
        reg_t retreg = allocreg(cdb,retregs,TYnptr);
        genmovreg(cdb,retreg,dstreg);           // MOV retreg,dstreg
    }

    if (enumbytes.Eoper == OPconst)
    {
        uint numbytes = cast(uint)el_tolong(enumbytes);
        if (const n = numbytes & ~(REGSIZE - 1))
        {
            // Loop storing REGSIZE bytes at a time until dstreg reaches limit
            regm_t limits = cgstate.allregs & ~(nbytesregs | valueregs | dstregs | retregs);
            reg_t limit = regwithvalue(cdb,limits,n / REGSIZE,64);      // MOV limit,#n / REGSIZE
            cdb.gen1(INSTR.addsub_ext(1,0,0,0,limit,6,3,dstreg,limit)); // ADD limit,dstreg,limit,UXTW #3

            // The NOP only serves as the branch target at the top of the loop
            code* cnop = gen1(null, INSTR.nop);
            cdb.append(cnop);

            cdb.gen1(INSTR.ldst_immpost(3,0,0,8,dstreg,valuereg));      // STR  valuereg,[dstreg],#8        // *dstreg++ = valuereg
            cdb.gen1(INSTR.cmp_shift(1,dstreg,0,0,limit));              // CMP  limit,dstreg
            genBranch(cdb,COND.ne,FL.code,cast(block*)cnop);            // JNE  cnop
        }

        /* Tail of fewer than REGSIZE bytes.
         * The first argument of ldst_immpost() is the size field of the
         * instruction (bits 31..30): 3 is a 64 bit store, 2 a 32 bit store and
         * 0 a byte store. Using 3 here would write 8 bytes per store and run
         * past the end of the destination, so the store width must match the
         * post-increment.
         */
        auto remainder = numbytes & (REGSIZE - 1);
        if (remainder >= 4)
        {
            cdb.gen1(INSTR.ldst_immpost(2,0,0,4,dstreg,valuereg));      // STR  Wvaluereg,[dstreg],#4       // 4 bytes
            remainder -= 4;
        }
        for (; remainder; --remainder)
            cdb.gen1(INSTR.ldst_immpost(0,0,0,1,dstreg,valuereg));      // STRB Wvaluereg,[dstreg],#1       // 1 byte
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    // TODO AArch64
    // NOTE(review): everything from here on uses x86 registers (CX, DI, ...)
    // and x86 opcodes (REP STOS); it appears to be the not yet ported
    // variable-length path.

    getregs(cdb,mDI | mCX);

    /*  MOV     sreg,ECX
        SHR     ECX,n
        REP
        STOSD/Q

        ADC     ECX,ECX
        REP
        STOSD

        MOV     ECX,sreg
        AND     ECX,3
        REP
        STOSB
     */
    regm_t regs = cgstate.allregs & (pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
    const sreg = allocreg(cdb,regs,TYint);
    genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)

    const n = I64 ? 3 : 2;
    cdb.genc2(0xC1, modregrm(3,5,CX), n);             // SHR ECX,n

    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOS);                                   // STOSD/Q
    if (I64)
        code_orrex(cdb.last(), REX_W);

    if (I64)
    {
        cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
        cdb.gen1(0xF3);                               // REP
        cdb.gen1(STOS);                               // STOSD
    }

    genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
    cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOSB);                                  // STOSB

    cgstate.regimmed_set(CX, 0);                      // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}
1340+
1341+
/***********************************************
1342+
* Do memset for values larger than a byte.
1343+
* Has many similarities to cod4.cdeq().
1344+
* Doesn't work for 16 bit code.
*
* The operand tree is (s OPmemset (nelems OPparam value)): the count of
* elements and the fill value come from e.E2, the destination from e.E1.
* The generated code is a counted loop that stores the value and advances
* the destination by the size of the value each iteration.
*
* NOTE(review): the registers (CX, BP, ES) and opcodes (0x89, 0x81, JCXZ,
* LOOP, REX_W) used below are x86 ones; this looks like a copy of the x86
* implementation that has not been ported to AArch64 yet - confirm.
*
* Params:
*      cg = code generator state
*      cdb = code sink the generated instructions are appended to
*      e = the OPmemset elem
*      pretregs = registers the result is wanted in, 0 if no result is needed
1345+
*/
1346+
@trusted
1347+
private void cdmemsetn(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
1348+
{
1349+
//printf("cdmemsetn(pretregs = %s)\n", regm_str(pretregs));
1350+
elem* e2 = e.E2;
1351+
assert(e2.Eoper == OPparam);
1352+
1353+
// OPparam carries the fill value in E2 and the element count in E1
elem* evalue = e2.E2;
1354+
elem* enelems = e2.E1;
1355+
1356+
tym_t tymv = tybasic(evalue.Ety);
1357+
const sz = tysize(evalue.Ety);
1358+
assert(cast(int)sz > 1);
1359+
1360+
// Fill values living in XMM or x87 registers are not implemented: both cases assert
if (tyxmmreg(tymv) && config.fpxmmregs)
1361+
assert(0); // fix later
1362+
if (tyfloating(tymv) && config.inline8087)
1363+
assert(0); // fix later
1364+
1365+
// grex is only used for the ADD idxreg,sz near the end of this function
const grex = I64 ? (REX_W << 16) : 0;
1366+
1367+
// get the count of elems into CX
1368+
regm_t mregcx = mCX;
1369+
codelem(cgstate,cdb,enelems,mregcx,false);
1370+
1371+
// Get value into AX
1372+
regm_t retregs3 = cgstate.allregs & ~mregcx;
1373+
if (sz == 2 * REGSIZE)
1374+
retregs3 &= ~(mBP | IDXREGS); // BP cannot be used for register pair,
1375+
// IDXREGS could deplete index regs - see sdtor.d test14815()
1376+
scodelem(cgstate,cdb,evalue,retregs3,mregcx,false);
1377+
1378+
/* Necessary because if evalue calls a function, and that function never returns,
1379+
* it doesn't affect registers. Which means those registers can be used for enregistering
1380+
* variables, and next pass fails because it can't use those registers, and so cannot
1381+
* allocate registers for retregs3. See ice11596.d
1382+
*/
1383+
useregs(retregs3);
1384+
1385+
// valreg holds the value, or its low half when the value occupies a register pair;
// valreghi is only assigned (and only used) in the register pair case
reg_t valreg = findreg(retregs3);
1386+
reg_t valreghi;
1387+
if (sz == 2 * REGSIZE)
1388+
{
1389+
valreg = findreglsw(retregs3);
1390+
valreghi = findregmsw(retregs3);
1391+
}
1392+
1393+
freenode(e2);
1394+
1395+
// Get s into ES:DI
1396+
// The destination must go into an index register not already holding the count or the value
regm_t mregidx = IDXREGS & ~(mregcx | retregs3);
1397+
assert(mregidx);
1398+
tym_t ty1 = tybasic(e.E1.Ety);
1399+
if (!tyreg(ty1))
1400+
mregidx |= mES;
1401+
scodelem(cgstate,cdb,e.E1,mregidx,mregcx | retregs3,false);
1402+
reg_t idxreg = findreg(mregidx);
1403+
1404+
// mregbx is the register mask of the result; it stays 0 when no result is wanted
regm_t mregbx = 0;
1405+
if (pretregs) // if need return value
1406+
{
1407+
// Prefer a requested register that is still free, else fall back to any free register
mregbx = pretregs & ~(mregidx | mregcx | retregs3);
1408+
if (!mregbx)
1409+
mregbx = cgstate.allregs & ~(mregidx | mregcx | retregs3);
1410+
const regbx = allocreg(cdb, mregbx, TYnptr);
1411+
getregs(cdb, mregbx);
1412+
// Copy the destination pointer before the loop modifies idxreg
genmovreg(cdb,regbx,idxreg); // MOV BX,DI
1413+
}
1414+
1415+
getregs(cdb,mask(idxreg) | mCX); // modify DI and CX
1416+
1417+
/* Generate:
1418+
* JCXZ L1
1419+
* L2:
1420+
* MOV [idxreg],AX
1421+
* ADD idxreg,sz
1422+
* LOOP L2
1423+
* L1:
1424+
* NOP
1425+
*/
1426+
// c1 is the L1 exit label; it is appended to cdb only after the loop body has been generated
code* c1 = gennop(null);
1427+
genjmp(cdb, JCXZ, FL.code, cast(block*)c1);
1428+
// cs is a template for the store instruction(s): opcode 0x89 with [idxreg] as the address
code cs;
1429+
buildEA(&cs,idxreg,-1,1,0);
1430+
cs.Iop = 0x89;
1431+
if (!I16 && sz == 2)
1432+
cs.Iflags |= CFopsize;
1433+
if (I64 && sz == 8)
1434+
cs.Irex |= REX_W;
1435+
code_newreg(&cs, valreg);
1436+
cdb.gen(&cs); // MOV [idxreg],AX
1437+
// c2 is the first store of the loop body, i.e. L2, the target of the LOOP instruction
code* c2 = cdb.last();
1438+
if (sz == REGSIZE * 2)
1439+
{
1440+
// Second store for the high half of a register pair, at offset REGSIZE
cs.IEV1.Vuns = REGSIZE;
1441+
code_newreg(&cs, valreghi);
1442+
cdb.gen(&cs); // MOV REGSIZE[idxreg],DX
1443+
}
1444+
cdb.genc2(0x81, grex | modregrmx(3,0,idxreg), sz); // ADD idxreg,sz
1445+
genjmp(cdb, LOOP, FL.code, cast(block*)c2); // LOOP L2
1446+
cdb.append(c1);
1447+
1448+
cgstate.regimmed_set(CX, 0); // CX is now 0
1449+
1450+
fixresult(cdb,e,mregbx,pretregs);
1451+
}
1452+
11661453
/**********************
11671454
* Do structure assignments.
11681455
* This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).

compiler/src/dmd/backend/arm/cod3.d

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1763,6 +1763,7 @@ void assignaddrc(code* c)
17631763
* Note: only works for forward referenced code.
17641764
* only direct jumps and branches are detected.
17651765
* LOOP instructions only work for backward refs.
1766+
* Reference: http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#condbranch
17661767
*/
17671768
@trusted
17681769
void jmpaddr(code* c)
@@ -1805,7 +1806,7 @@ void jmpaddr(code* c)
18051806
ad += calccodsize(ci);
18061807
ci = code_next(ci);
18071808
}
1808-
c.Iop = (-(ad >> 2)) << 5;
1809+
c.Iop |= (-(ad >> 2) & ((1 << 19) - 1)) << 5; // set the signed imm19 field
18091810
c.IFL1 = FL.unde;
18101811
}
18111812
c = code_next(c);

compiler/src/dmd/backend/arm/disasmarm.d

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,9 @@ void disassemble(uint c) @trusted
743743
const char* format = oO ? "bc.%s" : "b.%s";
744744
const n = sprintf(buf.ptr, format, condstring[cond].ptr);
745745
p1 = buf[0 .. n];
746-
p2 = wordtostring(imm19);
746+
if (imm19 & (1 << 18)) // if bit 19 is set
747+
imm19 |= -1 << 18; // sign extend
748+
p2 = signedWordtostring(imm19);
747749
}
748750
else if (field(ins, 31, 24) == 0x55) // http://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#miscbranch
749751
{
@@ -2368,10 +2370,35 @@ void disassemble(uint c) @trusted
23682370
p4 = eaString(op24, cast(ubyte)Rn, offset);
23692371
}
23702372

2371-
// Load/store register pair (unscaled immediate) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_unscaled
2372-
// Load/store register pair (immediate post-indexed) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_immpost
2373-
// Load/store register pair (unprivileged) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_unpriv
2374-
// Load/store register pair (immediate pre-indexed) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_immpre
2373+
// Load/store register (unscaled immediate) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_unscaled
2374+
2375+
// Load/store register (immediate post-indexed) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_immpost
2376+
if (field(ins, 29, 27) == 7 && field(ins, 25, 24) == 0 && field(ins, 21, 21) == 0 && field(ins, 11,10) == 1)
2377+
{
2378+
url = "ldst_immpost";
2379+
2380+
uint size = field(ins, 31, 30);
2381+
uint VR = field(ins, 26, 26);
2382+
uint opc = field(ins, 23, 22);
2383+
uint imm9 = field(ins, 20, 12);
2384+
uint Rn = field(ins, 9, 5);
2385+
uint Rt = field(ins, 4, 0);
2386+
2387+
if ((size & 2) && VR == 0 && opc == 0)
2388+
{
2389+
url2 = "str_imm_gen";
2390+
2391+
p1 = "str";
2392+
p2 = regString(size & 1, Rt);
2393+
if (size & 1 && Rt == 0x1F)
2394+
p2 = "xzr";
2395+
p3 = eaString(1, cast(ubyte)Rn, imm9);
2396+
}
2397+
}
2398+
else
2399+
2400+
// Load/store register (unprivileged) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_unpriv
2401+
// Load/store register (immediate pre-indexed) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_immpre
23752402
// Atomic memory operations https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#memop
23762403
// Load/store register (register offset) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_regoff
23772404
// Load/store register (pac) https://www.scs.stanford.edu/~zyedidia/arm64/encodingindex.html#ldst_pac
@@ -3032,8 +3059,9 @@ unittest
30323059
unittest
30333060
{
30343061
int line64 = __LINE__;
3035-
string[84] cases64 = // 64 bit code gen
3062+
string[85] cases64 = // 64 bit code gen
30363063
[
3064+
"F8 00 84 5F str xzr,[x2],#8",
30373065
"6F 00 E4 01 movi v1.2d,#0x0",
30383066
"9E AF 00 3E fmov v30.d[1],x1",
30393067
"4E BE 1F C0 mov v0.16b,v30.16b",

compiler/src/dmd/backend/arm/instr.d

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ struct INSTR
10201020
(VR << 26) |
10211021
(opc << 22) |
10221022
(imm9 << 12) |
1023+
(1 << 10) |
10231024
(Rn << 5) |
10241025
Rt;
10251026
}

compiler/src/dmd/backend/x86/cod2.d

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4114,6 +4114,9 @@ void cdmemcpy(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
41144114
@trusted
41154115
void cdmemset(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
41164116
{
4117+
if (cg.AArch64)
4118+
return dmd.backend.arm.cod2.cdmemset(cg, cdb, e, pretregs);
4119+
41174120
regm_t retregs1;
41184121
regm_t retregs3;
41194122
reg_t reg;

0 commit comments

Comments
 (0)