@@ -1163,6 +1163,293 @@ void cdind(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
11631163 fixresult(cdb,e,retregs,pretregs);
11641164}
11651165
/**********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
 *      (s OPmemset (numbytes OPparam value))
 *
 * Values wider than one byte are handed off to cdmemsetn(). For byte values
 * with a constant numbytes, AArch64 post-indexed stores are emitted: a loop of
 * 8 byte stores, then at most one 4 byte store and up to three 1 byte stores.
 * The path for a non-constant numbytes still emits x86 REP STOS code (see the
 * TODO below).
 *
 * Params:
 *      cg = code generator state; only forwarded to cdmemsetn(), the body
 *           itself goes through `cgstate`
 *      cdb = sink for the generated instructions
 *      e = the OPmemset node; e.E1 is the destination, e.E2 the OPparam node
 *      pretregs = mask of registers the result is wanted in, 0 if the result
 *           is not needed
 */

@trusted
void cdmemset(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
{
    //printf("cdmemset(pretregs = %s)\n", regm_str(pretregs));
    elem* e2 = e.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.E2;
    elem* enumbytes = e2.E1;

    const sz = tysize(evalue.Ety);
    if (sz > 1)
    {
        cdmemsetn(cg, cdb, e, pretregs);
        return;
    }

    bool valueIsConst = false;
    targ_size_t value;
    if (evalue.Eoper == OPconst)
    {
        // Replicate the low byte into every byte of a 64 bit value
        value = el_tolong(evalue) & 0xFF;
        value |= value << 8;
        value |= value << 16;
        value |= value << 32;
        valueIsConst = true;
    }
    else if (evalue.Eoper == OPstrpar)  // happens if evalue is a struct of 0 size
    {
        value = 0;
        valueIsConst = true;
    }
    else
        value = 0xDEADBEEF;     // stop annoying false positives that value is not inited

    // Get nbytes into a register, preferably one not wanted for the result
    regm_t nbytesregs = 0;
    if (enumbytes.Eoper != OPconst)
    {
        nbytesregs = cgstate.allregs & ~pretregs;
        if (!nbytesregs)
            nbytesregs = cgstate.allregs;
        codelem(cgstate,cdb,enumbytes,nbytesregs,false);
    }

    // Get value into valuereg
    regm_t valueregs;
    reg_t valuereg;
    if (valueIsConst)
    {
        if (value == 0)
        {
            valueregs = 0;
            valuereg = 0x1F;    // xzr
        }
        else
        {
            valueregs = cgstate.allregs & ~(pretregs | nbytesregs);
            if (!valueregs)
                valueregs = cgstate.allregs & ~nbytesregs;
            // Use the register regwithvalue() actually picked, and narrow the
            // mask to just that register. Leaving the wide candidate mask in
            // valueregs would make findreg() name a possibly different
            // register and would leave nothing for dstregs below.
            valuereg = regwithvalue(cdb, valueregs, value, 64);
            valueregs = mask(valuereg);
            getregs(cdb, valueregs);
            cgstate.regimmed_set(valuereg, value);
        }
        freenode(evalue);
    }
    else
    {
        // Request the value in any register not holding nbytes; an empty
        // mask would ask for no result register at all
        valueregs = cgstate.allregs & ~nbytesregs;
        scodelem(cgstate,cdb,evalue,valueregs,nbytesregs,false);

        valuereg = findreg(valueregs);
        getregs(cdb,valueregs);

        regm_t regm = cgstate.allregs & ~(valueregs | nbytesregs);
        const r = regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,64); // MOV r,0x01010101_01010101
        // NOTE(review): 0x0FAF/modregrmx is the x86 IMUL encoding, kept as is;
        // an AArch64 multiply is presumably needed here, and the value is not
        // masked to its low byte before being replicated - confirm
        cdb.gen2(0x0FAF,modregrmx(3,valuereg,r));       // IMUL valuereg,r
    }
    freenode(e2);

    // Get destination into dstreg
    regm_t dstregs = cgstate.allregs & ~(nbytesregs | valueregs);
    scodelem(cgstate,cdb,e.E1,dstregs,nbytesregs | valueregs,false);
    reg_t dstreg = findreg(dstregs);

    regm_t retregs;
    if (pretregs)                               // if need return value
    {
        // Copy the destination before the stores below advance dstreg
        retregs = pretregs & ~(nbytesregs | valueregs | dstregs);
        if (!retregs)
            retregs = cgstate.allregs & ~(nbytesregs | valueregs | dstregs);
        reg_t retreg = allocreg(cdb,retregs,TYnptr);
        genmovreg(cdb,retreg,dstreg);           // MOV retreg,dstreg
    }

    if (enumbytes.Eoper == OPconst)
    {
        getregs(cdb,dstregs);                   // the post-indexed stores modify dstreg

        uint numbytes = cast(uint)el_tolong(enumbytes);
        if (const n = numbytes & ~(REGSIZE - 1))
        {
            regm_t limits = cgstate.allregs & ~(nbytesregs | valueregs | dstregs | retregs);
            reg_t limit = regwithvalue(cdb,limits,n / REGSIZE,64);          // MOV limit,#n / REGSIZE
            getregs(cdb,mask(limit));           // limit stops holding the constant after the ADD
            // NOTE(review): an option field of 6 is SXTW in the A64 encoding,
            // not UXTW; harmless here since n / REGSIZE fits in 29 bits - confirm
            cdb.gen1(INSTR.addsub_ext(1,0,0,0,limit,6,3,dstreg,limit));     // ADD limit,dstreg,limit,UXTW #3

            code* cnop = gen1(null, INSTR.nop);
            cdb.append(cnop);

            cdb.gen1(INSTR.ldst_immpost(3,0,0,8,dstreg,valuereg));          // STR  valuereg,[dstreg],#8  // 8 bytes, dstreg += 8
            cdb.gen1(INSTR.cmp_shift(1,dstreg,0,0,limit));                  // CMP  limit,dstreg
            genBranch(cdb,COND.ne,FL.code,cast(block*)cnop);                // JNE  cnop
        }

        // Store the remaining 0..7 bytes with stores no wider than what is
        // left, so nothing is written past the end of the buffer
        auto remainder = numbytes & (REGSIZE - 1);
        if (remainder >= 4)
        {
            cdb.gen1(INSTR.ldst_immpost(2,0,0,4,dstreg,valuereg));          // STR  Wvaluereg,[dstreg],#4 // 4 bytes, dstreg += 4
            remainder -= 4;
        }
        for (; remainder; --remainder)
            cdb.gen1(INSTR.ldst_immpost(0,0,0,1,dstreg,valuereg));          // STRB Wvaluereg,[dstreg],#1 // 1 byte, dstreg += 1
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    // TODO AArch64
    // Everything below is the x86 implementation (CX/DI, REP STOS)

    getregs(cdb,mDI | mCX);

    /*  MOV     sreg,ECX
        SHR     ECX,n
        REP
        STOSD/Q

        ADC     ECX,ECX
        REP
        STOSD

        MOV     ECX,sreg
        AND     ECX,3
        REP
        STOSB
     */
    regm_t regs = cgstate.allregs & (pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
    const sreg = allocreg(cdb,regs,TYint);
    genregs(cdb,0x89,CX,sreg);                          // MOV sreg,ECX (32 bits only)

    const n = I64 ? 3 : 2;
    cdb.genc2(0xC1, modregrm(3,5,CX), n);               // SHR ECX,n

    cdb.gen1(0xF3);                                     // REP
    cdb.gen1(STOS);                                     // STOSD/Q
    if (I64)
        code_orrex(cdb.last(), REX_W);

    if (I64)
    {
        cdb.gen2(0x11,modregrm(3,CX,CX));               // ADC ECX,ECX
        cdb.gen1(0xF3);                                 // REP
        cdb.gen1(STOS);                                 // STOSD
    }

    genregs(cdb,0x89,sreg,CX);                          // MOV ECX,sreg (32 bits only)
    cdb.genc2(0x81, modregrm(3,4,CX), 3);               // AND ECX,3
    cdb.gen1(0xF3);                                     // REP
    cdb.gen1(STOSB);                                    // STOSB

    cgstate.regimmed_set(CX, 0);                        // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}
1340+
/************************************************
 * Do memset for values larger than a byte.
 * Has many similarities to cod4.cdeq().
 * Doesn't work for 16 bit code.
 *
 * Emits a counted loop that stores the value (one register, or a register
 * pair when the value is 2 * REGSIZE wide) at the destination and advances
 * the destination by the value's size on each iteration.
 *
 * Params:
 *      cg = code generator state (not referenced; the body uses `cgstate`)
 *      cdb = sink for the generated instructions
 *      e = the OPmemset node; e.E1 is the destination, e.E2 the OPparam node
 *          carrying (count OPparam value)
 *      pretregs = mask of registers the result is wanted in, 0 if the result
 *          is not needed
 */
@trusted
private void cdmemsetn(ref CGstate cg, ref CodeBuilder cdb,elem* e,ref regm_t pretregs)
{
    //printf("cdmemsetn(pretregs = %s)\n", regm_str(pretregs));
    elem* eparam = e.E2;
    assert(eparam.Eoper == OPparam);

    elem* efill = eparam.E2;            // value stored into each element
    elem* ecount = eparam.E1;           // number of elements to store

    const tym_t fillty = tybasic(efill.Ety);
    const elemsize = tysize(efill.Ety);
    assert(cast(int)elemsize > 1);

    // XMM and x87 values are not handled
    if ((tyxmmreg(fillty) && config.fpxmmregs) ||
        (tyfloating(fillty) && config.inline8087))
        assert(0);      // fix later

    const isPair = elemsize == 2 * REGSIZE;     // value occupies two registers

    // Element count goes into CX
    regm_t countregs = mCX;
    codelem(cgstate,cdb,ecount,countregs,false);

    // Value goes into any other register(s)
    regm_t fillregs = cgstate.allregs & ~countregs;
    if (isPair)
    {
        // BP cannot be used for register pair,
        // IDXREGS could deplete index regs - see sdtor.d test14815()
        fillregs &= ~(mBP | IDXREGS);
    }
    scodelem(cgstate,cdb,efill,fillregs,countregs,false);

    /* If evaluating the value calls a function that never returns, the
     * registers are left looking unaffected. They could then be picked for
     * enregistering variables, and the next pass would fail to allocate
     * registers for the value. See ice11596.d
     */
    useregs(fillregs);

    reg_t fillreg = findreg(fillregs);
    reg_t fillreghi;
    if (isPair)
    {
        fillreg   = findreglsw(fillregs);
        fillreghi = findregmsw(fillregs);
    }

    freenode(eparam);

    // Registers that must survive from here on
    const regm_t busy = countregs | fillregs;

    // Destination goes into ES:DI
    regm_t dstregs = IDXREGS & ~busy;
    assert(dstregs);
    const tym_t dstty = tybasic(e.E1.Ety);
    if (!tyreg(dstty))
        dstregs |= mES;
    scodelem(cgstate,cdb,e.E1,dstregs,busy,false);
    reg_t dstreg = findreg(dstregs);

    // Keep a copy of the starting destination if the result is wanted
    regm_t resultregs = 0;
    if (pretregs)
    {
        const regm_t taken = dstregs | busy;
        resultregs = pretregs & ~taken;
        if (!resultregs)
            resultregs = cgstate.allregs & ~taken;
        const resultreg = allocreg(cdb, resultregs, TYnptr);
        getregs(cdb, resultregs);
        genmovreg(cdb,resultreg,dstreg);        // MOV BX,DI
    }

    getregs(cdb,mask(dstreg) | mCX);            // DI and CX get modified by the loop

    /* Generate:
     *    JCXZ Ldone
     * Ltop:
     *    MOV  [dstreg],AX
     *    ADD  dstreg,elemsize
     *    LOOP Ltop
     * Ldone:
     *    NOP
     */
    code* cdone = gennop(null);
    genjmp(cdb, JCXZ, FL.code, cast(block*)cdone);

    code store;
    buildEA(&store,dstreg,-1,1,0);
    store.Iop = 0x89;
    if (!I16 && elemsize == 2)
        store.Iflags |= CFopsize;
    if (I64 && elemsize == 8)
        store.Irex |= REX_W;
    code_newreg(&store, fillreg);
    cdb.gen(&store);                            // MOV [dstreg],AX
    code* ctop = cdb.last();                    // loop branches back to this store

    if (isPair)
    {
        store.IEV1.Vuns = REGSIZE;
        code_newreg(&store, fillreghi);
        cdb.gen(&store);                        // MOV REGSIZE[dstreg],DX
    }

    const rexw = I64 ? (REX_W << 16) : 0;
    cdb.genc2(0x81, rexw | modregrmx(3,0,dstreg), elemsize);    // ADD dstreg,elemsize
    genjmp(cdb, LOOP, FL.code, cast(block*)ctop);               // LOOP Ltop
    cdb.append(cdone);

    cgstate.regimmed_set(CX, 0);                // CX is now 0

    fixresult(cdb,e,resultregs,pretregs);
}
1452+
11661453/* *********************
11671454 * Do structure assignments.
11681455 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
0 commit comments