Skip to content

Commit 5f1056b

Browse files
committed
Optimise EXRCore Deep pixel unpacking
The functions that perform Deep pixel unpacking had two branch checks (switch statements), one on the pixel data type and one on the requested data type, inside the innermost loop that iterates over the pixels of each line. These checks are unnecessary inside the loop, since the data types remain the same for every pixel. However, the compiler is not able to optimise this, and the generated assembly for the pixel loop is massive because it contains all of these branch checks. This commit fixes the issue by moving the two switch statements outside of the pixel loop, which allows the compiler to generate much more efficient assembly for the pixel unpacking operations. Signed-off-by: Nikolaos Koutsikos <nikolaos.koutsikos@foundry.com>
1 parent ffda353 commit 5f1056b

1 file changed

Lines changed: 275 additions & 19 deletions

File tree

src/lib/OpenEXRCore/unpack.c

Lines changed: 275 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,19 +1341,163 @@ generic_unpack_deep_pointers (exr_decode_pipeline_t* decode)
13411341
(((size_t) decc->user_line_stride) / sizeof (void*));
13421342
pixstride = ((size_t) decc->user_pixel_stride) / sizeof (void*);
13431343

1344-
for (int x = 0; x < w; ++x)
1345-
{
1346-
void* outpix = *pdata;
1347-
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1348-
1349-
pdata += pixstride;
1350-
if (outpix)
1351-
{
1352-
uint8_t* cdata = outpix;
13531344

1354-
UNPACK_SAMPLES (samps)
1355-
}
1356-
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1345+
switch (decc->data_type)
1346+
{
1347+
case EXR_PIXEL_HALF:
1348+
switch (decc->user_data_type)
1349+
{
1350+
case EXR_PIXEL_HALF: {
1351+
for (int x = 0; x < w; ++x)
1352+
{
1353+
void* outpix = *pdata;
1354+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1355+
pdata += pixstride;
1356+
if (outpix)
1357+
{
1358+
uint8_t* cdata = outpix;
1359+
UNPACK_HALF_TO_HALF_SAMPLES(samps)
1360+
}
1361+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1362+
}
1363+
break;
1364+
}
1365+
case EXR_PIXEL_FLOAT: {
1366+
for (int x = 0; x < w; ++x)
1367+
{
1368+
void* outpix = *pdata;
1369+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1370+
pdata += pixstride;
1371+
if (outpix)
1372+
{
1373+
uint8_t* cdata = outpix;
1374+
UNPACK_HALF_TO_FLOAT_SAMPLES(samps)
1375+
}
1376+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1377+
}
1378+
break;
1379+
}
1380+
case EXR_PIXEL_UINT: {
1381+
for (int x = 0; x < w; ++x)
1382+
{
1383+
void* outpix = *pdata;
1384+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1385+
pdata += pixstride;
1386+
if (outpix)
1387+
{
1388+
uint8_t* cdata = outpix;
1389+
UNPACK_HALF_TO_UINT_SAMPLES(samps)
1390+
}
1391+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1392+
}
1393+
break;
1394+
}
1395+
default: return EXR_ERR_INVALID_ARGUMENT;
1396+
}
1397+
break;
1398+
case EXR_PIXEL_FLOAT:
1399+
switch (decc->user_data_type)
1400+
{
1401+
case EXR_PIXEL_HALF: {
1402+
for (int x = 0; x < w; ++x)
1403+
{
1404+
void* outpix = *pdata;
1405+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1406+
pdata += pixstride;
1407+
if (outpix)
1408+
{
1409+
uint8_t* cdata = outpix;
1410+
UNPACK_FLOAT_TO_HALF_SAMPLES(samps)
1411+
}
1412+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1413+
}
1414+
break;
1415+
}
1416+
case EXR_PIXEL_FLOAT: {
1417+
for (int x = 0; x < w; ++x)
1418+
{
1419+
void* outpix = *pdata;
1420+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1421+
pdata += pixstride;
1422+
if (outpix)
1423+
{
1424+
uint8_t* cdata = outpix;
1425+
UNPACK_FLOAT_TO_FLOAT_SAMPLES(samps)
1426+
}
1427+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1428+
}
1429+
break;
1430+
}
1431+
case EXR_PIXEL_UINT: {
1432+
for (int x = 0; x < w; ++x)
1433+
{
1434+
void* outpix = *pdata;
1435+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1436+
pdata += pixstride;
1437+
if (outpix)
1438+
{
1439+
uint8_t* cdata = outpix;
1440+
UNPACK_FLOAT_TO_UINT_SAMPLES(samps)
1441+
}
1442+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1443+
}
1444+
break;
1445+
}
1446+
default: return EXR_ERR_INVALID_ARGUMENT;
1447+
}
1448+
break;
1449+
case EXR_PIXEL_UINT:
1450+
switch (decc->user_data_type)
1451+
{
1452+
case EXR_PIXEL_HALF: {
1453+
for (int x = 0; x < w; ++x)
1454+
{
1455+
void* outpix = *pdata;
1456+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1457+
pdata += pixstride;
1458+
if (outpix)
1459+
{
1460+
uint8_t* cdata = outpix;
1461+
UNPACK_UINT_TO_HALF_SAMPLES(samps)
1462+
}
1463+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1464+
}
1465+
break;
1466+
}
1467+
case EXR_PIXEL_FLOAT: {
1468+
for (int x = 0; x < w; ++x)
1469+
{
1470+
void* outpix = *pdata;
1471+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1472+
pdata += pixstride;
1473+
if (outpix)
1474+
{
1475+
uint8_t* cdata = outpix;
1476+
UNPACK_UINT_TO_FLOAT_SAMPLES(samps)
1477+
}
1478+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1479+
}
1480+
break;
1481+
}
1482+
case EXR_PIXEL_UINT: {
1483+
for (int x = 0; x < w; ++x)
1484+
{
1485+
void* outpix = *pdata;
1486+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1487+
pdata += pixstride;
1488+
if (outpix)
1489+
{
1490+
uint8_t* cdata = outpix;
1491+
UNPACK_UINT_TO_UINT_SAMPLES(samps)
1492+
}
1493+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1494+
}
1495+
break;
1496+
}
1497+
default: return EXR_ERR_INVALID_ARGUMENT;
1498+
}
1499+
break;
1500+
default: return EXR_ERR_INVALID_ARGUMENT;
13571501
}
13581502
}
13591503
sampbuffer += w;
@@ -1410,14 +1554,126 @@ generic_unpack_deep (exr_decode_pipeline_t* decode)
14101554

14111555
cdata += totsamps * ((size_t) ubpc);
14121556

1413-
for (int x = 0; x < w; ++x)
1557+
switch (decc->data_type)
14141558
{
1415-
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1416-
1417-
UNPACK_SAMPLES (samps)
1418-
1419-
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1420-
if (incr_tot) totsamps += (size_t) samps;
1559+
case EXR_PIXEL_HALF:
1560+
switch (decc->user_data_type)
1561+
{
1562+
case EXR_PIXEL_HALF: {
1563+
for (int x = 0; x < w; ++x)
1564+
{
1565+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1566+
UNPACK_HALF_TO_HALF_SAMPLES(samps)
1567+
1568+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1569+
if (incr_tot) totsamps += (size_t) samps;
1570+
}
1571+
break;
1572+
}
1573+
case EXR_PIXEL_FLOAT: {
1574+
for (int x = 0; x < w; ++x)
1575+
{
1576+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1577+
UNPACK_HALF_TO_FLOAT_SAMPLES(samps)
1578+
1579+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1580+
if (incr_tot) totsamps += (size_t) samps;
1581+
}
1582+
break;
1583+
}
1584+
case EXR_PIXEL_UINT: {
1585+
for (int x = 0; x < w; ++x)
1586+
{
1587+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1588+
UNPACK_HALF_TO_UINT_SAMPLES(samps)
1589+
1590+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1591+
if (incr_tot) totsamps += (size_t) samps;
1592+
}
1593+
break;
1594+
}
1595+
default: return EXR_ERR_INVALID_ARGUMENT;
1596+
}
1597+
break;
1598+
case EXR_PIXEL_FLOAT:
1599+
switch (decc->user_data_type)
1600+
{
1601+
case EXR_PIXEL_HALF: {
1602+
for (int x = 0; x < w; ++x)
1603+
{
1604+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1605+
UNPACK_FLOAT_TO_HALF_SAMPLES(samps)
1606+
1607+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1608+
if (incr_tot) totsamps += (size_t) samps;
1609+
}
1610+
break;
1611+
}
1612+
case EXR_PIXEL_FLOAT: {
1613+
for (int x = 0; x < w; ++x)
1614+
{
1615+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1616+
UNPACK_FLOAT_TO_FLOAT_SAMPLES(samps)
1617+
1618+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1619+
if (incr_tot) totsamps += (size_t) samps;
1620+
}
1621+
break;
1622+
}
1623+
case EXR_PIXEL_UINT: {
1624+
for (int x = 0; x < w; ++x)
1625+
{
1626+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1627+
UNPACK_FLOAT_TO_UINT_SAMPLES(samps)
1628+
1629+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1630+
if (incr_tot) totsamps += (size_t) samps;
1631+
}
1632+
break;
1633+
}
1634+
default: return EXR_ERR_INVALID_ARGUMENT;
1635+
}
1636+
break;
1637+
case EXR_PIXEL_UINT:
1638+
switch (decc->user_data_type)
1639+
{
1640+
case EXR_PIXEL_HALF: {
1641+
for (int x = 0; x < w; ++x)
1642+
{
1643+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1644+
UNPACK_UINT_TO_HALF_SAMPLES(samps)
1645+
1646+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1647+
if (incr_tot) totsamps += (size_t) samps;
1648+
}
1649+
break;
1650+
}
1651+
case EXR_PIXEL_FLOAT: {
1652+
for (int x = 0; x < w; ++x)
1653+
{
1654+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1655+
UNPACK_UINT_TO_FLOAT_SAMPLES(samps)
1656+
1657+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1658+
if (incr_tot) totsamps += (size_t) samps;
1659+
}
1660+
break;
1661+
}
1662+
case EXR_PIXEL_UINT: {
1663+
for (int x = 0; x < w; ++x)
1664+
{
1665+
PREPARE_SAMPLES (sampbuffer, prevsamps, decode)
1666+
UNPACK_UINT_TO_UINT_SAMPLES(samps)
1667+
1668+
srcbuffer += ((size_t) bpc) * ((size_t) samps);
1669+
if (incr_tot) totsamps += (size_t) samps;
1670+
}
1671+
break;
1672+
}
1673+
default: return EXR_ERR_INVALID_ARGUMENT;
1674+
}
1675+
break;
1676+
default: return EXR_ERR_INVALID_ARGUMENT;
14211677
}
14221678
}
14231679
sampbuffer += w;

0 commit comments

Comments
 (0)