Skip to content

Commit 40ce162

Browse files
authored
Bank-level and Fulcrum Upcasting (#315)
2 parents 57a6437 + 196b8bd commit 40ce162

3 files changed

Lines changed: 192 additions & 24 deletions

File tree

libpimeval/src/pimResMgr.cpp

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -515,20 +515,21 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
515515
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
516516
}
517517
} else if (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) {
518-
if (bitsPerElement > bitsPerElementAssoc) {
518+
if ((bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget() != PIM_DEVICE_BANK_LEVEL && m_device->getSimTarget() != PIM_DEVICE_FULCRUM)) {
519519
printf("PIM-Error: pimAllocAssociated: New object data type %s (%u bits) is wider than associated object (%u bits), which is not supported in H layout\n",
520-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
520+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
521521
return -1;
522522
} else if (bitsPerElement < bitsPerElementAssoc) {
523523
if (m_debugAlloc) {
524524
printf("PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is padded to associated object (%u bits) in H layout\n",
525-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
525+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
526526
}
527527
bitsPerElement = bitsPerElementAssoc; // padding
528528
} else {
529+
// same bit width, no padding needed
529530
if (m_debugAlloc) {
530531
printf("PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is associated with object (%u bits) in H layout\n",
531-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
532+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
532533
}
533534
}
534535
} else {
@@ -541,31 +542,99 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
541542
pimObjInfo newObj(m_availObjId, dataType, allocType, numElements, bitsPerElement, m_device);
542543
m_availObjId++;
543544

545+
unsigned numCols = m_device->getNumCols();
546+
uint64_t numRegions = 0;
547+
unsigned numColsToAllocLast = 0;
548+
uint64_t numElemPerRegion = 0;
549+
uint64_t numElemPerRegionLast = 0;
550+
unsigned numColsPerElem = 0;
551+
552+
// The reason other horizontal bit-parallel (AiM, Aquabolt) PIM is not included in this condition is that
553+
// they support only 16-bit floats/ints.
554+
// If more bit-parallel PIMs are added, this condition should be extended.
555+
if ((allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget() == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget() == PIM_DEVICE_FULCRUM)) {
556+
// allocate one region per core, with horizontal layout
557+
numRegions = (numElements * bitsPerElement - 1) / numCols + 1;
558+
559+
// This is a controversial design decision. I am not fully sold on this
560+
// TODO: discuss with professor before implementing the `non-controversial` design
561+
if (numRegions > assocObj.getRegions().size()) {
562+
printf("PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than associated object (%lu)\n",
563+
pimUtils::pimAllocEnumToStr(allocType).c_str(), numRegions, assocObj.getRegions().size());
564+
return -1;
565+
}
566+
567+
if (numRegions > numCores) {
568+
printf("PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than number of cores (%u)\n",
569+
pimUtils::pimAllocEnumToStr(allocType).c_str(), numRegions, numCores);
570+
return -1;
571+
}
572+
573+
numColsToAllocLast = (numElements * bitsPerElement) % numCols;
574+
if (numColsToAllocLast == 0) {
575+
numColsToAllocLast = numCols;
576+
}
577+
numElemPerRegion = numCols / bitsPerElement;
578+
numElemPerRegionLast = numColsToAllocLast / bitsPerElement;
579+
numColsPerElem = bitsPerElement;
580+
}
581+
544582
bool success = true;
545583
for (unsigned i = 0; i < numCores; ++i) {
546584
m_coreUsage.at(i)->newAllocStart();
547585
}
548-
for ( const pimRegion& region : assocObj.getRegions()) {
549-
PimCoreId coreId = region.getCoreId();
550-
unsigned numAllocRows = region.getNumAllocRows();
551-
unsigned numAllocCols = region.getNumAllocCols();
552-
if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
553-
numAllocRows = bitsPerElement;
554-
}
555-
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
556-
if (!newRegion.isValid()) {
557-
printf("PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n");
558-
success = false;
559-
break;
586+
587+
unsigned regionIdx = 0;
588+
uint64_t elemIdx = 0;
589+
for (const pimRegion& region : assocObj.getRegions()) {
590+
if ((bitsPerElement > bitsPerElementAssoc) && (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (m_device->getSimTarget() == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget() == PIM_DEVICE_FULCRUM)) {
591+
PimCoreId coreId = region.getCoreId();
592+
unsigned numAllocRows = region.getNumAllocRows() * bitsPerElement / bitsPerElementAssoc;
593+
unsigned numAllocCols = (regionIdx == numRegions - 1 ? numColsToAllocLast : numCols);
594+
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
595+
if (!newRegion.isValid()) {
596+
printf("PIM-Error: pimAlloc: Failed: Out of PIM memory\n");
597+
success = false;
598+
break;
599+
}
600+
newRegion.setElemIdxBegin(elemIdx);
601+
elemIdx += (regionIdx == numRegions - 1 ? numElemPerRegionLast : numElemPerRegion);
602+
if (elemIdx != region.getElemIdxEnd()) {
603+
printf("PIM-Error: pimAllocAssociated: Mismatch in element index range: %lu vs %lu\n",
604+
elemIdx, region.getElemIdxEnd());
605+
success = false;
606+
break;
607+
}
608+
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
609+
newRegion.setNumColsPerElem(numColsPerElem);
610+
newObj.addRegion(newRegion);
611+
612+
// add to core usage map
613+
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
614+
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
615+
} else {
616+
PimCoreId coreId = region.getCoreId();
617+
unsigned numAllocRows = region.getNumAllocRows();
618+
unsigned numAllocCols = region.getNumAllocCols();
619+
if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
620+
numAllocRows = bitsPerElement;
621+
}
622+
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
623+
if (!newRegion.isValid()) {
624+
printf("PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n");
625+
success = false;
626+
break;
627+
}
628+
newRegion.setElemIdxBegin(region.getElemIdxBegin());
629+
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
630+
newRegion.setNumColsPerElem(region.getNumColsPerElem());
631+
newObj.addRegion(newRegion);
632+
633+
// add to core usage map
634+
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
635+
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
560636
}
561-
newRegion.setElemIdxBegin(region.getElemIdxBegin());
562-
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
563-
newRegion.setNumColsPerElem(region.getNumColsPerElem());
564-
newObj.addRegion(newRegion);
565-
566-
// add to core usage map
567-
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
568-
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
637+
regionIdx++;
569638
}
570639
for (unsigned i = 0; i < numCores; ++i) {
571640
m_coreUsage.at(i)->newAllocEnd(success); // rollback if failed

tests/test-upcasting/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Makefile: Test Allocation Upcasting
2+
# Copyright (c) 2024 University of Virginia
3+
# This file is licensed under the MIT License.
4+
# See the LICENSE file in the root of this repository for more details.
5+
6+
PROJ_ROOT = ../..
7+
include ${PROJ_ROOT}/Makefile.common
8+
9+
EXEC := test-upcasting.out
10+
SRC := test-upcasting.cpp
11+
12+
debug perf dramsim3_integ: $(EXEC)
13+
14+
$(EXEC): $(SRC) $(DEPS)
15+
$(CXX) $< $(CXXFLAGS) -o $@
16+
17+
clean:
18+
rm -rf $(EXEC) *.dSYM
19+
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Test: PIM allocation upcasting (associated object wider than its base object)
2+
// Copyright (c) 2024 University of Virginia
3+
// This file is licensed under the MIT License.
4+
// See the LICENSE file in the root of this repository for more details.
5+
6+
#include "libpimeval.h"
7+
#include <iostream>
8+
#include <vector>
9+
#include <algorithm>
10+
#include <bitset>
11+
#include <cassert>
12+
#include <cstdlib>
13+
#include <cstdio>
14+
15+
16+
// Exercises allocation upcasting on one simulated device type.
//
// An INT8 object is allocated first, then a wider INT16 object is allocated
// in association with it. Host data is copied into the INT16 object, narrowed
// into the INT8 object with pimConvertType, copied back, and compared with a
// host-side static_cast<int8_t> of each source value.
//
// @param deviceType  simulation target to create the device for
// @return true when every element matches; false on device-creation failure,
//         allocation failure, or the first data mismatch
bool testFused(PimDeviceEnum deviceType)
{
  // Small device geometry: 1 rank x 1 bank x 8 subarrays, 1024 rows x 8192 cols
  unsigned numRanks = 1;
  unsigned numBankPerRank = 1;
  unsigned numSubarrayPerBank = 8;
  unsigned numRows = 1024;
  unsigned numCols = 8192;

  uint64_t numElements = 48;

  std::vector<int16_t> src(numElements);
  std::vector<int8_t> dest(numElements);

  for (uint64_t i = 0; i < numElements; ++i) {
    src[i] = static_cast<int16_t>(i);
  }

  // Explicit check instead of assert(): assert is compiled out under NDEBUG,
  // which would let the test continue without a device.
  PimStatus status = pimCreateDevice(deviceType, numRanks, numBankPerRank, numSubarrayPerBank, numRows, numCols);
  if (status != PIM_OK) {
    std::cout << "Failed to create PIM device" << std::endl;
    std::cout << "Fused Test FAILED" << std::endl;
    return false;
  }

  bool ok = true;
  // Emitting Allocations
  // NOTE(review): assumes the public allocation APIs report failure as -1,
  // matching the `return -1` paths in pimResMgr::pimAllocAssociated - confirm.
  PimObjId fuse_root = pimAlloc(PIM_ALLOC_AUTO, numElements, PIM_INT8);
  PimObjId fuse_expr_0 = -1;
  if (fuse_root == -1) {
    std::cout << "Failed to allocate INT8 object" << std::endl;
    ok = false;
  } else {
    // Upcast: the associated object is wider (16 bits) than the base object (8 bits)
    fuse_expr_0 = pimAllocAssociated(fuse_root, PIM_INT16);
    if (fuse_expr_0 == -1) {
      std::cout << "Failed to allocate associated INT16 object" << std::endl;
      ok = false;
    }
  }

  if (ok) {
    // Emitting Copy Host to Device
    pimCopyHostToDevice((void*)src.data(), fuse_expr_0, 0UL, 0UL);
    // Creating PIM Fused Program
    pimConvertType(fuse_expr_0, fuse_root);
    // Emitting Copy Device to Host
    pimCopyDeviceToHost(fuse_root, (void*)dest.data(), 0UL, 0UL);
  }

  // Emitting Deallocations (only for objects that were actually allocated)
  if (fuse_root != -1) {
    pimFree(fuse_root);
  }
  if (fuse_expr_0 != -1) {
    pimFree(fuse_expr_0);
  }
  pimShowStats();
  pimResetStats();
  pimDeleteDevice();

  // Verifying results
  if (ok) {
    for (uint64_t i = 0; i < numElements; ++i) {
      const int8_t expected = static_cast<int8_t>(src[i]);
      if (dest[i] != expected) {
        // Cast both values to int: streaming an int8_t prints it as a character.
        std::cout << "Mismatch at index " << i << ": expected " << static_cast<int>(expected) << ", got " << static_cast<int>(dest[i]) << std::endl;
        ok = false;
        break;
      }
    }
  }

  std::cout << "Fused Test " << (ok ? "PASSED" : "FAILED") << std::endl;
  return ok;
}
67+
68+
int main()
69+
{
70+
std::cout << "PIM Regression Test: PIM fused operations" << std::endl;
71+
72+
bool ok = true;
73+
ok &= testFused(PIM_DEVICE_BITSIMD_V);
74+
ok &= testFused(PIM_DEVICE_FULCRUM);
75+
ok &= testFused(PIM_DEVICE_BANK_LEVEL);
76+
77+
std::cout << (ok ? "PASSED" : "FAILED") << std::endl;
78+
return 0;
79+
}
80+

0 commit comments

Comments
 (0)