Skip to content

Commit 383b5f6

Browse files
committed
Allocation fix for horizontal PIM
1 parent 57a6437 commit 383b5f6

3 files changed

Lines changed: 189 additions & 24 deletions

File tree

libpimeval/src/pimResMgr.cpp

Lines changed: 90 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -515,20 +515,21 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
515515
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
516516
}
517517
} else if (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) {
518-
if (bitsPerElement > bitsPerElementAssoc) {
518+
if ((bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget() != PIM_DEVICE_BANK_LEVEL && m_device->getSimTarget() != PIM_DEVICE_FULCRUM)) {
519519
printf("PIM-Error: pimAllocAssociated: New object data type %s (%u bits) is wider than associated object (%u bits), which is not supported in H layout\n",
520-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
520+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
521521
return -1;
522522
} else if (bitsPerElement < bitsPerElementAssoc) {
523523
if (m_debugAlloc) {
524524
printf("PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is padded to associated object (%u bits) in H layout\n",
525-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
525+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
526526
}
527527
bitsPerElement = bitsPerElementAssoc; // padding
528528
} else {
529+
// same bit width, no padding needed
529530
if (m_debugAlloc) {
530531
printf("PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is associated with object (%u bits) in H layout\n",
531-
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
532+
pimUtils::pimDataTypeEnumToStr(dataType).c_str(), bitsPerElement, bitsPerElementAssoc);
532533
}
533534
}
534535
} else {
@@ -541,31 +542,96 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
541542
pimObjInfo newObj(m_availObjId, dataType, allocType, numElements, bitsPerElement, m_device);
542543
m_availObjId++;
543544

545+
unsigned numCols = m_device->getNumCols();
546+
uint64_t numRegions = 0;
547+
unsigned numColsToAllocLast = 0;
548+
uint64_t numElemPerRegion = 0;
549+
uint64_t numElemPerRegionLast = 0;
550+
unsigned numColsPerElem = 0;
551+
552+
if ((allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget() == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget() == PIM_DEVICE_FULCRUM)) {
553+
// allocate one region per core, with horizontal layout
554+
numRegions = (numElements * bitsPerElement - 1) / numCols + 1;
555+
556+
// This is a controversial design decision. I am not fully sold on this
557+
// TODO: discuss with professor before implementing the `non-controversial` design
558+
if (numRegions > assocObj.getRegions().size()) {
559+
printf("PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than associated object (%lu)\n",
560+
pimUtils::pimAllocEnumToStr(allocType).c_str(), numRegions, assocObj.getRegions().size());
561+
return -1;
562+
}
563+
564+
if (numRegions > numCores) {
565+
printf("PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than number of cores (%u)\n",
566+
pimUtils::pimAllocEnumToStr(allocType).c_str(), numRegions, numCores);
567+
return -1;
568+
}
569+
570+
numColsToAllocLast = (numElements * bitsPerElement) % numCols;
571+
if (numColsToAllocLast == 0) {
572+
numColsToAllocLast = numCols;
573+
}
574+
numElemPerRegion = numCols / bitsPerElement;
575+
numElemPerRegionLast = numColsToAllocLast / bitsPerElement;
576+
numColsPerElem = bitsPerElement;
577+
}
578+
544579
bool success = true;
545580
for (unsigned i = 0; i < numCores; ++i) {
546581
m_coreUsage.at(i)->newAllocStart();
547582
}
548-
for ( const pimRegion& region : assocObj.getRegions()) {
549-
PimCoreId coreId = region.getCoreId();
550-
unsigned numAllocRows = region.getNumAllocRows();
551-
unsigned numAllocCols = region.getNumAllocCols();
552-
if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
553-
numAllocRows = bitsPerElement;
554-
}
555-
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
556-
if (!newRegion.isValid()) {
557-
printf("PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n");
558-
success = false;
559-
break;
583+
584+
unsigned regionIdx = 0;
585+
uint64_t elemIdx = 0;
586+
for (const pimRegion& region : assocObj.getRegions()) {
587+
if ((bitsPerElement > bitsPerElementAssoc) && (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (m_device->getSimTarget() == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget() == PIM_DEVICE_FULCRUM)) {
588+
PimCoreId coreId = region.getCoreId();
589+
unsigned numAllocRows = region.getNumAllocRows() * bitsPerElement / bitsPerElementAssoc;
590+
unsigned numAllocCols = (regionIdx == numRegions - 1 ? numColsToAllocLast : numCols);
591+
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
592+
if (!newRegion.isValid()) {
593+
printf("PIM-Error: pimAlloc: Failed: Out of PIM memory\n");
594+
success = false;
595+
break;
596+
}
597+
newRegion.setElemIdxBegin(elemIdx);
598+
elemIdx += (regionIdx == numRegions - 1 ? numElemPerRegionLast : numElemPerRegion);
599+
if (elemIdx != region.getElemIdxEnd()) {
600+
printf("PIM-Error: pimAllocAssociated: Mismatch in element index range: %lu vs %lu\n",
601+
elemIdx, region.getElemIdxEnd());
602+
success = false;
603+
break;
604+
}
605+
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
606+
newRegion.setNumColsPerElem(numColsPerElem);
607+
newObj.addRegion(newRegion);
608+
609+
// add to core usage map
610+
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
611+
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
612+
} else {
613+
PimCoreId coreId = region.getCoreId();
614+
unsigned numAllocRows = region.getNumAllocRows();
615+
unsigned numAllocCols = region.getNumAllocCols();
616+
if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
617+
numAllocRows = bitsPerElement;
618+
}
619+
pimRegion newRegion = findAvailRegionOnCore(coreId, numAllocRows, numAllocCols);
620+
if (!newRegion.isValid()) {
621+
printf("PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n");
622+
success = false;
623+
break;
624+
}
625+
newRegion.setElemIdxBegin(region.getElemIdxBegin());
626+
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
627+
newRegion.setNumColsPerElem(region.getNumColsPerElem());
628+
newObj.addRegion(newRegion);
629+
630+
// add to core usage map
631+
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
632+
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
560633
}
561-
newRegion.setElemIdxBegin(region.getElemIdxBegin());
562-
newRegion.setElemIdxEnd(region.getElemIdxEnd()); // exclusive
563-
newRegion.setNumColsPerElem(region.getNumColsPerElem());
564-
newObj.addRegion(newRegion);
565-
566-
// add to core usage map
567-
auto alloc = std::make_pair(newRegion.getRowIdx(), numAllocRows);
568-
m_coreUsage.at(coreId)->addRange(alloc, newObj.getObjId());
634+
regionIdx++;
569635
}
570636
for (unsigned i = 0; i < numCores; ++i) {
571637
m_coreUsage.at(i)->newAllocEnd(success); // rollback if failed

tests/test-upcasting/Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Makefile: Test Allocation Upcasting
2+
# Copyright (c) 2024 University of Virginia
3+
# This file is licensed under the MIT License.
4+
# See the LICENSE file in the root of this repository for more details.
5+
6+
PROJ_ROOT = ../..
include ${PROJ_ROOT}/Makefile.common

EXEC := test-upcasting.out
SRC := test-upcasting.cpp

# These targets name actions rather than files; declaring them phony keeps
# them working even if a file called "clean", "debug", etc. appears here.
.PHONY: debug perf dramsim3_integ clean

# Every build mode produces the same executable; mode-specific flags are
# expected to come from Makefile.common via CXXFLAGS.
debug perf dramsim3_integ: $(EXEC)

# Only the first prerequisite ($<, the test source) is compiled; $(DEPS) is
# listed so that a change in the dependencies triggers a rebuild.
$(EXEC): $(SRC) $(DEPS)
	$(CXX) $< $(CXXFLAGS) -o $@

clean:
	rm -rf $(EXEC) *.dSYM
19+
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Test: PIM allocation upcasting (associating a wider data type with a narrower object)
2+
// Copyright (c) 2024 University of Virginia
3+
// This file is licensed under the MIT License.
4+
// See the LICENSE file in the root of this repository for more details.
5+
6+
#include "libpimeval.h"
7+
#include <iostream>
8+
#include <vector>
9+
#include <algorithm>
10+
#include <bitset>
11+
#include <cassert>
12+
#include <cstdlib>
13+
#include <cstdio>
14+
15+
16+
// Runs the upcasting allocation check on one device type.
//
// Creates a device, allocates an INT8 object, allocates an INT16 object
// associated with it, copies INT16 host data to the device, converts it to
// INT8 on the device, copies the result back and compares it with a host-side
// static_cast.
//
// deviceType: simulation target passed to pimCreateDevice.
// Returns true when every element matches; false on the first mismatch or
// when device creation or either allocation fails.
bool testFused(PimDeviceEnum deviceType)
{
  // Device geometry handed to pimCreateDevice:
  // 1 rank x 1 bank per rank x 8 subarrays per bank, 1024 rows x 8192 columns.
  const unsigned numRanks = 1;
  const unsigned numBankPerRank = 1;
  const unsigned numSubarrayPerBank = 8;
  const unsigned numRows = 1024;
  const unsigned numCols = 8192;

  const uint64_t numElements = 48;

  // Source values 0..47 all fit in int8_t, so the narrowing conversion must
  // reproduce them exactly.
  std::vector<int16_t> src(numElements);
  std::vector<int8_t> dest(numElements);
  for (uint64_t i = 0; i < numElements; ++i) {
    src[i] = static_cast<int16_t>(i);
  }

  // Checked explicitly instead of with assert(), which is compiled out when
  // NDEBUG is defined and would let the test run against a missing device.
  const PimStatus status = pimCreateDevice(deviceType, numRanks, numBankPerRank, numSubarrayPerBank, numRows, numCols);
  if (status != PIM_OK) {
    std::cout << "Fused Test FAILED: could not create PIM device" << std::endl;
    return false;
  }

  // NOTE(review): a negative object id is assumed to signal allocation
  // failure (the resource manager's error paths return -1) - confirm against
  // libpimeval.h.
  const PimObjId fuse_root = pimAlloc(PIM_ALLOC_AUTO, numElements, PIM_INT8);
  if (fuse_root < 0) {
    std::cout << "Fused Test FAILED: pimAlloc failed" << std::endl;
    pimDeleteDevice();
    return false;
  }

  // The associated object is wider (16 bits) than the object it is tied to
  // (8 bits); this is the upcasting case under test.
  const PimObjId fuse_expr_0 = pimAllocAssociated(fuse_root, PIM_INT16);
  if (fuse_expr_0 < 0) {
    std::cout << "Fused Test FAILED: pimAllocAssociated failed" << std::endl;
    pimFree(fuse_root);
    pimDeleteDevice();
    return false;
  }

  // Host -> device, convert INT16 -> INT8 on the device, device -> host.
  pimCopyHostToDevice(static_cast<void*>(src.data()), fuse_expr_0, 0UL, 0UL);
  pimConvertType(fuse_expr_0, fuse_root);
  pimCopyDeviceToHost(fuse_root, static_cast<void*>(dest.data()), 0UL, 0UL);

  // Release device objects, report statistics and tear the device down before
  // verifying, so every exit path below leaves no device behind.
  pimFree(fuse_root);
  pimFree(fuse_expr_0);
  pimShowStats();
  pimResetStats();
  pimDeleteDevice();

  // Verify against the host-side narrowing conversion.
  bool ok = true;
  for (uint64_t i = 0; i < numElements; ++i) {
    const int8_t expected = static_cast<int8_t>(src[i]);
    if (dest[i] != expected) {
      // int8_t streams as a character; widen both values to int so the
      // message shows numbers.
      std::cout << "Mismatch at index " << i << ": expected " << static_cast<int>(expected) << ", got " << static_cast<int>(dest[i]) << std::endl;
      ok = false;
      break;
    }
  }

  std::cout << "Fused Test " << (ok ? "PASSED" : "FAILED") << std::endl;
  return ok;
}
67+
68+
int main()
69+
{
70+
std::cout << "PIM Regression Test: PIM fused operations" << std::endl;
71+
72+
bool ok = true;
73+
ok &= testFused(PIM_DEVICE_BITSIMD_V);
74+
ok &= testFused(PIM_DEVICE_FULCRUM);
75+
ok &= testFused(PIM_DEVICE_BANK_LEVEL);
76+
77+
std::cout << (ok ? "PASSED" : "FAILED") << std::endl;
78+
return 0;
79+
}
80+

0 commit comments

Comments
 (0)