@@ -515,20 +515,21 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
515515 pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
516516 }
517517 } else if (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) {
518- if (bitsPerElement > bitsPerElementAssoc) {
518+ if (( bitsPerElement > bitsPerElementAssoc) && (m_device-> getSimTarget () != PIM_DEVICE_BANK_LEVEL && m_device-> getSimTarget () != PIM_DEVICE_FULCRUM) ) {
519519 printf (" PIM-Error: pimAllocAssociated: New object data type %s (%u bits) is wider than associated object (%u bits), which is not supported in H layout\n " ,
520- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
520+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
521521 return -1 ;
522522 } else if (bitsPerElement < bitsPerElementAssoc) {
523523 if (m_debugAlloc) {
524524 printf (" PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is padded to associated object (%u bits) in H layout\n " ,
525- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
525+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
526526 }
527527 bitsPerElement = bitsPerElementAssoc; // padding
528528 } else {
529+ // same bit width, no padding needed
529530 if (m_debugAlloc) {
530531 printf (" PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is associated with object (%u bits) in H layout\n " ,
531- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
532+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
532533 }
533534 }
534535 } else {
@@ -541,31 +542,99 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
541542 pimObjInfo newObj (m_availObjId, dataType, allocType, numElements, bitsPerElement, m_device);
542543 m_availObjId++;
543544
545+ unsigned numCols = m_device->getNumCols ();
546+ uint64_t numRegions = 0 ;
547+ unsigned numColsToAllocLast = 0 ;
548+ uint64_t numElemPerRegion = 0 ;
549+ uint64_t numElemPerRegionLast = 0 ;
550+ unsigned numColsPerElem = 0 ;
551+
552+ // The reason other horizontal bit-parallel (AiM, Aquabolt) PIM is not included in this condition is that
553+ // they support only 16-bit floats/ints.
554+ // If more bit-parallel PIMs are added, this condition should be extended.
555+ if ((allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget () == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget () == PIM_DEVICE_FULCRUM)) {
556+ // allocate one region per core, with horizontal layout
557+ numRegions = (numElements * bitsPerElement - 1 ) / numCols + 1 ;
558+
559+ // This is a controversial design decision. I am not fully sold on this
560+ // TODO: discuss with professor before implementing the `non-controversial` design
561+ if (numRegions > assocObj.getRegions ().size ()) {
562+ printf (" PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than associated object (%lu)\n " ,
563+ pimUtils::pimAllocEnumToStr (allocType).c_str (), numRegions, assocObj.getRegions ().size ());
564+ return -1 ;
565+ }
566+
567+ if (numRegions > numCores) {
568+ printf (" PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than number of cores (%u)\n " ,
569+ pimUtils::pimAllocEnumToStr (allocType).c_str (), numRegions, numCores);
570+ return -1 ;
571+ }
572+
573+ numColsToAllocLast = (numElements * bitsPerElement) % numCols;
574+ if (numColsToAllocLast == 0 ) {
575+ numColsToAllocLast = numCols;
576+ }
577+ numElemPerRegion = numCols / bitsPerElement;
578+ numElemPerRegionLast = numColsToAllocLast / bitsPerElement;
579+ numColsPerElem = bitsPerElement;
580+ }
581+
544582 bool success = true ;
545583 for (unsigned i = 0 ; i < numCores; ++i) {
546584 m_coreUsage.at (i)->newAllocStart ();
547585 }
548- for ( const pimRegion& region : assocObj.getRegions ()) {
549- PimCoreId coreId = region.getCoreId ();
550- unsigned numAllocRows = region.getNumAllocRows ();
551- unsigned numAllocCols = region.getNumAllocCols ();
552- if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
553- numAllocRows = bitsPerElement;
554- }
555- pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
556- if (!newRegion.isValid ()) {
557- printf (" PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n " );
558- success = false ;
559- break ;
586+
587+ unsigned regionIdx = 0 ;
588+ uint64_t elemIdx = 0 ;
589+ for (const pimRegion& region : assocObj.getRegions ()) {
590+ if ((bitsPerElement > bitsPerElementAssoc) && (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (m_device->getSimTarget () == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget () == PIM_DEVICE_FULCRUM)) {
591+ PimCoreId coreId = region.getCoreId ();
592+ unsigned numAllocRows = region.getNumAllocRows () * bitsPerElement / bitsPerElementAssoc;
593+ unsigned numAllocCols = (regionIdx == numRegions - 1 ? numColsToAllocLast : numCols);
594+ pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
595+ if (!newRegion.isValid ()) {
596+ printf (" PIM-Error: pimAlloc: Failed: Out of PIM memory\n " );
597+ success = false ;
598+ break ;
599+ }
600+ newRegion.setElemIdxBegin (elemIdx);
601+ elemIdx += (regionIdx == numRegions - 1 ? numElemPerRegionLast : numElemPerRegion);
602+ if (elemIdx != region.getElemIdxEnd ()) {
603+ printf (" PIM-Error: pimAllocAssociated: Mismatch in element index range: %lu vs %lu\n " ,
604+ elemIdx, region.getElemIdxEnd ());
605+ success = false ;
606+ break ;
607+ }
608+ newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
609+ newRegion.setNumColsPerElem (numColsPerElem);
610+ newObj.addRegion (newRegion);
611+
612+ // add to core usage map
613+ auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
614+ m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
615+ } else {
616+ PimCoreId coreId = region.getCoreId ();
617+ unsigned numAllocRows = region.getNumAllocRows ();
618+ unsigned numAllocCols = region.getNumAllocCols ();
619+ if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
620+ numAllocRows = bitsPerElement;
621+ }
622+ pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
623+ if (!newRegion.isValid ()) {
624+ printf (" PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n " );
625+ success = false ;
626+ break ;
627+ }
628+ newRegion.setElemIdxBegin (region.getElemIdxBegin ());
629+ newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
630+ newRegion.setNumColsPerElem (region.getNumColsPerElem ());
631+ newObj.addRegion (newRegion);
632+
633+ // add to core usage map
634+ auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
635+ m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
560636 }
561- newRegion.setElemIdxBegin (region.getElemIdxBegin ());
562- newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
563- newRegion.setNumColsPerElem (region.getNumColsPerElem ());
564- newObj.addRegion (newRegion);
565-
566- // add to core usage map
567- auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
568- m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
637+ regionIdx++;
569638 }
570639 for (unsigned i = 0 ; i < numCores; ++i) {
571640 m_coreUsage.at (i)->newAllocEnd (success); // rollback if failed
0 commit comments