@@ -515,20 +515,21 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
515515 pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
516516 }
517517 } else if (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) {
518- if (bitsPerElement > bitsPerElementAssoc) {
518+ if (( bitsPerElement > bitsPerElementAssoc) && (m_device-> getSimTarget () != PIM_DEVICE_BANK_LEVEL && m_device-> getSimTarget () != PIM_DEVICE_FULCRUM) ) {
519519 printf (" PIM-Error: pimAllocAssociated: New object data type %s (%u bits) is wider than associated object (%u bits), which is not supported in H layout\n " ,
520- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
520+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
521521 return -1 ;
522522 } else if (bitsPerElement < bitsPerElementAssoc) {
523523 if (m_debugAlloc) {
524524 printf (" PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is padded to associated object (%u bits) in H layout\n " ,
525- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
525+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
526526 }
527527 bitsPerElement = bitsPerElementAssoc; // padding
528528 } else {
529+ // same bit width, no padding needed
529530 if (m_debugAlloc) {
530531 printf (" PIM-Debug: pimAllocAssociated: New object of data type %s (%u bits) is associated with object (%u bits) in H layout\n " ,
531- pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
532+ pimUtils::pimDataTypeEnumToStr (dataType).c_str (), bitsPerElement, bitsPerElementAssoc);
532533 }
533534 }
534535 } else {
@@ -541,31 +542,96 @@ pimResMgr::pimAllocAssociated(PimObjId assocId, PimDataType dataType)
541542 pimObjInfo newObj (m_availObjId, dataType, allocType, numElements, bitsPerElement, m_device);
542543 m_availObjId++;
543544
545+ unsigned numCols = m_device->getNumCols ();
546+ uint64_t numRegions = 0 ;
547+ unsigned numColsToAllocLast = 0 ;
548+ uint64_t numElemPerRegion = 0 ;
549+ uint64_t numElemPerRegionLast = 0 ;
550+ unsigned numColsPerElem = 0 ;
551+
552+ if ((allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (bitsPerElement > bitsPerElementAssoc) && (m_device->getSimTarget () == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget () == PIM_DEVICE_FULCRUM)) {
553+ // allocate one region per core, with horizontal layout
554+ numRegions = (numElements * bitsPerElement - 1 ) / numCols + 1 ;
555+
556+ // This is a controversial design decision. I am not fully sold on this
557+ // TODO: discuss with professor before implementing the `non-controversial` design
558+ if (numRegions > assocObj.getRegions ().size ()) {
559+ printf (" PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than associated object (%lu)\n " ,
560+ pimUtils::pimAllocEnumToStr (allocType).c_str (), numRegions, assocObj.getRegions ().size ());
561+ return -1 ;
562+ }
563+
564+ if (numRegions > numCores) {
565+ printf (" PIM-Error: pimAllocAssociated: Allocation type %s does not allow to allocate more regions (%lu) than number of cores (%u)\n " ,
566+ pimUtils::pimAllocEnumToStr (allocType).c_str (), numRegions, numCores);
567+ return -1 ;
568+ }
569+
570+ numColsToAllocLast = (numElements * bitsPerElement) % numCols;
571+ if (numColsToAllocLast == 0 ) {
572+ numColsToAllocLast = numCols;
573+ }
574+ numElemPerRegion = numCols / bitsPerElement;
575+ numElemPerRegionLast = numColsToAllocLast / bitsPerElement;
576+ numColsPerElem = bitsPerElement;
577+ }
578+
544579 bool success = true ;
545580 for (unsigned i = 0 ; i < numCores; ++i) {
546581 m_coreUsage.at (i)->newAllocStart ();
547582 }
548- for ( const pimRegion& region : assocObj.getRegions ()) {
549- PimCoreId coreId = region.getCoreId ();
550- unsigned numAllocRows = region.getNumAllocRows ();
551- unsigned numAllocCols = region.getNumAllocCols ();
552- if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
553- numAllocRows = bitsPerElement;
554- }
555- pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
556- if (!newRegion.isValid ()) {
557- printf (" PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n " );
558- success = false ;
559- break ;
583+
584+ unsigned regionIdx = 0 ;
585+ uint64_t elemIdx = 0 ;
586+ for (const pimRegion& region : assocObj.getRegions ()) {
587+ if ((bitsPerElement > bitsPerElementAssoc) && (allocType == PIM_ALLOC_H || allocType == PIM_ALLOC_H1) && (m_device->getSimTarget () == PIM_DEVICE_BANK_LEVEL || m_device->getSimTarget () == PIM_DEVICE_FULCRUM)) {
588+ PimCoreId coreId = region.getCoreId ();
589+ unsigned numAllocRows = region.getNumAllocRows () * bitsPerElement / bitsPerElementAssoc;
590+ unsigned numAllocCols = (regionIdx == numRegions - 1 ? numColsToAllocLast : numCols);
591+ pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
592+ if (!newRegion.isValid ()) {
593+ printf (" PIM-Error: pimAlloc: Failed: Out of PIM memory\n " );
594+ success = false ;
595+ break ;
596+ }
597+ newRegion.setElemIdxBegin (elemIdx);
598+ elemIdx += (regionIdx == numRegions - 1 ? numElemPerRegionLast : numElemPerRegion);
599+ if (elemIdx != region.getElemIdxEnd ()) {
600+ printf (" PIM-Error: pimAllocAssociated: Mismatch in element index range: %lu vs %lu\n " ,
601+ elemIdx, region.getElemIdxEnd ());
602+ success = false ;
603+ break ;
604+ }
605+ newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
606+ newRegion.setNumColsPerElem (numColsPerElem);
607+ newObj.addRegion (newRegion);
608+
609+ // add to core usage map
610+ auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
611+ m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
612+ } else {
613+ PimCoreId coreId = region.getCoreId ();
614+ unsigned numAllocRows = region.getNumAllocRows ();
615+ unsigned numAllocCols = region.getNumAllocCols ();
616+ if (allocType == PIM_ALLOC_V || allocType == PIM_ALLOC_V1) {
617+ numAllocRows = bitsPerElement;
618+ }
619+ pimRegion newRegion = findAvailRegionOnCore (coreId, numAllocRows, numAllocCols);
620+ if (!newRegion.isValid ()) {
621+ printf (" PIM-Error: pimAllocAssociated: Failed: Out of PIM memory\n " );
622+ success = false ;
623+ break ;
624+ }
625+ newRegion.setElemIdxBegin (region.getElemIdxBegin ());
626+ newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
627+ newRegion.setNumColsPerElem (region.getNumColsPerElem ());
628+ newObj.addRegion (newRegion);
629+
630+ // add to core usage map
631+ auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
632+ m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
560633 }
561- newRegion.setElemIdxBegin (region.getElemIdxBegin ());
562- newRegion.setElemIdxEnd (region.getElemIdxEnd ()); // exclusive
563- newRegion.setNumColsPerElem (region.getNumColsPerElem ());
564- newObj.addRegion (newRegion);
565-
566- // add to core usage map
567- auto alloc = std::make_pair (newRegion.getRowIdx (), numAllocRows);
568- m_coreUsage.at (coreId)->addRange (alloc, newObj.getObjId ());
634+ regionIdx++;
569635 }
570636 for (unsigned i = 0 ; i < numCores; ++i) {
571637 m_coreUsage.at (i)->newAllocEnd (success); // rollback if failed
0 commit comments