1212
1313#include < cuda_runtime.h>
1414
15- #include < unistd.h>
1615#include < vector>
1716
1817#include " ITStrackingGPU/TimeFrameGPU.h"
@@ -63,6 +62,24 @@ void TimeFrameGPU<NLayers>::loadIndexTableUtils()
6362 GPUChkErrS (cudaMemcpy (mIndexTableUtilsDevice , &(this ->mIndexTableUtils ), sizeof (IndexTableUtilsN), cudaMemcpyHostToDevice));
6463}
6564
// Copy the LayerRadii, MinPt and LayerxX0 arrays of trkParam to their device
// counterparts (mLayerRadiiDevice, mMinPtsDevice, mLayerxX0Device).
// Each device buffer is allocated only while its pointer is still null; the
// copies are blocking cudaMemcpy calls and are checked through GPUChkErrS.
// NOTE(review): a buffer keeps the size it had at first allocation, while the
// copy size follows the current vector size - confirm the vectors never grow
// between calls.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadTrackingParametersDevice(const TrackingParameters& trkParam)
{
  GPUTimer timer(" loading tracking parameters");

  // Byte counts of the three host arrays (all elements are treated as float).
  const auto radiiBytes = trkParam.LayerRadii.size() * sizeof(float);
  const auto minPtBytes = trkParam.MinPt.size() * sizeof(float);
  const auto xX0Bytes = trkParam.LayerxX0.size() * sizeof(float);

  // Lazy allocation: only buffers that do not exist yet are created.
  if (!mLayerRadiiDevice) {
    allocMem(reinterpret_cast<void**>(&mLayerRadiiDevice), radiiBytes, this->hasFrameworkAllocator());
  }
  if (!mMinPtsDevice) {
    allocMem(reinterpret_cast<void**>(&mMinPtsDevice), minPtBytes, this->hasFrameworkAllocator());
  }
  if (!mLayerxX0Device) {
    allocMem(reinterpret_cast<void**>(&mLayerxX0Device), xX0Bytes, this->hasFrameworkAllocator());
  }

  // Host -> device transfers, in the same order as the allocations.
  GPUChkErrS(cudaMemcpy(mLayerRadiiDevice, trkParam.LayerRadii.data(), radiiBytes, cudaMemcpyHostToDevice));
  GPUChkErrS(cudaMemcpy(mMinPtsDevice, trkParam.MinPt.data(), minPtBytes, cudaMemcpyHostToDevice));
  GPUChkErrS(cudaMemcpy(mLayerxX0Device, trkParam.LayerxX0.data(), xX0Bytes, cudaMemcpyHostToDevice));
}
82+
6683template <int NLayers>
6784void TimeFrameGPU<NLayers>::createUnsortedClustersDeviceArray(const int maxLayers)
6885{
@@ -420,29 +437,6 @@ void TimeFrameGPU<NLayers>::createTrackletsBuffers(const int layer)
420437 GPUChkErrS (cudaMemcpyAsync (&mTrackletsDeviceArray [layer], &mTrackletsDevice [layer], sizeof (Tracklet*), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
421438}
422439
// Upload the host tracklets of the first NLayers - 1 layers to mTrackletsDevice.
// For every layer the host buffer is first registered with cudaHostRegister
// (portable flag) and then copied with cudaMemcpyAsync on that layer's stream.
// No explicit stream synchronisation is issued in this function.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadTrackletsDevice()
{
  GPUTimer timer(mGpuStreams, " loading tracklets", NLayers - 1);
  for (int layer = 0; layer < NLayers - 1; ++layer) {
    auto& hostTracklets = this->mTracklets[layer];
    const auto nBytes = hostTracklets.size() * sizeof(Tracklet);
    GPULog(" gpu-transfer: loading {} tracklets on layer {}, for {:.2f} MB.", hostTracklets.size(), layer, nBytes / constants::MB);
    // Register the host memory before the asynchronous copy.
    GPUChkErrS(cudaHostRegister(hostTracklets.data(), nBytes, cudaHostRegisterPortable));
    GPUChkErrS(cudaMemcpyAsync(mTrackletsDevice[layer], hostTracklets.data(), nBytes, cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
  }
}
433-
// Upload the host tracklet lookup tables to the device.
// Host table i is copied asynchronously into device slot i + 1 using stream i;
// after all streams are synchronised, the array of device LUT pointers
// (NLayers - 1 entries) is copied to mTrackletsLUTDeviceArray with a blocking
// cudaMemcpy.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadTrackletsLUTDevice()
{
  GPUTimer timer(" loading tracklets");
  for (int layer = 0; layer < NLayers - 2; ++layer) {
    auto& hostLUT = this->mTrackletsLookupTable[layer];
    const auto nBytes = hostLUT.size() * sizeof(int);
    GPULog(" gpu-transfer: loading tracklets LUT for {} elements on layer {}, for {:.2f} MB", hostLUT.size(), layer + 1, nBytes / constants::MB);
    // Destination index is shifted by one with respect to the host table index.
    GPUChkErrS(cudaMemcpyAsync(mTrackletsLUTDevice[layer + 1], hostLUT.data(), nBytes, cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
  }
  // Wait for the per-layer copies before publishing the pointer array.
  mGpuStreams.sync();
  GPUChkErrS(cudaMemcpy(mTrackletsLUTDeviceArray, mTrackletsLUTDevice.data(), (NLayers - 1) * sizeof(int*), cudaMemcpyHostToDevice));
}
445-
446440template <int NLayers>
447441void TimeFrameGPU<NLayers>::createNeighboursIndexTablesDevice(const int layer)
448442{
@@ -462,19 +456,6 @@ void TimeFrameGPU<NLayers>::createNeighboursLUTDevice(const int layer, const uns
462456 GPUChkErrS (cudaMemcpyAsync (&mNeighboursCellLUTDeviceArray [layer], &mNeighboursLUTDevice [layer], sizeof (int *), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
463457}
464458
// Upload the host cell seeds of the first NLayers - 2 layers to the device.
// Per layer, on that layer's stream: allocate the cell buffer, allocate and
// zero an index table with one more int than there are cells, then copy the
// cells asynchronously. No explicit synchronisation is issued here.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadCellsDevice()
{
  GPUTimer timer(mGpuStreams, " loading cell seeds", NLayers - 2);
  for (int layer = 0; layer < NLayers - 2; ++layer) {
    auto& hostCells = this->mCells[layer];
    const auto cellBytes = hostCells.size() * sizeof(CellSeed);
    GPULog(" gpu-transfer: loading {} cell seeds on layer {}, for {:.2f} MB.", hostCells.size(), layer, cellBytes / constants::MB);
    allocMemAsync(reinterpret_cast<void**>(&mCellsDevice[layer]), cellBytes, mGpuStreams[layer], this->hasFrameworkAllocator());
    // Index table used as an accessory by the neighbour finding.
    allocMemAsync(reinterpret_cast<void**>(&mNeighboursIndexTablesDevice[layer]), (hostCells.size() + 1) * sizeof(int), mGpuStreams[layer], this->hasFrameworkAllocator());
    GPUChkErrS(cudaMemsetAsync(mNeighboursIndexTablesDevice[layer], 0, (hostCells.size() + 1) * sizeof(int), mGpuStreams[layer].get()));
    GPUChkErrS(cudaMemcpyAsync(mCellsDevice[layer], hostCells.data(), cellBytes, cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
  }
}
477-
478459template <int NLayers>
479460void TimeFrameGPU<NLayers>::createCellsLUTDeviceArray()
480461{
@@ -523,17 +504,6 @@ void TimeFrameGPU<NLayers>::createCellsBuffers(const int layer)
523504 GPUChkErrS (cudaMemcpyAsync (&mCellsDeviceArray [layer], &mCellsDevice [layer], sizeof (CellSeed*), cudaMemcpyHostToDevice, mGpuStreams [layer].get ()));
524505}
525506
// Upload the host cell lookup tables of the first NLayers - 3 layers.
// Each host table is registered with cudaHostRegister (portable flag) and then
// copied asynchronously into device slot i + 1 on stream i.
// No explicit synchronisation is issued in this function.
template <int NLayers>
void TimeFrameGPU<NLayers>::loadCellsLUTDevice()
{
  GPUTimer timer(mGpuStreams, " loading cells LUTs", NLayers - 3);
  for (int layer = 0; layer < NLayers - 3; ++layer) {
    auto& hostLUT = this->mCellsLookupTable[layer];
    const auto nBytes = hostLUT.size() * sizeof(int);
    GPULog(" gpu-transfer: loading cell LUT for {} elements on layer {}, for {:.2f} MB.", hostLUT.size(), layer, nBytes / constants::MB);
    GPUChkErrS(cudaHostRegister(hostLUT.data(), nBytes, cudaHostRegisterPortable));
    // Destination index is shifted by one with respect to the host table index.
    GPUChkErrS(cudaMemcpyAsync(mCellsLUTDevice[layer + 1], hostLUT.data(), nBytes, cudaMemcpyHostToDevice, mGpuStreams[layer].get()));
  }
}
536-
537507template <int NLayers>
538508void TimeFrameGPU<NLayers>::loadTrackSeedsDevice(bounded_vector<TrackSeedN>& seeds)
539509{
@@ -581,44 +551,6 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
581551 GPUChkErrS (cudaMemset (mTrackITSExtDevice , 0 , mNTracks * sizeof (o2::its::TrackITSExt)));
582552}
583553
// Copy the device cells of the first NLayers - 2 layers back to the host.
// Each host vector is resized to mNCells[layer] and filled by a
// cudaMemcpyAsync enqueued on that layer's stream; this function does not
// itself wait for the copies to finish.
template <int NLayers>
void TimeFrameGPU<NLayers>::downloadCellsDevice()
{
  GPUTimer timer(mGpuStreams, " downloading cells", NLayers - 2);
  for (int layer = 0; layer < NLayers - 2; ++layer) {
    GPULog(" gpu-transfer: downloading {} cells on layer: {}, for {:.2f} MB.", mNCells[layer], layer, mNCells[layer] * sizeof(CellSeed) / constants::MB);
    auto& hostCells = this->mCells[layer];
    // Make room on the host for exactly the number of cells held on the device.
    hostCells.resize(mNCells[layer]);
    GPUChkErrS(cudaMemcpyAsync(hostCells.data(), this->mCellsDevice[layer], mNCells[layer] * sizeof(CellSeed), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
  }
}
594-
// Copy the device cell lookup tables back to the host for the first
// NLayers - 3 layers. Host table i is resized to mNTracklets[i + 1] + 1
// entries and filled from device slot i + 1 by a cudaMemcpyAsync on stream i;
// this function does not itself wait for the copies to finish.
template <int NLayers>
void TimeFrameGPU<NLayers>::downloadCellsLUTDevice()
{
  GPUTimer timer(mGpuStreams, " downloading cell luts", NLayers - 3);
  for (int layer = 0; layer < NLayers - 3; ++layer) {
    GPULog(" gpu-transfer: downloading cells lut on layer {} for {} elements", layer, (mNTracklets[layer + 1] + 1));
    auto& hostLUT = this->mCellsLookupTable[layer];
    hostLUT.resize(mNTracklets[layer + 1] + 1);
    GPUChkErrS(cudaMemcpyAsync(hostLUT.data(), mCellsLUTDevice[layer + 1], (mNTracklets[layer + 1] + 1) * sizeof(int), cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
  }
}
605-
// Copy the cell neighbours of one layer from mNeighboursDevice[layer] into
// neighbours[layer]. The number of elements copied is the current size of the
// host vector (it is not resized here); the transfer is a cudaMemcpyAsync on
// the stream of that layer.
template <int NLayers>
void TimeFrameGPU<NLayers>::downloadCellsNeighboursDevice(std::vector<bounded_vector<CellNeighbour>>& neighbours, const int layer)
{
  GPUTimer timer(mGpuStreams[layer], " downloading neighbours from layer", layer);
  auto& hostNeighbours = neighbours[layer];
  const auto nBytes = hostNeighbours.size() * sizeof(CellNeighbour);
  GPULog(" gpu-transfer: downloading {} neighbours, for {:.2f} MB.", hostNeighbours.size(), nBytes / constants::MB);
  GPUChkErrS(cudaMemcpyAsync(hostNeighbours.data(), mNeighboursDevice[layer], nBytes, cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
}
613-
// Copy the neighbours lookup table of one layer from
// mNeighboursLUTDevice[layer] into lut. The number of ints copied is the
// current size of lut (it is not resized here); the transfer is a
// cudaMemcpyAsync on the stream of that layer.
template <int NLayers>
void TimeFrameGPU<NLayers>::downloadNeighboursLUTDevice(bounded_vector<int>& lut, const int layer)
{
  GPUTimer timer(mGpuStreams[layer], " downloading neighbours LUT from layer", layer);
  const auto nBytes = lut.size() * sizeof(int);
  GPULog(" gpu-transfer: downloading neighbours LUT for {} elements on layer {}, for {:.2f} MB.", lut.size(), layer, nBytes / constants::MB);
  GPUChkErrS(cudaMemcpyAsync(lut.data(), mNeighboursLUTDevice[layer], nBytes, cudaMemcpyDeviceToHost, mGpuStreams[layer].get()));
}
621-
622554template <int NLayers>
623555void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
624556{
0 commit comments