Skip to content

Commit a91ea7b

Browse files
committed
wip
1 parent 8670fd5 commit a91ea7b

7 files changed

Lines changed: 394 additions & 325 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
namespace o2::its::gpu
2323
{
2424

25-
class Stream;
26-
2725
class DefaultGPUAllocator : public ExternalAllocator
2826
{
2927
void* allocate(size_t size) override;
@@ -80,10 +78,11 @@ class TimeFrameGPU : public TimeFrame
8078
void downloadCellsLUTDevice();
8179
void unregisterRest();
8280
template <Task task>
83-
Stream& getStream(const size_t stream)
81+
auto getStream(const size_t stream)
8482
{
85-
return *mGpuStreams[stream];
83+
return mGpuStreams[stream];
8684
}
85+
auto& getStreams() { return mGpuStreams; }
8786
void wipe(const int);
8887

8988
/// interface
@@ -141,7 +140,8 @@ class TimeFrameGPU : public TimeFrame
141140
int getNumberOfCells() const;
142141

143142
private:
144-
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
143+
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
144+
void allocMemAsync(void**, size_t, Stream&, bool, int); // Abstract owned and unowned memory allocations and set
145145
bool mHostRegistered = false;
146146
TimeFrameGPUParameters mGpuParams;
147147

@@ -194,7 +194,7 @@ class TimeFrameGPU : public TimeFrame
194194
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
195195

196196
// State
197-
std::vector<Stream*> mGpuStreams;
197+
Streams mGpuStreams;
198198
size_t mAvailMemGB;
199199
bool mFirstInit = true;
200200

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
7979
std::vector<float>& radii,
8080
std::vector<float>& mulScatAng,
8181
const int nBlocks,
82-
const int nThreads);
82+
const int nThreads,
83+
gpu::Streams& streams);
8384

8485
template <int nLayers = 7>
8586
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
@@ -112,7 +113,8 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
112113
std::vector<float>& radii,
113114
std::vector<float>& mulScatAng,
114115
const int nBlocks,
115-
const int nThreads);
116+
const int nThreads,
117+
gpu::Streams& streams);
116118

117119
void countCellsHandler(const Cluster** sortedClusters,
118120
const Cluster** unsortedClusters,

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
#ifndef ITSTRACKINGGPU_UTILS_H_
1717
#define ITSTRACKINGGPU_UTILS_H_
1818

19+
#include <vector>
20+
1921
#include "GPUCommonDef.h"
22+
#include "GPUCommonHelpers.h"
2023

2124
namespace o2
2225
{
@@ -31,11 +34,6 @@ struct gpuPair {
3134
namespace gpu
3235
{
3336

34-
template <typename T>
35-
void discardResult(const T&)
36-
{
37-
}
38-
3937
// Poor man implementation of a span-like struct. It is very limited.
4038
template <typename T>
4139
struct gpuSpan {
@@ -96,8 +94,71 @@ GPUhd() const T* getPtrFromRuler(int index, const T* src, const int* ruler, cons
9694
{
9795
return src + ruler[index] * stride;
9896
}
97+
98+
class Stream
99+
{
100+
public:
101+
#if defined(__HIPCC__)
102+
using Handle = hipStream_t;
103+
static constexpr Handle Default = 0;
104+
#elif defined(__CUDACC__)
105+
using Handle = cudaStream_t;
106+
static constexpr Handle Default = 0;
107+
#else
108+
using Handle = void*;
109+
static constexpr Handle Default = nullptr;
110+
#endif
111+
112+
Stream(unsigned int flags = 0)
113+
{
114+
#if defined(__HIPCC__)
115+
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
116+
#elif defined(__CUDACC__)
117+
GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
118+
#endif
119+
}
120+
121+
Stream(Handle h) : mHandle(h) {}
122+
~Stream()
123+
{
124+
if (mHandle != Default) {
125+
#if defined(__HIPCC__)
126+
GPUChkErrS(hipStreamDestroy(mHandle));
127+
#elif defined(__CUDACC__)
128+
GPUChkErrS(cudaStreamDestroy(mHandle));
129+
#endif
130+
}
131+
}
132+
133+
operator bool() const { return mHandle != Default; }
134+
const Handle& get() { return mHandle; }
135+
void sync() const
136+
{
137+
#if defined(__HIPCC__)
138+
GPUChkErrS(hipStreamSynchronize(mHandle));
139+
#elif defined(__CUDACC__)
140+
GPUChkErrS(cudaStreamSynchronize(mHandle));
141+
#endif
142+
}
143+
144+
private:
145+
Handle mHandle{Default};
146+
};
147+
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");
148+
149+
class Streams
150+
{
151+
public:
152+
void resize(size_t n) { mStreams.resize(n); }
153+
void clear() { mStreams.clear(); }
154+
auto& operator[](size_t i) { return mStreams[i % mStreams.size()]; }
155+
156+
private:
157+
std::vector<Stream> mStreams;
158+
};
159+
99160
} // namespace gpu
100161
} // namespace its
101162
} // namespace o2
102163

103-
#endif
164+
#endif
Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,45 @@
1-
# Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2-
# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
1+
# Copyright 2019-2020 CERN and copyright holders of ALICE O2. See
2+
# https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
33
# All rights not expressly granted are reserved.
44
#
5-
# This software is distributed under the terms of the GNU General Public
6-
# License v3 (GPL Version 3), copied verbatim in the file "COPYING".
5+
# This software is distributed under the terms of the GNU General Public License
6+
# v3 (GPL Version 3), copied verbatim in the file "COPYING".
77
#
88
# In applying this license CERN does not waive the privileges and immunities
9-
# granted to it by virtue of its status as an Intergovernmental Organization
10-
# or submit itself to any jurisdiction.
9+
# granted to it by virtue of its status as an Intergovernmental Organization or
10+
# submit itself to any jurisdiction.
1111

1212
# CUDA
1313
if(CUDA_ENABLED)
14-
find_package(CUDAToolkit)
15-
message(STATUS "Building ITS CUDA tracker")
16-
# add_compile_options(-O0 -g -lineinfo -fPIC)
17-
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
18-
o2_add_library(ITStrackingCUDA
19-
SOURCES ClusterLinesGPU.cu
20-
TrackerTraitsGPU.cxx
21-
TimeFrameGPU.cu
22-
TracerGPU.cu
23-
TrackingKernels.cu
24-
VertexingKernels.cu
25-
VertexerTraitsGPU.cxx
26-
PUBLIC_INCLUDE_DIRECTORIES ../
27-
PUBLIC_LINK_LIBRARIES O2::ITStracking
28-
O2::SimConfig
29-
O2::SimulationDataFormat
30-
O2::ReconstructionDataFormats
31-
O2::GPUCommon
32-
PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider
33-
TARGETVARNAME targetName)
14+
find_package(CUDAToolkit)
15+
message(STATUS "Building ITS CUDA tracker")
16+
add_compile_options(-lineinfo)
17+
# add_compile_options(-O0 -g -lineinfo -fPIC)
18+
# add_compile_definitions(ITS_MEASURE_GPU_TIME)
19+
o2_add_library(
20+
ITStrackingCUDA
21+
SOURCES ClusterLinesGPU.cu
22+
TrackerTraitsGPU.cxx
23+
TimeFrameGPU.cu
24+
TracerGPU.cu
25+
TrackingKernels.cu
26+
VertexingKernels.cu
27+
VertexerTraitsGPU.cxx
28+
PUBLIC_INCLUDE_DIRECTORIES ../
29+
PUBLIC_LINK_LIBRARIES
30+
O2::ITStracking
31+
O2::SimConfig
32+
O2::SimulationDataFormat
33+
O2::ReconstructionDataFormats
34+
O2::GPUCommon
35+
PRIVATE_LINK_LIBRARIES
36+
O2::GPUTrackingCUDAExternalProvider
37+
TARGETVARNAME targetName)
3438

35-
set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
36-
target_compile_definitions(${targetName} PRIVATE $<TARGET_PROPERTY:O2::ITStracking,COMPILE_DEFINITIONS>)
37-
set_target_cuda_arch(${targetName})
39+
set_property(TARGET ${targetName} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
40+
target_compile_definitions(
41+
${targetName}
42+
PRIVATE $<TARGET_PROPERTY:O2::ITStracking,COMPILE_DEFINITIONS>)
43+
set_target_cuda_arch(${targetName})
3844

3945
endif()

0 commit comments

Comments
 (0)