Skip to content

Commit a0be844

Browse files
authored
Support cuda 13 (#6316)
* Support cuda 13
* formatting
* formatting

---------

Co-authored-by: jmackay2 <jmackay2>
1 parent 11ed9cc commit a0be844

5 files changed

Lines changed: 33 additions & 20 deletions

File tree

cuda/common/include/pcl/cuda/cutil_inline_runtime.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,7 @@ inline int cutGetMaxGflopsDeviceId()
110110
int max_perf_device = 0;
111111
int device_count = 0;
112112
int best_SM_arch = 0;
113+
int clock_rate = 0;
113114

114115
cudaGetDeviceCount( &device_count );
115116
// Find the best major SM Architecture GPU device
@@ -129,7 +130,8 @@ inline int cutGetMaxGflopsDeviceId()
129130
cudaGetDeviceProperties( &deviceProp, current_device );
130131
int sm_per_multiproc = (deviceProp.major == 9999 && deviceProp.minor == 9999) ? 1 : _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
131132

132-
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
133+
cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, current_device);
134+
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * clock_rate;
133135
if( compute_perf > max_compute_perf ) {
134136
// If we find GPU with SM major > 2, search only these
135137
if ( best_SM_arch > 2 ) {
@@ -156,7 +158,8 @@ inline int cutGetMaxGflopsGraphicsDeviceId()
156158
int max_perf_device = 0;
157159
int device_count = 0;
158160
int best_SM_arch = 0;
159-
int bTCC = 0;
161+
int bTCC = 0;
162+
int clock_rate = 0;
160163

161164
cudaGetDeviceCount( &device_count );
162165
// Find the best major SM Architecture GPU device that is graphics capable
@@ -185,7 +188,8 @@ inline int cutGetMaxGflopsGraphicsDeviceId()
185188

186189
if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
187190
{
188-
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
191+
cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, current_device);
192+
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * clock_rate;
189193
if( compute_perf > max_compute_perf ) {
190194
// If we find GPU with SM major > 2, search only these
191195
if ( best_SM_arch > 2 ) {

cuda/sample_consensus/src/sac_model_1point_plane.cu

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -326,7 +326,7 @@ namespace pcl
326326
//thrust::counting_iterator<int> first (0);
327327
// Input: Point Cloud, Indices
328328
// Output: Hypotheses
329-
transform (//first, first + max_iterations,
329+
thrust::transform (//first, first + max_iterations,
330330
//index_sequence_begin,
331331
//index_sequence_begin + max_iterations,
332332
randoms.begin (), randoms.begin () + max_iterations,
@@ -360,7 +360,7 @@ namespace pcl
360360
//thrust::counting_iterator<int> first (0);
361361
// Input: Point Cloud, Indices
362362
// Output: Hypotheses
363-
transform (//first, first + max_iterations,
363+
thrust::transform (//first, first + max_iterations,
364364
//index_sequence_begin,
365365
//index_sequence_begin + max_iterations,
366366
randoms.begin (), randoms.begin () + max_iterations,
@@ -555,7 +555,7 @@ namespace pcl
555555
coefficients.z = model_coefficients[2];
556556
coefficients.w = model_coefficients[3];
557557

558-
return (int) count_if (
558+
return (int) thrust::count_if (
559559
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
560560
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
561561
indices_->size (),
@@ -608,7 +608,7 @@ namespace pcl
608608
{
609609
// pcl::ScopeTime t ("transform");
610610
// Send the data to the device
611-
transform (
611+
thrust::transform (
612612
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
613613
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
614614
nr_points,
@@ -667,7 +667,7 @@ namespace pcl
667667
{
668668
// pcl::ScopeTime t ("transform");
669669
// Send the data to the device
670-
transform (
670+
thrust::transform (
671671
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
672672
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
673673
nr_points,

cuda/sample_consensus/src/sac_model_plane.cu

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,7 @@ namespace pcl
238238
coefficients.z = model_coefficients[2];
239239
coefficients.w = model_coefficients[3];
240240

241-
return (int) count_if (
241+
return (int) thrust::count_if (
242242
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
243243
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
244244
indices_->size (),
@@ -286,7 +286,7 @@ namespace pcl
286286
coefficients.w = model_coefficients[3];
287287

288288
// Send the data to the device
289-
transform (
289+
thrust::transform (
290290
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
291291
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
292292
nr_points,
@@ -331,7 +331,7 @@ namespace pcl
331331
coefficients.w = ((float4)h[idx]).w;
332332

333333
// Send the data to the device
334-
transform (
334+
thrust::transform (
335335
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
336336
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
337337
nr_points,
@@ -372,7 +372,7 @@ namespace pcl
372372
coefficients.z = ((float4)h[idx]).z;
373373
coefficients.w = ((float4)h[idx]).w;
374374

375-
transform (
375+
thrust::transform (
376376
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())),
377377
make_zip_iterator (make_tuple (input_->points.begin (), indices_->begin ())) +
378378
nr_points,

gpu/containers/src/initialization.cpp

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -229,8 +229,10 @@ pcl::gpu::printCudaDeviceInfo(int device)
229229
prop.multiProcessorCount,
230230
sm_cores,
231231
sm_cores * prop.multiProcessorCount);
232+
int clockRate;
233+
cudaSafeCall(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, dev));
232234
printf(" GPU Clock Speed: %.2f GHz\n",
233-
prop.clockRate * 1e-6f);
235+
clockRate * 1e-6f);
234236

235237
// This is not available in the CUDA Runtime API, so we make the necessary calls the
236238
// driver API to support this for output
@@ -285,10 +287,13 @@ pcl::gpu::printCudaDeviceInfo(int device)
285287

286288
printf(
287289
" Concurrent copy and execution: %s with %d copy engine(s)\n",
288-
(prop.deviceOverlap ? "Yes" : "No"),
290+
(prop.asyncEngineCount ? "Yes" : "No"),
289291
prop.asyncEngineCount);
292+
int kernelExecTimeoutEnabled;
293+
cudaSafeCall(cudaDeviceGetAttribute(
294+
&kernelExecTimeoutEnabled, cudaDevAttrKernelExecTimeout, dev));
290295
printf(" Run time limit on kernels: %s\n",
291-
prop.kernelExecTimeoutEnabled ? "Yes" : "No");
296+
kernelExecTimeoutEnabled ? "Yes" : "No");
292297
printf(" Integrated GPU sharing Host Memory: %s\n",
293298
prop.integrated ? "Yes" : "No");
294299
printf(" Support host page-locked memory mapping: %s\n",
@@ -307,8 +312,10 @@ pcl::gpu::printCudaDeviceInfo(int device)
307312
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n",
308313
prop.pciBusID,
309314
prop.pciDeviceID);
315+
int propComputeMode;
316+
cudaSafeCall(cudaDeviceGetAttribute(&propComputeMode, cudaDevAttrComputeMode, dev));
310317
printf(" Compute Mode:\n");
311-
printf(" %s \n", computeMode[prop.computeMode]);
318+
printf(" %s \n", computeMode[propComputeMode]);
312319
}
313320

314321
printf("\n");

gpu/utils/include/pcl/gpu/utils/device/functional.hpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,14 +39,12 @@
3939
#define PCL_DEVICE_FUNCTIONAL_HPP_
4040

4141
#include <thrust/functional.h>
42+
#include <cuda.h>
4243

4344
namespace pcl
4445
{
4546
namespace device
4647
{
47-
// Function Objects
48-
49-
using thrust::binary_function;
5048

5149
// Arithmetic Operations
5250

@@ -87,7 +85,11 @@ namespace pcl
8785

8886
// Generalized Identity Operations
8987

90-
using thrust::identity;
88+
#if CUDA_VERSION >= 13000
89+
using cuda::std::identity;
90+
#else
91+
using thrust::identity;
92+
#endif
9193
using thrust::project1st;
9294
using thrust::project2nd;
9395

0 commit comments

Comments
 (0)