Commit 097601c

Add pow1p NVCC testing
1 parent 699b093 commit 097601c
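
For context (not part of the diff): boost::math::pow1p(x, y) evaluates (1 + x)^y without first forming 1 + x, so a tiny x is not rounded away the way it is in the naive std::pow(1.0 + x, y). A minimal host-side sketch of that behaviour, using the same umbrella header as the tests below (illustrative only; the chosen inputs are an assumption, not taken from this commit):

// Illustrative sketch, not part of this commit.
#include <boost/math/special_functions.hpp>
#include <cmath>
#include <iomanip>
#include <iostream>

int main()
{
    double x = 1e-17, y = 1e17; // (1 + 1e-17)^(1e17) is approximately e
    std::cout << std::setprecision(17)
              << boost::math::pow1p(x, y) << "\n"   // ~2.718..., the expected value
              << std::pow(1.0 + x, y)     << "\n";  // 1.0: x vanished in 1.0 + x
}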

3 files changed

Lines changed: 224 additions & 0 deletions

test/cuda_jamfile

Lines changed: 2 additions & 0 deletions
@@ -118,3 +118,5 @@ run test_trigamma_double.cu ;
 run test_trigamma_float.cu ;
 run test_trunc_double.cu ;
 run test_trunc_float.cu ;
+run test_pow1p_double.cu ;
+run test_pow1p_float.cu ;
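
Each run statement declares a Boost.Build test target that compiles the listed .cu file with the configured NVCC toolchain, links it, and executes the resulting binary. Assuming a working CUDA-enabled Boost.Build setup, the new targets would typically be invoked by name, e.g. b2 test_pow1p_double (a hypothetical invocation; the exact command depends on how cuda_jamfile is hooked into the local build).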

test/test_pow1p_double.cu

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::pow1p(in1[i], in2[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed input vector B
    cuda_managed_ptr<float_type> input_vector2(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vectors
    std::mt19937_64 rng;
    std::uniform_real_distribution<float_type> x_vals(-1, 1);
    std::uniform_real_distribution<float_type> y_vals(-10, 10);
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = x_vals(rng);
        input_vector2[i] = y_vals(rng);
    }

    // Launch the pow1p CUDA kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
    {
        results.push_back(boost::math::pow1p(input_vector1[i], input_vector2[i]));
    }

    double t = w.elapsed();
    // Check the results: allow up to 10 machine epsilons of disagreement
    // between the device and host values
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
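
A quick worked example of the launch arithmetic above (illustrative only, not part of the commit): the ceiling division rounds 50000 / 1024 up, so slightly more threads are launched than there are elements, and the bounds check in cuda_test masks the excess.

// Illustrative arithmetic for the launch geometry used in the test:
int numElements = 50000;
int threadsPerBlock = 1024;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
// blocksPerGrid == (50000 + 1023) / 1024 == 51023 / 1024 == 49
// 49 * 1024 == 50176 threads in total; the final 176 threads fail the
// kernel's (i < numElements) check and simply return.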

test/test_pow1p_float.cu

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <boost/math/special_functions.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef float float_type;

/**
 * CUDA Kernel Device code
 */
__global__ void cuda_test(const float_type *in1, const float_type *in2, float_type *out, int numElements)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < numElements)
    {
        out[i] = boost::math::pow1p(in1[i], in2[i]);
    }
}

/**
 * Host main routine
 */
int main(void)
{
    // Error code to check return values for CUDA calls
    cudaError_t err = cudaSuccess;

    // Print the vector length to be used, and compute its size
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector A
    cuda_managed_ptr<float_type> input_vector1(numElements);

    // Allocate the managed input vector B
    cuda_managed_ptr<float_type> input_vector2(numElements);

    // Allocate the managed output vector C
    cuda_managed_ptr<float_type> output_vector(numElements);

    // Initialize the input vectors
    std::mt19937_64 rng;
    std::uniform_real_distribution<float_type> x_vals(-1, 1);
    std::uniform_real_distribution<float_type> y_vals(-10, 10);
    for (int i = 0; i < numElements; ++i)
    {
        input_vector1[i] = x_vals(rng);
        input_vector2[i] = y_vals(rng);
    }

    // Launch the pow1p CUDA kernel
    int threadsPerBlock = 1024;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1.get(), input_vector2.get(), output_vector.get(), numElements);
    cudaDeviceSynchronize();

    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    err = cudaGetLastError();

    if (err != cudaSuccess)
    {
        std::cerr << "Failed to launch cuda_test kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Verify that the result vector is correct
    std::vector<float_type> results;
    results.reserve(numElements);
    w.reset();
    for(int i = 0; i < numElements; ++i)
    {
        results.push_back(boost::math::pow1p(input_vector1[i], input_vector2[i]));
    }

    double t = w.elapsed();
    // Check the results: allow up to 10 machine epsilons of disagreement
    // between the device and host values
    for(int i = 0; i < numElements; ++i)
    {
        if (boost::math::epsilon_difference(output_vector[i], results[i]) > 10)
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
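
The pass/fail criterion in both tests is boost::math::epsilon_difference, which reports how far apart two values are in units of machine epsilon, so the threshold of 10 tolerates roughly 10 eps of relative disagreement between the device and host results. A minimal sketch of that behaviour (illustrative only; it assumes the function is available via Boost.Math's relative_difference.hpp header as documented):

// Minimal sketch, not from the commit: epsilon_difference measures the
// relative difference between two values in units of machine epsilon.
#include <boost/math/special_functions/relative_difference.hpp>
#include <iostream>
#include <limits>

int main()
{
    double a = 1.0;
    double b = a + 5 * std::numeric_limits<double>::epsilon();
    // Prints approximately 5: b is about five machine epsilons away from a.
    std::cout << boost::math::epsilon_difference(a, b) << "\n";
}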
