Skip to content

Commit ee24b26

Browse files
author
Kent Knox
committed
Merge pull request #165 from jlgreathouse/develop
Fix for #161 + Print GFLOPs :+1:
2 parents abbfece + 3c95930 commit ee24b26

7 files changed

Lines changed: 15 additions & 12 deletions

File tree

src/benchmarks/clsparse-bench/functions/clfunc_xSpMdV.hpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,12 +71,12 @@ class xSpMdV: public clsparseFunc
7171

7272
double gflops( )
7373
{
74-
return 0.0;
74+
return ((2 * csrMtx.num_nonzeros) / time_in_ns ( ));
7575
}
7676

7777
std::string gflops_formula( )
7878
{
79-
return "N/A";
79+
return "GFLOPs";
8080
}
8181

8282
double bandwidth( )
@@ -207,12 +207,15 @@ class xSpMdV: public clsparseFunc
207207
{
208208
std::cout << "clSPARSE matrix: " << sparseFile << std::endl;
209209
size_t sparseBytes = sizeof( cl_int )*( csrMtx.num_nonzeros + csrMtx.num_rows ) + sizeof( T ) * ( csrMtx.num_nonzeros + csrMtx.num_cols + csrMtx.num_rows );
210+
size_t sparseFlops = 2 * csrMtx.num_nonzeros;
210211
cpuTimer->pruneOutliers( 3.0 );
211212
cpuTimer->Print( sparseBytes, "GiB/s" );
213+
cpuTimer->Print( sparseFlops, "GFLOPs" );
212214
cpuTimer->Reset( );
213215

214216
gpuTimer->pruneOutliers( 3.0 );
215217
gpuTimer->Print( sparseBytes, "GiB/s" );
218+
gpuTimer->Print( sparseFlops, "GFLOPs" );
216219
gpuTimer->Reset( );
217220
}
218221

src/benchmarks/cusparse-bench/functions/cufunc_xSpMdV.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,12 +51,12 @@ class xSpMdV : public cusparseFunc
5151

5252
double gflops( )
5353
{
54-
return 0.0;
54+
return ((2 * n_vals) / time_in_ns ( ));
5555
}
5656

5757
std::string gflops_formula( )
5858
{
59-
return "N/A";
59+
return "GFLOPs";
6060
}
6161

6262
double bandwidth( )

src/benchmarks/cusparse-bench/src/main.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,8 +278,10 @@ int main(int argc, char *argv[])
278278
timer.pruneOutliers( 3.0 );
279279
std::cout << "cuSPARSE matrix: " << path << std::endl;
280280
std::cout << "cuSPARSE kernel execution time < ns >: " << my_function->time_in_ns( ) << std::endl;
281-
std::cout << "cuSPARSE kernel execution Gflops < " <<
281+
std::cout << "cuSPARSE kernel execution < " <<
282282
my_function->bandwidth_formula( ) << " >: " << my_function->bandwidth( ) << std::endl << std::endl;
283+
std::cout << "cuSPARSE kernel execution < " <<
284+
my_function->gflops_formula( ) << " >: " << my_function->gflops( ) << std::endl << std::endl;
283285
}
284286

285287
}

src/library/io/mm-reader.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -568,7 +568,6 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls
568568
MatrixMarketReader< cl_float > mm_reader;
569569
if( mm_reader.MMReadFormat( filePath ) )
570570
return clsparseInvalidFile;
571-
#if 0
572571
// BUG: We need to check to see if openCL buffers currently exist and deallocate them first!
573572
// FIX: Below code will check whether the buffers were allocated in the first place;
574573
{
@@ -590,7 +589,6 @@ clsparseSCsrMatrixfromFile(clsparseCsrMatrix* csrMatx, const char* filePath, cls
590589
if (validationStatus != clsparseSuccess)
591590
return validationStatus;
592591
}
593-
#endif
594592

595593
// JPA: Shouldn't that just be an assertion check? It seems to me that
596594
// the user have to call clsparseHeaderfromFile before calling this function,

src/library/kernels/atomic_reduce.cl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ R"(
3535
#endif
3636
#endif
3737

38-
#if defined(ATOMIC_FLOAT) || defined (ATOMIC_INT)
38+
#if __OPENCL_VERSION__ <= CL_VERSION_1_0 && (defined(ATOMIC_FLOAT) || defined (ATOMIC_INT))
3939
#if defined(cl_khr_global_int32_base_atomics) && defined(cl_khr_global_int32_extended_atomics)
4040
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
4141
#pragma OPENCL_EXTENSION cl_khr_global_int32_extended_atomics : enable

src/library/kernels/csrmm_adaptive.cl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,8 @@ R"(
4242
#if __OPENCL_VERSION__ > CL_VERSION_1_0
4343
#define ATOM32
4444
#elif defined(cl_khr_global_int32_base_atomics) && defined(cl_khr_global_int32_extended_atomics)
45-
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : require
46-
#pragma OPENCL_EXTENSION cl_khr_global_int32_extended_atomics : require
45+
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
46+
#pragma OPENCL_EXTENSION cl_khr_global_int32_extended_atomics : enable
4747
#define ATOM32
4848
#else
4949
#error "Required integer atomics not supported by this OpenCL implemenation."

src/library/kernels/csrmv_adaptive.cl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@ R"(
3535
#if __OPENCL_VERSION__ > CL_VERSION_1_0
3636
#define ATOM32
3737
#elif defined(cl_khr_global_int32_base_atomics) && defined(cl_khr_global_int32_extended_atomics)
38-
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : require
39-
#pragma OPENCL_EXTENSION cl_khr_global_int32_extended_atomics : require
38+
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
39+
#pragma OPENCL_EXTENSION cl_khr_global_int32_extended_atomics : enable
4040
#define ATOM32
4141
#else
4242
#error "Required integer atomics not supported by this OpenCL implemenation."

0 commit comments

Comments
 (0)