@@ -35,7 +35,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
3535 __syncthreads ();
3636
3737 // Apply basis element by element
38+ #if CEED_HIP_USE_CHIPSTAR
3839 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
40+ #else
41+ const CeedInt elem_loop_bound = num_elem ;
42+ #endif
3943
4044 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
4145 const CeedInt elem = e % num_elem ;
@@ -78,7 +82,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
7882 CeedScalar r_U [BASIS_NUM_COMP * (BASIS_DIM > 2 ? BASIS_P_1D : 1 )];
7983
8084 // Apply basis element by element
85+ #if CEED_HIP_USE_CHIPSTAR
8186 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
87+ #else
88+ const CeedInt elem_loop_bound = num_elem ;
89+ #endif
8290
8391 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
8492 const CeedInt elem = e % num_elem ;
@@ -124,7 +132,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
124132 __syncthreads ();
125133
126134 // Apply basis element by element
135+ #if CEED_HIP_USE_CHIPSTAR
127136 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
137+ #else
138+ const CeedInt elem_loop_bound = num_elem ;
139+ #endif
128140
129141 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
130142 const CeedInt elem = e % num_elem ;
@@ -167,7 +179,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
167179 CeedScalar r_U [BASIS_NUM_COMP * (BASIS_DIM > 2 ? BASIS_Q_1D : 1 )];
168180
169181 // Apply basis element by element
182+ #if CEED_HIP_USE_CHIPSTAR
170183 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
184+ #else
185+ const CeedInt elem_loop_bound = num_elem ;
186+ #endif
171187
172188 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
173189 const CeedInt elem = e % num_elem ;
@@ -213,7 +229,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
213229 __syncthreads ();
214230
215231 // Apply basis element by element
232+ #if CEED_HIP_USE_CHIPSTAR
216233 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
234+ #else
235+ const CeedInt elem_loop_bound = num_elem ;
236+ #endif
217237
218238 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
219239 const CeedInt elem = e % num_elem ;
@@ -257,7 +277,11 @@ extern "C" __launch_bounds__(BASIS_INTERP_BLOCK_SIZE) __global__
257277 CeedScalar r_U [BASIS_NUM_COMP * (BASIS_DIM > 2 ? BASIS_Q_1D : 1 )];
258278
259279 // Apply basis element by element
280+ #if CEED_HIP_USE_CHIPSTAR
260281 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
282+ #else
283+ const CeedInt elem_loop_bound = num_elem ;
284+ #endif
261285
262286 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
263287 const CeedInt elem = e % num_elem ;
@@ -308,7 +332,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__ void Grad(const C
308332 __syncthreads ();
309333
310334 // Apply basis element by element
335+ #if CEED_HIP_USE_CHIPSTAR
311336 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
337+ #else
338+ const CeedInt elem_loop_bound = num_elem ;
339+ #endif
312340
313341 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
314342 const CeedInt elem = e % num_elem ;
@@ -360,7 +388,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__
360388 __syncthreads ();
361389
362390 // Apply basis element by element
391+ #if CEED_HIP_USE_CHIPSTAR
363392 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
393+ #else
394+ const CeedInt elem_loop_bound = num_elem ;
395+ #endif
364396
365397 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
366398 const CeedInt elem = e % num_elem ;
@@ -413,7 +445,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__
413445 __syncthreads ();
414446
415447 // Apply basis element by element
448+ #if CEED_HIP_USE_CHIPSTAR
416449 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
450+ #else
451+ const CeedInt elem_loop_bound = num_elem ;
452+ #endif
417453
418454 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
419455 const CeedInt elem = e % num_elem ;
@@ -465,7 +501,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__
465501 __syncthreads ();
466502
467503 // Apply basis element by element
504+ #if CEED_HIP_USE_CHIPSTAR
468505 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
506+ #else
507+ const CeedInt elem_loop_bound = num_elem ;
508+ #endif
469509
470510 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
471511 const CeedInt elem = e % num_elem ;
@@ -518,7 +558,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__
518558 __syncthreads ();
519559
520560 // Apply basis element by element
561+ #if CEED_HIP_USE_CHIPSTAR
521562 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
563+ #else
564+ const CeedInt elem_loop_bound = num_elem ;
565+ #endif
522566
523567 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
524568 const CeedInt elem = e % num_elem ;
@@ -570,7 +614,11 @@ extern "C" __launch_bounds__(BASIS_GRAD_BLOCK_SIZE) __global__
570614 __syncthreads ();
571615
572616 // Apply basis element by element
617+ #if CEED_HIP_USE_CHIPSTAR
573618 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
619+ #else
620+ const CeedInt elem_loop_bound = num_elem ;
621+ #endif
574622
575623 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
576624 const CeedInt elem = e % num_elem ;
@@ -616,7 +664,11 @@ extern "C" __launch_bounds__(BASIS_WEIGHT_BLOCK_SIZE) __global__
616664
617665 CeedScalar r_W [BASIS_DIM > 2 ? BASIS_Q_1D : 1 ];
618666
667+ #if CEED_HIP_USE_CHIPSTAR
619668 const CeedInt elem_loop_bound = (gridDim .x * blockDim .z ) * ceil (1.0 * num_elem / (gridDim .x * blockDim .z ));
669+ #else
670+ const CeedInt elem_loop_bound = num_elem ;
671+ #endif
620672
621673 for (CeedInt e = blockIdx .x * blockDim .z + threadIdx .z ; e < elem_loop_bound ; e += gridDim .x * blockDim .z ) {
622674 const CeedInt elem = e % num_elem ;
0 commit comments