@@ -198,19 +198,22 @@ static void bound_objective_rescaling(pdhg_solver_state_t *state, rescale_info_t
198198 compute_bound_contrib_kernel<<<state->num_blocks_dual, THREADS_PER_BLOCK >>> (
199199 state->constraint_lower_bound , state->constraint_upper_bound , num_constraints, contrib_d);
200200
201- double *bnd_norm_sq_d = nullptr ;
202- CUDA_CHECK (cudaMalloc (&bnd_norm_sq_d, sizeof (double )));
203- void *temp_storage = nullptr ;
204- size_t temp_bytes = 0 ;
205- CUDA_CHECK (cub::DeviceReduce::Sum (temp_storage, temp_bytes, contrib_d, bnd_norm_sq_d, num_constraints));
206- CUDA_CHECK (cudaMalloc (&temp_storage, temp_bytes));
207- CUDA_CHECK (cub::DeviceReduce::Sum (temp_storage, temp_bytes, contrib_d, bnd_norm_sq_d, num_constraints));
201+ double bnd_norm_sq_h = 0.0 ;
202+
203+ cublasPointerMode_t old_mode;
204+ cublasGetPointerMode (state->blas_handle , &old_mode);
205+ cublasSetPointerMode (state->blas_handle , CUBLAS_POINTER_MODE_HOST );
206+
207+ CUBLAS_CHECK (cublasDasum (state->blas_handle ,
208+ num_constraints,
209+ contrib_d,
210+ 1 ,
211+ &bnd_norm_sq_h));
212+
213+ cublasSetPointerMode (state->blas_handle , old_mode);
214+
208215 CUDA_CHECK (cudaFree (contrib_d));
209- CUDA_CHECK (cudaFree (temp_storage));
210216
211- double bnd_norm_sq_h = 0.0 ;
212- CUDA_CHECK (cudaMemcpy (&bnd_norm_sq_h, bnd_norm_sq_d, sizeof (double ), cudaMemcpyDeviceToHost));
213- CUDA_CHECK (cudaFree (bnd_norm_sq_d));
214217 double bnd_norm = sqrt (bnd_norm_sq_h);
215218
216219 double obj_norm = 0.0 ;
0 commit comments