@@ -9,6 +9,9 @@ function ruiz_rescaling(
99 dims:: PDLPDims ,
1010 )
1111
12+ # Set the block count to the number of multiprocessors on the current CUDA device
13+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
14+
1215 # Set up temporary variables for intermediate scaling results
1316 temp_variable_rescaling = CuArray {Float64} (undef, size (variable_rescaling))
1417 temp_constraint_rescaling = CuArray {Float64} (undef, size (constraint_rescaling))
@@ -57,6 +60,9 @@ function pock_chambolle_rescaling(
5760 dims:: PDLPDims ,
5861 )
5962
63+ # Set the block count to the number of multiprocessors on the current CUDA device
64+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
65+
6066 # Preallocate space for intermediate rescaling terms
6167 temp_variable_rescaling = CuArray {Float64} (undef, size (variable_rescaling))
6268 temp_constraint_rescaling = CuArray {Float64} (undef, size (constraint_rescaling))
@@ -102,6 +108,9 @@ function scale_problem(
102108 dims:: PDLPDims ,
103109 )
104110
111+ # Set the block count to the number of multiprocessors on the current CUDA device
112+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
113+
105114 # Perform the following steps:
106115 # 1) problem.objective_vector = problem.objective_vector ./ variable_rescaling
107116 # 2) problem.variable_lower_bound = problem.variable_lower_bound .* variable_rescaling
@@ -140,6 +149,7 @@ function select_initial_primal_weight(
140149 # In principle the primal importance can be changed, but the MOI_wrapper sets it to 1.0
141150 # by default. The other parameters cannot be set in cuPDLP, although in principle they
142151 # could be made changeable as well.
152+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
143153 CUDA. @sync @cuda blocks= GPU_blocks threads= 512 primal_weight_kernel (
144154 primal_weight,
145155 problem. objective_vector,
@@ -152,6 +162,7 @@ function select_initial_primal_weight(
152162end
153163
154164function update_step_size (problem:: LinearProgramSet , step_size:: CuArray{Float64} , dims:: PDLPDims )
165+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
155166 CUDA. @sync @cuda blocks= GPU_blocks threads= 512 group_max_kernel (
156167 step_size,
157168 problem. constraint_matrix,
@@ -177,6 +188,9 @@ function add_LP_objective_constraint(
177188 active_constraint:: CuArray{Bool} ,
178189 dims:: PDLPDims
179190 )
191+
192+ # Set the block count to the number of multiprocessors on the current CUDA device
193+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
180194
181195 # Add the constraint
182196 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_LP_constraint_kernel (
@@ -210,6 +224,9 @@ function add_LP_constraint(
210224 dims:: PDLPDims ;
211225 geq:: Bool = true
212226 )
227+
228+ # Set the block count to the number of multiprocessors on the current CUDA device
229+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
213230
214231 # Add the constraint
215232 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_LP_constraint_kernel (
@@ -240,6 +257,9 @@ function add_LP_lower_bound(
240257 active_constraint:: CuArray{Bool} ,
241258 dims:: PDLPDims
242259 )
260+
261+ # Set the block count to the number of multiprocessors on the current CUDA device
262+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
243263
244264 # Add the constraint
245265 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_LP_lower_bound_kernel (
@@ -351,6 +371,9 @@ function add_best_obj_LP_constraints(
351371 n_points:: Int32 ,
352372 num_linearizations:: Int ,
353373 )
374+
375+ # Set the block count to the number of multiprocessors on the current CUDA device
376+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
354377
355378 # Add the best constraint, based on the comparison vector, num_linearizations times
356379 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_best_obj_LP_constraints_kernel (
@@ -393,6 +416,9 @@ function add_best_cons_LP_constraints(
393416 num_linearizations:: Int ;
394417 geq:: Bool = true ,
395418 )
419+
420+ # Set the block count to the number of multiprocessors on the current CUDA device
421+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
396422
397423 # Add the best constraint, based on the comparison vector, num_linearizations times
398424 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_best_cons_LP_constraints_kernel (
@@ -431,6 +457,9 @@ function add_multiple_LP_lower_bound(
431457 active_constraint:: CuArray{Bool} ,
432458 n_points:: Int32 ,
433459 )
460+
461+ # Set the block count to the number of multiprocessors on the current CUDA device
462+ GPU_blocks = Int32 (CUDA. attribute (CUDA. device (), CUDA. DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT))
434463
435464 # Add the constraint
436465 CUDA. @sync @cuda blocks= GPU_blocks threads= 256 add_multiple_LP_lower_bound_kernel (
0 commit comments